I have a csv with over 4M lines that I'm loading into an array.
csv: EURUSD,20010102,230100,0.9507,0.9507,0.9507,0.9507,4
This operation takes about 3.5 minutes.
...
/*
 * One price record. The four fields are fixed 7-byte character buffers stored
 * inline in the struct: room for the 6 characters FillRates copies plus the
 * terminating NUL.
 */
typedef struct Rates_t {
    char open[7];
    char high[7];
    char low[7];
    char close[7];
} Rates_t;

/*
 * Copy `length` bytes, starting at src[start], into the buffer that *dst
 * points to, then store a NUL terminator at index `length`.
 *
 * The destination must therefore hold at least length + 1 bytes. The copy
 * does not stop at a NUL in the source. *dst itself is never reassigned.
 */
void Substr(char *src, char **dst, int start, int length) {
    char *ptr1 = *dst;
    char *ptr2 = src+start;
    int i;
    for (i = 0; i < length; i++) {
        *(ptr1 + i) = *(ptr2 + i);
    }
    (*dst)[length] = '\0';
}

/*
 * Copy the first 6 characters at `tmp` into `price` and NUL-terminate it.
 * &price is the address of this function's own parameter copy; Substr only
 * reads the pointer through it.
 */
void FillRates(char *tmp, char *price) {
    Substr(tmp, &price, 0, 6);
}

/*
 * Read EURUSD.txt line by line and append one Rates_t per line to a heap
 * array that is grown with realloc.
 *
 * NOTE(review): line, tmp, skip, sz and i are declared in the elided parts;
 * their types and initial values are not visible in this excerpt.
 */
bool BacktestServer() {
    ...
    /* r only provides an initial target for rates; the malloc on the third
       line below replaces that pointer immediately. */
    Rates_t r = { {0}, {0}, {0}, {0} };
    Rates_t *rates = &r;
    rates = (Rates_t *) malloc(sizeof(Rates_t));
    FILE *f;
    /* Report the failure and exit with status 1 if the file cannot be opened. */
    if (!(f = fopen("EURUSD.txt", "r"))) {
        fprintf(stderr, "Unable to open 'EURUSD.txt' for reading.\n");
        exit(1);
    }
    ...
    /* One iteration per line; fgets stores at most 71 characters plus NUL. */
    while (fgets(line, 72, f)) {
        tmp = line;
        /* Step past the first three commas so tmp points at the fourth field.
           The strchr result is incremented without a NULL check. */
        for (skip = 0; skip < 3; skip++) {
            tmp = strchr(tmp, ',');
            tmp++;
        }
        /* Grow the array by exactly one record per line. The realloc result
           is assigned straight back to rates without a failure check. */
        sz += sizeof(Rates_t);
        rates = (Rates_t *) realloc(rates, sz);
        /* Copy four consecutive comma-separated fields into record i; each
           strchr/tmp++ pair moves to the character after the next comma. */
        FillRates(tmp, rates[i].open);
        tmp = strchr(tmp, ',');
        tmp++;
        FillRates(tmp, rates[i].high);
        tmp = strchr(tmp, ',');
        tmp++;
        FillRates(tmp, rates[i].low);
        tmp = strchr(tmp, ',');
        tmp++;
        FillRates(tmp, rates[i].close);
        i++;
        /* The line buffer is freed and a fresh 72-byte buffer is allocated
           on every iteration. */
        free(line);
        line = NULL;
        line = (char *) malloc(72 * sizeof(char));
    }
    ...
}
This takes about 1 minute.
...
/*
 * One price record. Each field is a pointer; BacktestServer points every
 * field at its own separately allocated 7-byte buffer.
 */
typedef struct Rates_t {
    char *open;
    char *high;
    char *low;
    char *close;
} Rates_t;

/*
 * Copy `length` bytes, starting at src[start], into the buffer that *dst
 * points to, then store a NUL terminator at index `length`.
 *
 * The destination must therefore hold at least length + 1 bytes. The copy
 * does not stop at a NUL in the source. *dst itself is never reassigned.
 */
void Substr(char *src, char **dst, int start, int length) {
    char *ptr1 = *dst;
    char *ptr2 = src+start;
    int i;
    for (i = 0; i < length; i++) {
        *(ptr1 + i) = *(ptr2 + i);
    }
    (*dst)[length] = '\0';
}

/*
 * Copy the first 6 characters at `tmp` into `price` and NUL-terminate it.
 * &price is the address of this function's own parameter copy; Substr only
 * reads the pointer through it.
 */
void FillRates(char *tmp, char *price) {
    Substr(tmp, &price, 0, 6);
}

/*
 * Read EURUSD.txt line by line and append one Rates_t per line to a heap
 * array that is grown with realloc; each of the four fields gets its own
 * 7-byte heap buffer.
 *
 * NOTE(review): line, tmp, skip, sz and i are declared in the elided parts;
 * their types and initial values are not visible in this excerpt.
 */
bool BacktestServer() {
    ...
    /* r only provides an initial target for rates; the malloc on the third
       line below replaces that pointer immediately. */
    Rates_t r = { NULL, NULL, NULL, NULL };
    Rates_t *rates = &r;
    rates = (Rates_t *) malloc(sizeof(Rates_t));
    FILE *f;
    /* Report the failure and exit with status 1 if the file cannot be opened. */
    if (!(f = fopen("EURUSD.txt", "r"))) {
        fprintf(stderr, "Unable to open 'EURUSD.txt' for reading.\n");
        exit(1);
    }
    ...
    /* One iteration per line; fgets stores at most 71 characters plus NUL. */
    while (fgets(line, 72, f)) {
        tmp = line;
        /* Step past the first three commas so tmp points at the fourth field.
           The strchr result is incremented without a NULL check. */
        for (skip = 0; skip < 3; skip++) {
            tmp = strchr(tmp, ',');
            tmp++;
        }
        /* Grow the array by exactly one record per line. The realloc result
           is assigned straight back to rates without a failure check. */
        sz += sizeof(Rates_t);
        rates = (Rates_t *) realloc(rates, sz);
        /* For each of the four fields: allocate 7 bytes, copy 6 characters
           into them, then move tmp to the character after the next comma.
           The malloc results are used without a NULL check. */
        rates[i].open = (char *) malloc(7 * sizeof(char));
        FillRates(tmp, rates[i].open);
        tmp = strchr(tmp, ',');
        tmp++;
        rates[i].high = (char *) malloc(7 * sizeof(char));
        FillRates(tmp, rates[i].high);
        tmp = strchr(tmp, ',');
        tmp++;
        rates[i].low = (char *) malloc(7 * sizeof(char));
        FillRates(tmp, rates[i].low);
        tmp = strchr(tmp, ',');
        tmp++;
        rates[i].close = (char *) malloc(7 * sizeof(char));
        FillRates(tmp, rates[i].close);
        i++;
        /* The line buffer is freed and a fresh 72-byte buffer is allocated
           on every iteration. */
        free(line);
        line = NULL;
        line = (char *) malloc(72 * sizeof(char));
    }
    ...
}
Using either memcpy or snprintf, the program will be a few seconds longer.
/*
 * memcpy variant: copy exactly `length` bytes from src + start into the
 * buffer *dst points to, then NUL-terminate at index `length`.
 * The destination must hold at least length + 1 bytes.
 */
void Substr(char *src, char **dst, int start, int length) {
    memcpy(*dst, src+start, length);
    (*dst)[length] = '\0';
}

/*
 * snprintf variant: format the string at src + start into *dst, limited to
 * `length` characters plus the terminator. Unlike the memcpy variant this
 * stops early at a NUL in the source. snprintf already terminates its
 * output; the explicit store afterwards additionally sets index `length`.
 */
void Substr(char *src, char **dst, int start, int length) {
    snprintf(*dst, length + 1, "%s", src+start);
    (*dst)[length] = '\0';
}
From the consensus online, the static array should be faster than the dynamic array. If anyone needs more information I'll edit the post to that effect.
UPDATE:
I increased the allocation not to 2, as suggested, but to 4096, and I'm still getting the same results for the dynamic array version: about a minute or less. The static array version has decreased to about 2.75 minutes.
The initial allocation:
/* Initial allocation: sz is a byte count sized for 256 Rates_t records. */
int sz = 256 * sizeof(Rates_t);
rates = (Rates_t *) malloc(sz);
The reallocation:
if (realloc_count == 256) { sz += 256 * sizeof(Rates_t); rates = (Rates_t *) realloc(rates, sz); realloc_count = 0; } realloc_count++; I am on a 64-bit Windows machine, but I compile 32-bit programs via cygwin gcc. On the other hand, on 64-bit Linux in a VM, the run times are significantly lower, but the ordering is reversed: the dynamically allocated version takes longer than the static version. On Linux, dynamic memory = ~20-30 seconds, static = ~15 seconds. On Linux, with allocation sizes of 1, 2, 256, 4096 or 524,288, there was little to no change in speed. When I increased the allocation to 524,288 on cygwin, I got ~6 seconds for static allocation and ~8 seconds for dynamic allocation.
Did you declare `r` just so you can initialize `rates` to `&r`? That's unnecessary; you could skip that and do `Rates_t *rates = malloc(sizeof(Rates_t));`.