/*
 * importer_dates_check: copy the imported row labels into the
 * observation markers of @newset and test whether they look like dates.
 *
 * @labels: array of (at least) newset->n label strings.
 * @pflags: import flags. BOOK_NUMERIC_DATES and BOOK_DATE_BASE_1904 are
 *   read; BOOK_DATA_REVERSED is OR'ed in when test_markers_for_dates()
 *   reports that the observations are in reverse order.
 * @newset: dataset whose observation markers are (re)built here.
 * @prn: printer that receives the "Bad date" message.
 * @err: location to receive an error code.
 *
 * Returns: 0 immediately (leaving *err untouched) if any label is NULL
 * or empty; 0 if the markers cannot be allocated or a numeric label
 * cannot be parsed (with *err set); otherwise whatever
 * test_markers_for_dates() returns.
 */
static int importer_dates_check (char **labels, BookFlag *pflags,
                                 DATASET *newset, PRN *prn,
                                 int *err)
{
    int d, t;
    char dstr[12];
    char *s;
    int ret = 0;

    /* a single blank label rules out the dates interpretation */
    for (t=0; t<newset->n; t++) {
        s = labels[t];
        if (s == NULL || *s == '\0') {
            fprintf(stderr, "importer_dates_check: got blank label\n");
            return 0;
        }
    }

    *err = dataset_allocate_obs_markers(newset);
    if (*err) {
        return 0;
    }

    for (t=0; t<newset->n && !*err; t++) {
        s = labels[t];
        /* skip a leading quote character, if present */
        if (*s == '"' || *s == '\'') {
            s++;
        }
        if (*pflags & BOOK_NUMERIC_DATES) {
            /* Require exactly one successful conversion: sscanf()
               returns EOF (non-zero) if the input is exhausted before
               any number is read, e.g. when the label consisted only of
               a quote character, and in that case @d is not assigned.
            */
            if (sscanf(s, "%d", &d) == 1) {
                MS_excel_date_string(dstr, d, 0, *pflags & BOOK_DATE_BASE_1904);
                s = dstr;
            } else {
                pprintf(prn, "Bad date on row %d: '%s'\n", t+1, s);
                *err = E_DATA;
            }
        }
        /* NOTE(review): this appends to S[t], so it presumably relies on
           the freshly allocated markers being empty strings with room
           for OBSLEN bytes -- confirm against
           dataset_allocate_obs_markers().
        */
        strncat(newset->S[t], s, OBSLEN - 1);
    }

    if (!*err) {
        int reversed = 0;

        ret = test_markers_for_dates(newset, &reversed, NULL, prn);
        if (reversed) {
            *pflags |= BOOK_DATA_REVERSED;
        }
    }

    /* keep the markers only if they have been flagged as daily date
       strings; in every other case (including error) discard them */
    if (newset->markers != DAILY_DATE_STRINGS) {
        dataset_destroy_obs_markers(newset);
    }

    return ret;
}
/*
 * xlsx_dates_check: decide whether the observation labels in @dset,
 * which are integer strings, are plausibly MS date serial numbers and,
 * if so, overwrite each label in place with the string produced by
 * MS_excel_date_string().
 *
 * @dset: dataset whose S[] labels (dset->n of them) are examined and
 *   possibly rewritten.
 *
 * The labels are left untouched unless every one is an integer string,
 * the first is not "1", the values do not look like plain years, and
 * the gaps between successive values match one of the supported
 * frequencies (annual, quarterly, monthly, weekly, daily).
 */
static void xlsx_dates_check (DATASET *dset)
{
    int t, maybe_dates = 1;
    int date_min = 0, date_max = 0;
    int delta_min = 0, delta_max = 0;
    int prev = 0; /* numeric value of the previous label */

#if DATE_DEBUG
    fprintf(stderr, "xlsx_dates_check: starting\n");
#endif

    /* We're dealing here with the case where our prior heuristics
       suggest we got an "observations" column, yet the values we
       found there were numeric (and we converted them to strings).
       Here we see if it might be reasonable to interpret the labels
       as representing MS dates (days since Dec 31, 1899).

       For this purpose we'll require that all the obs labels are
       integer strings, and that the gap between successive values
       should be constant, or variable to a degree that's consistent
       with a sane time-series frequency.

       We should bear in mind, however, that the numeric values that
       we started with could be plain years rather than MS dates.
    */

    for (t=0; t<dset->n && maybe_dates; t++) {
        if (!integer_string(dset->S[t])) {
#if DATE_DEBUG
            fprintf(stderr, "S[%d] = '%s', giving up\n", t, dset->S[t]);
#endif
            maybe_dates = 0;
        } else if (t == 0) {
            /* a first label of "1" is not accepted as a date */
            if (!strcmp(dset->S[0], "1")) {
                maybe_dates = 0;
            } else {
                prev = date_min = date_max = atoi(dset->S[0]);
            }
        } else {
            /* parse each label once; @prev carries the prior value */
            int cur = atoi(dset->S[t]);
            int d = cur - prev;

            if (cur < date_min) {
                date_min = cur;
            }
            if (cur > date_max) {
                date_max = cur;
            }
            if (t == 1) {
                delta_min = delta_max = d;
            } else if (d < delta_min) {
#if DATE_DEBUG
                fprintf(stderr, " at t=%d, delta_min = %d - %d = %d\n",
                        t, cur, prev, d);
#endif
                delta_min = d;
            } else if (d > delta_max) {
                delta_max = d;
            }
            prev = cur;
        }
    }

#if DATE_DEBUG
    fprintf(stderr, "after obs loop, maybe_dates=%d\n"
            " (date_min=%d, date_max=%d, delta_min=%d, delta_max=%d)\n",
            maybe_dates, date_min, date_max, delta_min, delta_max);
#endif

    if (maybe_dates && delta_max < 0) {
        /* allow for the possibility that time runs backwards:
           all gaps are negative, so flip sign and swap the bounds */
        int tmp = delta_min;

        delta_min = -delta_max;
        delta_max = -tmp;
        fprintf(stderr, "xlsx_dates_check: diffmin=%d, diffmax=%d\n",
                delta_min, delta_max);
    }

    if (maybe_dates) {
        /* are these things in fact more plausibly years? */
        if (delta_min == 1 && delta_max == 1 &&
            date_min > 1749 && date_max < 2050) {
#if DATE_DEBUG
            fprintf(stderr, "assuming these are years, not MS dates\n");
#endif
            maybe_dates = 0;
        }
    }

    if (maybe_dates) {
        /* The upper bound for annual data is 366, not 365: the gap
           between the same calendar date in successive years is 366
           days whenever a Feb 29 intervenes, so a 365-day ceiling
           would reject any annual series that spans a leap year.
           The lower bound of 364 is kept so that everything accepted
           before is still accepted.
        */
        if (delta_min >= 364 && delta_max <= 366) {
            ; /* annual? */
        } else if (delta_min >= 90 && delta_max <= 92) {
            ; /* quarterly? */
        } else if (delta_min >= 28 && delta_max <= 31) {
            ; /* monthly? */
        } else if (delta_min == 7 && delta_max == 7) {
            ; /* weekly? */
        } else if (delta_min == 1 && delta_max <= 5) {
            ; /* daily? */
        } else {
            /* unsupported frequency or nonsensical */
#if DATE_DEBUG
            fprintf(stderr, "delta_max = %d, delta_min = %d, unsupported\n",
                    delta_max, delta_min);
#endif
            maybe_dates = 0;
        }
    }

#if DATE_DEBUG
    fprintf(stderr, "xlsx_dates_check: maybe_dates = %d\n", maybe_dates);
#endif

    if (maybe_dates) {
        char datestr[OBSLEN];

        for (t=0; t<dset->n; t++) {
            /* FIXME detect use of 1904-based dates? */
            MS_excel_date_string(datestr, atoi(dset->S[t]), 0, 0);
            /* NOTE(review): assumes each S[t] holds at least OBSLEN
               bytes -- confirm against the marker allocation code */
            strcpy(dset->S[t], datestr);
        }
    }
}