static int 
importer_dates_check (char **labels, BookFlag *pflags,
		      DATASET *newset, PRN *prn, 
		      int *err)
{
    int d, t;
    char dstr[12];
    char *s;
    int ret = 0;

    for (t=0; t<newset->n; t++) {
	s = labels[t];
	if (s == NULL || *s == '\0') {
	    fprintf(stderr, "importer_dates_check: got blank label\n");
	    return 0;
	}
    }

    *err = dataset_allocate_obs_markers(newset);
    if (*err) {
	return 0;
    }

    for (t=0; t<newset->n && !*err; t++) {
	s = labels[t];
	if (*s == '"' || *s == '\'') s++;
	if (*pflags & BOOK_NUMERIC_DATES) {
	    if (sscanf(s, "%d", &d)) {
		MS_excel_date_string(dstr, d, 0, *pflags & BOOK_DATE_BASE_1904);
		s = dstr;
	    } else {
		pprintf(prn, "Bad date on row %d: '%s'\n", t+1, s);
		*err = E_DATA;
	    }
	}
	strncat(newset->S[t], s, OBSLEN - 1);
    }

    if (!*err) {
	int reversed = 0;

	ret = test_markers_for_dates(newset, &reversed, NULL, prn);
	if (reversed) {
	    *pflags |= BOOK_DATA_REVERSED;
	}
    }

    if (newset->markers != DAILY_DATE_STRINGS) {
	dataset_destroy_obs_markers(newset);
    }

    return ret;
}
Exemple #2
0
static void xlsx_dates_check (DATASET *dset)
{
    int t, maybe_dates = 1;
    int date_min = 0, date_max = 0;
    int d, delta_min = 0, delta_max = 0;

#if DATE_DEBUG
    fprintf(stderr, "xlsx_dates_check: starting\n");
#endif

    /* We're dealing here with the case where our prior heuristics
       suggest we got an "observations" column, yet the values we
       found there were numeric (and we converted them to strings).
       Here we see if it might be reasonable to interpret the
       labels as representing MS dates (days since Dec 31, 1899).

       For this purpose we'll require that all the obs labels are 
       integer strings, and that the gap between successive values
       should be constant, or variable to a degree that's consistent
       with a sane time-series frequency.

       We should bear in mind, however, that the numeric values that
       we started with could be plain years rather than MS dates.
    */

    for (t=0; t<dset->n && maybe_dates; t++) {
	if (!integer_string(dset->S[t])) {
#if DATE_DEBUG
	    fprintf(stderr, "S[%d] = '%s', giving up\n", t, dset->S[t]);
#endif
	    maybe_dates = 0;
	} else if (t == 0) {
	    if (!strcmp(dset->S[0], "1")) {
		maybe_dates = 0;
	    } else {
		date_min = date_max = atoi(dset->S[t]);
	    }
	} else {
	    d = atoi(dset->S[t]);
	    if (d < date_min) {
		date_min = d;
	    }
	    if (d > date_max) {
		date_max = d;
	    }
	    d = atoi(dset->S[t]) - atoi(dset->S[t-1]);
	    if (t == 1) {
		delta_min = delta_max = d;
	    } else if (d < delta_min) {
#if DATE_DEBUG
		fprintf(stderr, " at t=%d, delta_min = %d - %d = %d\n",
			t, atoi(dset->S[t]), atoi(dset->S[t-1]), d);
#endif
		delta_min = d;
	    } else if (d > delta_max) {
		delta_max = d;
	    }
	}
    }

#if DATE_DEBUG
    fprintf(stderr, "after obs loop, maybe_dates=%d\n"
	    " (date_min=%d, date_max=%d, delta_min=%d, delta_max=%d)\n",
	    maybe_dates, date_min, date_max, delta_min, delta_max);
#endif

    if (maybe_dates && delta_max < 0) {
	/* allow for the possibility that time runs backwards */
	int tmp = delta_min;

	delta_min = -delta_max;
	delta_max = -tmp;
	fprintf(stderr, "xlsx_dates_check: diffmin=%d, diffmax=%d\n", 
		delta_min, delta_max);
    }

    if (maybe_dates) {
	/* are these things in fact more plausibly years? */
	if (delta_min == 1 && delta_max == 1 &&
	    date_min > 1749 && date_max < 2050) {
#if DATE_DEBUG
	    fprintf(stderr, "assuming these are years, not MS dates\n");
#endif
	    maybe_dates = 0;
	}
    }

    if (maybe_dates) {
	if (delta_min >= 364 && delta_max <= 365) {
	    ; /* annual? */
	} else if (delta_min >= 90 && delta_max <= 92) {
	    ; /* quarterly? */
	} else if (delta_min >= 28 && delta_max <= 31) {
	    ; /* monthly? */
	} else if (delta_min == 7 && delta_max == 7) {
	    ; /* weekly? */
	} else if (delta_min == 1 && delta_max <= 5) {
	    ; /* daily? */
	} else {
	    /* unsupported frequency or nonsensical */
#if DATE_DEBUG
	    fprintf(stderr, "delta_max = %d, delta_min = %d, unsupported\n", 
		    delta_max, delta_min);
#endif
	    maybe_dates = 0;
	} 
    }

#if DATE_DEBUG
    fprintf(stderr, "xlsx_dates_check: maybe_dates = %d\n", maybe_dates);
#endif

    if (maybe_dates) {
	char datestr[OBSLEN];

	for (t=0; t<dset->n; t++) {
	    /* FIXME detect use of 1904-based dates? */
	    MS_excel_date_string(datestr, atoi(dset->S[t]), 0, 0);
	    strcpy(dset->S[t], datestr);
	}
    }
}