/*
 * Converts the datetime value 'val', expressed in unit 'fr', by delegating
 * to convert_datetime_to_datetimestruct() with 'result' as the destination.
 *
 * The callee is handed a temporary metadata record whose 'base' is 'fr'
 * and whose 'num' is 1. Any status returned by the callee is discarded.
 */
void pandas_datetime_to_datetimestruct(npy_datetime val,
                                       PANDAS_DATETIMEUNIT fr,
                                       pandas_datetimestruct *result)
{
    pandas_datetime_metadata unit_info;

    unit_info.num = 1;
    unit_info.base = fr;

    convert_datetime_to_datetimestruct(&unit_info, val, result);
}
/*
 * Forwards 'fr', 'val' and 'result' unchanged to
 * convert_datetime_to_datetimestruct(); nothing else is done here and any
 * status returned by the callee is discarded.
 *
 * NOTE(review): this is a second definition of
 * pandas_datetime_to_datetimestruct in this file. The other definition and
 * the "now" branch of parse_iso_8601_datetime both pass a pointer to a
 * pandas_datetime_metadata as the first argument, whereas this one passes
 * the bare unit. Only one form can match the callee's real signature --
 * confirm which, and remove the stale definition.
 */
void pandas_datetime_to_datetimestruct(npy_datetime val, PANDAS_DATETIMEUNIT fr, pandas_datetimestruct *result) { convert_datetime_to_datetimestruct(fr, val, result); }
/*
 * Parses (almost) standard ISO 8601 date strings. The differences are:
 *
 * + The date "20100312" is parsed as the year 20100312, not as
 *   equivalent to "2010-03-12". The '-' in the dates are not optional.
 * + Only seconds may have a decimal point, with up to 18 digits after it
 *   (maximum attoseconds precision).
 * + Either a 'T' as in ISO 8601 or a ' ' may be used to separate
 *   the date and the time. Both are treated equivalently.
 * + Doesn't (yet) handle the "YYYY-DDD" or "YYYY-Www" formats.
 * + Doesn't handle leap seconds (seconds value has 60 in these cases).
 * + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow
 * + Accepts the special values "Today" (current day according to local
 *   time) and "Now" (current time in UTC), matched case-insensitively.
 *   "NaT" and the empty string are NOT recognised here; they are
 *   reported as parse errors.
 * + Unlike NumPy, a date-time with no "Z" and no UTC offset is treated
 *   as naive: it is returned as written, with no local-to-UTC shift.
 *
 * 'str' must be a NULL-terminated string, and 'len' must be its length.
 * 'unit' should contain -1 if the unit is unknown, or the unit
 *      which will be used if it is.
 * 'casting' controls how the detected unit from the string is allowed
 *           to be cast to the 'unit' parameter.
 *
 * 'out' gets filled with the parsed date-time. A "+hh[:mm]" / "-hh[:mm]"
 *      offset is applied so that the stored fields are in UTC.
 * 'out_local' (optional, may be NULL) is always set to 0 by this
 *      implementation: 'now', 'today', "Z", explicit offsets and naive
 *      strings are all reported as non-local.
 * 'out_bestunit' (optional) gives a suggested unit based on the amount of
 *      resolution provided in the string ('D' for 'today', 's' for 'now').
 *      It is only written on paths that reach a successful parse.
 * 'out_special' (optional) gets set to 1 if the parsed time was 'today'
 *      or 'now', and to 0 otherwise.
 *
 * Returns 0 on success, -1 on failure. For 'now' the return value is
 * whatever convert_datetime_to_datetimestruct() returns. Malformed or
 * out-of-range input sets a Python ValueError; a unit that cannot be cast
 * to 'unit' under 'casting' sets a Python TypeError.
 */
int parse_iso_8601_datetime(char *str, int len,
                            PANDAS_DATETIMEUNIT unit,
                            NPY_CASTING casting,
                            pandas_datetimestruct *out,
                            npy_bool *out_local,
                            PANDAS_DATETIMEUNIT *out_bestunit,
                            npy_bool *out_special)
{
    int year_leap = 0;
    int year_negative = 0;
    int i, numdigits;
    char *substr;
    /* Must be an int: a 'char' here would truncate lengths above CHAR_MAX */
    int sublen;
    PANDAS_DATETIMEUNIT bestunit;

    /* Initialize the output to all zeros */
    memset(out, 0, sizeof(pandas_datetimestruct));
    out->month = 1;
    out->day = 1;

    /*
     * No path below ever reports a local time, so give the caller a
     * defined value up front instead of leaving it unwritten on the
     * naive (no time zone) path.
     */
    if (out_local != NULL) {
        *out_local = 0;
    }

    /*
     * The string "today" means take today's date in local time, and
     * convert it to a date representation. This date representation, if
     * forced into a time unit, will be at midnight UTC.
     * This is perhaps a little weird, but done so that the
     * 'datetime64[D]' type produces the date you expect, rather than
     * switching to an adjacent day depending on the current time and your
     * timezone.
     */
    if (len == 5 && tolower((unsigned char)str[0]) == 't' &&
                    tolower((unsigned char)str[1]) == 'o' &&
                    tolower((unsigned char)str[2]) == 'd' &&
                    tolower((unsigned char)str[3]) == 'a' &&
                    tolower((unsigned char)str[4]) == 'y') {
        NPY_TIME_T rawtime = 0;
        struct tm tm_;

        time(&rawtime);
        if (get_localtime(&rawtime, &tm_) < 0) {
            return -1;
        }
        out->year = tm_.tm_year + 1900;
        out->month = tm_.tm_mon + 1;
        out->day = tm_.tm_mday;

        bestunit = PANDAS_FR_D;

        /*
         * Indicate that this was a special value, and
         * is a date (unit 'D').
         */
        if (out_bestunit != NULL) {
            *out_bestunit = bestunit;
        }
        if (out_special != NULL) {
            *out_special = 1;
        }

        /* Check the casting rule */
        if (unit != -1 && !can_cast_datetime64_units(bestunit, unit,
                                                     casting)) {
            PyErr_Format(PyExc_TypeError, "Cannot parse \"%s\" as unit "
                         "'%s' using casting rule %s",
                         str, _datetime_strings[unit],
                         npy_casting_to_string(casting));
            return -1;
        }

        return 0;
    }

    /* The string "now" resolves to the current UTC time */
    if (len == 3 && tolower((unsigned char)str[0]) == 'n' &&
                    tolower((unsigned char)str[1]) == 'o' &&
                    tolower((unsigned char)str[2]) == 'w') {
        NPY_TIME_T rawtime = 0;
        pandas_datetime_metadata meta;

        time(&rawtime);

        /* Set up a dummy metadata for the conversion */
        meta.base = PANDAS_FR_s;
        meta.num = 1;

        bestunit = PANDAS_FR_s;

        /*
         * Indicate that this was a special value, and
         * use 's' because the time() function has resolution
         * seconds.
         */
        if (out_bestunit != NULL) {
            *out_bestunit = bestunit;
        }
        if (out_special != NULL) {
            *out_special = 1;
        }

        /* Check the casting rule */
        if (unit != -1 && !can_cast_datetime64_units(bestunit, unit,
                                                     casting)) {
            PyErr_Format(PyExc_TypeError, "Cannot parse \"%s\" as unit "
                         "'%s' using casting rule %s",
                         str, _datetime_strings[unit],
                         npy_casting_to_string(casting));
            return -1;
        }

        return convert_datetime_to_datetimestruct(&meta, rawtime, out);
    }

    /* Anything else isn't a special value */
    if (out_special != NULL) {
        *out_special = 0;
    }

    substr = str;
    sublen = len;

    /* Skip leading whitespace */
    while (sublen > 0 && isspace((unsigned char)*substr)) {
        ++substr;
        --sublen;
    }

    /*
     * Leading '-' sign for negative year. Remember it in a flag: the sign
     * is not necessarily at str[0] once leading whitespace was skipped.
     */
    if (sublen > 0 && *substr == '-') {
        year_negative = 1;
        ++substr;
        --sublen;
    }

    if (sublen == 0) {
        goto parse_error;
    }

    /* PARSE THE YEAR (digits until the '-' character) */
    out->year = 0;
    while (sublen > 0 && isdigit((unsigned char)*substr)) {
        out->year = 10 * out->year + (*substr - '0');
        ++substr;
        --sublen;
    }

    /* Negate the year if necessary */
    if (year_negative) {
        out->year = -out->year;
    }
    /* Check whether it's a leap-year */
    year_leap = is_leapyear(out->year);

    /* Next character must be a '-' or the end of the string */
    if (sublen == 0) {
        bestunit = PANDAS_FR_Y;
        goto finish;
    }
    else if (*substr == '-') {
        ++substr;
        --sublen;
    }
    else {
        goto parse_error;
    }

    /* Can't have a trailing '-' */
    if (sublen == 0) {
        goto parse_error;
    }

    /* PARSE THE MONTH (2 digits) */
    if (sublen >= 2 && isdigit((unsigned char)substr[0]) &&
                       isdigit((unsigned char)substr[1])) {
        out->month = 10 * (substr[0] - '0') + (substr[1] - '0');

        if (out->month < 1 || out->month > 12) {
            PyErr_Format(PyExc_ValueError,
                         "Month out of range in datetime string \"%s\"", str);
            goto error;
        }
        substr += 2;
        sublen -= 2;
    }
    else {
        goto parse_error;
    }

    /* Next character must be a '-' or the end of the string */
    if (sublen == 0) {
        bestunit = PANDAS_FR_M;
        goto finish;
    }
    else if (*substr == '-') {
        ++substr;
        --sublen;
    }
    else {
        goto parse_error;
    }

    /* Can't have a trailing '-' */
    if (sublen == 0) {
        goto parse_error;
    }

    /* PARSE THE DAY (2 digits) */
    if (sublen >= 2 && isdigit((unsigned char)substr[0]) &&
                       isdigit((unsigned char)substr[1])) {
        out->day = 10 * (substr[0] - '0') + (substr[1] - '0');

        if (out->day < 1 ||
            out->day > _days_per_month_table[year_leap][out->month-1]) {
            PyErr_Format(PyExc_ValueError,
                         "Day out of range in datetime string \"%s\"", str);
            goto error;
        }
        substr += 2;
        sublen -= 2;
    }
    else {
        goto parse_error;
    }

    /* Next character must be a 'T', ' ', or end of string */
    if (sublen == 0) {
        bestunit = PANDAS_FR_D;
        goto finish;
    }
    else if (*substr != 'T' && *substr != ' ') {
        goto parse_error;
    }
    else {
        ++substr;
        --sublen;
    }

    /* PARSE THE HOURS (2 digits) */
    if (sublen >= 2 && isdigit((unsigned char)substr[0]) &&
                       isdigit((unsigned char)substr[1])) {
        out->hour = 10 * (substr[0] - '0') + (substr[1] - '0');

        if (out->hour < 0 || out->hour >= 24) {
            PyErr_Format(PyExc_ValueError,
                         "Hours out of range in datetime string \"%s\"", str);
            goto error;
        }
        substr += 2;
        sublen -= 2;
    }
    else {
        goto parse_error;
    }

    /* Next character must be a ':' or the end of the string */
    if (sublen > 0 && *substr == ':') {
        ++substr;
        --sublen;
    }
    else {
        bestunit = PANDAS_FR_h;
        goto parse_timezone;
    }

    /* Can't have a trailing ':' */
    if (sublen == 0) {
        goto parse_error;
    }

    /* PARSE THE MINUTES (2 digits) */
    if (sublen >= 2 && isdigit((unsigned char)substr[0]) &&
                       isdigit((unsigned char)substr[1])) {
        out->min = 10 * (substr[0] - '0') + (substr[1] - '0');

        if (out->min < 0 || out->min >= 60) {
            PyErr_Format(PyExc_ValueError,
                         "Minutes out of range in datetime string \"%s\"", str);
            goto error;
        }
        substr += 2;
        sublen -= 2;
    }
    else {
        goto parse_error;
    }

    /* Next character must be a ':' or the end of the string */
    if (sublen > 0 && *substr == ':') {
        ++substr;
        --sublen;
    }
    else {
        bestunit = PANDAS_FR_m;
        goto parse_timezone;
    }

    /* Can't have a trailing ':' */
    if (sublen == 0) {
        goto parse_error;
    }

    /* PARSE THE SECONDS (2 digits) */
    if (sublen >= 2 && isdigit((unsigned char)substr[0]) &&
                       isdigit((unsigned char)substr[1])) {
        out->sec = 10 * (substr[0] - '0') + (substr[1] - '0');

        if (out->sec < 0 || out->sec >= 60) {
            PyErr_Format(PyExc_ValueError,
                         "Seconds out of range in datetime string \"%s\"", str);
            goto error;
        }
        substr += 2;
        sublen -= 2;
    }
    else {
        goto parse_error;
    }

    /* Next character may be a '.' indicating fractional seconds */
    if (sublen > 0 && *substr == '.') {
        ++substr;
        --sublen;
    }
    else {
        bestunit = PANDAS_FR_s;
        goto parse_timezone;
    }

    /*
     * PARSE THE MICROSECONDS (0 to 6 digits). The field is always scaled
     * by 10 six times, so missing trailing digits count as zeros.
     */
    numdigits = 0;
    for (i = 0; i < 6; ++i) {
        out->us *= 10;
        if (sublen > 0 && isdigit((unsigned char)*substr)) {
            out->us += (*substr - '0');
            ++substr;
            --sublen;
            ++numdigits;
        }
    }

    if (sublen == 0 || !isdigit((unsigned char)*substr)) {
        if (numdigits > 3) {
            bestunit = PANDAS_FR_us;
        }
        else {
            bestunit = PANDAS_FR_ms;
        }
        goto parse_timezone;
    }

    /* PARSE THE PICOSECONDS (0 to 6 digits) */
    numdigits = 0;
    for (i = 0; i < 6; ++i) {
        out->ps *= 10;
        if (sublen > 0 && isdigit((unsigned char)*substr)) {
            out->ps += (*substr - '0');
            ++substr;
            --sublen;
            ++numdigits;
        }
    }

    if (sublen == 0 || !isdigit((unsigned char)*substr)) {
        if (numdigits > 3) {
            bestunit = PANDAS_FR_ps;
        }
        else {
            bestunit = PANDAS_FR_ns;
        }
        goto parse_timezone;
    }

    /* PARSE THE ATTOSECONDS (0 to 6 digits) */
    numdigits = 0;
    for (i = 0; i < 6; ++i) {
        out->as *= 10;
        if (sublen > 0 && isdigit((unsigned char)*substr)) {
            out->as += (*substr - '0');
            ++substr;
            --sublen;
            ++numdigits;
        }
    }

    if (numdigits > 3) {
        bestunit = PANDAS_FR_as;
    }
    else {
        bestunit = PANDAS_FR_fs;
    }

parse_timezone:
    if (sublen == 0) {
        /*
         * Unlike NumPy, treat a missing time zone as naive: the fields are
         * returned as written, without a local-to-UTC conversion, and the
         * result is reported as non-local.
         */
        goto finish;
    }

    /* UTC specifier */
    if (*substr == 'Z') {
        /* "Z" means not local (out_local is already 0) */
        if (sublen == 1) {
            goto finish;
        }
        else {
            ++substr;
            --sublen;
        }
    }
    /* Time zone offset */
    else if (*substr == '-' || *substr == '+') {
        int offset_neg = 0, offset_hour = 0, offset_minute = 0;

        /*
         * Since "local" means local with respect to the current
         * machine, an explicit offset counts as non-local
         * (out_local is already 0).
         */
        if (*substr == '-') {
            offset_neg = 1;
        }
        ++substr;
        --sublen;

        /* The hours offset */
        if (sublen >= 2 && isdigit((unsigned char)substr[0]) &&
                           isdigit((unsigned char)substr[1])) {
            offset_hour = 10 * (substr[0] - '0') + (substr[1] - '0');
            substr += 2;
            sublen -= 2;
            if (offset_hour >= 24) {
                PyErr_Format(PyExc_ValueError,
                             "Timezone hours offset out of range "
                             "in datetime string \"%s\"", str);
                goto error;
            }
        }
        else {
            goto parse_error;
        }

        /* The minutes offset is optional */
        if (sublen > 0) {
            /* Optional ':' */
            if (*substr == ':') {
                ++substr;
                --sublen;
            }

            /* The minutes offset (at the end of the string) */
            if (sublen >= 2 && isdigit((unsigned char)substr[0]) &&
                               isdigit((unsigned char)substr[1])) {
                offset_minute = 10 * (substr[0] - '0') + (substr[1] - '0');
                substr += 2;
                sublen -= 2;
                if (offset_minute >= 60) {
                    PyErr_Format(PyExc_ValueError,
                                 "Timezone minutes offset out of range "
                                 "in datetime string \"%s\"", str);
                    goto error;
                }
            }
            else {
                goto parse_error;
            }
        }

        /* Apply the time zone offset */
        if (offset_neg) {
            offset_hour = -offset_hour;
            offset_minute = -offset_minute;
        }
        add_minutes_to_datetimestruct(out, -60 * offset_hour - offset_minute);
    }

    /* Skip trailing whitespace */
    while (sublen > 0 && isspace((unsigned char)*substr)) {
        ++substr;
        --sublen;
    }

    if (sublen != 0) {
        goto parse_error;
    }

finish:
    if (out_bestunit != NULL) {
        *out_bestunit = bestunit;
    }

    /* Check the casting rule */
    if (unit != -1 && !can_cast_datetime64_units(bestunit, unit,
                                                 casting)) {
        PyErr_Format(PyExc_TypeError, "Cannot parse \"%s\" as unit "
                     "'%s' using casting rule %s",
                     str, _datetime_strings[unit],
                     npy_casting_to_string(casting));
        return -1;
    }

    return 0;

parse_error:
    PyErr_Format(PyExc_ValueError,
                 "Error parsing datetime string \"%s\" at position %d",
                 str, (int)(substr-str));
    return -1;

error:
    return -1;
}