/* Look at the cell node @p, taken to be the top-left cell of the
   reading area.  *obscol is set to 1 when the cell's value type is
   VALUE_EMPTY, or when it is VALUE_STRING and import_obs_label()
   accepts its content; otherwise *obscol is left untouched.

   Returns the error code from node_get_vtype_and_content(); on a
   non-zero code *obscol is not modified.
*/

static int inspect_top_left (xmlNodePtr p, int *obscol)
{
    char *text = NULL;
    int vtype = 0;
    int err = node_get_vtype_and_content(p, &vtype, &text);

    if (err == 0) {
        if (vtype == VALUE_EMPTY ||
            (vtype == VALUE_STRING && import_obs_label(text))) {
            *obscol = 1;
        }
    }

    /* the content string is released here whether or not the lookup succeeded */
    free(text);

    return err;
}
/* Update @xinfo according to what is found in the first row of the
   reading area.  @r and @c are the cell's row and column, @stringcell
   is non-zero if the cell holds a string (given in @s), and @x is the
   cell's numeric value, which tests as na() when there is none.
   Only the first two columns of the first row are of interest; any
   other cell is ignored.
*/

static void xlsx_check_top_left (xlsx_info *xinfo, int r, int c,
                                 int stringcell, const char *s,
                                 double x)
{
    if (r != xinfo->yoffset + 1) {
        /* not on the first row of the reading area */
        return;
    }

    if (c == xinfo->xoffset + 1) {
        /* Top-left cell of the reading area: it may be blank, hold
           the first variable name, hold "obs" or the like, or hold
           the first numerical value.
        */
#if XDEBUG
        fprintf(stderr, "xlsx_check_top_left: r=%d, c=%d, x=%g, stringcell=%d, "
                "s='%s'\n", r, c, x, stringcell, s);
#endif
        if (!na(x)) {
            /* a valid number here means the top row does not
               carry variable names; the corner is not empty
            */
            xinfo->flags |= BOOK_AUTO_VARNAMES;
            xinfo->flags &= ~BOOK_TOP_LEFT_EMPTY;
        } else if (stringcell) {
            if (import_obs_label(s)) {
                /* the string looks like an observations heading */
                xinfo->flags |= BOOK_OBS_LABELS;
                xinfo->obscol = c;
            }
            /* a string of any kind means the corner is not empty */
            xinfo->flags &= ~BOOK_TOP_LEFT_EMPTY;
        }
    } else if (c == xinfo->xoffset + 2) {
        /* first row, second column */
        if (na(x)) {
            /* no number: take this row as the row of names */
            xinfo->namerow = r;
        } else {
            /* a number, so not a variable name */
            xinfo->flags |= BOOK_AUTO_VARNAMES;
        }
    }
}
/* Import data from the gnumeric file @fname into @dset.

   Workbook information is collected via wbook_get_info() using
   @list and @sheetname; when OPT_G is set in @opt the worksheet
   is chosen through wsheet_menu(), otherwise the parameters are
   validated by wbook_check_params().  The selected sheet is read
   into a temporary dataset, which is then handed to
   merge_or_replace_data().  Messages are written to @prn.

   Returns 0 on success, 1 on most failures, -1 if book->selected
   is -1 after sheet selection (treated as cancellation), or
   whatever non-zero code one of the called helpers produced.
*/

int gnumeric_get_data (const char *fname, int *list, char *sheetname,
                       DATASET *dset, gretlopt opt, PRN *prn)
{
    wbook gbook;
    wsheet gsheet;
    wbook *book = &gbook;
    wsheet *sheet = &gsheet;
    DATASET *newset = datainfo_new();
    int gui = (opt & OPT_G);
    int sheetnum = -1;
    int label_start = 1;
    int ts_markers = 0;
    char **ts_S = NULL;
    int n_blank = 0;
    int any_missing = 0;
    int pd = 0;
    int merge;
    int i, j, t;
    int err = 0;

    if (newset == NULL) {
        pputs(prn, _("Out of memory\n"));
        return 1;
    }

    wsheet_init(sheet);

    gretl_push_c_numeric_locale();

    if (wbook_get_info(fname, list, sheetname, book, prn)) {
        pputs(prn, _("Failed to get workbook info"));
        err = 1;
        goto getout;
    }

    wbook_print_info(book);

    if (book->nsheets == 0) {
        pputs(prn, _("No worksheets found"));
        err = 1;
        goto getout;
    }

    /* work out which sheet to read */
    if (gui) {
        int multi = (book->nsheets > 1);

        wsheet_menu(book, multi);
        sheetnum = multi ? book->selected : 0;
    } else {
        err = wbook_check_params(book);
        if (err) {
            gretl_errmsg_set(_("Invalid argument for worksheet import"));
        } else {
            sheetnum = (book->selected >= 0) ? book->selected : 0;
        }
    }

    if (book->selected == -1) {
        /* canceled */
        err = -1;
    }

    /* read the raw cell data from the selected sheet */
    if (!err && sheetnum >= 0) {
        fprintf(stderr, "Getting data...\n");
        if (!wsheet_setup(sheet, book, sheetnum)) {
            err = wsheet_get_data(fname, sheet, prn);
            if (!err) {
                wsheet_print_info(sheet);
                book->flags |= sheet->flags;
            }
        } else {
            pputs(prn, _("error in wsheet_setup()"));
            err = 1;
        }
    }

    if (err) {
        goto getout;
    }

    merge = (dset->Z != NULL);

    if (sheet->text_cols > 0) {
        book_set_obs_labels(book);
    }

    if (sheet->colheads == 0) {
        /* no heading row: labels start at the first row */
        book_set_auto_varnames(book);
        label_start = 0;
    }

    if (book_numeric_dates(book)) {
        puts("found calendar dates in first imported column");
    } else if (sheet->text_cols > 0) {
        puts("found label strings in first imported column");
    } else {
        puts("check for label strings in first imported column: not found");
    }

    newset->n = sheet->maxrow - sheet->row_offset;

    if (!sheet->colheads) {
        /* the first row holds data, so it counts as an observation */
        pputs(prn, _("it seems there are no variable names\n"));
        newset->n += 1;
    }

    if (book_numeric_dates(book) ||
        (sheet->colheads > 0 && import_obs_label(sheet->label[0]))) {
        /* note: the error code written by this call is replaced by
           the result of worksheet_start_dataset() below
        */
        pd = importer_dates_check(sheet->label + label_start, &book->flags,
                                  newset, prn, &err);
        if (pd > 0) {
            /* the dates/labels gave us time-series information */
            sheet_time_series_setup(sheet, book, newset, pd);
            ts_markers = newset->markers;
            ts_S = newset->S;
        } else if (!book_numeric_dates(book)) {
            if (labels_numeric(sheet->label, newset->n)) {
                /* first column is all numbers: treat it as data */
                sheet->text_cols = 0;
                book_unset_obs_labels(book);
            }
        }
    }

    newset->v = sheet->maxcol + 2 - sheet->col_offset - sheet->text_cols;
    fprintf(stderr, "newset->v = %d, newset->n = %d\n",
            newset->v, newset->n);

    /* create import dataset */
    err = worksheet_start_dataset(newset);
    if (err) {
        goto getout;
    }

    if (book_time_series(book)) {
        /* reinstate the marker info saved before the dataset was started */
        newset->markers = ts_markers;
        newset->S = ts_S;
    } else {
        dataset_obs_info_default(newset);
    }

    /* transcribe the columns: j indexes the next series to fill,
       and advances only when a non-blank column is found
    */
    j = 1;
    for (i=1; i<newset->v; i++) {
        int skip = (sheet->colheads)? 1 : 0;
        int k = i - 1 + sheet->text_cols;
        double zkt;

        if (column_is_blank(sheet, k, newset->n)) {
            n_blank++;
            continue;
        }

        if (sheet->colheads && *sheet->varname[k] != '\0') {
            strcpy(newset->varname[j], sheet->varname[k]);
        } else {
            sprintf(newset->varname[j], "v%d", j);
        }

        for (t=0; t<newset->n; t++) {
            zkt = sheet->Z[k][t + skip];
            /* -999 and -9999 are mapped to the missing value code */
            newset->Z[j][t] = (zkt == -999 || zkt == -9999)? NADBL : zkt;
            if (na(newset->Z[j][t])) {
                any_missing = 1;
            }
        }

        j++;
    }

    if (n_blank > 0) {
        fprintf(stderr, "Dropping %d apparently blank column(s)\n",
                n_blank);
        dataset_drop_last_variables(newset, n_blank);
    }

    if (any_missing) {
        pputs(prn, _("Warning: there were missing values\n"));
    }

    if (fix_varname_duplicates(newset)) {
        pputs(prn, _("warning: some variable names were duplicated\n"));
    }

    if (book_obs_labels(book) && wsheet_labels_complete(sheet)) {
        int offset = (sheet->colheads)? 1 : 0;

        dataset_allocate_obs_markers(newset);
        if (newset->S != NULL) {
            for (t=0; t<newset->n; t++) {
                strcpy(newset->S[t], sheet->label[t+offset]);
            }
        }
    }

    if (book->flags & BOOK_DATA_REVERSED) {
        reverse_data(newset, prn);
    }

    if (!err && !dataset_is_time_series(newset) && newset->S != NULL) {
        /* no time-series info was found above, but the
           observation strings may still carry such info
        */
        import_ts_check(newset);
    }

    err = merge_or_replace_data(dset, &newset, opt, prn);

    if (!err && !merge) {
        dataset_add_import_info(dset, fname, GRETL_GNUMERIC);
    }

    if (!err && gui) {
        wbook_record_params(book, list);
    }

 getout:

    wbook_free(book);
    wsheet_free(sheet);

    gretl_pop_c_numeric_locale();

    if (err && newset != NULL) {
        destroy_dataset(newset);
    }

    return err;
}
/* Walk the children of @node and transcribe every "Cell" element
   that lies inside the reading area of @sheet.

   The reading area runs from (row_offset, col_offset) to (maxrow,
   maxcol); storage for it is obtained via wsheet_allocate().  For
   each cell inside the area:

   - numeric or string content in the first column is written to
     sheet->label;
   - numeric values are stored in sheet->Z;
   - strings on the first row are taken as variable names and
     counted in sheet->colheads;
   - strings in the first column below the first row set
     sheet->text_cols to 1;
   - a string anywhere else is an error.

   Cells outside the area are skipped, on either side: the upper
   bounds are checked as well as the lower ones so that a file whose
   cell coordinates exceed maxrow/maxcol cannot cause writes outside
   the allocated storage.

   Messages are written to @prn.  Returns 0 on success, non-zero on
   failure.
*/

static int wsheet_parse_cells (xmlNodePtr node, wsheet *sheet, PRN *prn)
{
    xmlNodePtr p;
    char *tmp;
    double x;
    int vtype = 0;
    int gotlabels = 0;
    int cols, rows;
    int i, t, r, c;
    int err = 0;

    cols = sheet->maxcol + 1 - sheet->col_offset;
    rows = sheet->maxrow + 1 - sheet->row_offset;

    if (rows < 1) {
        pputs(prn, _("Starting row is out of bounds.\n"));
        return 1;
    }

    if (cols < 1) {
        pputs(prn, _("Starting column is out of bounds.\n"));
        return 1;
    }

    if (wsheet_allocate(sheet, cols, rows)) {
        return 1;
    }

    sheet->colheads = 0;

    for (p = node->xmlChildrenNode; p != NULL && !err; p = p->next) {
        if (xmlStrcmp(p->name, (XUC) "Cell")) {
            /* not a cell element */
            continue;
        }

        x = NADBL;
        c = r = 0;
        i = t = -1;

        /* what column are we in? */
        tmp = (char *) xmlGetProp(p, (XUC) "Col");
        if (tmp) {
            c = atoi(tmp);
            i = c - sheet->col_offset;
            free(tmp);
        }

        /* what row are we on? */
        tmp = (char *) xmlGetProp(p, (XUC) "Row");
        if (tmp) {
            r = atoi(tmp);
            t = r - sheet->row_offset;
            free(tmp);
        }

        if (i < 0 || t < 0 || i >= cols || t >= rows) {
            /* we're not in the requested reading area (i and t
               index storage sized by cols and rows above)
            */
            continue;
        }

        /* check that we have a value of some type available */
        tmp = (char *) xmlGetProp(p, (XUC) "ValueType");
        if (tmp) {
            vtype = atoi(tmp);
            free(tmp);
        } else {
            /* a formula perhaps? */
            pprintf(prn, _("Couldn't get value for col %d, row %d.\n"
                           "Maybe there's a formula in the sheet?"),
                    c, r);
            err = 1;
            break;
        }

        /* get and process the actual cell content */
        tmp = (char *) xmlNodeGetContent(p);
        if (tmp == NULL) {
            continue;
        }

        if (VTYPE_IS_NUMERIC(vtype) || vtype == VALUE_STRING) {
            if (i == 0) {
                /* first column: write content to labels */
                gretl_utf8_strncat_trim(sheet->label[t], tmp, OBSLEN - 1);
            }
        }

        if (i == 0 && t == 1 && VTYPE_IS_NUMERIC(vtype)) {
            /* first data cell in the first column: inspect its
               format to see if it represents a date
            */
            char *fmt = (char *) xmlGetProp(p, (XUC) "ValueFormat");

            if (fmt) {
                check_for_date_format(sheet, fmt);
                free(fmt);
            }
        }

        if (VTYPE_IS_NUMERIC(vtype)) {
            x = atof(tmp);
            sheet->Z[i][t] = x;
        } else if (i > 0 && stray_numeric(vtype, tmp, &x)) {
            sheet->Z[i][t] = x;
        } else if (vtype == VALUE_STRING) {
            if (t == 0) {
                /* first row: look for varnames */
                size_t used = strlen(sheet->varname[i]);

                /* Append only as much as still fits.  This assumes
                   each varname buffer holds VNAMELEN bytes, as the
                   previous bound of VNAMELEN - 1 implied; strncat's
                   count limits the appended part only, so a name
                   that is already partly filled (e.g. a repeated
                   cell in the file) must reduce the count.
                */
                if (used < (size_t) (VNAMELEN - 1)) {
                    strncat(sheet->varname[i], tmp,
                            (size_t) (VNAMELEN - 1) - used);
                }
                sheet->colheads += 1;
                if (i == 0 && import_obs_label(tmp)) {
                    ; /* keep going */
                } else {
                    err = check_imported_varname(sheet->varname[i],
                                                 r, c, prn);
                }
            } else if (i == 0) {
                /* first column, not first row */
                gotlabels = 1;
                sheet->text_cols = 1;
            } else {
                pprintf(prn, _("Expected numeric data, found string:\n"
                               "'%s' at row %d, column %d\n"),
                        tmp, r, c);
                err = 1;
            }
        }

        free(tmp);
    }

    if (gotlabels && sheet->colheads == 1) {
        /* rough notion here: if there's only one heading, it's
           probably not really a variable name, but rather a first
           observation label
        */
        sheet->colheads = 0;
    }

    return err;
}