Пример #1
0
/* Examine the cell represented by node @p and decide whether it marks
   an observations column. *obscol is set to 1 when the cell's type is
   VALUE_EMPTY, or when it is VALUE_STRING and import_obs_label()
   accepts its content; otherwise *obscol is left untouched.

   Returns the code produced by node_get_vtype_and_content(); a
   non-zero code means nothing was inspected.
*/

static int inspect_top_left (xmlNodePtr p, int *obscol)
{
    char *text = NULL;
    int celltype = 0;
    int err = node_get_vtype_and_content(p, &celltype, &text);

    if (err == 0) {
	/* the label test is only reached for string cells */
	int obs_like = (celltype == VALUE_EMPTY) ||
	    (celltype == VALUE_STRING && import_obs_label(text));

	if (obs_like) {
	    *obscol = 1;
	}
    }

    /* released on every path, including failure */
    free(text);

    return err;
}
Пример #2
0
/* Update @xinfo from the cell at row @r, column @c, provided it sits
   on the first row of the reading area (xinfo->yoffset + 1) in one of
   the first two columns (xinfo->xoffset + 1 or + 2). Cells anywhere
   else are ignored.

   @x is the cell's numerical value, tested with na(); @stringcell is
   non-zero when the cell holds the string @s.
*/

static void xlsx_check_top_left (xlsx_info *xinfo, int r, int c,
				 int stringcell, const char *s,
				 double x)
{
    if (r != xinfo->yoffset + 1) {
	/* not on the first row of the reading area */
	return;
    }

    if (c == xinfo->xoffset + 1) {
	/* Top left cell of the reading area: it may be blank,
	   hold the first varname, hold "obs" or similar, or
	   hold the first numerical value.
	*/
	int numeric;

#if XDEBUG
	fprintf(stderr, "xlsx_check_top_left: r=%d, c=%d, x=%g, stringcell=%d, "
		"s='%s'\n", r, c, x, stringcell, s);
#endif
	numeric = !na(x);

	if (numeric) {
	    /* a valid number here means the top row does not
	       carry variable names */
	    xinfo->flags |= BOOK_AUTO_VARNAMES;
	} else if (stringcell && import_obs_label(s)) {
	    /* blank or "obs" or similar */
	    xinfo->flags |= BOOK_OBS_LABELS;
	    xinfo->obscol = c;
	}

	if (numeric || stringcell) {
	    /* the top-left corner turned out not to be empty */
	    xinfo->flags &= ~BOOK_TOP_LEFT_EMPTY;
	}
    } else if (c == xinfo->xoffset + 2) {
	/* first row, second column */
	if (na(x)) {
	    xinfo->namerow = r;
	} else {
	    /* a number rather than a varname */
	    xinfo->flags |= BOOK_AUTO_VARNAMES;
	}
    }
}
Пример #3
0
/* Import data from the Gnumeric workbook @fname into @dset.
 *
 * @list and @sheetname are handed to wbook_get_info(); @list is also
 * passed to wbook_record_params() after a successful GUI import.
 * If OPT_G is set in @opt the worksheet is chosen via wsheet_menu();
 * otherwise the selection is validated by wbook_check_params().
 * Messages for the user are written to @prn; some diagnostics go to
 * stderr and stdout.
 *
 * The data are first assembled in a temporary dataset, which is then
 * passed to merge_or_replace_data() together with @dset.
 *
 * Returns 0 on success; 1 on most failures; -1 when book->selected
 * is -1 (treated as cancellation); otherwise whatever non-zero code
 * a helper returned.
 */
int gnumeric_get_data (const char *fname, int *list, char *sheetname,
		       DATASET *dset, gretlopt opt, PRN *prn)
{
    int gui = (opt & OPT_G);
    wbook gbook;
    wbook *book = &gbook;
    wsheet gsheet;
    wsheet *sheet = &gsheet;
    int sheetnum = -1;
    DATASET *newset;
    int err = 0;

    newset = datainfo_new();
    if (newset == NULL) {
	pputs(prn, _("Out of memory\n"));
	return 1;
    }

    wsheet_init(sheet);

    /* paired with gretl_pop_c_numeric_locale() at getout */
    gretl_push_c_numeric_locale();

    if (wbook_get_info(fname, list, sheetname, book, prn)) {
	pputs(prn, _("Failed to get workbook info"));
	err = 1;
	goto getout;
    } 

    wbook_print_info(book);

    if (book->nsheets == 0) {
	pputs(prn, _("No worksheets found"));
	err = 1;
	goto getout;
    }

    /* decide which worksheet to read */
    if (gui) {
	if (book->nsheets > 1) {
	    wsheet_menu(book, 1);
	    sheetnum = book->selected;
	} else {
	    wsheet_menu(book, 0);
	    sheetnum = 0;
	}
    } else {
	err = wbook_check_params(book);
	if (err) {
	    gretl_errmsg_set(_("Invalid argument for worksheet import"));
	} else if (book->selected >= 0) {
	    sheetnum = book->selected;
	} else {
	    sheetnum = 0;
	}
    }

    /* NOTE(review): this test also runs on the non-GUI path, where a
       negative book->selected has just been mapped to sheet 0 --
       confirm that wbook_check_params() never leaves it at -1.
    */
    if (book->selected == -1) {
	/* canceled */
	err = -1;
    }

    if (!err && sheetnum >= 0) {
	fprintf(stderr, "Getting data...\n");
	if (wsheet_setup(sheet, book, sheetnum)) {
	    pputs(prn, _("error in wsheet_setup()"));
	    err = 1;
	} else {
	    err = wsheet_get_data(fname, sheet, prn);
	    if (!err) {
		wsheet_print_info(sheet);
		book->flags |= sheet->flags;
	    } 
	}
    } 

    if (err) {
	goto getout;
    } else {
	/* r0: index of the first label passed to the dates check;
	   1 skips a heading row, 0 is used when there are no
	   column headings */
	int r0 = 1;
	int i, j, t;
	int ts_markers = 0;
	/* recorded before merge_or_replace_data() gets to touch dset */
	int merge = (dset->Z != NULL);
	char **ts_S = NULL;
	int blank_cols = 0;
	int missvals = 0;
	int pd = 0;

	if (sheet->text_cols > 0) {
	    book_set_obs_labels(book);
	}

	if (sheet->colheads == 0) {
	    book_set_auto_varnames(book);
	    r0 = 0;
	}

	/* diagnostics only: note that these go to stdout, not stderr */
	if (book_numeric_dates(book)) {
	    puts("found calendar dates in first imported column");
	} else if (sheet->text_cols > 0) {
	    puts("found label strings in first imported column");
	} else {
	    puts("check for label strings in first imported column: not found");
	}

	/* number of observations: one row is reserved for headings
	   unless there are none, in which case it is added back */
	newset->n = sheet->maxrow - sheet->row_offset;

	if (!sheet->colheads) {
	    pputs(prn, _("it seems there are no variable names\n"));
	    newset->n += 1;
	}

	if (book_numeric_dates(book) || 
	    (sheet->colheads > 0 && import_obs_label(sheet->label[0]))) {
	    /* NOTE(review): any code written to err by this call is
	       overwritten by worksheet_start_dataset() below without
	       being examined -- confirm that this is intended.
	    */
	    pd = importer_dates_check(sheet->label + r0, &book->flags,
				      newset, prn, &err);
	    if (pd > 0) {
		/* got time-series info from dates/labels */
		sheet_time_series_setup(sheet, book, newset, pd);
		/* stash the markers; they are reinstated after
		   worksheet_start_dataset() */
		ts_markers = newset->markers;
		ts_S = newset->S;
	    } else if (!book_numeric_dates(book)) {
		if (labels_numeric(sheet->label, newset->n)) {
		    /* first column holds numbers: treat it as data,
		       not as observation labels */
		    sheet->text_cols = 0;
		    book_unset_obs_labels(book);
		}
	    }
	}

	/* series count; the loop below fills indices 1 .. v-1 */
	newset->v = sheet->maxcol + 2 - sheet->col_offset - sheet->text_cols;
	fprintf(stderr, "newset->v = %d, newset->n = %d\n",
		newset->v, newset->n);

	/* create import dataset */
	err = worksheet_start_dataset(newset);
	if (err) {
	    goto getout;
	}

	if (book_time_series(book)) {
	    newset->markers = ts_markers;
	    newset->S = ts_S;
	} else {
	    dataset_obs_info_default(newset);
	} 

	/* j is the destination series index; it lags i whenever a
	   blank source column is skipped */
	j = 1;
	for (i=1; i<newset->v; i++) {
	    /* s: source row index, starting past the heading row if any */
	    int s = (sheet->colheads)? 1 : 0;
	    /* k: source column index, skipping any text column */
	    int k = i - 1 + sheet->text_cols;
	    double zkt;

	    if (column_is_blank(sheet, k, newset->n)) {
		blank_cols++;
		continue;
	    } 

	    if (sheet->colheads && *sheet->varname[k] != '\0') {
		strcpy(newset->varname[j], sheet->varname[k]);
	    } else {
		sprintf(newset->varname[j], "v%d", j);
	    }
	    for (t=0; t<newset->n; t++) {
		zkt = sheet->Z[k][s++];
		/* -999 and -9999 are taken as missing-value codes */
		if (zkt == -999 || zkt == -9999) {
		    newset->Z[j][t] = NADBL;
		} else {
		    newset->Z[j][t] = zkt;
		}
		if (na(newset->Z[j][t])) {
		    missvals = 1;
		}
	    }
	    j++;
	}

	/* skipped columns leave that many unused series at the end */
	if (blank_cols > 0) {
	    fprintf(stderr, "Dropping %d apparently blank column(s)\n", 
		    blank_cols);
	    dataset_drop_last_variables(newset, blank_cols);
	}

	if (missvals) {
	    pputs(prn, _("Warning: there were missing values\n"));
	}

	if (fix_varname_duplicates(newset)) {
	    pputs(prn, _("warning: some variable names were duplicated\n"));
	}

	if (book_obs_labels(book) && wsheet_labels_complete(sheet)) {
	    /* skip the label that belongs to the heading row, if any */
	    int offset = (sheet->colheads)? 1 : 0;

	    dataset_allocate_obs_markers(newset);
	    if (newset->S != NULL) {
		for (t=0; t<newset->n; t++) {
		    strcpy(newset->S[t], sheet->label[t+offset]);
		}
	    }
	}

	if (book->flags & BOOK_DATA_REVERSED) {
	    reverse_data(newset, prn);
	}

	/* err is necessarily 0 here: every earlier failure jumps
	   to getout */
	if (!err && !dataset_is_time_series(newset) && newset->S != NULL) {
	    /* we didn't get time series info above, but it's possible
	       the observation strings carry such info
	    */
	    import_ts_check(newset);
	}

	/* newset is passed by address; presumably the callee takes it
	   over or clears it -- the cleanup below destroys it only when
	   err is set and it is still non-NULL */
	err = merge_or_replace_data(dset, &newset, opt, prn);

	if (!err && !merge) {
	    dataset_add_import_info(dset, fname, GRETL_GNUMERIC);
	}

	if (!err && gui) {
	    wbook_record_params(book, list);
	}
    } 

 getout:

    wbook_free(book);
    wsheet_free(sheet);

    gretl_pop_c_numeric_locale();

    if (err && newset != NULL) {
	destroy_dataset(newset);
    }

    return err;
}
Пример #4
0
/* Walk the children of @node and load every "Cell" element that lies
   inside the reading area into @sheet.

   The reading area starts at sheet->col_offset / sheet->row_offset and
   is sized from sheet->maxcol / sheet->maxrow; storage is obtained
   via wsheet_allocate(). For each cell in range:

   - numeric or string content in the first column is appended to
     sheet->label[t];
   - numeric cells are converted with atof() and stored in
     sheet->Z[i][t];
   - strings on the first row are taken as variable names and counted
     in sheet->colheads;
   - strings in the first column below the first row set
     sheet->text_cols to 1;
   - any other string that stray_numeric() does not accept is an error.

   Cells whose Col/Row fall outside the area (before the offsets, or
   beyond maxcol/maxrow) are skipped, so a file whose cells disagree
   with its declared dimensions cannot cause out-of-bounds writes.

   Diagnostic messages go to @prn. Returns 0 on success, non-zero on
   failure.
*/

static int wsheet_parse_cells (xmlNodePtr node, wsheet *sheet, PRN *prn)
{
    xmlNodePtr p = node->xmlChildrenNode;
    char *tmp;
    double x;
    int vtype = 0;
    int gotlabels = 0;
    int cols, rows;
    int i, t, r, c;
    int err = 0;

    cols = sheet->maxcol + 1 - sheet->col_offset;
    rows = sheet->maxrow + 1 - sheet->row_offset;

    if (rows < 1) {
	pputs(prn, _("Starting row is out of bounds.\n"));
	return 1;
    }
    
    if (cols < 1) {
	pputs(prn, _("Starting column is out of bounds.\n"));
	return 1;
    }	

    if (wsheet_allocate(sheet, cols, rows)) {
	return 1;
    }

    sheet->colheads = 0;

    while (p != NULL && !err) {
	if (!xmlStrcmp(p->name, (XUC) "Cell")) {
	    x = NADBL;
	    c = r = 0;
	    /* -1 marks "attribute missing", which the range test
	       below treats as outside the reading area */
	    i = t = -1;

	    /* what column are we in? */
	    tmp = (char *) xmlGetProp(p, (XUC) "Col");
	    if (tmp) {
		c = atoi(tmp);
		i = c - sheet->col_offset;
		free(tmp);
	    }

	    /* what row are we on? */
	    tmp = (char *) xmlGetProp(p, (XUC) "Row");
	    if (tmp) {
		r = atoi(tmp);
		t = r - sheet->row_offset;
		free(tmp);
	    }

	    if (i < 0 || t < 0 || i >= cols || t >= rows) {
		/* we're not in the requested reading area; the upper
		   bounds guard the arrays sized by wsheet_allocate()
		   (assumed to be cols x rows) against cells that lie
		   beyond the declared maxcol/maxrow */
		p = p->next;
		continue;
	    }

	    /* check that we have a value of some type available */
	    tmp = (char *) xmlGetProp(p, (XUC) "ValueType");
	    if (tmp) {
		vtype = atoi(tmp);
		free(tmp);
	    } else { 
		/* a formula perhaps? */
		pprintf(prn, _("Couldn't get value for col %d, row %d.\n"
			       "Maybe there's a formula in the sheet?"),
			c, r);
		err = 1;
		break;
	    }

	    /* get and process the actual cell content */
	    tmp = (char *) xmlNodeGetContent(p);
	    if (tmp != NULL) {
		if (VTYPE_IS_NUMERIC(vtype) || vtype == VALUE_STRING) {
		    if (i == 0) {
			/* first column: write content to labels */
			gretl_utf8_strncat_trim(sheet->label[t], tmp, OBSLEN - 1);
		    }
		}

		/* first column, second row: a numeric cell here may
		   carry a date format */
		if (i == 0 && t == 1 && VTYPE_IS_NUMERIC(vtype)) {
		    char *fmt = (char *) xmlGetProp(p, (XUC) "ValueFormat");

		    if (fmt) {
			check_for_date_format(sheet, fmt);
			free(fmt);
		    }
		}

		if (VTYPE_IS_NUMERIC(vtype)) {
		    x = atof(tmp);
		    sheet->Z[i][t] = x;
		} else if (i > 0 && stray_numeric(vtype, tmp, &x)) {
		    sheet->Z[i][t] = x;
		} else if (vtype == VALUE_STRING) {
		    if (t == 0) {
			/* first row: look for varnames */
			size_t used = strlen(sheet->varname[i]);

			/* strncat's count limits only the appended
			   part, so subtract what is already there;
			   this keeps the total within VNAMELEN - 1
			   characters even if the same cell position
			   occurs twice (assumes each varname buffer
			   holds VNAMELEN bytes) */
			if (used < (size_t) (VNAMELEN - 1)) {
			    strncat(sheet->varname[i], tmp,
				    (size_t) (VNAMELEN - 1) - used);
			}
			sheet->colheads += 1;
			if (i == 0 && import_obs_label(tmp)) {
			    ; /* keep going */
			} else {
			    err = check_imported_varname(sheet->varname[i],
							 r, c, prn);
			}
		    } else if (i == 0) {
			/* first column, not first row */
			gotlabels = 1;
			sheet->text_cols = 1;
		    } else {
			pprintf(prn, _("Expected numeric data, found string:\n"
				       "'%s' at row %d, column %d\n"), 
				tmp, r, c);
			err = 1;
		    }
		}
		free(tmp);
	    }
	}
	p = p->next;
    }

    if (gotlabels && sheet->colheads == 1) {
	/* rough notion here: if there's only one heading, it's
	   probably not really a variable name, but rather
	   a first observation label 
	*/
	sheet->colheads = 0;
    }

    return err;
}