Exemple #1
0
/* finalize_xlsx_import: post-process the dataset read from an xlsx
   file (xinfo->dset) and hand it over to @dset.

   Steps, in order: prune columns, verify that every series from
   index 1 upward has a non-empty name, fix duplicated names, run
   the optional dates check, run the time-series check if observation
   strings are present, then merge into or replace @dset.  Import
   info is recorded on @dset only when @dset held no data on entry.

   Returns 0 on success, E_DATA if a variable name is missing,
   otherwise the error code from the failing helper.
*/
static int finalize_xlsx_import (DATASET *dset,
				 xlsx_info *xinfo,
				 const char *fname,
				 gretlopt opt,
				 PRN *prn)
{
    /* note whether the target already holds data before we touch it */
    const int merging = (dset->Z != NULL);
    int err = import_prune_columns(xinfo->dset);
    int v;

    if (err) {
	return err;
    }

    /* every imported series must carry a name */
    for (v=1; v<xinfo->dset->v; v++) {
	if (xinfo->dset->varname[v][0] == '\0') {
	    pprintf(prn, "Name missing for variable %d\n", v);
	    return E_DATA;
	}
    }

    if (fix_varname_duplicates(xinfo->dset)) {
	pputs(prn, _("warning: some variable names were duplicated\n"));
    }

    if (xinfo->trydates) {
	xlsx_dates_check(xinfo->dset);
    }

    if (xinfo->dset->S != NULL) {
	import_ts_check(xinfo->dset);
    }

    err = merge_or_replace_data(dset, &xinfo->dset, opt, prn);
    if (err) {
	return err;
    }

    if (!merging) {
	dataset_add_import_info(dset, fname, GRETL_XLSX);
    }

    return 0;
}
/* gnumeric_get_data: read one worksheet from the workbook file @fname
   into a freshly allocated dataset, then merge it into (or use it to
   replace) @dset via merge_or_replace_data().

   @fname: path of the workbook file.
   @list: passed to wbook_get_info(), and to wbook_record_params() on
   success when running under the GUI.
   @sheetname: passed to wbook_get_info().
   @dset: target dataset; if dset->Z is non-NULL on entry the import is
   treated as a merge and no import info is recorded.
   @opt: OPT_G selects the GUI path; also forwarded to
   merge_or_replace_data().
   @prn: printer for user-visible messages.

   Returns 0 on success; 1 on allocation failure, failure to get
   workbook info, or an empty workbook; -1 when book->selected is -1
   (treated as "canceled"); otherwise the error code from the helper
   that failed.
*/
int gnumeric_get_data (const char *fname, int *list, char *sheetname,
		       DATASET *dset, gretlopt opt, PRN *prn)
{
    int gui = (opt & OPT_G);
    wbook gbook;
    wbook *book = &gbook;
    wsheet gsheet;
    wsheet *sheet = &gsheet;
    int sheetnum = -1;
    int obscol = 0;
    DATASET *newset;
    int err = 0;

    newset = datainfo_new();
    if (newset == NULL) {
	pputs(prn, _("Out of memory\n"));
	return 1;
    }

    wsheet_init(sheet);

    /* balanced by gretl_pop_c_numeric_locale() at the getout label */
    gretl_push_c_numeric_locale();

    if (wbook_get_info(fname, list, sheetname, book, prn)) {
	pputs(prn, _("Failed to get workbook info"));
	err = 1;
	goto getout;
    } 

    wbook_print_info(book);

    if (book->nsheets == 0) {
	pputs(prn, _("No worksheets found"));
	err = 1;
	goto getout;
    }

    if (gui) {
	/* GUI: wsheet_menu() is called either way; with more than one
	   sheet the choice is read back from book->selected
	   (presumably set by a selection dialog -- confirm)
	*/
	if (book->nsheets > 1) {
	    wsheet_menu(book, 1);
	    sheetnum = book->selected;
	} else {
	    wsheet_menu(book, 0);
	    sheetnum = 0;
	}
    } else {
	/* non-GUI: validate the parameters, then use the selected
	   sheet if one is set, else fall back to the first sheet
	*/
	err = wbook_check_params(book);
	if (err) {
	    gretl_errmsg_set(_("Invalid argument for worksheet import"));
	} else if (book->selected >= 0) {
	    sheetnum = book->selected;
	} else {
	    sheetnum = 0;
	}
    }

    /* NOTE(review): this test runs on both paths and overwrites any
       error already set by wbook_check_params() -- confirm intended
    */
    if (book->selected == -1) {
	/* canceled */
	err = -1;
    }

    if (!err && sheetnum >= 0) {
	fprintf(stderr, "Getting data...\n");
	err = wsheet_setup(sheet, book, sheetnum);
	if (!err) {
	    err = wsheet_get_data(fname, sheet, &obscol, prn);
	    if (err) {
		fprintf(stderr, "wsheet_get_data returned %d\n", err);
	    } else {
		wsheet_print_info(sheet);
		/* propagate per-sheet flags to the workbook */
		book->flags |= sheet->flags;
	    } 
	}
    } 

    if (err) {
	goto getout;
    } else {
	int r0 = 1; /* the first data row */
	int i, j, t;
	int ts_markers = 0;
	int merge = (dset->Z != NULL);
	char **ts_S = NULL;
	int blank_cols = 0;
	int missvals = 0;
	int pd = 0;

	if (obscol) {
	    /* an observations column was reported by wsheet_get_data():
	       count it as (at least) one text column
	    */
	    book_set_obs_labels(book);
	    if (sheet->text_cols == 0) {
		sheet->text_cols = 1;
	    }
	} else if (sheet->text_cols > 0) {
	    /* string-valued variable? */
	    fprintf(stderr, "Problem: sheet->text_cols = %d\n", sheet->text_cols);
	}

	if (sheet->colheads == 0) {
	    /* no column headings: data start at row 0 and names
	       will be auto-generated
	    */
	    book_set_auto_varnames(book);
	    r0 = 0;
	}

	/* diagnostics only: report what the first column looks like */
	if (book_numeric_dates(book)) {
	    fputs("found calendar dates in first imported column\n", stderr);
	} else if (obscol) {
	    fprintf(stderr, "found label strings in first imported column (text_cols = %d)\n",
		    sheet->text_cols);
	} else if (sheet->text_cols > 0) {
	    fputs("found string-valued variable in first imported column?\n", stderr);
	} else {
	    fputs("check for label strings in first imported column: not found\n", stderr);
	}

	/* number of observations; one more if there is no heading row */
	newset->n = sheet->maxrow - sheet->row_offset;

	if (!sheet->colheads) {
	    pputs(prn, _("it seems there are no variable names\n"));
	    newset->n += 1;
	}

	if (book_numeric_dates(book) || obscol) {
	    /* NOTE(review): importer_dates_check() can set err through
	       its last argument, but err is not tested here and is
	       overwritten by worksheet_start_dataset() below -- confirm
	       that dropping this error is intended
	    */
	    pd = importer_dates_check(sheet->label + r0, &book->flags,
				      newset, prn, &err);
	    if (pd > 0) {
		/* got time-series info from dates/labels */
		sheet_time_series_setup(sheet, book, newset, pd);
		/* stash these; they are put back on newset after
		   worksheet_start_dataset() (presumably because that
		   call resets them -- confirm)
		*/
		ts_markers = newset->markers;
		ts_S = newset->S;
	    } else if (!book_numeric_dates(book)) {
		if (labels_are_index(sheet->label, newset->n)) {
		    /* trash the labels */
		    book_unset_obs_labels(book);
		}
	    }
	}

	/* series count, excluding the text columns; series are filled
	   from index 1 below (index 0 is presumably the constant --
	   confirm)
	*/
	newset->v = sheet->maxcol + 2 - sheet->col_offset - sheet->text_cols;
	fprintf(stderr, "newset->v = %d, newset->n = %d\n",
		newset->v, newset->n);

	/* create import dataset */
	err = worksheet_start_dataset(newset);
	if (err) {
	    goto getout;
	}

	if (book_time_series(book)) {
	    newset->markers = ts_markers;
	    newset->S = ts_S;
	} else {
	    dataset_obs_info_default(newset);
	} 

	/* copy columns: i walks the candidate series, k is the matching
	   sheet column, j is the next output slot (it lags i whenever a
	   blank column is skipped)
	*/
	j = 1;
	for (i=1; i<newset->v; i++) {
	    int s = (sheet->colheads)? 1 : 0; /* skip heading row if present */
	    int k = i - 1 + sheet->text_cols;
	    double zkt;

	    if (column_is_blank(sheet, k, newset->n)) {
		blank_cols++;
		continue;
	    } 

	    if (sheet->colheads && *sheet->varname[k] != '\0') {
		strcpy(newset->varname[j], sheet->varname[k]);
	    } else {
		sprintf(newset->varname[j], "v%d", j);
	    }
	    for (t=0; t<newset->n; t++) {
		zkt = sheet->Z[k][s++];
		/* -999 and -9999 are treated as missing-value codes */
		if (zkt == -999 || zkt == -9999) {
		    newset->Z[j][t] = NADBL;
		} else {
		    newset->Z[j][t] = zkt;
		}
		if (na(newset->Z[j][t])) {
		    missvals = 1;
		}
	    }
	    j++;
	}

	if (blank_cols > 0) {
	    /* the skipped columns left unused slots at the end */
	    fprintf(stderr, "Dropping %d apparently blank column(s)\n", 
		    blank_cols);
	    dataset_drop_last_variables(newset, blank_cols);
	}

	if (missvals) {
	    pputs(prn, _("Warning: there were missing values\n"));
	}

	if (fix_varname_duplicates(newset)) {
	    pputs(prn, _("warning: some variable names were duplicated\n"));
	}

	if (book_obs_labels(book) && wsheet_labels_complete(sheet)) {
	    /* copy the sheet labels as observation markers, skipping
	       the heading row if there is one
	    */
	    int offset = (sheet->colheads)? 1 : 0;

	    dataset_allocate_obs_markers(newset);
	    if (newset->S != NULL) {
		for (t=0; t<newset->n; t++) {
		    strcpy(newset->S[t], sheet->label[t+offset]);
		}
	    }
	}

	if (book->flags & BOOK_DATA_REVERSED) {
	    reverse_data(newset, prn);
	}

	if (!err && !dataset_is_time_series(newset) && newset->S != NULL) {
	    /* we didn't get time series info above, but it's possible
	       the observation strings carry such info
	    */
	    import_ts_check(newset);
	}

	/* newset is passed by address; the NULL test at getout suggests
	   this call may take ownership and clear the pointer -- confirm
	*/
	err = merge_or_replace_data(dset, &newset, opt, prn);

	if (!err && !merge) {
	    dataset_add_import_info(dset, fname, GRETL_GNUMERIC);
	}

	if (!err && gui) {
	    wbook_record_params(book, list);
	}
    } 

 getout:

    /* common cleanup for both the success and the error paths */
    wbook_free(book);
    wsheet_free(sheet);

    gretl_pop_c_numeric_locale();

    if (err && newset != NULL) {
	destroy_dataset(newset);
    }

    return err;
}
/* wf1_get_data: import data from the EViews workfile @fname.

   Opens the file, checks its type, reads the header and then the
   variables into a new dataset, which is finally merged into (or
   used to replace) @dset.  Import info is recorded on @dset only
   when @dset held no data on entry.

   @fname: path of the workfile.
   @dset: target dataset.
   @opt: forwarded to merge_or_replace_data().
   @prn: printer for user-visible messages.

   Returns 0 on success; E_FOPEN if the file cannot be opened; E_DATA
   if the file type check fails; E_ALLOC on allocation failure;
   otherwise the error code from the helper that failed.
*/
int wf1_get_data (const char *fname, DATASET *dset,
		  gretlopt opt, PRN *prn)
{
    DATASET *wfset = NULL;
    unsigned data_pos;
    int n_read, wftype;
    int had_data, n_wanted;
    int err = 0;
    FILE *fp = gretl_fopen(fname, "rb");

    if (fp == NULL) {
	return E_FOPEN;
    }

    wftype = wf1_check_file_type(fp);

    if (wftype < 0) {
	fclose(fp);
	pputs(prn, "This file does not seem to be an EViews workfile\n");
	return E_DATA;
    } else if (wftype == 1) {
	pputs(prn, "EViews 7+ file: expect problems!\n");
    }

    wfset = datainfo_new();
    if (wfset == NULL) {
	pputs(prn, _("Out of memory\n"));
	err = E_ALLOC;
	goto finish;
    }

    err = parse_wf1_header(fp, wftype, wfset, &data_pos);
    if (err) {
	pputs(prn, _("Error reading workfile header\n"));
	free_datainfo(wfset);
	goto finish;
    }

    if (start_new_Z(wfset, 0)) {
	/* any failure here is reported as an allocation error */
	pputs(prn, _("Out of memory\n"));
	free_datainfo(wfset);
	err = E_ALLOC;
	goto finish;
    }

    err = read_wf1_variables(fp, wftype, data_pos, wfset, &n_read, prn);
    if (err) {
	destroy_dataset(wfset);
	goto finish;
    }

    had_data = (dset->Z != NULL);
    n_wanted = wfset->v - 1;

    /* fewer series read than the header announced: trim the excess */
    if (n_read < n_wanted) {
	dataset_drop_last_variables(wfset, n_wanted - n_read);
    }

    if (fix_varname_duplicates(wfset)) {
	pputs(prn, _("warning: some variable names were duplicated\n"));
    }

    err = merge_or_replace_data(dset, &wfset, opt, prn);

    if (!err && !had_data) {
	dataset_add_import_info(dset, fname, GRETL_WF1);
    }

 finish:

    fclose(fp);

    return err;
}