/* Walk the workbook relationships file and, for every relationship
   whose Id matches one of the sheets recorded in @xinfo, test whether
   the target worksheet file holds any data. For sheets that pass,
   xinfo->filenames[i] is replaced by the Target attribute; sheets
   that never pass are reported on stderr and handed to
   xlsx_expunge_sheet().

   @prn is not used in this function.

   Returns 0 on success, E_ALLOC if the scratch array cannot be
   allocated, or the code from gretl_xml_open_doc_root() on failure
   to open the relationships file.
*/

static int xlsx_verify_sheets (xlsx_info *xinfo, PRN *prn)
{
    xmlDocPtr doc = NULL;
    xmlNodePtr node = NULL;
    char *has_data;
    int orig, pos;
    int err;

    /* one flag per sheet, indexed by the sheet's original position */
    has_data = calloc(xinfo->n_sheets, 1);
    if (has_data == NULL) {
        return E_ALLOC;
    }

    err = gretl_xml_open_doc_root("xl/_rels/workbook.xml.rels",
                                  "Relationships", &doc, &node);
    if (err) {
        free(has_data);
        return err;
    }

    for (node = node->xmlChildrenNode; node != NULL; node = node->next) {
        char *rel_id;
        int k;

        if (xmlStrcmp(node->name, (XUC) "Relationship") != 0) {
            continue;
        }

        rel_id = (char *) xmlGetProp(node, (XUC) "Id");
        k = xlsx_match_sheet_id(xinfo, rel_id);

        if (k >= 0) {
            char *target = (char *) xmlGetProp(node, (XUC) "Target");

            if (target != NULL && xlsx_sheet_has_data(target)) {
                /* the sheet's slot takes ownership of @target */
                has_data[k] = 1;
                free(xinfo->filenames[k]);
                xinfo->filenames[k] = target;
            } else {
                /* no Target, or an empty sheet: free(NULL) is harmless */
                free(target);
            }
        }

        free(rel_id);
    }

    xmlFreeDoc(doc);

    /* @orig steps through the flags in original sheet order, while @pos
       is the current position in xinfo's arrays: it advances only when
       a sheet is kept, since an expunged sheet's slot is re-examined
       (presumably xlsx_expunge_sheet() closes the gap and reduces
       xinfo->n_sheets -- TODO confirm).
    */
    pos = 0;
    for (orig = 0; pos < xinfo->n_sheets; orig++) {
        if (has_data[orig]) {
            pos++;
        } else {
            fprintf(stderr, "dropping sheet '%s'\n", xinfo->sheetnames[pos]);
            xlsx_expunge_sheet(xinfo, pos);
        }
    }

    free(has_data);

    return err;
}
/* Check the single sheet at position @idx in @xinfo: look up its
   relationship in the workbook relationships file, fetch the Target
   filename and make sure the worksheet contains data. On success the
   Target string replaces xinfo->filenames[idx] and @idx is recorded
   in xinfo->selsheet. Progress and failure messages go to @prn.

   Returns 0 on success, E_DATA if the relationship or filename is
   missing or the sheet is empty, or the code from
   gretl_xml_open_doc_root() if the relationships file can't be opened.
*/

static int xlsx_verify_specific_sheet (xlsx_info *xinfo, int idx, PRN *prn)
{
    xmlDocPtr doc = NULL;
    xmlNodePtr node = NULL;
    const char *sname;
    int found = 0;
    int err;

    err = gretl_xml_open_doc_root("xl/_rels/workbook.xml.rels",
                                  "Relationships", &doc, &node);
    if (err) {
        return err;
    }

    sname = xinfo->sheetnames[idx];

    /* stop at the first relationship that maps to sheet @idx */
    for (node = node->xmlChildrenNode; node != NULL && !found; node = node->next) {
        char *rel_id;

        if (xmlStrcmp(node->name, (XUC) "Relationship") != 0) {
            continue;
        }

        rel_id = (char *) xmlGetProp(node, (XUC) "Id");

        if (xlsx_match_sheet_id(xinfo, rel_id) == idx) {
            char *target = (char *) xmlGetProp(node, (XUC) "Target");

            found = 1;

            if (target == NULL) {
                pprintf(prn, "'%s': couldn't find filename\n", sname);
                err = E_DATA;
            } else if (!xlsx_sheet_has_data(target)) {
                pprintf(prn, "'%s': contains no data\n", sname);
                err = E_DATA;
                free(target);
            } else {
                /* xinfo takes ownership of @target */
                pprintf(prn, "'%s' -> %s\n", sname, target);
                free(xinfo->filenames[idx]);
                xinfo->filenames[idx] = target;
            }
        }

        free(rel_id);
    }

    xmlFreeDoc(doc);

    if (!found) {
        pprintf(prn, "'%s': couldn't find file Id\n", sname);
        err = E_DATA;
    }

    if (!err) {
        /* remember which sheet has been pre-checked */
        xinfo->selsheet = idx;
    }

    return err;
}
/* Open the XML file @fname (root element "Workbook"), initialize
   @book via wbook_init() using @list and @sheetname, then pass the
   content of each <SheetName> child of the first <SheetNameIndex>
   element to wbook_record_name().

   Returns the code from gretl_xml_open_doc_root() if the file can't
   be opened, 1 if wbook_record_name() reports failure, otherwise 0.
*/

static int wbook_get_info (const char *fname, const int *list,
                           char *sheetname, wbook *book, PRN *prn)
{
    xmlDocPtr doc;
    xmlNodePtr node, index = NULL;
    int err;

    err = gretl_xml_open_doc_root(fname, "Workbook", &doc, &node);
    if (err) {
        return err;
    }

    wbook_init(book, list, sheetname);

    /* locate the first <SheetNameIndex> element, if any */
    for (node = node->xmlChildrenNode; node != NULL; node = node->next) {
        if (!xmlStrcmp(node->name, (XUC) "SheetNameIndex")) {
            index = node;
            break;
        }
    }

    if (index != NULL) {
        for (node = index->xmlChildrenNode; node != NULL && !err; node = node->next) {
            char *name;

            if (xmlStrcmp(node->name, (XUC) "SheetName") != 0) {
                continue;
            }

            name = (char *) xmlNodeGetContent(node);

            /* @name is freed here only when recording fails; on success
               it is left with wbook_record_name() (presumably @book
               keeps the pointer -- TODO confirm)
            */
            if (name != NULL && wbook_record_name(name, book)) {
                err = 1;
                free(name);
            }
        }
    }

    xmlFreeDoc(doc);

    return err;
}
/* Open the workbook XML file @fname and, for each <sheet> child of
   the first <sheets> element that carries both an "id" and a "name"
   attribute, append the name to xinfo->sheetnames (bumping
   xinfo->n_sheets) and the id to xinfo->filenames.

   Returns the code from gretl_xml_open_doc_root(). Any status
   returned by strings_array_add() is not inspected here.
*/

static int xlsx_workbook_get_sheetnames (xlsx_info *xinfo, const char *fname)
{
    xmlDocPtr doc = NULL;
    xmlNodePtr node = NULL;
    xmlNodePtr sheets = NULL;
    int n_ids = 0;
    int err;

    err = gretl_xml_open_doc_root(fname, "workbook", &doc, &node);
    if (err) {
        return err;
    }

    /* only the first <sheets> element is consulted */
    for (node = node->xmlChildrenNode; node != NULL; node = node->next) {
        if (!xmlStrcmp(node->name, (XUC) "sheets")) {
            sheets = node;
            break;
        }
    }

    if (sheets != NULL) {
        for (node = sheets->xmlChildrenNode; node != NULL; node = node->next) {
            char *rel_id, *label;

            if (xmlStrcmp(node->name, (XUC) "sheet") != 0) {
                continue;
            }

            rel_id = (char *) xmlGetProp(node, (XUC) "id");
            label = (char *) xmlGetProp(node, (XUC) "name");

            if (rel_id != NULL && label != NULL) {
                /* the filenames array has its own counter, @n_ids,
                   separate from xinfo->n_sheets
                */
                strings_array_add(&xinfo->sheetnames, &xinfo->n_sheets, label);
                strings_array_add(&xinfo->filenames, &n_ids, rel_id);
            }

            /* both attribute strings are released here regardless
               (so strings_array_add() presumably stores copies --
               TODO confirm)
            */
            free(rel_id);
            free(label);
        }
    }

    xmlFreeDoc(doc);

    return err;
}
gretl_bundle *gretl_bundle_read_from_file (const char *fname, int from_dotdir, int *err) { xmlDocPtr doc = NULL; xmlNodePtr cur = NULL; char fullname[FILENAME_MAX]; gretl_bundle *b; b = gretl_bundle_new(); if (b == NULL) { *err = E_ALLOC; return NULL; } if (from_dotdir) { build_path(fullname, gretl_dotdir(), fname, NULL); } else { strcpy(fullname, fname); } *err = gretl_xml_open_doc_root(fullname, "gretl-bundle", &doc, &cur); if (!*err) { gretl_push_c_numeric_locale(); cur = cur->xmlChildrenNode; *err = load_bundled_items(b, cur, doc); gretl_pop_c_numeric_locale(); xmlFreeDoc(doc); } if (*err) { gretl_bundle_destroy(b); b = NULL; } return b; }
static int xlsx_sheet_has_data (const char *fname) { xmlDocPtr doc = NULL; xmlNodePtr c1, cur = NULL; gchar *fullname; int err, ret = 0; fullname = g_strdup_printf("xl%c%s", SLASH, fname); err = gretl_xml_open_doc_root(fullname, "worksheet", &doc, &cur); if (!err) { cur = cur->xmlChildrenNode; while (cur != NULL && ret == 0) { if (!xmlStrcmp(cur->name, (XUC) "sheetData")) { c1 = cur->xmlChildrenNode; while (c1 != NULL && ret == 0) { if (!xmlStrcmp(c1->name, (XUC) "row")) { ret = 1; } c1 = c1->next; } } cur = cur->next; } xmlFreeDoc(doc); } if (!ret) { fprintf(stderr, "%s: contains no data\n", fname); } g_free(fullname); return ret; }
static int xlsx_read_shared_strings (xlsx_info *xinfo, PRN *prn) { xmlDocPtr doc = NULL; xmlNodePtr cur = NULL; xmlNodePtr val; char *tmp; int i, n = 0; int err = 0; err = gretl_xml_open_doc_root(xinfo->stringsfile, "sst", &doc, &cur); if (err) { pprintf(prn, "Couldn't find shared strings table\n"); pprintf(prn, "%s", gretl_errmsg_get()); return err; } tmp = (char *) xmlGetProp(cur, (XUC) "uniqueCount"); if (tmp == NULL) { tmp = (char *) xmlGetProp(cur, (XUC) "count"); } if (tmp == NULL) { pprintf(prn, "didn't get sst count\n"); err = E_DATA; } else { n = atoi(tmp); if (n <= 0) { pprintf(prn, "didn't get valid sst count\n"); err = E_DATA; } free(tmp); } if (!err) { xinfo->strings = strings_array_new(n); if (xinfo->strings == NULL) { err = E_ALLOC; } } cur = cur->xmlChildrenNode; /* The strings in an <sst> are mostly set up as <si><t>XXX</t></si> <si><t>YYY</t></si> ... But there are also weird cases where junk is interposed and the structure becomes <si><r>...<t>XXX</t></r><r>...<t>YYY</t></r></si> ... That is, an <si> element may contain more than one <r> element, which embeds a <t> along with formatting crap. 
*/ i = 0; while (cur != NULL && !err) { if (!xmlStrcmp(cur->name, (XUC) "si")) { int gotstr = 0; val = cur->xmlChildrenNode; while (val != NULL && !err && !gotstr) { if (!xmlStrcmp(val->name, (XUC) "t")) { /* got a regular <t> element */ tmp = (char *) xmlNodeGetContent(val); if (tmp == NULL) { pprintf(prn, "failed reading string %d\n", i); err = E_DATA; } else { xinfo->strings[i++] = tmp; gotstr = 1; } } else if (!xmlStrcmp(val->name, (XUC) "r")) { /* hunt for <t> inside an <r> element */ xmlNodePtr sub = val->xmlChildrenNode; while (sub != NULL && !err && i < n) { if (!xmlStrcmp(sub->name, (XUC) "t")) { tmp = (char *) xmlNodeGetContent(sub); if (tmp == NULL) { pprintf(prn, "failed reading string %d\n", i); err = E_DATA; } else { xinfo->strings[i++] = tmp; gotstr = 1; } } sub = sub->next; } } val = val->next; } } if (i == n) { break; } cur = cur->next; } if (!err && i < n) { pprintf(prn, "expected %d shared strings but only found %d\n", n, i); err = E_DATA; } if (!err) { xinfo->n_strings = i; } else if (xinfo->strings != NULL) { strings_array_free(xinfo->strings, n); xinfo->strings = NULL; } xmlFreeDoc(doc); return err; }
static int xlsx_read_worksheet (xlsx_info *xinfo, PRN *prn) { xmlDocPtr doc = NULL; xmlNodePtr data_node = NULL; xmlNodePtr cur = NULL; xmlNodePtr c1; int gotdata = 0; int err = 0; sprintf(xinfo->sheetfile, "xl%c%s", SLASH, xinfo->filenames[xinfo->selsheet]); #if XDEBUG fprintf(stderr, "xlsx_read_worksheet: sheetnum=%d, name='%s'\n", xinfo->selsheet, xinfo->filenames[xinfo->selsheet]); #endif sprintf(xinfo->stringsfile, "xl%csharedStrings.xml", SLASH); err = gretl_xml_open_doc_root(xinfo->sheetfile, "worksheet", &doc, &cur); if (err) { pprintf(prn, "didn't get worksheet\n"); pprintf(prn, "%s", gretl_errmsg_get()); return err; } /* walk the tree, first pass */ cur = cur->xmlChildrenNode; while (cur != NULL && !err && !gotdata) { if (!xmlStrcmp(cur->name, (XUC) "sheetData")) { data_node = c1 = cur->xmlChildrenNode; while (c1 != NULL && !err) { if (!xmlStrcmp(c1->name, (XUC) "row")) { err = xlsx_read_row(c1, xinfo, prn); } c1 = c1->next; } gotdata = 1; } cur = cur->next; } #if XDEBUG if (!err) { pprintf(prn, "Max row = %d, max col = %d\n", xinfo->maxrow, xinfo->maxcol); pprintf(prn, "Accessed %d shared strings\n", xinfo->n_strings); } #endif if (!err && xinfo->dset == NULL) { err = xlsx_check_dimensions(xinfo, prn); if (!err) { gretl_push_c_numeric_locale(); c1 = data_node; while (c1 != NULL && !err) { if (!xmlStrcmp(c1->name, (XUC) "row")) { err = xlsx_read_row(c1, xinfo, prn); } c1 = c1->next; } gretl_pop_c_numeric_locale(); } } xmlFreeDoc(doc); return err; }
/* Open the XML file @fname (root element "Workbook") and search the
   <Sheet> children of each <Sheets> element for the requested sheet:
   the one whose <Name> content, after tailstrip(), equals
   sheet->name and whose 1-based position among the <Name> elements
   seen in that <Sheets> element equals sheet->ID + 1. Once it is
   found, a following <Cells> element in the same <Sheet> is passed
   to wsheet_get_real_size_etc() and then to wsheet_parse_cells().

   Returns the code from gretl_xml_open_doc_root() if the file can't
   be opened, 1 if the sheet is not found, otherwise 0 or the error
   code produced while processing the cells.
*/

static int wsheet_get_data (const char *fname, wsheet *sheet,
                            int *obscol, PRN *prn)
{
    xmlDocPtr doc;
    xmlNodePtr node, sub, item;
    int got_sheet = 0;
    int err;

    err = gretl_xml_open_doc_root(fname, "Workbook", &doc, &node);
    if (err) {
        return err;
    }

    for (node = node->xmlChildrenNode;
         node != NULL && !err && !got_sheet;
         node = node->next) {
        int nseen = 0; /* <Name> elements met so far in this <Sheets> */

        if (xmlStrcmp(node->name, (XUC) "Sheets") != 0) {
            continue;
        }

        for (sub = node->xmlChildrenNode;
             sub != NULL && !got_sheet && !err;
             sub = sub->next) {
            if (xmlStrcmp(sub->name, (XUC) "Sheet") != 0) {
                continue;
            }

            /* all children of this <Sheet> are visited, so that a
               <Cells> element coming after the matching <Name> gets
               handled
            */
            for (item = sub->xmlChildrenNode;
                 item != NULL && !err;
                 item = item->next) {
                if (!xmlStrcmp(item->name, (XUC) "Name")) {
                    char *label = (char *) xmlNodeGetContent(item);

                    nseen++;
                    if (label != NULL) {
                        tailstrip(label);
                        if (nseen == sheet->ID + 1 &&
                            strcmp(label, sheet->name) == 0) {
                            got_sheet = 1;
                        }
                        free(label);
                    }
                } else if (got_sheet && !xmlStrcmp(item->name, (XUC) "Cells")) {
                    err = wsheet_get_real_size_etc(item, sheet, obscol);
                    if (!err) {
                        err = wsheet_parse_cells(item, sheet, *obscol, prn);
                    }
                }
            }
        }
    }

    xmlFreeDoc(doc);

    if (!got_sheet) {
        fprintf(stderr, "'%s': couldn't find the requested sheet\n",
                sheet->name);
        err = 1;
    }

    return err;
}