/*
 * zip_child_init:
 * @child: the zip member being opened
 * @errmsg: optional place to store a GError describing the failure
 *
 * Seeks @child->source to the member's local file header, checks the
 * "PK\003\004" signature, and records where the member's data begins
 * (header + name + extras).  The remaining uncompressed / compressed
 * byte counts are primed from the directory entry, and for any member
 * that is not GSF_ZIP_STORED a zlib inflate stream is initialised.
 *
 * Returns TRUE on error (note the inverted sense), FALSE on success.
 */
static gboolean
zip_child_init (GsfInfileZip *child, GError **errmsg)
{
	static guint8 const header_signature[] =
		{ 'P', 'K', 0x03, 0x04 };

	guint8 const *data = NULL;
	guint16 name_len, extras_len;
	char *err = NULL;
	GsfZipDirent *dirent = child->vdir->dirent;

	/* skip local header
	 * should test tons of other info, but trust that those are correct
	 **/

	/* The offset is cast to gsf_off_t wherever it is formatted so that the
	 * vararg always matches GSF_OFF_T_FORMAT, exactly as the seek call
	 * already casts it. */
	if (gsf_input_seek (child->source, (gsf_off_t) dirent->offset, G_SEEK_SET))
		err = g_strdup_printf ("Error seeking to zip header @ %" GSF_OFF_T_FORMAT,
				       (gsf_off_t) dirent->offset);
	else if (NULL == (data = gsf_input_read (child->source, ZIP_FILE_HEADER_SIZE, NULL)))
		err = g_strdup_printf ("Error reading %d bytes in zip header",
				       ZIP_FILE_HEADER_SIZE);
	else if (0 != memcmp (data, header_signature, sizeof (header_signature))) {
		err = g_strdup_printf ("Error incorrect zip header @ %" GSF_OFF_T_FORMAT,
				       (gsf_off_t) dirent->offset);
		/* Show what was found next to what was expected. */
		g_print ("Header is :\n");
		gsf_mem_dump (data, sizeof (header_signature));
		g_print ("Header should be :\n");
		gsf_mem_dump (header_signature, sizeof (header_signature));
	}

	if (NULL != err) {
		if (errmsg != NULL)
			*errmsg = g_error_new_literal (gsf_input_error_id (), 0, err);
		g_free (err);
		return TRUE;
	}

	name_len   = GSF_LE_GET_GUINT16 (data + ZIP_FILE_HEADER_NAME_SIZE);
	extras_len = GSF_LE_GET_GUINT16 (data + ZIP_FILE_HEADER_EXTRAS_SIZE);

	/* The payload follows the fixed header, the file name and the extras. */
	dirent->data_offset = dirent->offset + ZIP_FILE_HEADER_SIZE + name_len + extras_len;
	child->restlen  = dirent->usize;
	child->crestlen = dirent->csize;

	if (dirent->compr_method != GSF_ZIP_STORED) {
		int zerr;	/* distinct name: must not shadow the message above */

		if (!child->stream)
			child->stream = g_new0 (z_stream, 1);

		/* A negative windowBits asks zlib for raw deflate data, i.e.
		 * no zlib header or trailer around the stream. */
		zerr = inflateInit2 (child->stream, -MAX_WBITS);
		if (zerr != Z_OK) {
			if (errmsg != NULL)
				*errmsg = g_error_new_literal (gsf_input_error_id (), 0,
							       "problem uncompressing stream");
			return TRUE;
		}
	}

	return FALSE;
}
/*
 * dump_biff_stream:
 * @stream: input positioned at the start of a sequence of BIFF records
 *
 * Walks @stream record by record.  Each record starts with a 4 byte
 * header: a little-endian 16 bit opcode followed by a little-endian 16 bit
 * payload length.  The header is printed to stdout and the payload, if
 * any, is hex-dumped.  Records that look implausible (payload larger than
 * 15000 bytes, or a high opcode byte above 0x10) additionally raise a
 * g_warning.
 *
 * The walk stops quietly at the first header or payload that cannot be
 * read in full.
 */
static void
dump_biff_stream (GsfInput *stream)
{
	guint8 const *data;
	guint16 len, opcode;

	while (NULL != (data = gsf_input_read (stream, 4, NULL))) {
		/* Initialised to TRUE, so every record is dumped.  Starting
		 * from FALSE instead would restrict the output to the
		 * suspicious records flagged below. */
		gboolean enable_dump = TRUE;

		opcode = GSF_LE_GET_GUINT16 (data);
		len = GSF_LE_GET_GUINT16 (data+2);

		if (len > 15000) {
			enable_dump = TRUE;
			g_warning ("Suspicious import of biff record > 15,000 (0x%x) for opcode 0x%hx",
				   len, opcode);
		} else if ((opcode & 0xff00) > 0x1000) {
			enable_dump = TRUE;
			g_warning ("Suspicious import of biff record with opcode 0x%hx",
				   opcode);
		}

		/* len is unsigned: print it with %hu so that lengths above
		 * 32767 are not shown as negative numbers. */
		if (enable_dump)
			printf ("Opcode 0x%3hx : %15s, length 0x%hx (=%hu)\n",
				opcode, get_biff_opcode_name (opcode),
				len, len);

		if (len > 0) {
			data = gsf_input_read (stream, len, NULL);
			if (data == NULL)
				break;	/* truncated record */
			if (enable_dump)
				gsf_mem_dump (data, len);
		}
	}
}
static void qpro_parse_formula (QProReadState *state, int col, int row, guint8 const *data, guint8 const *end) { guint16 magic, ref_offset; #if 0 int flags = GSF_LE_GET_GUINT16 (data + 8); int length = GSF_LE_GET_GUINT16 (data + 10); #endif GnmValue *val; GSList *stack = NULL; GnmExprTop const *texpr = NULL; guint8 const *refs, *fmla; #ifdef DEBUG_MISSING dump_missing_functions (); #endif Q_CHECK_CONDITION (end - data >= 14); magic = GSF_LE_GET_GUINT16 (data + 6) & 0x7ff8; ref_offset = GSF_LE_GET_GUINT16 (data + 12); fmla = data + 14; refs = fmla + ref_offset; Q_CHECK_CONDITION (refs <= end); #if 0 puts (cell_coord_name (col, row)); gsf_mem_dump (data, 14); gsf_mem_dump (fmla, refs-fmla); gsf_mem_dump (refs, end-refs); #endif while (fmla < refs && *fmla != QPRO_OP_EOF) { QProOperators op = *fmla++; GnmExpr const *expr = NULL; #if 0 g_print ("Operator %d.\n", op); #endif switch (op) { case QPRO_OP_CONST_FLOAT: Q_CHECK_CONDITION (refs - fmla >= 8); expr = gnm_expr_new_constant (value_new_float ( gsf_le_get_double (fmla))); fmla += 8; break; case QPRO_OP_CELLREF: { GnmCellRef ref; guint16 tmp; Q_CHECK_CONDITION (end - refs >= 6); tmp = GSF_LE_GET_GUINT16 (refs + 4); ref.sheet = NULL; ref.col = *((gint8 *)(refs + 2)); ref.col_relative = (tmp & 0x4000) ? TRUE : FALSE; ref.row_relative = (tmp & 0x2000) ? TRUE : FALSE; if (ref.row_relative) ref.row = (int)(((gint16)((tmp & 0x1fff) << 3)) >> 3); else ref.row = tmp & 0x1fff; expr = gnm_expr_new_cellref (&ref); refs += 6; break; } case QPRO_OP_RANGEREF: { GnmCellRef a, b; guint16 tmp; Q_CHECK_CONDITION (end - refs >= 10); tmp = GSF_LE_GET_GUINT16 (refs + 4); a.sheet = NULL; a.col = *((gint8 *)(refs + 2)); a.col_relative = (tmp & 0x4000) ? TRUE : FALSE; a.row_relative = (tmp & 0x2000) ? TRUE : FALSE; if (a.row_relative) a.row = (int)(((gint16)((tmp & 0x1fff) << 3)) >> 3); else a.row = tmp & 0x1fff; tmp = GSF_LE_GET_GUINT16 (refs + 8); b.sheet = NULL; b.col = *((gint8 *)(refs + 6)); b.col_relative = (tmp & 0x4000) ? 
TRUE : FALSE; b.row_relative = (tmp & 0x2000) ? TRUE : FALSE; if (b.row_relative) b.row = (int)(((gint16)((tmp & 0x1fff) << 3)) >> 3); else b.row = tmp & 0x1fff; expr = gnm_expr_new_constant ( value_new_cellrange_unsafe (&a, &b)); refs += 10; break; }
static int test (unsigned argc, char *argv[]) { static char const * const stream_names[] = { "Workbook", "WORKBOOK", "workbook", "Book", "BOOK", "book" }; GsfInput *input, *stream, *pcache_dir; GsfInfile *infile; GError *err = NULL; unsigned i, j; for (i = 1 ; i < argc ; i++) { fprintf( stderr, "%s\n",argv[i]); input = gsf_input_mmap_new (argv[i], NULL); if (input == NULL) /* Only report error if stdio fails too */ input = gsf_input_stdio_new (argv[i], &err); if (input == NULL) { g_return_val_if_fail (err != NULL, 1); g_warning ("'%s' error: %s", argv[i], err->message); g_error_free (err); err = NULL; continue; } input = gsf_input_uncompress (input); infile = gsf_infile_msole_new (input, &err); if (infile == NULL) { g_return_val_if_fail (err != NULL, 1); g_warning ("'%s' Not an OLE file: %s", argv[i], err->message); g_error_free (err); err = NULL; #ifdef DUMP_CONTENT dump_biff_stream (input); #endif g_object_unref (G_OBJECT (input)); continue; } #if 0 stream = gsf_infile_child_by_name (infile, "\01CompObj"); if (stream != NULL) { gsf_off_t len = gsf_input_size (stream); guint8 const *data = gsf_input_read (stream, len, NULL); if (data != NULL) gsf_mem_dump (data, len); g_object_unref (G_OBJECT (stream)); } return 0; #endif stream = gsf_infile_child_by_name (infile, "\05SummaryInformation"); if (stream != NULL) { GsfDocMetaData *meta_data = gsf_doc_meta_data_new (); puts ( "SummaryInfo"); err = gsf_doc_meta_data_read_from_msole (meta_data, stream); if (err != NULL) { g_warning ("'%s' error: %s", argv[i], err->message); g_error_free (err); err = NULL; } else gsf_doc_meta_dump (meta_data); g_object_unref (meta_data); g_object_unref (G_OBJECT (stream)); } stream = gsf_infile_child_by_name (infile, "\05DocumentSummaryInformation"); if (stream != NULL) { GsfDocMetaData *meta_data = gsf_doc_meta_data_new (); puts ( "DocSummaryInfo"); err = gsf_doc_meta_data_read_from_msole (meta_data, stream); if (err != NULL) { g_warning ("'%s' error: %s", argv[i], err->message); 
g_error_free (err); err = NULL; } else gsf_doc_meta_dump (meta_data); g_object_unref (meta_data); g_object_unref (G_OBJECT (stream)); } for (j = 0 ; j < G_N_ELEMENTS (stream_names) ; j++) { stream = gsf_infile_child_by_name (infile, stream_names[j]); if (stream != NULL) { puts (j < 3 ? "Excel97" : "Excel95"); #ifdef DUMP_CONTENT dump_biff_stream (stream); #endif g_object_unref (G_OBJECT (stream)); break; } } #ifdef DUMP_CONTENT pcache_dir = gsf_infile_child_by_name (infile, "_SX_DB_CUR"); /* Excel 97 */ if (NULL == pcache_dir) pcache_dir = gsf_infile_child_by_name (infile, "_SX_DB"); /* Excel 95 */ if (NULL != pcache_dir) { int i, n = gsf_infile_num_children (infile); for (i = 0 ; i < n ; i++) { stream = gsf_infile_child_by_index (GSF_INFILE (pcache_dir), i); if (stream != NULL) { printf ("=================================================\nPivot cache '%04hX'\n\n", i); dump_biff_stream (stream); g_object_unref (G_OBJECT (stream)); } } g_object_unref (G_OBJECT (pcache_dir)); } #endif g_object_unref (G_OBJECT (infile)); g_object_unref (G_OBJECT (input)); } return 0; }
/**
 * vba_dir_read:
 * @vba: #GsfInfileMSVBA
 * @err: (allow-none): place to store a #GError if anything goes wrong
 *
 * Read a VBA directory and its project file, extracting the source of each
 * module along the way.
 *
 * The "dir" child of @vba->source is decompressed and then walked as a
 * series of records, each made of a 16 bit tag, a 32 bit length and that
 * many bytes of data (all little-endian).  The walk ends after the record
 * with tag 0x10.
 *
 * Returns: %FALSE on error setting @err if it is supplied.
 **/
static gboolean
vba_dir_read (GsfInfileMSVBA *vba, GError **err)
{
	int inflated_size, element_count = -1;
	char const *msg = NULL;
	char *elem_stream = NULL;
	guint32 len;
	guint16 tag;
	guint8 *inflated_data, *end, *ptr;
	GsfInput *dir;
	gboolean failed = TRUE;

	/* 0. get the stream */
	dir = gsf_infile_child_by_name (vba->source, "dir");
	if (dir == NULL) {
		msg = _("Can't find the VBA directory stream");
		goto fail_stream;
	}

	/* 1. decompress it */
	ptr = inflated_data = gsf_vba_inflate (dir, 0, &inflated_size, TRUE);
	if (inflated_data == NULL) {
		/* Every failure path needs a message: it is what ends up
		 * in the GError handed back to the caller. */
		msg = _("Unable to decompress the VBA directory stream");
		goto fail_compression;
	}
	end = inflated_data + inflated_size;

	/* 2. GUESS : based on several xls with macros and XL8GARY this looks like a
	 * series of sized records.  Be _extra_ careful */
	do {
		/* I have seen
		 * type		len	data
		 *  1		 4	 1 0 0 0
		 *  2		 4	 9 4 0 0
		 *  3		 2	 4 e4
		 *  4		<var>	 project name
		 *  5		 0
		 *  6		 0
		 *  7		 4
		 *  8		 4
		 *  0x3d	 0
		 *  0x40	 0
		 *  0x14	 4	 9 4 0 0
		 *
		 *  0x0f == number of elements
		 *  0x1c == (Size 0)
		 *  0x1e == (Size 4)
		 *  0x48 == (Size 0)
		 *  0x31 == stream offset of the compressed source !
		 *
		 *  0x16 == an ascii dependency name
		 *  0x3e == a unicode dependency name
		 *  0x33 == a classid for a dependency with no trailing data
		 *
		 *  0x2f == a dummy classid
		 *  0x30 == a classid
		 *  0x0d == the classid
		 *  0x2f, and 0x0d appear to contain
		 *	uint32 classid_size;
		 *	<classid>
		 *	00 00 00 00 00 00
		 *	and sometimes some trailing junk
		 **/

		/* Compare sizes rather than pointers: ptr + len could wrap
		 * for a hostile 32 bit length. */
		if ((gsize) (end - ptr) < 6) {
			msg = _("vba project header problem");
			goto fail_content;
		}
		tag = GSF_LE_GET_GUINT16 (ptr);
		len = GSF_LE_GET_GUINT32 (ptr + 2);

		ptr += 6;
		if ((gsize) len > (gsize) (end - ptr)) {
			msg = _("vba project header problem");
			goto fail_content;
		}

		switch (tag) {
		case 4:
#ifdef OLD_VBA_DUMP
		{
			char *name = g_strndup ((char const *) ptr, len);
			puts ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
			printf ("<project name=\"%s\">", name);
			g_free (name);
		}
#endif
			break;

		case 9:
			/* this seems to have an extra two bytes that are not
			 * part of the length ..?? */
			len += 2;
			/* The enlarged record must still fit, otherwise ptr
			 * would be advanced beyond the buffer. */
			if ((gsize) len > (gsize) (end - ptr)) {
				msg = _("vba project header problem");
				goto fail_content;
			}
			break;

		case 0xf :
			if (len != 2) {
				g_warning ("element count is not what we expected");
				break;
			}
			if (element_count >= 0) {
				g_warning ("More than one element count ??");
				break;
			}
			element_count = GSF_LE_GET_GUINT16 (ptr);
			break;

		/* dependencies */
		case 0x0d : break;
		case 0x2f : break;
		case 0x30 : break;
		case 0x33 : break;
		case 0x3e : break;
		case 0x16:
#if 0
		{
			char *name = g_strndup ((char const *) ptr, len);
			g_print ("Depend Name : '%s'\n", name);
			g_free (name);
		}
#endif
			break;

		/* elements */
		case 0x47 : break;
		case 0x32 : break;
		case 0x1a:
#if 0
		{
			char *name = g_strndup ((char const *) ptr, len);
			g_print ("Element Name : '%s'\n", name);
			g_free (name);
		}
#endif
			break;

		case 0x19:
			/* Remember the stream name until the matching 0x31
			 * record supplies the source offset. */
			g_free (elem_stream);
			elem_stream = g_strndup ((char const *) ptr, len);
			break;

		case 0x31:
			if (len != 4) {
				g_warning ("source offset property is not what we expected");
				break;
			}
			vba_extract_module_source (vba, elem_stream,
				GSF_LE_GET_GUINT32 (ptr));
			g_free (elem_stream);
			elem_stream = NULL;
			element_count--;
			break;

		default :
#if 0
			g_print ("tag %hx : len %u\n", tag, len);
			gsf_mem_dump (ptr, len);
#endif
			break;
		}

		ptr += len;
	} while (tag != 0x10);

	/* Each extracted module decrements the announced count, so anything
	 * other than zero means the directory and its count disagree. */
	if (element_count != 0)
		g_warning ("Number of elements differs from expectations");

	failed = FALSE;

fail_content :
	g_free (inflated_data);
#ifdef OLD_VBA_DUMP
	puts ("</project>");
#endif

fail_compression :
	g_object_unref (dir);
fail_stream :
	g_free (elem_stream);

	if (failed) {
		if (err != NULL)
			*err = g_error_new_literal (gsf_input_error_id (), 0, msg);
		return FALSE;
	}
	return TRUE;
}