/**
 * zip_dirent_new_in:
 * @zip: #GsfInfileZip whose source is read
 * @offset: (inout): position of the directory record in zip->source; on
 *          success it is advanced past the fixed-size record, the name,
 *          the extra field and the comment
 *
 * Reads the directory record found at *@offset and builds a dirent from it.
 *
 * Returns: a newly created #GsfZipDirent, or %NULL if the seek fails, a read
 *          fails, or the record does not start with the "PK\001\002"
 *          signature.  *@offset is left untouched on failure.
 **/
static GsfZipDirent *
zip_dirent_new_in (GsfInfileZip *zip, gsf_off_t *offset)
{
	static guint8 const dirent_signature[] = { 'P', 'K', 0x01, 0x02 };
	GsfZipDirent *dirent;
	guint8 const *data;
	guint16 name_len, extras_len, comment_len, compr_method, flags;
	guint32 crc32, csize, usize, off;
	gchar *name;

	/* Read the fixed-size part of the record and check its signature */
	if (gsf_input_seek (zip->source, *offset, G_SEEK_SET) ||
	    NULL == (data = gsf_input_read (zip->source, ZIP_DIRENT_SIZE, NULL)) ||
	    0 != memcmp (data, dirent_signature, sizeof (dirent_signature)))
		return NULL;

	/* Decode every field now: 'data' is reassigned by the name read below. */
	name_len     = GSF_LE_GET_GUINT16 (data + ZIP_DIRENT_NAME_SIZE);
	extras_len   = GSF_LE_GET_GUINT16 (data + ZIP_DIRENT_EXTRAS_SIZE);
	comment_len  = GSF_LE_GET_GUINT16 (data + ZIP_DIRENT_COMMENT_SIZE);
	/* The flags are kept in a 16-bit variable, so read exactly 16 bits
	 * instead of fetching 32 and silently truncating. */
	flags        = GSF_LE_GET_GUINT16 (data + ZIP_DIRENT_FLAGS);
	compr_method = GSF_LE_GET_GUINT16 (data + ZIP_DIRENT_COMPR_METHOD);
	crc32        = GSF_LE_GET_GUINT32 (data + ZIP_DIRENT_CRC32);
	csize        = GSF_LE_GET_GUINT32 (data + ZIP_DIRENT_CSIZE);
	usize        = GSF_LE_GET_GUINT32 (data + ZIP_DIRENT_USIZE);
	off          = GSF_LE_GET_GUINT32 (data + ZIP_DIRENT_OFFSET);

	/* The name follows the fixed-size record and is not NUL terminated */
	if ((data = gsf_input_read (zip->source, name_len, NULL)) == NULL)
		return NULL;

	name = g_new (gchar, (gulong) (name_len + 1));
	memcpy (name, data, name_len);
	name[name_len] = '\0';

	dirent = gsf_zip_dirent_new ();
	dirent->name         = name;	/* the dirent now holds the copy */
	dirent->flags        = flags;
	dirent->compr_method = compr_method;
	dirent->crc32        = crc32;
	dirent->csize        = csize;
	dirent->usize        = usize;
	dirent->offset       = off;

	/* Skip the extra field and the comment so the caller lands on the
	 * next record. */
	*offset += ZIP_DIRENT_SIZE + name_len + extras_len + comment_len;

	return dirent;
}
/**
 * zip_read_dirents:
 * @zip: #GsfInfileZip
 *
 * Locates the zip trailer, reads the entry count and the directory position
 * from it, allocates zip->info and then reads every directory entry into
 * zip->info->dirent_list (in file order).
 *
 * Returns: %TRUE on error setting zip->err.
 **/
static gboolean
zip_read_dirents (GsfInfileZip *zip)
{
	guint8 const *trailer;
	guint16 entries, i;
	guint32 dir_pos;
	ZipInfo *info;
	GList *dirents = NULL;
	gboolean failed = FALSE;
	gsf_off_t offset;

	/* Find and check the trailing header */
	offset = zip_find_trailer (zip);
	if (offset < 0) {
		zip->err = g_error_new (gsf_input_error_id (), 0,
					"No Zip trailer");
		return TRUE;
	}

	if (gsf_input_seek (zip->source, offset, G_SEEK_SET) ||
	    NULL == (trailer = gsf_input_read (zip->source, ZIP_TRAILER_SIZE, NULL))) {
		zip->err = g_error_new (gsf_input_error_id (), 0,
					"Error reading Zip signature");
		return TRUE;
	}

	/* The count is stored in a 16-bit variable: read exactly 16 bits
	 * rather than fetching 32 and truncating. */
	entries = GSF_LE_GET_GUINT16 (trailer + ZIP_TRAILER_ENTRIES);
	dir_pos = GSF_LE_GET_GUINT32 (trailer + ZIP_TRAILER_DIR_POS);

	info = g_new0 (ZipInfo, 1);
	zip->info = info;

	info->ref_count = 1;
	info->entries   = entries;
	info->dir_pos   = dir_pos;

	/* Read the directory.  Entries are prepended and the list reversed
	 * once at the end, which keeps file order without walking the whole
	 * list for every insertion. */
	for (i = 0, offset = dir_pos; i < entries; i++) {
		GsfZipDirent *d = zip_dirent_new_in (zip, &offset);

		if (d == NULL) {
			zip->err = g_error_new (gsf_input_error_id (), 0,
						"Error reading zip dirent");
			failed = TRUE;
			break;
		}
		dirents = g_list_prepend (dirents, d);
	}

	/* Entries read before a failure stay attached to info, in order. */
	info->dirent_list = g_list_reverse (dirents);

	return failed;
}
/*
 * parse_file_header:
 * @file: file whose file_header_stream is read
 *
 * Reads the whole file header stream and fills in the signature, the four
 * version bytes and the property flags of @file.
 *
 * Layout used below: bytes 0..31 signature, bytes 32..35 version
 * (extra, micro, minor, major), bytes 36..39 little-endian property bits.
 *
 * Returns silently, leaving @file untouched, when the stream is shorter than
 * those 40 bytes or cannot be read.
 */
static void
parse_file_header (HwpHWP5File *file)
{
  enum
  {
    SIGNATURE_LEN  = 32,
    VERSION_OFFSET = 32,
    PROP_OFFSET    = 36,
    MIN_HEADER_LEN = 40
  };

  size_t        size = gsf_input_size (file->file_header_stream);
  const guint8 *buf;
  guint32       prop;

  /* Everything below indexes up to byte 39; refuse truncated headers
   * instead of reading past the end of the buffer. */
  if (size < MIN_HEADER_LEN)
    return;

  buf = gsf_input_read (file->file_header_stream, size, NULL);
  if (buf == NULL)
    return;

  /* The signature is NUL terminated inside its 32-byte field;
   * g_strndup also terminates it if it is not. */
  file->signature = g_strndup ((const gchar *) buf, SIGNATURE_LEN);

  /* The version bytes are stored least significant first */
  file->extra_version = buf[VERSION_OFFSET + 0];
  file->micro_version = buf[VERSION_OFFSET + 1];
  file->minor_version = buf[VERSION_OFFSET + 2];
  file->major_version = buf[VERSION_OFFSET + 3];

  prop = GSF_LE_GET_GUINT32 (buf + PROP_OFFSET);

  /* Store each bit as 0/1 rather than as the raw masked value (2, 4, ...),
   * so the result is a proper boolean whatever the width of the field. */
  file->is_compress            = (prop & (1u << 0))  != 0;
  file->is_encrypt             = (prop & (1u << 1))  != 0;
  file->is_distribute          = (prop & (1u << 2))  != 0;
  file->is_script              = (prop & (1u << 3))  != 0;
  file->is_drm                 = (prop & (1u << 4))  != 0;
  file->is_xml_template        = (prop & (1u << 5))  != 0;
  file->is_history             = (prop & (1u << 6))  != 0;
  file->is_sign                = (prop & (1u << 7))  != 0;
  file->is_certificate_encrypt = (prop & (1u << 8))  != 0;
  file->is_sign_spare          = (prop & (1u << 9))  != 0;
  file->is_certificate_drm     = (prop & (1u << 10)) != 0;
  file->is_ccl                 = (prop & (1u << 11)) != 0;
}
void streamRead(GsfInput* aStream, TextAttr& aAttr, gsf_off_t aEoa) throw(UT_Error) { UT_uint8 flags; gsf_off_t newPos; readFlagRec(aStream, flags, &newPos); streamRead(aStream, aAttr.which); streamRead(aStream, aAttr.ver); if (flags & 0x10) { aAttr.startSet = true; streamRead(aStream, aAttr.start); } else aAttr.startSet = false; if (flags & 0x20) { aAttr.endSet = true; streamRead(aStream, aAttr.end); } else aAttr.endSet = false; if (gsf_input_seek(aStream, newPos, G_SEEK_SET)) throw UT_IE_BOGUSDOCUMENT; gsf_off_t curPos = gsf_input_tell(aStream); if (curPos != aEoa) { // there is data aAttr.dataLen = aEoa - curPos; aAttr.data = new UT_uint8[aAttr.dataLen]; streamRead(aStream, aAttr.data, aAttr.dataLen); } // LIST OF THE VALUES: http://ooo.ximian.com/lxr/source/sw/sw/inc/hintids.hxx#086 // together with http://ooo.ximian.com/lxr/source/sw/sw/source/core/sw3io/sw3fmts.cxx#172 switch (aAttr.which) { case 0x1004: // strikethrough aAttr.attrName = "text-decoration"; if (!aAttr.data || aAttr.data[0]) aAttr.attrVal = "line-through"; else aAttr.isOff = true; break; case 0x1005: { // sub-/superscript if (aAttr.dataLen < 3) break; // first byte is size of text % of normal size UT_sint16 height = GSF_LE_GET_GINT16(aAttr.data + 1); aAttr.attrName = "text-position"; if (height > 0) aAttr.attrVal = "superscript"; else if (height < 0) aAttr.attrVal = "subscript"; else aAttr.isOff = true; break; } case 0x1006: { // font family if (!aAttr.data || aAttr.dataLen < 7) // 7 = 3 byte family etc., 2 byte name length, 2 byte style length break; aAttr.attrName = "font-family"; // XXX TODO This code here assumes that the font names are in latin1 UT_uint16 fontLen = GSF_LE_GET_GUINT16(aAttr.data + 3); UT_String_sprintf(aAttr.attrVal, "%.*s", fontLen, (aAttr.data + 5)); break; } case 0x1007: // font height // structure: | height (2 byte, twips) | prop (?) 
(2 byte) (if version >= 2, if ver=1 1 byte) | unit (if version>=2) | // XXX we ignore "prop" and unit for now, they seem not used much aAttr.attrName = "font-size"; if (aAttr.data) aAttr.attrVal = twipsToSizeString(GSF_LE_GET_GUINT16(aAttr.data)); break; case 0x100a: // Italic aAttr.attrName = "font-style"; if (!aAttr.data || aAttr.data[0]) // if there is data, first byte must be != 0 // abiword doesn't support oblique, so always set italic aAttr.attrVal = "italic"; else aAttr.isOff = true; break; case 0x100d: // Underline aAttr.attrName = "text-decoration"; if (!aAttr.data || aAttr.data[0]) aAttr.attrVal = "underline"; else aAttr.isOff = true; break; case 0x100e: // Bold aAttr.attrName = "font-weight"; if (!aAttr.data || aAttr.data[0] >= 8) // 8=Bold. aAttr.attrVal = "bold"; else aAttr.isOff = true; break; case 0x4000: // line spacing aAttr.attrName = "line-height"; aAttr.isPara = true; // prop space (s8) | inter space (s16) | height (u16) | rule (s8) | interrule (s8) if (aAttr.data && aAttr.dataLen >= 7) { // Abiword wants it as float value, StarOffice saves as percentage (e.g. 
// 150 for 1.5) float proportionalLineSpace = float(aAttr.data[0])/100; // But maybe we need to use the height - stored as twips, need points // (used for "exact" and "minimum" line spacing) // XXX inter-line spacing not supported by abiword (would be rule=0x00 // interrule=0x02, value to use=inter space, unit twips) UT_String lineHeight = twipsToSizeString(GSF_LE_GET_GINT16(aAttr.data + 3)); // We'll turn the bytes at 5 and 6 into a single integer, for easier // evaluation switch (GSF_LE_GET_GUINT16(aAttr.data + 5)) { case 0x0100: // proportional aAttr.attrVal = std_size_string(proportionalLineSpace); break; case 0x0001: case 0x0002: aAttr.attrVal = lineHeight; if (aAttr.data[5] == 2) // "minimum" case aAttr.attrVal += '+'; break; default: UT_DEBUGMSG(("Unsupported linespacing: %02x %02x\n", aAttr.data[5], aAttr.data[6])); } } break; case 0x4001: // Alignment aAttr.attrName = "text-align"; aAttr.isPara = true; if (aAttr.data) { switch (aAttr.data[0]) { case 0: aAttr.attrVal = "left"; break; case 1: aAttr.attrVal = "right"; break; case 2: case 4: // BLOCKLINE!? what's BLOCKLINE? I'm guessing justify. aAttr.attrVal = "justify"; break; case 3: aAttr.attrVal = "center"; break; } } break; case 0x4005: {// Tabstops aAttr.attrName = "tabstops"; aAttr.isPara = true; // Data structure: // Count(8) | Position (in twips) (32) | Adjustment (8) | Decimal Separator (?) 
(8) | Fill character (8) // (total size per tab = 7) // UT_sint8 count = aAttr.data[0]; for (UT_uint32 i = 1; (i + 6) < aAttr.dataLen; i += 7) { // Abiword wants: 12.3cm/L0, where 0 indicates what to fill with UT_uint16 posInTwips = GSF_LE_GET_GUINT32(aAttr.data + i); UT_String pos = twipsToSizeString(posInTwips); aAttr.attrVal += pos; aAttr.attrVal += '/'; if (aAttr.data[i + 4] < sizeof(sTabAlignment)/sizeof(*sTabAlignment)) aAttr.attrVal += sTabAlignment[aAttr.data[i + 4]]; else aAttr.attrVal += 'L'; // fallback char fillIndex = '0'; // Fill character switch (aAttr.data[i + 6]) { case '.': fillIndex = '1'; break; case '-': fillIndex = '2'; break; case '_': fillIndex = '3'; break; case ' ': fillIndex = '0'; break; default: UT_DEBUGMSG(("Filling with '%c' is not supported\n", aAttr.data[i + 6])); } aAttr.attrVal += fillIndex; aAttr.attrVal += ','; } } break; default: UT_DEBUGMSG(("SDW: unknown attribute 0x%x, compressed %d\n", aAttr.which, lcl_sw3io__CompressWhich(aAttr.which))); } }
static void make_stream (HwpHWP5File *file, GError **error) { GsfInput *input = NULL; GsfInfile *ole = GSF_INFILE (file->priv->olefile); gint n_root_entry = gsf_infile_num_children (ole); if (n_root_entry < 1) { g_set_error_literal (error, HWP_FILE_ERROR, HWP_FILE_ERROR_INVALID, "invalid hwp file"); return; } /* 우선 순위에 따라 스트림을 만든다 */ input = gsf_infile_child_by_name (ole, "FileHeader"); if (input && gsf_infile_num_children (GSF_INFILE (input)) == -1) { file->file_header_stream = input; input = NULL; parse_file_header (file); } else { goto FAIL; } input = gsf_infile_child_by_name (ole, "DocInfo"); if (input && gsf_infile_num_children (GSF_INFILE (input)) == -1) { if (file->is_compress) { GInputStream *gis; GZlibDecompressor *zd; GInputStream *cis; gis = (GInputStream *) gsf_input_stream_new (input); zd = g_zlib_decompressor_new (G_ZLIB_COMPRESSOR_FORMAT_RAW); cis = g_converter_input_stream_new (gis, (GConverter *) zd); g_filter_input_stream_set_close_base_stream (G_FILTER_INPUT_STREAM (cis), TRUE); file->doc_info_stream = cis; g_object_unref (zd); g_object_unref (gis); input = NULL; } else { file->doc_info_stream = (GInputStream *) gsf_input_stream_new (input); } } else { goto FAIL; } if (!file->is_distribute) input = gsf_infile_child_by_name (ole, "BodyText"); else input = gsf_infile_child_by_name (ole, "ViewText"); if (input) { for (gint i = 0; i < gsf_infile_num_children (GSF_INFILE (input)); i++) { GsfInput *section = gsf_infile_child_by_name (GSF_INFILE (input), g_strdup_printf("Section%d", i)); if (gsf_infile_num_children (GSF_INFILE (section)) != -1) { if (GSF_IS_INPUT (section)) g_object_unref (section); g_set_error_literal (error, HWP_FILE_ERROR, HWP_FILE_ERROR_INVALID, "invalid hwp file"); return; } if (file->is_distribute) { guint8 *data = g_malloc0 (256); gsf_input_read (section, 4, NULL); gsf_input_read (section, 256, data); guint32 seed = GSF_LE_GET_GUINT32 (data); msvc_srand (seed); gint n = 0, val = 0, offset; for (guint i = 0; i < 256; i++) { if (n 
== 0) { val = msvc_rand() & 0xff; n = (msvc_rand() & 0xf) + 1; } data[i] ^= val; n--; } offset = 4 + (seed & 0xf); gchar *key = g_malloc0 (16); memcpy (key, (const gchar *) data + offset, 16); #ifdef HWP_ENABLE_DEBUG gchar *sha1 = g_convert ((const gchar *) data + offset, 80, "UTF-8", "UTF-16LE", NULL, NULL, error); printf ("sha1: %s\n", sha1); printf ("key: %s\n", key); g_free (sha1); #endif g_free (data); EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new (); EVP_CIPHER_CTX_init (ctx); EVP_DecryptInit_ex (ctx, EVP_aes_128_ecb(), NULL, (unsigned char *) key, NULL); g_free (key); EVP_CIPHER_CTX_set_padding(ctx, 0); /* no padding */ gsf_off_t encrypted_data_len = gsf_input_remaining (section); guint8 const *encrypted_data = gsf_input_read (section, encrypted_data_len, NULL); guint8 *decrypted_data = g_malloc (encrypted_data_len); int decrypted_data_len, len; EVP_DecryptUpdate (ctx, decrypted_data, &len, encrypted_data, encrypted_data_len); decrypted_data_len = len; EVP_DecryptFinal_ex (ctx, decrypted_data + len, &len); decrypted_data_len += len; EVP_CIPHER_CTX_free (ctx); g_object_unref (section); section = gsf_input_memory_new (decrypted_data, decrypted_data_len, TRUE); } if (file->is_compress) { GInputStream *gis; GZlibDecompressor *zd; GInputStream *cis; gis = (GInputStream *) gsf_input_stream_new (section); zd = g_zlib_decompressor_new (G_ZLIB_COMPRESSOR_FORMAT_RAW); cis = g_converter_input_stream_new (gis, (GConverter *) zd); g_filter_input_stream_set_close_base_stream (G_FILTER_INPUT_STREAM (cis), TRUE); g_ptr_array_add (file->section_streams, cis); g_object_unref (zd); g_object_unref (gis); } else { GInputStream *stream = (GInputStream *) gsf_input_stream_new (section); g_ptr_array_add (file->section_streams, stream); } } /* for */ g_object_unref (input); input = NULL; } else { goto FAIL; } input = gsf_infile_child_by_name (ole, "\005HwpSummaryInformation"); if (input && gsf_infile_num_children (GSF_INFILE (input)) == -1) { file->summary_info_stream = input; input = 
NULL; } else { goto FAIL; } input = gsf_infile_child_by_name (ole, "BinData"); if (input) { gint n_data = gsf_infile_num_children (GSF_INFILE (input)); for (gint i = 0; i < n_data; i++) { GsfInput *bin_data_input = gsf_infile_child_by_index (GSF_INFILE (input), i); if (gsf_infile_num_children (GSF_INFILE (bin_data_input)) != -1) { if (GSF_IS_INPUT (bin_data_input)) g_object_unref (bin_data_input); g_set_error_literal (error, HWP_FILE_ERROR, HWP_FILE_ERROR_INVALID, "invalid hwp file"); return; } if (file->is_compress) { GInputStream *gis; GZlibDecompressor *zd; GInputStream *cis; gis = (GInputStream *) gsf_input_stream_new (bin_data_input); zd = g_zlib_decompressor_new (G_ZLIB_COMPRESSOR_FORMAT_RAW); cis = g_converter_input_stream_new (gis, (GConverter *) zd); g_filter_input_stream_set_close_base_stream (G_FILTER_INPUT_STREAM (cis), TRUE); g_ptr_array_add (file->bin_data_streams, cis); g_object_unref (zd); g_object_unref (gis); } else { GInputStream *stream = (GInputStream *) gsf_input_stream_new (bin_data_input); g_ptr_array_add (file->bin_data_streams, stream); } } g_object_unref (input); input = NULL; } input = gsf_infile_child_by_name (ole, "PrvText"); if (input && gsf_infile_num_children (GSF_INFILE (input)) == -1) { file->prv_text_stream = input; input = NULL; } else { goto FAIL; } input = gsf_infile_child_by_name (ole, "PrvImage"); if (input && gsf_infile_num_children (GSF_INFILE (input)) == -1) { file->prv_image_stream = input; input = NULL; } else { goto FAIL; } return; FAIL: if (GSF_IS_INPUT (input)) g_object_unref (input); g_set_error_literal (error, HWP_FILE_ERROR, HWP_FILE_ERROR_INVALID, "invalid hwp file"); return; }
/*
 * check_header:
 * @input: gzip input whose source is positioned at its start
 *
 * For raw input, records empty header and trailer sizes.  Otherwise checks
 * the signature, the compression method and the flag byte, learns the
 * uncompressed size from the last 4 bytes of the source when it is not
 * known yet, and steps over the optional header fields to find where the
 * payload starts.  Finally publishes the uncompressed size as the size of
 * @input.
 *
 * Returns: TRUE when the header is unusable or there is no room left for
 *          the trailer, FALSE when everything checked out.
 */
static gboolean
check_header (GsfInputGZip *input)
{
	if (!input->raw) {
		static guint8 const gzip_magic[2] = { 0x1f, 0x8b };
		guint8 const *buf;
		unsigned hdr_flags, extra_len;

		/* Fixed part of the header, starting with the signature */
		buf = gsf_input_read (input->source, 2 + 1 + 1 + 6, NULL);
		if (buf == NULL)
			return TRUE;
		if (memcmp (buf, gzip_magic, sizeof (gzip_magic)) != 0)
			return TRUE;

		/* Only deflate, and only flag bits we know about */
		hdr_flags = buf[3];
		if (buf[2] != Z_DEFLATED)
			return TRUE;
		if ((hdr_flags & ~GZIP_HEADER_FLAGS) != 0)
			return TRUE;

		/* No need to visit the end when the size is already known */
		if (input->uncompressed_size < 0) {
			if (gsf_input_seek (input->source, (gsf_off_t) -4, G_SEEK_END))
				return TRUE;
			buf = gsf_input_read (input->source, 4, NULL);
			if (buf == NULL)
				return TRUE;

			/* FIXME, but how?  The size read here is modulo 2^32.  */
			input->uncompressed_size = GSF_LE_GET_GUINT32 (buf);

			if (input->uncompressed_size / 1000 > gsf_input_size (input->source)) {
				g_warning ("Suspiciously well compressed file with better than 1000:1 ratio.\n"
					   "It is probably truncated or corrupt");
			}
		}

		/* Back to just after the fixed part */
		if (gsf_input_seek (input->source, 2 + 1 + 1 + 6, G_SEEK_SET))
			return TRUE;

		if (hdr_flags & GZIP_EXTRA_FIELD) {
			/* 2-byte length followed by that many bytes */
			buf = gsf_input_read (input->source, 2, NULL);
			if (buf == NULL)
				return TRUE;
			extra_len = GSF_LE_GET_GUINT16 (buf);
			if (gsf_input_read (input->source, extra_len, NULL) == NULL)
				return TRUE;
		}

		if (hdr_flags & GZIP_ORIGINAL_NAME) {
			/* NUL terminated file name, ISO 8859-1 encoded */
			for (;;) {
				buf = gsf_input_read (input->source, 1, NULL);
				if (buf == NULL)
					return TRUE;
				if (*buf == 0)
					break;
			}
		}

		if (hdr_flags & GZIP_HAS_COMMENT) {
			/* NUL terminated comment, ISO 8859-1 encoded */
			for (;;) {
				buf = gsf_input_read (input->source, 1, NULL);
				if (buf == NULL)
					return TRUE;
				if (*buf == 0)
					break;
			}
		}

		if (hdr_flags & GZIP_HEADER_CRC) {
			/* 2-byte header checksum, stepped over unverified */
			if (gsf_input_read (input->source, 2, NULL) == NULL)
				return TRUE;
		}

		/* The payload starts here; crc and size make up the last 8 bytes */
		input->header_size = input->source->cur_offset;
		input->trailer_size = 8;
	} else {
		input->header_size = 0;
		input->trailer_size = 0;
	}

	gsf_input_set_size (GSF_INPUT (input), input->uncompressed_size);

	/* Is there room for the trailer (and thus possibly a payload)? */
	if (gsf_input_remaining (input->source) < input->trailer_size)
		return TRUE;

	return FALSE;
}
/**
 * vba_dir_read:
 * @vba: #GsfInfileMSVBA
 * @err: (allow-none): place to store a #GError if anything goes wrong
 *
 * Reads the VBA directory stream ("dir"), walks its records and extracts the
 * source of every module whose stream name and source offset it announces.
 *
 * Returns: %FALSE on error setting @err if it is supplied.
 **/
static gboolean
vba_dir_read (GsfInfileMSVBA *vba, GError **err)
{
	int inflated_size, element_count = -1;
	char const *msg = NULL;
	char *elem_stream = NULL;
	guint32 len;
	guint16 tag;
	guint8 *inflated_data, *end, *ptr;
	GsfInput *dir;
	gboolean failed = TRUE;

	/* 0. get the stream */
	dir = gsf_infile_child_by_name (vba->source, "dir");
	if (dir == NULL) {
		msg = _("Can't find the VBA directory stream");
		goto fail_stream;
	}

	/* 1. decompress it */
	ptr = inflated_data = gsf_vba_inflate (dir, 0, &inflated_size, TRUE);
	if (inflated_data == NULL) {
		/* the error created below needs a message */
		msg = _("Can't decompress the VBA directory stream");
		goto fail_compression;
	}
	end = inflated_data + inflated_size;

	/* 2. GUESS : based on several xls with macros and XL8GARY this looks like a
	 * series of sized records.  Be _extra_ careful */
	do {
		/* I have seen
		 * type		len	data
		 *  1		 4	 1 0 0 0
		 *  2		 4	 9 4 0 0
		 *  3		 2	 4 e4
		 *  4		<var>	 project name
		 *  5		 0
		 *  6		 0
		 *  7		 4
		 *  8		 4
		 *  0x3d	 0
		 *  0x40	 0
		 *  0x14	 4	 9 4 0 0
		 *
		 *  0x0f == number of elements
		 *  0x1c == (Size 0)
		 *  0x1e == (Size 4)
		 *  0x48 == (Size 0)
		 *  0x31 == stream offset of the compressed source !
		 *
		 *  0x16 == an ascii dependency name
		 *  0x3e == a unicode dependency name
		 *  0x33 == a classid for a dependency with no trailing data
		 *
		 *  0x2f == a dummy classid
		 *  0x30 == a classid
		 *  0x0d == the classid
		 *  0x2f, and 0x0d appear contain
		 * 	uint32 classid_size;
		 * 	<classid>
		 *	00 00 00 00 00 00
		 *	and sometimes some trailing junk
		 **/

		/* Bounds are checked on the number of bytes left rather than
		 * on ptr + n, which could overflow for a hostile length.
		 * ptr never passes end, so the difference is not negative. */
		if ((end - ptr) < 6) {
			msg = _("vba project header problem");
			goto fail_content;
		}
		tag = GSF_LE_GET_GUINT16 (ptr);
		len = GSF_LE_GET_GUINT32 (ptr + 2);

		ptr += 6;
		if (len > (gsize) (end - ptr)) {
			msg = _("vba project header problem");
			goto fail_content;
		}

		switch (tag) {
		case 4:
			/* project name, only of interest to the dump */
#ifdef OLD_VBA_DUMP
			{
				char *name = g_strndup ((char const *) ptr, len);
				puts ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
				printf ("<project name=\"%s\">", name);
				g_free (name);
			}
#endif
			break;

		case 9:
			/* this seems to have an extra two bytes that are not
			 * part of the length ..?? */
			if ((gsize) (end - ptr) - len < 2) {
				msg = _("vba project header problem");
				goto fail_content;
			}
			len += 2;
			break;

		case 0xf :
			if (len != 2) {
				g_warning ("element count is not what we expected");
				break;
			}
			if (element_count >= 0) {
				g_warning ("More than one element count ??");
				break;
			}
			element_count = GSF_LE_GET_GUINT16 (ptr);
			break;

		/* dependencies */
		case 0x0d : break;
		case 0x2f : break;
		case 0x30 : break;
		case 0x33 : break;
		case 0x3e : break;
		case 0x16 : break;	/* ascii dependency name */

		/* elements */
		case 0x47 : break;
		case 0x32 : break;
		case 0x1a : break;	/* element name */

		case 0x19:
			/* name of the stream holding the module; used when
			 * the matching 0x31 record arrives */
			g_free (elem_stream);
			elem_stream = g_strndup ((char const *) ptr, len);
			break;

		case 0x31:
			if (len != 4) {
				g_warning ("source offset property is not what we expected");
				break;
			}
			vba_extract_module_source (vba, elem_stream,
				GSF_LE_GET_GUINT32 (ptr));
			g_free (elem_stream);
			elem_stream = NULL;
			element_count--;
			break;

		default :
			break;
		}

		ptr += len;
	} while (tag != 0x10);

	/* one 0x31 record is expected per announced element */
	if (element_count != 0)
		g_warning ("Number of elements differs from expectations");

	failed = FALSE;

fail_content :
	g_free (inflated_data);
#ifdef OLD_VBA_DUMP
	puts ("</project>");
#endif
fail_compression :
	g_object_unref (dir);
fail_stream :
	g_free (elem_stream);

	if (failed) {
		if (err != NULL)
			*err = g_error_new_literal (gsf_input_error_id (), 0, msg);
		return FALSE;
	}
	return TRUE;
}