/*
 * init_zip:
 * @gzip: the gzip input being set up
 * @err: optional place to store a GError
 *
 * Initialises the zlib inflater, rewinds the underlying source and
 * validates the gzip header.  If the header is rejected an attempt is made
 * to put the source back where it was.
 *
 * Returns TRUE on failure, FALSE on success.
 */
static gboolean
init_zip (GsfInputGZip *gzip, GError **err)
{
	gsf_off_t saved_pos;

	if (inflateInit2 (&(gzip->stream), -MAX_WBITS) != Z_OK) {
		if (err != NULL)
			*err = g_error_new (gsf_input_error_id (), 0,
					    "Unable to initialize zlib");
		return TRUE;
	}

	/* Remember where we were so a bad header can be undone. */
	saved_pos = gsf_input_tell (gzip->source);
	if (gsf_input_seek (gzip->source, 0, G_SEEK_SET)) {
		if (err != NULL)
			*err = g_error_new (gsf_input_error_id (), 0,
					    "Failed to rewind source");
		return TRUE;
	}

	if (!check_header (gzip))
		return FALSE;

	/* Header was rejected. */
	if (err != NULL)
		*err = g_error_new (gsf_input_error_id (), 0,
				    "Invalid gzip header");
	if (gsf_input_seek (gzip->source, saved_pos, G_SEEK_SET))
		g_warning ("attempt to restore position failed ??");
	return TRUE;
}
/*
 * Peek at the first line of a GLPK output file to guess its format version,
 * then restore the read position of @tl.
 *
 * A first line holding two unsigned integers whose second value equals the
 * number of entries in the solver's cell_from_name table is taken as the
 * 4.57 format; a first line starting with "c " or "s " is taken as 4.58.
 * Anything else (including a failed read) yields GLPK_UNKNOWN.
 */
static GlpkFileVersion
gnm_glpk_detect_version (GnmGlpk *lp, GsfInputTextline *tl)
{
	GnmSubSolver *subsol = lp->parent;
	gsf_off_t start = gsf_input_tell (GSF_INPUT (tl));
	GlpkFileVersion detected = GLPK_UNKNOWN;
	const char *first;
	unsigned n_cols, n_rows;

	first = gsf_input_textline_utf8_gets (tl);
	if (first != NULL) {
		if (sscanf (first, "%u %u", &n_rows, &n_cols) == 2 &&
		    n_cols == g_hash_table_size (subsol->cell_from_name)) {
			detected = GLPK_457;
			if (gnm_solver_debug ())
				g_printerr ("Detected version 4.57 file format\n");
		} else if ((first[0] == 'c' || first[0] == 's') &&
			   first[1] == ' ') {
			detected = GLPK_458;
			if (gnm_solver_debug ())
				g_printerr ("Detected version 4.58 file format\n");
		}
	}

	// Extra seek due to gsf bug
	gsf_input_seek (GSF_INPUT (tl), start + 1, G_SEEK_SET);
	gsf_input_seek (GSF_INPUT (tl), start, G_SEEK_SET);
	return detected;
}
/*
 * zip_child_init:
 * Seek to the local file header of @child's directory entry, verify its
 * "PK\3\4" signature, compute where the member data starts and, for
 * non-stored members, (re)initialise the inflater.
 *
 * returns TRUE on error (storing a GError in *@errmsg when non-NULL)
 */
static gboolean
zip_child_init (GsfInfileZip *child, GError **errmsg)
{
	static guint8 const header_signature[] = { 'P', 'K', 0x03, 0x04 };

	GsfZipDirent *dirent = child->vdir->dirent;
	guint8 const *hdr = NULL;
	char *problem = NULL;
	guint16 name_len, extras_len;

	/* skip local header
	 * should test tons of other info, but trust that those are correct
	 **/
	if (gsf_input_seek (child->source, (gsf_off_t) dirent->offset, G_SEEK_SET)) {
		problem = g_strdup_printf ("Error seeking to zip header @ %" GSF_OFF_T_FORMAT,
					   dirent->offset);
	} else {
		hdr = gsf_input_read (child->source, ZIP_FILE_HEADER_SIZE, NULL);
		if (hdr == NULL) {
			problem = g_strdup_printf ("Error reading %d bytes in zip header",
						   ZIP_FILE_HEADER_SIZE);
		} else if (memcmp (hdr, header_signature, sizeof (header_signature)) != 0) {
			problem = g_strdup_printf ("Error incorrect zip header @ %" GSF_OFF_T_FORMAT,
						   dirent->offset);
			g_print ("Header is :\n");
			gsf_mem_dump (hdr, sizeof (header_signature));
			g_print ("Header should be :\n");
			gsf_mem_dump (header_signature, sizeof (header_signature));
		}
	}

	if (problem != NULL) {
		if (errmsg != NULL)
			*errmsg = g_error_new_literal (gsf_input_error_id (), 0, problem);
		g_free (problem);
		return TRUE;
	}

	/* Member data follows the fixed header, the name and the extras. */
	name_len   = GSF_LE_GET_GUINT16 (hdr + ZIP_FILE_HEADER_NAME_SIZE);
	extras_len = GSF_LE_GET_GUINT16 (hdr + ZIP_FILE_HEADER_EXTRAS_SIZE);
	dirent->data_offset = dirent->offset + ZIP_FILE_HEADER_SIZE + name_len + extras_len;

	child->restlen  = dirent->usize;
	child->crestlen = dirent->csize;

	if (dirent->compr_method == GSF_ZIP_STORED)
		return FALSE;

	if (child->stream == NULL)
		child->stream = g_new0 (z_stream, 1);

	if (inflateInit2 (child->stream, -MAX_WBITS) != Z_OK) {
		if (errmsg != NULL)
			*errmsg = g_error_new (gsf_input_error_id (), 0,
					       "problem uncompressing stream");
		return TRUE;
	}

	return FALSE;
}
/*
 * Seek implementation for GsfInputTextline: drop the pending remainder
 * pointer and forward the request to the wrapped source.
 * Returns whatever gsf_input_seek() on the source returns.
 */
static gboolean
gsf_input_textline_seek (GsfInput *input, gsf_off_t offset, GSeekType whence)
{
	GsfInputTextline *self = GSF_INPUT_TEXTLINE (input);
	GsfInput *wrapped;

	/* Anything left over from the previous read is stale after a seek. */
	self->remainder = NULL;

	wrapped = self->source;
	return gsf_input_seek (wrapped, offset, whence);
}
/*
 * stf_open_and_read:
 * @context: unused
 * @input: stream to read; it is rewound to offset 0 first
 * @readsize: receives the number of content bytes (excluding the
 *            terminating NUL that is appended to the buffer)
 *
 * Reads the whole of @input into a freshly allocated, NUL-terminated
 * buffer.
 *
 * NOTE : The returned buffer has to be g_freed by the calling routine.
 *
 * returns : a buffer containing the stream contents, or NULL if the seek
 *           fails, the size is negative or does not fit the allocation
 *           types, the allocation fails, or the read fails.
 */
static char *
stf_open_and_read (G_GNUC_UNUSED GOIOContext *context, GsfInput *input, size_t *readsize)
{
	gpointer result;
	gulong allocsize;
	gsf_off_t size = gsf_input_size (input);

	if (gsf_input_seek (input, 0, G_SEEK_SET))
		return NULL;

	/* A negative size cannot be read; reject it explicitly instead of
	 * relying on the wrapped-around allocation failing. */
	if (size < 0) {
		*readsize = 0;
		return NULL;
	}

	*readsize = (size_t) size;
	if ((gsf_off_t) *readsize != size) /* Check for overflow */
		return NULL;

	/* One extra byte for the terminator.  Done in unsigned arithmetic so
	 * that neither the increment nor a narrower gulong can overflow
	 * silently. */
	allocsize = (gulong) *readsize + 1;
	if (allocsize == 0 || (size_t) (allocsize - 1) != *readsize)
		return NULL;

	result = g_try_malloc (allocsize);
	if (result == NULL)
		return NULL;

	*((char *)result + *readsize) = '\0';

	if (*readsize > 0 && gsf_input_read (input, *readsize, result) == NULL) {
		g_warning ("gsf_input_read failed.");
		g_free (result);
		result = NULL;
	}
	return result;
}
/*
 * zip_find_trailer:
 * @zip: archive whose source is searched
 *
 * Scans backwards from the end of the source for the "PK\5\6" trailer
 * signature, reading one window of at most ZIP_BUF_SIZE bytes at a time.
 * Successive windows overlap by half a buffer, and the search gives up once
 * the window start is more than 64K from the end of the file.
 *
 * Returns the absolute offset of the signature, or -1 if the file is too
 * small, a seek/read fails, or no signature is found.
 */
static gsf_off_t
zip_find_trailer (GsfInfileZip *zip)
{
	static guint8 const trailer_signature[] = { 'P', 'K', 0x05, 0x06 };
	gsf_off_t offset, filesize;
	gsf_off_t maplen;
	guint8 const *data;

	filesize = gsf_input_size (zip->source);
	if (filesize < ZIP_TRAILER_SIZE)
		return -1;

	maplen = filesize & (ZIP_BUF_SIZE - 1);
	if (maplen == 0)
		maplen = ZIP_BUF_SIZE;
	offset = filesize - maplen; /* offset is now BUFSIZ aligned */

	while (TRUE) {
		gsf_off_t i;

		if (gsf_input_seek (zip->source, offset, G_SEEK_SET))
			return -1;

		if ((data = gsf_input_read (zip->source, maplen, NULL)) == NULL)
			return -1;

		/* Walk the window from its last byte to its first.  An index
		 * is used rather than a pointer so that nothing is ever formed
		 * that points before the start of the buffer. */
		for (i = maplen - 1; i >= 0; i--) {
			if ((data[i] == 'P') &&
			    (maplen - 1 - i > ZIP_TRAILER_SIZE - 2) &&
			    !memcmp (data + i, trailer_signature, sizeof (trailer_signature))) {
				/* window start + index == absolute position */
				return offset + i;
			}
		}

		/* not found in currently mapped block, so update it if
		 * there is some room in before. The requirements are..
		 * (a) mappings should overlap so that trailer can cross BUFSIZ-boundary
		 * (b) trailer cannot be farther away than 64K from fileend
		 */

		/* outer loop cond */
		if (offset <= 0)
			return -1;

		/* outer loop step */
		offset -= ZIP_BUF_SIZE / 2;
		maplen = MIN (filesize - offset, ZIP_BUF_SIZE);

		if (filesize - offset > 64 * 1024)
			return -1;
	} /*outer loop*/

	return -1; /* not reached */
}
/*
 * gsf_infile_zip_read:
 * @input: the zip member being read
 * @num_bytes: number of uncompressed bytes wanted
 * @buffer: destination, or NULL to use the member's internal buffer
 *
 * Read implementation for a zip member.  Stored members are read straight
 * from the archive source; deflated members are inflated into @buffer (or
 * into zip->buf, grown as needed, when @buffer is NULL).
 *
 * Returns a pointer to @num_bytes bytes of data, or NULL if fewer than
 * @num_bytes remain, the underlying seek/read fails, inflation stops before
 * the request is satisfied, or the compression method is unsupported.
 */
static guint8 const *
gsf_infile_zip_read (GsfInput *input, size_t num_bytes, guint8 *buffer)
{
	GsfInfileZip *zip = GSF_INFILE_ZIP (input);
	GsfZipVDir *vdir = zip->vdir;
	guint8 const *res;
	gsf_off_t pos;

	if (zip->restlen < num_bytes)
		return NULL;

	switch (vdir->dirent->compr_method) {
	case GSF_ZIP_STORED:
		pos = zip->vdir->dirent->data_offset + input->cur_offset;
		if (gsf_input_seek (zip->source, pos, G_SEEK_SET))
			return NULL;
		res = gsf_input_read (zip->source, num_bytes, buffer);
		/* Only account for the bytes once they have really been read. */
		if (res != NULL)
			zip->restlen -= num_bytes;
		return res;

	case GSF_ZIP_DEFLATED:
		if (buffer == NULL) {
			if (zip->buf_size < num_bytes) {
				zip->buf_size = MAX (num_bytes, 256);
				g_free (zip->buf);
				zip->buf = g_new (guint8, zip->buf_size);
			}
			buffer = zip->buf;
		}

		zip->stream->avail_out = num_bytes;
		zip->stream->next_out = (unsigned char *)buffer;

		do {
			int err;
			uLong startlen; /* same type as z_stream.total_out */

			/* Refill the compressed input when it has run dry. */
			if (zip->crestlen > 0 && zip->stream->avail_in == 0)
				if (!zip_update_stream_in (zip))
					break;

			startlen = zip->stream->total_out;
			err = inflate(zip->stream, Z_NO_FLUSH);

			if (err == Z_STREAM_END)
				zip->restlen = 0;
			else if (err == Z_OK)
				zip->restlen -= (zip->stream->total_out - startlen);
			else
				break;

		} while (zip->restlen && zip->stream->avail_out);

		/* The loop can stop on an error or on an early end of stream;
		 * in either case part of the buffer was never written, so do
		 * not hand it back as a successful read. */
		if (zip->stream->avail_out != 0)
			return NULL;

		return buffer;

	default:
		break;
	}

	return NULL;
}
/*
 * sylk_file_probe:
 * Rewinds @input and reports whether its first three bytes are "ID;".
 * @fo and @pl are not used.  Returns FALSE when the seek or read fails.
 */
gboolean
sylk_file_probe (GOFileOpener const *fo, GsfInput *input, GOFileProbeLevel pl)
{
	char const *magic;

	if (gsf_input_seek (input, 0, G_SEEK_SET))
		return FALSE;

	magic = gsf_input_read (input, 3, NULL);
	if (magic == NULL)
		return FALSE;

	return strncmp (magic, "ID;", 3) == 0;
}
/**
 * gsf_vba_inflate:
 * @input: stream to read from
 * @offset: offset into it for start byte of compressed stream
 * @size: size of the returned array
 * @add_null_terminator: whenever add or not null at the end of array
 *
 * Decompresses VBA stream.  The stream must begin with a 0x01 signature
 * byte at the current read position of @input; it is followed by chunks,
 * each introduced by a 16-bit little-endian header, which are inflated
 * with gsf_msole_inflate() and concatenated.
 *
 * NOTE(review): this function does not itself seek to @offset before
 * reading the signature; it appears to expect the caller to have
 * positioned @input there -- confirm against callers.
 *
 * Return value: A pointer to guint8 array (to be released with g_free), or
 * %NULL if the signature byte cannot be read or is not 0x01.
 **/
guint8 *
gsf_vba_inflate (GsfInput *input, gsf_off_t offset, int *size, gboolean add_null_terminator)
{
	guint8 sig;
	GByteArray *res;
	gsf_off_t length;

	/* should start with 0x01; check the read so sig is never used
	 * uninitialised */
	if (NULL == gsf_input_read (input, 1, &sig) || 1 != sig)
		return NULL;
	offset++;

	/* Allocate only once the signature is accepted, so the early return
	 * above cannot leak the array. */
	res = g_byte_array_new ();

	length = gsf_input_size (input);

	while (offset < length) {
		GsfInput *chunk;
		guint16 chunk_hdr;
		guint8 const *tmp;

		tmp = gsf_input_read (input, 2, NULL);
		if (!tmp)
			break;

		chunk_hdr = GSF_LE_GET_GUINT16 (tmp);
		offset += 2;

		if (0xB000 == (chunk_hdr&0xF000) && (chunk_hdr&0xFFF) > 0 && (length - offset < 4094)){
			if (length < offset + (chunk_hdr&0xFFF))
				break;
			chunk = gsf_input_proxy_new_section (input, offset, (gsf_off_t) (chunk_hdr&0xFFF) + 1);
			offset += (chunk_hdr&0xFFF) + 1;
		} else {
			if (length < offset + 4094){
				chunk = gsf_input_proxy_new_section (input, offset, length-offset);
				offset = length;
			} else {
				chunk = gsf_input_proxy_new_section (input, offset, 4094);
				offset += 4094;
			}
		}
		if (chunk) {
			GByteArray *tmpres = gsf_msole_inflate (chunk, 0);
			/* NOTE(review): an absolute offset is passed with
			 * G_SEEK_CUR here; kept as in the original -- confirm
			 * this is intended. */
			gsf_input_seek (input, offset, G_SEEK_CUR);
			if (tmpres != NULL) {
				g_byte_array_append (res, tmpres->data, tmpres->len);
				g_byte_array_free (tmpres, TRUE);
			}
			g_object_unref (chunk);
		}
	}

	if (add_null_terminator)
		g_byte_array_append (res, (guint8 const *) "", 1);
	*size = res->len;

	return g_byte_array_free (res, FALSE);
}
/*!
  Detect encoding of text file
 \param fp File

 Reads up to 4096 bytes from the current position of \a fp, rewinds the
 stream to offset 0 and hands the bytes to the buffer-based overload.
 If nothing can be read the buffer is analysed with a length of 0, the same
 as for an empty file.

 Supports UTF-8 and UCS-2 big and little endian
 CJK encodings could be added
 */
UT_Error IE_Imp_Text::_recognizeEncoding(GsfInput * fp)
{
	char szBuf[4096];  // 4096 ought to be enough
	UT_sint32 iNumbytes;

	iNumbytes = UT_MIN(4096, gsf_input_remaining(fp));

	// Never let a failed read (or a negative "remaining" value) pass an
	// uninitialised buffer together with a non-zero length downstream.
	if (iNumbytes <= 0 || gsf_input_read(fp, iNumbytes, (guint8 *)szBuf) == NULL)
		iNumbytes = 0;

	gsf_input_seek(fp, 0, G_SEEK_SET);

	return _recognizeEncoding(szBuf, iNumbytes);
}
/*
 * zip_dirent_new_in:
 * @zip: archive being read
 * @offset: in: position of a central-directory record in zip->source;
 *          out: position just past that record (fixed part, name, extras
 *          and comment)
 *
 * Reads one central-directory record, checks its "PK\1\2" signature and
 * returns a newly allocated GsfZipDirent describing it.
 *
 * Returns NULL (leaving *@offset untouched) if seeking, reading or the
 * signature check fails.
 */
static GsfZipDirent *
zip_dirent_new_in (GsfInfileZip *zip, gsf_off_t *offset)
{
	static guint8 const dirent_signature[] = { 'P', 'K', 0x01, 0x02 };
	GsfZipDirent *dirent;
	guint8 const *data;
	guint16 name_len, extras_len, comment_len, compr_method, flags;
	guint32 crc32, csize, usize, off;
	gchar *name;

	/* Read data and check the header */
	if (gsf_input_seek (zip->source, *offset, G_SEEK_SET) ||
	    NULL == (data = gsf_input_read (zip->source, ZIP_DIRENT_SIZE, NULL)) ||
	    0 != memcmp (data, dirent_signature, sizeof (dirent_signature))) {
		return NULL;
	}

	name_len =      GSF_LE_GET_GUINT16 (data + ZIP_DIRENT_NAME_SIZE);
	extras_len =    GSF_LE_GET_GUINT16 (data + ZIP_DIRENT_EXTRAS_SIZE);
	comment_len =   GSF_LE_GET_GUINT16 (data + ZIP_DIRENT_COMMENT_SIZE);

	/* flags is stored in a guint16; read it with the matching accessor
	 * instead of fetching 32 bits and truncating. */
	flags =         GSF_LE_GET_GUINT16 (data + ZIP_DIRENT_FLAGS);
	compr_method =  GSF_LE_GET_GUINT16 (data + ZIP_DIRENT_COMPR_METHOD);
	crc32 =         GSF_LE_GET_GUINT32 (data + ZIP_DIRENT_CRC32);
	csize =         GSF_LE_GET_GUINT32 (data + ZIP_DIRENT_CSIZE);
	usize =         GSF_LE_GET_GUINT32 (data + ZIP_DIRENT_USIZE);
	off =           GSF_LE_GET_GUINT32 (data + ZIP_DIRENT_OFFSET);

	/* The member name immediately follows the fixed-size record. */
	if ((data = gsf_input_read (zip->source, name_len, NULL)) == NULL)
		return NULL;

	name = g_new (gchar, (gulong) (name_len + 1));
	memcpy (name, data, name_len);
	name[name_len] = '\0';

	dirent = gsf_zip_dirent_new ();
	dirent->name = name;

	dirent->flags =         flags;
	dirent->compr_method =  compr_method;
	dirent->crc32 =         crc32;
	dirent->csize =         csize;
	dirent->usize =         usize;
	dirent->offset =        off;
#if 0
	g_print ("%s = 0x%x @ %" GSF_OFF_T_FORMAT "\n", name, off, *offset);
#endif

	*offset += ZIP_DIRENT_SIZE + name_len + extras_len + comment_len;

	return dirent;
}
// Try `password` against the document described by `ps`.
// The table stream position is recorded first and restored afterwards, so
// the decryption attempt does not move the stream.
// Returns true when the decrypt routine matching `version` returns 0; any
// other version yields false.
bool check_password(char *password)
{
	const int savedPos = wvStream_tell(ps.tablefd);
	wvSetPassword(password, &ps);

	bool accepted = false;
	if (version == WORD6 || version == WORD7)
		accepted = (wvDecrypt95(&ps) == 0);
	else if (version == WORD8)
		accepted = (wvDecrypt97(&ps) == 0);

	gsf_input_seek(ps.tablefd->stream.gsf_stream, savedPos, G_SEEK_SET);
	return accepted;
}
/**
 * zip_read_dirents:
 * @zip : #GsfInfileZip
 *
 * Read zip headers and do some sanity checking
 * along the way.  Locates the trailer, reads the entry count and the
 * directory position from it, allocates zip->info and fills its
 * dirent_list with one entry per directory record, in file order.
 *
 * Returns: %TRUE on error setting zip->err.
 **/
static gboolean
zip_read_dirents (GsfInfileZip *zip)
{
	guint8 const *trailer;
	guint16 entries, i;
	guint32 dir_pos;
	ZipInfo *info;
	gsf_off_t offset;

	/* Find and check the trailing header */
	offset = zip_find_trailer (zip);
	if (offset < 0) {
		zip->err = g_error_new (gsf_input_error_id (), 0,
					"No Zip trailer");
		return TRUE;
	}

	if (gsf_input_seek (zip->source, offset, G_SEEK_SET) ||
	    NULL == (trailer = gsf_input_read (zip->source, ZIP_TRAILER_SIZE, NULL))) {
		zip->err = g_error_new (gsf_input_error_id (), 0,
					"Error reading Zip signature");
		return TRUE;
	}

	/* entries is a guint16; read it with the matching accessor instead
	 * of fetching 32 bits and truncating. */
	entries = GSF_LE_GET_GUINT16 (trailer + ZIP_TRAILER_ENTRIES);
	dir_pos = GSF_LE_GET_GUINT32 (trailer + ZIP_TRAILER_DIR_POS);

	info = g_new0 (ZipInfo, 1);
	zip->info = info;

	info->ref_count = 1;
	info->entries = entries;
	info->dir_pos = dir_pos;

	/* Read the directory.  Entries are prepended and the list reversed
	 * afterwards: appending inside the loop walks the whole list on
	 * every insertion. */
	for (i = 0, offset = dir_pos; i < entries; i++) {
		GsfZipDirent *d;

		d = zip_dirent_new_in (zip, &offset);
		if (d == NULL) {
			/* keep what was read so far in file order */
			info->dirent_list = g_list_reverse (info->dirent_list);
			zip->err = g_error_new (gsf_input_error_id (), 0,
						"Error reading zip dirent");
			return TRUE;
		}

		info->dirent_list = g_list_prepend (info->dirent_list, d);
	}
	info->dirent_list = g_list_reverse (info->dirent_list);

	return FALSE;
}
int AbiWordperfectInputStream::seek(long offset, WPX_SEEK_TYPE seekType) { GSeekType gsfSeekType = G_SEEK_SET; switch(seekType) { case WPX_SEEK_CUR: gsfSeekType = G_SEEK_CUR; break; case WPX_SEEK_SET: gsfSeekType = G_SEEK_SET; break; } return gsf_input_seek(m_input, offset, gsfSeekType); }
/*
 * Move the stream back to its beginning, whichever backend it uses:
 * a gsf input, a stdio FILE, or (any other kind) the in-memory stream.
 */
void
wvStream_rewind (wvStream * in)
{
    if (in->kind == GSF_STREAM)
      {
	  gsf_input_seek (GSF_INPUT (in->stream.gsf_stream), 0, G_SEEK_SET);
	  return;
      }

    if (in->kind == FILE_STREAM)
      {
	  rewind (in->stream.file_stream);
	  return;
      }

    /* memory stream: just reset the cursor */
    in->stream.memory_stream->current = 0;
}
/*
 * Probe implementation for plugin-provided file openers.
 *
 * At the file-name probe level, when the service lists suffixes, the
 * lower-cased extension of the input's name is compared with each suffix.
 * Otherwise, if the service declares a probe function, the plugin service
 * is loaded and the function is called; the input is rewound afterwards.
 *
 * Returns TRUE when the input is recognised, FALSE otherwise (including
 * when the service fails to load or has no probe function).
 */
static gboolean
go_plugin_file_opener_probe (GOFileOpener const *fo, GsfInput *input,
			     GOFileProbeLevel pl)
{
	GOPluginFileOpener *pfo = GO_PLUGIN_FILE_OPENER (fo);
	GOPluginServiceFileOpener *sfo =
		GO_PLUGIN_SERVICE_FILE_OPENER (pfo->service);
	GOErrorInfo *load_error = NULL;
	gboolean recognised;

	g_return_val_if_fail (GSF_IS_INPUT (input), FALSE);

	if (pl == GO_FILE_PROBE_FILE_NAME && sfo->suffixes != NULL) {
		GSList *node;
		gchar const *ext;
		gchar *ext_lower;

		if (gsf_input_name (input) == NULL)
			return FALSE;
		ext = gsf_extension_pointer (gsf_input_name (input));
		if (ext == NULL)
			return FALSE;

		ext_lower = g_utf8_strdown (ext, -1);
		recognised = FALSE;
		for (node = sfo->suffixes; node != NULL; node = node->next) {
			if (strcmp (ext_lower, node->data) == 0) {
				recognised = TRUE;
				break;
			}
		}
		g_free (ext_lower);

		return recognised;
	}

	if (!sfo->has_probe)
		return FALSE;

	go_plugin_service_load (pfo->service, &load_error);
	if (load_error != NULL) {
		go_error_info_print (load_error);
		go_error_info_free (load_error);
		return FALSE;
	}

	if (sfo->cbs.plugin_func_file_probe == NULL)
		return FALSE;

	recognised = sfo->cbs.plugin_func_file_probe (fo, pfo->service, input, pl);
	/* leave the stream at the start for whoever reads it next */
	gsf_input_seek (input, 0, G_SEEK_SET);
	return recognised;
}
/*
 * Rewind @input and check its first six bytes: three little-endian 16-bit
 * words that must be 0, 2, and one of the accepted version codes.
 * Returns TRUE on a match, FALSE otherwise or if the seek/read fails.
 */
static gboolean
qpro_check_signature (GsfInput *input)
{
	guint8 const *hdr;

	if (gsf_input_seek (input, 0, G_SEEK_SET))
		return FALSE;

	hdr = gsf_input_read (input, 2+2+2, NULL);
	if (hdr == NULL)
		return FALSE;

	if (GSF_LE_GET_GUINT16 (hdr + 0) != 0)
		return FALSE;
	if (GSF_LE_GET_GUINT16 (hdr + 2) != 2)
		return FALSE;

	switch (GSF_LE_GET_GUINT16 (hdr + 4)) {
	case 0x1001: /* 'WB1' format, documented */
	case 0x1002: /* 'WB2' format, documented */
	case 0x1006: /* qpro 6.0 ?? */
	case 0x1007: /* qpro 7.0 ?? */
		return TRUE;
	default:
		return FALSE;
	}
}
/*
 * Move the stream by @offset bytes relative to its current position.
 *
 * For gsf and memory streams the new position is returned.
 * NOTE(review): for FILE streams the value returned is the fseek() status,
 * not a position -- callers should not treat the result uniformly.
 */
U32
wvStream_offset (wvStream * in, long offset)
{
    if (in->kind == GSF_STREAM)
      {
	  GsfInput *gsf = GSF_INPUT (in->stream.gsf_stream);

	  gsf_input_seek (gsf, offset, G_SEEK_CUR);
	  return (U32) gsf_input_tell (GSF_INPUT (in->stream.gsf_stream));
      }

    if (in->kind == FILE_STREAM)
	return ((U32) fseek (in->stream.file_stream, offset, SEEK_CUR));

    in->stream.memory_stream->current += offset;
    return in->stream.memory_stream->current;
}
/*
 * Move the stream to the absolute @position.
 *
 * For gsf and memory streams the new position is returned.
 * NOTE(review): for FILE streams the value returned is the fseek() status,
 * not a position -- callers should not treat the result uniformly.
 */
U32
wvStream_goto (wvStream * in, long position)
{
    if (in->kind == GSF_STREAM)
      {
	  GsfInput *gsf = GSF_INPUT (in->stream.gsf_stream);

	  gsf_input_seek (gsf, position, G_SEEK_SET);
	  return (U32) gsf_input_tell (GSF_INPUT (in->stream.gsf_stream));
      }

    if (in->kind == FILE_STREAM)
	return ((U32) fseek (in->stream.file_stream, position, SEEK_SET));

    in->stream.memory_stream->current = position;
    return in->stream.memory_stream->current;
}
// Extract every manifest item of the EPUB into a per-document temporary
// directory (<tmp dir>/<document UUID>).
//
// Returns UT_ERROR if the temporary directory cannot be created or the OPS
// directory cannot be opened inside the archive, UT_OK otherwise.  A
// manifest item that cannot be opened in the archive, or whose output file
// cannot be created, is skipped rather than aborting the whole import.
UT_Error IE_Imp_EPUB::uncompress()
{
	m_tmpDir = UT_go_filename_to_uri(g_get_tmp_dir());
	m_tmpDir += G_DIR_SEPARATOR_S;
	m_tmpDir += getDoc()->getDocUUIDString();

	if (!UT_go_directory_create(m_tmpDir.c_str(), 0644, NULL))
	{
		UT_DEBUGMSG(("Can`t create temporary directory\n"));
		return UT_ERROR;
	}

	GsfInput *opsDirInput = gsf_infile_child_by_name(m_epub,
			m_opsDir.c_str());
	UT_DEBUGMSG(("Child count : %d", gsf_infile_num_children(m_epub)));
	if (opsDirInput == NULL)
	{
		UT_DEBUGMSG(("Failed to open OPS dir\n"));
		return UT_ERROR;
	}

	for (std::map<std::string, std::string>::iterator i =
			m_manifestItems.begin(); i != m_manifestItems.end(); i++)
	{
		gchar *itemFileName = UT_go_filename_from_uri(
				(m_tmpDir + G_DIR_SEPARATOR_S + (*i).second).c_str());
		// path components of the item, relative to the OPS directory
		gchar** aname = g_strsplit((*i).second.c_str(), G_DIR_SEPARATOR_S, 0);

		GsfInput* itemInput = gsf_infile_child_by_aname(
				GSF_INFILE(opsDirInput), (const char**) aname);
		GsfOutput* itemOutput = createFileByPath(itemFileName);

		// Either side may be NULL (item missing from the archive, output
		// file not creatable); never pass NULL on to the gsf calls.
		if (itemInput != NULL && itemOutput != NULL)
		{
			gsf_input_seek(itemInput, 0, G_SEEK_SET);
			gsf_input_copy(itemInput, itemOutput);
		}
		else
		{
			UT_DEBUGMSG(("Skipping manifest item %s\n", (*i).second.c_str()));
		}

		g_strfreev(aname);
		g_free(itemFileName);
		if (itemInput != NULL)
			g_object_unref(G_OBJECT(itemInput));
		if (itemOutput != NULL)
			gsf_output_close(itemOutput);
	}

	g_object_unref(G_OBJECT(opsDirInput));

	return UT_OK;
}
// Copy every file listed for the temporary OEBPS directory into the output
// archive (m_oebps), deleting each temporary file once copied, then remove
// the temporary directory itself.
//
// Returns UT_ERROR if the temporary directory cannot be opened, an archive
// member cannot be created, or a listed file cannot be opened; UT_OK
// otherwise.
UT_Error IE_Exp_EPUB::compress()
{
	// UT_go_filename_from_uri() hands back an allocated string; keep it
	// in a variable so it can be released instead of leaking it.
	gchar *oebpsPath = UT_go_filename_from_uri(m_oebpsDir.c_str());

	GsfInfile* oebpsDir = gsf_infile_stdio_new(oebpsPath, NULL);
	if (oebpsDir == NULL)
	{
		g_free(oebpsPath);
		UT_DEBUGMSG(("RUDYJ: Can`t open temporary OEBPS directory\n"));
		return UT_ERROR;
	}

	std::vector<std::string> listing = getFileList(oebpsPath);
	g_free(oebpsPath);

	for (std::vector<std::string>::iterator i = listing.begin(); i
			!= listing.end(); i++)
	{
		GsfOutput* item = gsf_outfile_new_child(GSF_OUTFILE(m_oebps),
				(*i).c_str(), FALSE);
		std::string fullPath = m_oebpsDir + G_DIR_SEPARATOR_S + *i;
		GsfInput* file = UT_go_file_open(fullPath.c_str(), NULL);

		if (item == NULL || file == NULL)
		{
			if (file == NULL)
			{
				UT_DEBUGMSG(("RUDYJ: Can`t open file\n"));
			}
			// release whatever was obtained for this entry
			if (file != NULL)
				g_object_unref(G_OBJECT(file));
			if (item != NULL)
			{
				gsf_output_close(item);
				g_object_unref(G_OBJECT(item));
			}
			g_object_unref(G_OBJECT(oebpsDir));
			return UT_ERROR;
		}

		gsf_output_seek(item, 0, G_SEEK_SET);
		gsf_input_seek(file, 0, G_SEEK_SET);
		gsf_input_copy(file, item);
		gsf_output_close(item);

		// Drop our references now that the entry has been written.
		g_object_unref(G_OBJECT(item));
		g_object_unref(G_OBJECT(file));

		// Time to delete temporary file
		UT_go_file_remove(fullPath.c_str(), NULL);
	}

	g_object_unref(G_OBJECT(oebpsDir));

	UT_go_file_remove((m_oebpsDir + G_DIR_SEPARATOR_S + "index.xhtml_files").c_str(), NULL);
	UT_go_file_remove(m_oebpsDir.c_str(), NULL);
	return UT_OK;
}
/*
 * Seek implementation for gzip inputs.
 *
 * The target position is resolved from @offset and @whence.  A backwards
 * seek restarts decompression: the source is repositioned just past the
 * gzip header, the CRC and inflater are reset and the logical position is
 * set to 0.  The target is then reached with gsf_input_seek_emulate().
 * A one-time warning is issued once the accumulated seek distance reaches
 * 1000000 (unless it all came from a single seek).
 *
 * Returns TRUE on failure, FALSE on success.
 */
static gboolean
gsf_input_gzip_seek (GsfInput *input, gsf_off_t offset, GSeekType whence)
{
	/* Global flag -- we don't want one per stream.  */
	static gboolean warned = FALSE;

	GsfInputGZip *gzip = GSF_INPUT_GZIP (input);
	gsf_off_t target;

	/* Note, that pos has already been sanity checked.  */
	if (whence == G_SEEK_SET)
		target = offset;
	else if (whence == G_SEEK_CUR)
		target = offset + input->cur_offset;
	else if (whence == G_SEEK_END)
		target = offset + input->size;
	else
		return TRUE;

	if (target < input->cur_offset) {
		/* Going backwards: start over from the first data byte. */
		if (gsf_input_seek (gzip->source, gzip->header_size, G_SEEK_SET))
			return TRUE;
		gzip->crc = crc32 (0L, Z_NULL, 0);
		gzip->stream.avail_in = 0;
		if (inflateReset (&(gzip->stream)) != Z_OK)
			return TRUE;
		input->cur_offset = 0;
	}

	if (gsf_input_seek_emulate (input, target))
		return TRUE;

	gzip->seek_skipped += target;
	if (!warned &&
	    gzip->seek_skipped != target && /* Don't warn for single seek.  */
	    gzip->seek_skipped >= 1000000) {
		warned = TRUE;
		g_warning ("Seeking in gzipped streams is awfully slow.");
	}

	return FALSE;
}
/*
 * Copy the contents of @input, from its start, into a new psiconv buffer.
 * When @maxlen is positive at most @maxlen bytes are copied.
 *
 * Returns the buffer, or NULL if @input is NULL, the buffer cannot be
 * created, the rewind fails, or adding a byte fails.  A failed chunk read
 * ends the copy and returns what has been gathered so far.
 */
static psiconv_buffer
psiconv_stream_to_buffer (GsfInput *input, int maxlen)
{
	psiconv_buffer buf;
	gsf_off_t remaining;

	if (!input)
		return NULL;

	buf = psiconv_buffer_new ();
	if (buf == NULL)
		return NULL;

	if (gsf_input_seek (input, 0, G_SEEK_SET) == TRUE) {
		psiconv_buffer_free (buf);
		return NULL;
	}

	remaining = gsf_input_size (input);
	if (maxlen > 0 && remaining > maxlen)
		remaining = maxlen;

	while (remaining > 0) {
		/* read in pieces of at most 4096 bytes */
		int len = MIN (4096, remaining);
		guint8 const *chunk = gsf_input_read (input, len, NULL);
		int i;

		if (chunk == NULL)
			break;

		for (i = 0; i < len; i++) {
			if (psiconv_buffer_add (buf, chunk[i]) != 0) {
				psiconv_buffer_free (buf);
				return NULL;
			}
		}

		remaining -= len;
	}

	return buf;
}
/*
 * Feed the inflater with the next block of compressed data.
 *
 * Reads up to ZIP_BLOCK_SIZE of the remaining compressed bytes from the
 * archive source, starting at the member's data offset plus the number of
 * input bytes zlib has consumed so far, and installs them as the stream's
 * input.
 *
 * Returns TRUE if input was supplied; FALSE if no compressed data remains
 * or the seek/read failed.
 */
static gboolean
zip_update_stream_in (GsfInfileZip *zip)
{
	guint32 chunk_len;
	guint8 const *chunk;
	gsf_off_t src_pos;

	if (zip->crestlen == 0)
		return FALSE;

	chunk_len = MIN (zip->crestlen, ZIP_BLOCK_SIZE);
	src_pos = zip->vdir->dirent->data_offset + zip->stream->total_in;

	if (gsf_input_seek (zip->source, src_pos, G_SEEK_SET))
		return FALSE;

	chunk = gsf_input_read (zip->source, chunk_len, NULL);
	if (chunk == NULL)
		return FALSE;

	zip->crestlen -= chunk_len;
	zip->stream->next_in  = (unsigned char *) chunk;	/* next input byte */
	zip->stream->avail_in = chunk_len;	/* number of bytes available at next_in */

	return TRUE;
}
/**
 * tar_init_info :
 * @tar : #GsfInfileTar
 *
 * Read tar headers and do some sanity checking
 * along the way.
 *
 * Walks the archive header by header starting at the current position of
 * tar->source.  Regular files are recorded as children of their directory,
 * directory entries create the directory, and 'L' entries supply a long
 * name for the following header.  Any problem is reported through
 * tar->err; in that case the source is sought back to where it started.
 **/
static void
tar_init_info (GsfInfileTar *tar)
{
	/* all-zero header, used both as the end marker and as the reference
	 * for the filler check */
	TarHeader end;
	const TarHeader *header;
	/* starting position, restored if an error is recorded */
	gsf_off_t pos0 = gsf_input_tell (tar->source);
	/* name carried over from a preceding 'L' entry, if any */
	char *pending_longname = NULL;

	memset (&end, 0, sizeof (end));

	/* Loop ends on the first error or when no further header can be read. */
	while (tar->err == NULL &&
	       (header = (const TarHeader *)gsf_input_read (tar->source,
							    HEADER_SIZE,
							    NULL))) {
		char *name;
		gsf_off_t length;
		gsf_off_t offset;

		/* The filler area must match the zeroed reference header. */
		if (memcmp (header->filler, end.filler, sizeof (end.filler))) {
			tar->err = g_error_new (gsf_input_error_id (), 0,
						"Invalid tar header");
			break;
		}

		/* An entirely zero header terminates the walk. */
		if (memcmp (header, &end, HEADER_SIZE) == 0)
			break;

		/* A pending long name takes precedence over the header's own
		 * name field; ownership moves to name, which is freed below. */
		if (pending_longname) {
			name = pending_longname;
			pending_longname = NULL;
		} else
			name = g_strndup (header->name, sizeof (header->name));
		length = unpack_octal (tar, header->size, sizeof (header->size));
		/* position just after the header, i.e. where this entry's
		 * payload starts */
		offset = gsf_input_tell (tar->source);

#if 0
		g_printerr ("[%s]: %d\n", name, (int)length);
#endif

		switch (header->typeflag) {
		case '0': case 0: {
			/* Regular file. */
			GsfInfileTar *dir;
			const char *n = name, *s;
			TarChild c;

			/* This is deliberately slash-only.  */
			/* n ends up pointing just past the last '/' in name */
			while ((s = strchr (n, '/')))
				n = s + 1;
			c.name = g_strdup (n);
			c.offset = offset;
			c.length = length;
			c.dir = NULL;
			dir = tar_directory_for_file (tar, name, FALSE);
			g_array_append_val (dir->children, c);
			break;
		}
		case '5': {
			/* Directory */
			(void)tar_directory_for_file (tar, name, TRUE);
			break;
		}
		case 'L': {
			const char *n;

			/* The entry must carry the magic long-name marker as
			 * its name. */
			if (pending_longname ||
			    strcmp (name, MAGIC_LONGNAME) != 0) {
				tar->err = g_error_new (gsf_input_error_id (), 0,
							"Invalid longname header");
				break;
			}

			/* The payload of this entry is the real name of the
			 * next one. */
			n = gsf_input_read (tar->source, length, NULL);
			if (!n) {
				tar->err = g_error_new (gsf_input_error_id (), 0,
							"Failed to read longname");
				break;
			}

			pending_longname = g_strndup (n, length);
			break;
		}
		default:
			/* Other -- ignore */
			break;
		}

		g_free (name);

		/* Round up to block size */
		length = (length + (BLOCK_SIZE - 1)) / BLOCK_SIZE * BLOCK_SIZE;

		/* Skip over the (padded) payload to the next header; not
		 * attempted when the switch above already recorded an error. */
		if (!tar->err &&
		    gsf_input_seek (tar->source, offset + length, G_SEEK_SET)) {
			tar->err = g_error_new (gsf_input_error_id (), 0,
						"Seek failed");
			break;
		}
	}

	/* A long name with no entry following it means the archive stopped
	 * short. */
	if (pending_longname) {
		if (!tar->err)
			tar->err = g_error_new (gsf_input_error_id (), 0,
						"Truncated archive");
		g_free (pending_longname);
	}

	if (tar->err)
		gsf_input_seek (tar->source, pos0, G_SEEK_SET);
}
// Import a PalmDoc (PDB) stream.
//
// The PDB header is checked for the DOC type/creator; if it does not match
// (or cannot be read) a single empty paragraph is produced and UT_OK is
// returned.  Otherwise record 0 is consulted to find out whether the text
// records are compressed, and each text record is read into m_buf,
// optionally decompressed, converted byte by byte and appended to the
// document, starting a new paragraph at every CR, LF or CRLF.
//
// Returns UT_OK, or whatever X_ReturnNoMemIfError returns on an append
// failure.
UT_Error IE_Imp_PalmDoc::_parseFile(GsfInput * pdfp)
{
	UT_GrowBuf gbBlock(1024);
	bool bEatLF = false;
	bool bEmptyFile = true;
	UT_UCSChar c;
	UT_UCS4Char wc;

	pdb_header	header;
	doc_record0	rec0;
	bool		bCompressed = false;
	int		num_records, rec_num;
	DWord		file_size, offset;

	// A failed header read is treated like a non-DOC file so that the
	// comparisons below never look at uninitialised memory.
	if (!gsf_input_read( pdfp, PDB_HEADER_SIZE, (guint8*)&header) ||
	    strncmp( header.type,    DOC_TYPE,    sizeof(header.type) ) ||
	    strncmp( header.creator, DOC_CREATOR, sizeof(header.creator) ))
	{
		UT_DEBUGMSG(("This is not a DOC file!\n"));

		// Create an empty paragraph.
		X_ReturnNoMemIfError(appendStrux(PTX_Block, NULL));
		return UT_OK;
	}

	num_records = _swap_Word( header.numRecords ) - 1;

	gsf_input_seek( pdfp, PDB_HEADER_SIZE, G_SEEK_SET );
	GET_DWord( pdfp, offset );
	gsf_input_seek( pdfp, offset, G_SEEK_SET );

	// Only trust rec0 if it was actually read.
	if ( gsf_input_read( pdfp, sizeof(rec0), (guint8*)&rec0) &&
	     _swap_Word( rec0.version ) == 2 )
		bCompressed = true;

	gsf_input_seek( pdfp, 0, G_SEEK_END );
	file_size = gsf_input_tell( pdfp );

	for (rec_num = 1; rec_num <= num_records; ++rec_num )
	{
		DWord next_offset;

		gsf_input_seek( pdfp, PDB_HEADER_SIZE + PDB_RECORD_HEADER_SIZE * rec_num, G_SEEK_SET);
		GET_DWord( pdfp, offset );
		if( rec_num < num_records )
		{
			gsf_input_seek( pdfp, PDB_HEADER_SIZE + PDB_RECORD_HEADER_SIZE * (rec_num + 1), G_SEEK_SET);
			GET_DWord( pdfp, next_offset );
		}
		else
			next_offset = file_size;

		// The record extent comes from the file and cannot be trusted:
		// a reversed pair of offsets would wrap around, and anything
		// larger than the buffer would be written past its end.
		// (m_buf->buf is taken to hold BUFFER_SIZE bytes, as the
		// _zero_fill call below does.)
		DWord rec_len = (next_offset > offset) ? (next_offset - offset) : 0;
		if (rec_len > BUFFER_SIZE)
			rec_len = BUFFER_SIZE;

		gsf_input_seek( pdfp, offset, G_SEEK_SET );

		// be overly cautious here
		_zero_fill (m_buf->buf, BUFFER_SIZE);
		gsf_input_read(pdfp, rec_len, m_buf->buf);
		m_buf->position = rec_len;

		if ( bCompressed )
			_uncompress( m_buf );

		m_buf->position = 0;

		while ( (m_buf->position) < (m_buf->len) )
		{
			// don't copy over null chars
			if (m_buf->buf[m_buf->position] == '\0')
			{
				++m_buf->position;
				continue;
			}
			if( !m_Mbtowc.mbtowc( wc, m_buf->buf[m_buf->position] ) )
			{
				// No character produced for this byte (presumably
				// it is part of a longer sequence): step to the
				// next byte instead of retrying the same one
				// forever.
				++m_buf->position;
				continue;
			}
			c = static_cast<UT_UCSChar>(wc);
			switch (c)
			{
			case static_cast<UT_UCSChar>('\r'):
			case static_cast<UT_UCSChar>('\n'):

				if ((c == static_cast<UT_UCSChar>('\n')) && bEatLF)
				{
					bEatLF = false;
					break;
				}

				if (c == static_cast<UT_UCSChar>('\r'))
				{
					bEatLF = true;
				}

				// we interprete either CRLF, CR, or LF as a paragraph break.

				// start a paragraph and emit any text that we
				// have accumulated.
				X_ReturnNoMemIfError(appendStrux(PTX_Block, NULL));
				bEmptyFile = false;
				if (gbBlock.getLength() > 0)
				{
					X_ReturnNoMemIfError(appendSpan(reinterpret_cast<const UT_UCSChar*>(gbBlock.getPointer(0)), gbBlock.getLength()));
					gbBlock.truncate(0);
				}
				break;

			default:
				bEatLF = false;
				X_ReturnNoMemIfError(gbBlock.ins(gbBlock.getLength(),reinterpret_cast<const UT_GrowBufElement *>(&c),1));
				break;
			}

			++m_buf->position;
		}
	}

	if (gbBlock.getLength() > 0 || bEmptyFile)
	{
		// if we have text left over (without final CR/LF),
		// or if we read an empty file,
		// create a paragraph and emit the text now.
		X_ReturnNoMemIfError(appendStrux(PTX_Block, NULL));
		if (gbBlock.getLength() > 0)
			X_ReturnNoMemIfError(appendSpan(reinterpret_cast<const UT_UCSChar *>(gbBlock.getPointer(0)), gbBlock.getLength()));
	}

	return UT_OK;
}
// Import a StarOffice Writer document from an OLE container.
//
// Opens `input` as an OLE file, loads the document metadata and the header
// of the "StarWriterDocument" stream, verifies the password if the header
// carries a cryptor, and then walks the top-level records of the stream.
// Each record starts with a type character and a size; after a record has
// been handled the stream is always sought to the recorded end of that
// record, so partially read or unknown records are skipped cleanly.
//
// Returns UT_OK on success.  UT_Error values thrown by the helpers are
// caught and returned; any other exception is reported as
// UT_IE_BOGUSDOCUMENT.
UT_Error IE_Imp_StarOffice::_loadFile(GsfInput * input)
{
	try {
		UT_DEBUGMSG(("SDW: Starting import\n"));
		mOle = GSF_INFILE (gsf_infile_msole_new(input, NULL));
		if (!mOle)
			return UT_IE_BOGUSDOCUMENT;

		// firstly, load metadata
		SDWDocInfo::load(mOle, getDoc());

		mDocStream = gsf_infile_child_by_name(mOle, "StarWriterDocument");
		if (!mDocStream)
			return UT_IE_BOGUSDOCUMENT;

		// total stream size; reaching it ends the record loop below
		gsf_off_t size = gsf_input_size(mDocStream);

		if (!appendStrux(PTX_Section, PP_NOPROPS))
			return UT_IE_NOMEMORY;

		UT_DEBUGMSG(("SDW: Attempting to load DocHdr...\n"));
		mDocHdr.load(mDocStream);
		UT_DEBUGMSG(("SDW: ...success\n"));

		// Ask for and verify the password
		if (mDocHdr.cryptor) {
			if (!mDocHdr.cryptor->SetPassword(GetPassword().c_str())) {
				UT_DEBUGMSG(("SDW: Wrong password\n"));
				return UT_IE_PROTECTED;
			}
		}

		// do the actual reading
		char type;
		bool done = false;
		UT_uint32 recSize;
		while (!done) {
			if (gsf_input_tell(mDocStream) == size)
				break;
			readChar(mDocStream, type);
			// eor: end-of-record position, used for the final seek of
			// each iteration
			gsf_off_t eor;
			readRecSize(mDocStream, recSize, &eor);

			switch (type) {
				case SWG_CONTENTS: {
					gsf_off_t flagsEnd = 0;
					UT_uint32 nNodes;
					// sw/source/core/sw3io/sw3sectn.cxx#L129
					if (mDocHdr.nVersion >= SWG_LAYFRAMES) {
						UT_uint8 flags;
						readFlagRec(mDocStream, flags, &flagsEnd);
					}
					// node count is 32 bit in newer versions, 16 bit
					// (optionally preceded by a section id) otherwise
					if (mDocHdr.nVersion >= SWG_LONGIDX)
						streamRead(mDocStream, nNodes);
					else {
						if (mDocHdr.nVersion >= SWG_LAYFRAMES) {
							UT_uint16 sectidDummy;
							streamRead(mDocStream, sectidDummy);
						}
						UT_uint16 nodes16;
						streamRead(mDocStream, nodes16);
						nNodes = (UT_uint32)nodes16;
					}
					if (flagsEnd) {
						UT_ASSERT(flagsEnd >= gsf_input_tell(mDocStream));
						if (gsf_input_tell(mDocStream) != flagsEnd) {
							UT_DEBUGMSG(("SDW: have not read all flags\n"));
							if (gsf_input_seek(mDocStream, flagsEnd, G_SEEK_SET))
								return UT_IE_BOGUSDOCUMENT;
						}
					}
					// Nested records of the contents section; same
					// type/size framing as the outer loop.
					bool done2 = false;
					UT_uint32 size2;
					while (!done2) {
						readChar(mDocStream, type);
						gsf_off_t eor2;
						readRecSize(mDocStream, size2, &eor2);

						switch (type) {
							case SWG_TEXTNODE: { // sw/source/core/sw3io/sw3nodes.cxx#L788
								UT_DEBUGMSG(("SDW: Found Textnode! (start at 0x%08llX end at 0x%08llX)\n",
									(long long)gsf_input_tell(mDocStream),
									(long long)eor2));
								UT_uint8 flags;
								gsf_off_t newPos;
								readFlagRec(mDocStream, flags, &newPos);
								// XXX check flags
								if (gsf_input_seek(mDocStream, newPos, G_SEEK_SET))
									return UT_IE_BOGUSDOCUMENT;

								// Read the actual text
								UT_UCS4Char* str;
								readByteString(mDocStream, str);
								UT_UCS4String textNode(str);
								free(str);
								UT_DEBUGMSG(("SDW: ...length=%zu contents are: |%s|\n", textNode.length(), textNode.utf8_str()));

								// now get the attributes
								// attrs: character properties, pAttrs:
								// paragraph properties, charAttributes:
								// heap-allocated TextAttr* for text spans
								UT_String attrs;
								UT_String pAttrs;
								UT_Vector charAttributes;
								// NOTE(review): the early returns further
								// down leave this scope without reaching
								// UT_VECTOR_PURGEALL, so TextAttr objects
								// already stored here are not deleted on
								// those paths.
								while (gsf_input_tell(mDocStream) < eor2) {
									char attVal;
									streamRead(mDocStream, attVal);

									UT_uint32 attSize;
									gsf_off_t eoa; // end of attribute
									readRecSize(mDocStream, attSize, &eoa);
									if (attVal == SWG_ATTRIBUTE) {
										TextAttr* a = new TextAttr;
										streamRead(mDocStream, *a, eoa);
										UT_DEBUGMSG(("SDW: ...found text-sub-node, which=0x%x, ver=0x%x, start=%u, end=%u - data:%s len:%llu data is:", a->which, a->ver, a->start, a->end, a->data?"Yes":"No", (long long unsigned)a->dataLen));
#ifdef DEBUG
										hexdump(a->data, a->dataLen);
										putc('\n', stderr);
#endif
										charAttributes.addItem(a);
									}
									else if (attVal == SWG_ATTRSET) {
										// bah, yet another loop
										UT_DEBUGMSG(("SDW: ...paragraph attributes found\n"));
										while (gsf_input_tell(mDocStream) < eoa) {
											// reusing attVal and attSize
											streamRead(mDocStream, attVal);
											gsf_off_t eoa2; // end of attribute
											readRecSize(mDocStream, attSize, &eoa2);
											if (attVal == SWG_ATTRIBUTE) {
												TextAttr a;
												streamRead(mDocStream, a, eoa2);
												// attributes without a value are
												// not recorded
												if (!a.attrVal.empty()) {
													if (a.isPara)
														UT_String_setProperty(pAttrs, a.attrName, a.attrVal);
													else
														UT_String_setProperty(attrs, a.attrName, a.attrVal);
												}
												UT_DEBUGMSG(("SDW: ......found paragraph attr, which=0x%x, ver=0x%x, start=%u, end=%u (string now %s) Data:%s Len=%lld Data:", a.which, a.ver, (a.startSet?a.start:0), (a.endSet?a.end:0), attrs.c_str(), (a.data ? "Yes" : "No"), (long long)a.dataLen));
#ifdef DEBUG
												hexdump(a.data, a.dataLen);
												putc('\n', stderr);
#endif
											}
											if (gsf_input_seek(mDocStream, eoa2, G_SEEK_SET))
												return UT_IE_BOGUSDOCUMENT;
										}
									}
									else {
										UT_DEBUGMSG(("SDW: ...unknown attribute '%c' found (start=%" GSF_OFF_T_FORMAT " end=%" GSF_OFF_T_FORMAT ")\n", attVal, gsf_input_tell(mDocStream), eoa));
									}
									if (gsf_input_seek(mDocStream, eoa, G_SEEK_SET))
										return UT_IE_BOGUSDOCUMENT;
								}

								PP_PropertyVector attributes = {
									"props", pAttrs.c_str()
								};
								// first, insert the paragraph
								if (!appendStrux(PTX_Block, attributes))
									return UT_IE_NOMEMORY;

								UT_String pca(attrs); // character attributes for the whole paragraph
								// now insert the spans of text
								UT_uint32 len = textNode.length();
								UT_uint32 lastInsPos = 0;
								// NOTE(review): the loop starts at 1, so for
								// len <= 1 the body never runs and no span is
								// appended for this node -- confirm that is
								// intended for single-character text.
								for (UT_uint32 i = 1; i < len; i++) {
									bool doInsert = false; // whether there was an attribute change
									for (UT_sint32 j = 0; j < charAttributes.getItemCount(); j++) {
										const TextAttr* a = reinterpret_cast<const TextAttr*>(charAttributes[j]);
										// clear the last attribute, if set
										if (a->endSet && a->end == (i - 1)) {
											if (a->isOff) {
												// restore the paragraph-wide value
												UT_String propval = UT_String_getPropVal(pca, a->attrName);
												UT_String_setProperty(attrs, a->attrName, propval);
											}
											else
												UT_String_removeProperty(attrs, a->attrName);
										}

										// now set new attribute, if needed
										if (a->startSet && a->start == (i - 1)) {
											if (a->isPara)
												UT_String_setProperty(pAttrs, a->attrName, a->attrVal);
											else if (a->isOff)
												UT_String_removeProperty(attrs, a->attrName);
											else
												UT_String_setProperty(attrs, a->attrName, a->attrVal);
										}

										// insert if this is the last character, or if there was a format change
										if ((a->endSet && a->end == i) || (a->startSet && a->start == i))
											doInsert = true;
									}
									if (doInsert || i == (len - 1)) {
										attributes[1] = attrs.c_str();
										UT_DEBUGMSG(("SDW: Going to appendFmt with %s\n", attributes[1].c_str()));
										if (!appendFmt(attributes))
											return UT_IE_NOMEMORY; /* leave cast alone! */
										UT_DEBUGMSG(("SDW: About to insert %u-%u\n", lastInsPos, i));
										size_t spanLen = i - lastInsPos;
										// the final span also takes the last
										// character
										if (i == (len - 1)) spanLen++;
										UT_UCS4String span = textNode.substr(lastInsPos, spanLen);
										appendSpan(span.ucs4_str(), spanLen);
										lastInsPos = i;
									}
								}

								UT_VECTOR_PURGEALL(TextAttr*, charAttributes);
								break;

							}
							case SWG_JOBSETUP: {
								// flags are apparently unused here. no idea why they are there.
								gsf_off_t newpos;
								UT_uint8 flags;
								readFlagRec(mDocStream, flags, &newpos);
								if (gsf_input_seek(mDocStream, newpos, G_SEEK_SET))
									return UT_IE_BOGUSDOCUMENT;
								UT_uint16 len, system;
								streamRead(mDocStream, len);
								streamRead(mDocStream, system);
								char printerName[64];
								streamRead(mDocStream, printerName, 64);
								char deviceName[32], portName[32], driverName[32];
								streamRead(mDocStream, deviceName, 32);
								streamRead(mDocStream, portName, 32);
								streamRead(mDocStream, driverName, 32);
								UT_DEBUGMSG(("SDW: Jobsetup: len %u sys 0x%x printer |%.64s| device |%.32s| port |%.32s| driver |%.32s|\n", len, system, printerName, deviceName, portName, driverName));

								// only these two system codes carry the
								// page description parsed below
								if (system == JOBSET_FILE364_SYSTEM || system == JOBSET_FILE605_SYSTEM) {
									UT_uint16 len2, system2;
									streamRead(mDocStream, len2);
									streamRead(mDocStream, system2);
									UT_uint32 ddl; // driver data length
									streamRead(mDocStream, ddl);
									// now the interesting data
									UT_uint16 orient; // 0=portrait 1=landscape
									streamRead(mDocStream, orient);
									UT_uint16 paperBin;
									streamRead(mDocStream, paperBin);
									UT_uint16 paperFormat;
									streamRead(mDocStream, paperFormat);
									UT_uint32 width, height;
									streamRead(mDocStream, width);
									streamRead(mDocStream, height);
									UT_DEBUGMSG(("SDW: orient %u bin %u format %u width %u height %u\n", orient, paperBin, paperFormat, width, height));
									// rest of the data is ignored, seems to be printer specific anyway.
									// Use A4, Portrait by default
									PP_PropertyVector attributes = {
										"pagetype", "a4", // A4/Letter/...
										"orientation", "portrait",
										"width", "210",
										"height", "297",
										"units", "mm"
									};
									// out-of-range indices keep the defaults
									// set above
									const char* sdwPaperToAbi[] = {
										"A3",
										"A4",
										"A5",
										"B4",
										"B5",
										"Letter",
										"Legal",
										"Tabloid/Ledger",
										"Custom"
									};
									if (paperFormat < sizeof(sdwPaperToAbi)/sizeof(*sdwPaperToAbi)) {
										attributes[1] = sdwPaperToAbi[paperFormat];
									}
									const char* sdwOrientToAbi[] = {
										"portrait",
										"landscape"
									};
									if (orient < sizeof(sdwOrientToAbi)/sizeof(*sdwOrientToAbi)) {
										attributes[3] = sdwOrientToAbi[orient];
									}
									// width and height are written as the
									// stored values divided by 100
									attributes[5] = UT_std_string_sprintf("%f", static_cast<double>(width)/100);
									attributes[7] = UT_std_string_sprintf("%f", static_cast<double>(height)/100);

									getDoc()->setPageSizeFromFile(attributes);
								}

								break;

							}
							case SWG_EOF:
								done2 = true;
								break;
							default:
								UT_DEBUGMSG(("SDW: SWG_CONTENT: Skipping %u bytes for record type '%c' (starting at 0x%08llX)\n", size2, type, (long long)gsf_input_tell(mDocStream)));
						}
						// always continue at the end of the nested record
						if (gsf_input_seek(mDocStream, eor2, G_SEEK_SET))
							return UT_IE_BOGUSDOCUMENT;
					}
					break;
				}
				case SWG_STRINGPOOL:
				{
					if (mDocHdr.nVersion <= SWG_POOLIDS) {
						UT_ASSERT_HARMLESS(UT_NOT_IMPLEMENTED);
						break;
					}
					UT_uint8 encoding;
					streamRead(mDocStream, encoding);
					UT_iconv_t cd = findConverter(encoding);
					if (!UT_iconv_isValid(cd))
						throw UT_IE_IMPORTERROR;
					UT_uint16 count;
					streamRead(mDocStream, count);
					// each pool entry: 16 bit id followed by a byte string
					while (count--) {
						UT_uint16 id;
						streamRead(mDocStream, id);
						char* str;
						UT_uint16 len;
						::readByteString(mDocStream, str, &len);
						if (id == IDX_NOCONV_FF) {
							UT_ASSERT_HARMLESS(UT_NOT_IMPLEMENTED);
						}
						// FIXME: find a way to not have to copy and free
						// the result of UT_convert_cd.... --hub
						UT_DEBUGMSG(("SDW: StringPool: found 0x%04x <-> %.*s\n", id, len, str));
						UT_UCS4Char* convertedString = reinterpret_cast<UT_UCS4Char*>(UT_convert_cd(str, len + 1, cd, NULL, NULL));
						mStringPool.insert(stringpool_map::value_type(id, convertedString));
						FREEP(convertedString);
						delete [] str;
					}
					UT_iconv_close(cd);
					break;
				}
				case SWG_COMMENT: // skip over comments
					break;
				case SWG_EOF:
					done = true;
					break;
				default:
					UT_DEBUGMSG(("SDW: Skipping %u bytes for record type '%c' (starting at 0x%08llX)\n", recSize, type, (long long)gsf_input_tell(mDocStream)));
			}
			// Seek to the end of the record, in case it wasn't read completely
			if (gsf_input_seek(mDocStream, eor, G_SEEK_SET))
				return UT_IE_BOGUSDOCUMENT;
		}
		UT_DEBUGMSG(("SDW: Done\n"));
		return UT_OK;
	}
	catch(UT_Error e) {
		UT_DEBUGMSG(("SDW: error %d\n", e));
		return e;
	}
	catch(...) {
		UT_DEBUGMSG(("SDW: Unknown error\n"));
		return UT_IE_BOGUSDOCUMENT;
	}
}
void streamRead(GsfInput* aStream, TextAttr& aAttr, gsf_off_t aEoa) throw(UT_Error) { UT_uint8 flags; gsf_off_t newPos; readFlagRec(aStream, flags, &newPos); streamRead(aStream, aAttr.which); streamRead(aStream, aAttr.ver); if (flags & 0x10) { aAttr.startSet = true; streamRead(aStream, aAttr.start); } else aAttr.startSet = false; if (flags & 0x20) { aAttr.endSet = true; streamRead(aStream, aAttr.end); } else aAttr.endSet = false; if (gsf_input_seek(aStream, newPos, G_SEEK_SET)) throw UT_IE_BOGUSDOCUMENT; gsf_off_t curPos = gsf_input_tell(aStream); if (curPos != aEoa) { // there is data aAttr.dataLen = aEoa - curPos; aAttr.data = new UT_uint8[aAttr.dataLen]; streamRead(aStream, aAttr.data, aAttr.dataLen); } // LIST OF THE VALUES: http://ooo.ximian.com/lxr/source/sw/sw/inc/hintids.hxx#086 // together with http://ooo.ximian.com/lxr/source/sw/sw/source/core/sw3io/sw3fmts.cxx#172 switch (aAttr.which) { case 0x1004: // strikethrough aAttr.attrName = "text-decoration"; if (!aAttr.data || aAttr.data[0]) aAttr.attrVal = "line-through"; else aAttr.isOff = true; break; case 0x1005: { // sub-/superscript if (aAttr.dataLen < 3) break; // first byte is size of text % of normal size UT_sint16 height = GSF_LE_GET_GINT16(aAttr.data + 1); aAttr.attrName = "text-position"; if (height > 0) aAttr.attrVal = "superscript"; else if (height < 0) aAttr.attrVal = "subscript"; else aAttr.isOff = true; break; } case 0x1006: { // font family if (!aAttr.data || aAttr.dataLen < 7) // 7 = 3 byte family etc., 2 byte name length, 2 byte style length break; aAttr.attrName = "font-family"; // XXX TODO This code here assumes that the font names are in latin1 UT_uint16 fontLen = GSF_LE_GET_GUINT16(aAttr.data + 3); UT_String_sprintf(aAttr.attrVal, "%.*s", fontLen, (aAttr.data + 5)); break; } case 0x1007: // font height // structure: | height (2 byte, twips) | prop (?) 
(2 byte) (if version >= 2, if ver=1 1 byte) | unit (if version>=2) | // XXX we ignore "prop" and unit for now, they seem not used much aAttr.attrName = "font-size"; if (aAttr.data) aAttr.attrVal = twipsToSizeString(GSF_LE_GET_GUINT16(aAttr.data)); break; case 0x100a: // Italic aAttr.attrName = "font-style"; if (!aAttr.data || aAttr.data[0]) // if there is data, first byte must be != 0 // abiword doesn't support oblique, so always set italic aAttr.attrVal = "italic"; else aAttr.isOff = true; break; case 0x100d: // Underline aAttr.attrName = "text-decoration"; if (!aAttr.data || aAttr.data[0]) aAttr.attrVal = "underline"; else aAttr.isOff = true; break; case 0x100e: // Bold aAttr.attrName = "font-weight"; if (!aAttr.data || aAttr.data[0] >= 8) // 8=Bold. aAttr.attrVal = "bold"; else aAttr.isOff = true; break; case 0x4000: // line spacing aAttr.attrName = "line-height"; aAttr.isPara = true; // prop space (s8) | inter space (s16) | height (u16) | rule (s8) | interrule (s8) if (aAttr.data && aAttr.dataLen >= 7) { // Abiword wants it as float value, StarOffice saves as percentage (e.g. 
// 150 for 1.5) float proportionalLineSpace = float(aAttr.data[0])/100; // But maybe we need to use the height - stored as twips, need points // (used for "exact" and "minimum" line spacing) // XXX inter-line spacing not supported by abiword (would be rule=0x00 // interrule=0x02, value to use=inter space, unit twips) UT_String lineHeight = twipsToSizeString(GSF_LE_GET_GINT16(aAttr.data + 3)); // We'll turn the bytes at 5 and 6 into a single integer, for easier // evaluation switch (GSF_LE_GET_GUINT16(aAttr.data + 5)) { case 0x0100: // proportional aAttr.attrVal = std_size_string(proportionalLineSpace); break; case 0x0001: case 0x0002: aAttr.attrVal = lineHeight; if (aAttr.data[5] == 2) // "minimum" case aAttr.attrVal += '+'; break; default: UT_DEBUGMSG(("Unsupported linespacing: %02x %02x\n", aAttr.data[5], aAttr.data[6])); } } break; case 0x4001: // Alignment aAttr.attrName = "text-align"; aAttr.isPara = true; if (aAttr.data) { switch (aAttr.data[0]) { case 0: aAttr.attrVal = "left"; break; case 1: aAttr.attrVal = "right"; break; case 2: case 4: // BLOCKLINE!? what's BLOCKLINE? I'm guessing justify. aAttr.attrVal = "justify"; break; case 3: aAttr.attrVal = "center"; break; } } break; case 0x4005: {// Tabstops aAttr.attrName = "tabstops"; aAttr.isPara = true; // Data structure: // Count(8) | Position (in twips) (32) | Adjustment (8) | Decimal Separator (?) 
(8) | Fill character (8) // (total size per tab = 7) // UT_sint8 count = aAttr.data[0]; for (UT_uint32 i = 1; (i + 6) < aAttr.dataLen; i += 7) { // Abiword wants: 12.3cm/L0, where 0 indicates what to fill with UT_uint16 posInTwips = GSF_LE_GET_GUINT32(aAttr.data + i); UT_String pos = twipsToSizeString(posInTwips); aAttr.attrVal += pos; aAttr.attrVal += '/'; if (aAttr.data[i + 4] < sizeof(sTabAlignment)/sizeof(*sTabAlignment)) aAttr.attrVal += sTabAlignment[aAttr.data[i + 4]]; else aAttr.attrVal += 'L'; // fallback char fillIndex = '0'; // Fill character switch (aAttr.data[i + 6]) { case '.': fillIndex = '1'; break; case '-': fillIndex = '2'; break; case '_': fillIndex = '3'; break; case ' ': fillIndex = '0'; break; default: UT_DEBUGMSG(("Filling with '%c' is not supported\n", aAttr.data[i + 6])); } aAttr.attrVal += fillIndex; aAttr.attrVal += ','; } } break; default: UT_DEBUGMSG(("SDW: unknown attribute 0x%x, compressed %d\n", aAttr.which, lcl_sw3io__CompressWhich(aAttr.which))); } }
/**
 * html_file_open:
 * @fo: file opener (unused)
 * @io_context: receives an error when no document could be parsed
 * @wb_view: workbook view passed on to html_search_for_tables
 * @input: the data to read
 *
 * Rewinds @input, uses its first four bytes to guess the character encoding
 * and to skip a leading byte order mark, then feeds the rest to libxml2's
 * push HTML parser in chunks of at most 4096 bytes.  Every top level node of
 * the resulting document is handed to html_search_for_tables.  If no
 * document is produced (input shorter than four bytes, the first read
 * fails, or the parser context cannot be created) an error is set on
 * @io_context.
 *
 * NOTE(review): a failed rewind returns without reporting anything on
 * @io_context.
 **/
void
html_file_open (G_GNUC_UNUSED GOFileOpener const *fo, GOIOContext *io_context,
		WorkbookView *wb_view, GsfInput *input)
{
	guint8 const *buf;
	gsf_off_t size;
	int len, bomlen;
	htmlParserCtxtPtr ctxt;
	htmlDocPtr doc = NULL;
	xmlCharEncoding enc;
	GnmHtmlTableCtxt tc;

	g_return_if_fail (input != NULL);

	if (gsf_input_seek (input, 0, G_SEEK_SET))
		return;

	size = gsf_input_size (input);
	if (size >= 4) {
		/* The first four bytes are consumed for encoding detection */
		size -= 4;
		buf = gsf_input_read (input, 4, NULL);
		if (buf != NULL) {
			enc = xmlDetectCharEncoding(buf, 4);
			switch (enc) {	/* Skip byte order mark */
			case XML_CHAR_ENCODING_UCS4BE:
			case XML_CHAR_ENCODING_UCS4LE:
			case XML_CHAR_ENCODING_UCS4_2143:
			case XML_CHAR_ENCODING_UCS4_3412:
			case XML_CHAR_ENCODING_EBCDIC:
				bomlen = 4;
				break;
			case XML_CHAR_ENCODING_UTF16BE:
			case XML_CHAR_ENCODING_UTF16LE:
				bomlen = 2;
				break;
			case XML_CHAR_ENCODING_UTF8:
				/* NOTE(review): when the data starts with
				 * '<' (0x3c) all four detection bytes are
				 * dropped, not just a BOM - confirm this is
				 * intended. */
				if (buf[0] == 0xef)
					bomlen = 3;
				else if (buf[0] == 0x3c)
					bomlen = 4;
				else
					bomlen = 0;
				break;
			case XML_CHAR_ENCODING_NONE:
				bomlen = 0;
				/* Try to detect unmarked UTF16LE
				   (Firefox Windows clipboard, drag data all platforms) */
				if ((buf[0] >= 0x20 || g_ascii_isspace(buf[0])) &&
				    buf[1] == 0 &&
				    (buf[2] >= 0x20 || g_ascii_isspace(buf[2])) &&
				    buf[3] == 0)
					enc = XML_CHAR_ENCODING_UTF16LE;
				break;
			default:
				bomlen = 0;
			}

			ctxt = htmlCreatePushParserCtxt (
				NULL, NULL, (char const *)(buf + bomlen),
				4 - bomlen, gsf_input_name (input), enc);

			/* Context creation can fail; without this check the
			 * ctxt->myDoc access below would dereference NULL.
			 * Leaving doc NULL reports the parse error below. */
			if (ctxt != NULL) {
				for (; size > 0 ; size -= len) {
					len = MIN (4096, size);
					buf = gsf_input_read (input, len, NULL);
					if (buf == NULL)
						break;
					htmlParseChunk (
						ctxt, (char const *)buf, len, 0);
				}

				/* Zero length chunk with terminate set ends
				 * the parse */
				htmlParseChunk (ctxt, (char const *)buf, 0, 1);
				doc = ctxt->myDoc;
				htmlFreeParserCtxt (ctxt);
			}
		}
	}

	if (doc != NULL) {
		xmlNodePtr ptr;

		tc.sheet = NULL;
		tc.row   = -1;
		tc.wb_view = wb_view;
		for (ptr = doc->children; ptr != NULL ; ptr = ptr->next)
			html_search_for_tables (ptr, doc, wb_view, &tc);
		xmlFreeDoc (doc);
	} else
		go_io_error_info_set (io_context,
			go_error_info_new_str (_("Unable to parse the html.")));
}
bool IE_Imp_MSWrite::read_txt (int from, int to) { static const char *currcp; int fcMac, pnChar, fcFirst, cfod, fc, fcLim; unsigned char page[0x80]; UT_String properties, tmp; int dataLen = static_cast<UT_sint32>(mData.getLength()); UT_DEBUGMSG((" TXT:\n")); UT_DEBUGMSG((" from = %d\n", from)); UT_DEBUGMSG((" to = %d\n", to)); fcMac = wri_struct_value(wri_file_header, "fcMac"); pnChar = (fcMac + 127) / 128; fcFirst = 0x80; while (true) { gsf_input_seek(mFile, pnChar++ * 0x80, G_SEEK_SET); gsf_input_read(mFile, 0x80, page); fc = READ_DWORD(page); cfod = page[0x7f]; UT_DEBUGMSG((" fcFirst = %d\n", fc)); UT_DEBUGMSG((" cfod = %d\n", cfod)); if (fc != fcFirst) UT_WARNINGMSG(("read_txt: fcFirst wrong.\n")); // read all FODs (format descriptors) for (int fod = 0; fod < cfod; fod++) { int bfprop, cch, ftc, hps, fBold, fItalic, fUline, hpsPos; UT_DEBUGMSG((" CHP-FOD #%02d:\n", fod + 1)); // read a FOD (format descriptor) fcLim = READ_DWORD(page + 4 + fod * 6); bfprop = READ_WORD(page + 8 + fod * 6); UT_DEBUGMSG((" fcLim = %d\n", fcLim)); UT_DEBUGMSG((bfprop == 0xffff ? 
" bfprop = 0x%04X\n" : " bfprop = %d\n", bfprop)); // default CHP values ftc = 0; hps = 24; fBold = fItalic = fUline = hpsPos = 0; // if the CHP FPROPs (formatting properties) differ from the defaults, get them if (bfprop != 0xffff && bfprop + (cch = page[bfprop + 4]) < 0x80) { UT_DEBUGMSG((" cch = %d\n", cch)); if (cch >= 2) ftc = page[bfprop + 6] >> 2; if (cch >= 5) ftc |= (page[bfprop + 9] & 3) << 6; if (cch >= 3) hps = page[bfprop + 7]; if (cch >= 2) fBold = page[bfprop + 6] & 1; if (cch >= 2) fItalic = page[bfprop + 6] & 2; if (cch >= 4) fUline = page[bfprop + 8] & 1; if (cch >= 6) hpsPos = page[bfprop + 10]; } UT_DEBUGMSG((" ftc = %d\n", ftc)); UT_DEBUGMSG((" hps = %d\n", hps)); UT_DEBUGMSG((" fBold = %d\n", fBold)); UT_DEBUGMSG((" fItalic = %d\n", fItalic)); UT_DEBUGMSG((" fUline = %d\n", fUline)); UT_DEBUGMSG((" hpsPos = %d\n", hpsPos)); if (ftc >= wri_fonts_count) { UT_WARNINGMSG(("read_txt: Wrong font code.\n")); ftc = wri_fonts_count - 1; } if (from < fcLim && to >= fcFirst) { UT_LocaleTransactor lt(LC_NUMERIC, "C"); UT_String_sprintf(properties, "font-weight:%s", fBold ? "bold" : "normal"); if (hps != 24) { UT_String_sprintf(tmp, "; font-size:%dpt", hps / 2); properties += tmp; } if (fItalic) properties += "; font-style:italic"; if (fUline) properties += "; text-decoration:underline"; if (hpsPos) { UT_String_sprintf(tmp, "; text-position:%s", hpsPos < 128 ? 
"superscript" : "subscript"); properties += tmp; } if (wri_fonts_count) { UT_String_sprintf(tmp, "; font-family:%s", wri_fonts[ftc].name); properties += tmp; } if (wri_fonts[ftc].codepage != currcp /*sic!*/) { set_codepage(wri_fonts[ftc].codepage); currcp = wri_fonts[ftc].codepage; } mText.clear(); UT_DEBUGMSG((" Text: ")); while (fcFirst <= from && from < fcLim && from <= to && from - 0x80 < dataLen) translate_char(*mData.getPointer(from++ - 0x80), mText); UT_DEBUGMSG(("\n")); // new attributes, only if there was text if (mText.size() > 0) { const gchar *attributes[5]; const UT_UCS4Char *text = mText.ucs4_str(), *p = text; size_t txtLen; UT_DEBUGMSG((" Conv: %s\n", mText.utf8_str())); attributes[0] = PT_PROPS_ATTRIBUTE_NAME; attributes[1] = properties.c_str(); attributes[2] = NULL; appendFmt(attributes); // check for page number (should only be in header or footer) while (*p && *p != (UT_UCS4Char) 0x01) p++; if (*p) { if (p - text) appendSpan(text, p - text); attributes[2] = PT_TYPE_ATTRIBUTE_NAME; attributes[3] = "page_number"; attributes[4] = NULL; appendObject(PTO_Field, attributes); txtLen = mText.size() - (p - text) - 1; p++; } else { txtLen = mText.size(); p = text; } if (txtLen) appendSpan(p, txtLen); } } fcFirst = fcLim; if (fcLim >= fcMac || fcFirst > to) { UT_DEBUGMSG((" CHP-FODs end, fcLim (%d) >= fcMac (%d) or fcFirst (%d) > to (%d)\n", fcLim, fcMac, fcFirst, to)); return true; } } }