Beispiel #1
0
static gboolean
init_zip (GsfInputGZip *gzip, GError **err)
{
	gsf_off_t cur_pos;

	if (Z_OK != inflateInit2 (&(gzip->stream), -MAX_WBITS)) {
		if (err != NULL)
			*err = g_error_new (gsf_input_error_id (), 0,
				"Unable to initialize zlib");
		return TRUE;
	}

	cur_pos = gsf_input_tell (gzip->source);
	if (gsf_input_seek (gzip->source, 0, G_SEEK_SET)) {
		if (err)
			*err = g_error_new (gsf_input_error_id (), 0,
					    "Failed to rewind source");
		return TRUE;
	}

	if (check_header (gzip) != FALSE) {
		if (err != NULL)
			*err = g_error_new (gsf_input_error_id (), 0,
				"Invalid gzip header");
		if (gsf_input_seek (gzip->source, cur_pos, G_SEEK_SET)) {
			g_warning ("attempt to restore position failed ??");
		}
		return TRUE;
	}

	return FALSE;
}
Beispiel #2
0
static GlpkFileVersion
gnm_glpk_detect_version (GnmGlpk *lp,
			 GsfInputTextline *tl)
{
	GnmSubSolver *subsol = lp->parent;
	gsf_off_t cur = gsf_input_tell (GSF_INPUT (tl));
	GlpkFileVersion ver = GLPK_UNKNOWN;
	const char *line;
	unsigned cols, rows;

	if ((line = gsf_input_textline_utf8_gets (tl)) == NULL)
		goto out;
	if (sscanf (line, "%u %u", &rows, &cols) == 2 &&
	    cols == g_hash_table_size (subsol->cell_from_name)) {
		ver = GLPK_457;
		if (gnm_solver_debug ())
			g_printerr ("Detected version 4.57 file format\n");
		goto out;
	}

	if ((line[0] == 'c' || line[0] == 's') && line[1] == ' ') {
		ver = GLPK_458;
		if (gnm_solver_debug ())
			g_printerr ("Detected version 4.58 file format\n");
		goto out;
	}	

out:
	// Extra seek due to gsf bug
	gsf_input_seek (GSF_INPUT (tl), cur + 1, G_SEEK_SET);
	gsf_input_seek (GSF_INPUT (tl), cur, G_SEEK_SET);
	return ver;
}
Beispiel #3
0
/* returns TRUE on error */
static gboolean
zip_child_init (GsfInfileZip *child, GError **errmsg)
{
	static guint8 const header_signature[] =
		{ 'P', 'K', 0x03, 0x04 };
	guint8 const *data = NULL;
	guint16 name_len, extras_len;
	char *err = NULL;

	GsfZipDirent *dirent = child->vdir->dirent;

	/* skip local header
	 * should test tons of other info, but trust that those are correct
	 **/

	if (gsf_input_seek (child->source, (gsf_off_t) dirent->offset, G_SEEK_SET))
		err = g_strdup_printf ("Error seeking to zip header @ %" GSF_OFF_T_FORMAT,
				       dirent->offset);
	else if (NULL == (data = gsf_input_read (child->source, ZIP_FILE_HEADER_SIZE, NULL)))
		err = g_strdup_printf ("Error reading %d bytes in zip header", ZIP_FILE_HEADER_SIZE);
	else if (0 != memcmp (data, header_signature, sizeof (header_signature))) {
		err = g_strdup_printf ("Error incorrect zip header @ %" GSF_OFF_T_FORMAT,
				       dirent->offset);
		g_print ("Header is :\n");
		gsf_mem_dump (data, sizeof (header_signature));
		g_print ("Header should be :\n");
		gsf_mem_dump (header_signature, sizeof (header_signature));
	}

	if (NULL != err) {
		if (errmsg != NULL)
			*errmsg = g_error_new_literal (gsf_input_error_id (), 0, err);
		g_free (err);
		return TRUE;
	}

	name_len =   GSF_LE_GET_GUINT16 (data + ZIP_FILE_HEADER_NAME_SIZE);
	extras_len = GSF_LE_GET_GUINT16 (data + ZIP_FILE_HEADER_EXTRAS_SIZE);

	dirent->data_offset = dirent->offset + ZIP_FILE_HEADER_SIZE + name_len + extras_len;
	child->restlen  = dirent->usize;
	child->crestlen = dirent->csize;

	if (dirent->compr_method != GSF_ZIP_STORED) {
		int err;

		if (!child->stream)
			child->stream = g_new0 (z_stream, 1);

		err = inflateInit2 (child->stream, -MAX_WBITS);
		if (err != Z_OK) {
			if (errmsg != NULL)
				*errmsg = g_error_new (gsf_input_error_id (), 0,
						       "problem uncompressing stream");
			return TRUE;
		}
	}

	return FALSE;
}
Beispiel #4
0
static gboolean
gsf_input_textline_seek (GsfInput *input, gsf_off_t offset, GSeekType whence)
{
	GsfInputTextline *textline = GSF_INPUT_TEXTLINE (input);
	textline->remainder = NULL;
	return gsf_input_seek (textline->source, offset, whence);
}
Beispiel #5
0
/*
 * stf_open_and_read:
 * @filename: name of the file to open&read
 *
 * Will open filename, read the file into a g_alloced memory buffer
 *
 * NOTE : The returned buffer has to be g_freed by the calling routine.
 *
 * returns : a buffer containing the file contents
 */
static char *
stf_open_and_read (G_GNUC_UNUSED GOIOContext *context, GsfInput *input, size_t *readsize)
{
	gpointer result;
	gulong    allocsize;
	gsf_off_t size = gsf_input_size (input);

	if (gsf_input_seek (input, 0, G_SEEK_SET))
		return NULL;

	*readsize = (size_t) size;
	if ((gsf_off_t) *readsize != size) /* Check for overflow */
		return NULL;
	size++;
	allocsize = (gulong) size;
	if ((gsf_off_t) allocsize != size) /* Check for overflow */
		return NULL;
	result = g_try_malloc (allocsize);
	if (result == NULL)
		return NULL;

	*((char *)result + *readsize) = '\0';

	if (*readsize > 0 && gsf_input_read (input, *readsize, result) == NULL) {
		g_warning ("gsf_input_read failed.");
		g_free (result);
		result = NULL;
	}
	return result;
}
Beispiel #6
0
static gsf_off_t
zip_find_trailer (GsfInfileZip *zip)
{
	static guint8 const trailer_signature[] =
		{ 'P', 'K', 0x05, 0x06 };
	gsf_off_t offset, trailer_offset, filesize;
	gsf_off_t maplen;
	guint8 const *data;

	filesize = gsf_input_size (zip->source);
	if (filesize < ZIP_TRAILER_SIZE)
		return -1;

	trailer_offset = filesize;
	maplen = filesize & (ZIP_BUF_SIZE - 1);
	if (maplen == 0)
		maplen = ZIP_BUF_SIZE;
	offset = filesize - maplen; /* offset is now BUFSIZ aligned */

	while (TRUE) {
		guchar *p, *s;

		if (gsf_input_seek (zip->source, offset, G_SEEK_SET))
			return -1;

		if ((data = gsf_input_read (zip->source, maplen, NULL)) == NULL)
			return -1;

		p = (guchar *) data;
        
		for (s = p + maplen - 1; (s >= p); s--, trailer_offset--) {
			if ((*s == 'P') &&
			    (p + maplen - 1 - s > ZIP_TRAILER_SIZE - 2) &&
			    !memcmp (s, trailer_signature, sizeof (trailer_signature))) {
				return --trailer_offset;
			}
		}
        
		/* not found in currently mapped block, so update it if
		 * there is some room in before. The requirements are..
		 * (a) mappings should overlap so that trailer can cross BUFSIZ-boundary
		 * (b) trailer cannot be farther away than 64K from fileend
		 */

		/* outer loop cond */
		if (offset <= 0)
			return -1;

		/* outer loop step */
		offset -= ZIP_BUF_SIZE / 2;
		maplen = MIN (filesize - offset, ZIP_BUF_SIZE);
		trailer_offset = offset + maplen;

		if (filesize - offset > 64 * 1024)
			return -1;
	} /*outer loop*/

	return -1;
}
Beispiel #7
0
static guint8 const *
gsf_infile_zip_read (GsfInput *input, size_t num_bytes, guint8 *buffer)
{
	GsfInfileZip *zip = GSF_INFILE_ZIP (input);
	GsfZipVDir      *vdir = zip->vdir;
	gsf_off_t pos;

	if (zip->restlen < num_bytes)
		return NULL;

	switch (vdir->dirent->compr_method) {
	case GSF_ZIP_STORED:
		zip->restlen -= num_bytes;
		pos = zip->vdir->dirent->data_offset + input->cur_offset;
		if (gsf_input_seek (zip->source, pos, G_SEEK_SET))
			return NULL;
		return gsf_input_read (zip->source, num_bytes, buffer);

	case GSF_ZIP_DEFLATED:
		if (buffer == NULL) {
			if (zip->buf_size < num_bytes) {
				zip->buf_size = MAX (num_bytes, 256);
				g_free (zip->buf);
				zip->buf = g_new (guint8, zip->buf_size);
			}
			buffer = zip->buf;
		}

		zip->stream->avail_out = num_bytes;
		zip->stream->next_out = (unsigned char *)buffer;

		do {
			int err;
			int startlen;

			if (zip->crestlen > 0 && zip->stream->avail_in == 0)
				if (!zip_update_stream_in (zip))
					break;

			startlen = zip->stream->total_out;
			err = inflate(zip->stream, Z_NO_FLUSH);

			if (err == Z_STREAM_END) 
				zip->restlen = 0;
			else if (err == Z_OK)
				zip->restlen -= (zip->stream->total_out - startlen);
			else
				break;

		} while (zip->restlen && zip->stream->avail_out);

		return buffer;

	default:
		break;
	}

	return NULL;
}
Beispiel #8
0
gboolean
sylk_file_probe (GOFileOpener const *fo, GsfInput *input, GOFileProbeLevel pl)
{
	char const *header = NULL;
	if (!gsf_input_seek (input, 0, G_SEEK_SET))
		header = gsf_input_read (input, 3, NULL);
	return (header != NULL && strncmp (header, "ID;", 3) == 0);
}
Beispiel #9
0
/**
 * gsf_vba_inflate:
 * @input: stream to read from
 * @offset: offset into it for start byte of compressed stream
 * @size: size of the returned array
 * @add_null_terminator: whenever add or not null at the end of array
 * 
 * Decompresses VBA stream.
 * 
 * Return value: A pointer to guint8 array
 **/
guint8 *
gsf_vba_inflate (GsfInput *input, gsf_off_t offset, int *size, gboolean add_null_terminator)
{
	guint8 sig;
	GByteArray *res;
	gsf_off_t length;

	res = g_byte_array_new();

	gsf_input_read (input, 1, &sig);
	if (1 != sig) /* should start with 0x01 */
		return NULL;
	offset++;

	length = gsf_input_size (input);
	while (offset < length) {
		GsfInput *chunk;
		guint16 chunk_hdr;
		guint8 const *tmp;

		tmp = gsf_input_read (input, 2, NULL);
		if (!tmp)
			break;

		chunk_hdr = GSF_LE_GET_GUINT16 (tmp);
		offset += 2;

		if (0xB000 == (chunk_hdr&0xF000) && (chunk_hdr&0xFFF) > 0 && (length - offset < 4094)){
			if (length < offset + (chunk_hdr&0xFFF))
				break;
			chunk = gsf_input_proxy_new_section (input, offset, (gsf_off_t) (chunk_hdr&0xFFF) + 1);
			offset += (chunk_hdr&0xFFF) + 1;
		} else {
			if (length < offset + 4094){
				chunk = gsf_input_proxy_new_section (input, offset, length-offset);
				offset = length;
			} else {
				chunk = gsf_input_proxy_new_section (input, offset, 4094);
				offset += 4094;
			}
		}
		if (chunk) {
			GByteArray *tmpres = gsf_msole_inflate (chunk, 0);
			gsf_input_seek (input, offset, G_SEEK_CUR);
			g_byte_array_append (res, tmpres->data, tmpres->len);
			g_byte_array_free (tmpres, TRUE);
			g_object_unref (chunk);
		}
	}
	
	if (res == NULL)
		return NULL;
	if (add_null_terminator)
		g_byte_array_append (res, "", 1);
	*size = res->len;

	return g_byte_array_free (res, FALSE);
}
/*!
  Detect encoding of text file
 \param fp File

 Supports UTF-8 and UCS-2 big and little endian
 CJK encodings could be added
 */
UT_Error IE_Imp_Text::_recognizeEncoding(GsfInput * fp)
{
	char szBuf[4096];  // 4096 ought to be enough
	UT_sint32 iNumbytes;

	iNumbytes = UT_MIN(4096, gsf_input_remaining(fp));
	gsf_input_read(fp, iNumbytes, (guint8 *)szBuf);
	gsf_input_seek(fp, 0, G_SEEK_SET);

	return _recognizeEncoding(szBuf, iNumbytes);
}
Beispiel #11
0
static GsfZipDirent *
zip_dirent_new_in (GsfInfileZip *zip, gsf_off_t *offset)
{
	static guint8 const dirent_signature[] =
		{ 'P', 'K', 0x01, 0x02 };
	GsfZipDirent *dirent;
	guint8 const *data;
	guint16 name_len, extras_len, comment_len, compr_method, flags;
	guint32 crc32, csize, usize, off;
	gchar *name;

	/* Read data and check the header */
	if (gsf_input_seek (zip->source, *offset, G_SEEK_SET) ||
	    NULL == (data = gsf_input_read (zip->source, ZIP_DIRENT_SIZE, NULL)) ||
	    0 != memcmp (data, dirent_signature, sizeof (dirent_signature))) {
		return NULL;
	}

	name_len =      GSF_LE_GET_GUINT16 (data + ZIP_DIRENT_NAME_SIZE);
	extras_len =    GSF_LE_GET_GUINT16 (data + ZIP_DIRENT_EXTRAS_SIZE);
	comment_len =   GSF_LE_GET_GUINT16 (data + ZIP_DIRENT_COMMENT_SIZE);

	flags =         GSF_LE_GET_GUINT32 (data + ZIP_DIRENT_FLAGS);
	compr_method =  GSF_LE_GET_GUINT16 (data + ZIP_DIRENT_COMPR_METHOD);
	crc32 =         GSF_LE_GET_GUINT32 (data + ZIP_DIRENT_CRC32);
	csize =         GSF_LE_GET_GUINT32 (data + ZIP_DIRENT_CSIZE);
	usize =         GSF_LE_GET_GUINT32 (data + ZIP_DIRENT_USIZE);
	off =           GSF_LE_GET_GUINT32 (data + ZIP_DIRENT_OFFSET);

	if ((data = gsf_input_read (zip->source, name_len, NULL)) == NULL)
		return NULL;

	name = g_new (gchar, (gulong) (name_len + 1));
	memcpy (name, data, name_len);
	name[name_len] = '\0';

	dirent = gsf_zip_dirent_new ();
	dirent->name = name;

	dirent->flags = flags;
	dirent->compr_method =  compr_method;
	dirent->crc32 =         crc32;
	dirent->csize =         csize;
	dirent->usize =         usize;
	dirent->offset =        off;
#if 0
	g_print ("%s = 0x%x @ %" GSF_OFF_T_FORMAT "\n", name, off, *offset);
#endif

	*offset += ZIP_DIRENT_SIZE + name_len + extras_len + comment_len;

	return dirent;
}
 bool check_password(char *password) {
     int offset = wvStream_tell(ps.tablefd);
     wvSetPassword(password, &ps);
     bool r = false;
     switch (version) {
         case WORD6: case WORD7:
             r = wvDecrypt95(&ps) == 0; break;
         case WORD8:
             r = wvDecrypt97(&ps) == 0; break;
     }
     gsf_input_seek(ps.tablefd->stream.gsf_stream, offset, G_SEEK_SET);
     return r;
 }
Beispiel #13
0
/**
 * zip_read_dirents:
 * @zip : #GsfInfileZip
 *
 * Read zip headers and do some sanity checking
 * along the way.
 *
 * Returns: %TRUE on error setting zip->err.
 **/
static gboolean
zip_read_dirents (GsfInfileZip *zip)
{
	guint8 const *trailer;
	guint16 entries, i;
	guint32 dir_pos;
	ZipInfo *info;
	gsf_off_t offset;

	/* Find and check the trailing header */
	offset = zip_find_trailer (zip);
	if (offset < 0) {
		zip->err = g_error_new (gsf_input_error_id (), 0,
					"No Zip trailer");
		return TRUE;
	}

	if (gsf_input_seek (zip->source, offset, G_SEEK_SET) ||
	    NULL == (trailer = gsf_input_read (zip->source, ZIP_TRAILER_SIZE, NULL))) {
		zip->err = g_error_new (gsf_input_error_id (), 0,
					"Error reading Zip signature");
		return TRUE;
	}

	entries      = GSF_LE_GET_GUINT32 (trailer + ZIP_TRAILER_ENTRIES);
	dir_pos      = GSF_LE_GET_GUINT32 (trailer + ZIP_TRAILER_DIR_POS);

	info = g_new0 (ZipInfo, 1);
	zip->info = info;

	info->ref_count    = 1;
	info->entries      = entries;
	info->dir_pos      = dir_pos;

	/* Read the directory */
	for (i = 0, offset = dir_pos; i < entries; i++) {
		GsfZipDirent *d;

		d = zip_dirent_new_in (zip, &offset);
		if (d == NULL) {
			zip->err = g_error_new (gsf_input_error_id (), 0,
						"Error reading zip dirent");
			return TRUE;
		}

		info->dirent_list = g_list_append (info->dirent_list, d);
	}

	return FALSE;
}
int AbiWordperfectInputStream::seek(long offset, WPX_SEEK_TYPE seekType) 
{
	GSeekType gsfSeekType = G_SEEK_SET;
	switch(seekType)
	{
	case WPX_SEEK_CUR:
		gsfSeekType = G_SEEK_CUR;
		break;
	case WPX_SEEK_SET:
		gsfSeekType = G_SEEK_SET;
		break;
	}

	return gsf_input_seek(m_input, offset, gsfSeekType);
}
Beispiel #15
0
void
wvStream_rewind (wvStream * in)
{
    if (in->kind == GSF_STREAM)
      {
	gsf_input_seek (GSF_INPUT (in->stream.gsf_stream), 0, G_SEEK_SET);
      }
    else if (in->kind == FILE_STREAM)
      {
	  rewind (in->stream.file_stream);
      }
    else
      {
	in->stream.memory_stream->current = 0;
      }
}
Beispiel #16
0
static gboolean
go_plugin_file_opener_probe (GOFileOpener const *fo, GsfInput *input,
                               GOFileProbeLevel pl)
{
	GOPluginFileOpener *pfo = GO_PLUGIN_FILE_OPENER (fo);
	GOPluginServiceFileOpener *service_file_opener = GO_PLUGIN_SERVICE_FILE_OPENER (pfo->service);

	g_return_val_if_fail (GSF_IS_INPUT (input), FALSE);

	if (pl == GO_FILE_PROBE_FILE_NAME && service_file_opener->suffixes != NULL) {
		GSList *ptr;
		gchar const *extension;
		gchar *lowercase_extension;

		if (gsf_input_name (input) == NULL)
			return FALSE;
		extension = gsf_extension_pointer (gsf_input_name (input));
		if (extension == NULL)
			return FALSE;

		lowercase_extension = g_utf8_strdown (extension, -1);
		for (ptr = service_file_opener->suffixes; ptr != NULL ; ptr = ptr->next)
			if (0 == strcmp (lowercase_extension, ptr->data))
				break;
		g_free (lowercase_extension);
		return ptr != NULL;
	}

	if (service_file_opener->has_probe) {
		GOErrorInfo *ignored_error = NULL;

		go_plugin_service_load (pfo->service, &ignored_error);
		if (ignored_error != NULL) {
			go_error_info_print (ignored_error);
			go_error_info_free (ignored_error);
			return FALSE;
		} else if (service_file_opener->cbs.plugin_func_file_probe == NULL) {
			return FALSE;
		} else {
			gboolean res = service_file_opener->cbs.plugin_func_file_probe (fo, pfo->service, input, pl);
			gsf_input_seek (input, 0, G_SEEK_SET);
			return res;
		}
	} else {
		return FALSE;
	}
}
Beispiel #17
0
static gboolean
qpro_check_signature (GsfInput *input)
{
	guint8 const *header;
	guint16 version;

	if (gsf_input_seek (input, 0, G_SEEK_SET) ||
	    NULL == (header = gsf_input_read (input, 2+2+2, NULL)) ||
	    GSF_LE_GET_GUINT16 (header + 0) != 0 ||
	    GSF_LE_GET_GUINT16 (header + 2) != 2)
		return FALSE;
	version = GSF_LE_GET_GUINT16 (header + 4);
	return (version == 0x1001 || /* 'WB1' format, documented */
		version == 0x1002 || /* 'WB2' format, documented */
		version == 0x1006 ||  /* qpro 6.0 ?? */
		version == 0x1007);  /* qpro 7.0 ?? */
}
Beispiel #18
0
U32
wvStream_offset (wvStream * in, long offset)
{
    if (in->kind == GSF_STREAM)
      {
	gsf_input_seek (GSF_INPUT (in->stream.gsf_stream), offset, G_SEEK_CUR);
	return (U32)gsf_input_tell(GSF_INPUT (in->stream.gsf_stream));
      }
    else if (in->kind == FILE_STREAM)
      {
	  return ((U32) fseek (in->stream.file_stream, offset, SEEK_CUR));
      }
    else
      {
	in->stream.memory_stream->current += offset;
	return  in->stream.memory_stream->current;
      }
}
Beispiel #19
0
U32
wvStream_goto (wvStream * in, long position)
{
    if (in->kind == GSF_STREAM)
      {
	gsf_input_seek (GSF_INPUT (in->stream.gsf_stream), position, G_SEEK_SET);
	return (U32)gsf_input_tell(GSF_INPUT (in->stream.gsf_stream));
      }
    else if (in->kind == FILE_STREAM)
      {
	  return ((U32) fseek (in->stream.file_stream, position, SEEK_SET));
      }
    else
      {
	in->stream.memory_stream->current = position;
        return in->stream.memory_stream->current;
      }
}
Beispiel #20
0
UT_Error IE_Imp_EPUB::uncompress()
{
    m_tmpDir = UT_go_filename_to_uri(g_get_tmp_dir());
    m_tmpDir += G_DIR_SEPARATOR_S;
    m_tmpDir += getDoc()->getDocUUIDString();

    if (!UT_go_directory_create(m_tmpDir.c_str(), 0644, NULL))
    {
        UT_DEBUGMSG(("Can`t create temporary directory\n"));
        return UT_ERROR;
    }
    GsfInput *opsDirInput = gsf_infile_child_by_name(m_epub,
            m_opsDir.c_str());
    UT_DEBUGMSG(("Child count : %d", gsf_infile_num_children(m_epub)));
    if (opsDirInput == NULL)
    {
        UT_DEBUGMSG(("Failed to open OPS dir\n"));
        return UT_ERROR;
    }

    for (std::map<std::string, std::string>::iterator i =
            m_manifestItems.begin(); i != m_manifestItems.end(); i++)
    {
        gchar *itemFileName = UT_go_filename_from_uri(
                (m_tmpDir + G_DIR_SEPARATOR_S + (*i).second).c_str());
        gchar** aname =
                g_strsplit((*i).second.c_str(), G_DIR_SEPARATOR_S, 0);

        GsfInput* itemInput = gsf_infile_child_by_aname(
                GSF_INFILE(opsDirInput), (const char**) aname);
        GsfOutput* itemOutput = createFileByPath(itemFileName);
        gsf_input_seek(itemInput, 0, G_SEEK_SET);
        gsf_input_copy(itemInput, itemOutput);
        g_strfreev(aname);
        g_free(itemFileName);
        g_object_unref(G_OBJECT(itemInput));
        gsf_output_close(itemOutput);
    }

    g_object_unref(G_OBJECT(opsDirInput));

    return UT_OK;
}
Beispiel #21
0
UT_Error IE_Exp_EPUB::compress()
{

    GsfInfile* oebpsDir = gsf_infile_stdio_new(
            UT_go_filename_from_uri(m_oebpsDir.c_str()), NULL);

    if (oebpsDir == NULL)
    {
        UT_DEBUGMSG(("RUDYJ: Can`t open temporary OEBPS directory\n"));
        return UT_ERROR;
    }

    std::vector<std::string> listing = getFileList(
            UT_go_filename_from_uri(m_oebpsDir.c_str()));
    for (std::vector<std::string>::iterator i = listing.begin(); i
            != listing.end(); i++)
    {
        GsfOutput* item = gsf_outfile_new_child(GSF_OUTFILE(m_oebps),
                (*i).c_str(), FALSE);
	std::string fullPath = m_oebpsDir + G_DIR_SEPARATOR_S + *i;
        GsfInput* file = UT_go_file_open(fullPath.c_str(), NULL);

        if (file == NULL)
        {
            UT_DEBUGMSG(("RUDYJ: Can`t open file\n"));
            return UT_ERROR;
        }

        gsf_output_seek(item, 0, G_SEEK_SET);
        gsf_input_seek(file, 0, G_SEEK_SET);
        gsf_input_copy(file, item);
        gsf_output_close(item);
        // Time to delete temporary file
        UT_go_file_remove(fullPath.c_str(), NULL);
    }

    UT_go_file_remove((m_oebpsDir + G_DIR_SEPARATOR_S + "index.xhtml_files").c_str(), NULL);
    UT_go_file_remove(m_oebpsDir.c_str(), NULL);
	return UT_OK;
}
Beispiel #22
0
static gboolean
gsf_input_gzip_seek (GsfInput *input, gsf_off_t offset, GSeekType whence)
{
	GsfInputGZip *gzip = GSF_INPUT_GZIP (input);
	/* Global flag -- we don't want one per stream.  */
	static gboolean warned = FALSE;
	gsf_off_t pos = offset;

	/* Note, that pos has already been sanity checked.  */
	switch (whence) {
	case G_SEEK_SET : break;
	case G_SEEK_CUR : pos += input->cur_offset;	break;
	case G_SEEK_END : pos += input->size;		break;
	default : return TRUE;
	}

	if (pos < input->cur_offset) {
		if (gsf_input_seek (gzip->source, gzip->header_size, G_SEEK_SET))
			return TRUE;
		gzip->crc = crc32 (0L, Z_NULL, 0);
		gzip->stream.avail_in = 0;
		if (inflateReset (&(gzip->stream)) != Z_OK)
			return TRUE;
		input->cur_offset = 0;
	}

	if (gsf_input_seek_emulate (input, pos))
		return TRUE;

	gzip->seek_skipped += pos;
	if (!warned &&
	    gzip->seek_skipped != pos && /* Don't warn for single seek.  */
	    gzip->seek_skipped >= 1000000) {
		warned = TRUE;
		g_warning ("Seeking in gzipped streams is awfully slow.");
	}

	return FALSE;
}
Beispiel #23
0
static psiconv_buffer
psiconv_stream_to_buffer (GsfInput *input, int maxlen)
{
	psiconv_buffer buf;
	gsf_off_t size;
	int len;

	if (!input)
		return NULL;
	if ((buf = psiconv_buffer_new()) == NULL)
		return NULL;
	if (gsf_input_seek (input, 0, G_SEEK_SET) == TRUE) {
		psiconv_buffer_free(buf);
		return NULL;
	}

	size = gsf_input_size (input);
	if (maxlen > 0 && size > maxlen)
		size = maxlen;
	for (; size > 0 ; size -= len) {
		guint8 const *chunk;
		int   i;

		len = MIN (4096, size);
		chunk = gsf_input_read (input, len, NULL);
		if (chunk == NULL)
			break;
		for (i = 0; i<len; i++) {
			if (psiconv_buffer_add(buf, chunk[i]) != 0) {
				psiconv_buffer_free(buf);
				return NULL;
			}
		}
	}

	return buf;
}
Beispiel #24
0
static gboolean
zip_update_stream_in (GsfInfileZip *zip)
{
	guint32 read_now;
	guint8 const *data;
	gsf_off_t pos;
	
	if (zip->crestlen == 0)
		return FALSE;

	read_now = MIN (zip->crestlen, ZIP_BLOCK_SIZE);

	pos = zip->vdir->dirent->data_offset + zip->stream->total_in;
	if (gsf_input_seek (zip->source, pos, G_SEEK_SET))
		return FALSE;
	if ((data = gsf_input_read (zip->source, read_now, NULL)) == NULL)
		return FALSE;

	zip->crestlen -= read_now;
	zip->stream->next_in  = (unsigned char *) data;      /* next input byte */
	zip->stream->avail_in = read_now;  /* number of bytes available at next_in */

	return TRUE;
}
Beispiel #25
0
/**
 * tar_init_info :
 * @tar : #GsfInfileTar
 *
 * Read tar headers and do some sanity checking
 * along the way.
 **/
static void
tar_init_info (GsfInfileTar *tar)
{
	TarHeader end;
	const TarHeader *header;
	gsf_off_t pos0 = gsf_input_tell (tar->source);
	char *pending_longname = NULL;

	memset (&end, 0, sizeof (end));

	while (tar->err == NULL &&
	       (header = (const TarHeader *)gsf_input_read (tar->source,
							    HEADER_SIZE,
							    NULL))) {
		char *name;
		gsf_off_t length;
		gsf_off_t offset;

		if (memcmp (header->filler, end.filler, sizeof (end.filler))) {
			tar->err = g_error_new (gsf_input_error_id (), 0,
						"Invalid tar header");
			break;
		}

		if (memcmp (header, &end, HEADER_SIZE) == 0)
			break;

		if (pending_longname) {
			name = pending_longname;
			pending_longname = NULL;
		} else
			name = g_strndup (header->name, sizeof (header->name));
		length = unpack_octal (tar, header->size, sizeof (header->size));
		offset = gsf_input_tell (tar->source);

#if 0
		g_printerr ("[%s]: %d\n", name, (int)length);
#endif

		switch (header->typeflag) {
		case '0': case 0: {
			/* Regular file. */
			GsfInfileTar *dir;
			const char *n = name, *s;
			TarChild c;

			/* This is deliberately slash-only.  */
			while ((s = strchr (n, '/')))
				n = s + 1;
			c.name = g_strdup (n);
			c.offset = offset;
			c.length = length;
			c.dir = NULL;
			dir = tar_directory_for_file (tar, name, FALSE);
			g_array_append_val (dir->children, c);
			break;
		}
		case '5': {
			/* Directory */
			(void)tar_directory_for_file (tar, name, TRUE);
			break;
		}
		case 'L': {
			const char *n;

			if (pending_longname ||
			    strcmp (name, MAGIC_LONGNAME) != 0) {
				tar->err = g_error_new (gsf_input_error_id (), 0,
							"Invalid longname header");
				break;
			}

			n = gsf_input_read (tar->source, length, NULL);
			if (!n) {
				tar->err = g_error_new (gsf_input_error_id (), 0,
							"Failed to read longname");
				break;
			}

			pending_longname = g_strndup (n, length);
			break;
		}
		default:
			/* Other -- ignore */
			break;
		}

		g_free (name);

		/* Round up to block size */
		length = (length + (BLOCK_SIZE - 1)) / BLOCK_SIZE * BLOCK_SIZE;
		
		if (!tar->err &&
		    gsf_input_seek (tar->source, offset + length, G_SEEK_SET)) {
			tar->err = g_error_new (gsf_input_error_id (), 0,
						"Seek failed");
			break;
		}
	}

	if (pending_longname) {
		if (!tar->err)
			tar->err = g_error_new (gsf_input_error_id (), 0,
						"Truncated archive");
		g_free (pending_longname);
	}

	if (tar->err)
		gsf_input_seek (tar->source, pos0, G_SEEK_SET);
}
UT_Error IE_Imp_PalmDoc::_parseFile(GsfInput * pdfp)
{
	UT_GrowBuf gbBlock(1024);
	bool bEatLF = false;
	bool bEmptyFile = true;
	UT_UCSChar c;
	UT_UCS4Char wc;

	pdb_header	header;
	doc_record0	rec0;
	bool		bCompressed = false;
	int		num_records, rec_num;
	DWord		file_size, offset;

	gsf_input_read( pdfp, PDB_HEADER_SIZE, (guint8*)&header);
	if (strncmp( header.type,    DOC_TYPE,    sizeof(header.type) ) ||
	    strncmp( header.creator, DOC_CREATOR, sizeof(header.creator) ))
        {
		UT_DEBUGMSG(("This is not a DOC file!\n"));

		// Create an empty paragraph.
		X_ReturnNoMemIfError(appendStrux(PTX_Block, NULL));
		return UT_OK;
	}

	num_records = _swap_Word( header.numRecords ) - 1;

	gsf_input_seek( pdfp, PDB_HEADER_SIZE, G_SEEK_SET );
	GET_DWord( pdfp, offset );
	gsf_input_seek( pdfp, offset, G_SEEK_SET );
	gsf_input_read( pdfp, sizeof(rec0), (guint8*)&rec0);

	if ( _swap_Word( rec0.version ) == 2 )
		bCompressed = true;

	gsf_input_seek( pdfp, 0, G_SEEK_END );
	file_size = gsf_input_tell( pdfp );

	for (rec_num = 1; rec_num <= num_records; ++rec_num )
	{
		DWord next_offset;

		gsf_input_seek( pdfp, PDB_HEADER_SIZE + PDB_RECORD_HEADER_SIZE * rec_num, G_SEEK_SET);
		GET_DWord( pdfp, offset );
		if( rec_num < num_records )
		{
			gsf_input_seek( pdfp, PDB_HEADER_SIZE + PDB_RECORD_HEADER_SIZE * (rec_num + 1), G_SEEK_SET);
			GET_DWord( pdfp, next_offset );
		}
		else
			next_offset = file_size;

		gsf_input_seek( pdfp, offset, G_SEEK_SET );

		// be overly cautious here
		_zero_fill (m_buf->buf, BUFFER_SIZE);
		gsf_input_read(pdfp, next_offset - offset, m_buf->buf);
		m_buf->position = next_offset - offset;

		if ( bCompressed )
			_uncompress( m_buf );

		m_buf->position = 0;

		while ( (m_buf->position) < (m_buf->len) )
		{
		  // don't copy over null chars
		        if (m_buf->buf[m_buf->position] == '\0')
			  {
			    ++m_buf->position;
			    continue;
			  }
			if( !m_Mbtowc.mbtowc( wc, m_buf->buf[m_buf->position] ) )
		 	   continue;
			c = static_cast<UT_UCSChar>(wc);
			switch (c)
			{
			case static_cast<UT_UCSChar>('\r'):
			case static_cast<UT_UCSChar>('\n'):
			
				if ((c == static_cast<UT_UCSChar>('\n')) && bEatLF)
				{
					bEatLF = false;
					break;
				}

				if (c == static_cast<UT_UCSChar>('\r'))
				{
					bEatLF = true;
				}
		
				// we interprete either CRLF, CR, or LF as a paragraph break.
		
				// start a paragraph and emit any text that we
				// have accumulated.
				X_ReturnNoMemIfError(appendStrux(PTX_Block, NULL));
				bEmptyFile = false;
				if (gbBlock.getLength() > 0)
				{
					X_ReturnNoMemIfError(appendSpan(reinterpret_cast<const UT_UCSChar*>(gbBlock.getPointer(0)), gbBlock.getLength()));
					gbBlock.truncate(0);
				}
				break;

			default:
				bEatLF = false;
				X_ReturnNoMemIfError(gbBlock.ins(gbBlock.getLength(),reinterpret_cast<const UT_GrowBufElement *>(&c),1));
				break;
			}

			++m_buf->position;
		} 

	}
	if (gbBlock.getLength() > 0 || bEmptyFile)
	{
		// if we have text left over (without final CR/LF),
		// or if we read an empty file,
		// create a paragraph and emit the text now.
		X_ReturnNoMemIfError(appendStrux(PTX_Block, NULL));
		if (gbBlock.getLength() > 0)
			X_ReturnNoMemIfError(appendSpan(reinterpret_cast<const UT_UCSChar *>(gbBlock.getPointer(0)), gbBlock.getLength()));
	}

	return UT_OK;
}
Beispiel #27
0
UT_Error IE_Imp_StarOffice::_loadFile(GsfInput * input)
{
	try {
		UT_DEBUGMSG(("SDW: Starting import\n"));
		mOle = GSF_INFILE (gsf_infile_msole_new(input, NULL));
		if (!mOle)
			return UT_IE_BOGUSDOCUMENT;

		// firstly, load metadata
		SDWDocInfo::load(mOle, getDoc());

		mDocStream = gsf_infile_child_by_name(mOle, "StarWriterDocument");
		if (!mDocStream)
			return UT_IE_BOGUSDOCUMENT;

		gsf_off_t size = gsf_input_size(mDocStream);

		if (!appendStrux(PTX_Section, PP_NOPROPS))
			return UT_IE_NOMEMORY;

		UT_DEBUGMSG(("SDW: Attempting to load DocHdr...\n"));
		mDocHdr.load(mDocStream);
		UT_DEBUGMSG(("SDW: ...success\n"));

		// Ask for and verify the password
		if (mDocHdr.cryptor) {
			if (!mDocHdr.cryptor->SetPassword(GetPassword().c_str())) {
				UT_DEBUGMSG(("SDW: Wrong password\n"));
				return UT_IE_PROTECTED;
			}
		}

		// do the actual reading
		char type;
		bool done = false;
		UT_uint32 recSize;
		while (!done) {
			if (gsf_input_tell(mDocStream) == size)
				break;
			readChar(mDocStream, type);
			gsf_off_t eor;
			readRecSize(mDocStream, recSize, &eor);

			switch (type) {
				case SWG_CONTENTS: {
					gsf_off_t flagsEnd = 0;
					UT_uint32 nNodes;
					// sw/source/core/sw3io/sw3sectn.cxx#L129
					if (mDocHdr.nVersion >= SWG_LAYFRAMES) {
						UT_uint8 flags;
						readFlagRec(mDocStream, flags, &flagsEnd);
					}
					if (mDocHdr.nVersion >= SWG_LONGIDX)
						streamRead(mDocStream, nNodes);
					else {
						if (mDocHdr.nVersion >= SWG_LAYFRAMES) {
							UT_uint16 sectidDummy;
							streamRead(mDocStream, sectidDummy);
						}
						UT_uint16 nodes16;
						streamRead(mDocStream, nodes16);
						nNodes = (UT_uint32)nodes16;
					}
					if (flagsEnd) {
						UT_ASSERT(flagsEnd >= gsf_input_tell(mDocStream));
						if (gsf_input_tell(mDocStream) != flagsEnd) {
							UT_DEBUGMSG(("SDW: have not read all flags\n"));
							if (gsf_input_seek(mDocStream, flagsEnd, G_SEEK_SET))
								return UT_IE_BOGUSDOCUMENT;
						}
					}
					bool done2 = false;
					UT_uint32 size2;
					while (!done2) {
						readChar(mDocStream, type);
						gsf_off_t eor2;
						readRecSize(mDocStream, size2, &eor2);

						switch (type) {
							case SWG_TEXTNODE: { // sw/source/core/sw3io/sw3nodes.cxx#L788
								UT_DEBUGMSG(("SDW: Found Textnode! (start at 0x%08llX end at 0x%08llX)\n", 
											 (long long)gsf_input_tell(mDocStream), 
											 (long long)eor2));
								UT_uint8 flags;
								gsf_off_t newPos;
								readFlagRec(mDocStream, flags, &newPos);
								// XXX check flags
								if (gsf_input_seek(mDocStream, newPos, G_SEEK_SET))
									return UT_IE_BOGUSDOCUMENT;

								// Read the actual text
								UT_UCS4Char* str;
								readByteString(mDocStream, str);
								UT_UCS4String textNode(str);
								free(str);
								UT_DEBUGMSG(("SDW: ...length=%zu contents are: |%s|\n", textNode.length(), textNode.utf8_str()));

								// now get the attributes
								UT_String attrs;
								UT_String pAttrs;
								UT_Vector charAttributes;
								while (gsf_input_tell(mDocStream) < eor2) {
									char attVal;
									streamRead(mDocStream, attVal);
									UT_uint32 attSize;
									gsf_off_t eoa; // end of attribute
									readRecSize(mDocStream, attSize, &eoa);
									if (attVal == SWG_ATTRIBUTE) {
										TextAttr* a = new TextAttr;
										streamRead(mDocStream, *a, eoa);
										UT_DEBUGMSG(("SDW: ...found text-sub-node, which=0x%x, ver=0x%x, start=%u, end=%u - data:%s len:%llu data is:",
													 a->which, a->ver, a->start,
													 a->end, a->data?"Yes":"No",
													 (long long unsigned)a->dataLen));
#ifdef DEBUG
										hexdump(a->data, a->dataLen);
                    putc('\n', stderr);
#endif
										charAttributes.addItem(a);
									}
									else if (attVal == SWG_ATTRSET) {
									  // bah, yet another loop
										UT_DEBUGMSG(("SDW: ...paragraph attributes found\n"));
										while (gsf_input_tell(mDocStream) < eoa) {
											// reusing attVal and attSize
											streamRead(mDocStream, attVal);
											gsf_off_t eoa2; // end of attribute
											readRecSize(mDocStream, attSize, &eoa2);
											if (attVal == SWG_ATTRIBUTE) {
												TextAttr a;
												streamRead(mDocStream, a, eoa2);
                        if (!a.attrVal.empty()) {
  												if (a.isPara)
	  												UT_String_setProperty(pAttrs, a.attrName, a.attrVal);
		  										else
			  										UT_String_setProperty(attrs, a.attrName, a.attrVal);
                        }
						UT_DEBUGMSG(("SDW: ......found paragraph attr, which=0x%x, ver=0x%x, start=%u, end=%u (string now %s) Data:%s Len=%lld Data:", a.which, a.ver, (a.startSet?a.start:0), (a.endSet?a.end:0), attrs.c_str(), (a.data ? "Yes" : "No"), (long long)a.dataLen));
#ifdef DEBUG
												hexdump(a.data, a.dataLen);
                        putc('\n', stderr);
#endif
											}
											if (gsf_input_seek(mDocStream, eoa2, G_SEEK_SET))
												return UT_IE_BOGUSDOCUMENT;
										}
									}
									else {
										UT_DEBUGMSG(("SDW: ...unknown attribute '%c' found (start=%" GSF_OFF_T_FORMAT " end=%" GSF_OFF_T_FORMAT ")\n", attVal, gsf_input_tell(mDocStream), eoa));
									}
									if (gsf_input_seek(mDocStream, eoa, G_SEEK_SET))
										return UT_IE_BOGUSDOCUMENT;
								}

								PP_PropertyVector attributes = {
									"props",
									pAttrs.c_str()
								};
								// first, insert the paragraph
								if (!appendStrux(PTX_Block, attributes))
									return UT_IE_NOMEMORY;

								UT_String pca(attrs); // character attributes for the whole paragraph
								// now insert the spans of text
								UT_uint32 len = textNode.length();
								UT_uint32 lastInsPos = 0;
								for (UT_uint32 i = 1; i < len; i++) {
									bool doInsert = false; // whether there was an attribute change
									for (UT_sint32 j = 0; j < charAttributes.getItemCount(); j++) {
										const TextAttr* a = reinterpret_cast<const TextAttr*>(charAttributes[j]);
										// clear the last attribute, if set
										if (a->endSet && a->end == (i - 1)) {
											if (a->isOff) {
												UT_String propval = UT_String_getPropVal(pca, a->attrName);
												UT_String_setProperty(attrs, a->attrName, propval);
											}
											else
												UT_String_removeProperty(attrs, a->attrName);
										}

										// now set new attribute, if needed
										if (a->startSet && a->start == (i - 1)) {
											if (a->isPara)
												UT_String_setProperty(pAttrs, a->attrName, a->attrVal);
											else if (a->isOff)
												UT_String_removeProperty(attrs, a->attrName);
											else
												UT_String_setProperty(attrs, a->attrName, a->attrVal);
										}

										// insert if this is the last character, or if there was a format change
										if ((a->endSet && a->end == i) || (a->startSet && a->start == i))
											doInsert = true;
									}
									if (doInsert || i == (len - 1)) {
										attributes[1] = attrs.c_str();
										UT_DEBUGMSG(("SDW: Going to appendFmt with %s\n", attributes[1].c_str()));
										if (!appendFmt(attributes))
											return UT_IE_NOMEMORY; /* leave cast alone! */
										UT_DEBUGMSG(("SDW: About to insert %u-%u\n", lastInsPos, i));
										size_t spanLen = i - lastInsPos;
										if (i == (len - 1)) spanLen++;
										UT_UCS4String span = textNode.substr(lastInsPos, spanLen);
										appendSpan(span.ucs4_str(), spanLen);
										lastInsPos = i;
									}
								}

								UT_VECTOR_PURGEALL(TextAttr*, charAttributes);
								break;

							}
							case SWG_JOBSETUP: {
								// flags are apparently unused here. no idea why they are there.
								gsf_off_t newpos;
								UT_uint8 flags;
								readFlagRec(mDocStream, flags, &newpos);
								if (gsf_input_seek(mDocStream, newpos, G_SEEK_SET))
									return UT_IE_BOGUSDOCUMENT;
								UT_uint16 len, system;
								streamRead(mDocStream, len);
								streamRead(mDocStream, system);
								char printerName[64];
								streamRead(mDocStream, printerName, 64);
								char deviceName[32], portName[32], driverName[32];
								streamRead(mDocStream, deviceName, 32);
								streamRead(mDocStream, portName, 32);
								streamRead(mDocStream, driverName, 32);
								UT_DEBUGMSG(("SDW: Jobsetup: len %u sys 0x%x printer |%.64s| device |%.32s| port |%.32s| driver |%.32s|\n", len, system, printerName, deviceName, portName, driverName));

								if (system == JOBSET_FILE364_SYSTEM || system == JOBSET_FILE605_SYSTEM) {
									UT_uint16 len2, system2;
									streamRead(mDocStream, len2);
									streamRead(mDocStream, system2);
									UT_uint32 ddl; // driver data length
									streamRead(mDocStream, ddl);
									// now the interesting data
									UT_uint16 orient; // 0=portrait 1=landscape
									streamRead(mDocStream, orient);
									UT_uint16 paperBin;
									streamRead(mDocStream, paperBin);
									UT_uint16 paperFormat;
									streamRead(mDocStream, paperFormat);
									UT_uint32 width, height;
									streamRead(mDocStream, width);
									streamRead(mDocStream, height);
									UT_DEBUGMSG(("SDW: orient %u bin %u format %u width %u height %u\n", orient, paperBin, paperFormat, width, height));
									// rest of the data is ignored, seems to be printer specific anyway.
									// Use A4, Portrait by default
									PP_PropertyVector attributes = {
										"pagetype", "a4", // A4/Letter/...
										"orientation", "portrait",
										"width", "210",
										"height", "297",
										"units", "mm"
									};
									const char* sdwPaperToAbi[] = {
										"A3",
										"A4",
										"A5",
										"B4",
										"B5",
										"Letter",
										"Legal",
										"Tabloid/Ledger",
										"Custom"
									};
									if (paperFormat < sizeof(sdwPaperToAbi)/sizeof(*sdwPaperToAbi)) {
										attributes[1] = sdwPaperToAbi[paperFormat];
                                    }
									const char* sdwOrientToAbi[] = {
										"portrait",
										"landscape"
									};
									if (orient < sizeof(sdwOrientToAbi)/sizeof(*sdwOrientToAbi)) {
										attributes[3] = sdwOrientToAbi[orient];
                                    }
									attributes[5] = UT_std_string_sprintf("%f", static_cast<double>(width)/100);
									attributes[7] = UT_std_string_sprintf("%f", static_cast<double>(height)/100);

									getDoc()->setPageSizeFromFile(attributes);
								}
								break;

							}
							case SWG_EOF:
								done2 = true;
								break;
							default:
								UT_DEBUGMSG(("SDW: SWG_CONTENT: Skipping %u bytes for record type '%c' (starting at 0x%08llX)\n",
											 size2, type,
											 (long long)gsf_input_tell(mDocStream)));
						}
						if (gsf_input_seek(mDocStream, eor2, G_SEEK_SET))
							return UT_IE_BOGUSDOCUMENT;
					}
					break;
				}
				case SWG_STRINGPOOL:
				{
					if (mDocHdr.nVersion <= SWG_POOLIDS) {
						UT_ASSERT_HARMLESS(UT_NOT_IMPLEMENTED);
						break;
					}
					UT_uint8 encoding;
					streamRead(mDocStream, encoding);
					UT_iconv_t cd = findConverter(encoding);
					if (!UT_iconv_isValid(cd))
						throw UT_IE_IMPORTERROR;
					UT_uint16 count;
					streamRead(mDocStream, count);
					while (count--) {
						UT_uint16 id;
						streamRead(mDocStream, id);
						char* str;
						UT_uint16 len;
						::readByteString(mDocStream, str, &len);
						if (id == IDX_NOCONV_FF) {
							UT_ASSERT_HARMLESS(UT_NOT_IMPLEMENTED);
						}
						// FIXME: find a way to not have to copy and free 
						// the result of UT_convert_cd.... --hub
						UT_DEBUGMSG(("SDW: StringPool: found 0x%04x <-> %.*s\n", id, len, str));
						UT_UCS4Char* convertedString = reinterpret_cast<UT_UCS4Char*>(UT_convert_cd(str, len + 1, cd, NULL, NULL));
						mStringPool.insert(stringpool_map::value_type(id, convertedString));
						FREEP(convertedString);
                        delete [] str;
					}
                    UT_iconv_close(cd);
					break;
				}
				case SWG_COMMENT: // skip over comments
					break;
				case SWG_EOF:
					done = true;
					break;
				default:
					UT_DEBUGMSG(("SDW: Skipping %u bytes for record type '%c' (starting at 0x%08llX)\n", recSize, type, (long long)gsf_input_tell(mDocStream)));
			}
			// Seek to the end of the record, in case it wasn't read completely
			if (gsf_input_seek(mDocStream, eor, G_SEEK_SET))
				return UT_IE_BOGUSDOCUMENT;
		}

		UT_DEBUGMSG(("SDW: Done\n"));

		return UT_OK;
	}
	catch(UT_Error e) {
		UT_DEBUGMSG(("SDW: error %d\n", e));
		return e;
	}
	catch(...) {
		UT_DEBUGMSG(("SDW: Unknown error\n"));
		return UT_IE_BOGUSDOCUMENT;
	}
}
Beispiel #28
0
void streamRead(GsfInput* aStream, TextAttr& aAttr, gsf_off_t aEoa)
    throw(UT_Error)
{
	UT_uint8 flags;
	gsf_off_t newPos;
	readFlagRec(aStream, flags, &newPos);

	streamRead(aStream, aAttr.which);
	streamRead(aStream, aAttr.ver);
	if (flags & 0x10) {
		aAttr.startSet = true;
		streamRead(aStream, aAttr.start);
	}
	else
		aAttr.startSet = false;

	if (flags & 0x20) {
		aAttr.endSet = true;
		streamRead(aStream, aAttr.end);
	}
	else
		aAttr.endSet = false;

	if (gsf_input_seek(aStream, newPos, G_SEEK_SET))
		throw UT_IE_BOGUSDOCUMENT;

	gsf_off_t curPos = gsf_input_tell(aStream);
	if (curPos != aEoa) {
		// there is data
		aAttr.dataLen = aEoa - curPos;
		aAttr.data = new UT_uint8[aAttr.dataLen];
		streamRead(aStream, aAttr.data, aAttr.dataLen);
	}
  // LIST OF THE VALUES: http://ooo.ximian.com/lxr/source/sw/sw/inc/hintids.hxx#086
  // together with http://ooo.ximian.com/lxr/source/sw/sw/source/core/sw3io/sw3fmts.cxx#172
	switch (aAttr.which) {
		case 0x1004: // strikethrough
			aAttr.attrName = "text-decoration";
			if (!aAttr.data || aAttr.data[0])
				aAttr.attrVal = "line-through";
			else
				aAttr.isOff = true;
			break;
		case 0x1005: { // sub-/superscript
			if (aAttr.dataLen < 3)
				break;
			// first byte is size of text % of normal size
			UT_sint16 height = GSF_LE_GET_GINT16(aAttr.data + 1);	
			aAttr.attrName = "text-position";
			if (height > 0)
				aAttr.attrVal = "superscript";
			else if (height < 0)
				aAttr.attrVal = "subscript";
			else
				aAttr.isOff = true;
			break; }
		case 0x1006: { // font family
			if (!aAttr.data || aAttr.dataLen < 7)
				// 7 = 3 byte family etc., 2 byte name length, 2 byte style length
				break;
			aAttr.attrName = "font-family";
			// XXX TODO This code here assumes that the font names are in latin1
			UT_uint16 fontLen = GSF_LE_GET_GUINT16(aAttr.data + 3);
			UT_String_sprintf(aAttr.attrVal, "%.*s", fontLen, (aAttr.data + 5));
			
		break; }
    case 0x1007: // font height
      // structure: | height (2 byte, twips) | prop (?) (2 byte) (if version >= 2, if ver=1 1 byte) | unit (if version>=2) |
      // XXX we ignore "prop" and unit for now, they seem not used much
      aAttr.attrName = "font-size";
      if (aAttr.data)
        aAttr.attrVal = twipsToSizeString(GSF_LE_GET_GUINT16(aAttr.data));
      break;
		case 0x100a: // Italic
			aAttr.attrName = "font-style";
			if (!aAttr.data || aAttr.data[0]) // if there is data, first byte must be != 0
				// abiword doesn't support oblique, so always set italic
				aAttr.attrVal = "italic";
			else
				aAttr.isOff = true;
			break;
		case 0x100d: // Underline
			aAttr.attrName = "text-decoration";
			if (!aAttr.data || aAttr.data[0])
				aAttr.attrVal = "underline";
			else
				aAttr.isOff = true;
			break;
		case 0x100e: // Bold
			aAttr.attrName = "font-weight";
			if (!aAttr.data || aAttr.data[0] >= 8) // 8=Bold.
				aAttr.attrVal = "bold";
			else
				aAttr.isOff = true;
			break;
    case 0x4000: // line spacing
      aAttr.attrName = "line-height";
      aAttr.isPara = true;
      // prop space (s8) | inter space (s16) | height (u16) | rule (s8) | interrule (s8)
      if (aAttr.data && aAttr.dataLen >= 7) {
        // Abiword wants it as float value, StarOffice saves as percentage (e.g.
        // 150 for 1.5)
        float proportionalLineSpace = float(aAttr.data[0])/100;
        // But maybe we need to use the height - stored as twips, need points
        // (used for "exact" and "minimum" line spacing)
        // XXX inter-line spacing not supported by abiword (would be rule=0x00
        // interrule=0x02, value to use=inter space, unit twips)
        UT_String lineHeight = twipsToSizeString(GSF_LE_GET_GINT16(aAttr.data + 3));

        // We'll turn the bytes at 5 and 6 into a single integer, for easier
        // evaluation
        switch (GSF_LE_GET_GUINT16(aAttr.data + 5)) {
          case 0x0100: // proportional
            aAttr.attrVal = std_size_string(proportionalLineSpace);
            break;
          case 0x0001:
          case 0x0002:
            aAttr.attrVal = lineHeight;
            if (aAttr.data[5] == 2) // "minimum" case
              aAttr.attrVal += '+';
            break;
          default:
            UT_DEBUGMSG(("Unsupported linespacing: %02x %02x\n", aAttr.data[5], aAttr.data[6]));
        }
      }
      break;
		case 0x4001: // Alignment
			aAttr.attrName = "text-align";
			aAttr.isPara = true;
			if (aAttr.data) {
				switch (aAttr.data[0]) {
					case 0:
						aAttr.attrVal = "left";
						break;
					case 1:
						aAttr.attrVal = "right";
						break;
					case 2:
					case 4: // BLOCKLINE!? what's BLOCKLINE? I'm guessing justify.
						aAttr.attrVal = "justify";
						break;
					case 3:
						aAttr.attrVal = "center";
						break;
				}
			}
      break;
    case 0x4005: {// Tabstops
      aAttr.attrName = "tabstops";
      aAttr.isPara = true;
      // Data structure:
      // Count(8) | Position (in twips) (32) | Adjustment (8) | Decimal Separator (?) (8) | Fill character (8)
      // (total size per tab = 7)
      // UT_sint8 count = aAttr.data[0];
      for (UT_uint32 i = 1; (i + 6) < aAttr.dataLen; i += 7) {
        // Abiword wants: 12.3cm/L0, where 0 indicates what to fill with
        UT_uint16 posInTwips = GSF_LE_GET_GUINT32(aAttr.data + i);
        UT_String pos = twipsToSizeString(posInTwips);

        aAttr.attrVal += pos;
        aAttr.attrVal += '/';
        if (aAttr.data[i + 4] < sizeof(sTabAlignment)/sizeof(*sTabAlignment))
          aAttr.attrVal += sTabAlignment[aAttr.data[i + 4]];
        else
          aAttr.attrVal += 'L'; // fallback

        char fillIndex = '0';
        // Fill character
        switch (aAttr.data[i + 6]) {
          case '.':
            fillIndex = '1';
            break;
          case '-':
            fillIndex = '2';
            break;
          case '_':
            fillIndex = '3';
            break;
          case ' ':
            fillIndex = '0';
            break;
          default:
            UT_DEBUGMSG(("Filling with '%c' is not supported\n", aAttr.data[i + 6]));
        }
        aAttr.attrVal += fillIndex;
        aAttr.attrVal += ',';
      }
      }
      break;
    default:
      UT_DEBUGMSG(("SDW: unknown attribute 0x%x, compressed %d\n", aAttr.which, lcl_sw3io__CompressWhich(aAttr.which)));
	}

}
Beispiel #29
0
void
html_file_open (G_GNUC_UNUSED GOFileOpener const *fo, GOIOContext *io_context,
		WorkbookView *wb_view, GsfInput *input)
{
	guint8 const *buf;
	gsf_off_t size;
	int len, bomlen;
	htmlParserCtxtPtr ctxt;
	htmlDocPtr doc = NULL;
	xmlCharEncoding enc;
	GnmHtmlTableCtxt tc;

	g_return_if_fail (input != NULL);

	if (gsf_input_seek (input, 0, G_SEEK_SET))
		return;

	size = gsf_input_size (input);
	if (size >= 4) {
		size -= 4;
		buf = gsf_input_read (input, 4, NULL);
		if (buf != NULL) {
			enc = xmlDetectCharEncoding(buf, 4);
			switch (enc) {	/* Skip byte order mark */
			case XML_CHAR_ENCODING_UCS4BE:
			case XML_CHAR_ENCODING_UCS4LE:
			case XML_CHAR_ENCODING_UCS4_2143:
			case XML_CHAR_ENCODING_UCS4_3412:
			case XML_CHAR_ENCODING_EBCDIC:
				bomlen = 4;
				break;
			case XML_CHAR_ENCODING_UTF16BE:
			case XML_CHAR_ENCODING_UTF16LE:
				bomlen = 2;
				break;
			case XML_CHAR_ENCODING_UTF8:
				if (buf[0] == 0xef)
					bomlen = 3;
				else if (buf[0] == 0x3c)
					bomlen = 4;
				else
					bomlen = 0;
				break;
			case XML_CHAR_ENCODING_NONE:
				bomlen = 0;
				/* Try to detect unmarked UTF16LE
				   (Firefox Windows clipboard, drag data all platforms) */
				if ((buf[0] >= 0x20 || g_ascii_isspace(buf[0])) &&
				    buf[1] == 0 &&
				    (buf[2] >= 0x20 || g_ascii_isspace(buf[2])) &&
				    buf[3] == 0)
					enc =  XML_CHAR_ENCODING_UTF16LE;
				break;
			default:
				bomlen = 0;
			}
			ctxt = htmlCreatePushParserCtxt (
				NULL, NULL, (char const *)(buf + bomlen),
				4 - bomlen, gsf_input_name (input), enc);

			for (; size > 0 ; size -= len) {
				len = MIN (4096, size);
				buf = gsf_input_read (input, len, NULL);
				if (buf == NULL)
					break;
				htmlParseChunk (
					ctxt, (char const *)buf, len, 0);
			}

			htmlParseChunk (ctxt, (char const *)buf, 0, 1);
			doc = ctxt->myDoc;
			htmlFreeParserCtxt (ctxt);
		}
	}

	if (doc != NULL) {
		xmlNodePtr ptr;
		tc.sheet = NULL;
		tc.row   = -1;
		tc.wb_view = wb_view;
		for (ptr = doc->children; ptr != NULL ; ptr = ptr->next)
			html_search_for_tables (ptr, doc, wb_view, &tc);
		xmlFreeDoc (doc);
	} else
		go_io_error_info_set (io_context,
			go_error_info_new_str (_("Unable to parse the html.")));
}
bool IE_Imp_MSWrite::read_txt (int from, int to)
{
	static const char *currcp;
	int fcMac, pnChar, fcFirst, cfod, fc, fcLim;
	unsigned char page[0x80];
	UT_String properties, tmp;
	int dataLen = static_cast<UT_sint32>(mData.getLength());

	UT_DEBUGMSG(("    TXT:\n"));
	UT_DEBUGMSG(("     from = %d\n", from));
	UT_DEBUGMSG(("     to   = %d\n", to));

	fcMac = wri_struct_value(wri_file_header, "fcMac");
	pnChar = (fcMac + 127) / 128;

	fcFirst = 0x80;

	while (true)
	{
		gsf_input_seek(mFile, pnChar++ * 0x80, G_SEEK_SET);
		gsf_input_read(mFile, 0x80, page);

		fc = READ_DWORD(page);
		cfod = page[0x7f];

		UT_DEBUGMSG(("      fcFirst = %d\n", fc));
		UT_DEBUGMSG(("      cfod    = %d\n", cfod));

		if (fc != fcFirst) UT_WARNINGMSG(("read_txt: fcFirst wrong.\n"));

		// read all FODs (format descriptors)
		for (int fod = 0; fod < cfod; fod++)
		{
			int bfprop, cch, ftc, hps, fBold, fItalic, fUline, hpsPos;

			UT_DEBUGMSG(("       CHP-FOD #%02d:\n", fod + 1));

			// read a FOD (format descriptor)
			fcLim = READ_DWORD(page + 4 + fod * 6);
			bfprop = READ_WORD(page + 8 + fod * 6);

			UT_DEBUGMSG(("        fcLim  = %d\n", fcLim));
			UT_DEBUGMSG((bfprop == 0xffff ? "        bfprop = 0x%04X\n" : "        bfprop = %d\n", bfprop));

			// default CHP values
			ftc = 0;
			hps = 24;
			fBold = fItalic = fUline = hpsPos = 0;

			// if the CHP FPROPs (formatting properties) differ from the defaults, get them
			if (bfprop != 0xffff && bfprop + (cch = page[bfprop + 4]) < 0x80)
			{
				UT_DEBUGMSG(("         cch = %d\n", cch));

				if (cch >= 2) ftc = page[bfprop + 6] >> 2;
				if (cch >= 5) ftc |= (page[bfprop + 9] & 3) << 6;
				if (cch >= 3) hps = page[bfprop + 7];
				if (cch >= 2) fBold = page[bfprop + 6] & 1;
				if (cch >= 2) fItalic = page[bfprop + 6] & 2;
				if (cch >= 4) fUline = page[bfprop + 8] & 1;
				if (cch >= 6) hpsPos = page[bfprop + 10];
			}

			UT_DEBUGMSG(("         ftc           = %d\n", ftc));
			UT_DEBUGMSG(("         hps           = %d\n", hps));
			UT_DEBUGMSG(("         fBold         = %d\n", fBold));
			UT_DEBUGMSG(("         fItalic       = %d\n", fItalic));
			UT_DEBUGMSG(("         fUline        = %d\n", fUline));
			UT_DEBUGMSG(("         hpsPos        = %d\n", hpsPos));

			if (ftc >= wri_fonts_count)
			{
				UT_WARNINGMSG(("read_txt: Wrong font code.\n"));
				ftc = wri_fonts_count - 1;
			}

			if (from < fcLim && to >= fcFirst)
			{
				UT_LocaleTransactor lt(LC_NUMERIC, "C");
				UT_String_sprintf(properties, "font-weight:%s",
				                              fBold ? "bold" : "normal");

				if (hps != 24)
				{
					UT_String_sprintf(tmp, "; font-size:%dpt", hps / 2);
					properties += tmp;
				}

				if (fItalic) properties += "; font-style:italic";
				if (fUline) properties += "; text-decoration:underline";

				if (hpsPos)
				{
					UT_String_sprintf(tmp, "; text-position:%s",
					                       hpsPos < 128 ? "superscript" : "subscript");
					properties += tmp;
				}

				if (wri_fonts_count)
				{
					UT_String_sprintf(tmp, "; font-family:%s", wri_fonts[ftc].name);
					properties += tmp;
				}

				if (wri_fonts[ftc].codepage != currcp /*sic!*/)
				{
					set_codepage(wri_fonts[ftc].codepage);
					currcp = wri_fonts[ftc].codepage;
				}

				mText.clear();
				UT_DEBUGMSG(("         Text: "));

				while (fcFirst <= from && from < fcLim && from <= to && from - 0x80 < dataLen)
					translate_char(*mData.getPointer(from++ - 0x80), mText);

				UT_DEBUGMSG(("\n"));

				// new attributes, only if there was text
				if (mText.size() > 0)
				{
					const gchar *attributes[5];
					const UT_UCS4Char *text = mText.ucs4_str(), *p = text;
					size_t txtLen;

					UT_DEBUGMSG(("         Conv: %s\n", mText.utf8_str()));

					attributes[0] = PT_PROPS_ATTRIBUTE_NAME;
					attributes[1] = properties.c_str();
					attributes[2] = NULL;

					appendFmt(attributes);

					// check for page number (should only be in header or footer)
					while (*p && *p != (UT_UCS4Char) 0x01) p++;

					if (*p)
					{
						if (p - text) appendSpan(text, p - text);

						attributes[2] = PT_TYPE_ATTRIBUTE_NAME;
						attributes[3] = "page_number";
						attributes[4] = NULL;

						appendObject(PTO_Field, attributes);

						txtLen = mText.size() - (p - text) - 1;
						p++;
					}
					else
					{
						txtLen = mText.size();
						p = text;
					}

					if (txtLen) appendSpan(p, txtLen);
				}
			}

			fcFirst = fcLim;

			if (fcLim >= fcMac || fcFirst > to)
			{
				UT_DEBUGMSG(("       CHP-FODs end, fcLim (%d) >= fcMac (%d) or fcFirst (%d) > to (%d)\n", fcLim, fcMac, fcFirst, to));
				return true;
			}
		}
	}