Esempio n. 1
0
/**
 * gsf_open_pkg_parse_rel_by_id:
 * @xin: #GsfXMLIn
 * @id: target id
 * @dtd: #GsfXMLInNode
 * @ns: #GsfXMLInNS
 *
 * Convenience function to parse a related part.
 *
 * Returns: (transfer full): %NULL on success or a #GError on failure.
 **/
GError *
gsf_open_pkg_parse_rel_by_id (GsfXMLIn *xin, char const *id,
			      GsfXMLInNode const *dtd,
			      GsfXMLInNS const *ns)
{
	GError *res = NULL;
	GsfInput *cur_stream, *part_stream;

	g_return_val_if_fail (xin != NULL, NULL);

	cur_stream = gsf_xml_in_get_input (xin);

	if (NULL == id)
		return g_error_new (gsf_input_error_id(), gsf_open_pkg_error_id (),
			_("Missing id for part in '%s'"),
			gsf_input_name (cur_stream) );

	part_stream = gsf_open_pkg_open_rel_by_id (cur_stream, id, &res);
	if (NULL != part_stream) {
		GsfXMLInDoc *doc = gsf_xml_in_doc_new (dtd, ns);

		if (!gsf_xml_in_doc_parse (doc, part_stream, xin->user_state))
			res = g_error_new (gsf_input_error_id(), gsf_open_pkg_error_id (),
				_("Part '%s' in '%s' from '%s' is corrupt!"),
				id,
				gsf_input_name (part_stream),
				gsf_input_name (cur_stream) );
		gsf_xml_in_doc_free (doc);

		g_object_unref (part_stream);
	}
	return res;
}
Esempio n. 2
0
static void
ls_R (GsfInput *input)
{
	char const *name = gsf_input_name (input);
	gboolean is_dir = GSF_IS_INFILE (input) &&
		(gsf_infile_num_children (GSF_INFILE (input)) >= 0);
	/* Please see the comment on is_dir in test-cp-msole.c. */
	
	printf ("%c '%s'\t\t%" GSF_OFF_T_FORMAT "\n",
		(is_dir ? 'd' : ' '),
		(name != NULL) ? name : "",
		gsf_input_size (input));

	if (is_dir) {
		GsfInfile *infile = GSF_INFILE (input);
		int i;

		puts ("{");
		for (i = 0 ; i < gsf_infile_num_children (infile) ; i++)
			ls_R (gsf_infile_child_by_index (infile, i));
		puts ("}");
	}

	g_object_unref (G_OBJECT (input));
}
Esempio n. 3
0
/**
 * gsf_structured_blob_write:
 * @blob: #GsfStructuredBlob
 * @container: #GsfOutfile
 *
 * Dumps structured blob @blob onto the @container.  Will fail if the output is
 * not an Outfile and blob has multiple streams.
 *
 * Returns: %TRUE on success.
 **/
gboolean
gsf_structured_blob_write (GsfStructuredBlob *blob, GsfOutfile *container)
{
	GsfOutput *output;
	gboolean has_kids;

	g_return_val_if_fail (GSF_IS_STRUCTURED_BLOB (blob), FALSE);
	g_return_val_if_fail (GSF_IS_OUTFILE (container), FALSE);

	has_kids = (blob->children != NULL && blob->children->len > 0);

	output = gsf_outfile_new_child  (GSF_OUTFILE (container),
		gsf_input_name (GSF_INPUT (blob)),
		has_kids);
	if (has_kids) {
		GsfStructuredBlob *child_blob;
		unsigned i;

		for (i = 0 ; i < blob->children->len ; i++) {
			child_blob = g_ptr_array_index (blob->children, i);
			if (!gsf_structured_blob_write (child_blob, GSF_OUTFILE (output)))
				return FALSE;
		}
	}

	if (blob->data != NULL)
		gsf_output_write (output, blob->data->size, blob->data->buf);
	gsf_output_close (output);
	g_object_unref (output);

	return TRUE;
}
Esempio n. 4
0
UT_Error IE_Imp_XML::_loadFile(GsfInput * input)
{
	m_szFileName = gsf_input_name (input);

	UT_XML default_xml;
	UT_XML * parser = &default_xml;
	if (m_pParser) parser = m_pParser;

	parser->setListener (this);
	if (m_pReader) parser->setReader (m_pReader);

	// hack!!!
	size_t num_bytes = gsf_input_size(input);
	char * bytes = (char *)gsf_input_read(input, num_bytes, NULL);

	UT_Error err = parser->parse (bytes, num_bytes);
	
	if ((err != UT_OK) && (err != UT_IE_SKIPINVALID))
		m_error = UT_IE_BOGUSDOCUMENT;
	
	if (m_error != UT_OK)
		{
			UT_DEBUGMSG(("Problem reading document\n"));
			if(m_error != UT_IE_SKIPINVALID)
				m_szFileName = 0;
		}
	
	return m_error;
}
Esempio n. 5
0
/* Decompress infile in dest_dir. */
static void
decompress_infile (GsfInfile *infile,
                   gchar     *dest_dir)
{
    GError   *err = (GError *) NULL;
    int j = 0;

    for (j = 0; j < gsf_infile_num_children (infile); j++)
    {
        GsfInput *child = gsf_infile_child_by_index (infile, j);
        char const* filename = gsf_input_name (child);
        gboolean is_dir = gsf_infile_num_children (GSF_INFILE (child)) >= 0;

        if (is_dir)
        {
            gchar *dir_path = g_build_filename (dest_dir, filename, (gchar *) 0);
            decompress_infile (GSF_INFILE (child), dir_path);
            g_free (dir_path);
        }
        else
        {
            gchar *file_path = g_build_filename (dest_dir, filename, (gchar *) 0);
            GsfOutput *output = GSF_OUTPUT (gsf_output_stdio_new (file_path, &err));
            gsf_input_copy (child, output);
            gsf_output_close (output);
            g_object_unref (output);
            g_free (file_path);
        }

        g_object_unref (G_OBJECT (child));
    }
}
Esempio n. 6
0
static void
dump_child (GsfInfile *infile, char const *childname)
{
	GsfInput *child = gsf_infile_child_by_name (infile, childname);
	GsfInputTextline *textinput;
	unsigned char *res;
	int len = 0;

	if (child == NULL) {
		printf ("not an OpenOffice document\n");
		return;
	}

	textinput = (GsfInputTextline *)gsf_input_textline_new (child);
	if (textinput == NULL) {
		printf ("Could not read lines from %s",
			gsf_input_name (child));
		return;
	}

	do {
		res = gsf_input_textline_ascii_gets (textinput);
		if (res) {
			printf ("'%s'\n", res);
			len += strlen (res) + 1;
		}
	} while (res);

	printf ("Finished reading - %d bytes\n", len);
	g_object_unref (G_OBJECT (textinput));
	g_object_unref (G_OBJECT (child));
}
Esempio n. 7
0
static gboolean
go_plugin_file_opener_probe (GOFileOpener const *fo, GsfInput *input,
                               GOFileProbeLevel pl)
{
	GOPluginFileOpener *pfo = GO_PLUGIN_FILE_OPENER (fo);
	GOPluginServiceFileOpener *service_file_opener = GO_PLUGIN_SERVICE_FILE_OPENER (pfo->service);

	g_return_val_if_fail (GSF_IS_INPUT (input), FALSE);

	if (pl == GO_FILE_PROBE_FILE_NAME && service_file_opener->suffixes != NULL) {
		GSList *ptr;
		gchar const *extension;
		gchar *lowercase_extension;

		if (gsf_input_name (input) == NULL)
			return FALSE;
		extension = gsf_extension_pointer (gsf_input_name (input));
		if (extension == NULL)
			return FALSE;

		lowercase_extension = g_utf8_strdown (extension, -1);
		for (ptr = service_file_opener->suffixes; ptr != NULL ; ptr = ptr->next)
			if (0 == strcmp (lowercase_extension, ptr->data))
				break;
		g_free (lowercase_extension);
		return ptr != NULL;
	}

	if (service_file_opener->has_probe) {
		GOErrorInfo *ignored_error = NULL;

		go_plugin_service_load (pfo->service, &ignored_error);
		if (ignored_error != NULL) {
			go_error_info_print (ignored_error);
			go_error_info_free (ignored_error);
			return FALSE;
		} else if (service_file_opener->cbs.plugin_func_file_probe == NULL) {
			return FALSE;
		} else {
			gboolean res = service_file_opener->cbs.plugin_func_file_probe (fo, pfo->service, input, pl);
			gsf_input_seek (input, 0, G_SEEK_SET);
			return res;
		}
	} else {
		return FALSE;
	}
}
Esempio n. 8
0
/*
 * stf_read_workbook:
 * @fo: file opener
 * @enc: encoding of file
 * @context: command context
 * @book: workbook
 * @input: file to read from+convert
 *
 * Main routine, handles importing a file including all dialog mumbo-jumbo
 */
static void
stf_read_workbook (G_GNUC_UNUSED GOFileOpener const *fo,  gchar const *enc,
		   GOIOContext *context, GoView *view, GsfInput *input)
{
	DialogStfResult_t *dialogresult = NULL;
	char *name, *nameutf8 = NULL;
	char *data = NULL;
	size_t data_len;
	WorkbookView *wbv = GNM_WORKBOOK_VIEW (view);

	if (!GNM_IS_WBC_GTK (context->impl)) {
		go_io_error_string (context, _("This importer can only be used with a GUI."));
		return;
	}

	name = g_path_get_basename (gsf_input_name (input));
	nameutf8 = g_filename_to_utf8 (name, -1, NULL, NULL, NULL);
	g_free (name);
	if (!nameutf8) {
		g_warning ("Failed to convert filename to UTF-8.  This shouldn't happen here.");
		goto out;
	}

	data = stf_preparse (context, input, &data_len);
	if (!data)
		goto out;

	dialogresult = stf_dialog (WBC_GTK (context->impl),
				   enc, FALSE, NULL, FALSE,
				   nameutf8, data, data_len);
	if (dialogresult != NULL) {
		Workbook *book = wb_view_get_workbook (wbv);
		int cols = dialogresult->colcount, rows = dialogresult->rowcount;
		Sheet *sheet;

		gnm_sheet_suggest_size (&cols, &rows);
		sheet = sheet_new (book, nameutf8, cols, rows);
		workbook_sheet_attach (book, sheet);
		if (stf_store_results (dialogresult, sheet, 0, 0)) {
			workbook_recalc_all (book);
			resize_columns (sheet);
			workbook_set_saveinfo
				(book,
				 GO_FILE_FL_WRITE_ONLY,
				 go_file_saver_for_id
				 ("Gnumeric_stf:stf_assistant"));
		} else {
			/* the user has cancelled */
			/* the caller should notice that we have no sheets */
			workbook_sheet_delete (sheet);
		}
	}

 out:
	g_free (nameutf8);
	g_free (data);
	if (dialogresult != NULL)
		stf_dialog_result_free (dialogresult);
}
Esempio n. 9
0
/**
 * gsf_structured_blob_read:
 * @input: An input (potentially a GsfInfile) holding the blob
 *
 * Create a tree of binary blobs with unknown content from a #GsfInput or
 * #GsfInfile and store it in a newly created #GsfStructuredBlob.
 *
 * Returns: (transfer full): a new #GsfStructuredBlob object which the caller is responsible for.
 **/
GsfStructuredBlob *
gsf_structured_blob_read (GsfInput *input)
{
	GsfStructuredBlob *blob;
	gsf_off_t content_size;
	int i = 0;

	g_return_val_if_fail (GSF_IS_INPUT (input), NULL);

	blob = g_object_new (GSF_STRUCTURED_BLOB_TYPE, NULL);

	content_size = gsf_input_remaining (input);
	if (content_size > 0) {
		guint8 *buf = (guint8*)g_try_malloc (content_size);

		if (buf == NULL) {
			g_warning ("Failed attempting to allocate %" GSF_OFF_T_FORMAT " bytes",
				   content_size);

			g_object_unref (blob);
			return NULL;
		}

		gsf_input_read (input, content_size, buf);
		blob->data = gsf_shared_memory_new (buf, content_size, TRUE);
	}

	gsf_input_set_name (GSF_INPUT (blob), gsf_input_name (input));

	if (GSF_IS_INFILE (input))
		i = gsf_infile_num_children (GSF_INFILE (input));
	if (i > 0) {
		GsfInput	  *child;
		GsfStructuredBlob *child_blob;

		blob->children = g_ptr_array_sized_new (i);
		g_ptr_array_set_size  (blob->children, i);
		while (i-- > 0) {
			child = gsf_infile_child_by_index (GSF_INFILE (input), i);
			child_blob = gsf_structured_blob_read (child);
			g_object_unref (child);

			g_ptr_array_index (blob->children, i) = child_blob;
#if 0
			/*
			 * We don't need this, and setting it causes circular
			 * links.
			 */
			gsf_input_set_container (GSF_INPUT (child_blob),
						 GSF_INFILE (blob));
#endif
		}
	}

	return blob;
}
Esempio n. 10
0
static char const *
blob_name_by_index (GsfInfile *infile, int i)
{
	GsfStructuredBlob const *blob = (GsfStructuredBlob *) infile;
	if (blob->children != NULL) {
		g_return_val_if_fail (i < 0 || (unsigned)i >= blob->children->len, NULL);
		return gsf_input_name (g_ptr_array_index (blob->children, i));
	}
	return NULL;
}
Esempio n. 11
0
void
sylk_file_open (GOFileOpener const *fo,
		GOIOContext	*io_context,
                WorkbookView	*wb_view,
		GsfInput	*input)
{
	SylkReader state;
	char const *input_name;
	char *name = NULL;
	int i;
	GnmLocale *locale;

	memset (&state, 0, sizeof (state));
	state.io_context = io_context;
	state.input	 = (GsfInputTextline *) gsf_input_textline_new (input);
	state.converter  = g_iconv_open ("UTF-8", "ISO-8859-1");
	state.finished	 = FALSE;
	state.line_no	 = 0;

	state.pp.wb = wb_view_get_workbook (wb_view);

	if (NULL == (input_name = gsf_input_name (input)) ||
	    NULL == (name = g_path_get_basename (input_name)) ||
	    '\0' == *name) {
		g_free (name);
		name = g_strdup ("Sheet");
	}

	state.pp.sheet = sheet_new (state.pp.wb, name, 256, 65536);
	workbook_sheet_attach (state.pp.wb, state.pp.sheet);
	g_free (name);

	state.pp.eval.col = state.pp.eval.row = 1;
	state.convs = gnm_conventions_xls_r1c1;

	state.formats	= g_ptr_array_new ();
	state.fonts	= g_ptr_array_new ();

	locale = gnm_push_C_locale ();
	sylk_parse_sheet (&state);
	gnm_pop_C_locale (locale);
	workbook_set_saveinfo (state.pp.wb, GO_FILE_FL_AUTO,
		go_file_saver_for_id ("Gnumeric_sylk:sylk"));

	for (i = state.fonts->len ; i-- > 0 ; )
		gnm_style_unref (g_ptr_array_index (state.fonts, i));
	g_ptr_array_free (state.fonts, TRUE);

	for (i = state.formats->len ; i-- > 0 ; )
		go_format_unref (g_ptr_array_index (state.formats, i));
	g_ptr_array_free (state.formats, TRUE);

	gsf_iconv_close (state.converter);
	g_object_unref (G_OBJECT (state.input));
}
Esempio n. 12
0
/**
 * gsf_open_pkg_open_rel_by_id :
 * @opkg : #GsfInput
 * @id : target id
 * @err : optionally %NULL
 *
 * New in 1.14.7
 *
 * Open @opkg's relation @id
 *
 * Returns: A new GsfInput or %NULL, and sets @err if possible.
 **/
GsfInput *
gsf_open_pkg_open_rel_by_id (GsfInput *opkg, char const *id, GError **err)
{
	GsfOpenPkgRel *rel = NULL;
	GsfOpenPkgRels *rels = gsf_open_pkg_get_rels (opkg);

	if (NULL != rels && NULL != (rel = g_hash_table_lookup (rels->by_id, id)))
		return gsf_open_pkg_open_rel (opkg, rel, err);
	if (err)
		*err = g_error_new (gsf_input_error_id(), gsf_open_pkg_error_id (),
			_("Unable to find part id='%s' for '%s'"),
			id, gsf_input_name (opkg) );
	return NULL;
}
Esempio n. 13
0
/**
 * gsf_open_pkg_open_rel_by_type:
 * @opkg: #GsfInput
 * @type: target type
 * @err: optionally %NULL
 *
 * New in 1.14.9
 *
 * Open one of @opkg's relationships with type=@type.
 *
 * Returns: (transfer full): A new GsfInput or %NULL, and sets @err if possible.
 **/
GsfInput *
gsf_open_pkg_open_rel_by_type (GsfInput *opkg, char const *type, GError **err)
{
	GsfOpenPkgRel *rel = gsf_open_pkg_lookup_rel_by_type (opkg, type);

	if (rel)
		return gsf_open_pkg_open_rel (opkg, rel, err);

	if (err)
		*err = g_error_new (gsf_input_error_id(), gsf_open_pkg_error_id (),
			_("Unable to find part with type='%s' for '%s'"),
			type, gsf_input_name (opkg) );
	return NULL;
}
Esempio n. 14
0
static GsfInput   *
blob_child_by_name (GsfInfile *infile, char const *name, GError **err)
{
	GsfStructuredBlob const *blob = (GsfStructuredBlob *) infile;
	if (blob->children != NULL) {
		unsigned i;

		for (i = 0 ; i < blob->children->len ;) {
			GsfInput *child = g_ptr_array_index (blob->children, i);
			if (child && !strcmp (gsf_input_name (child), name))
				return gsf_input_dup (child, err);
		}
	}
	return NULL;
}
Esempio n. 15
0
/**
 * gsf_input_textline_new:
 * @source: in some combination of ascii and utf8
 *
 * <note>This adds a reference to @source.</note>
 *
 * Returns: a new file or %NULL.
 **/
GsfInput *
gsf_input_textline_new (GsfInput *source)
{
	GsfInputTextline *input;

	g_return_val_if_fail (source != NULL, NULL);

	input = g_object_new (GSF_INPUT_TEXTLINE_TYPE, NULL);
	input->source = g_object_ref (source);
	input->buf = NULL;
	input->buf_size = 0;
	gsf_input_set_size (GSF_INPUT (input), gsf_input_size (source));
	gsf_input_set_name (GSF_INPUT (input), gsf_input_name (source));

	return GSF_INPUT (input);
}
Esempio n. 16
0
/**
 * gsf_open_pkg_open_rel:
 * @opkg: #GsfInput
 * @rel: #GsfOpenPkgRel
 * @err: #GError.
 *
 * Returns: (transfer full): a new #GsfInput which the called needs to unref, or %NULL and sets @err
 **/
GsfInput *
gsf_open_pkg_open_rel (GsfInput *opkg, GsfOpenPkgRel const *rel,
		       G_GNUC_UNUSED GError **err /* just in case we need it one day */ )
{
	GsfInput *res = NULL;
	GsfInfile *parent, *prev_parent;
	gchar **elems;
	unsigned i;

	g_return_val_if_fail (rel != NULL, NULL);
	g_return_val_if_fail (opkg != NULL, NULL);

	/* References from the root use children of opkg
	 * References from a child are relative to siblings of opkg */
	parent = gsf_input_name (opkg)
		? gsf_input_container (opkg) : GSF_INFILE (opkg);
	g_object_ref (parent);

	elems = g_strsplit (rel->target, "/", 0);
	for (i = 0 ; elems[i] && NULL != parent ; i++) {
		if (0 == strcmp (elems[i], ".") || '\0' == *elems[i])
			continue; /* ignore '.' and empty */

		prev_parent = parent;
		if (0 == strcmp (elems[i], "..")) {
			parent = gsf_input_container (GSF_INPUT (parent));
			res = NULL;	/* only return newly created children */
			if (NULL != parent) {
				/* check for attempt to gain access outside the zip file */
				if (G_OBJECT_TYPE (parent) == G_OBJECT_TYPE (prev_parent))
					g_object_ref (parent);
				else
					parent = NULL;
			}
		} else {
			res = gsf_infile_child_by_name (parent, elems[i]);
			if (NULL != elems[i+1]) {
				g_return_val_if_fail (GSF_IS_INFILE (res), NULL);
				parent = GSF_INFILE (res);
			}
		}
		g_object_unref (prev_parent);
	}
	g_strfreev (elems);

	return res;
}
Esempio n. 17
0
UT_Error IE_Imp_EPUB::readPackage()
{
    gchar **aname = g_strsplit(m_rootfilePath.c_str(), G_DIR_SEPARATOR_S, 0);
    GsfInput* opf = gsf_infile_child_by_aname(m_epub, (const char**) aname);

    UT_DEBUGMSG(("Getting parent\n"));
    GsfInfile* opfParent = gsf_input_container(opf);
    m_opsDir = std::string(gsf_input_name(GSF_INPUT(opfParent)));

    UT_DEBUGMSG(("OPS dir: %s\n", m_opsDir.c_str()));

    if (opf == NULL)
    {
        UT_DEBUGMSG(("Can`t open .opf file\n"));
        return UT_ERROR;
    }

    size_t opfSize = gsf_input_size(opf);
    gchar* opfXml = (gchar*) gsf_input_read(opf, opfSize, NULL);

    UT_XML opfParser;
    OpfListener opfListener;
    opfParser.setListener(&opfListener);
    if (opfParser.sniff(opfXml, opfSize, "package"))
    {
        UT_DEBUGMSG(("Parsing opf file\n"));
        opfParser.parse(opfXml, opfSize);
    }
    else
    {
        UT_DEBUGMSG(("Incorrect opf file found \n"));
        return UT_ERROR;
    }

    g_strfreev(aname);
    g_object_unref(G_OBJECT(opf));
    //g_object_unref(G_OBJECT(opfParent));

    m_spine = opfListener.getSpine();
    m_manifestItems = opfListener.getManifestItems();

    return UT_OK;
}
Esempio n. 18
0
static GsfOpenPkgRels *
gsf_open_pkg_get_rels (GsfInput *opkg)
{
	GsfOpenPkgRels *rels = NULL;

	g_return_val_if_fail (opkg != NULL, NULL);

	if (NULL == (rels = g_object_get_data (G_OBJECT (opkg), "OpenPkgRels"))) {
		char const *part_name = gsf_input_name (opkg);
		GsfXMLInDoc *rel_doc;
		GsfInput *rel_stream;

		if (NULL != part_name) {
			GsfInfile *container = gsf_input_container (opkg);
			char *rel_name;

			g_return_val_if_fail (container != NULL, NULL);

			rel_name = g_strconcat (part_name, ".rels", NULL);
			rel_stream = gsf_infile_child_by_vname (container, "_rels", rel_name, NULL);
			g_free (rel_name);
		} else /* the root */
			rel_stream = gsf_infile_child_by_vname (GSF_INFILE (opkg), "_rels", ".rels", NULL);

		if (NULL != rel_stream) {
			rels = g_new (GsfOpenPkgRels, 1);
			rels->by_id = g_hash_table_new_full (g_str_hash, g_str_equal,
				NULL, (GDestroyNotify)gsf_open_pkg_rel_free);
			rels->by_type = g_hash_table_new (g_str_hash, g_str_equal);

			rel_doc = gsf_xml_in_doc_new (open_pkg_rel_dtd, open_pkg_ns);
			(void) gsf_xml_in_doc_parse (rel_doc, rel_stream, rels);

			gsf_xml_in_doc_free (rel_doc);
			g_object_unref (rel_stream);
		}

		g_object_set_data_full (G_OBJECT (opkg), "OpenPkgRels", rels,
			(GDestroyNotify) gsf_open_pkg_rels_free);
	}

	return rels;
}
Esempio n. 19
0
File: gsf.c Progetto: arcean/libgsf
static void
ls_R (GsfInput *input, char const *prefix)
{
	char const *name = gsf_input_name (input);
	GsfInfile *infile = GSF_IS_INFILE (input) ? GSF_INFILE (input) : NULL;
	gboolean is_dir = infile && gsf_infile_num_children (infile) > 0;
	char *full_name;
	char *new_prefix;

	if (prefix) {
		char *display_name = name ?
			g_filename_display_name (name)
			: g_strdup ("?");
		full_name = g_strconcat (prefix,
					 display_name,
					 NULL);
		new_prefix = g_strconcat (full_name, "/", NULL);
		g_free (display_name);
	} else {
		full_name = g_strdup ("*root*");
		new_prefix = g_strdup ("");
	}

	g_print ("%c %10" GSF_OFF_T_FORMAT " %s\n",
		 (is_dir ? 'd' : 'f'),
		 gsf_input_size (input),
		 full_name);

	if (is_dir) {
		int i;
		for (i = 0 ; i < gsf_infile_num_children (infile) ; i++) {
			GsfInput *child = gsf_infile_child_by_index (infile, i);
			/* We can get NULL here in case of file corruption.  */
			if (child) {
				ls_R (child, new_prefix);
				g_object_unref (child);
			}
		}
	}

	g_free (full_name);
	g_free (new_prefix);
}
UT_Error OXMLi_PackageManager::_parseStream( GsfInput * stream, OXMLi_StreamListener * pListener)
{
	UT_return_val_if_fail(stream != NULL && pListener != NULL , UT_ERROR);

	//First, we check if this stream has already been parsed before
	std::string part_name = gsf_input_name(stream); //TODO: determine if part names are truly unique
	std::map<std::string, bool>::iterator it;
	it = m_parsedParts.find(part_name);
	if (it != m_parsedParts.end() && it->second) {
		//this stream has already been parsed successfully
		return UT_OK;
	}

	UT_Error ret = UT_OK;
	guint8 const *data = NULL;
	const char * cdata = NULL;
	size_t len = 0;

	UT_XML reader;
	reader.setListener(pListener);

	if (gsf_input_size (stream) > 0) {
		len = gsf_input_remaining (stream);
		if (len > 0) {
			data = gsf_input_read (stream, len, NULL);
			if (NULL == data) {
				g_object_unref (G_OBJECT (stream));
				return UT_ERROR;
			}
			cdata = (const char *)data;
			ret = reader.parse (cdata, len);
		}
	}

	//There are two error codes to check here.  
	if (ret == UT_OK && pListener->getStatus() == UT_OK)
		m_parsedParts[part_name] = true;

	//We prioritize the one from UT_XML when returning.
	return ret == UT_OK ? pListener->getStatus() : ret;
}
Esempio n. 21
0
/**
 * gsf_input_gzip_new :
 * @source : The underlying data source.
 * @err	   : optionally %NULL.
 *
 * Adds a reference to @source.
 *
 * Returns: a new file or %NULL.
 **/
GsfInput *
gsf_input_gzip_new (GsfInput *source, GError **err)
{
	GsfInputGZip *gzip;

	g_return_val_if_fail (GSF_IS_INPUT (source), NULL);

	gzip = g_object_new (GSF_INPUT_GZIP_TYPE,
			     "source", source,
			     NULL);
	if (G_UNLIKELY (NULL == gzip)) return NULL;

	if (gzip->err) {
		if (err)
			*err = g_error_copy (gzip->err);
		g_object_unref (gzip);
		return NULL;
	}
	gsf_input_set_name (GSF_INPUT (gzip), gsf_input_name (source));

	return GSF_INPUT (gzip);
}
/**
 * Parses the image stream and returns the image data
 */
UT_ByteBuf* OXMLi_PackageManager::parseImageStream(const char * id)
{
	GsfInput * parent = _getDocumentStream();
	GsfInput * stream = getChildById(parent, id);

	//First, we check if this stream has already been parsed before
	std::string part_name = gsf_input_name(stream); //TODO: determine if part names are truly unique
	std::map<std::string, bool>::iterator it;
	it = m_parsedParts.find(part_name);
	if (it != m_parsedParts.end() && it->second) {
		//this stream has already been parsed successfully
		return NULL;
	}

	UT_ByteBuf* buffer = new UT_ByteBuf();
	buffer->insertFromInput(0, stream);
	g_object_unref (G_OBJECT (stream));

	m_parsedParts[part_name] = true;

	return buffer;
}
/*! 
  Construct an importer of the right type.
 \param pDocument Document
 \param input:
 \param ieft Desired filetype - pass IEFT_Unknown for best guess
 \param ppie Pointer to return importer in
 \param pieft Pointer to fill in actual filetype

 Caller is responsible for deleting the importer object
 when finished with it.
 This function should closely match IE_Exp::contructExporter()
*/
UT_Error IE_Imp::constructImporter(PD_Document * pDocument,
								   GsfInput * input,
								   IEFileType ieft,
								   IE_Imp ** ppie,
								   IEFileType * pieft)
{
	bool bUseGuesswork = (ieft != IEFT_Unknown);
	
	UT_return_val_if_fail(pDocument, UT_ERROR);
	UT_return_val_if_fail(ieft != IEFT_Unknown || (input), UT_ERROR);
	UT_return_val_if_fail(ppie, UT_ERROR);

	UT_uint32 nrElements = getImporterCount();

	// no filter will support IEFT_Unknown, so we try to detect
	// from the contents of the file or the filename suffix
	// the importer to use and assign that back to ieft.
	// Give precedence to the file contents
	if (ieft == IEFT_Unknown && input)
	{
		UT_Confidence_t   best_confidence = UT_CONFIDENCE_ZILCH;
		IE_ImpSniffer * best_sniffer = 0;

		for (UT_uint32 k=0; k < nrElements; k++)
		  {
		    IE_ImpSniffer * s = IE_IMP_Sniffers.getNthItem (k);

		    UT_Confidence_t content_confidence = UT_CONFIDENCE_ZILCH;
		    UT_Confidence_t suffix_confidence = UT_CONFIDENCE_ZILCH;

			{
				GsfInputMarker marker(input);
				content_confidence = s->recognizeContents(input);
			}
			const IE_SuffixConfidence * sc = s->getSuffixConfidence();
			while (sc && !sc->suffix.empty() && suffix_confidence != UT_CONFIDENCE_PERFECT) {
				/* suffixes do not have a leading '.' */
				// we use g_str_has_suffix like this to make sure we properly autodetect the extensions
				// of files that have dots in their names, like foo.bar.abw
				std::string suffix = std::string(".") + sc->suffix;
				if (g_str_has_suffix(gsf_input_name (input), suffix.c_str()) && 
					sc->confidence > suffix_confidence) {
					suffix_confidence = sc->confidence;
				}
				sc++;
			}

		    UT_Confidence_t confidence = s_confidence_heuristic ( content_confidence, 
																  suffix_confidence ) ;
		    
		    if ( confidence > CONFIDENCE_THRESHOLD && confidence >= best_confidence )
				{
					best_sniffer = s;
					best_confidence = confidence;
					ieft = (IEFileType) (k+1);

					// short-circuit when we have perfect confidence for both
					if (suffix_confidence == UT_CONFIDENCE_PERFECT &&
					    content_confidence == UT_CONFIDENCE_PERFECT)
						break;
				}
		  }
		if (best_sniffer)
			{
				if (pieft != NULL) *pieft = ieft;
				return best_sniffer->constructImporter (pDocument, ppie);
			}
	}

	if (ieft == IEFT_Unknown)
	{
	   	// maybe they're trying to open an image directly?
	   	IE_ImpGraphic *pIEG;
 		UT_Error errorCode = IE_ImpGraphic::constructImporter(input, IEGFT_Unknown, &pIEG);
		if (!errorCode && pIEG) 
 		{
			// tell the caller the type of importer they got
		   	if (pieft != NULL) *pieft = IEFT_Unknown; // to force a save-as

		   	// create the importer 
			*ppie = new IE_Imp_GraphicAsDocument(pDocument);
		   	if (*ppie) {
			   	// tell the importer where to get the graphic
			   	((IE_Imp_GraphicAsDocument*)(*ppie))->setGraphicImporter(pIEG);
			   	return UT_OK;
			} else {
			   	delete pIEG;
				return UT_IE_NOMEMORY;
			}
		}
		else
 		{
	   		// as a last resort, just try importing it as text  :(
			ieft = IE_Imp::fileTypeForSuffix(".txt");
		}
	}

	UT_ASSERT_HARMLESS(ieft != IEFT_Unknown);

	// tell the caller the type of importer they got
	if (pieft != NULL) 
		*pieft = ieft;

	for (UT_uint32 k=0; k < nrElements; k++)
	{
		IE_ImpSniffer * s = IE_IMP_Sniffers.getNthItem (k);
		if (s->supportsFileType(ieft))
			return s->constructImporter(pDocument,ppie);
	}

	// if we got here, no registered importer handles the
	// type of file we're supposed to be reading.
	// assume it is our format and try to read it.
	// if that fails, just give up.
	if (bUseGuesswork)
	{
		*ppie = new IE_Imp_AbiWord_1(pDocument);
		return ((*ppie) ? UT_OK : UT_IE_NOMEMORY);
	}
	else
		return UT_ERROR;
}
Esempio n. 24
0
static gboolean
csv_tsv_probe (GOFileOpener const *fo, GsfInput *input, GOFileProbeLevel pl)
{
	/* Rough and ready heuristic.  If the first N bytes have no
	 * unprintable characters this may be text */
	const gsf_off_t N = 512;

	if (pl == GO_FILE_PROBE_CONTENT) {
		guint8 const *header;
		gsf_off_t i;
		char const *enc = NULL;
		GString *header_utf8;
		char const *p;
		gboolean ok = TRUE;

		if (gsf_input_seek (input, 0, G_SEEK_SET))
			return FALSE;
		i = gsf_input_remaining (input);

		/* If someone ships us an empty file, accept it only if
		   it has a proper name.  */
		if (i == 0)
			return csv_tsv_probe (fo, input, GO_FILE_PROBE_FILE_NAME);

		if (i > N) i = N;
		if (NULL == (header = gsf_input_read (input, i, NULL)))
			return FALSE;

		enc = go_guess_encoding (header, i, NULL, &header_utf8, NULL);
		if (!enc)
			return FALSE;

		for (p = header_utf8->str; *p; p = g_utf8_next_char (p)) {
			gunichar uc = g_utf8_get_char (p);
			/* isprint might not be true for these: */
			if (uc == '\n' || uc == '\t' || uc == '\r')
				continue;
			/* Also, ignore a byte-order mark which may be used to
			 * indicate UTF-8; see
			 * http://en.wikipedia.org/wiki/Byte_Order_Mark for
			 * background.
			 */
			if (p == header_utf8->str && uc == 0x0000FEFF) {
				continue;
			}
			if (!g_unichar_isprint (uc)) {
				ok = FALSE;
				break;
			}
		}

		g_string_free (header_utf8, TRUE);
		return ok;
	} else {
		char const *name = gsf_input_name (input);
		if (name == NULL)
			return FALSE;
		name = gsf_extension_pointer (name);
		return (name != NULL &&
			(g_ascii_strcasecmp (name, "csv") == 0 ||
			 g_ascii_strcasecmp (name, "tsv") == 0 ||
			 g_ascii_strcasecmp (name, "txt") == 0));
	}
}
Esempio n. 25
0
/*
 * stf_read_workbook_auto_csvtab:
 * @fo: file opener
 * @enc: optional encoding
 * @context: command context
 * @book: workbook
 * @input: file to read from+convert
 *
 * Attempt to auto-detect CSV or tab-delimited file
 */
static void
stf_read_workbook_auto_csvtab (G_GNUC_UNUSED GOFileOpener const *fo, gchar const *enc,
			       GOIOContext *context,
			       GoView *view, GsfInput *input)
{
	Sheet *sheet;
	Workbook *book;
	char *name;
	char *data;
	GString *utf8data;
	size_t data_len;
	StfParseOptions_t *po;
	const char *gsfname;
	int cols, rows, i;
	GStringChunk *lines_chunk;
	GPtrArray *lines;
	WorkbookView *wbv = GNM_WORKBOOK_VIEW (view);

	g_return_if_fail (context != NULL);
	g_return_if_fail (wbv != NULL);

	book = wb_view_get_workbook (wbv);

	data = stf_preparse (context, input, &data_len);
	if (!data)
		return;

	enc = go_guess_encoding (data, data_len, enc, &utf8data, NULL);
	g_free (data);

	if (!enc) {
		go_cmd_context_error_import (GO_CMD_CONTEXT (context),
				     _("That file is not in the given encoding."));
		return;
	}

	clear_stray_NULs (context, utf8data);

	/*
	 * Try to get the filename we're reading from.  This is not a
	 * great way.
	 */
	gsfname = gsf_input_name (input);

	{
		const char *ext = gsf_extension_pointer (gsfname);
		gboolean iscsv = ext && strcasecmp (ext, "csv") == 0;
		if (iscsv)
			po = stf_parse_options_guess_csv (utf8data->str);
		else
			po = stf_parse_options_guess (utf8data->str);
	}

	lines_chunk = g_string_chunk_new (100 * 1024);
	lines = stf_parse_general (po, lines_chunk,
				   utf8data->str, utf8data->str + utf8data->len);
	rows = lines->len;
	cols = 0;
	for (i = 0; i < rows; i++) {
		GPtrArray *line = g_ptr_array_index (lines, i);
		cols = MAX (cols, (int)line->len);
	}
	gnm_sheet_suggest_size (&cols, &rows);
	stf_parse_general_free (lines);
	g_string_chunk_free (lines_chunk);

	name = g_path_get_basename (gsfname);
	sheet = sheet_new (book, name, cols, rows);
	g_free (name);
	workbook_sheet_attach (book, sheet);

	if (stf_parse_sheet (po, utf8data->str, NULL, sheet, 0, 0)) {
		gboolean is_csv;
		workbook_recalc_all (book);
		resize_columns (sheet);
		if (po->cols_exceeded || po->rows_exceeded) {
			stf_warning (context,
				     _("Some data did not fit on the "
				       "sheet and was dropped."));
		}
		is_csv = po->sep.chr && po->sep.chr[0] == ',';
		workbook_set_saveinfo
			(book,
			 GO_FILE_FL_WRITE_ONLY,
			 go_file_saver_for_id
			 (is_csv ? "Gnumeric_stf:stf_csv" : "Gnumeric_stf:stf_assistant"));
	} else {
		workbook_sheet_delete (sheet);
		go_cmd_context_error_import (GO_CMD_CONTEXT (context),
			_("Parse error while trying to parse data into sheet"));
	}


	stf_parse_options_free (po);
	g_string_free (utf8data, TRUE);
}
Esempio n. 26
0
void
html_file_open (G_GNUC_UNUSED GOFileOpener const *fo, GOIOContext *io_context,
		WorkbookView *wb_view, GsfInput *input)
{
	guint8 const *buf;
	gsf_off_t size;
	int len, bomlen;
	htmlParserCtxtPtr ctxt;
	htmlDocPtr doc = NULL;
	xmlCharEncoding enc;
	GnmHtmlTableCtxt tc;

	g_return_if_fail (input != NULL);

	if (gsf_input_seek (input, 0, G_SEEK_SET))
		return;

	size = gsf_input_size (input);
	if (size >= 4) {
		size -= 4;
		buf = gsf_input_read (input, 4, NULL);
		if (buf != NULL) {
			enc = xmlDetectCharEncoding(buf, 4);
			switch (enc) {	/* Skip byte order mark */
			case XML_CHAR_ENCODING_UCS4BE:
			case XML_CHAR_ENCODING_UCS4LE:
			case XML_CHAR_ENCODING_UCS4_2143:
			case XML_CHAR_ENCODING_UCS4_3412:
			case XML_CHAR_ENCODING_EBCDIC:
				bomlen = 4;
				break;
			case XML_CHAR_ENCODING_UTF16BE:
			case XML_CHAR_ENCODING_UTF16LE:
				bomlen = 2;
				break;
			case XML_CHAR_ENCODING_UTF8:
				if (buf[0] == 0xef)
					bomlen = 3;
				else if (buf[0] == 0x3c)
					bomlen = 4;
				else
					bomlen = 0;
				break;
			case XML_CHAR_ENCODING_NONE:
				bomlen = 0;
				/* Try to detect unmarked UTF16LE
				   (Firefox Windows clipboard, drag data all platforms) */
				if ((buf[0] >= 0x20 || g_ascii_isspace(buf[0])) &&
				    buf[1] == 0 &&
				    (buf[2] >= 0x20 || g_ascii_isspace(buf[2])) &&
				    buf[3] == 0)
					enc =  XML_CHAR_ENCODING_UTF16LE;
				break;
			default:
				bomlen = 0;
			}
			ctxt = htmlCreatePushParserCtxt (
				NULL, NULL, (char const *)(buf + bomlen),
				4 - bomlen, gsf_input_name (input), enc);

			for (; size > 0 ; size -= len) {
				len = MIN (4096, size);
				buf = gsf_input_read (input, len, NULL);
				if (buf == NULL)
					break;
				htmlParseChunk (
					ctxt, (char const *)buf, len, 0);
			}

			htmlParseChunk (ctxt, (char const *)buf, 0, 1);
			doc = ctxt->myDoc;
			htmlFreeParserCtxt (ctxt);
		}
	}

	if (doc != NULL) {
		xmlNodePtr ptr;
		tc.sheet = NULL;
		tc.row   = -1;
		tc.wb_view = wb_view;
		for (ptr = doc->children; ptr != NULL ; ptr = ptr->next)
			html_search_for_tables (ptr, doc, wb_view, &tc);
		xmlFreeDoc (doc);
	} else
		go_io_error_info_set (io_context,
			go_error_info_new_str (_("Unable to parse the html.")));
}