Пример #1
0
/** Loads a file into a GncCsvParseData. This is the first function
 * that must be called after createing a new GncCsvParseData. If this
 * fails because the file couldn't be opened, no more functions can be
 * called on the parse data until this succeeds (or until it fails
 * because of an encoding guess error). If it fails because the
 * encoding could not be guessed, gnc_csv_convert_encoding must be
 * called until it succeeds.
 * @param parse_data Data that is being parsed
 * @param filename Name of the file that should be opened
 * @param error Will contain an error if there is a failure
 * @return 0 on success, 1 on failure
 */
int gnc_csv_load_file (GncCsvParseData* parse_data, const char* filename,
                      GError** error)
{
    const char* guess_enc = NULL;

    /* Get the raw data first and handle an error if one occurs. */
    parse_data->raw_mapping = g_mapped_file_new (filename, FALSE, error);
    if (parse_data->raw_mapping == NULL)
    {
        /* TODO Handle file opening errors more specifically,
         * e.g. inexistent file versus no read permission. */
        parse_data->raw_str.begin = NULL;
        g_clear_error (error);
        g_set_error (error, 0, GNC_CSV_FILE_OPEN_ERR, "%s", _("File opening failed."));
        return 1;
    }

    /* Copy the mapping's contents into parse-data->raw_str. */
    parse_data->raw_str.begin = g_mapped_file_get_contents (parse_data->raw_mapping);
    parse_data->raw_str.end = parse_data->raw_str.begin + g_mapped_file_get_length (parse_data->raw_mapping);

    /* Make a guess at the encoding of the data. */
    if (!g_mapped_file_get_length (parse_data->raw_mapping) == 0)
        guess_enc = go_guess_encoding ((const char*)(parse_data->raw_str.begin),
                                      (size_t)(parse_data->raw_str.end - parse_data->raw_str.begin),
                                      "UTF-8", NULL);
    if (guess_enc == NULL)
    {
        g_set_error (error, 0, GNC_CSV_ENCODING_ERR, "%s", _("Unknown encoding."));
        return 1;
    }
    /* Convert using the guessed encoding into parse_data->file_str and
     * handle any errors that occur. */
    gnc_csv_convert_encoding (parse_data, guess_enc, error);
    if (parse_data->file_str.begin == NULL)
    {
        g_set_error (error, 0, GNC_CSV_ENCODING_ERR, "%s", _("Unknown encoding."));
        return 1;
    }
    else
        return 0;
}
Пример #2
0
/* Quick and dirty html probe. */
gboolean
html_file_probe (G_GNUC_UNUSED GOFileOpener const *fo, GsfInput *input,
		 G_GNUC_UNUSED GOFileProbeLevel pl)
{
	gsf_off_t size = 200;
	guint8 const* buf = gsf_input_read (input, size, NULL);
	gchar *ulstr = NULL;
	GString *ustr;
	gboolean res = FALSE;

	/* Avoid seeking in large streams - try to read, fall back if
	 * stream is too short.  (Actually, currently _size does not
	 * involve any syscalls -- MW).  */
	if (!buf) {
		size = gsf_input_size (input);
		buf = gsf_input_read (input, size, NULL);
		if (!buf)
			return res;
	}

	if (go_guess_encoding (buf, size, NULL, &ustr, NULL)) {
		ulstr = g_utf8_strdown (ustr->str, -1);
		g_string_free (ustr, TRUE);
	}

	if (!ulstr)
		return res;

	res = (strstr (ulstr, "<table") != NULL ||
	       strstr (ulstr, "<html") != NULL ||
	       strstr (ulstr, "<!doctype html") != NULL);

	g_free (ulstr);

	return res;
}
Пример #3
0
static gboolean
csv_tsv_probe (GOFileOpener const *fo, GsfInput *input, GOFileProbeLevel pl)
{
	/* Rough and ready heuristic.  If the first N bytes have no
	 * unprintable characters this may be text */
	const gsf_off_t N = 512;

	if (pl == GO_FILE_PROBE_CONTENT) {
		guint8 const *header;
		gsf_off_t i;
		char const *enc = NULL;
		GString *header_utf8;
		char const *p;
		gboolean ok = TRUE;

		if (gsf_input_seek (input, 0, G_SEEK_SET))
			return FALSE;
		i = gsf_input_remaining (input);

		/* If someone ships us an empty file, accept it only if
		   it has a proper name.  */
		if (i == 0)
			return csv_tsv_probe (fo, input, GO_FILE_PROBE_FILE_NAME);

		if (i > N) i = N;
		if (NULL == (header = gsf_input_read (input, i, NULL)))
			return FALSE;

		enc = go_guess_encoding (header, i, NULL, &header_utf8, NULL);
		if (!enc)
			return FALSE;

		for (p = header_utf8->str; *p; p = g_utf8_next_char (p)) {
			gunichar uc = g_utf8_get_char (p);
			/* isprint might not be true for these: */
			if (uc == '\n' || uc == '\t' || uc == '\r')
				continue;
			/* Also, ignore a byte-order mark which may be used to
			 * indicate UTF-8; see
			 * http://en.wikipedia.org/wiki/Byte_Order_Mark for
			 * background.
			 */
			if (p == header_utf8->str && uc == 0x0000FEFF) {
				continue;
			}
			if (!g_unichar_isprint (uc)) {
				ok = FALSE;
				break;
			}
		}

		g_string_free (header_utf8, TRUE);
		return ok;
	} else {
		char const *name = gsf_input_name (input);
		if (name == NULL)
			return FALSE;
		name = gsf_extension_pointer (name);
		return (name != NULL &&
			(g_ascii_strcasecmp (name, "csv") == 0 ||
			 g_ascii_strcasecmp (name, "tsv") == 0 ||
			 g_ascii_strcasecmp (name, "txt") == 0));
	}
}
Пример #4
0
/*
 * stf_read_workbook_auto_csvtab:
 * @fo: file opener
 * @enc: optional encoding
 * @context: command context
 * @book: workbook
 * @input: file to read from+convert
 *
 * Attempt to auto-detect CSV or tab-delimited file
 */
static void
stf_read_workbook_auto_csvtab (G_GNUC_UNUSED GOFileOpener const *fo, gchar const *enc,
			       GOIOContext *context,
			       GoView *view, GsfInput *input)
{
	Sheet *sheet;
	Workbook *book;
	char *name;
	char *data;
	GString *utf8data;
	size_t data_len;
	StfParseOptions_t *po;
	const char *gsfname;
	int cols, rows, i;
	GStringChunk *lines_chunk;
	GPtrArray *lines;
	WorkbookView *wbv = GNM_WORKBOOK_VIEW (view);

	g_return_if_fail (context != NULL);
	g_return_if_fail (wbv != NULL);

	book = wb_view_get_workbook (wbv);

	data = stf_preparse (context, input, &data_len);
	if (!data)
		return;

	enc = go_guess_encoding (data, data_len, enc, &utf8data, NULL);
	g_free (data);

	if (!enc) {
		go_cmd_context_error_import (GO_CMD_CONTEXT (context),
				     _("That file is not in the given encoding."));
		return;
	}

	clear_stray_NULs (context, utf8data);

	/*
	 * Try to get the filename we're reading from.  This is not a
	 * great way.
	 */
	gsfname = gsf_input_name (input);

	{
		const char *ext = gsf_extension_pointer (gsfname);
		gboolean iscsv = ext && strcasecmp (ext, "csv") == 0;
		if (iscsv)
			po = stf_parse_options_guess_csv (utf8data->str);
		else
			po = stf_parse_options_guess (utf8data->str);
	}

	lines_chunk = g_string_chunk_new (100 * 1024);
	lines = stf_parse_general (po, lines_chunk,
				   utf8data->str, utf8data->str + utf8data->len);
	rows = lines->len;
	cols = 0;
	for (i = 0; i < rows; i++) {
		GPtrArray *line = g_ptr_array_index (lines, i);
		cols = MAX (cols, (int)line->len);
	}
	gnm_sheet_suggest_size (&cols, &rows);
	stf_parse_general_free (lines);
	g_string_chunk_free (lines_chunk);

	name = g_path_get_basename (gsfname);
	sheet = sheet_new (book, name, cols, rows);
	g_free (name);
	workbook_sheet_attach (book, sheet);

	if (stf_parse_sheet (po, utf8data->str, NULL, sheet, 0, 0)) {
		gboolean is_csv;
		workbook_recalc_all (book);
		resize_columns (sheet);
		if (po->cols_exceeded || po->rows_exceeded) {
			stf_warning (context,
				     _("Some data did not fit on the "
				       "sheet and was dropped."));
		}
		is_csv = po->sep.chr && po->sep.chr[0] == ',';
		workbook_set_saveinfo
			(book,
			 GO_FILE_FL_WRITE_ONLY,
			 go_file_saver_for_id
			 (is_csv ? "Gnumeric_stf:stf_csv" : "Gnumeric_stf:stf_assistant"));
	} else {
		workbook_sheet_delete (sheet);
		go_cmd_context_error_import (GO_CMD_CONTEXT (context),
			_("Parse error while trying to parse data into sheet"));
	}


	stf_parse_options_free (po);
	g_string_free (utf8data, TRUE);
}