static VALUE
rg_initialize(int argc, VALUE *argv, VALUE self)
{
    PopplerDocument *document = NULL;
    GError *error = NULL;
    VALUE uri_or_data, rb_password;
    const char *password;

    rb_scan_args(argc, argv, "11", &uri_or_data, &rb_password);

    password = NIL_P(rb_password) ? NULL : RVAL2CSTR(rb_password);

    if (RVAL2CBOOL(rb_funcall(self, id_pdf_data_p, 1, uri_or_data))) {
        document = poppler_document_new_from_data(RSTRING_PTR(uri_or_data),
                                                  RSTRING_LEN(uri_or_data),
                                                  password, &error);
    }

    if (!document && !error) {
        uri_or_data = rb_funcall(self, id_ensure_uri, 1, uri_or_data);
        document = poppler_document_new_from_file(RVAL2CSTR(uri_or_data),
                                                  password, &error);
    }

    if (error)
        RAISE_GERROR(error);

    G_INITIALIZE(self, document);
    return Qnil;
}
Beispiel #2
0
int main(int argc, char const *argv[])
{
	char *path;
	PopplerDocument *doc;
	GError *err;
	gchar *gbuf;
	char *buf;
	page_t page_meta;
	int file_length, n;

	g_type_init();

	if (argc != 2) {
		return 1;
	}

	err = NULL;
	buf = open_pdf_file(argv[1], &file_length);

	sandboxify();

	doc = poppler_document_new_from_data(buf, file_length, NULL, &err);
	if (err != NULL) {
		fprintf(stderr, "Unable to open file: %s\n", err->message);
		return 2;
	}

	n = poppler_document_get_n_pages(doc);

	for (int i = 0; i < n; i++) {
		PopplerPage *page = poppler_document_get_page(doc, i);

		page_meta.pagenum = i;
		page_meta.text = poppler_page_get_text(page);
		page_meta.svg_len = 0;
		page_meta.svg = malloc(SVG_BUFLEN);
		if (!page_meta.svg)
			ERROR("Cannot allocate svg buffer, not enought memory?");
		page_meta.free_space = SVG_BUFLEN;

		render_page(&page_meta, page);

		if (page_meta.text)
			free(page_meta.text);
		g_object_unref(page);
	}

	if (munmap(buf, file_length) == -1)
		PERROR("munmap()");

	return 0;
}
Beispiel #3
0
void
test_new_from_data (void)
{
  GError *error = NULL;
  gchar *data;
  gsize length;
  const gchar *path;

  path = cut_take_string (cut_build_fixture_data_path ("multi-pages.pdf", NULL));
  g_file_get_contents (path, &data, &length, &error);
  gcut_assert_error (error);

  document = poppler_document_new_from_data (data, length, NULL, &error);
  g_free (data);
  gcut_assert_error (error);

  cut_assert_equal_int (3, poppler_document_get_n_pages (document));
}
static PopplerDocument*
open_document (const gchar  *filename,
               GError      **load_error)
{
    PopplerDocument *doc;
    GMappedFile     *mapped_file;
    GError          *error = NULL;

    mapped_file = g_mapped_file_new (filename, FALSE, &error);

    if (! mapped_file)
    {
        g_set_error (load_error, 0, 0,
                     "Could not load '%s' %s",
                     gimp_filename_to_utf8 (filename), error->message);
        g_error_free (error);
        return NULL;
    }

    doc = poppler_document_new_from_data (g_mapped_file_get_contents (mapped_file),
                                          g_mapped_file_get_length (mapped_file),
                                          NULL,
                                          &error);

    /* We can't g_mapped_file_unref(mapped_file) as apparently doc has
     * references to data in there. No big deal, this is just a
     * short-lived plug-in.
     */

    if (! doc)
    {
        g_set_error (load_error, G_FILE_ERROR, G_FILE_ERROR_FAILED,
                     _("Could not load '%s': %s"),
                     gimp_filename_to_utf8 (filename),
                     error->message);
        g_error_free (error);
        return NULL;
    }

    return doc;
}
Beispiel #5
0
int main(int argc, char **argv)
{
	int i = 0, numframes;
	char *filename = NULL;
	gchar *notefile = NULL;
	FILE *fp = NULL;
	struct stat statbuf;
	char *databuf = NULL;
	GError *err = NULL;

	gtk_init(&argc, &argv);

	/* Load preferences first. Command line options will override those
	 * preferences. */
	loadPreferences();

	/* Read defaults from preferences. */
	filename = NULL;
	numframes = 2 * prefs.slide_context + 1;
	runpref.do_wrapping = prefs.do_wrapping;
	runpref.do_notectrl = prefs.do_notectrl;
	runpref.cache_max = prefs.cache_max;
	runpref.fit_mode = prefs.initial_fit_mode;

	/* get options via getopt */
	while ((i = getopt(argc, argv, "s:wnc:N:CTv")) != -1)
	{
		switch (i)
		{
			case 's':
				numframes = 2 * atoi(optarg) + 1;
				if (numframes <= 1)
				{
					fprintf(stderr, "Invalid slide count specified.\n");
					usage(argv[0]);
					exit(EXIT_FAILURE);
				}
				break;

			case 'w':
				runpref.do_wrapping = TRUE;
				break;

			case 'n':
				runpref.do_notectrl = TRUE;
				break;

			case 'c':
				/* don't care if that number is invalid. it'll get
				 * re-adjusted anyway if it's too small. */
				runpref.cache_max = atoi(optarg);
				break;

			case 'N':
				notefile = g_strdup(optarg);
				break;

			case 'C':
				/* Force the timer to be a clock. */
				prefs.timer_is_clock = TRUE;
				break;

			case 'T':
				/* Force the timer to be a timer (not a clock). */
				prefs.timer_is_clock = FALSE;
				break;

			case 'v':
				printf("pdfpres version: %s\n", PDFPRES_VERSION);
				exit(EXIT_SUCCESS);
				break;

			case '?':
				exit(EXIT_FAILURE);
				break;
		}
	}

	/* retrieve file name via first non-option argument */
	if (optind < argc)
	{
		filename = argv[optind];
	}

	if (filename == NULL)
	{
		fprintf(stderr, "Invalid file path specified.\n");
		usage(argv[0]);
		exit(EXIT_FAILURE);
	}

	/* for the cache to be useful, we'll need at least "some" items.
	 * that is 2 items (prev and next) per preview viewport and 2
	 * items for the beamer port.
	 *
	 * this means that switching to the previous and next slide will
	 * always be fast.
	 *
	 * note: numframes is not negative (see above), so that cast is okay.
	 */
	if (runpref.cache_max < (guint)((numframes + 1) * 2))
		runpref.cache_max = (guint)((numframes + 1) * 2);

	/* try to load the file */
	if (stat(filename, &statbuf) == -1)
	{
		perror("Could not stat file");
		exit(EXIT_FAILURE);
	}

	/* note: this buffer must not be freed, it'll be used by poppler
	 * later on. */
	databuf = (char *)malloc(statbuf.st_size);
	g_assert(databuf);

	fp = fopen(filename, "rb");
	if (!fp)
	{
		perror("Could not open file");
		exit(EXIT_FAILURE);
	}

	/* Read 1 element of size "statbuf.st_size". fread() returns the
	 * number of items successfully read. Thus, a return value of "1"
	 * means "success" and anything else is an error. */
	if (fread(databuf, statbuf.st_size, 1, fp) != 1)
	{
		fprintf(stderr, "Unexpected end of file.\n");
		exit(EXIT_FAILURE);
	}

	fclose(fp);

	/* get document from data */
	doc = poppler_document_new_from_data(databuf, statbuf.st_size,
			NULL, &err);
	if (!doc)
	{
		fprintf(stderr, "%s\n", err->message);
		g_error_free(err);
		exit(EXIT_FAILURE);
	}

	doc_n_pages = poppler_document_get_n_pages(doc);
	if (doc_n_pages <= 0)
	{
		fprintf(stderr, "Huh, no pages in that document.\n");
		exit(EXIT_FAILURE);
	}

	initGUI(numframes, notefile);

	/* queue initial prerendering. */
	preQueued = TRUE;
	g_idle_add(idleFillCaches, NULL);

	gtk_main();
	exit(EXIT_SUCCESS);
}
Beispiel #6
0
int main(int argc, char **argv)
{
	int i = 0, numframes;
	char *filename = NULL;
	gchar *notefile = NULL;
	FILE *fp = NULL;
	struct stat statbuf;
	char *databuf = NULL;
	GError *err = NULL;

	gtk_init(&argc, &argv);

	/* Load preferences first. Command line options will override those
	 * preferences. */
	loadPreferences();

	/* Read defaults from preferences. */
	filename = NULL;
	numframes = 2 * prefs.slide_context + 1;
	runpref.do_wrapping = prefs.do_wrapping;
	runpref.do_notectrl = prefs.do_notectrl;
	runpref.fit_mode = prefs.initial_fit_mode;

	/* get options via getopt */
	while ((i = getopt(argc, argv, "s:wnN:CTv")) != -1)
	{
		switch (i)
		{
			case 's':
				numframes = 2 * atoi(optarg) + 1;
				if (numframes <= 1)
				{
					fprintf(stderr, "Invalid slide count specified.\n");
					usage(argv[0]);
					exit(EXIT_FAILURE);
				}
				break;

			case 'w':
				runpref.do_wrapping = TRUE;
				break;

			case 'n':
				runpref.do_notectrl = TRUE;
				break;

			case 'N':
				notefile = g_strdup(optarg);
				break;

			case 'C':
				/* Force the timer to be a clock. */
				prefs.timer_is_clock = TRUE;
				break;

			case 'T':
				/* Force the timer to be a timer (not a clock). */
				prefs.timer_is_clock = FALSE;
				break;

			case 'v':
				printf("pdfpres version: %s\n", PDFPRES_VERSION);
				exit(EXIT_SUCCESS);
				break;

			case '?':
				exit(EXIT_FAILURE);
				break;
		}
	}

	/* retrieve file name via first non-option argument */
	if (optind < argc)
	{
		filename = argv[optind];
	}

	if (filename == NULL)
	{
		fprintf(stderr, "Invalid file path specified.\n");
		usage(argv[0]);
		exit(EXIT_FAILURE);
	}

	/* try to load the file */
	if (stat(filename, &statbuf) == -1)
	{
		perror("Could not stat file");
		exit(EXIT_FAILURE);
	}

	/* note: this buffer must not be freed, it'll be used by poppler
	 * later on. */
	databuf = (char *)malloc(statbuf.st_size);
	g_assert(databuf);

	fp = fopen(filename, "rb");
	if (!fp)
	{
		perror("Could not open file");
		exit(EXIT_FAILURE);
	}

	/* Read 1 element of size "statbuf.st_size". fread() returns the
	 * number of items successfully read. Thus, a return value of "1"
	 * means "success" and anything else is an error. */
	if (fread(databuf, statbuf.st_size, 1, fp) != 1)
	{
		fprintf(stderr, "Unexpected end of file.\n");
		exit(EXIT_FAILURE);
	}

	fclose(fp);

	/* get document from data */
	doc = poppler_document_new_from_data(databuf, statbuf.st_size,
			NULL, &err);
	if (!doc)
	{
		fprintf(stderr, "%s\n", err->message);
		g_error_free(err);
		exit(EXIT_FAILURE);
	}

	doc_n_pages = poppler_document_get_n_pages(doc);
	
	if (doc_n_pages <= 0)
	{
		fprintf(stderr, "Huh, no pages in that document.\n");
		exit(EXIT_FAILURE);
	}
	
	PopplerPage *page = poppler_document_get_page(doc, doc_n_pages-1);
	doc_last_page = atoi(poppler_page_get_label(page));
	g_object_unref(G_OBJECT(page));

	initGUI(numframes, notefile);

	gtk_main();
	exit(EXIT_SUCCESS);
}
Beispiel #7
0
G_MODULE_EXPORT gboolean
tracker_extract_get_metadata (TrackerExtractInfo *info)
{
	TrackerConfig *config;
	GTime creation_date;
	GError *error = NULL;
	TrackerSparqlBuilder *metadata, *preupdate;
	const gchar *graph;
	const gchar *urn;
	TrackerXmpData *xd = NULL;
	PDFData pd = { 0 }; /* actual data */
	PDFData md = { 0 }; /* for merging */
	PopplerDocument *document;
	gchar *xml = NULL;
	gchar *content, *uri;
	guint n_bytes;
	GPtrArray *keywords;
	guint i;
	GFile *file;
	gchar *filename;
	int fd;
	gchar *contents = NULL;
	gsize len;
	struct stat st;

	metadata = tracker_extract_info_get_metadata_builder (info);
	preupdate = tracker_extract_info_get_preupdate_builder (info);
	graph = tracker_extract_info_get_graph (info);
	urn = tracker_extract_info_get_urn (info);

	file = tracker_extract_info_get_file (info);
	filename = g_file_get_path (file);

	fd = tracker_file_open_fd (filename);

	if (fd == -1) {
		g_warning ("Could not open pdf file '%s': %s\n",
		           filename,
		           g_strerror (errno));
		g_free (filename);
		return FALSE;
	}

	if (fstat (fd, &st) == -1) {
		g_warning ("Could not fstat pdf file '%s': %s\n",
		           filename,
		           g_strerror (errno));
		close (fd);
		g_free (filename);
		return FALSE;
	}

	if (st.st_size == 0) {
		contents = NULL;
		len = 0;
	} else {
		contents = (gchar *) mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
		if (contents == NULL || contents == MAP_FAILED) {
			g_warning ("Could not mmap pdf file '%s': %s\n",
			           filename,
			           g_strerror (errno));
			close (fd);
			g_free (filename);
			return FALSE;
		}
		len = st.st_size;
	}

	g_free (filename);
	uri = g_file_get_uri (file);

	document = poppler_document_new_from_data (contents, len, NULL, &error);
	
	if (error) {
		if (error->code == POPPLER_ERROR_ENCRYPTED) {
			tracker_sparql_builder_predicate (metadata, "a");
			tracker_sparql_builder_object (metadata, "nfo:PaginatedTextDocument");

			tracker_sparql_builder_predicate (metadata, "nfo:isContentEncrypted");
			tracker_sparql_builder_object_boolean (metadata, TRUE);

			g_error_free (error);
			g_free (uri);
			close (fd);

			return TRUE;
		} else {
			g_warning ("Couldn't create PopplerDocument from uri:'%s', %s",
			           uri,
			           error->message ? error->message : "no error given");

			g_error_free (error);
			g_free (uri);
			close (fd);

			return FALSE;
		}
	}

	if (!document) {
		g_warning ("Could not create PopplerDocument from uri:'%s', "
		           "NULL returned without an error",
		           uri);
		g_free (uri);
		close (fd);
		return FALSE;
	}

	tracker_sparql_builder_predicate (metadata, "a");
	tracker_sparql_builder_object (metadata, "nfo:PaginatedTextDocument");

	g_object_get (document,
	              "title", &pd.title,
	              "author", &pd.author,
	              "subject", &pd.subject,
	              "keywords", &pd.keywords,
	              "creation-date", &creation_date,
	              "metadata", &xml,
	              NULL);

	if (creation_date > 0) {
		pd.creation_date = tracker_date_to_string ((time_t) creation_date);
	}

	keywords = g_ptr_array_new_with_free_func ((GDestroyNotify) g_free);

	if (xml && *xml &&
	    (xd = tracker_xmp_new (xml, strlen (xml), uri)) != NULL) {
		/* The casts here are well understood and known */
		md.title = (gchar *) tracker_coalesce_strip (4, pd.title, xd->title, xd->title2, xd->pdf_title);
		md.subject = (gchar *) tracker_coalesce_strip (2, pd.subject, xd->subject);
		md.date = (gchar *) tracker_coalesce_strip (3, pd.creation_date, xd->date, xd->time_original);
		md.author = (gchar *) tracker_coalesce_strip (2, pd.author, xd->creator);

		write_pdf_data (md, metadata, keywords);

		if (xd->keywords) {
			tracker_keywords_parse (keywords, xd->keywords);
		}

		if (xd->pdf_keywords) {
			tracker_keywords_parse (keywords, xd->pdf_keywords);
		}

		if (xd->publisher) {
			tracker_sparql_builder_predicate (metadata, "nco:publisher");
			tracker_sparql_builder_object_blank_open (metadata);
			tracker_sparql_builder_predicate (metadata, "a");
			tracker_sparql_builder_object (metadata, "nco:Contact");
			tracker_sparql_builder_predicate (metadata, "nco:fullname");
			tracker_sparql_builder_object_unvalidated (metadata, xd->publisher);
			tracker_sparql_builder_object_blank_close (metadata);
		}

		if (xd->type) {
			tracker_sparql_builder_predicate (metadata, "dc:type");
			tracker_sparql_builder_object_unvalidated (metadata, xd->type);
		}

		if (xd->format) {
			tracker_sparql_builder_predicate (metadata, "dc:format");
			tracker_sparql_builder_object_unvalidated (metadata, xd->format);
		}

		if (xd->identifier) {
			tracker_sparql_builder_predicate (metadata, "dc:identifier");
			tracker_sparql_builder_object_unvalidated (metadata, xd->identifier);
		}

		if (xd->source) {
			tracker_sparql_builder_predicate (metadata, "dc:source");
			tracker_sparql_builder_object_unvalidated (metadata, xd->source);
		}

		if (xd->language) {
			tracker_sparql_builder_predicate (metadata, "dc:language");
			tracker_sparql_builder_object_unvalidated (metadata, xd->language);
		}

		if (xd->relation) {
			tracker_sparql_builder_predicate (metadata, "dc:relation");
			tracker_sparql_builder_object_unvalidated (metadata, xd->relation);
		}

		if (xd->coverage) {
			tracker_sparql_builder_predicate (metadata, "dc:coverage");
			tracker_sparql_builder_object_unvalidated (metadata, xd->coverage);
		}

		if (xd->license) {
			tracker_sparql_builder_predicate (metadata, "nie:license");
			tracker_sparql_builder_object_unvalidated (metadata, xd->license);
		}

		if (xd->make || xd->model) {
			gchar *equip_uri;

			equip_uri = tracker_sparql_escape_uri_printf ("urn:equipment:%s:%s:",
			                                              xd->make ? xd->make : "",
			                                              xd->model ? xd->model : "");

			tracker_sparql_builder_insert_open (preupdate, NULL);
			if (graph) {
				tracker_sparql_builder_graph_open (preupdate, graph);
			}

			tracker_sparql_builder_subject_iri (preupdate, equip_uri);
			tracker_sparql_builder_predicate (preupdate, "a");
			tracker_sparql_builder_object (preupdate, "nfo:Equipment");

			if (xd->make) {
				tracker_sparql_builder_predicate (preupdate, "nfo:manufacturer");
				tracker_sparql_builder_object_unvalidated (preupdate, xd->make);
			}

			if (xd->model) {
				tracker_sparql_builder_predicate (preupdate, "nfo:model");
				tracker_sparql_builder_object_unvalidated (preupdate, xd->model);
			}

			if (graph) {
				tracker_sparql_builder_graph_close (preupdate);
			}
			tracker_sparql_builder_insert_close (preupdate);

			tracker_sparql_builder_predicate (metadata, "nfo:equipment");
			tracker_sparql_builder_object_iri (metadata, equip_uri);
			g_free (equip_uri);
		}

		if (xd->orientation) {
			tracker_sparql_builder_predicate (metadata, "nfo:orientation");
			tracker_sparql_builder_object (metadata, xd->orientation);
		}

		if (xd->rights) {
			tracker_sparql_builder_predicate (metadata, "nie:copyright");
			tracker_sparql_builder_object_unvalidated (metadata, xd->rights);
		}

		if (xd->white_balance) {
			tracker_sparql_builder_predicate (metadata, "nmm:whiteBalance");
			tracker_sparql_builder_object (metadata, xd->white_balance);
		}

		if (xd->fnumber) {
			gdouble value;

			value = g_strtod (xd->fnumber, NULL);
			tracker_sparql_builder_predicate (metadata, "nmm:fnumber");
			tracker_sparql_builder_object_double (metadata, value);
		}

		if (xd->flash) {
			tracker_sparql_builder_predicate (metadata, "nmm:flash");
			tracker_sparql_builder_object (metadata, xd->flash);
		}

		if (xd->focal_length) {
			gdouble value;

			value = g_strtod (xd->focal_length, NULL);
			tracker_sparql_builder_predicate (metadata, "nmm:focalLength");
			tracker_sparql_builder_object_double (metadata, value);
		}

		/* Question: Shouldn't xd->Artist be merged with md.author instead? */

		if (xd->artist || xd->contributor) {
			const gchar *artist;

			artist = tracker_coalesce_strip (2, xd->artist, xd->contributor);
			tracker_sparql_builder_predicate (metadata, "nco:contributor");
			tracker_sparql_builder_object_blank_open (metadata);
			tracker_sparql_builder_predicate (metadata, "a");
			tracker_sparql_builder_object (metadata, "nco:Contact");
			tracker_sparql_builder_predicate (metadata, "nco:fullname");
			tracker_sparql_builder_object_unvalidated (metadata, artist);
			tracker_sparql_builder_object_blank_close (metadata);
		}

		if (xd->exposure_time) {
			gdouble value;

			value = g_strtod (xd->exposure_time, NULL);
			tracker_sparql_builder_predicate (metadata, "nmm:exposureTime");
			tracker_sparql_builder_object_double (metadata, value);
		}

		if (xd->iso_speed_ratings) {
			gdouble value;

			value = g_strtod (xd->iso_speed_ratings, NULL);
			tracker_sparql_builder_predicate (metadata, "nmm:isoSpeed");
			tracker_sparql_builder_object_double (metadata, value);
		}

		if (xd->description) {
			tracker_sparql_builder_predicate (metadata, "nie:description");
			tracker_sparql_builder_object_unvalidated (metadata, xd->description);
		}

		if (xd->metering_mode) {
			tracker_sparql_builder_predicate (metadata, "nmm:meteringMode");
			tracker_sparql_builder_object (metadata, xd->metering_mode);
		}

		if (xd->address || xd->state || xd->country || xd->city ||
		    xd->gps_altitude || xd->gps_latitude || xd-> gps_longitude) {

			tracker_sparql_builder_predicate (metadata, "slo:location");

			tracker_sparql_builder_object_blank_open (metadata); /* GeoLocation */
			tracker_sparql_builder_predicate (metadata, "a");
			tracker_sparql_builder_object (metadata, "slo:GeoLocation");
			
			if (xd->address || xd->state || xd->country || xd->city)  {
				gchar *addruri;
				addruri = tracker_sparql_get_uuid_urn ();

				tracker_sparql_builder_predicate (metadata, "slo:postalAddress");
				tracker_sparql_builder_object_iri (metadata, addruri);			

				tracker_sparql_builder_insert_open (preupdate, NULL);
				if (graph) {
					tracker_sparql_builder_graph_open (preupdate, graph);
				}

				tracker_sparql_builder_subject_iri (preupdate, addruri);

				g_free (addruri);

				tracker_sparql_builder_predicate (preupdate, "a");
				tracker_sparql_builder_object (preupdate, "nco:PostalAddress");

				if (xd->address) {
				        tracker_sparql_builder_predicate (preupdate, "nco:streetAddress");
				        tracker_sparql_builder_object_unvalidated (preupdate, xd->address);
				}

				if (xd->state) {
				        tracker_sparql_builder_predicate (preupdate, "nco:region");
				        tracker_sparql_builder_object_unvalidated (preupdate, xd->state);
				}

				if (xd->city) {
				        tracker_sparql_builder_predicate (preupdate, "nco:locality");
				        tracker_sparql_builder_object_unvalidated (preupdate, xd->city);
				}

				if (xd->country) {
				        tracker_sparql_builder_predicate (preupdate, "nco:country");
				        tracker_sparql_builder_object_unvalidated (preupdate, xd->country);
				}

				if (graph) {
					tracker_sparql_builder_graph_close (preupdate);
				}
				tracker_sparql_builder_insert_close (preupdate);
			}

			if (xd->gps_altitude) {
				tracker_sparql_builder_predicate (metadata, "slo:altitude");
				tracker_sparql_builder_object_unvalidated (metadata, xd->gps_altitude);
			}

			if (xd->gps_latitude) {
				tracker_sparql_builder_predicate (metadata, "slo:latitude");
				tracker_sparql_builder_object_unvalidated (metadata, xd->gps_latitude);
			}

			if (xd->gps_longitude) {
				tracker_sparql_builder_predicate (metadata, "slo:longitude");
				tracker_sparql_builder_object_unvalidated (metadata, xd->gps_longitude);
			}

			tracker_sparql_builder_object_blank_close (metadata); /* GeoLocation */
		}

                if (xd->regions) {
	                tracker_xmp_apply_regions (preupdate, metadata, graph, xd);
                }

		tracker_xmp_free (xd);
	} else {
		/* So if we are here we have NO XMP data and we just
		 * write what we know from Poppler.
		 */
		write_pdf_data (pd, metadata, keywords);
	}

	for (i = 0; i < keywords->len; i++) {
		gchar *escaped, *subject;
		const gchar *p;

		p = g_ptr_array_index (keywords, i);
		escaped = tracker_sparql_escape_string (p);
		subject = g_strdup_printf ("_:tag%d", i + 1);

		/* ensure tag with specified label exists */
		tracker_sparql_builder_insert_open (preupdate, graph);
		tracker_sparql_builder_subject (preupdate, subject);
		tracker_sparql_builder_predicate (preupdate, "a");
		tracker_sparql_builder_object (preupdate, "nao:Tag");
		tracker_sparql_builder_predicate (preupdate, "nao:prefLabel");
		tracker_sparql_builder_object_unvalidated (preupdate, escaped);
		tracker_sparql_builder_insert_close (preupdate);
		tracker_sparql_builder_append (preupdate,
		                               "WHERE { FILTER (NOT EXISTS { "
		                               "?tag a nao:Tag ; nao:prefLabel \"");
		tracker_sparql_builder_append (preupdate, escaped);
		tracker_sparql_builder_append (preupdate,
		                               "\" }) }\n");

		/* associate file with tag */
		tracker_sparql_builder_insert_open (preupdate, graph);
		tracker_sparql_builder_subject_iri (preupdate, urn);
		tracker_sparql_builder_predicate (preupdate, "nao:hasTag");
		tracker_sparql_builder_object (preupdate, "?tag");
		tracker_sparql_builder_insert_close (preupdate);
		tracker_sparql_builder_where_open (preupdate);
		tracker_sparql_builder_subject (preupdate, "?tag");
		tracker_sparql_builder_predicate (preupdate, "a");
		tracker_sparql_builder_object (preupdate, "nao:Tag");
		tracker_sparql_builder_predicate (preupdate, "nao:prefLabel");
		tracker_sparql_builder_object_unvalidated (preupdate, escaped);
		tracker_sparql_builder_where_close (preupdate);

		g_free (subject);
		g_free (escaped);
	}
	g_ptr_array_free (keywords, TRUE);

	tracker_sparql_builder_predicate (metadata, "nfo:pageCount");
	tracker_sparql_builder_object_int64 (metadata, poppler_document_get_n_pages (document));

	config = tracker_main_get_config ();
	n_bytes = tracker_config_get_max_bytes (config);
	content = extract_content_text (document, n_bytes);

	if (content) {
		tracker_sparql_builder_predicate (metadata, "nie:plainTextContent");
		tracker_sparql_builder_object_unvalidated (metadata, content);
		g_free (content);
	}

	read_outline (document, metadata);

	g_free (xml);
	g_free (pd.keywords);
	g_free (pd.title);
	g_free (pd.subject);
	g_free (pd.creation_date);
	g_free (pd.author);
	g_free (pd.date);
	g_free (uri);

	g_object_unref (document);

	if (contents) {
		munmap (contents, len);
	}

	close (fd);

	return TRUE;
}
Beispiel #8
0
G_MODULE_EXPORT gboolean
tracker_extract_get_metadata (TrackerExtractInfo *info)
{
    TrackerConfig *config;
    GTime creation_date;
    GError *error = NULL;
    TrackerResource *metadata;
    TrackerXmpData *xd = NULL;
    PDFData pd = { 0 }; /* actual data */
    PDFData md = { 0 }; /* for merging */
    PopplerDocument *document;
    gchar *xml = NULL;
    gchar *content, *uri;
    guint n_bytes;
    GPtrArray *keywords;
    guint i;
    GFile *file;
    gchar *filename;
    int fd;
    gchar *contents = NULL;
    gsize len;
    struct stat st;

    file = tracker_extract_info_get_file (info);
    filename = g_file_get_path (file);

    fd = tracker_file_open_fd (filename);

    if (fd == -1) {
        g_warning ("Could not open pdf file '%s': %s\n",
                   filename,
                   g_strerror (errno));
        g_free (filename);
        return FALSE;
    }

    if (fstat (fd, &st) == -1) {
        g_warning ("Could not fstat pdf file '%s': %s\n",
                   filename,
                   g_strerror (errno));
        close (fd);
        g_free (filename);
        return FALSE;
    }

    if (st.st_size == 0) {
        contents = NULL;
        len = 0;
    } else {
        contents = (gchar *) mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
        if (contents == NULL || contents == MAP_FAILED) {
            g_warning ("Could not mmap pdf file '%s': %s\n",
                       filename,
                       g_strerror (errno));
            close (fd);
            g_free (filename);
            return FALSE;
        }
        len = st.st_size;
    }

    g_free (filename);
    uri = g_file_get_uri (file);

    document = poppler_document_new_from_data (contents, len, NULL, &error);

    if (error) {
        if (error->code == POPPLER_ERROR_ENCRYPTED) {
            metadata = tracker_resource_new (NULL);

            tracker_resource_add_uri (metadata, "rdf:type", "nfo:PaginatedTextDocument");
            tracker_resource_set_boolean (metadata, "nfo:isContentEncrypted", TRUE);

            tracker_extract_info_set_resource (info, metadata);
            g_object_unref (metadata);

            g_error_free (error);
            g_free (uri);
            close (fd);

            return TRUE;
        } else {
            g_warning ("Couldn't create PopplerDocument from uri:'%s', %s",
                       uri,
                       error->message ? error->message : "no error given");

            g_error_free (error);
            g_free (uri);
            close (fd);

            return FALSE;
        }
    }

    if (!document) {
        g_warning ("Could not create PopplerDocument from uri:'%s', "
                   "NULL returned without an error",
                   uri);
        g_free (uri);
        close (fd);
        return FALSE;
    }

    metadata = tracker_resource_new (NULL);
    tracker_resource_add_uri (metadata, "rdf:type", "nfo:PaginatedTextDocument");

    g_object_get (document,
                  "title", &pd.title,
                  "author", &pd.author,
                  "subject", &pd.subject,
                  "keywords", &pd.keywords,
                  "creation-date", &creation_date,
                  "metadata", &xml,
                  NULL);

    if (creation_date > 0) {
        pd.creation_date = tracker_date_to_string ((time_t) creation_date);
    }

    keywords = g_ptr_array_new_with_free_func ((GDestroyNotify) g_free);

    if (xml && *xml &&
            (xd = tracker_xmp_new (xml, strlen (xml), uri)) != NULL) {
        /* The casts here are well understood and known */
        md.title = (gchar *) tracker_coalesce_strip (4, pd.title, xd->title, xd->title2, xd->pdf_title);
        md.subject = (gchar *) tracker_coalesce_strip (2, pd.subject, xd->subject);
        md.date = (gchar *) tracker_coalesce_strip (3, pd.creation_date, xd->date, xd->time_original);
        md.author = (gchar *) tracker_coalesce_strip (2, pd.author, xd->creator);

        write_pdf_data (md, metadata, keywords);

        if (xd->keywords) {
            tracker_keywords_parse (keywords, xd->keywords);
        }

        if (xd->pdf_keywords) {
            tracker_keywords_parse (keywords, xd->pdf_keywords);
        }

        if (xd->publisher) {
            TrackerResource *publisher = tracker_extract_new_contact (xd->publisher);
            tracker_resource_set_relation (metadata, "nco:publisher", publisher);
            g_object_unref (publisher);
        }

        if (xd->type) {
            tracker_resource_set_string (metadata, "dc:type", xd->type);
        }

        if (xd->format) {
            tracker_resource_set_string (metadata, "dc:format", xd->format);
        }

        if (xd->identifier) {
            tracker_resource_set_string (metadata, "dc:identifier", xd->identifier);
        }

        if (xd->source) {
            tracker_resource_set_string (metadata, "dc:source", xd->source);
        }

        if (xd->language) {
            tracker_resource_set_string (metadata, "dc:language", xd->language);
        }

        if (xd->relation) {
            tracker_resource_set_string (metadata, "dc:relation", xd->relation);
        }

        if (xd->coverage) {
            tracker_resource_set_string (metadata, "dc:coverage", xd->coverage);
        }

        if (xd->license) {
            tracker_resource_set_string (metadata, "nie:license", xd->license);
        }

        if (xd->make || xd->model) {
            TrackerResource *equipment = tracker_extract_new_equipment (xd->make, xd->model);
            tracker_resource_set_relation (metadata, "nfo:equipment", equipment);
            g_object_unref (equipment);
        }

        if (xd->orientation) {
            tracker_resource_set_string (metadata, "nfo:orientation", xd->orientation);
        }

        if (xd->rights) {
            tracker_resource_set_string (metadata, "nie:copyright", xd->rights);
        }

        if (xd->white_balance) {
            tracker_resource_set_string (metadata, "nmm:whiteBalance", xd->white_balance);
        }

        if (xd->fnumber) {
            gdouble value;

            value = g_strtod (xd->fnumber, NULL);
            tracker_resource_set_double (metadata, "nmm:fnumber", value);
        }

        if (xd->flash) {
            tracker_resource_set_string (metadata, "nmm:flash", xd->flash);
        }

        if (xd->focal_length) {
            gdouble value;

            value = g_strtod (xd->focal_length, NULL);
            tracker_resource_set_double (metadata, "nmm:focalLength", value);
        }

        /* Question: Shouldn't xd->Artist be merged with md.author instead? */

        if (xd->artist || xd->contributor) {
            TrackerResource *artist;
            const gchar *artist_name;

            artist_name = tracker_coalesce_strip (2, xd->artist, xd->contributor);

            artist = tracker_extract_new_contact (artist_name);

            tracker_resource_set_relation (metadata, "nco:contributor", artist);

            g_object_unref (artist);
        }

        if (xd->exposure_time) {
            gdouble value;

            value = g_strtod (xd->exposure_time, NULL);
            tracker_resource_set_double (metadata, "nmm:exposureTime", value);
        }

        if (xd->iso_speed_ratings) {
            gdouble value;

            value = g_strtod (xd->iso_speed_ratings, NULL);
            tracker_resource_set_double (metadata, "nmm:isoSpeed", value);
        }

        if (xd->description) {
            tracker_resource_set_string (metadata, "nie:description", xd->description);
        }

        if (xd->metering_mode) {
            tracker_resource_set_string (metadata, "nmm:meteringMode", xd->metering_mode);
        }

        if (xd->address || xd->state || xd->country || xd->city ||
                xd->gps_altitude || xd->gps_latitude || xd-> gps_longitude) {

            TrackerResource *location = tracker_extract_new_location (xd->address,
                                        xd->state, xd->city, xd->country, xd->gps_altitude,
                                        xd->gps_latitude, xd->gps_longitude);

            tracker_resource_set_relation (metadata, "slo:location", location);

            g_object_unref (location);
        }

        if (xd->regions) {
            tracker_xmp_apply_regions_to_resource (metadata, xd);
        }

        tracker_xmp_free (xd);
    } else {
        /* So if we are here we have NO XMP data and we just
         * write what we know from Poppler.
         */
        write_pdf_data (pd, metadata, keywords);
    }

    for (i = 0; i < keywords->len; i++) {
        TrackerResource *tag;
        const gchar *p;

        p = g_ptr_array_index (keywords, i);
        tag = tracker_extract_new_tag (p);

        tracker_resource_add_relation (metadata, "nao:hasTag", tag);

        g_object_unref (tag);
    }
    g_ptr_array_free (keywords, TRUE);

    tracker_resource_set_int64 (metadata, "nfo:pageCount", poppler_document_get_n_pages(document));

    config = tracker_main_get_config ();
    n_bytes = tracker_config_get_max_bytes (config);
    content = extract_content_text (document, n_bytes);

    if (content) {
        tracker_resource_set_string (metadata, "nie:plainTextContent", content);
        g_free (content);
    }

    read_outline (document, metadata);

    g_free (xml);
    g_free (pd.keywords);
    g_free (pd.title);
    g_free (pd.subject);
    g_free (pd.creation_date);
    g_free (pd.author);
    g_free (pd.date);
    g_free (uri);

    g_object_unref (document);

    if (contents) {
        munmap (contents, len);
    }

    close (fd);

    tracker_extract_info_set_resource (info, metadata);
    g_object_unref (metadata);

    return TRUE;
}