/*
 * Poppler::Document#initialize(uri_or_data, password = nil)
 *
 * Builds the wrapped PopplerDocument either from in-memory PDF data or
 * from a file URI:
 *
 *   1. If self.pdf_data?(uri_or_data) is true, the string bytes are handed
 *      to poppler_document_new_from_data().
 *   2. If that did not yield a document and reported no error, the
 *      argument is passed through self.ensure_uri() and opened with
 *      poppler_document_new_from_file().
 *
 * Raises the GError as a Ruby exception when poppler reports one.
 * Always returns nil.
 */
static VALUE
rg_initialize(int argc, VALUE *argv, VALUE self)
{
    PopplerDocument *document = NULL;
    GError *error = NULL;
    VALUE uri_or_data, rb_password;
    const char *password;

    rb_scan_args(argc, argv, "11", &uri_or_data, &rb_password);
    password = NIL_P(rb_password) ? NULL : RVAL2CSTR(rb_password);

    if (RVAL2CBOOL(rb_funcall(self, id_pdf_data_p, 1, uri_or_data))) {
        /* poppler_document_new_from_data() neither copies the buffer nor
         * takes a reference on it, so the Ruby string has to stay alive
         * for as long as this object does.  Pin it in a hidden instance
         * variable (no '@' prefix, so it is not visible from Ruby) to
         * keep the GC from reclaiming it.
         * NOTE(review): this does not protect against the caller mutating
         * the string afterwards -- confirm whether a frozen copy is
         * wanted. */
        rb_ivar_set(self, rb_intern("__poppler_document_data__"),
                    uri_or_data);
        document = poppler_document_new_from_data(RSTRING_PTR(uri_or_data),
                                                  RSTRING_LEN(uri_or_data),
                                                  password, &error);
    }

    /* Fall back to treating the argument as a path/URI only when the data
     * route was not taken (or silently produced nothing). */
    if (!document && !error) {
        uri_or_data = rb_funcall(self, id_ensure_uri, 1, uri_or_data);
        document = poppler_document_new_from_file(RVAL2CSTR(uri_or_data),
                                                  password, &error);
    }

    if (error)
        RAISE_GERROR(error);

    G_INITIALIZE(self, document);
    return Qnil;
}
int main(int argc, char const *argv[]) { char *path; PopplerDocument *doc; GError *err; gchar *gbuf; char *buf; page_t page_meta; int file_length, n; g_type_init(); if (argc != 2) { return 1; } err = NULL; buf = open_pdf_file(argv[1], &file_length); sandboxify(); doc = poppler_document_new_from_data(buf, file_length, NULL, &err); if (err != NULL) { fprintf(stderr, "Unable to open file: %s\n", err->message); return 2; } n = poppler_document_get_n_pages(doc); for (int i = 0; i < n; i++) { PopplerPage *page = poppler_document_get_page(doc, i); page_meta.pagenum = i; page_meta.text = poppler_page_get_text(page); page_meta.svg_len = 0; page_meta.svg = malloc(SVG_BUFLEN); if (!page_meta.svg) ERROR("Cannot allocate svg buffer, not enought memory?"); page_meta.free_space = SVG_BUFLEN; render_page(&page_meta, page); if (page_meta.text) free(page_meta.text); g_object_unref(page); } if (munmap(buf, file_length) == -1) PERROR("munmap()"); return 0; }
void test_new_from_data (void) { GError *error = NULL; gchar *data; gsize length; const gchar *path; path = cut_take_string (cut_build_fixture_data_path ("multi-pages.pdf", NULL)); g_file_get_contents (path, &data, &length, &error); gcut_assert_error (error); document = poppler_document_new_from_data (data, length, NULL, &error); g_free (data); gcut_assert_error (error); cut_assert_equal_int (3, poppler_document_get_n_pages (document)); }
/*
 * Opens the PDF at @filename by memory-mapping it and parsing the mapping.
 *
 * @filename:   path of the file to load.
 * @load_error: return location for an error in the G_FILE_ERROR domain.
 *
 * Returns the new PopplerDocument, or NULL with @load_error set.
 *
 * On success the mapping is intentionally kept alive for the rest of the
 * process: the document keeps referring to the mapped bytes.
 */
static PopplerDocument *
open_document (const gchar  *filename,
               GError      **load_error)
{
  PopplerDocument *doc;
  GMappedFile     *mapped_file;
  GError          *error = NULL;

  mapped_file = g_mapped_file_new (filename, FALSE, &error);

  if (! mapped_file)
    {
      /* Use a real error domain (0 is not a valid GQuark domain) and the
       * same translated message as the parse failure below. */
      g_set_error (load_error, G_FILE_ERROR, G_FILE_ERROR_FAILED,
                   _("Could not load '%s': %s"),
                   gimp_filename_to_utf8 (filename),
                   error->message);
      g_error_free (error);
      return NULL;
    }

  doc = poppler_document_new_from_data (g_mapped_file_get_contents (mapped_file),
                                        g_mapped_file_get_length (mapped_file),
                                        NULL,
                                        &error);

  /* We can't g_mapped_file_unref(mapped_file) on success as apparently
   * doc has references to data in there. No big deal, this is just a
   * short-lived plug-in.
   */
  if (! doc)
    {
      g_set_error (load_error, G_FILE_ERROR, G_FILE_ERROR_FAILED,
                   _("Could not load '%s': %s"),
                   gimp_filename_to_utf8 (filename),
                   error->message);
      g_error_free (error);

      /* No document exists, so nothing refers to the mapping any more. */
      g_mapped_file_unref (mapped_file);
      return NULL;
    }

  return doc;
}
int main(int argc, char **argv) { int i = 0, numframes; char *filename = NULL; gchar *notefile = NULL; FILE *fp = NULL; struct stat statbuf; char *databuf = NULL; GError *err = NULL; gtk_init(&argc, &argv); /* Load preferences first. Command line options will override those * preferences. */ loadPreferences(); /* Read defaults from preferences. */ filename = NULL; numframes = 2 * prefs.slide_context + 1; runpref.do_wrapping = prefs.do_wrapping; runpref.do_notectrl = prefs.do_notectrl; runpref.cache_max = prefs.cache_max; runpref.fit_mode = prefs.initial_fit_mode; /* get options via getopt */ while ((i = getopt(argc, argv, "s:wnc:N:CTv")) != -1) { switch (i) { case 's': numframes = 2 * atoi(optarg) + 1; if (numframes <= 1) { fprintf(stderr, "Invalid slide count specified.\n"); usage(argv[0]); exit(EXIT_FAILURE); } break; case 'w': runpref.do_wrapping = TRUE; break; case 'n': runpref.do_notectrl = TRUE; break; case 'c': /* don't care if that number is invalid. it'll get * re-adjusted anyway if it's too small. */ runpref.cache_max = atoi(optarg); break; case 'N': notefile = g_strdup(optarg); break; case 'C': /* Force the timer to be a clock. */ prefs.timer_is_clock = TRUE; break; case 'T': /* Force the timer to be a timer (not a clock). */ prefs.timer_is_clock = FALSE; break; case 'v': printf("pdfpres version: %s\n", PDFPRES_VERSION); exit(EXIT_SUCCESS); break; case '?': exit(EXIT_FAILURE); break; } } /* retrieve file name via first non-option argument */ if (optind < argc) { filename = argv[optind]; } if (filename == NULL) { fprintf(stderr, "Invalid file path specified.\n"); usage(argv[0]); exit(EXIT_FAILURE); } /* for the cache to be useful, we'll need at least "some" items. * that is 2 items (prev and next) per preview viewport and 2 * items for the beamer port. * * this means that switching to the previous and next slide will * always be fast. * * note: numframes is not negative (see above), so that cast is okay. 
*/ if (runpref.cache_max < (guint)((numframes + 1) * 2)) runpref.cache_max = (guint)((numframes + 1) * 2); /* try to load the file */ if (stat(filename, &statbuf) == -1) { perror("Could not stat file"); exit(EXIT_FAILURE); } /* note: this buffer must not be freed, it'll be used by poppler * later on. */ databuf = (char *)malloc(statbuf.st_size); g_assert(databuf); fp = fopen(filename, "rb"); if (!fp) { perror("Could not open file"); exit(EXIT_FAILURE); } /* Read 1 element of size "statbuf.st_size". fread() returns the * number of items successfully read. Thus, a return value of "1" * means "success" and anything else is an error. */ if (fread(databuf, statbuf.st_size, 1, fp) != 1) { fprintf(stderr, "Unexpected end of file.\n"); exit(EXIT_FAILURE); } fclose(fp); /* get document from data */ doc = poppler_document_new_from_data(databuf, statbuf.st_size, NULL, &err); if (!doc) { fprintf(stderr, "%s\n", err->message); g_error_free(err); exit(EXIT_FAILURE); } doc_n_pages = poppler_document_get_n_pages(doc); if (doc_n_pages <= 0) { fprintf(stderr, "Huh, no pages in that document.\n"); exit(EXIT_FAILURE); } initGUI(numframes, notefile); /* queue initial prerendering. */ preQueued = TRUE; g_idle_add(idleFillCaches, NULL); gtk_main(); exit(EXIT_SUCCESS); }
int main(int argc, char **argv) { int i = 0, numframes; char *filename = NULL; gchar *notefile = NULL; FILE *fp = NULL; struct stat statbuf; char *databuf = NULL; GError *err = NULL; gtk_init(&argc, &argv); /* Load preferences first. Command line options will override those * preferences. */ loadPreferences(); /* Read defaults from preferences. */ filename = NULL; numframes = 2 * prefs.slide_context + 1; runpref.do_wrapping = prefs.do_wrapping; runpref.do_notectrl = prefs.do_notectrl; runpref.fit_mode = prefs.initial_fit_mode; /* get options via getopt */ while ((i = getopt(argc, argv, "s:wnN:CTv")) != -1) { switch (i) { case 's': numframes = 2 * atoi(optarg) + 1; if (numframes <= 1) { fprintf(stderr, "Invalid slide count specified.\n"); usage(argv[0]); exit(EXIT_FAILURE); } break; case 'w': runpref.do_wrapping = TRUE; break; case 'n': runpref.do_notectrl = TRUE; break; case 'N': notefile = g_strdup(optarg); break; case 'C': /* Force the timer to be a clock. */ prefs.timer_is_clock = TRUE; break; case 'T': /* Force the timer to be a timer (not a clock). */ prefs.timer_is_clock = FALSE; break; case 'v': printf("pdfpres version: %s\n", PDFPRES_VERSION); exit(EXIT_SUCCESS); break; case '?': exit(EXIT_FAILURE); break; } } /* retrieve file name via first non-option argument */ if (optind < argc) { filename = argv[optind]; } if (filename == NULL) { fprintf(stderr, "Invalid file path specified.\n"); usage(argv[0]); exit(EXIT_FAILURE); } /* try to load the file */ if (stat(filename, &statbuf) == -1) { perror("Could not stat file"); exit(EXIT_FAILURE); } /* note: this buffer must not be freed, it'll be used by poppler * later on. */ databuf = (char *)malloc(statbuf.st_size); g_assert(databuf); fp = fopen(filename, "rb"); if (!fp) { perror("Could not open file"); exit(EXIT_FAILURE); } /* Read 1 element of size "statbuf.st_size". fread() returns the * number of items successfully read. Thus, a return value of "1" * means "success" and anything else is an error. 
*/ if (fread(databuf, statbuf.st_size, 1, fp) != 1) { fprintf(stderr, "Unexpected end of file.\n"); exit(EXIT_FAILURE); } fclose(fp); /* get document from data */ doc = poppler_document_new_from_data(databuf, statbuf.st_size, NULL, &err); if (!doc) { fprintf(stderr, "%s\n", err->message); g_error_free(err); exit(EXIT_FAILURE); } doc_n_pages = poppler_document_get_n_pages(doc); if (doc_n_pages <= 0) { fprintf(stderr, "Huh, no pages in that document.\n"); exit(EXIT_FAILURE); } PopplerPage *page = poppler_document_get_page(doc, doc_n_pages-1); doc_last_page = atoi(poppler_page_get_label(page)); g_object_unref(G_OBJECT(page)); initGUI(numframes, notefile); gtk_main(); exit(EXIT_SUCCESS); }
/*
 * Extractor entry point for PDF files (SPARQL builder variant).
 *
 * Memory-maps the file behind @info, parses it with poppler and writes
 * the document properties, any embedded XMP metadata, keyword tags, the
 * page count, plain text content and the outline into the metadata and
 * preupdate builders obtained from @info.
 *
 * Returns TRUE when metadata was written.  This includes encrypted
 * documents, for which only the type and nfo:isContentEncrypted are
 * written.  Returns FALSE when the file cannot be opened, stat'ed, mapped
 * or parsed.
 *
 * Every return path taken after the mmap() releases the mapping and the
 * file descriptor.
 */
G_MODULE_EXPORT gboolean
tracker_extract_get_metadata (TrackerExtractInfo *info)
{
	TrackerConfig *config;
	GTime creation_date;
	GError *error = NULL;
	TrackerSparqlBuilder *metadata, *preupdate;
	const gchar *graph;
	const gchar *urn;
	TrackerXmpData *xd = NULL;
	PDFData pd = { 0 }; /* actual data */
	PDFData md = { 0 }; /* for merging */
	PopplerDocument *document;
	gchar *xml = NULL;
	gchar *content, *uri;
	guint n_bytes;
	GPtrArray *keywords;
	guint i;
	GFile *file;
	gchar *filename;
	int fd;
	gchar *contents = NULL;
	gsize len;
	struct stat st;

	metadata = tracker_extract_info_get_metadata_builder (info);
	preupdate = tracker_extract_info_get_preupdate_builder (info);
	graph = tracker_extract_info_get_graph (info);
	urn = tracker_extract_info_get_urn (info);

	file = tracker_extract_info_get_file (info);
	filename = g_file_get_path (file);

	fd = tracker_file_open_fd (filename);

	if (fd == -1) {
		g_warning ("Could not open pdf file '%s': %s\n",
		           filename,
		           g_strerror (errno));
		g_free (filename);
		return FALSE;
	}

	if (fstat (fd, &st) == -1) {
		g_warning ("Could not fstat pdf file '%s': %s\n",
		           filename,
		           g_strerror (errno));
		close (fd);
		g_free (filename);
		return FALSE;
	}

	if (st.st_size == 0) {
		contents = NULL;
		len = 0;
	} else {
		contents = (gchar *) mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
		if (contents == NULL || contents == MAP_FAILED) {
			g_warning ("Could not mmap pdf file '%s': %s\n",
			           filename,
			           g_strerror (errno));
			close (fd);
			g_free (filename);
			return FALSE;
		}
		len = st.st_size;
	}

	g_free (filename);
	uri = g_file_get_uri (file);

	document = poppler_document_new_from_data (contents, len, NULL, &error);

	if (error) {
		/* Check the domain as well as the code: codes are only
		 * meaningful within their own error domain. */
		gboolean encrypted = g_error_matches (error,
		                                      POPPLER_ERROR,
		                                      POPPLER_ERROR_ENCRYPTED);

		if (encrypted) {
			tracker_sparql_builder_predicate (metadata, "a");
			tracker_sparql_builder_object (metadata, "nfo:PaginatedTextDocument");

			tracker_sparql_builder_predicate (metadata, "nfo:isContentEncrypted");
			tracker_sparql_builder_object_boolean (metadata, TRUE);
		} else {
			g_warning ("Couldn't create PopplerDocument from uri:'%s', %s",
			           uri,
			           error->message ? error->message : "no error given");
		}

		g_error_free (error);
		g_free (uri);

		/* No document exists, so the mapping can be dropped here. */
		if (contents) {
			munmap (contents, len);
		}
		close (fd);

		return encrypted;
	}

	if (!document) {
		g_warning ("Could not create PopplerDocument from uri:'%s', "
		           "NULL returned without an error",
		           uri);
		g_free (uri);

		if (contents) {
			munmap (contents, len);
		}
		close (fd);

		return FALSE;
	}

	tracker_sparql_builder_predicate (metadata, "a");
	tracker_sparql_builder_object (metadata, "nfo:PaginatedTextDocument");

	g_object_get (document,
	              "title", &pd.title,
	              "author", &pd.author,
	              "subject", &pd.subject,
	              "keywords", &pd.keywords,
	              "creation-date", &creation_date,
	              "metadata", &xml,
	              NULL);

	if (creation_date > 0) {
		pd.creation_date = tracker_date_to_string ((time_t) creation_date);
	}

	keywords = g_ptr_array_new_with_free_func ((GDestroyNotify) g_free);

	if (xml && *xml &&
	    (xd = tracker_xmp_new (xml, strlen (xml), uri)) != NULL) {
		/* The casts here are well understood and known */
		md.title = (gchar *) tracker_coalesce_strip (4, pd.title, xd->title, xd->title2, xd->pdf_title);
		md.subject = (gchar *) tracker_coalesce_strip (2, pd.subject, xd->subject);
		md.date = (gchar *) tracker_coalesce_strip (3, pd.creation_date, xd->date, xd->time_original);
		md.author = (gchar *) tracker_coalesce_strip (2, pd.author, xd->creator);

		write_pdf_data (md, metadata, keywords);

		if (xd->keywords) {
			tracker_keywords_parse (keywords, xd->keywords);
		}

		if (xd->pdf_keywords) {
			tracker_keywords_parse (keywords, xd->pdf_keywords);
		}

		if (xd->publisher) {
			tracker_sparql_builder_predicate (metadata, "nco:publisher");
			tracker_sparql_builder_object_blank_open (metadata);
			tracker_sparql_builder_predicate (metadata, "a");
			tracker_sparql_builder_object (metadata, "nco:Contact");
			tracker_sparql_builder_predicate (metadata, "nco:fullname");
			tracker_sparql_builder_object_unvalidated (metadata, xd->publisher);
			tracker_sparql_builder_object_blank_close (metadata);
		}

		if (xd->type) {
			tracker_sparql_builder_predicate (metadata, "dc:type");
			tracker_sparql_builder_object_unvalidated (metadata, xd->type);
		}

		if (xd->format) {
			tracker_sparql_builder_predicate (metadata, "dc:format");
			tracker_sparql_builder_object_unvalidated (metadata, xd->format);
		}

		if (xd->identifier) {
			tracker_sparql_builder_predicate (metadata, "dc:identifier");
			tracker_sparql_builder_object_unvalidated (metadata, xd->identifier);
		}

		if (xd->source) {
			tracker_sparql_builder_predicate (metadata, "dc:source");
			tracker_sparql_builder_object_unvalidated (metadata, xd->source);
		}

		if (xd->language) {
			tracker_sparql_builder_predicate (metadata, "dc:language");
			tracker_sparql_builder_object_unvalidated (metadata, xd->language);
		}

		if (xd->relation) {
			tracker_sparql_builder_predicate (metadata, "dc:relation");
			tracker_sparql_builder_object_unvalidated (metadata, xd->relation);
		}

		if (xd->coverage) {
			tracker_sparql_builder_predicate (metadata, "dc:coverage");
			tracker_sparql_builder_object_unvalidated (metadata, xd->coverage);
		}

		if (xd->license) {
			tracker_sparql_builder_predicate (metadata, "nie:license");
			tracker_sparql_builder_object_unvalidated (metadata, xd->license);
		}

		if (xd->make || xd->model) {
			gchar *equip_uri;

			equip_uri = tracker_sparql_escape_uri_printf ("urn:equipment:%s:%s:",
			                                              xd->make ? xd->make : "",
			                                              xd->model ? xd->model : "");

			tracker_sparql_builder_insert_open (preupdate, NULL);
			if (graph) {
				tracker_sparql_builder_graph_open (preupdate, graph);
			}

			tracker_sparql_builder_subject_iri (preupdate, equip_uri);
			tracker_sparql_builder_predicate (preupdate, "a");
			tracker_sparql_builder_object (preupdate, "nfo:Equipment");

			if (xd->make) {
				tracker_sparql_builder_predicate (preupdate, "nfo:manufacturer");
				tracker_sparql_builder_object_unvalidated (preupdate, xd->make);
			}

			if (xd->model) {
				tracker_sparql_builder_predicate (preupdate, "nfo:model");
				tracker_sparql_builder_object_unvalidated (preupdate, xd->model);
			}

			if (graph) {
				tracker_sparql_builder_graph_close (preupdate);
			}
			tracker_sparql_builder_insert_close (preupdate);

			tracker_sparql_builder_predicate (metadata, "nfo:equipment");
			tracker_sparql_builder_object_iri (metadata, equip_uri);
			g_free (equip_uri);
		}

		if (xd->orientation) {
			tracker_sparql_builder_predicate (metadata, "nfo:orientation");
			tracker_sparql_builder_object (metadata, xd->orientation);
		}

		if (xd->rights) {
			tracker_sparql_builder_predicate (metadata, "nie:copyright");
			tracker_sparql_builder_object_unvalidated (metadata, xd->rights);
		}

		if (xd->white_balance) {
			tracker_sparql_builder_predicate (metadata, "nmm:whiteBalance");
			tracker_sparql_builder_object (metadata, xd->white_balance);
		}

		if (xd->fnumber) {
			gdouble value;

			value = g_strtod (xd->fnumber, NULL);
			tracker_sparql_builder_predicate (metadata, "nmm:fnumber");
			tracker_sparql_builder_object_double (metadata, value);
		}

		if (xd->flash) {
			tracker_sparql_builder_predicate (metadata, "nmm:flash");
			tracker_sparql_builder_object (metadata, xd->flash);
		}

		if (xd->focal_length) {
			gdouble value;

			value = g_strtod (xd->focal_length, NULL);
			tracker_sparql_builder_predicate (metadata, "nmm:focalLength");
			tracker_sparql_builder_object_double (metadata, value);
		}

		/* Question: Shouldn't xd->Artist be merged with md.author instead? */

		if (xd->artist || xd->contributor) {
			const gchar *artist;

			artist = tracker_coalesce_strip (2, xd->artist, xd->contributor);
			tracker_sparql_builder_predicate (metadata, "nco:contributor");
			tracker_sparql_builder_object_blank_open (metadata);
			tracker_sparql_builder_predicate (metadata, "a");
			tracker_sparql_builder_object (metadata, "nco:Contact");
			tracker_sparql_builder_predicate (metadata, "nco:fullname");
			tracker_sparql_builder_object_unvalidated (metadata, artist);
			tracker_sparql_builder_object_blank_close (metadata);
		}

		if (xd->exposure_time) {
			gdouble value;

			value = g_strtod (xd->exposure_time, NULL);
			tracker_sparql_builder_predicate (metadata, "nmm:exposureTime");
			tracker_sparql_builder_object_double (metadata, value);
		}

		if (xd->iso_speed_ratings) {
			gdouble value;

			value = g_strtod (xd->iso_speed_ratings, NULL);
			tracker_sparql_builder_predicate (metadata, "nmm:isoSpeed");
			tracker_sparql_builder_object_double (metadata, value);
		}

		if (xd->description) {
			tracker_sparql_builder_predicate (metadata, "nie:description");
			tracker_sparql_builder_object_unvalidated (metadata, xd->description);
		}

		if (xd->metering_mode) {
			tracker_sparql_builder_predicate (metadata, "nmm:meteringMode");
			tracker_sparql_builder_object (metadata, xd->metering_mode);
		}

		if (xd->address || xd->state || xd->country || xd->city ||
		    xd->gps_altitude || xd->gps_latitude || xd->gps_longitude) {

			tracker_sparql_builder_predicate (metadata, "slo:location");

			tracker_sparql_builder_object_blank_open (metadata); /* GeoLocation */
			tracker_sparql_builder_predicate (metadata, "a");
			tracker_sparql_builder_object (metadata, "slo:GeoLocation");

			if (xd->address || xd->state || xd->country || xd->city) {
				gchar *addruri;

				addruri = tracker_sparql_get_uuid_urn ();

				tracker_sparql_builder_predicate (metadata, "slo:postalAddress");
				tracker_sparql_builder_object_iri (metadata, addruri);

				tracker_sparql_builder_insert_open (preupdate, NULL);
				if (graph) {
					tracker_sparql_builder_graph_open (preupdate, graph);
				}

				tracker_sparql_builder_subject_iri (preupdate, addruri);

				g_free (addruri);

				tracker_sparql_builder_predicate (preupdate, "a");
				tracker_sparql_builder_object (preupdate, "nco:PostalAddress");

				if (xd->address) {
					tracker_sparql_builder_predicate (preupdate, "nco:streetAddress");
					tracker_sparql_builder_object_unvalidated (preupdate, xd->address);
				}

				if (xd->state) {
					tracker_sparql_builder_predicate (preupdate, "nco:region");
					tracker_sparql_builder_object_unvalidated (preupdate, xd->state);
				}

				if (xd->city) {
					tracker_sparql_builder_predicate (preupdate, "nco:locality");
					tracker_sparql_builder_object_unvalidated (preupdate, xd->city);
				}

				if (xd->country) {
					tracker_sparql_builder_predicate (preupdate, "nco:country");
					tracker_sparql_builder_object_unvalidated (preupdate, xd->country);
				}

				if (graph) {
					tracker_sparql_builder_graph_close (preupdate);
				}
				tracker_sparql_builder_insert_close (preupdate);
			}

			if (xd->gps_altitude) {
				tracker_sparql_builder_predicate (metadata, "slo:altitude");
				tracker_sparql_builder_object_unvalidated (metadata, xd->gps_altitude);
			}

			if (xd->gps_latitude) {
				tracker_sparql_builder_predicate (metadata, "slo:latitude");
				tracker_sparql_builder_object_unvalidated (metadata, xd->gps_latitude);
			}

			if (xd->gps_longitude) {
				tracker_sparql_builder_predicate (metadata, "slo:longitude");
				tracker_sparql_builder_object_unvalidated (metadata, xd->gps_longitude);
			}

			tracker_sparql_builder_object_blank_close (metadata); /* GeoLocation */
		}

		if (xd->regions) {
			tracker_xmp_apply_regions (preupdate, metadata, graph, xd);
		}

		tracker_xmp_free (xd);
	} else {
		/* So if we are here we have NO XMP data and we just
		 * write what we know from Poppler.
		 */
		write_pdf_data (pd, metadata, keywords);
	}

	for (i = 0; i < keywords->len; i++) {
		gchar *escaped, *subject;
		const gchar *p;

		p = g_ptr_array_index (keywords, i);
		escaped = tracker_sparql_escape_string (p);
		subject = g_strdup_printf ("_:tag%d", i + 1);

		/* ensure tag with specified label exists */
		tracker_sparql_builder_insert_open (preupdate, graph);
		tracker_sparql_builder_subject (preupdate, subject);
		tracker_sparql_builder_predicate (preupdate, "a");
		tracker_sparql_builder_object (preupdate, "nao:Tag");
		tracker_sparql_builder_predicate (preupdate, "nao:prefLabel");
		tracker_sparql_builder_object_unvalidated (preupdate, escaped);
		tracker_sparql_builder_insert_close (preupdate);
		tracker_sparql_builder_append (preupdate,
		                               "WHERE { FILTER (NOT EXISTS { "
		                               "?tag a nao:Tag ; nao:prefLabel \"");
		tracker_sparql_builder_append (preupdate, escaped);
		tracker_sparql_builder_append (preupdate,
		                               "\" }) }\n");

		/* associate file with tag */
		tracker_sparql_builder_insert_open (preupdate, graph);
		tracker_sparql_builder_subject_iri (preupdate, urn);
		tracker_sparql_builder_predicate (preupdate, "nao:hasTag");
		tracker_sparql_builder_object (preupdate, "?tag");
		tracker_sparql_builder_insert_close (preupdate);
		tracker_sparql_builder_where_open (preupdate);
		tracker_sparql_builder_subject (preupdate, "?tag");
		tracker_sparql_builder_predicate (preupdate, "a");
		tracker_sparql_builder_object (preupdate, "nao:Tag");
		tracker_sparql_builder_predicate (preupdate, "nao:prefLabel");
		tracker_sparql_builder_object_unvalidated (preupdate, escaped);
		tracker_sparql_builder_where_close (preupdate);

		g_free (subject);
		g_free (escaped);
	}
	g_ptr_array_free (keywords, TRUE);

	tracker_sparql_builder_predicate (metadata, "nfo:pageCount");
	tracker_sparql_builder_object_int64 (metadata, poppler_document_get_n_pages (document));

	config = tracker_main_get_config ();
	n_bytes = tracker_config_get_max_bytes (config);
	content = extract_content_text (document, n_bytes);

	if (content) {
		tracker_sparql_builder_predicate (metadata, "nie:plainTextContent");
		tracker_sparql_builder_object_unvalidated (metadata, content);
		g_free (content);
	}

	read_outline (document, metadata);

	/* Only the pd strings are owned here; md merely borrows pointers. */
	g_free (xml);
	g_free (pd.keywords);
	g_free (pd.title);
	g_free (pd.subject);
	g_free (pd.creation_date);
	g_free (pd.author);
	g_free (pd.date);
	g_free (uri);

	/* Drop the document before unmapping the bytes it was parsed from. */
	g_object_unref (document);

	if (contents) {
		munmap (contents, len);
	}

	close (fd);

	return TRUE;
}
/*
 * Extractor entry point for PDF files (TrackerResource variant).
 *
 * Memory-maps the file behind @info, parses it with poppler and collects
 * the document properties, any embedded XMP metadata, keyword tags, the
 * page count, plain text content and the outline into a TrackerResource
 * that is attached to @info.
 *
 * Returns TRUE when a resource was attached.  This includes encrypted
 * documents, for which only the type and nfo:isContentEncrypted are set.
 * Returns FALSE when the file cannot be opened, stat'ed, mapped or parsed.
 *
 * Every return path taken after the mmap() releases the mapping and the
 * file descriptor.
 */
G_MODULE_EXPORT gboolean
tracker_extract_get_metadata (TrackerExtractInfo *info)
{
	TrackerConfig *config;
	GTime creation_date;
	GError *error = NULL;
	TrackerResource *metadata;
	TrackerXmpData *xd = NULL;
	PDFData pd = { 0 }; /* actual data */
	PDFData md = { 0 }; /* for merging */
	PopplerDocument *document;
	gchar *xml = NULL;
	gchar *content, *uri;
	guint n_bytes;
	GPtrArray *keywords;
	guint i;
	GFile *file;
	gchar *filename;
	int fd;
	gchar *contents = NULL;
	gsize len;
	struct stat st;

	file = tracker_extract_info_get_file (info);
	filename = g_file_get_path (file);

	fd = tracker_file_open_fd (filename);

	if (fd == -1) {
		g_warning ("Could not open pdf file '%s': %s\n",
		           filename,
		           g_strerror (errno));
		g_free (filename);
		return FALSE;
	}

	if (fstat (fd, &st) == -1) {
		g_warning ("Could not fstat pdf file '%s': %s\n",
		           filename,
		           g_strerror (errno));
		close (fd);
		g_free (filename);
		return FALSE;
	}

	if (st.st_size == 0) {
		contents = NULL;
		len = 0;
	} else {
		contents = (gchar *) mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
		if (contents == NULL || contents == MAP_FAILED) {
			g_warning ("Could not mmap pdf file '%s': %s\n",
			           filename,
			           g_strerror (errno));
			close (fd);
			g_free (filename);
			return FALSE;
		}
		len = st.st_size;
	}

	g_free (filename);
	uri = g_file_get_uri (file);

	document = poppler_document_new_from_data (contents, len, NULL, &error);

	if (error) {
		/* Check the domain as well as the code: codes are only
		 * meaningful within their own error domain. */
		gboolean encrypted = g_error_matches (error,
		                                      POPPLER_ERROR,
		                                      POPPLER_ERROR_ENCRYPTED);

		if (encrypted) {
			metadata = tracker_resource_new (NULL);

			tracker_resource_add_uri (metadata, "rdf:type", "nfo:PaginatedTextDocument");
			tracker_resource_set_boolean (metadata, "nfo:isContentEncrypted", TRUE);

			tracker_extract_info_set_resource (info, metadata);
			g_object_unref (metadata);
		} else {
			g_warning ("Couldn't create PopplerDocument from uri:'%s', %s",
			           uri,
			           error->message ? error->message : "no error given");
		}

		g_error_free (error);
		g_free (uri);

		/* No document exists, so the mapping can be dropped here. */
		if (contents) {
			munmap (contents, len);
		}
		close (fd);

		return encrypted;
	}

	if (!document) {
		g_warning ("Could not create PopplerDocument from uri:'%s', "
		           "NULL returned without an error",
		           uri);
		g_free (uri);

		if (contents) {
			munmap (contents, len);
		}
		close (fd);

		return FALSE;
	}

	metadata = tracker_resource_new (NULL);
	tracker_resource_add_uri (metadata, "rdf:type", "nfo:PaginatedTextDocument");

	g_object_get (document,
	              "title", &pd.title,
	              "author", &pd.author,
	              "subject", &pd.subject,
	              "keywords", &pd.keywords,
	              "creation-date", &creation_date,
	              "metadata", &xml,
	              NULL);

	if (creation_date > 0) {
		pd.creation_date = tracker_date_to_string ((time_t) creation_date);
	}

	keywords = g_ptr_array_new_with_free_func ((GDestroyNotify) g_free);

	if (xml && *xml &&
	    (xd = tracker_xmp_new (xml, strlen (xml), uri)) != NULL) {
		/* The casts here are well understood and known */
		md.title = (gchar *) tracker_coalesce_strip (4, pd.title, xd->title, xd->title2, xd->pdf_title);
		md.subject = (gchar *) tracker_coalesce_strip (2, pd.subject, xd->subject);
		md.date = (gchar *) tracker_coalesce_strip (3, pd.creation_date, xd->date, xd->time_original);
		md.author = (gchar *) tracker_coalesce_strip (2, pd.author, xd->creator);

		write_pdf_data (md, metadata, keywords);

		if (xd->keywords) {
			tracker_keywords_parse (keywords, xd->keywords);
		}

		if (xd->pdf_keywords) {
			tracker_keywords_parse (keywords, xd->pdf_keywords);
		}

		if (xd->publisher) {
			TrackerResource *publisher = tracker_extract_new_contact (xd->publisher);

			tracker_resource_set_relation (metadata, "nco:publisher", publisher);
			g_object_unref (publisher);
		}

		if (xd->type) {
			tracker_resource_set_string (metadata, "dc:type", xd->type);
		}

		if (xd->format) {
			tracker_resource_set_string (metadata, "dc:format", xd->format);
		}

		if (xd->identifier) {
			tracker_resource_set_string (metadata, "dc:identifier", xd->identifier);
		}

		if (xd->source) {
			tracker_resource_set_string (metadata, "dc:source", xd->source);
		}

		if (xd->language) {
			tracker_resource_set_string (metadata, "dc:language", xd->language);
		}

		if (xd->relation) {
			tracker_resource_set_string (metadata, "dc:relation", xd->relation);
		}

		if (xd->coverage) {
			tracker_resource_set_string (metadata, "dc:coverage", xd->coverage);
		}

		if (xd->license) {
			tracker_resource_set_string (metadata, "nie:license", xd->license);
		}

		if (xd->make || xd->model) {
			TrackerResource *equipment = tracker_extract_new_equipment (xd->make, xd->model);

			tracker_resource_set_relation (metadata, "nfo:equipment", equipment);
			g_object_unref (equipment);
		}

		if (xd->orientation) {
			tracker_resource_set_string (metadata, "nfo:orientation", xd->orientation);
		}

		if (xd->rights) {
			tracker_resource_set_string (metadata, "nie:copyright", xd->rights);
		}

		if (xd->white_balance) {
			tracker_resource_set_string (metadata, "nmm:whiteBalance", xd->white_balance);
		}

		if (xd->fnumber) {
			gdouble value;

			value = g_strtod (xd->fnumber, NULL);
			tracker_resource_set_double (metadata, "nmm:fnumber", value);
		}

		if (xd->flash) {
			tracker_resource_set_string (metadata, "nmm:flash", xd->flash);
		}

		if (xd->focal_length) {
			gdouble value;

			value = g_strtod (xd->focal_length, NULL);
			tracker_resource_set_double (metadata, "nmm:focalLength", value);
		}

		/* Question: Shouldn't xd->Artist be merged with md.author instead? */

		if (xd->artist || xd->contributor) {
			TrackerResource *artist;
			const gchar *artist_name;

			artist_name = tracker_coalesce_strip (2, xd->artist, xd->contributor);
			artist = tracker_extract_new_contact (artist_name);
			tracker_resource_set_relation (metadata, "nco:contributor", artist);
			g_object_unref (artist);
		}

		if (xd->exposure_time) {
			gdouble value;

			value = g_strtod (xd->exposure_time, NULL);
			tracker_resource_set_double (metadata, "nmm:exposureTime", value);
		}

		if (xd->iso_speed_ratings) {
			gdouble value;

			value = g_strtod (xd->iso_speed_ratings, NULL);
			tracker_resource_set_double (metadata, "nmm:isoSpeed", value);
		}

		if (xd->description) {
			tracker_resource_set_string (metadata, "nie:description", xd->description);
		}

		if (xd->metering_mode) {
			tracker_resource_set_string (metadata, "nmm:meteringMode", xd->metering_mode);
		}

		if (xd->address || xd->state || xd->country || xd->city ||
		    xd->gps_altitude || xd->gps_latitude || xd->gps_longitude) {
			TrackerResource *location = tracker_extract_new_location (xd->address,
			                                                          xd->state,
			                                                          xd->city,
			                                                          xd->country,
			                                                          xd->gps_altitude,
			                                                          xd->gps_latitude,
			                                                          xd->gps_longitude);

			tracker_resource_set_relation (metadata, "slo:location", location);
			g_object_unref (location);
		}

		if (xd->regions) {
			tracker_xmp_apply_regions_to_resource (metadata, xd);
		}

		tracker_xmp_free (xd);
	} else {
		/* So if we are here we have NO XMP data and we just
		 * write what we know from Poppler.
		 */
		write_pdf_data (pd, metadata, keywords);
	}

	for (i = 0; i < keywords->len; i++) {
		TrackerResource *tag;
		const gchar *p;

		p = g_ptr_array_index (keywords, i);
		tag = tracker_extract_new_tag (p);

		tracker_resource_add_relation (metadata, "nao:hasTag", tag);
		g_object_unref (tag);
	}
	g_ptr_array_free (keywords, TRUE);

	tracker_resource_set_int64 (metadata, "nfo:pageCount", poppler_document_get_n_pages(document));

	config = tracker_main_get_config ();
	n_bytes = tracker_config_get_max_bytes (config);
	content = extract_content_text (document, n_bytes);

	if (content) {
		tracker_resource_set_string (metadata, "nie:plainTextContent", content);
		g_free (content);
	}

	read_outline (document, metadata);

	/* Only the pd strings are owned here; md merely borrows pointers. */
	g_free (xml);
	g_free (pd.keywords);
	g_free (pd.title);
	g_free (pd.subject);
	g_free (pd.creation_date);
	g_free (pd.author);
	g_free (pd.date);
	g_free (uri);

	/* Drop the document before unmapping the bytes it was parsed from. */
	g_object_unref (document);

	if (contents) {
		munmap (contents, len);
	}

	close (fd);

	tracker_extract_info_set_resource (info, metadata);
	g_object_unref (metadata);

	return TRUE;
}