static void sanity_check_option_values (TrackerConfig *config) { g_message ("General options:"); g_message (" Verbosity ............................ %d", tracker_config_get_verbosity (config)); g_message (" Sched Idle ........................... %d", tracker_config_get_sched_idle (config)); g_message (" Max bytes (per file) ................. %d", tracker_config_get_max_bytes (config)); }
static gchar * extract_opf_contents (const gchar *uri, const gchar *content_prefix, GList *content_files) { OPFContentData content_data = { 0 }; TrackerConfig *config; GError *error = NULL; GList *l; GMarkupParser xml_parser = { NULL, NULL, content_xml_text_handler, NULL, NULL }; config = tracker_main_get_config (); content_data.contents = g_string_new (""); content_data.limit = (gsize) tracker_config_get_max_bytes (config); g_debug ("Extracting up to %" G_GSIZE_FORMAT " bytes of content", content_data.limit); for (l = content_files; l; l = l->next) { GMarkupParseContext *context; gchar *path; context = g_markup_parse_context_new (&xml_parser, 0, &content_data, NULL); /* Page file is relative to OPF file location */ path = g_build_filename (content_prefix, l->data, NULL); tracker_gsf_parse_xml_in_zip (uri, path, context, &error); if (error) { g_warning ("Error extracting EPUB contents (%s): %s", path, error->message); g_clear_error (&error); } g_free (path); g_markup_parse_context_free (context); if (content_data.limit <= 0) { /* Reached plain text extraction limit */ break; } } return g_string_free (content_data.contents, FALSE); }
G_MODULE_EXPORT gboolean tracker_extract_get_metadata (TrackerExtractInfo *info) { TrackerSparqlBuilder *metadata; TrackerConfig *config; gchar *content; config = tracker_main_get_config (); metadata = tracker_extract_info_get_metadata_builder (info); content = get_file_content (tracker_extract_info_get_file (info), tracker_config_get_max_bytes (config)); tracker_sparql_builder_predicate (metadata, "a"); tracker_sparql_builder_object (metadata, "nfo:PlainTextDocument"); if (content) { tracker_sparql_builder_predicate (metadata, "nie:plainTextContent"); tracker_sparql_builder_object_unvalidated (metadata, content); g_free (content); } return TRUE; }
G_MODULE_EXPORT gboolean tracker_extract_get_metadata (TrackerExtractInfo *info) { TrackerSparqlBuilder *metadata; GFile *file; TrackerConfig *config; htmlDocPtr doc; parser_data pd; gchar *filename; xmlSAXHandler handler = { NULL, /* internalSubset */ NULL, /* isStandalone */ NULL, /* hasInternalSubset */ NULL, /* hasExternalSubset */ NULL, /* resolveEntity */ NULL, /* getEntity */ NULL, /* entityDecl */ NULL, /* notationDecl */ NULL, /* attributeDecl */ NULL, /* elementDecl */ NULL, /* unparsedEntityDecl */ NULL, /* setDocumentLocator */ NULL, /* startDocument */ NULL, /* endDocument */ parser_start_element, /* startElement */ parser_end_element, /* endElement */ NULL, /* reference */ parser_characters, /* characters */ NULL, /* ignorableWhitespace */ NULL, /* processingInstruction */ NULL, /* comment */ NULL, /* xmlParserWarning */ NULL, /* xmlParserError */ NULL, /* xmlParserError */ NULL, /* getParameterEntity */ NULL, /* cdataBlock */ NULL, /* externalSubset */ 1, /* initialized */ NULL, /* private */ NULL, /* startElementNsSAX2Func */ NULL, /* endElementNsSAX2Func */ NULL /* xmlStructuredErrorFunc */ }; metadata = tracker_extract_info_get_metadata_builder (info); file = tracker_extract_info_get_file (info); tracker_sparql_builder_predicate (metadata, "a"); tracker_sparql_builder_object (metadata, "nfo:HtmlDocument"); pd.metadata = metadata; pd.current = -1; pd.in_body = FALSE; pd.plain_text = g_string_new (NULL); pd.title = g_string_new (NULL); config = tracker_main_get_config (); pd.n_bytes_remaining = tracker_config_get_max_bytes (config); filename = g_file_get_path (file); doc = htmlSAXParseFile (filename, NULL, &handler, &pd); g_free (filename); if (doc) { xmlFreeDoc (doc); } g_strstrip (pd.plain_text->str); g_strstrip (pd.title->str); if (pd.title->str && *pd.title->str != '\0') { tracker_sparql_builder_predicate (metadata, "nie:title"); tracker_sparql_builder_object_unvalidated (metadata, pd.title->str); } if (pd.plain_text->str && *pd.plain_text->str != '\0') { tracker_sparql_builder_predicate (metadata, "nie:plainTextContent"); tracker_sparql_builder_object_unvalidated (metadata, pd.plain_text->str); } g_string_free (pd.plain_text, TRUE); g_string_free (pd.title, TRUE); return TRUE; }
G_MODULE_EXPORT gboolean tracker_extract_get_metadata (TrackerExtractInfo *info) { TrackerConfig *config; GTime creation_date; GError *error = NULL; TrackerSparqlBuilder *metadata, *preupdate; const gchar *graph; const gchar *urn; TrackerXmpData *xd = NULL; PDFData pd = { 0 }; /* actual data */ PDFData md = { 0 }; /* for merging */ PopplerDocument *document; gchar *xml = NULL; gchar *content, *uri; guint n_bytes; GPtrArray *keywords; guint i; GFile *file; gchar *filename; int fd; gchar *contents = NULL; gsize len; struct stat st; metadata = tracker_extract_info_get_metadata_builder (info); preupdate = tracker_extract_info_get_preupdate_builder (info); graph = tracker_extract_info_get_graph (info); urn = tracker_extract_info_get_urn (info); file = tracker_extract_info_get_file (info); filename = g_file_get_path (file); fd = tracker_file_open_fd (filename); if (fd == -1) { g_warning ("Could not open pdf file '%s': %s\n", filename, g_strerror (errno)); g_free (filename); return FALSE; } if (fstat (fd, &st) == -1) { g_warning ("Could not fstat pdf file '%s': %s\n", filename, g_strerror (errno)); close (fd); g_free (filename); return FALSE; } if (st.st_size == 0) { contents = NULL; len = 0; } else { contents = (gchar *) mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (contents == NULL || contents == MAP_FAILED) { g_warning ("Could not mmap pdf file '%s': %s\n", filename, g_strerror (errno)); close (fd); g_free (filename); return FALSE; } len = st.st_size; } g_free (filename); uri = g_file_get_uri (file); document = poppler_document_new_from_data (contents, len, NULL, &error); if (error) { if (error->code == POPPLER_ERROR_ENCRYPTED) { tracker_sparql_builder_predicate (metadata, "a"); tracker_sparql_builder_object (metadata, "nfo:PaginatedTextDocument"); tracker_sparql_builder_predicate (metadata, "nfo:isContentEncrypted"); tracker_sparql_builder_object_boolean (metadata, TRUE); g_error_free (error); g_free (uri); close (fd); return TRUE; } else { g_warning ("Couldn't create PopplerDocument from uri:'%s', %s", uri, error->message ? error->message : "no error given"); g_error_free (error); g_free (uri); close (fd); return FALSE; } } if (!document) { g_warning ("Could not create PopplerDocument from uri:'%s', " "NULL returned without an error", uri); g_free (uri); close (fd); return FALSE; } tracker_sparql_builder_predicate (metadata, "a"); tracker_sparql_builder_object (metadata, "nfo:PaginatedTextDocument"); g_object_get (document, "title", &pd.title, "author", &pd.author, "subject", &pd.subject, "keywords", &pd.keywords, "creation-date", &creation_date, "metadata", &xml, NULL); if (creation_date > 0) { pd.creation_date = tracker_date_to_string ((time_t) creation_date); } keywords = g_ptr_array_new_with_free_func ((GDestroyNotify) g_free); if (xml && *xml && (xd = tracker_xmp_new (xml, strlen (xml), uri)) != NULL) { /* The casts here are well understood and known */ md.title = (gchar *) tracker_coalesce_strip (4, pd.title, xd->title, xd->title2, xd->pdf_title); md.subject = (gchar *) tracker_coalesce_strip (2, pd.subject, xd->subject); md.date = (gchar *) tracker_coalesce_strip (3, pd.creation_date, xd->date, xd->time_original); md.author = (gchar *) tracker_coalesce_strip (2, pd.author, xd->creator); write_pdf_data (md, metadata, keywords); if (xd->keywords) { tracker_keywords_parse (keywords, xd->keywords); } if (xd->pdf_keywords) { tracker_keywords_parse (keywords, xd->pdf_keywords); } if (xd->publisher) { tracker_sparql_builder_predicate (metadata, "nco:publisher"); tracker_sparql_builder_object_blank_open (metadata); tracker_sparql_builder_predicate (metadata, "a"); tracker_sparql_builder_object (metadata, "nco:Contact"); tracker_sparql_builder_predicate (metadata, "nco:fullname"); tracker_sparql_builder_object_unvalidated (metadata, xd->publisher); tracker_sparql_builder_object_blank_close (metadata); } if (xd->type) { tracker_sparql_builder_predicate (metadata, "dc:type"); tracker_sparql_builder_object_unvalidated (metadata, xd->type); } if (xd->format) { tracker_sparql_builder_predicate (metadata, "dc:format"); tracker_sparql_builder_object_unvalidated (metadata, xd->format); } if (xd->identifier) { tracker_sparql_builder_predicate (metadata, "dc:identifier"); tracker_sparql_builder_object_unvalidated (metadata, xd->identifier); } if (xd->source) { tracker_sparql_builder_predicate (metadata, "dc:source"); tracker_sparql_builder_object_unvalidated (metadata, xd->source); } if (xd->language) { tracker_sparql_builder_predicate (metadata, "dc:language"); tracker_sparql_builder_object_unvalidated (metadata, xd->language); } if (xd->relation) { tracker_sparql_builder_predicate (metadata, "dc:relation"); tracker_sparql_builder_object_unvalidated (metadata, xd->relation); } if (xd->coverage) { tracker_sparql_builder_predicate (metadata, "dc:coverage"); tracker_sparql_builder_object_unvalidated (metadata, xd->coverage); } if (xd->license) { tracker_sparql_builder_predicate (metadata, "nie:license"); tracker_sparql_builder_object_unvalidated (metadata, xd->license); } if (xd->make || xd->model) { gchar *equip_uri; equip_uri = tracker_sparql_escape_uri_printf ("urn:equipment:%s:%s:", xd->make ? xd->make : "", xd->model ? xd->model : ""); tracker_sparql_builder_insert_open (preupdate, NULL); if (graph) { tracker_sparql_builder_graph_open (preupdate, graph); } tracker_sparql_builder_subject_iri (preupdate, equip_uri); tracker_sparql_builder_predicate (preupdate, "a"); tracker_sparql_builder_object (preupdate, "nfo:Equipment"); if (xd->make) { tracker_sparql_builder_predicate (preupdate, "nfo:manufacturer"); tracker_sparql_builder_object_unvalidated (preupdate, xd->make); } if (xd->model) { tracker_sparql_builder_predicate (preupdate, "nfo:model"); tracker_sparql_builder_object_unvalidated (preupdate, xd->model); } if (graph) { tracker_sparql_builder_graph_close (preupdate); } tracker_sparql_builder_insert_close (preupdate); tracker_sparql_builder_predicate (metadata, "nfo:equipment"); tracker_sparql_builder_object_iri (metadata, equip_uri); g_free (equip_uri); } if (xd->orientation) { tracker_sparql_builder_predicate (metadata, "nfo:orientation"); tracker_sparql_builder_object (metadata, xd->orientation); } if (xd->rights) { tracker_sparql_builder_predicate (metadata, "nie:copyright"); tracker_sparql_builder_object_unvalidated (metadata, xd->rights); } if (xd->white_balance) { tracker_sparql_builder_predicate (metadata, "nmm:whiteBalance"); tracker_sparql_builder_object (metadata, xd->white_balance); } if (xd->fnumber) { gdouble value; value = g_strtod (xd->fnumber, NULL); tracker_sparql_builder_predicate (metadata, "nmm:fnumber"); tracker_sparql_builder_object_double (metadata, value); } if (xd->flash) { tracker_sparql_builder_predicate (metadata, "nmm:flash"); tracker_sparql_builder_object (metadata, xd->flash); } if (xd->focal_length) { gdouble value; value = g_strtod (xd->focal_length, NULL); tracker_sparql_builder_predicate (metadata, "nmm:focalLength"); tracker_sparql_builder_object_double (metadata, value); } /* Question: Shouldn't xd->Artist be merged with md.author instead? */ if (xd->artist || xd->contributor) { const gchar *artist; artist = tracker_coalesce_strip (2, xd->artist, xd->contributor); tracker_sparql_builder_predicate (metadata, "nco:contributor"); tracker_sparql_builder_object_blank_open (metadata); tracker_sparql_builder_predicate (metadata, "a"); tracker_sparql_builder_object (metadata, "nco:Contact"); tracker_sparql_builder_predicate (metadata, "nco:fullname"); tracker_sparql_builder_object_unvalidated (metadata, artist); tracker_sparql_builder_object_blank_close (metadata); } if (xd->exposure_time) { gdouble value; value = g_strtod (xd->exposure_time, NULL); tracker_sparql_builder_predicate (metadata, "nmm:exposureTime"); tracker_sparql_builder_object_double (metadata, value); } if (xd->iso_speed_ratings) { gdouble value; value = g_strtod (xd->iso_speed_ratings, NULL); tracker_sparql_builder_predicate (metadata, "nmm:isoSpeed"); tracker_sparql_builder_object_double (metadata, value); } if (xd->description) { tracker_sparql_builder_predicate (metadata, "nie:description"); tracker_sparql_builder_object_unvalidated (metadata, xd->description); } if (xd->metering_mode) { tracker_sparql_builder_predicate (metadata, "nmm:meteringMode"); tracker_sparql_builder_object (metadata, xd->metering_mode); } if (xd->address || xd->state || xd->country || xd->city || xd->gps_altitude || xd->gps_latitude || xd-> gps_longitude) { tracker_sparql_builder_predicate (metadata, "slo:location"); tracker_sparql_builder_object_blank_open (metadata); /* GeoLocation */ tracker_sparql_builder_predicate (metadata, "a"); tracker_sparql_builder_object (metadata, "slo:GeoLocation"); if (xd->address || xd->state || xd->country || xd->city) { gchar *addruri; addruri = tracker_sparql_get_uuid_urn (); tracker_sparql_builder_predicate (metadata, "slo:postalAddress"); tracker_sparql_builder_object_iri (metadata, addruri); tracker_sparql_builder_insert_open (preupdate, NULL); if (graph) { tracker_sparql_builder_graph_open (preupdate, graph); } tracker_sparql_builder_subject_iri (preupdate, addruri); g_free (addruri); tracker_sparql_builder_predicate (preupdate, "a"); tracker_sparql_builder_object (preupdate, "nco:PostalAddress"); if (xd->address) { tracker_sparql_builder_predicate (preupdate, "nco:streetAddress"); tracker_sparql_builder_object_unvalidated (preupdate, xd->address); } if (xd->state) { tracker_sparql_builder_predicate (preupdate, "nco:region"); tracker_sparql_builder_object_unvalidated (preupdate, xd->state); } if (xd->city) { tracker_sparql_builder_predicate (preupdate, "nco:locality"); tracker_sparql_builder_object_unvalidated (preupdate, xd->city); } if (xd->country) { tracker_sparql_builder_predicate (preupdate, "nco:country"); tracker_sparql_builder_object_unvalidated (preupdate, xd->country); } if (graph) { tracker_sparql_builder_graph_close (preupdate); } tracker_sparql_builder_insert_close (preupdate); } if (xd->gps_altitude) { tracker_sparql_builder_predicate (metadata, "slo:altitude"); tracker_sparql_builder_object_unvalidated (metadata, xd->gps_altitude); } if (xd->gps_latitude) { tracker_sparql_builder_predicate (metadata, "slo:latitude"); tracker_sparql_builder_object_unvalidated (metadata, xd->gps_latitude); } if (xd->gps_longitude) { tracker_sparql_builder_predicate (metadata, "slo:longitude"); tracker_sparql_builder_object_unvalidated (metadata, xd->gps_longitude); } tracker_sparql_builder_object_blank_close (metadata); /* GeoLocation */ } if (xd->regions) { tracker_xmp_apply_regions (preupdate, metadata, graph, xd); } tracker_xmp_free (xd); } else { /* So if we are here we have NO XMP data and we just * write what we know from Poppler. */ write_pdf_data (pd, metadata, keywords); } for (i = 0; i < keywords->len; i++) { gchar *escaped, *subject; const gchar *p; p = g_ptr_array_index (keywords, i); escaped = tracker_sparql_escape_string (p); subject = g_strdup_printf ("_:tag%d", i + 1); /* ensure tag with specified label exists */ tracker_sparql_builder_insert_open (preupdate, graph); tracker_sparql_builder_subject (preupdate, subject); tracker_sparql_builder_predicate (preupdate, "a"); tracker_sparql_builder_object (preupdate, "nao:Tag"); tracker_sparql_builder_predicate (preupdate, "nao:prefLabel"); tracker_sparql_builder_object_unvalidated (preupdate, escaped); tracker_sparql_builder_insert_close (preupdate); tracker_sparql_builder_append (preupdate, "WHERE { FILTER (NOT EXISTS { " "?tag a nao:Tag ; nao:prefLabel \""); tracker_sparql_builder_append (preupdate, escaped); tracker_sparql_builder_append (preupdate, "\" }) }\n"); /* associate file with tag */ tracker_sparql_builder_insert_open (preupdate, graph); tracker_sparql_builder_subject_iri (preupdate, urn); tracker_sparql_builder_predicate (preupdate, "nao:hasTag"); tracker_sparql_builder_object (preupdate, "?tag"); tracker_sparql_builder_insert_close (preupdate); tracker_sparql_builder_where_open (preupdate); tracker_sparql_builder_subject (preupdate, "?tag"); tracker_sparql_builder_predicate (preupdate, "a"); tracker_sparql_builder_object (preupdate, "nao:Tag"); tracker_sparql_builder_predicate (preupdate, "nao:prefLabel"); tracker_sparql_builder_object_unvalidated (preupdate, escaped); tracker_sparql_builder_where_close (preupdate); g_free (subject); g_free (escaped); } g_ptr_array_free (keywords, TRUE); tracker_sparql_builder_predicate (metadata, "nfo:pageCount"); tracker_sparql_builder_object_int64 (metadata, poppler_document_get_n_pages (document)); config = tracker_main_get_config (); n_bytes = tracker_config_get_max_bytes (config); content = extract_content_text (document, n_bytes); if (content) { tracker_sparql_builder_predicate (metadata, "nie:plainTextContent"); tracker_sparql_builder_object_unvalidated (metadata, content); g_free (content); } read_outline (document, metadata); g_free (xml); g_free (pd.keywords); g_free (pd.title); g_free (pd.subject); g_free (pd.creation_date); g_free (pd.author); g_free (pd.date); g_free (uri); g_object_unref (document); if (contents) { munmap (contents, len); } close (fd); return TRUE; }
G_MODULE_EXPORT gboolean tracker_extract_get_metadata (TrackerExtractInfo *extract_info) { TrackerResource *metadata; TrackerConfig *config; ODTMetadataParseInfo info = { 0 }; ODTFileType file_type; GFile *file; gchar *uri; const gchar *mime_used; GMarkupParseContext *context; GMarkupParser parser = { xml_start_element_handler_metadata, xml_end_element_handler_metadata, xml_text_handler_metadata, NULL, NULL }; if (G_UNLIKELY (maximum_size_error_quark == 0)) { maximum_size_error_quark = g_quark_from_static_string ("maximum_size_error"); } metadata = tracker_resource_new (NULL); mime_used = tracker_extract_info_get_mimetype (extract_info); file = tracker_extract_info_get_file (extract_info); uri = g_file_get_uri (file); /* Setup conf */ config = tracker_main_get_config (); g_debug ("Extracting OASIS metadata and contents from '%s'", uri); /* First, parse metadata */ tracker_resource_add_uri (metadata, "rdf:type", "nfo:PaginatedTextDocument"); /* Create parse info */ info.metadata = metadata; info.current = ODT_TAG_TYPE_UNKNOWN; info.uri = uri; /* Create parsing context */ context = g_markup_parse_context_new (&parser, 0, &info, NULL); /* Load the internal XML file from the Zip archive, and parse it * using the given context */ tracker_gsf_parse_xml_in_zip (uri, "meta.xml", context, NULL); g_markup_parse_context_free (context); if (g_ascii_strcasecmp (mime_used, "application/vnd.oasis.opendocument.text") == 0) { file_type = FILE_TYPE_ODT; } else if (g_ascii_strcasecmp (mime_used, "application/vnd.oasis.opendocument.presentation") == 0) { file_type = FILE_TYPE_ODP; } else if (g_ascii_strcasecmp (mime_used, "application/vnd.oasis.opendocument.spreadsheet") == 0) { file_type = FILE_TYPE_ODS; } else if (g_ascii_strcasecmp (mime_used, "application/vnd.oasis.opendocument.graphics") == 0) { file_type = FILE_TYPE_ODG; } else { g_message ("Mime type was not recognised:'%s'", mime_used); file_type = FILE_TYPE_INVALID; } /* Extract content with the given limitations */ extract_oasis_content (uri, tracker_config_get_max_bytes (config), file_type, metadata); g_free (uri); tracker_extract_info_set_resource (extract_info, metadata); g_object_unref (metadata); return TRUE; }
G_MODULE_EXPORT gboolean tracker_extract_get_metadata (TrackerExtractInfo *info) { TrackerConfig *config; GTime creation_date; GError *error = NULL; TrackerResource *metadata; TrackerXmpData *xd = NULL; PDFData pd = { 0 }; /* actual data */ PDFData md = { 0 }; /* for merging */ PopplerDocument *document; gchar *xml = NULL; gchar *content, *uri; guint n_bytes; GPtrArray *keywords; guint i; GFile *file; gchar *filename; int fd; gchar *contents = NULL; gsize len; struct stat st; file = tracker_extract_info_get_file (info); filename = g_file_get_path (file); fd = tracker_file_open_fd (filename); if (fd == -1) { g_warning ("Could not open pdf file '%s': %s\n", filename, g_strerror (errno)); g_free (filename); return FALSE; } if (fstat (fd, &st) == -1) { g_warning ("Could not fstat pdf file '%s': %s\n", filename, g_strerror (errno)); close (fd); g_free (filename); return FALSE; } if (st.st_size == 0) { contents = NULL; len = 0; } else { contents = (gchar *) mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (contents == NULL || contents == MAP_FAILED) { g_warning ("Could not mmap pdf file '%s': %s\n", filename, g_strerror (errno)); close (fd); g_free (filename); return FALSE; } len = st.st_size; } g_free (filename); uri = g_file_get_uri (file); document = poppler_document_new_from_data (contents, len, NULL, &error); if (error) { if (error->code == POPPLER_ERROR_ENCRYPTED) { metadata = tracker_resource_new (NULL); tracker_resource_add_uri (metadata, "rdf:type", "nfo:PaginatedTextDocument"); tracker_resource_set_boolean (metadata, "nfo:isContentEncrypted", TRUE); tracker_extract_info_set_resource (info, metadata); g_object_unref (metadata); g_error_free (error); g_free (uri); close (fd); return TRUE; } else { g_warning ("Couldn't create PopplerDocument from uri:'%s', %s", uri, error->message ? error->message : "no error given"); g_error_free (error); g_free (uri); close (fd); return FALSE; } } if (!document) { g_warning ("Could not create PopplerDocument from uri:'%s', " "NULL returned without an error", uri); g_free (uri); close (fd); return FALSE; } metadata = tracker_resource_new (NULL); tracker_resource_add_uri (metadata, "rdf:type", "nfo:PaginatedTextDocument"); g_object_get (document, "title", &pd.title, "author", &pd.author, "subject", &pd.subject, "keywords", &pd.keywords, "creation-date", &creation_date, "metadata", &xml, NULL); if (creation_date > 0) { pd.creation_date = tracker_date_to_string ((time_t) creation_date); } keywords = g_ptr_array_new_with_free_func ((GDestroyNotify) g_free); if (xml && *xml && (xd = tracker_xmp_new (xml, strlen (xml), uri)) != NULL) { /* The casts here are well understood and known */ md.title = (gchar *) tracker_coalesce_strip (4, pd.title, xd->title, xd->title2, xd->pdf_title); md.subject = (gchar *) tracker_coalesce_strip (2, pd.subject, xd->subject); md.date = (gchar *) tracker_coalesce_strip (3, pd.creation_date, xd->date, xd->time_original); md.author = (gchar *) tracker_coalesce_strip (2, pd.author, xd->creator); write_pdf_data (md, metadata, keywords); if (xd->keywords) { tracker_keywords_parse (keywords, xd->keywords); } if (xd->pdf_keywords) { tracker_keywords_parse (keywords, xd->pdf_keywords); } if (xd->publisher) { TrackerResource *publisher = tracker_extract_new_contact (xd->publisher); tracker_resource_set_relation (metadata, "nco:publisher", publisher); g_object_unref (publisher); } if (xd->type) { tracker_resource_set_string (metadata, "dc:type", xd->type); } if (xd->format) { tracker_resource_set_string (metadata, "dc:format", xd->format); } if (xd->identifier) { tracker_resource_set_string (metadata, "dc:identifier", xd->identifier); } if (xd->source) { tracker_resource_set_string (metadata, "dc:source", xd->source); } if (xd->language) { tracker_resource_set_string (metadata, "dc:language", xd->language); } if (xd->relation) { tracker_resource_set_string (metadata, "dc:relation", xd->relation); } if (xd->coverage) { tracker_resource_set_string (metadata, "dc:coverage", xd->coverage); } if (xd->license) { tracker_resource_set_string (metadata, "nie:license", xd->license); } if (xd->make || xd->model) { TrackerResource *equipment = tracker_extract_new_equipment (xd->make, xd->model); tracker_resource_set_relation (metadata, "nfo:equipment", equipment); g_object_unref (equipment); } if (xd->orientation) { tracker_resource_set_string (metadata, "nfo:orientation", xd->orientation); } if (xd->rights) { tracker_resource_set_string (metadata, "nie:copyright", xd->rights); } if (xd->white_balance) { tracker_resource_set_string (metadata, "nmm:whiteBalance", xd->white_balance); } if (xd->fnumber) { gdouble value; value = g_strtod (xd->fnumber, NULL); tracker_resource_set_double (metadata, "nmm:fnumber", value); } if (xd->flash) { tracker_resource_set_string (metadata, "nmm:flash", xd->flash); } if (xd->focal_length) { gdouble value; value = g_strtod (xd->focal_length, NULL); tracker_resource_set_double (metadata, "nmm:focalLength", value); } /* Question: Shouldn't xd->Artist be merged with md.author instead? */ if (xd->artist || xd->contributor) { TrackerResource *artist; const gchar *artist_name; artist_name = tracker_coalesce_strip (2, xd->artist, xd->contributor); artist = tracker_extract_new_contact (artist_name); tracker_resource_set_relation (metadata, "nco:contributor", artist); g_object_unref (artist); } if (xd->exposure_time) { gdouble value; value = g_strtod (xd->exposure_time, NULL); tracker_resource_set_double (metadata, "nmm:exposureTime", value); } if (xd->iso_speed_ratings) { gdouble value; value = g_strtod (xd->iso_speed_ratings, NULL); tracker_resource_set_double (metadata, "nmm:isoSpeed", value); } if (xd->description) { tracker_resource_set_string (metadata, "nie:description", xd->description); } if (xd->metering_mode) { tracker_resource_set_string (metadata, "nmm:meteringMode", xd->metering_mode); } if (xd->address || xd->state || xd->country || xd->city || xd->gps_altitude || xd->gps_latitude || xd-> gps_longitude) { TrackerResource *location = tracker_extract_new_location (xd->address, xd->state, xd->city, xd->country, xd->gps_altitude, xd->gps_latitude, xd->gps_longitude); tracker_resource_set_relation (metadata, "slo:location", location); g_object_unref (location); } if (xd->regions) { tracker_xmp_apply_regions_to_resource (metadata, xd); } tracker_xmp_free (xd); } else { /* So if we are here we have NO XMP data and we just * write what we know from Poppler. */ write_pdf_data (pd, metadata, keywords); } for (i = 0; i < keywords->len; i++) { TrackerResource *tag; const gchar *p; p = g_ptr_array_index (keywords, i); tag = tracker_extract_new_tag (p); tracker_resource_add_relation (metadata, "nao:hasTag", tag); g_object_unref (tag); } g_ptr_array_free (keywords, TRUE); tracker_resource_set_int64 (metadata, "nfo:pageCount", poppler_document_get_n_pages(document)); config = tracker_main_get_config (); n_bytes = tracker_config_get_max_bytes (config); content = extract_content_text (document, n_bytes); if (content) { tracker_resource_set_string (metadata, "nie:plainTextContent", content); g_free (content); } read_outline (document, metadata); g_free (xml); g_free (pd.keywords); g_free (pd.title); g_free (pd.subject); g_free (pd.creation_date); g_free (pd.author); g_free (pd.date); g_free (uri); g_object_unref (document); if (contents) { munmap (contents, len); } close (fd); tracker_extract_info_set_resource (info, metadata); g_object_unref (metadata); return TRUE; }