static void write_pdf_data (PDFData data, TrackerResource *metadata, GPtrArray *keywords) { if (!tracker_is_empty_string (data.title)) { tracker_resource_set_string (metadata, "nie:title", data.title); } if (!tracker_is_empty_string (data.subject)) { tracker_resource_set_string (metadata, "nie:subject", data.subject); } if (!tracker_is_empty_string (data.author)) { TrackerResource *author = tracker_extract_new_contact (data.author); tracker_resource_set_relation (metadata, "nco:creator", author); g_object_unref (author); } if (!tracker_is_empty_string (data.date)) { tracker_resource_set_string (metadata, "nie:contentCreated", data.date); } if (!tracker_is_empty_string (data.keywords)) { tracker_keywords_parse (keywords, data.keywords); } }
/** * tracker_extract_new_location: * @street_address: (allow none): main part of postal address, or %NULL * @state: (allow none): regional part of postal address, or %NULL * @city: (allow none): locality part of postal address, or %NULL * @country: (allow none): country of postal address, or %NULL * @gps_altitude: (allow none): altitude (following WGS 84 reference) as a string, or %NULL * @gps_latitude: (allow none): latitude as a string, or %NULL * @gps_longitude: (allow none): longitude as a string, or %NULL * * Create a new slo:GeoLocation resource, with the given postal address and/or * GPS coordinates. * * No validation is done here -- it's up to you to ensure the postal address * and GPS coordinates describe the same thing. * * Returns: a newly allocated #TrackerResource instance, of type slo:GeoLocation * * Since: 1.10 */ TrackerResource * tracker_extract_new_location (const char *street_address, const char *state, const char *city, const char *country, const char *gps_altitude, const char *gps_latitude, const char *gps_longitude) { TrackerResource *location; g_return_val_if_fail (street_address != NULL || state != NULL || city != NULL || country != NULL || gps_altitude != NULL || gps_latitude != NULL || gps_longitude != NULL, NULL); location = tracker_resource_new (NULL); tracker_resource_set_uri (location, "rdf:type", "slo:GeoLocation"); if (street_address || state || country || city) { TrackerResource *address; gchar *addruri; addruri = tracker_sparql_get_uuid_urn (); address = tracker_resource_new (addruri); tracker_resource_set_string (address, "rdf:type", "nco:PostalAddress"); g_free (addruri); if (address) { tracker_resource_set_string (address, "nco:streetAddress", street_address); } if (state) { tracker_resource_set_string (address, "nco:region", state); } if (city) { tracker_resource_set_string (address, "nco:locality", city); } if (country) { tracker_resource_set_string (address, "nco:country", country); } tracker_resource_set_relation (location, "slo:postalAddress", address); g_object_unref (address); } if (gps_altitude) { tracker_resource_set_string (location, "slo:altitude", gps_altitude); } if (gps_latitude) { tracker_resource_set_string (location, "slo:latitude", gps_latitude); } if (gps_longitude) { tracker_resource_set_string (location, "slo:longitude", gps_longitude); } return location; }
static void xml_text_handler_metadata (GMarkupParseContext *context, const gchar *text, gsize text_len, gpointer user_data, GError **error) { ODTMetadataParseInfo *data; TrackerResource *metadata; gchar *date; data = user_data; metadata = data->metadata; if (text_len == 0) { /* ignore empty values */ return; } switch (data->current) { case ODT_TAG_TYPE_TITLE: if (data->has_title) { g_warning ("Avoiding additional title (%s) in OASIS document '%s'", text, data->uri); } else { data->has_title = TRUE; tracker_resource_set_string (metadata, "nie:title", text); } break; case ODT_TAG_TYPE_SUBJECT: if (data->has_subject) { g_warning ("Avoiding additional subject (%s) in OASIS document '%s'", text, data->uri); } else { data->has_subject = TRUE; tracker_resource_set_string (metadata, "nie:subject", text); } break; case ODT_TAG_TYPE_AUTHOR: if (data->has_publisher) { g_warning ("Avoiding additional publisher (%s) in OASIS document '%s'", text, data->uri); } else { TrackerResource *publisher = tracker_extract_new_contact (text); data->has_publisher = TRUE; tracker_resource_set_relation (metadata, "nco:publisher", publisher); g_object_unref (publisher); } break; case ODT_TAG_TYPE_KEYWORDS: { gchar *keywords; gchar *lasts, *keyw; keywords = g_strdup (text); for (keyw = strtok_r (keywords, ",; ", &lasts); keyw; keyw = strtok_r (NULL, ",; ", &lasts)) { tracker_resource_add_string (metadata, "nie:keyword", keyw); } g_free (keywords); break; } case ODT_TAG_TYPE_COMMENTS: if (data->has_comment) { g_warning ("Avoiding additional comment (%s) in OASIS document '%s'", text, data->uri); } else { data->has_comment = TRUE; tracker_resource_set_string (metadata, "nie:comment", text); } break; case ODT_TAG_TYPE_CREATED: if (data->has_content_created) { g_warning ("Avoiding additional creation time (%s) in OASIS document '%s'", text, data->uri); } else { date = tracker_date_guess (text); if (date) { data->has_content_created = TRUE; tracker_resource_set_string (metadata, "nie:contentCreated", date); g_free (date); } else { g_warning ("Could not parse creation time (%s) in OASIS document '%s'", text, data->uri); } } break; case ODT_TAG_TYPE_GENERATOR: if (data->has_generator) { g_warning ("Avoiding additional creation time (%s) in OASIS document '%s'", text, data->uri); } else { data->has_generator = TRUE; tracker_resource_set_string (metadata, "nie:generator", text); } break; default: case ODT_TAG_TYPE_STATS: break; } }
G_MODULE_EXPORT gboolean tracker_extract_get_metadata (TrackerExtractInfo *info) { TrackerConfig *config; GTime creation_date; GError *error = NULL; TrackerResource *metadata; TrackerXmpData *xd = NULL; PDFData pd = { 0 }; /* actual data */ PDFData md = { 0 }; /* for merging */ PopplerDocument *document; gchar *xml = NULL; gchar *content, *uri; guint n_bytes; GPtrArray *keywords; guint i; GFile *file; gchar *filename; int fd; gchar *contents = NULL; gsize len; struct stat st; file = tracker_extract_info_get_file (info); filename = g_file_get_path (file); fd = tracker_file_open_fd (filename); if (fd == -1) { g_warning ("Could not open pdf file '%s': %s\n", filename, g_strerror (errno)); g_free (filename); return FALSE; } if (fstat (fd, &st) == -1) { g_warning ("Could not fstat pdf file '%s': %s\n", filename, g_strerror (errno)); close (fd); g_free (filename); return FALSE; } if (st.st_size == 0) { contents = NULL; len = 0; } else { contents = (gchar *) mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (contents == NULL || contents == MAP_FAILED) { g_warning ("Could not mmap pdf file '%s': %s\n", filename, g_strerror (errno)); close (fd); g_free (filename); return FALSE; } len = st.st_size; } g_free (filename); uri = g_file_get_uri (file); document = poppler_document_new_from_data (contents, len, NULL, &error); if (error) { if (error->code == POPPLER_ERROR_ENCRYPTED) { metadata = tracker_resource_new (NULL); tracker_resource_add_uri (metadata, "rdf:type", "nfo:PaginatedTextDocument"); tracker_resource_set_boolean (metadata, "nfo:isContentEncrypted", TRUE); tracker_extract_info_set_resource (info, metadata); g_object_unref (metadata); g_error_free (error); g_free (uri); close (fd); return TRUE; } else { g_warning ("Couldn't create PopplerDocument from uri:'%s', %s", uri, error->message ? error->message : "no error given"); g_error_free (error); g_free (uri); close (fd); return FALSE; } } if (!document) { g_warning ("Could not create PopplerDocument from uri:'%s', " "NULL returned without an error", uri); g_free (uri); close (fd); return FALSE; } metadata = tracker_resource_new (NULL); tracker_resource_add_uri (metadata, "rdf:type", "nfo:PaginatedTextDocument"); g_object_get (document, "title", &pd.title, "author", &pd.author, "subject", &pd.subject, "keywords", &pd.keywords, "creation-date", &creation_date, "metadata", &xml, NULL); if (creation_date > 0) { pd.creation_date = tracker_date_to_string ((time_t) creation_date); } keywords = g_ptr_array_new_with_free_func ((GDestroyNotify) g_free); if (xml && *xml && (xd = tracker_xmp_new (xml, strlen (xml), uri)) != NULL) { /* The casts here are well understood and known */ md.title = (gchar *) tracker_coalesce_strip (4, pd.title, xd->title, xd->title2, xd->pdf_title); md.subject = (gchar *) tracker_coalesce_strip (2, pd.subject, xd->subject); md.date = (gchar *) tracker_coalesce_strip (3, pd.creation_date, xd->date, xd->time_original); md.author = (gchar *) tracker_coalesce_strip (2, pd.author, xd->creator); write_pdf_data (md, metadata, keywords); if (xd->keywords) { tracker_keywords_parse (keywords, xd->keywords); } if (xd->pdf_keywords) { tracker_keywords_parse (keywords, xd->pdf_keywords); } if (xd->publisher) { TrackerResource *publisher = tracker_extract_new_contact (xd->publisher); tracker_resource_set_relation (metadata, "nco:publisher", publisher); g_object_unref (publisher); } if (xd->type) { tracker_resource_set_string (metadata, "dc:type", xd->type); } if (xd->format) { tracker_resource_set_string (metadata, "dc:format", xd->format); } if (xd->identifier) { tracker_resource_set_string (metadata, "dc:identifier", xd->identifier); } if (xd->source) { tracker_resource_set_string (metadata, "dc:source", xd->source); } if (xd->language) { tracker_resource_set_string (metadata, "dc:language", xd->language); } if (xd->relation) { tracker_resource_set_string (metadata, "dc:relation", xd->relation); } if (xd->coverage) { tracker_resource_set_string (metadata, "dc:coverage", xd->coverage); } if (xd->license) { tracker_resource_set_string (metadata, "nie:license", xd->license); } if (xd->make || xd->model) { TrackerResource *equipment = tracker_extract_new_equipment (xd->make, xd->model); tracker_resource_set_relation (metadata, "nfo:equipment", equipment); g_object_unref (equipment); } if (xd->orientation) { tracker_resource_set_string (metadata, "nfo:orientation", xd->orientation); } if (xd->rights) { tracker_resource_set_string (metadata, "nie:copyright", xd->rights); } if (xd->white_balance) { tracker_resource_set_string (metadata, "nmm:whiteBalance", xd->white_balance); } if (xd->fnumber) { gdouble value; value = g_strtod (xd->fnumber, NULL); tracker_resource_set_double (metadata, "nmm:fnumber", value); } if (xd->flash) { tracker_resource_set_string (metadata, "nmm:flash", xd->flash); } if (xd->focal_length) { gdouble value; value = g_strtod (xd->focal_length, NULL); tracker_resource_set_double (metadata, "nmm:focalLength", value); } /* Question: Shouldn't xd->Artist be merged with md.author instead? */ if (xd->artist || xd->contributor) { TrackerResource *artist; const gchar *artist_name; artist_name = tracker_coalesce_strip (2, xd->artist, xd->contributor); artist = tracker_extract_new_contact (artist_name); tracker_resource_set_relation (metadata, "nco:contributor", artist); g_object_unref (artist); } if (xd->exposure_time) { gdouble value; value = g_strtod (xd->exposure_time, NULL); tracker_resource_set_double (metadata, "nmm:exposureTime", value); } if (xd->iso_speed_ratings) { gdouble value; value = g_strtod (xd->iso_speed_ratings, NULL); tracker_resource_set_double (metadata, "nmm:isoSpeed", value); } if (xd->description) { tracker_resource_set_string (metadata, "nie:description", xd->description); } if (xd->metering_mode) { tracker_resource_set_string (metadata, "nmm:meteringMode", xd->metering_mode); } if (xd->address || xd->state || xd->country || xd->city || xd->gps_altitude || xd->gps_latitude || xd-> gps_longitude) { TrackerResource *location = tracker_extract_new_location (xd->address, xd->state, xd->city, xd->country, xd->gps_altitude, xd->gps_latitude, xd->gps_longitude); tracker_resource_set_relation (metadata, "slo:location", location); g_object_unref (location); } if (xd->regions) { tracker_xmp_apply_regions_to_resource (metadata, xd); } tracker_xmp_free (xd); } else { /* So if we are here we have NO XMP data and we just * write what we know from Poppler. */ write_pdf_data (pd, metadata, keywords); } for (i = 0; i < keywords->len; i++) { TrackerResource *tag; const gchar *p; p = g_ptr_array_index (keywords, i); tag = tracker_extract_new_tag (p); tracker_resource_add_relation (metadata, "nao:hasTag", tag); g_object_unref (tag); } g_ptr_array_free (keywords, TRUE); tracker_resource_set_int64 (metadata, "nfo:pageCount", poppler_document_get_n_pages(document)); config = tracker_main_get_config (); n_bytes = tracker_config_get_max_bytes (config); content = extract_content_text (document, n_bytes); if (content) { tracker_resource_set_string (metadata, "nie:plainTextContent", content); g_free (content); } read_outline (document, metadata); g_free (xml); g_free (pd.keywords); g_free (pd.title); g_free (pd.subject); g_free (pd.creation_date); g_free (pd.author); g_free (pd.date); g_free (uri); g_object_unref (document); if (contents) { munmap (contents, len); } close (fd); tracker_extract_info_set_resource (info, metadata); g_object_unref (metadata); return TRUE; }