/**
 * tracker_extract_new_music_album_disc:
 * @album_title: title of the album
 * @album_artist: (allow none): a #TrackerResource for the album artist, or %NULL
 * @disc_number: disc number of this disc (the first / only disc in a set should be 1, not 0)
 *
 * Create new nmm:MusicAlbumDisc and nmm:MusicAlbum resources. The resources are
 * given fixed URIs based on @album_title and @disc_number, so they will be
 * merged with existing entries when serialized to SPARQL and sent to the
 * Tracker store.
 *
 * The nmm:setNumber property is set to @disc_number when it is positive and
 * to 1 otherwise; the disc URI always embeds @disc_number as given.
 *
 * You can get the album resource from the disc resource by calling:
 *
 *     tracker_resource_get_first_relation (album_disc, "nmm:albumDiscAlbum");
 *
 * Returns: a newly allocated #TrackerResource instance, of type nmm:MusicAlbumDisc,
 * or %NULL if @album_title is %NULL
 *
 * Since: 1.10
 */
TrackerResource *
tracker_extract_new_music_album_disc (const char      *album_title,
                                      TrackerResource *album_artist,
                                      int              disc_number)
{
	char *album_uri, *disc_uri;
	TrackerResource *album, *album_disc;

	g_return_val_if_fail (album_title != NULL, NULL);

	album_uri = tracker_sparql_escape_uri_printf ("urn:album:%s", album_title);

	album = tracker_resource_new (album_uri);

	tracker_resource_set_uri (album, "rdf:type", "nmm:MusicAlbum");
	tracker_resource_set_string (album, "nmm:albumTitle", album_title);

	if (album_artist != NULL) {
		tracker_resource_add_relation (album, "nmm:albumArtist", album_artist);
	}

	disc_uri = tracker_sparql_escape_uri_printf ("urn:album-disc:%s:Disc%d", album_title, disc_number);

	album_disc = tracker_resource_new (disc_uri);

	tracker_resource_set_uri (album_disc, "rdf:type", "nmm:MusicAlbumDisc");
	tracker_resource_set_int (album_disc, "nmm:setNumber", disc_number > 0 ? disc_number : 1);
	tracker_resource_add_relation (album_disc, "nmm:albumDiscAlbum", album);

	/* Both URI strings are owned by this function; release them once the
	 * resources have been created from them.
	 */
	g_free (album_uri);
	g_free (disc_uri);

	/* Drop our own reference to the album now that it has been attached
	 * to the disc as a relation.
	 */
	g_object_unref (album);

	return album_disc;
}
static void parser_start_element (void *data, const xmlChar *name_, const xmlChar **attrs_) { parser_data *pd = data; const gchar *name = (const gchar*) name_; const gchar **attrs = (const gchar**) attrs_; if (!pd || !name) { return; } /* Look for RDFa triple describing the license */ if (g_ascii_strcasecmp (name, "a") == 0) { /* This tag is a license. Ignore, however, if it is * referring to another document. */ if (has_attribute (attrs, "rel", "license") && has_attribute (attrs, "about", NULL) == FALSE) { const xmlChar *href; href = lookup_attribute (attrs, "href"); if (href && !pd->has_license) { tracker_resource_add_string (pd->metadata, "nie:license", href); pd->has_license = TRUE; } } } else if (g_ascii_strcasecmp (name, "title") == 0) { pd->current = READ_TITLE; } else if (g_ascii_strcasecmp (name, "meta") == 0) { if (has_attribute (attrs, "name", "author")) { const xmlChar *author; author = lookup_attribute (attrs, "content"); if (author) { TrackerResource *creator = tracker_extract_new_contact (author); tracker_resource_add_relation (pd->metadata, "nco:creator", creator); g_object_unref (creator); } } if (has_attribute (attrs, "name", "description")) { const xmlChar *desc; desc = lookup_attribute (attrs,"content"); if (desc && !pd->has_description) { tracker_resource_set_string (pd->metadata, "nie:description", desc); pd->has_description = TRUE; } } if (has_attribute (attrs, "name", "keywords")) { const xmlChar* content = lookup_attribute (attrs, "content"); if (content) { gchar **keywords; gint i; keywords = g_strsplit (content, ",", -1); if (keywords) { for (i = 0; keywords[i] != NULL; i++) { if (!keywords[i] || keywords[i] == '\0') { continue; } tracker_resource_add_string (pd->metadata, "nie:keyword", g_strstrip (keywords[i])); } g_strfreev (keywords); } } } } else if (g_ascii_strcasecmp (name, "body") == 0) { pd->in_body = TRUE; } else if (g_ascii_strcasecmp (name, "script") == 0) { /* Ignore javascript and such */ pd->current = READ_IGNORE; } }
/*
 * Extractor entry point for PDF files.
 *
 * The file behind @info is opened and memory-mapped, loaded with Poppler,
 * and described in a new nfo:PaginatedTextDocument resource which is then
 * attached to @info. Document properties reported by Poppler are merged
 * with XMP metadata when the document carries any; keywords become
 * nao:hasTag relations, and page count, plain text content (limited by the
 * configured maximum number of bytes) and the outline are added as well.
 *
 * Encrypted documents are not treated as failures: a resource carrying only
 * the type and nfo:isContentEncrypted is attached to @info.
 *
 * Returns: TRUE if a resource was attached to @info, FALSE if the file
 * could not be opened, stat'ed, mapped or loaded by Poppler.
 */
G_MODULE_EXPORT gboolean
tracker_extract_get_metadata (TrackerExtractInfo *info)
{
	TrackerConfig *config;
	GTime creation_date;
	GError *error = NULL;
	TrackerResource *metadata = NULL;
	TrackerXmpData *xd = NULL;
	PDFData pd = { 0 }; /* actual data */
	PDFData md = { 0 }; /* for merging */
	PopplerDocument *document;
	gchar *xml = NULL;
	gchar *content, *uri;
	guint n_bytes;
	GPtrArray *keywords;
	guint i;
	GFile *file;
	gchar *filename;
	int fd;
	gchar *contents = NULL;
	gsize len;
	struct stat st;
	gboolean success = FALSE;

	file = tracker_extract_info_get_file (info);
	filename = g_file_get_path (file);

	fd = tracker_file_open_fd (filename);

	if (fd == -1) {
		g_warning ("Could not open pdf file '%s': %s\n",
		           filename,
		           g_strerror (errno));
		g_free (filename);
		return FALSE;
	}

	if (fstat (fd, &st) == -1) {
		g_warning ("Could not fstat pdf file '%s': %s\n",
		           filename,
		           g_strerror (errno));
		close (fd);
		g_free (filename);
		return FALSE;
	}

	if (st.st_size == 0) {
		/* Nothing to map for an empty file */
		contents = NULL;
		len = 0;
	} else {
		contents = (gchar *) mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
		if (contents == NULL || contents == MAP_FAILED) {
			g_warning ("Could not mmap pdf file '%s': %s\n",
			           filename,
			           g_strerror (errno));
			close (fd);
			g_free (filename);
			return FALSE;
		}
		len = st.st_size;
	}

	g_free (filename);
	uri = g_file_get_uri (file);

	/* From here on every exit goes through the "out" label so that the
	 * mapping, the descriptor and the URI are always released.
	 */
	document = poppler_document_new_from_data (contents, len, NULL, &error);

	if (error) {
		if (error->code == POPPLER_ERROR_ENCRYPTED) {
			/* Encrypted: publish a minimal resource instead of failing */
			metadata = tracker_resource_new (NULL);

			tracker_resource_add_uri (metadata, "rdf:type", "nfo:PaginatedTextDocument");
			tracker_resource_set_boolean (metadata, "nfo:isContentEncrypted", TRUE);

			success = TRUE;
		} else {
			g_warning ("Couldn't create PopplerDocument from uri:'%s', %s",
			           uri,
			           error->message ? error->message : "no error given");
		}

		g_error_free (error);
		goto out;
	}

	if (!document) {
		g_warning ("Could not create PopplerDocument from uri:'%s', "
		           "NULL returned without an error",
		           uri);
		goto out;
	}

	metadata = tracker_resource_new (NULL);
	tracker_resource_add_uri (metadata, "rdf:type", "nfo:PaginatedTextDocument");

	g_object_get (document,
	              "title", &pd.title,
	              "author", &pd.author,
	              "subject", &pd.subject,
	              "keywords", &pd.keywords,
	              "creation-date", &creation_date,
	              "metadata", &xml,
	              NULL);

	if (creation_date > 0) {
		pd.creation_date = tracker_date_to_string ((time_t) creation_date);
	}

	keywords = g_ptr_array_new_with_free_func ((GDestroyNotify) g_free);

	if (xml && *xml &&
	    (xd = tracker_xmp_new (xml, strlen (xml), uri)) != NULL) {
		/* The casts here are well understood and known */
		md.title = (gchar *) tracker_coalesce_strip (4, pd.title, xd->title, xd->title2, xd->pdf_title);
		md.subject = (gchar *) tracker_coalesce_strip (2, pd.subject, xd->subject);
		md.date = (gchar *) tracker_coalesce_strip (3, pd.creation_date, xd->date, xd->time_original);
		md.author = (gchar *) tracker_coalesce_strip (2, pd.author, xd->creator);

		/* NOTE(review): md.keywords is never assigned, so pd.keywords is
		 * not handed to write_pdf_data() in this branch — confirm this
		 * is intended.
		 */
		write_pdf_data (md, metadata, keywords);

		if (xd->keywords) {
			tracker_keywords_parse (keywords, xd->keywords);
		}

		if (xd->pdf_keywords) {
			tracker_keywords_parse (keywords, xd->pdf_keywords);
		}

		if (xd->publisher) {
			TrackerResource *publisher = tracker_extract_new_contact (xd->publisher);

			tracker_resource_set_relation (metadata, "nco:publisher", publisher);

			g_object_unref (publisher);
		}

		if (xd->type) {
			tracker_resource_set_string (metadata, "dc:type", xd->type);
		}

		if (xd->format) {
			tracker_resource_set_string (metadata, "dc:format", xd->format);
		}

		if (xd->identifier) {
			tracker_resource_set_string (metadata, "dc:identifier", xd->identifier);
		}

		if (xd->source) {
			tracker_resource_set_string (metadata, "dc:source", xd->source);
		}

		if (xd->language) {
			tracker_resource_set_string (metadata, "dc:language", xd->language);
		}

		if (xd->relation) {
			tracker_resource_set_string (metadata, "dc:relation", xd->relation);
		}

		if (xd->coverage) {
			tracker_resource_set_string (metadata, "dc:coverage", xd->coverage);
		}

		if (xd->license) {
			tracker_resource_set_string (metadata, "nie:license", xd->license);
		}

		if (xd->make || xd->model) {
			TrackerResource *equipment = tracker_extract_new_equipment (xd->make, xd->model);

			tracker_resource_set_relation (metadata, "nfo:equipment", equipment);

			g_object_unref (equipment);
		}

		if (xd->orientation) {
			tracker_resource_set_string (metadata, "nfo:orientation", xd->orientation);
		}

		if (xd->rights) {
			tracker_resource_set_string (metadata, "nie:copyright", xd->rights);
		}

		if (xd->white_balance) {
			tracker_resource_set_string (metadata, "nmm:whiteBalance", xd->white_balance);
		}

		if (xd->fnumber) {
			gdouble value;

			value = g_strtod (xd->fnumber, NULL);
			tracker_resource_set_double (metadata, "nmm:fnumber", value);
		}

		if (xd->flash) {
			tracker_resource_set_string (metadata, "nmm:flash", xd->flash);
		}

		if (xd->focal_length) {
			gdouble value;

			value = g_strtod (xd->focal_length, NULL);
			tracker_resource_set_double (metadata, "nmm:focalLength", value);
		}

		/* Question: Shouldn't xd->Artist be merged with md.author instead? */
		if (xd->artist || xd->contributor) {
			TrackerResource *artist;
			const gchar *artist_name;

			artist_name = tracker_coalesce_strip (2, xd->artist, xd->contributor);

			artist = tracker_extract_new_contact (artist_name);

			tracker_resource_set_relation (metadata, "nco:contributor", artist);

			g_object_unref (artist);
		}

		if (xd->exposure_time) {
			gdouble value;

			value = g_strtod (xd->exposure_time, NULL);
			tracker_resource_set_double (metadata, "nmm:exposureTime", value);
		}

		if (xd->iso_speed_ratings) {
			gdouble value;

			value = g_strtod (xd->iso_speed_ratings, NULL);
			tracker_resource_set_double (metadata, "nmm:isoSpeed", value);
		}

		if (xd->description) {
			tracker_resource_set_string (metadata, "nie:description", xd->description);
		}

		if (xd->metering_mode) {
			tracker_resource_set_string (metadata, "nmm:meteringMode", xd->metering_mode);
		}

		if (xd->address || xd->state || xd->country || xd->city ||
		    xd->gps_altitude || xd->gps_latitude || xd->gps_longitude) {
			TrackerResource *location = tracker_extract_new_location (xd->address,
			                                                          xd->state,
			                                                          xd->city,
			                                                          xd->country,
			                                                          xd->gps_altitude,
			                                                          xd->gps_latitude,
			                                                          xd->gps_longitude);

			tracker_resource_set_relation (metadata, "slo:location", location);

			g_object_unref (location);
		}

		if (xd->regions) {
			tracker_xmp_apply_regions_to_resource (metadata, xd);
		}

		tracker_xmp_free (xd);
	} else {
		/* So if we are here we have NO XMP data and we just
		 * write what we know from Poppler.
		 *
		 * NOTE(review): pd.date is never assigned in this function (the
		 * creation date is stored in pd.creation_date) — confirm that
		 * write_pdf_data() reads the intended field.
		 */
		write_pdf_data (pd, metadata, keywords);
	}

	/* Every collected keyword becomes a tag on the document */
	for (i = 0; i < keywords->len; i++) {
		TrackerResource *tag;
		const gchar *p;

		p = g_ptr_array_index (keywords, i);
		tag = tracker_extract_new_tag (p);

		tracker_resource_add_relation (metadata, "nao:hasTag", tag);

		g_object_unref (tag);
	}
	g_ptr_array_free (keywords, TRUE);

	tracker_resource_set_int64 (metadata, "nfo:pageCount", poppler_document_get_n_pages(document));

	config = tracker_main_get_config ();
	n_bytes = tracker_config_get_max_bytes (config);
	content = extract_content_text (document, n_bytes);

	if (content) {
		tracker_resource_set_string (metadata, "nie:plainTextContent", content);
		g_free (content);
	}

	read_outline (document, metadata);

	success = TRUE;

out:
	/* On the failure paths these are all still NULL, and g_free (NULL)
	 * is a no-op.
	 */
	g_free (xml);
	g_free (pd.keywords);
	g_free (pd.title);
	g_free (pd.subject);
	g_free (pd.creation_date);
	g_free (pd.author);
	g_free (pd.date);
	g_free (uri);

	/* The document was created from the mapped bytes, so release it
	 * before the mapping itself goes away.
	 */
	if (document) {
		g_object_unref (document);
	}

	if (contents) {
		munmap (contents, len);
	}

	close (fd);

	if (success) {
		tracker_extract_info_set_resource (info, metadata);
		g_object_unref (metadata);
	}

	return success;
}