static gchar * get_file_content (GFile *file, gsize n_bytes) { gchar *text, *uri, *path; int fd; /* If no content requested, return */ if (n_bytes == 0) { return NULL; } uri = g_file_get_uri (file); /* Get filename from URI */ path = g_file_get_path (file); fd = tracker_file_open_fd (path); if (fd == -1) { g_message ("Could not open file '%s': %s", uri, g_strerror (errno)); g_free (uri); g_free (path); return NULL; } g_debug (" Starting to read '%s' up to %" G_GSIZE_FORMAT " bytes...", uri, n_bytes); /* Read up to n_bytes from stream. Output is always, always valid UTF-8, * this function closes the FD. */ text = tracker_read_text_from_fd (fd, n_bytes); g_free (uri); g_free (path); return text; }
G_MODULE_EXPORT gboolean tracker_extract_get_metadata (TrackerExtractInfo *info) { TIFF *image; TrackerXmpData *xd = NULL; TrackerIptcData *id = NULL; TrackerExifData *ed = NULL; MergeData md = { 0 }; TiffData td = { 0 }; gchar *filename, *uri; gchar *date; glong exif_offset; GPtrArray *keywords; guint i; GFile *file; TrackerSparqlBuilder *metadata, *preupdate; const gchar *graph, *urn; int fd; #ifdef HAVE_LIBIPTCDATA gchar *iptc_offset; guint32 iptc_size; #endif #ifdef HAVE_EXEMPI gchar *xmp_offset; guint32 size; #endif /* HAVE_EXEMPI */ file = tracker_extract_info_get_file (info); filename = g_file_get_path (file); preupdate = tracker_extract_info_get_preupdate_builder (info); metadata = tracker_extract_info_get_metadata_builder (info); graph = tracker_extract_info_get_graph (info); urn = tracker_extract_info_get_urn (info); fd = tracker_file_open_fd (filename); if (fd == -1) { g_warning ("Could not open tiff file '%s': %s\n", filename, g_strerror (errno)); g_free (filename); return FALSE; } if ((image = TIFFFdOpen (fd, filename, "r")) == NULL){ g_warning ("Could not open image:'%s'\n", filename); g_free (filename); close (fd); return FALSE; } tracker_sparql_builder_predicate (metadata, "a"); tracker_sparql_builder_object (metadata, "nfo:Image"); tracker_sparql_builder_object (metadata, "nmm:Photo"); uri = g_file_get_uri (file); #ifdef HAVE_LIBIPTCDATA if (TIFFGetField (image, TIFFTAG_RICHTIFFIPTC, &iptc_size, &iptc_offset)) { if (TIFFIsByteSwapped(image) != 0) { TIFFSwabArrayOfLong((uint32*) iptc_offset, (unsigned long) iptc_size); } id = tracker_iptc_new (iptc_offset, 4 * iptc_size, uri); } #endif /* HAVE_LIBIPTCDATA */ if (!id) { id = g_new0 (TrackerIptcData, 1); } /* FIXME There are problems between XMP data embedded with different tools due to bugs in the original spec (type) */ #ifdef HAVE_EXEMPI if (TIFFGetField (image, TIFFTAG_XMLPACKET, &size, &xmp_offset)) { xd = tracker_xmp_new (xmp_offset, size, uri); } #endif /* HAVE_EXEMPI */ if (!xd) { xd = g_new0 (TrackerXmpData, 1); } ed = g_new0 (TrackerExifData, 1); /* Get Tiff specifics */ td.width = tag_to_string (image, TIFFTAG_IMAGEWIDTH, TAG_TYPE_UINT32); td.length = tag_to_string (image, TIFFTAG_IMAGELENGTH, TAG_TYPE_UINT32); td.artist = tag_to_string (image, TIFFTAG_ARTIST, TAG_TYPE_STRING); td.copyright = tag_to_string (image, TIFFTAG_COPYRIGHT, TAG_TYPE_STRING); date = tag_to_string (image, TIFFTAG_DATETIME, TAG_TYPE_STRING); td.date = tracker_date_guess (date); g_free (date); td.title = tag_to_string (image, TIFFTAG_DOCUMENTNAME, TAG_TYPE_STRING); td.description = tag_to_string (image, TIFFTAG_IMAGEDESCRIPTION, TAG_TYPE_STRING); td.make = tag_to_string (image, TIFFTAG_MAKE, TAG_TYPE_STRING); td.model = tag_to_string (image, TIFFTAG_MODEL, TAG_TYPE_STRING); td.orientation = get_orientation (image); /* Get Exif specifics */ if (TIFFGetField (image, TIFFTAG_EXIFIFD, &exif_offset)) { if (TIFFReadEXIFDirectory (image, exif_offset)) { ed->exposure_time = tag_to_string (image, EXIFTAG_EXPOSURETIME, TAG_TYPE_DOUBLE); ed->fnumber = tag_to_string (image, EXIFTAG_FNUMBER, TAG_TYPE_DOUBLE); ed->iso_speed_ratings = tag_to_string (image, EXIFTAG_ISOSPEEDRATINGS, TAG_TYPE_C16_UINT16); date = tag_to_string (image, EXIFTAG_DATETIMEORIGINAL, TAG_TYPE_STRING); ed->time_original = tracker_date_guess (date); g_free (date); ed->metering_mode = get_metering_mode (image); ed->flash = get_flash (image); ed->focal_length = tag_to_string (image, EXIFTAG_DATETIMEORIGINAL, TAG_TYPE_DOUBLE); ed->white_balance = get_white_balance (image); /* ed->software = tag_to_string (image, EXIFTAG_SOFTWARE, TAG_TYPE_STRING); */ } } TIFFClose (image); g_free (filename); md.title = tracker_coalesce_strip (5, xd->title, xd->pdf_title, td.title, ed->document_name, xd->title2); md.orientation = tracker_coalesce_strip (4, xd->orientation, td.orientation, ed->orientation, id->image_orientation); md.copyright = tracker_coalesce_strip (4, xd->rights, td.copyright, ed->copyright, id->copyright_notice); md.white_balance = tracker_coalesce_strip (2, xd->white_balance, ed->white_balance); md.fnumber = tracker_coalesce_strip (2, xd->fnumber, ed->fnumber); md.flash = tracker_coalesce_strip (2, xd->flash, ed->flash); md.focal_length = tracker_coalesce_strip (2, xd->focal_length, ed->focal_length); md.artist = tracker_coalesce_strip (4, xd->artist, td.artist, ed->artist, xd->contributor); md.exposure_time = tracker_coalesce_strip (2, xd->exposure_time, ed->exposure_time); md.iso_speed_ratings = tracker_coalesce_strip (2, xd->iso_speed_ratings, ed->iso_speed_ratings); md.date = tracker_coalesce_strip (6, xd->date, xd->time_original, td.date, ed->time, id->date_created, ed->time_original); md.description = tracker_coalesce_strip (3, xd->description, td.description, ed->description); md.metering_mode = tracker_coalesce_strip (2, xd->metering_mode, ed->metering_mode); md.city = tracker_coalesce_strip (2, xd->city, id->city); md.state = tracker_coalesce_strip (2, xd->state, id->state); md.address = tracker_coalesce_strip (2, xd->address, id->sublocation); md.country = tracker_coalesce_strip (2, xd->country, id->country_name); /* FIXME We are not handling the altitude ref here for xmp */ md.gps_altitude = tracker_coalesce_strip (2, xd->gps_altitude, ed->gps_altitude); md.gps_latitude = tracker_coalesce_strip (2, xd->gps_latitude, ed->gps_latitude); md.gps_longitude = tracker_coalesce_strip (2, xd->gps_longitude, ed->gps_longitude); md.gps_direction = tracker_coalesce_strip (2, xd->gps_direction, ed->gps_direction); md.creator = tracker_coalesce_strip (3, xd->creator, id->byline, id->credit); md.x_dimension = tracker_coalesce_strip (2, td.width, ed->x_dimension); md.y_dimension = tracker_coalesce_strip (2, td.length, ed->y_dimension); md.make = tracker_coalesce_strip (3, xd->make, td.make, ed->make); md.model = tracker_coalesce_strip (3, xd->model, td.model, ed->model); keywords = g_ptr_array_new_with_free_func ((GDestroyNotify) g_free); if (ed->user_comment) { tracker_sparql_builder_predicate (metadata, "nie:comment"); tracker_sparql_builder_object_unvalidated (metadata, ed->user_comment); } if (md.x_dimension) { tracker_sparql_builder_predicate (metadata, "nfo:width"); tracker_sparql_builder_object_unvalidated (metadata, md.x_dimension); } if (md.y_dimension) { tracker_sparql_builder_predicate (metadata, "nfo:height"); tracker_sparql_builder_object_unvalidated (metadata, md.y_dimension); } if (xd->keywords) { tracker_keywords_parse (keywords, xd->keywords); } if (xd->pdf_keywords) { tracker_keywords_parse (keywords, xd->pdf_keywords); } if (xd->subject) { tracker_keywords_parse (keywords, xd->subject); } if (xd->publisher) { gchar *uri = tracker_sparql_escape_uri_printf ("urn:contact:%s", xd->publisher); tracker_sparql_builder_insert_open (preupdate, NULL); if (graph) { tracker_sparql_builder_graph_open (preupdate, graph); } tracker_sparql_builder_subject_iri (preupdate, uri); tracker_sparql_builder_predicate (preupdate, "a"); tracker_sparql_builder_object (preupdate, "nco:Contact"); tracker_sparql_builder_predicate (preupdate, "nco:fullname"); tracker_sparql_builder_object_unvalidated (preupdate, xd->publisher); if (graph) { tracker_sparql_builder_graph_close (preupdate); } tracker_sparql_builder_insert_close (preupdate); tracker_sparql_builder_predicate (metadata, "nco:publisher"); tracker_sparql_builder_object_iri (metadata, uri); g_free (uri); } if (xd->type) { tracker_sparql_builder_predicate (metadata, "dc:type"); tracker_sparql_builder_object_unvalidated (metadata, xd->type); } if (xd->format) { tracker_sparql_builder_predicate (metadata, "dc:format"); tracker_sparql_builder_object_unvalidated (metadata, xd->format); } if (xd->identifier) { tracker_sparql_builder_predicate (metadata, "dc:identifier"); tracker_sparql_builder_object_unvalidated (metadata, xd->identifier); } if (xd->source) { tracker_sparql_builder_predicate (metadata, "dc:source"); tracker_sparql_builder_object_unvalidated (metadata, xd->source); } if (xd->language) { tracker_sparql_builder_predicate (metadata, "dc:language"); tracker_sparql_builder_object_unvalidated (metadata, xd->language); } if (xd->relation) { tracker_sparql_builder_predicate (metadata, "dc:relation"); tracker_sparql_builder_object_unvalidated (metadata, xd->relation); } if (xd->coverage) { tracker_sparql_builder_predicate (metadata, "dc:coverage"); tracker_sparql_builder_object_unvalidated (metadata, xd->coverage); } if (xd->rating) { tracker_sparql_builder_predicate (metadata, "nao:numericRating"); tracker_sparql_builder_object_unvalidated (metadata, xd->rating); } if (xd->license) { tracker_sparql_builder_predicate (metadata, "nie:license"); tracker_sparql_builder_object_unvalidated (metadata, xd->license); } if (xd->regions) { tracker_xmp_apply_regions (preupdate, metadata, graph, xd); } if (md.address || md.state || md.country || md.city || md.gps_altitude || md.gps_latitude || md.gps_longitude) { tracker_sparql_builder_predicate (metadata, "slo:location"); tracker_sparql_builder_object_blank_open (metadata); /* GeoPoint */ tracker_sparql_builder_predicate (metadata, "a"); tracker_sparql_builder_object (metadata, "slo:GeoLocation"); if (md.address || md.state || md.country || md.city) { gchar *addruri; addruri = tracker_sparql_get_uuid_urn (); tracker_sparql_builder_predicate (metadata, "slo:postalAddress"); tracker_sparql_builder_object_iri (metadata, addruri); tracker_sparql_builder_insert_open (preupdate, NULL); if (graph) { tracker_sparql_builder_graph_open (preupdate, graph); } tracker_sparql_builder_subject_iri (preupdate, addruri); g_free (addruri); tracker_sparql_builder_predicate (preupdate, "a"); tracker_sparql_builder_object (preupdate, "nco:PostalAddress"); if (md.address) { tracker_sparql_builder_predicate (preupdate, "nco:streetAddress"); tracker_sparql_builder_object_unvalidated (preupdate, md.address); } if (md.state) { tracker_sparql_builder_predicate (preupdate, "nco:region"); tracker_sparql_builder_object_unvalidated (preupdate, md.state); } if (md.city) { tracker_sparql_builder_predicate (preupdate, "nco:locality"); tracker_sparql_builder_object_unvalidated (preupdate, md.city); } if (md.country) { tracker_sparql_builder_predicate (preupdate, "nco:country"); tracker_sparql_builder_object_unvalidated (preupdate, md.country); } if (graph) { tracker_sparql_builder_graph_close (preupdate); } tracker_sparql_builder_insert_close (preupdate); } if (md.gps_altitude) { tracker_sparql_builder_predicate (metadata, "slo:altitude"); tracker_sparql_builder_object_unvalidated (metadata, md.gps_altitude); } if (md.gps_latitude) { tracker_sparql_builder_predicate (metadata, "slo:latitude"); tracker_sparql_builder_object_unvalidated (metadata, md.gps_latitude); } if (md.gps_longitude) { tracker_sparql_builder_predicate (metadata, "slo:longitude"); tracker_sparql_builder_object_unvalidated (metadata, md.gps_longitude); } tracker_sparql_builder_object_blank_close (metadata); /* GeoLocation */ } if (md.gps_direction) { tracker_sparql_builder_predicate (metadata, "nfo:heading"); tracker_sparql_builder_object_unvalidated (metadata, md.gps_direction); } if (id->contact) { gchar *uri = tracker_sparql_escape_uri_printf ("urn:contact:%s", id->contact); tracker_sparql_builder_insert_open (preupdate, NULL); if (graph) { tracker_sparql_builder_graph_open (preupdate, graph); } tracker_sparql_builder_subject_iri (preupdate, uri); tracker_sparql_builder_predicate (preupdate, "a"); tracker_sparql_builder_object (preupdate, "nco:Contact"); tracker_sparql_builder_predicate (preupdate, "nco:fullname"); tracker_sparql_builder_object_unvalidated (preupdate, id->contact); if (graph) { tracker_sparql_builder_graph_close (preupdate); } tracker_sparql_builder_insert_close (preupdate); tracker_sparql_builder_predicate (metadata, "nco:representative"); tracker_sparql_builder_object_iri (metadata, uri); g_free (uri); } if (id->keywords) { tracker_keywords_parse (keywords, id->keywords); } for (i = 0; i < keywords->len; i++) { gchar *escaped, *subject; const gchar *p; p = g_ptr_array_index (keywords, i); escaped = tracker_sparql_escape_string (p); subject = g_strdup_printf ("_:tag%d", i + 1); /* ensure tag with specified label exists */ tracker_sparql_builder_insert_open (preupdate, graph); tracker_sparql_builder_subject (preupdate, subject); tracker_sparql_builder_predicate (preupdate, "a"); tracker_sparql_builder_object (preupdate, "nao:Tag"); tracker_sparql_builder_predicate (preupdate, "nao:prefLabel"); tracker_sparql_builder_object_unvalidated (preupdate, escaped); tracker_sparql_builder_insert_close (preupdate); tracker_sparql_builder_append (preupdate, "WHERE { FILTER (NOT EXISTS { " "?tag a nao:Tag ; nao:prefLabel \""); tracker_sparql_builder_append (preupdate, escaped); tracker_sparql_builder_append (preupdate, "\" }) }\n"); /* associate file with tag */ tracker_sparql_builder_insert_open (preupdate, graph); tracker_sparql_builder_subject_iri (preupdate, urn); tracker_sparql_builder_predicate (preupdate, "nao:hasTag"); tracker_sparql_builder_object (preupdate, "?tag"); tracker_sparql_builder_insert_close (preupdate); tracker_sparql_builder_where_open (preupdate); tracker_sparql_builder_subject (preupdate, "?tag"); tracker_sparql_builder_predicate (preupdate, "a"); tracker_sparql_builder_object (preupdate, "nao:Tag"); tracker_sparql_builder_predicate (preupdate, "nao:prefLabel"); tracker_sparql_builder_object_unvalidated (preupdate, escaped); tracker_sparql_builder_where_close (preupdate); g_free (subject); g_free (escaped); } g_ptr_array_free (keywords, TRUE); if (md.make || md.model) { gchar *equip_uri; equip_uri = tracker_sparql_escape_uri_printf ("urn:equipment:%s:%s:", md.make ? md.make : "", md.model ? md.model : ""); tracker_sparql_builder_insert_open (preupdate, NULL); if (graph) { tracker_sparql_builder_graph_open (preupdate, graph); } tracker_sparql_builder_subject_iri (preupdate, equip_uri); tracker_sparql_builder_predicate (preupdate, "a"); tracker_sparql_builder_object (preupdate, "nfo:Equipment"); if (md.make) { tracker_sparql_builder_predicate (preupdate, "nfo:manufacturer"); tracker_sparql_builder_object_unvalidated (preupdate, md.make); } if (md.model) { tracker_sparql_builder_predicate (preupdate, "nfo:model"); tracker_sparql_builder_object_unvalidated (preupdate, md.model); } if (graph) { tracker_sparql_builder_graph_close (preupdate); } tracker_sparql_builder_insert_close (preupdate); tracker_sparql_builder_predicate (metadata, "nfo:equipment"); tracker_sparql_builder_object_iri (metadata, equip_uri); g_free (equip_uri); } tracker_guarantee_title_from_file (metadata, "nie:title", md.title, uri, NULL); if (md.orientation) { tracker_sparql_builder_predicate (metadata, "nfo:orientation"); tracker_sparql_builder_object_unvalidated (metadata, md.orientation); } if (md.copyright) { tracker_sparql_builder_predicate (metadata, "nie:copyright"); tracker_sparql_builder_object_unvalidated (metadata, md.copyright); } if (md.white_balance) { tracker_sparql_builder_predicate (metadata, "nmm:whiteBalance"); tracker_sparql_builder_object_unvalidated (metadata, md.white_balance); } if (md.fnumber) { tracker_sparql_builder_predicate (metadata, "nmm:fnumber"); tracker_sparql_builder_object_unvalidated (metadata, md.fnumber); } if (md.flash) { tracker_sparql_builder_predicate (metadata, "nmm:flash"); tracker_sparql_builder_object_unvalidated (metadata, md.flash); } if (md.focal_length) { tracker_sparql_builder_predicate (metadata, "nmm:focalLength"); tracker_sparql_builder_object_unvalidated (metadata, md.focal_length); } if (md.artist) { gchar *uri = tracker_sparql_escape_uri_printf ("urn:contact:%s", md.artist); tracker_sparql_builder_insert_open (preupdate, NULL); if (graph) { tracker_sparql_builder_graph_open (preupdate, graph); } tracker_sparql_builder_subject_iri (preupdate, uri); tracker_sparql_builder_predicate (preupdate, "a"); tracker_sparql_builder_object (preupdate, "nco:Contact"); tracker_sparql_builder_predicate (preupdate, "nco:fullname"); tracker_sparql_builder_object_unvalidated (preupdate, md.artist); if (graph) { tracker_sparql_builder_graph_close (preupdate); } tracker_sparql_builder_insert_close (preupdate); tracker_sparql_builder_predicate (metadata, "nco:contributor"); tracker_sparql_builder_object_iri (metadata, uri); g_free (uri); } if (md.exposure_time) { tracker_sparql_builder_predicate (metadata, "nmm:exposureTime"); tracker_sparql_builder_object_unvalidated (metadata, md.exposure_time); } if (md.iso_speed_ratings) { tracker_sparql_builder_predicate (metadata, "nmm:isoSpeed"); tracker_sparql_builder_object_unvalidated (metadata, md.iso_speed_ratings); } tracker_guarantee_date_from_file_mtime (metadata, "nie:contentCreated", md.date, uri); if (md.description) { tracker_sparql_builder_predicate (metadata, "nie:description"); tracker_sparql_builder_object_unvalidated (metadata, md.description); } if (md.metering_mode) { tracker_sparql_builder_predicate (metadata, "nmm:meteringMode"); tracker_sparql_builder_object_unvalidated (metadata, md.metering_mode); } if (md.creator) { gchar *uri = tracker_sparql_escape_uri_printf ("urn:contact:%s", md.creator); tracker_sparql_builder_insert_open (preupdate, NULL); if (graph) { tracker_sparql_builder_graph_open (preupdate, graph); } tracker_sparql_builder_subject_iri (preupdate, uri); tracker_sparql_builder_predicate (preupdate, "a"); tracker_sparql_builder_object (preupdate, "nco:Contact"); tracker_sparql_builder_predicate (preupdate, "nco:fullname"); tracker_sparql_builder_object_unvalidated (preupdate, md.creator); if (graph) { tracker_sparql_builder_graph_close (preupdate); } tracker_sparql_builder_insert_close (preupdate); /* NOTE: We only have affiliation with * nco:PersonContact and we are using * nco:Contact here. */ /* if (id->byline_title) { */ /* tracker_sparql_builder_insert_open (preupdate, NULL); */ /* tracker_sparql_builder_subject (preupdate, "_:affiliation_by_line"); */ /* tracker_sparql_builder_predicate (preupdate, "a"); */ /* tracker_sparql_builder_object (preupdate, "nco:Affiliation"); */ /* tracker_sparql_builder_predicate (preupdate, "nco:title"); */ /* tracker_sparql_builder_object_unvalidated (preupdate, id->byline_title); */ /* tracker_sparql_builder_insert_close (preupdate); */ /* tracker_sparql_builder_predicate (metadata, "a"); */ /* tracker_sparql_builder_object (metadata, "nco:PersonContact"); */ /* tracker_sparql_builder_predicate (metadata, "nco:hasAffiliation"); */ /* tracker_sparql_builder_object (metadata, "_:affiliation_by_line"); */ /* } */ tracker_sparql_builder_predicate (metadata, "nco:creator"); tracker_sparql_builder_object_iri (metadata, uri); g_free (uri); } if (ed->x_resolution) { gdouble value; value = ed->resolution_unit != 3 ? g_strtod (ed->x_resolution, NULL) : g_strtod (ed->x_resolution, NULL) * CM_TO_INCH; tracker_sparql_builder_predicate (metadata, "nfo:horizontalResolution"); tracker_sparql_builder_object_double (metadata, value); } if (ed->y_resolution) { gdouble value; value = ed->resolution_unit != 3 ? g_strtod (ed->y_resolution, NULL) : g_strtod (ed->y_resolution, NULL) * CM_TO_INCH; tracker_sparql_builder_predicate (metadata, "nfo:verticalResolution"); tracker_sparql_builder_object_double (metadata, value); } tiff_data_free (&td); tracker_exif_free (ed); tracker_xmp_free (xd); tracker_iptc_free (id); g_free (uri); close (fd); return TRUE; }
G_MODULE_EXPORT gboolean tracker_extract_get_metadata (TrackerExtractInfo *info) { TrackerSparqlBuilder *preupdate, *metadata; goffset size; GifFileType *gifFile = NULL; GString *where; const gchar *graph; gchar *filename, *uri; GFile *file; int fd; #if GIFLIB_MAJOR >= 5 int err; #endif preupdate = tracker_extract_info_get_preupdate_builder (info); metadata = tracker_extract_info_get_metadata_builder (info); graph = tracker_extract_info_get_graph (info); file = tracker_extract_info_get_file (info); filename = g_file_get_path (file); size = tracker_file_get_size (filename); if (size < 64) { g_free (filename); return FALSE; } fd = tracker_file_open_fd (filename); if (fd == -1) { g_warning ("Could not open GIF file '%s': %s\n", filename, g_strerror (errno)); g_free (filename); return FALSE; } #if GIFLIB_MAJOR < 5 if ((gifFile = DGifOpenFileHandle (fd)) == NULL) { PrintGifError (); #else /* GIFLIB_MAJOR < 5 */ if ((gifFile = DGifOpenFileHandle (fd, &err)) == NULL) { gif_error ("Could not open GIF file with handle", err); #endif /* GIFLIB_MAJOR < 5 */ close (fd); return FALSE; } g_free (filename); tracker_sparql_builder_predicate (metadata, "a"); tracker_sparql_builder_object (metadata, "nfo:Image"); tracker_sparql_builder_object (metadata, "nmm:Photo"); where = g_string_new (""); uri = g_file_get_uri (file); read_metadata (preupdate, metadata, where, gifFile, uri, graph); tracker_extract_info_set_where_clause (info, where->str); g_string_free (where, TRUE); g_free (uri); if (DGifCloseFile (gifFile) != GIF_OK) { #if GIFLIB_MAJOR < 5 PrintGifError (); #else /* GIFLIB_MAJOR < 5 */ gif_error ("Could not close GIF file", gifFile->Error); #endif /* GIFLIB_MAJOR < 5 */ } return TRUE; }
G_MODULE_EXPORT gboolean tracker_extract_get_metadata (TrackerExtractInfo *info) { TrackerConfig *config; GTime creation_date; GError *error = NULL; TrackerSparqlBuilder *metadata, *preupdate; const gchar *graph; const gchar *urn; TrackerXmpData *xd = NULL; PDFData pd = { 0 }; /* actual data */ PDFData md = { 0 }; /* for merging */ PopplerDocument *document; gchar *xml = NULL; gchar *content, *uri; guint n_bytes; GPtrArray *keywords; guint i; GFile *file; gchar *filename; int fd; gchar *contents = NULL; gsize len; struct stat st; metadata = tracker_extract_info_get_metadata_builder (info); preupdate = tracker_extract_info_get_preupdate_builder (info); graph = tracker_extract_info_get_graph (info); urn = tracker_extract_info_get_urn (info); file = tracker_extract_info_get_file (info); filename = g_file_get_path (file); fd = tracker_file_open_fd (filename); if (fd == -1) { g_warning ("Could not open pdf file '%s': %s\n", filename, g_strerror (errno)); g_free (filename); return FALSE; } if (fstat (fd, &st) == -1) { g_warning ("Could not fstat pdf file '%s': %s\n", filename, g_strerror (errno)); close (fd); g_free (filename); return FALSE; } if (st.st_size == 0) { contents = NULL; len = 0; } else { contents = (gchar *) mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (contents == NULL || contents == MAP_FAILED) { g_warning ("Could not mmap pdf file '%s': %s\n", filename, g_strerror (errno)); close (fd); g_free (filename); return FALSE; } len = st.st_size; } g_free (filename); uri = g_file_get_uri (file); document = poppler_document_new_from_data (contents, len, NULL, &error); if (error) { if (error->code == POPPLER_ERROR_ENCRYPTED) { tracker_sparql_builder_predicate (metadata, "a"); tracker_sparql_builder_object (metadata, "nfo:PaginatedTextDocument"); tracker_sparql_builder_predicate (metadata, "nfo:isContentEncrypted"); tracker_sparql_builder_object_boolean (metadata, TRUE); g_error_free (error); g_free (uri); close (fd); return TRUE; } else { g_warning ("Couldn't create PopplerDocument from uri:'%s', %s", uri, error->message ? error->message : "no error given"); g_error_free (error); g_free (uri); close (fd); return FALSE; } } if (!document) { g_warning ("Could not create PopplerDocument from uri:'%s', " "NULL returned without an error", uri); g_free (uri); close (fd); return FALSE; } tracker_sparql_builder_predicate (metadata, "a"); tracker_sparql_builder_object (metadata, "nfo:PaginatedTextDocument"); g_object_get (document, "title", &pd.title, "author", &pd.author, "subject", &pd.subject, "keywords", &pd.keywords, "creation-date", &creation_date, "metadata", &xml, NULL); if (creation_date > 0) { pd.creation_date = tracker_date_to_string ((time_t) creation_date); } keywords = g_ptr_array_new_with_free_func ((GDestroyNotify) g_free); if (xml && *xml && (xd = tracker_xmp_new (xml, strlen (xml), uri)) != NULL) { /* The casts here are well understood and known */ md.title = (gchar *) tracker_coalesce_strip (4, pd.title, xd->title, xd->title2, xd->pdf_title); md.subject = (gchar *) tracker_coalesce_strip (2, pd.subject, xd->subject); md.date = (gchar *) tracker_coalesce_strip (3, pd.creation_date, xd->date, xd->time_original); md.author = (gchar *) tracker_coalesce_strip (2, pd.author, xd->creator); write_pdf_data (md, metadata, keywords); if (xd->keywords) { tracker_keywords_parse (keywords, xd->keywords); } if (xd->pdf_keywords) { tracker_keywords_parse (keywords, xd->pdf_keywords); } if (xd->publisher) { tracker_sparql_builder_predicate (metadata, "nco:publisher"); tracker_sparql_builder_object_blank_open (metadata); tracker_sparql_builder_predicate (metadata, "a"); tracker_sparql_builder_object (metadata, "nco:Contact"); tracker_sparql_builder_predicate (metadata, "nco:fullname"); tracker_sparql_builder_object_unvalidated (metadata, xd->publisher); tracker_sparql_builder_object_blank_close (metadata); } if (xd->type) { tracker_sparql_builder_predicate (metadata, "dc:type"); tracker_sparql_builder_object_unvalidated (metadata, xd->type); } if (xd->format) { tracker_sparql_builder_predicate (metadata, "dc:format"); tracker_sparql_builder_object_unvalidated (metadata, xd->format); } if (xd->identifier) { tracker_sparql_builder_predicate (metadata, "dc:identifier"); tracker_sparql_builder_object_unvalidated (metadata, xd->identifier); } if (xd->source) { tracker_sparql_builder_predicate (metadata, "dc:source"); tracker_sparql_builder_object_unvalidated (metadata, xd->source); } if (xd->language) { tracker_sparql_builder_predicate (metadata, "dc:language"); tracker_sparql_builder_object_unvalidated (metadata, xd->language); } if (xd->relation) { tracker_sparql_builder_predicate (metadata, "dc:relation"); tracker_sparql_builder_object_unvalidated (metadata, xd->relation); } if (xd->coverage) { tracker_sparql_builder_predicate (metadata, "dc:coverage"); tracker_sparql_builder_object_unvalidated (metadata, xd->coverage); } if (xd->license) { tracker_sparql_builder_predicate (metadata, "nie:license"); tracker_sparql_builder_object_unvalidated (metadata, xd->license); } if (xd->make || xd->model) { gchar *equip_uri; equip_uri = tracker_sparql_escape_uri_printf ("urn:equipment:%s:%s:", xd->make ? xd->make : "", xd->model ? xd->model : ""); tracker_sparql_builder_insert_open (preupdate, NULL); if (graph) { tracker_sparql_builder_graph_open (preupdate, graph); } tracker_sparql_builder_subject_iri (preupdate, equip_uri); tracker_sparql_builder_predicate (preupdate, "a"); tracker_sparql_builder_object (preupdate, "nfo:Equipment"); if (xd->make) { tracker_sparql_builder_predicate (preupdate, "nfo:manufacturer"); tracker_sparql_builder_object_unvalidated (preupdate, xd->make); } if (xd->model) { tracker_sparql_builder_predicate (preupdate, "nfo:model"); tracker_sparql_builder_object_unvalidated (preupdate, xd->model); } if (graph) { tracker_sparql_builder_graph_close (preupdate); } tracker_sparql_builder_insert_close (preupdate); tracker_sparql_builder_predicate (metadata, "nfo:equipment"); tracker_sparql_builder_object_iri (metadata, equip_uri); g_free (equip_uri); } if (xd->orientation) { tracker_sparql_builder_predicate (metadata, "nfo:orientation"); tracker_sparql_builder_object (metadata, xd->orientation); } if (xd->rights) { tracker_sparql_builder_predicate (metadata, "nie:copyright"); tracker_sparql_builder_object_unvalidated (metadata, xd->rights); } if (xd->white_balance) { tracker_sparql_builder_predicate (metadata, "nmm:whiteBalance"); tracker_sparql_builder_object (metadata, xd->white_balance); } if (xd->fnumber) { gdouble value; value = g_strtod (xd->fnumber, NULL); tracker_sparql_builder_predicate (metadata, "nmm:fnumber"); tracker_sparql_builder_object_double (metadata, value); } if (xd->flash) { tracker_sparql_builder_predicate (metadata, "nmm:flash"); tracker_sparql_builder_object (metadata, xd->flash); } if (xd->focal_length) { gdouble value; value = g_strtod (xd->focal_length, NULL); tracker_sparql_builder_predicate (metadata, "nmm:focalLength"); tracker_sparql_builder_object_double (metadata, value); } /* Question: Shouldn't xd->Artist be merged with md.author instead? */ if (xd->artist || xd->contributor) { const gchar *artist; artist = tracker_coalesce_strip (2, xd->artist, xd->contributor); tracker_sparql_builder_predicate (metadata, "nco:contributor"); tracker_sparql_builder_object_blank_open (metadata); tracker_sparql_builder_predicate (metadata, "a"); tracker_sparql_builder_object (metadata, "nco:Contact"); tracker_sparql_builder_predicate (metadata, "nco:fullname"); tracker_sparql_builder_object_unvalidated (metadata, artist); tracker_sparql_builder_object_blank_close (metadata); } if (xd->exposure_time) { gdouble value; value = g_strtod (xd->exposure_time, NULL); tracker_sparql_builder_predicate (metadata, "nmm:exposureTime"); tracker_sparql_builder_object_double (metadata, value); } if (xd->iso_speed_ratings) { gdouble value; value = g_strtod (xd->iso_speed_ratings, NULL); tracker_sparql_builder_predicate (metadata, "nmm:isoSpeed"); tracker_sparql_builder_object_double (metadata, value); } if (xd->description) { tracker_sparql_builder_predicate (metadata, "nie:description"); tracker_sparql_builder_object_unvalidated (metadata, xd->description); } if (xd->metering_mode) { tracker_sparql_builder_predicate (metadata, "nmm:meteringMode"); tracker_sparql_builder_object (metadata, xd->metering_mode); } if (xd->address || xd->state || xd->country || xd->city || xd->gps_altitude || xd->gps_latitude || xd-> gps_longitude) { tracker_sparql_builder_predicate (metadata, "slo:location"); tracker_sparql_builder_object_blank_open (metadata); /* GeoLocation */ tracker_sparql_builder_predicate (metadata, "a"); tracker_sparql_builder_object (metadata, "slo:GeoLocation"); if (xd->address || xd->state || xd->country || xd->city) { gchar *addruri; addruri = tracker_sparql_get_uuid_urn (); tracker_sparql_builder_predicate (metadata, "slo:postalAddress"); tracker_sparql_builder_object_iri (metadata, addruri); tracker_sparql_builder_insert_open (preupdate, NULL); if (graph) { tracker_sparql_builder_graph_open (preupdate, graph); } tracker_sparql_builder_subject_iri (preupdate, addruri); g_free (addruri); tracker_sparql_builder_predicate (preupdate, "a"); tracker_sparql_builder_object (preupdate, "nco:PostalAddress"); if (xd->address) { tracker_sparql_builder_predicate (preupdate, "nco:streetAddress"); tracker_sparql_builder_object_unvalidated (preupdate, xd->address); } if (xd->state) { tracker_sparql_builder_predicate (preupdate, "nco:region"); tracker_sparql_builder_object_unvalidated (preupdate, xd->state); } if (xd->city) { tracker_sparql_builder_predicate (preupdate, "nco:locality"); tracker_sparql_builder_object_unvalidated (preupdate, xd->city); } if (xd->country) { tracker_sparql_builder_predicate (preupdate, "nco:country"); tracker_sparql_builder_object_unvalidated (preupdate, xd->country); } if (graph) { tracker_sparql_builder_graph_close (preupdate); } tracker_sparql_builder_insert_close (preupdate); } if (xd->gps_altitude) { tracker_sparql_builder_predicate (metadata, "slo:altitude"); tracker_sparql_builder_object_unvalidated (metadata, xd->gps_altitude); } if (xd->gps_latitude) { tracker_sparql_builder_predicate (metadata, "slo:latitude"); tracker_sparql_builder_object_unvalidated (metadata, xd->gps_latitude); } if (xd->gps_longitude) { tracker_sparql_builder_predicate (metadata, "slo:longitude"); tracker_sparql_builder_object_unvalidated (metadata, xd->gps_longitude); } tracker_sparql_builder_object_blank_close (metadata); /* GeoLocation */ } if (xd->regions) { tracker_xmp_apply_regions (preupdate, metadata, graph, xd); } tracker_xmp_free (xd); } else { /* So if we are here we have NO XMP data and we just * write what we know from Poppler. */ write_pdf_data (pd, metadata, keywords); } for (i = 0; i < keywords->len; i++) { gchar *escaped, *subject; const gchar *p; p = g_ptr_array_index (keywords, i); escaped = tracker_sparql_escape_string (p); subject = g_strdup_printf ("_:tag%d", i + 1); /* ensure tag with specified label exists */ tracker_sparql_builder_insert_open (preupdate, graph); tracker_sparql_builder_subject (preupdate, subject); tracker_sparql_builder_predicate (preupdate, "a"); tracker_sparql_builder_object (preupdate, "nao:Tag"); tracker_sparql_builder_predicate (preupdate, "nao:prefLabel"); tracker_sparql_builder_object_unvalidated (preupdate, escaped); tracker_sparql_builder_insert_close (preupdate); tracker_sparql_builder_append (preupdate, "WHERE { FILTER (NOT EXISTS { " "?tag a nao:Tag ; nao:prefLabel \""); tracker_sparql_builder_append (preupdate, escaped); tracker_sparql_builder_append (preupdate, "\" }) }\n"); /* associate file with tag */ tracker_sparql_builder_insert_open (preupdate, graph); tracker_sparql_builder_subject_iri (preupdate, urn); tracker_sparql_builder_predicate (preupdate, "nao:hasTag"); tracker_sparql_builder_object (preupdate, "?tag"); tracker_sparql_builder_insert_close (preupdate); tracker_sparql_builder_where_open (preupdate); tracker_sparql_builder_subject (preupdate, "?tag"); tracker_sparql_builder_predicate (preupdate, "a"); tracker_sparql_builder_object (preupdate, "nao:Tag"); tracker_sparql_builder_predicate (preupdate, "nao:prefLabel"); tracker_sparql_builder_object_unvalidated (preupdate, escaped); tracker_sparql_builder_where_close (preupdate); g_free (subject); g_free (escaped); } g_ptr_array_free (keywords, TRUE); tracker_sparql_builder_predicate (metadata, "nfo:pageCount"); tracker_sparql_builder_object_int64 (metadata, poppler_document_get_n_pages (document)); config = tracker_main_get_config (); n_bytes = tracker_config_get_max_bytes (config); content = extract_content_text (document, n_bytes); if (content) { tracker_sparql_builder_predicate (metadata, "nie:plainTextContent"); tracker_sparql_builder_object_unvalidated (metadata, content); g_free (content); } read_outline (document, metadata); g_free (xml); g_free (pd.keywords); g_free (pd.title); g_free (pd.subject); g_free (pd.creation_date); g_free (pd.author); g_free (pd.date); g_free (uri); g_object_unref (document); if (contents) { munmap (contents, len); } close (fd); return TRUE; }
G_MODULE_EXPORT gboolean tracker_extract_get_metadata (TrackerExtractInfo *info) { TrackerConfig *config; GTime creation_date; GError *error = NULL; TrackerResource *metadata; TrackerXmpData *xd = NULL; PDFData pd = { 0 }; /* actual data */ PDFData md = { 0 }; /* for merging */ PopplerDocument *document; gchar *xml = NULL; gchar *content, *uri; guint n_bytes; GPtrArray *keywords; guint i; GFile *file; gchar *filename; int fd; gchar *contents = NULL; gsize len; struct stat st; file = tracker_extract_info_get_file (info); filename = g_file_get_path (file); fd = tracker_file_open_fd (filename); if (fd == -1) { g_warning ("Could not open pdf file '%s': %s\n", filename, g_strerror (errno)); g_free (filename); return FALSE; } if (fstat (fd, &st) == -1) { g_warning ("Could not fstat pdf file '%s': %s\n", filename, g_strerror (errno)); close (fd); g_free (filename); return FALSE; } if (st.st_size == 0) { contents = NULL; len = 0; } else { contents = (gchar *) mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (contents == NULL || contents == MAP_FAILED) { g_warning ("Could not mmap pdf file '%s': %s\n", filename, g_strerror (errno)); close (fd); g_free (filename); return FALSE; } len = st.st_size; } g_free (filename); uri = g_file_get_uri (file); document = poppler_document_new_from_data (contents, len, NULL, &error); if (error) { if (error->code == POPPLER_ERROR_ENCRYPTED) { metadata = tracker_resource_new (NULL); tracker_resource_add_uri (metadata, "rdf:type", "nfo:PaginatedTextDocument"); tracker_resource_set_boolean (metadata, "nfo:isContentEncrypted", TRUE); tracker_extract_info_set_resource (info, metadata); g_object_unref (metadata); g_error_free (error); g_free (uri); close (fd); return TRUE; } else { g_warning ("Couldn't create PopplerDocument from uri:'%s', %s", uri, error->message ? error->message : "no error given"); g_error_free (error); g_free (uri); close (fd); return FALSE; } } if (!document) { g_warning ("Could not create PopplerDocument from uri:'%s', " "NULL returned without an error", uri); g_free (uri); close (fd); return FALSE; } metadata = tracker_resource_new (NULL); tracker_resource_add_uri (metadata, "rdf:type", "nfo:PaginatedTextDocument"); g_object_get (document, "title", &pd.title, "author", &pd.author, "subject", &pd.subject, "keywords", &pd.keywords, "creation-date", &creation_date, "metadata", &xml, NULL); if (creation_date > 0) { pd.creation_date = tracker_date_to_string ((time_t) creation_date); } keywords = g_ptr_array_new_with_free_func ((GDestroyNotify) g_free); if (xml && *xml && (xd = tracker_xmp_new (xml, strlen (xml), uri)) != NULL) { /* The casts here are well understood and known */ md.title = (gchar *) tracker_coalesce_strip (4, pd.title, xd->title, xd->title2, xd->pdf_title); md.subject = (gchar *) tracker_coalesce_strip (2, pd.subject, xd->subject); md.date = (gchar *) tracker_coalesce_strip (3, pd.creation_date, xd->date, xd->time_original); md.author = (gchar *) tracker_coalesce_strip (2, pd.author, xd->creator); write_pdf_data (md, metadata, keywords); if (xd->keywords) { tracker_keywords_parse (keywords, xd->keywords); } if (xd->pdf_keywords) { tracker_keywords_parse (keywords, xd->pdf_keywords); } if (xd->publisher) { TrackerResource *publisher = tracker_extract_new_contact (xd->publisher); tracker_resource_set_relation (metadata, "nco:publisher", publisher); g_object_unref (publisher); } if (xd->type) { tracker_resource_set_string (metadata, "dc:type", xd->type); } if (xd->format) { tracker_resource_set_string (metadata, "dc:format", xd->format); } if (xd->identifier) { tracker_resource_set_string (metadata, "dc:identifier", xd->identifier); } if (xd->source) { tracker_resource_set_string (metadata, "dc:source", xd->source); } if (xd->language) { tracker_resource_set_string (metadata, "dc:language", xd->language); } if (xd->relation) { tracker_resource_set_string (metadata, "dc:relation", xd->relation); } if (xd->coverage) { tracker_resource_set_string (metadata, "dc:coverage", xd->coverage); } if (xd->license) { tracker_resource_set_string (metadata, "nie:license", xd->license); } if (xd->make || xd->model) { TrackerResource *equipment = tracker_extract_new_equipment (xd->make, xd->model); tracker_resource_set_relation (metadata, "nfo:equipment", equipment); g_object_unref (equipment); } if (xd->orientation) { tracker_resource_set_string (metadata, "nfo:orientation", xd->orientation); } if (xd->rights) { tracker_resource_set_string (metadata, "nie:copyright", xd->rights); } if (xd->white_balance) { tracker_resource_set_string (metadata, "nmm:whiteBalance", xd->white_balance); } if (xd->fnumber) { gdouble value; value = g_strtod (xd->fnumber, NULL); tracker_resource_set_double (metadata, "nmm:fnumber", value); } if (xd->flash) { tracker_resource_set_string (metadata, "nmm:flash", xd->flash); } if (xd->focal_length) { gdouble value; value = g_strtod (xd->focal_length, NULL); tracker_resource_set_double (metadata, "nmm:focalLength", value); } /* Question: Shouldn't xd->Artist be merged with md.author instead? */ if (xd->artist || xd->contributor) { TrackerResource *artist; const gchar *artist_name; artist_name = tracker_coalesce_strip (2, xd->artist, xd->contributor); artist = tracker_extract_new_contact (artist_name); tracker_resource_set_relation (metadata, "nco:contributor", artist); g_object_unref (artist); } if (xd->exposure_time) { gdouble value; value = g_strtod (xd->exposure_time, NULL); tracker_resource_set_double (metadata, "nmm:exposureTime", value); } if (xd->iso_speed_ratings) { gdouble value; value = g_strtod (xd->iso_speed_ratings, NULL); tracker_resource_set_double (metadata, "nmm:isoSpeed", value); } if (xd->description) { tracker_resource_set_string (metadata, "nie:description", xd->description); } if (xd->metering_mode) { tracker_resource_set_string (metadata, "nmm:meteringMode", xd->metering_mode); } if (xd->address || xd->state || xd->country || xd->city || xd->gps_altitude || xd->gps_latitude || xd-> gps_longitude) { TrackerResource *location = tracker_extract_new_location (xd->address, xd->state, xd->city, xd->country, xd->gps_altitude, xd->gps_latitude, xd->gps_longitude); tracker_resource_set_relation (metadata, "slo:location", location); g_object_unref (location); } if (xd->regions) { tracker_xmp_apply_regions_to_resource (metadata, xd); } tracker_xmp_free (xd); } else { /* So if we are here we have NO XMP data and we just * write what we know from Poppler. */ write_pdf_data (pd, metadata, keywords); } for (i = 0; i < keywords->len; i++) { TrackerResource *tag; const gchar *p; p = g_ptr_array_index (keywords, i); tag = tracker_extract_new_tag (p); tracker_resource_add_relation (metadata, "nao:hasTag", tag); g_object_unref (tag); } g_ptr_array_free (keywords, TRUE); tracker_resource_set_int64 (metadata, "nfo:pageCount", poppler_document_get_n_pages(document)); config = tracker_main_get_config (); n_bytes = tracker_config_get_max_bytes (config); content = extract_content_text (document, n_bytes); if (content) { tracker_resource_set_string (metadata, "nie:plainTextContent", content); g_free (content); } read_outline (document, metadata); g_free (xml); g_free (pd.keywords); g_free (pd.title); g_free (pd.subject); g_free (pd.creation_date); g_free (pd.author); g_free (pd.date); g_free (uri); g_object_unref (document); if (contents) { munmap (contents, len); } close (fd); tracker_extract_info_set_resource (info, metadata); g_object_unref (metadata); return TRUE; }