Exemplo n.º 1
0
/* Set @property on @resource to @value, unless tracker_is_empty_string()
 * reports @value as empty. */
static void
write_pdf_string_field (TrackerResource *resource,
                        const char      *property,
                        const char      *value)
{
    if (tracker_is_empty_string (value)) {
        return;
    }

    tracker_resource_set_string (resource, property, value);
}

/*
 * Copy the non-empty fields of @data into @metadata.
 *
 * Title, subject and date are stored as plain string properties. The author
 * is wrapped in a contact resource and attached as nco:creator. The keyword
 * string is not written to @metadata; it is handed to
 * tracker_keywords_parse(), which receives the @keywords array.
 */
static void
write_pdf_data (PDFData          data,
                TrackerResource *metadata,
                GPtrArray       *keywords)
{
    write_pdf_string_field (metadata, "nie:title", data.title);
    write_pdf_string_field (metadata, "nie:subject", data.subject);

    if (!tracker_is_empty_string (data.author)) {
        TrackerResource *creator;

        creator = tracker_extract_new_contact (data.author);
        tracker_resource_set_relation (metadata, "nco:creator", creator);
        /* Drop our own reference to the contact once it has been attached. */
        g_object_unref (creator);
    }

    write_pdf_string_field (metadata, "nie:contentCreated", data.date);

    if (tracker_is_empty_string (data.keywords)) {
        return;
    }

    tracker_keywords_parse (keywords, data.keywords);
}
/**
 * tracker_extract_new_equipment:
 * @make: (allow-none): the manufacturer of the equipment, or %NULL
 * @model: (allow-none): the model name of the equipment, or %NULL
 *
 * Create a new nfo:Equipment resource. The resource URI is built from @make
 * and @model (a missing value contributes an empty string), so the same
 * make/model pair always yields the same URI. At least one of @make and
 * @model must be non-%NULL; otherwise %NULL is returned.
 *
 * This is useful for describing equipment used to create something, for
 * example the camera that was used to take a photograph.
 *
 * Returns: a newly allocated #TrackerResource instance, of type nfo:Equipment
 *
 * Since: 1.10
 */
TrackerResource *
tracker_extract_new_equipment (const char *make,
                               const char *model)
{
	const char *make_part, *model_part;
	TrackerResource *resource;
	gchar *urn;

	g_return_val_if_fail (make != NULL || model != NULL, NULL);

	/* A missing component is rendered as an empty URI segment. */
	make_part = (make != NULL) ? make : "";
	model_part = (model != NULL) ? model : "";

	urn = tracker_sparql_escape_uri_printf ("urn:equipment:%s:%s", make_part, model_part);
	resource = tracker_resource_new (urn);
	g_free (urn);

	tracker_resource_set_uri (resource, "rdf:type", "nfo:Equipment");

	if (make != NULL) {
		tracker_resource_set_string (resource, "nfo:manufacturer", make);
	}

	if (model != NULL) {
		tracker_resource_set_string (resource, "nfo:model", model);
	}

	return resource;
}
/*
 * GMarkup start-element callback for the metadata of an OASIS document.
 *
 * @user_data is an ODTMetadataParseInfo. For most recognised elements the
 * callback only records which tag is now open in ->current. For
 * <meta:document-statistic> it also copies the word and page counts from the
 * element attributes into the metadata resource (first occurrence wins, later
 * ones only produce a warning). Unrecognised elements reset ->current to -1.
 * Element and attribute names are matched case-insensitively (ASCII).
 */
static void
xml_start_element_handler_metadata (GMarkupParseContext  *context,
                                    const gchar          *element_name,
                                    const gchar         **attribute_names,
                                    const gchar         **attribute_values,
                                    gpointer              user_data,
                                    GError              **error)
{
	/* Elements whose only effect is to select the current tag type. */
	static const struct {
		const gchar *element;
		int tag;
	} simple_tags[] = {
		{ "dc:title", ODT_TAG_TYPE_TITLE },
		{ "dc:subject", ODT_TAG_TYPE_SUBJECT },
		{ "dc:creator", ODT_TAG_TYPE_AUTHOR },
		{ "meta:keyword", ODT_TAG_TYPE_KEYWORDS },
		{ "dc:description", ODT_TAG_TYPE_COMMENTS },
		{ "meta:creation-date", ODT_TAG_TYPE_CREATED },
		{ "meta:generator", ODT_TAG_TYPE_GENERATOR }
	};
	ODTMetadataParseInfo *info = user_data;
	unsigned int i;

	if (g_ascii_strcasecmp (element_name, "meta:document-statistic") == 0) {
		TrackerResource *resource = info->metadata;

		/* Names and values are parallel arrays; the name array ends
		 * with a NULL entry. */
		for (i = 0; attribute_names[i]; i++) {
			const gchar *attr = attribute_names[i];
			const gchar *value = attribute_values[i];

			if (g_ascii_strcasecmp (attr, "meta:word-count") == 0) {
				if (!info->has_word_count) {
					info->has_word_count = TRUE;
					tracker_resource_set_string (resource, "nfo:wordCount", value);
				} else {
					g_warning ("Avoiding additional word count (%s) in OASIS document '%s'",
					           value, info->uri);
				}
			} else if (g_ascii_strcasecmp (attr, "meta:page-count") == 0) {
				if (!info->has_page_count) {
					info->has_page_count = TRUE;
					tracker_resource_set_string (resource, "nfo:pageCount", value);
				} else {
					g_warning ("Avoiding additional page count (%s) in OASIS document '%s'",
					           value, info->uri);
				}
			}
		}

		info->current = ODT_TAG_TYPE_STATS;
		return;
	}

	for (i = 0; i < sizeof simple_tags / sizeof simple_tags[0]; i++) {
		if (g_ascii_strcasecmp (element_name, simple_tags[i].element) == 0) {
			info->current = simple_tags[i].tag;
			return;
		}
	}

	/* Not an element we extract anything from. */
	info->current = -1;
}
/**
 * tracker_extract_new_music_album_disc:
 * @album_title: title of the album
 * @album_artist: (allow-none): a #TrackerResource for the album artist, or %NULL
 * @disc_number: disc number of this disc (the first / only disc in a set should be 1, not 0)
 *
 * Create new nmm:MusicAlbumDisc and nmm:MusicAlbum resources. The resources are
 * given fixed URIs based on @album_title and @disc_number, so they will be
 * merged with existing entries when serialized to SPARQL and sent to the
 * Tracker store.
 *
 * The nmm:setNumber property is set to @disc_number, or to 1 when
 * @disc_number is not positive; the disc URI always uses @disc_number as
 * given.
 *
 * You can get the album resource from the disc resource by calling:
 *
 *     tracker_resource_get_first_relation (album_disc, "nmm:albumDiscAlbum");
 *
 * Returns: a newly allocated #TrackerResource instance, of type
 * nmm:MusicAlbumDisc, or %NULL if @album_title is %NULL
 *
 * Since: 1.10
 */
TrackerResource *
tracker_extract_new_music_album_disc (const char      *album_title,
                                      TrackerResource *album_artist,
                                      int              disc_number)
{
	char *album_uri, *disc_uri;
	TrackerResource *album, *album_disc;

	g_return_val_if_fail (album_title != NULL, NULL);

	album_uri = tracker_sparql_escape_uri_printf ("urn:album:%s", album_title);
	album = tracker_resource_new (album_uri);

	tracker_resource_set_uri (album, "rdf:type", "nmm:MusicAlbum");
	tracker_resource_set_string (album, "nmm:albumTitle", album_title);

	if (album_artist != NULL) {
		tracker_resource_add_relation (album, "nmm:albumArtist", album_artist);
	}

	disc_uri = tracker_sparql_escape_uri_printf ("urn:album-disc:%s:Disc%d", album_title, disc_number);
	album_disc = tracker_resource_new (disc_uri);
	tracker_resource_set_uri (album_disc, "rdf:type", "nmm:MusicAlbumDisc");
	tracker_resource_set_int (album_disc, "nmm:setNumber", disc_number > 0 ? disc_number : 1);
	tracker_resource_add_relation (album_disc, "nmm:albumDiscAlbum", album);

	/* Both URI strings were only needed to construct the resources. */
	g_free (album_uri);
	g_free (disc_uri);

	/* Drop our own reference to the album; it stays reachable through the
	 * nmm:albumDiscAlbum relation added above. */
	g_object_unref (album);

	return album_disc;
}
/*
 * Extract the text of "content.xml" inside the OASIS archive at @uri and
 * store it on @metadata as nie:plainTextContent.
 *
 * @total_bytes is handed to the content handlers as their byte budget
 * (info.bytes_pending); when it is 0 nothing is extracted. A parse that fails
 * with code 0 of maximum_size_error_quark is treated like a success and the
 * text gathered so far is kept (presumably that error signals that the byte
 * budget was used up -- confirm against the content handlers). Any other
 * error discards the text and logs a warning.
 */
static void
extract_oasis_content (const gchar     *uri,
                       gulong           total_bytes,
                       ODTFileType      file_type,
                       TrackerResource *metadata)
{
	GMarkupParser content_parser = {
		xml_start_element_handler_content,
		xml_end_element_handler_content,
		xml_text_handler_content,
		NULL,
		NULL
	};
	GMarkupParseContext *parse_context;
	ODTContentParseInfo parse_info;
	GError *parse_error = NULL;

	/* If no content requested, return */
	if (total_bytes == 0) {
		return;
	}

	/* State shared with the content handlers */
	parse_info.current = ODT_TAG_TYPE_UNKNOWN;
	parse_info.file_type = file_type;
	parse_info.content = g_string_new ("");
	parse_info.bytes_pending = total_bytes;

	parse_context = g_markup_parse_context_new (&content_parser, 0, &parse_info, NULL);

	/* Load the internal XML file from the Zip archive, and parse it
	 * using the given context */
	tracker_gsf_parse_xml_in_zip (uri, "content.xml", parse_context, &parse_error);

	if (parse_error && !g_error_matches (parse_error, maximum_size_error_quark, 0)) {
		g_warning ("Got error parsing XML file: %s\n", parse_error->message);
		g_string_free (parse_info.content, TRUE);
	} else {
		/* Take ownership of the accumulated character data. */
		gchar *text = g_string_free (parse_info.content, FALSE);

		tracker_resource_set_string (metadata, "nie:plainTextContent", text);
		g_free (text);
	}

	if (parse_error) {
		g_error_free (parse_error);
	}

	g_markup_parse_context_free (parse_context);
}
/**
 * tracker_extract_new_artist:
 * @name: the name of the artist
 *
 * Create a new nmm:Artist resource. The URI is derived from @name, so the
 * same artist name always yields the same URI and there will only be one
 * resource representing the artist in the Tracker store.
 *
 * Returns: a newly allocated #TrackerResource instance, of type nmm:Artist,
 * or %NULL if @name is %NULL.
 *
 * Since: 1.10
 */
TrackerResource *
tracker_extract_new_artist (const char *name)
{
	gchar *artist_urn;
	TrackerResource *resource;

	g_return_val_if_fail (name != NULL, NULL);

	artist_urn = tracker_sparql_escape_uri_printf ("urn:artist:%s", name);
	resource = tracker_resource_new (artist_urn);
	g_free (artist_urn);

	tracker_resource_set_uri (resource, "rdf:type", "nmm:Artist");
	tracker_resource_set_string (resource, "nmm:artistName", name);

	return resource;
}
Exemplo n.º 7
0
/*
 * Store the outline (table of contents) of @document on @metadata as
 * nfo:tableOfContents.
 *
 * Nothing is written when the document has no index or when read_toc()
 * produces no text.
 *
 * NOTE(review): the index iterator is not freed here; presumably read_toc()
 * takes ownership of it -- confirm.
 */
static void
read_outline (PopplerDocument *document,
              TrackerResource *metadata)
{
    GString *outline_text = NULL;
    PopplerIndexIter *iter = poppler_index_iter_new (document);

    if (iter == NULL) {
        return;
    }

    read_toc (iter, &outline_text);

    if (outline_text == NULL) {
        return;
    }

    if (outline_text->len != 0) {
        tracker_resource_set_string (metadata, "nfo:tableOfContents", outline_text->str);
    }

    g_string_free (outline_text, TRUE);
}
Exemplo n.º 8
0
/*
 * Extractor entry point for DVI files.
 *
 * Opens the file behind @info with mdvi, and publishes a
 * nfo:PaginatedTextDocument resource carrying the page count and, when
 * present, the DVI file id as nie:comment.
 *
 * Returns: TRUE when a resource was set on @info, FALSE when the file could
 * not be opened as DVI.
 */
G_MODULE_EXPORT gboolean
tracker_extract_get_metadata (TrackerExtractInfo *info)
{
	TrackerResource *resource;
	GFile *file;
	gchar *filename;
	DviContext *context;

	file = tracker_extract_info_get_file (info);
	filename = g_file_get_path (file);

	context = mdvi_init_context (filename);

	if (context == NULL) {
		g_warning ("Could not open dvi file '%s'\n", filename);
		g_free (filename);
		return FALSE;
	}

	/* The path is only needed to open the file; release it on the
	 * success path too, not only on failure. */
	g_free (filename);

	resource = tracker_resource_new (NULL);

	tracker_resource_add_uri (resource, "rdf:type", "nfo:PaginatedTextDocument");

	tracker_resource_set_int64 (resource, "nfo:pageCount", context->npages);

	if (context->fileid) {
		tracker_resource_set_string (resource, "nie:comment", context->fileid);
	}

	mdvi_destroy_context (context);

	tracker_extract_info_set_resource (info, resource);
	g_object_unref (resource);

	return TRUE;
}
Exemplo n.º 9
0
/*
 * SAX start-element callback for the HTML extractor.
 *
 * @data is the parser_data for the current document. Depending on the
 * element (matched case-insensitively):
 *  - <a rel="license" href="..."> without an "about" attribute: the first
 *    such href is stored as nie:license;
 *  - <title>: switches the parser to READ_TITLE;
 *  - <meta name="author|description|keywords" content="...">: adds an
 *    nco:creator contact, sets nie:description (first one only), or adds
 *    one nie:keyword per comma-separated, whitespace-stripped keyword;
 *  - <body>: marks that the body has been entered;
 *  - <script>: switches the parser to READ_IGNORE.
 */
static void
parser_start_element (void           *data,
                      const xmlChar  *name_,
                      const xmlChar **attrs_)
{
	parser_data *pd = data;
	const gchar *name = (const gchar*) name_;
	const gchar **attrs = (const gchar**) attrs_;

	if (!pd || !name) {
		return;
	}

	/* Look for RDFa triple describing the license */
	if (g_ascii_strcasecmp (name, "a") == 0) {
		/* This tag is a license.  Ignore, however, if it is
		 * referring to another document.
		 */
		if (has_attribute (attrs, "rel", "license") &&
		    has_attribute (attrs, "about", NULL) == FALSE) {
			const xmlChar *href;

			href = lookup_attribute (attrs, "href");

			if (href && !pd->has_license) {
				tracker_resource_add_string (pd->metadata, "nie:license", href);
				pd->has_license = TRUE;
			}
		}
	} else if (g_ascii_strcasecmp (name, "title") == 0) {
		pd->current = READ_TITLE;
	} else if (g_ascii_strcasecmp (name, "meta") == 0) {
		if (has_attribute (attrs, "name", "author")) {
			const xmlChar *author;

			author = lookup_attribute (attrs, "content");

			if (author) {
				TrackerResource *creator = tracker_extract_new_contact (author);

				tracker_resource_add_relation (pd->metadata, "nco:creator", creator);

				g_object_unref (creator);
			}
		}

		if (has_attribute (attrs, "name", "description")) {
			const xmlChar *desc;

			desc = lookup_attribute (attrs,"content");

			if (desc && !pd->has_description) {
				tracker_resource_set_string (pd->metadata, "nie:description", desc);
				pd->has_description = TRUE;
			}
		}

		if (has_attribute (attrs, "name", "keywords")) {
			const xmlChar* content = lookup_attribute (attrs, "content");

			if (content) {
				gchar **keywords;
				gint i;

				keywords = g_strsplit (content, ",", -1);
				if (keywords) {
					for (i = 0; keywords[i] != NULL; i++) {
						/* Strip first, then test the first
						 * character: entries that are empty or
						 * whitespace-only (e.g. "a,,b" or a
						 * trailing comma) must not become tags.
						 */
						const gchar *keyword = g_strstrip (keywords[i]);

						if (*keyword == '\0') {
							continue;
						}

						tracker_resource_add_string (pd->metadata, "nie:keyword", keyword);
					}

					g_strfreev (keywords);
				}
			}
		}
	} else if (g_ascii_strcasecmp (name, "body") == 0) {
		pd->in_body = TRUE;
	} else if (g_ascii_strcasecmp (name, "script") == 0) {
		/* Ignore javascript and such */
		pd->current = READ_IGNORE;
	}
}
Exemplo n.º 10
0
/*
 * Extractor entry point for HTML files.
 *
 * Runs libxml2's HTML SAX parser over the file behind @info with the
 * parser_* callbacks, then publishes a nfo:HtmlDocument resource holding
 * whatever the callbacks stored plus the stripped title and plain text
 * (each only when non-empty). The plain-text budget comes from the
 * configured maximum byte count.
 *
 * Returns: always TRUE; parse failures simply yield a resource with fewer
 * properties.
 */
G_MODULE_EXPORT gboolean
tracker_extract_get_metadata (TrackerExtractInfo *info)
{
	TrackerResource *metadata;
	GFile *file;
	TrackerConfig *config;
	htmlDocPtr doc;
	/* Zero every field up front: parser_start_element() tests
	 * pd->has_license and pd->has_description before ever setting them,
	 * and they are not assigned below. */
	parser_data pd = { 0 };
	gchar *filename;
	xmlSAXHandler handler = {
		NULL, /* internalSubset */
		NULL, /* isStandalone */
		NULL, /* hasInternalSubset */
		NULL, /* hasExternalSubset */
		NULL, /* resolveEntity */
		NULL, /* getEntity */
		NULL, /* entityDecl */
		NULL, /* notationDecl */
		NULL, /* attributeDecl */
		NULL, /* elementDecl */
		NULL, /* unparsedEntityDecl */
		NULL, /* setDocumentLocator */
		NULL, /* startDocument */
		NULL, /* endDocument */
		parser_start_element, /* startElement */
		parser_end_element, /* endElement */
		NULL, /* reference */
		parser_characters, /* characters */
		NULL, /* ignorableWhitespace */
		NULL, /* processingInstruction */
		NULL, /* comment */
		NULL, /* xmlParserWarning */
		NULL, /* xmlParserError */
		NULL, /* fatalError */
		NULL, /* getParameterEntity */
		NULL, /* cdataBlock */
		NULL, /* externalSubset */
		1,    /* initialized */
		NULL, /* private */
		NULL, /* startElementNsSAX2Func */
		NULL, /* endElementNsSAX2Func */
		NULL  /* xmlStructuredErrorFunc */
	};

	file = tracker_extract_info_get_file (info);

	metadata = tracker_resource_new (NULL);
	tracker_resource_add_uri (metadata, "rdf:type", "nfo:HtmlDocument");

	pd.metadata = metadata;
	pd.current = -1;
	pd.in_body = FALSE;
	pd.plain_text = g_string_new (NULL);
	pd.title = g_string_new (NULL);

	config = tracker_main_get_config ();
	pd.n_bytes_remaining = tracker_config_get_max_bytes (config);

	filename = g_file_get_path (file);
	doc = htmlSAXParseFile (filename, NULL, &handler, &pd);
	g_free (filename);

	if (doc) {
		xmlFreeDoc (doc);
	}

	/* Strip in place; only ->str is used from here on, so the stale
	 * GString length does not matter. */
	g_strstrip (pd.plain_text->str);
	g_strstrip (pd.title->str);

	if (pd.title->str &&
	    *pd.title->str != '\0') {
		tracker_resource_set_string (metadata, "nie:title", pd.title->str);
	}

	if (pd.plain_text->str &&
	    *pd.plain_text->str != '\0') {
		tracker_resource_set_string (metadata, "nie:plainTextContent", pd.plain_text->str);
	}

	g_string_free (pd.plain_text, TRUE);
	g_string_free (pd.title, TRUE);

	tracker_extract_info_set_resource (info, metadata);
	g_object_unref (metadata);

	return TRUE;
}
/**
 * tracker_extract_new_location:
 * @street_address: (allow-none): main part of postal address, or %NULL
 * @state: (allow-none): regional part of postal address, or %NULL
 * @city: (allow-none): locality part of postal address, or %NULL
 * @country: (allow-none): country of postal address, or %NULL
 * @gps_altitude: (allow-none): altitude (following WGS 84 reference) as a string, or %NULL
 * @gps_latitude: (allow-none): latitude as a string, or %NULL
 * @gps_longitude: (allow-none): longitude as a string, or %NULL
 *
 * Create a new slo:GeoLocation resource, with the given postal address and/or
 * GPS coordinates. At least one argument must be non-%NULL. When any of the
 * postal address parts is given, a nco:PostalAddress resource holding the
 * non-%NULL parts is created and attached as slo:postalAddress.
 *
 * No validation is done here -- it's up to you to ensure the postal address
 * and GPS coordinates describe the same thing.
 *
 * Returns: a newly allocated #TrackerResource instance, of type
 * slo:GeoLocation, or %NULL if every argument is %NULL
 *
 * Since: 1.10
 */
TrackerResource *
tracker_extract_new_location (const char *street_address,
                              const char *state,
                              const char *city,
                              const char *country,
                              const char *gps_altitude,
                              const char *gps_latitude,
                              const char *gps_longitude)
{
	TrackerResource *location;

	g_return_val_if_fail (street_address != NULL || state != NULL || city != NULL ||
	                      country != NULL || gps_altitude != NULL ||
	                      gps_latitude != NULL || gps_longitude != NULL, NULL);

	location = tracker_resource_new (NULL);
	tracker_resource_set_uri (location, "rdf:type", "slo:GeoLocation");

	if (street_address || state || country || city) {
		TrackerResource *address;
		gchar *addruri;

		addruri = tracker_sparql_get_uuid_urn ();
		address = tracker_resource_new (addruri);

		/* rdf:type refers to a class, so it is set as a URI like for
		 * every other resource created in this file, not as a string
		 * literal. */
		tracker_resource_set_uri (address, "rdf:type", "nco:PostalAddress");

		g_free (addruri);

		/* Guard on the argument itself: the address resource always
		 * exists at this point, the street address may not. */
		if (street_address) {
			tracker_resource_set_string (address, "nco:streetAddress", street_address);
		}

		if (state) {
			tracker_resource_set_string (address, "nco:region", state);
		}

		if (city) {
			tracker_resource_set_string (address, "nco:locality", city);
		}

		if (country) {
			tracker_resource_set_string (address, "nco:country", country);
		}

		tracker_resource_set_relation (location, "slo:postalAddress", address);
		g_object_unref (address);
	}

	if (gps_altitude) {
		tracker_resource_set_string (location, "slo:altitude", gps_altitude);
	}

	if (gps_latitude) {
		tracker_resource_set_string (location, "slo:latitude", gps_latitude);
	}

	if (gps_longitude) {
		tracker_resource_set_string (location, "slo:longitude", gps_longitude);
	}

	return location;
}
Exemplo n.º 12
0
/*
 * GMarkup text callback for the metadata of an OASIS document.
 *
 * @user_data is an ODTMetadataParseInfo whose ->current field says which
 * element the character data belongs to. The text is stored on the metadata
 * resource under the matching property. For single-valued properties only
 * the first occurrence is kept; later ones are reported with a warning.
 * Keywords are split on ',', ';' and ' ' and added one by one. Empty text
 * and text for statistics or unknown tags is ignored.
 *
 * NOTE(review): the GMarkupParser documentation describes @text as not
 * NUL-terminated, yet it is used as a C string throughout this function --
 * confirm this is safe with the GLib versions supported.
 */
static void
xml_text_handler_metadata (GMarkupParseContext  *context,
                           const gchar          *text,
                           gsize                 text_len,
                           gpointer              user_data,
                           GError              **error)
{
	ODTMetadataParseInfo *data;
	TrackerResource *metadata;
	gchar *date;

	data = user_data;
	metadata = data->metadata;

	if (text_len == 0) {
		/* ignore empty values */
		return;
	}

	switch (data->current) {
	case ODT_TAG_TYPE_TITLE:
		if (data->has_title) {
			g_warning ("Avoiding additional title (%s) in OASIS document '%s'",
			           text, data->uri);
		} else {
			data->has_title = TRUE;
			tracker_resource_set_string (metadata, "nie:title", text);
		}
		break;

	case ODT_TAG_TYPE_SUBJECT:
		if (data->has_subject) {
			g_warning ("Avoiding additional subject (%s) in OASIS document '%s'",
			           text, data->uri);
		} else {
			data->has_subject = TRUE;
			tracker_resource_set_string (metadata, "nie:subject", text);
		}
		break;

	case ODT_TAG_TYPE_AUTHOR:
		/* The author tag is recorded as the document's nco:publisher. */
		if (data->has_publisher) {
			g_warning ("Avoiding additional publisher (%s) in OASIS document '%s'",
			           text, data->uri);
		} else {
			TrackerResource *publisher = tracker_extract_new_contact (text);

			data->has_publisher = TRUE;
			tracker_resource_set_relation (metadata, "nco:publisher", publisher);

			g_object_unref (publisher);
		}
		break;

	case ODT_TAG_TYPE_KEYWORDS: {
		gchar *keywords;
		gchar *lasts, *keyw;

		/* strtok_r() writes into its input, so tokenize a copy. */
		keywords = g_strdup (text);

		for (keyw = strtok_r (keywords, ",; ", &lasts);
		     keyw;
		     keyw = strtok_r (NULL, ",; ", &lasts)) {
			tracker_resource_add_string (metadata, "nie:keyword", keyw);
		}

		g_free (keywords);

		break;
	}

	case ODT_TAG_TYPE_COMMENTS:
		if (data->has_comment) {
			g_warning ("Avoiding additional comment (%s) in OASIS document '%s'",
			           text, data->uri);
		} else {
			data->has_comment = TRUE;
			tracker_resource_set_string (metadata, "nie:comment", text);
		}
		break;

	case ODT_TAG_TYPE_CREATED:
		if (data->has_content_created) {
			g_warning ("Avoiding additional creation time (%s) in OASIS document '%s'",
			           text, data->uri);
		} else {
			date = tracker_date_guess (text);
			if (date) {
				data->has_content_created = TRUE;
				tracker_resource_set_string (metadata, "nie:contentCreated", date);
				g_free (date);
			} else {
				g_warning ("Could not parse creation time (%s) in OASIS document '%s'",
				           text, data->uri);
			}
		}
		break;

	case ODT_TAG_TYPE_GENERATOR:
		if (data->has_generator) {
			/* This message used to say "creation time", copied
			 * from the case above. */
			g_warning ("Avoiding additional generator (%s) in OASIS document '%s'",
			           text, data->uri);
		} else {
			data->has_generator = TRUE;
			tracker_resource_set_string (metadata, "nie:generator", text);
		}
		break;

	default:
	case ODT_TAG_TYPE_STATS:
		break;
	}
}
Exemplo n.º 13
0
/*
 * Extractor entry point for PDF files.
 *
 * Maps the file behind @info into memory, opens it with Poppler and
 * publishes a nfo:PaginatedTextDocument resource. Document properties
 * reported by Poppler are merged with the embedded XMP packet when one is
 * present (Poppler values take precedence for title, subject, date and
 * author). Keywords become nao:hasTag relations; the page count, the plain
 * text (limited by the configured maximum byte count) and the outline are
 * added as well.
 *
 * An encrypted document still yields a resource, flagged with
 * nfo:isContentEncrypted.
 *
 * Returns: TRUE when a resource was set on @info, FALSE when the file could
 * not be opened, mapped or parsed.
 */
G_MODULE_EXPORT gboolean
tracker_extract_get_metadata (TrackerExtractInfo *info)
{
    TrackerConfig *config;
    GTime creation_date;
    GError *error = NULL;
    TrackerResource *metadata;
    TrackerXmpData *xd = NULL;
    PDFData pd = { 0 }; /* actual data */
    PDFData md = { 0 }; /* for merging */
    PopplerDocument *document;
    gchar *xml = NULL;
    gchar *content, *uri;
    guint n_bytes;
    GPtrArray *keywords;
    guint i;
    GFile *file;
    gchar *filename;
    int fd;
    gchar *contents = NULL;
    gsize len;
    struct stat st;

    file = tracker_extract_info_get_file (info);
    filename = g_file_get_path (file);

    fd = tracker_file_open_fd (filename);

    if (fd == -1) {
        g_warning ("Could not open pdf file '%s': %s\n",
                   filename,
                   g_strerror (errno));
        g_free (filename);
        return FALSE;
    }

    if (fstat (fd, &st) == -1) {
        g_warning ("Could not fstat pdf file '%s': %s\n",
                   filename,
                   g_strerror (errno));
        close (fd);
        g_free (filename);
        return FALSE;
    }

    if (st.st_size == 0) {
        /* Nothing to map; Poppler is handed an empty buffer. */
        contents = NULL;
        len = 0;
    } else {
        contents = (gchar *) mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
        if (contents == NULL || contents == MAP_FAILED) {
            g_warning ("Could not mmap pdf file '%s': %s\n",
                       filename,
                       g_strerror (errno));
            close (fd);
            g_free (filename);
            return FALSE;
        }
        len = st.st_size;
    }

    g_free (filename);
    uri = g_file_get_uri (file);

    document = poppler_document_new_from_data (contents, len, NULL, &error);

    if (error) {
        if (error->code == POPPLER_ERROR_ENCRYPTED) {
            metadata = tracker_resource_new (NULL);

            tracker_resource_add_uri (metadata, "rdf:type", "nfo:PaginatedTextDocument");
            tracker_resource_set_boolean (metadata, "nfo:isContentEncrypted", TRUE);

            tracker_extract_info_set_resource (info, metadata);
            g_object_unref (metadata);

            g_error_free (error);
            g_free (uri);

            /* The mapping must be released on every exit path. */
            if (contents) {
                munmap (contents, len);
            }

            close (fd);

            return TRUE;
        } else {
            g_warning ("Couldn't create PopplerDocument from uri:'%s', %s",
                       uri,
                       error->message ? error->message : "no error given");

            g_error_free (error);
            g_free (uri);

            if (contents) {
                munmap (contents, len);
            }

            close (fd);

            return FALSE;
        }
    }

    if (!document) {
        g_warning ("Could not create PopplerDocument from uri:'%s', "
                   "NULL returned without an error",
                   uri);
        g_free (uri);

        if (contents) {
            munmap (contents, len);
        }

        close (fd);
        return FALSE;
    }

    metadata = tracker_resource_new (NULL);
    tracker_resource_add_uri (metadata, "rdf:type", "nfo:PaginatedTextDocument");

    g_object_get (document,
                  "title", &pd.title,
                  "author", &pd.author,
                  "subject", &pd.subject,
                  "keywords", &pd.keywords,
                  "creation-date", &creation_date,
                  "metadata", &xml,
                  NULL);

    if (creation_date > 0) {
        pd.creation_date = tracker_date_to_string ((time_t) creation_date);
    }

    keywords = g_ptr_array_new_with_free_func ((GDestroyNotify) g_free);

    if (xml && *xml &&
            (xd = tracker_xmp_new (xml, strlen (xml), uri)) != NULL) {
        /* The casts here are well understood and known */
        md.title = (gchar *) tracker_coalesce_strip (4, pd.title, xd->title, xd->title2, xd->pdf_title);
        md.subject = (gchar *) tracker_coalesce_strip (2, pd.subject, xd->subject);
        md.date = (gchar *) tracker_coalesce_strip (3, pd.creation_date, xd->date, xd->time_original);
        md.author = (gchar *) tracker_coalesce_strip (2, pd.author, xd->creator);

        /* md only borrows strings owned by pd and xd, so it is never
         * freed. md.keywords stays NULL; only the XMP keywords below
         * are parsed in this branch. */
        write_pdf_data (md, metadata, keywords);

        if (xd->keywords) {
            tracker_keywords_parse (keywords, xd->keywords);
        }

        if (xd->pdf_keywords) {
            tracker_keywords_parse (keywords, xd->pdf_keywords);
        }

        if (xd->publisher) {
            TrackerResource *publisher = tracker_extract_new_contact (xd->publisher);
            tracker_resource_set_relation (metadata, "nco:publisher", publisher);
            g_object_unref (publisher);
        }

        if (xd->type) {
            tracker_resource_set_string (metadata, "dc:type", xd->type);
        }

        if (xd->format) {
            tracker_resource_set_string (metadata, "dc:format", xd->format);
        }

        if (xd->identifier) {
            tracker_resource_set_string (metadata, "dc:identifier", xd->identifier);
        }

        if (xd->source) {
            tracker_resource_set_string (metadata, "dc:source", xd->source);
        }

        if (xd->language) {
            tracker_resource_set_string (metadata, "dc:language", xd->language);
        }

        if (xd->relation) {
            tracker_resource_set_string (metadata, "dc:relation", xd->relation);
        }

        if (xd->coverage) {
            tracker_resource_set_string (metadata, "dc:coverage", xd->coverage);
        }

        if (xd->license) {
            tracker_resource_set_string (metadata, "nie:license", xd->license);
        }

        if (xd->make || xd->model) {
            TrackerResource *equipment = tracker_extract_new_equipment (xd->make, xd->model);
            tracker_resource_set_relation (metadata, "nfo:equipment", equipment);
            g_object_unref (equipment);
        }

        if (xd->orientation) {
            tracker_resource_set_string (metadata, "nfo:orientation", xd->orientation);
        }

        if (xd->rights) {
            tracker_resource_set_string (metadata, "nie:copyright", xd->rights);
        }

        if (xd->white_balance) {
            tracker_resource_set_string (metadata, "nmm:whiteBalance", xd->white_balance);
        }

        if (xd->fnumber) {
            gdouble value;

            value = g_strtod (xd->fnumber, NULL);
            tracker_resource_set_double (metadata, "nmm:fnumber", value);
        }

        if (xd->flash) {
            tracker_resource_set_string (metadata, "nmm:flash", xd->flash);
        }

        if (xd->focal_length) {
            gdouble value;

            value = g_strtod (xd->focal_length, NULL);
            tracker_resource_set_double (metadata, "nmm:focalLength", value);
        }

        /* Question: Shouldn't xd->Artist be merged with md.author instead? */

        if (xd->artist || xd->contributor) {
            TrackerResource *artist;
            const gchar *artist_name;

            artist_name = tracker_coalesce_strip (2, xd->artist, xd->contributor);

            artist = tracker_extract_new_contact (artist_name);

            tracker_resource_set_relation (metadata, "nco:contributor", artist);

            g_object_unref (artist);
        }

        if (xd->exposure_time) {
            gdouble value;

            value = g_strtod (xd->exposure_time, NULL);
            tracker_resource_set_double (metadata, "nmm:exposureTime", value);
        }

        if (xd->iso_speed_ratings) {
            gdouble value;

            value = g_strtod (xd->iso_speed_ratings, NULL);
            tracker_resource_set_double (metadata, "nmm:isoSpeed", value);
        }

        if (xd->description) {
            tracker_resource_set_string (metadata, "nie:description", xd->description);
        }

        if (xd->metering_mode) {
            tracker_resource_set_string (metadata, "nmm:meteringMode", xd->metering_mode);
        }

        if (xd->address || xd->state || xd->country || xd->city ||
                xd->gps_altitude || xd->gps_latitude || xd-> gps_longitude) {

            TrackerResource *location = tracker_extract_new_location (xd->address,
                                        xd->state, xd->city, xd->country, xd->gps_altitude,
                                        xd->gps_latitude, xd->gps_longitude);

            tracker_resource_set_relation (metadata, "slo:location", location);

            g_object_unref (location);
        }

        if (xd->regions) {
            tracker_xmp_apply_regions_to_resource (metadata, xd);
        }

        tracker_xmp_free (xd);
    } else {
        /* So if we are here we have NO XMP data and we just
         * write what we know from Poppler.
         */
        write_pdf_data (pd, metadata, keywords);
    }

    /* Every collected keyword becomes a tag on the document. */
    for (i = 0; i < keywords->len; i++) {
        TrackerResource *tag;
        const gchar *p;

        p = g_ptr_array_index (keywords, i);
        tag = tracker_extract_new_tag (p);

        tracker_resource_add_relation (metadata, "nao:hasTag", tag);

        g_object_unref (tag);
    }
    g_ptr_array_free (keywords, TRUE);

    tracker_resource_set_int64 (metadata, "nfo:pageCount", poppler_document_get_n_pages(document));

    config = tracker_main_get_config ();
    n_bytes = tracker_config_get_max_bytes (config);
    content = extract_content_text (document, n_bytes);

    if (content) {
        tracker_resource_set_string (metadata, "nie:plainTextContent", content);
        g_free (content);
    }

    read_outline (document, metadata);

    g_free (xml);
    g_free (pd.keywords);
    g_free (pd.title);
    g_free (pd.subject);
    g_free (pd.creation_date);
    g_free (pd.author);
    g_free (pd.date);
    g_free (uri);

    /* Release the document before the memory it was created from. */
    g_object_unref (document);

    if (contents) {
        munmap (contents, len);
    }

    close (fd);

    tracker_extract_info_set_resource (info, metadata);
    g_object_unref (metadata);

    return TRUE;
}