/// Returns a document's properties.
bool XapianIndex::getDocumentInfo(unsigned int docId, DocumentInfo &docInfo) const
{
	bool foundDocument = false;

	if (docId == 0)
	{
		return false;
	}

	XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
	if (pDatabase == NULL)
	{
		cerr << "Bad index " << m_databaseName << endl;
		return false;
	}

	try
	{
		Xapian::Database *pIndex = pDatabase->readLock();
		if (pIndex != NULL)
		{
			Xapian::Document doc = pIndex->get_document(docId);

			// Get the current document data
			string record = doc.get_data();
			if (record.empty() == false)
			{
				string language = Languages::toLocale(StringManip::extractField(record, "language=", ""));

				docInfo = DocumentInfo(StringManip::extractField(record, "caption=", "\n"),
					StringManip::extractField(record, "url=", "\n"),
					StringManip::extractField(record, "type=", "\n"),
					language);
				docInfo.setTimestamp(StringManip::extractField(record, "timestamp=", "\n"));
#ifdef DEBUG
				cout << "XapianIndex::getDocumentInfo: language is "
					<< docInfo.getLanguage() << endl;
#endif
				foundDocument = true;
			}
		}
	}
	catch (const Xapian::Error &error)
	{
		cerr << "Couldn't get document properties: " << error.get_msg() << endl;
	}
	catch (...)
	{
		cerr << "Couldn't get document properties, unknown exception occured" << endl;
	}
	pDatabase->unlock();

	return foundDocument;
}
/// Returns the text for the given query result: the data stored in its Xapian
/// document decoded as UTF-8, or "No Subject" when that data is empty or could
/// not be read.
QString EmailSearchStore::text(int queryId)
{
    Xapian::Document doc = docForQuery(queryId);

    QMutexLocker lock(&m_mutex);
    std::string payload;
    try {
        payload = doc.get_data();
    } catch (const Xapian::Error &) {
        // A failed read leaves the payload empty, so the placeholder below is used
    }

    QString subject = QString::fromUtf8(payload.c_str(), payload.length());
    if (!subject.isEmpty()) {
        return subject;
    }

    return QStringLiteral("No Subject");
}
/// Removes from a document the terms that can be rebuilt from its data record.
///
/// The record fields (caption, url, type, language, timestamp/modtime, size)
/// are parsed and the matching prefixed terms are collected: magic term,
/// location (U), base file (XFILE:), host names (H), directories (XDIR:),
/// file name (P), extension (XEXT:), dates (D/M/Y), language (L) and MIME
/// type (T). Title tokens are removed through removeFirstPostingsFromDocument().
/// Each collected term is then removed individually; a term Xapian refuses to
/// remove is skipped so that it doesn't prevent the others from being removed.
///
/// @param doc the document to strip, modified in place.
void XapianIndex::removeCommonTerms(Xapian::Document &doc)
{
    DocumentInfo docInfo;
    set<string> commonTerms;
    string record(doc.get_data());

    if (record.empty() == true)
    {
        // Nothing else can be worked out without a record, but the magic term
        // still has to go; returning without this left it in the document
        try
        {
            doc.remove_term(MAGIC_TERM);
        }
        catch (const Xapian::Error &error)
        {
#ifdef DEBUG
            cout << "XapianIndex::removeCommonTerms: " << error.get_msg() << endl;
#endif
        }
        return;
    }

    // First, remove the magic term
    commonTerms.insert(MAGIC_TERM);

    string language(StringManip::extractField(record, "language=", "\n"));
    string timestamp(StringManip::extractField(record, "timestamp=", "\n"));

    docInfo = DocumentInfo(StringManip::extractField(record, "caption=", "\n"),
                           StringManip::extractField(record, "url=", "\n"),
                           StringManip::extractField(record, "type=", "\n"),
                           Languages::toLocale(language));
    // We used to use timestamp prior to 0.60
    if (timestamp.empty() == true)
    {
        string modTime(StringManip::extractField(record, "modtime=", "\n"));
        if (modTime.empty() == false)
        {
            time_t timeT = (time_t )atol(modTime.c_str());
            timestamp = TimeConverter::toTimestamp(timeT);
        }
    }
    docInfo.setTimestamp(timestamp);
    string bytesSize(StringManip::extractField(record, "size=", ""));
    if (bytesSize.empty() == false)
    {
        docInfo.setSize((off_t )atol(bytesSize.c_str()));
    }
    Url urlObj(docInfo.getLocation());

    // FIXME: remove terms extracted from the title if they don't have more than one posting
    string title(docInfo.getTitle());
    if (title.empty() == false)
    {
        Document titleDoc;
        titleDoc.setData(title.c_str(), title.length());
        Tokenizer titleTokens(&titleDoc);
        // Once for the "S"-prefixed title terms, once more for the unprefixed ones
        removeFirstPostingsFromDocument(titleTokens, doc, "S", language, STORE_UNSTEM);
        titleTokens.rewind();
        removeFirstPostingsFromDocument(titleTokens, doc, "", language, m_stemMode);
    }

    // Location
    string location(docInfo.getLocation());
    commonTerms.insert(string("U") + XapianDatabase::limitTermLength(Url::escapeUrl(location), true));
    // Base file
    string::size_type qmPos = location.find("?");
    if ((urlObj.isLocal() == true) &&
            (qmPos != string::npos))
    {
        commonTerms.insert(string("XFILE:") + XapianDatabase::limitTermLength(Url::escapeUrl(location.substr(0, qmPos)), true));
    }
    // Host name
    string hostName(StringManip::toLowerCase(urlObj.getHost()));
    if (hostName.empty() == false)
    {
        commonTerms.insert(string("H") + XapianDatabase::limitTermLength(hostName, true));
        // One term per parent domain, i.e. whatever follows each dot
        string::size_type dotPos = hostName.find('.');
        while (dotPos != string::npos)
        {
            commonTerms.insert(string("H") + XapianDatabase::limitTermLength(hostName.substr(dotPos + 1), true));

            // Next
            dotPos = hostName.find('.', dotPos + 1);
        }
    }
    // ...location
    string tree(urlObj.getLocation());
    if (tree.empty() == false)
    {
        commonTerms.insert(string("XDIR:") + XapianDatabase::limitTermLength(Url::escapeUrl(tree), true));
        if (tree[0] == '/')
        {
            commonTerms.insert("XDIR:/");
        }
        // One term per leading directory, i.e. whatever precedes each slash
        string::size_type slashPos = tree.find('/', 1);
        while (slashPos != string::npos)
        {
            commonTerms.insert(string("XDIR:") + XapianDatabase::limitTermLength(Url::escapeUrl(tree.substr(0, slashPos)), true));

            // Next
            slashPos = tree.find('/', slashPos + 1);
        }
    }
    // ...and file name
    string fileName(urlObj.getFile());
    if (fileName.empty() == false)
    {
        string extension;

        commonTerms.insert(string("P") + XapianDatabase::limitTermLength(Url::escapeUrl(fileName), true));

        // Does it have an extension ?
        string::size_type extPos = fileName.rfind('.');
        if ((extPos != string::npos) &&
                (extPos + 1 < fileName.length()))
        {
            extension = StringManip::toLowerCase(fileName.substr(extPos + 1));
        }
        // An XEXT: term is collected even when the extension is empty
        commonTerms.insert(string("XEXT:") + XapianDatabase::limitTermLength(extension));
    }
    // Date terms
    time_t timeT = TimeConverter::fromTimestamp(docInfo.getTimestamp());
    struct tm *tm = localtime(&timeT);
    // localtime() may return NULL, in which case there are no date terms to collect
    if (tm != NULL)
    {
        string yyyymmdd = TimeConverter::toYYYYMMDDString(tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday);
        if (yyyymmdd.length() == 8)
        {
            commonTerms.insert(string("D") + yyyymmdd);
            commonTerms.insert(string("M") + yyyymmdd.substr(0, 6));
            commonTerms.insert(string("Y") + yyyymmdd.substr(0, 4));
        }
    }
    // Language code
    commonTerms.insert(string("L") + Languages::toCode(language));
    // MIME type
    commonTerms.insert(string("T") + docInfo.getType());

    // Remove terms one at a time so that one failure doesn't stop the rest
    for (set<string>::const_iterator termIter = commonTerms.begin(); termIter != commonTerms.end(); ++termIter)
    {
        try
        {
            doc.remove_term(*termIter);
        }
        catch (const Xapian::Error &error)
        {
#ifdef DEBUG
            cout << "XapianIndex::removeCommonTerms: " << error.get_msg() << endl;
#endif
        }
    }
}
Beispiel #4
0
    /// Handles one query message.
    ///
    /// The query text is converted from UTF-8 to GBK, run through
    /// result->ParagraphProcessing(), and the output converted back to UTF-8.
    /// That output is split on spaces into "word/tag" pairs; every word whose
    /// tag isn't "w" becomes a "K"-prefixed term. The terms are AND-ed into a
    /// Xapian query, up to 100 matches are collected into a new DocList, and
    /// the list is sent back to the sender together with the message's result key.
    ///
    /// The handler deletes message.query. The DocList is allocated here and
    /// handed over in the response (NOTE(review): presumably the receiver frees
    /// it - confirm).
    void QueryHandler(const QueryMessage &message, const Theron::Address from)
        {
            search::QueryInfo qi=*(message.query);
            std::string resKey(message.resKey);
            delete message.query;
            std::string segString;
            const size_t queryLen=qi.query.length();
            // Vectors free the buffers on every path. The extra zeroed byte keeps
            // them NUL-terminated even for an empty query or a conversion that
            // fills the whole capacity.
            // NOTE(review): the 9x output size is the original's guess at what
            // ParagraphProcessing() may write - confirm against its contract.
            std::vector<char> input(queryLen*3+1,0);
            std::vector<char> output(queryLen*9+1,0);
            try
            {
                UErrorCode  error = U_ZERO_ERROR;
                ucnv_convert("GBK","UTF-8",&input[0], static_cast<int32_t>(queryLen*3), qi.query.c_str(), static_cast<int32_t>(queryLen), &error );

                // Don't segment the buffer if the conversion failed
                if (U_SUCCESS(error) && result->ParagraphProcessing(&input[0], &output[0]))
                {
                    const size_t oLen=strlen(&output[0]);
                    std::vector<char> utf8out(oLen*3+1,0);
                    ucnv_convert("UTF-8","GBK",&utf8out[0], static_cast<int32_t>(oLen*3), &output[0], static_cast<int32_t>(oLen), &error );

                    if (U_SUCCESS(error))
                    {
                        segString=std::string(&utf8out[0]);
                    }
                }
            }
            catch (...) {
                // Best effort: a failure here leaves segString empty, so an
                // empty result list is sent back below
            }
            std::list<std::string> segList;
            if(segString.length()>0)
            {
                std::vector<std::string> resv;
                boost::algorithm::split( resv, segString, boost::algorithm::is_any_of(" ") );
                for(std::vector<std::string>::iterator it=resv.begin();it!=resv.end();++it)
                {
                    // Each token is "word/tag"; tokens without a tag or tagged "w" are skipped
                    std::vector<std::string> tmpv;
                    boost::algorithm::split( tmpv, *it, boost::algorithm::is_any_of("/") );
                    if(tmpv.size()>1&&tmpv[1]!="w")
                        segList.push_back(std::string("K")+tmpv[0]);
                }
            }
            search::DocList *dList=new search::DocList();
            if(segList.size()>0)
            {
                Xapian::Query query(Xapian::Query::OP_AND,segList.begin(), segList.end());

                for(;;)
                {
                    try
                    {
                        // A previous attempt may have been interrupted half-way:
                        // start from an empty list so results aren't duplicated
                        dList->docList.clear();
                        db.reopen();
                        Xapian::Enquire  enquire(db);
                        enquire.set_query(query);
                        Xapian::MSet matches = enquire.get_mset(0, 100);
                        for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) {
                            Xapian::Document doc = i.get_document();
                            search::IndexInfo info;
                            info.uid=doc.get_value(1);
                            info.attMap.insert(std::make_pair(std::string("title"),doc.get_value(2)));
                            info.content=doc.get_data();
                            dList->docList.push_back(info);
                        }
                        std::cout<<"doc size:"<<dList->docList.size()<<std::endl;
                        break;
                    }catch(const Xapian::DatabaseModifiedError &)
                    {
                        // The database changed under us: reopen and run the query again
                        std::cout<<"try agian"<<std::endl;
                    }catch(...)
                    {
                        // Any other failure: give up and send what was collected
                        break;
                    }

                }


            }
            Send(QueryResponceMessage(dList,resKey.c_str()), from);

        }
/// Returns a document's properties.
bool XapianIndex::getDocumentInfo(unsigned int docId, DocumentInfo &docInfo) const
{
	bool foundDocument = false;

	if (docId == 0)
	{
		return false;
	}

	XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName);
	if (pDatabase == NULL)
	{
		cerr << "Bad index " << m_databaseName << endl;
		return false;
	}

	try
	{
		Xapian::Database *pIndex = pDatabase->readLock();
		if (pIndex != NULL)
		{
			Xapian::Document doc = pIndex->get_document(docId);

			// Get the current document data
			string record = doc.get_data();
			if (record.empty() == false)
			{
				string language(Languages::toLocale(StringManip::extractField(record, "language=", "")));
				// We used to use timestamp prior to 0.60
				string timestamp(StringManip::extractField(record, "timestamp=", "\n"));

				docInfo = DocumentInfo(StringManip::extractField(record, "caption=", "\n"),
					StringManip::extractField(record, "url=", "\n"),
					StringManip::extractField(record, "type=", "\n"),
					language);
				if (timestamp.empty() == true)
				{
					// This is the format used by Omega
					string modTime(StringManip::extractField(record, "modtime=", "\n"));
					if (modTime.empty() == false)
					{
						time_t timeT = (time_t )atol(modTime.c_str());
						timestamp = TimeConverter::toTimestamp(timeT);
					}
				}
				docInfo.setTimestamp(timestamp);
				foundDocument = true;
			}
		}
	}
	catch (const Xapian::Error &error)
	{
		cerr << "Couldn't get document properties: " << error.get_type() << ": " << error.get_msg() << endl;
	}
	catch (...)
	{
		cerr << "Couldn't get document properties, unknown exception occured" << endl;
	}
	pDatabase->unlock();

	return foundDocument;
}
void XapianIndex::removeCommonTerms(Xapian::Document &doc)
{
	DocumentInfo docInfo;
	string record(doc.get_data());

	// First, remove the magic term
	doc.remove_term(MAGIC_TERM);

	if (record.empty() == true)
        {
		// Nothing else we can do
		return;
	}

	string language(StringManip::extractField(record, "language=", ""));
	string timestamp(StringManip::extractField(record, "timestamp=", "\n"));

	docInfo = DocumentInfo(StringManip::extractField(record, "caption=", "\n"),
		StringManip::extractField(record, "url=", "\n"),
		StringManip::extractField(record, "type=", "\n"),
		Languages::toLocale(language));
	// We used to use timestamp prior to 0.60
	if (timestamp.empty() == true)
	{
		string modTime(StringManip::extractField(record, "modtime=", "\n"));
		if (modTime.empty() == false)
		{
			time_t timeT = (time_t )atol(modTime.c_str());
			timestamp = TimeConverter::toTimestamp(timeT);
		}
	}
	docInfo.setTimestamp(timestamp);
	Url urlObj(docInfo.getLocation());

	// FIXME: remove terms extracted from the title if they don't have more than one posting
	string title(docInfo.getTitle());
	if (title.empty() == false)
	{
		Document titleDoc;
		titleDoc.setData(title.c_str(), title.length());
		Tokenizer titleTokens(&titleDoc);
		removeFirstPostingsFromDocument(titleTokens, doc, "S", language, STORE_UNSTEM);
		titleTokens.rewind();
		removeFirstPostingsFromDocument(titleTokens, doc, "", language, m_stemMode);
	}

	// Title
	doc.remove_term(limitTermLength(string("U") + docInfo.getLocation(), true));
	// Host name
	string hostName(StringManip::toLowerCase(urlObj.getHost()));
	if (hostName.empty() == false)
	{
		doc.remove_term(limitTermLength(string("H") + hostName, true));
		string::size_type dotPos = hostName.find('.');
		while (dotPos != string::npos)
		{
			doc.remove_term(limitTermLength(string("H") + hostName.substr(dotPos + 1), true));

			// Next
			dotPos = hostName.find('.', dotPos + 1);
		}
	}
	// ...location
	string tree(urlObj.getLocation());
	if (tree.empty() == false)
	{
		doc.remove_term(limitTermLength(string("XDIR:") + tree, true));
		string::size_type slashPos = tree.find('/', 1);
		while (slashPos != string::npos)
		{
			doc.remove_term(limitTermLength(string("XDIR:") + tree.substr(0, slashPos), true));

			// Next
			slashPos = tree.find('/', slashPos + 1);
		}
	}
	// ...and file name
	string fileName(urlObj.getFile());
	if (fileName.empty() == false)
	{
		doc.remove_term(limitTermLength(string("P") + StringManip::toLowerCase(fileName), true));
	}
	// Language code
	doc.remove_term(string("L") + Languages::toCode(language));
	// MIME type
	doc.remove_term(string("T") + docInfo.getType());
}
			void query(const std::vector<std::string>& arr_queries, const std::vector<std::string>& arr_selection = {}) {
				
				Xapian::Database databases_ir;
				try {
					Xapian::Database database_ir_object_values(path_database+"object_values/");
					databases_ir.add_database(database_ir_object_values);
				} catch (const Xapian::Error &e) {
					// Database not ready
				}
				try {
					Xapian::Database database_ir_object_descriptions(path_database+"object_descriptions/");
					databases_ir.add_database(database_ir_object_descriptions);
				} catch (const Xapian::Error &e) {
					// Database not ready
				}
				try {
					Xapian::Database database_ir_object_sub_descriptions(path_database+"object_sub_descriptions/");
					databases_ir.add_database(database_ir_object_sub_descriptions);
				} catch (const Xapian::Error &e) {
					// Database not ready
				}
				
				// Filter on Type IDs
				
				Xapian::Query query_ir_identifiers;
				
				if (!arr_selection.empty()) {
					
					std::vector<Xapian::Query> arr_query_identifiers;
					
					for (const auto& str_identifier_field : arr_selection) {

						arr_query_identifiers.push_back(Xapian::Query("T"+str_identifier_field));
					}
					
					query_ir_identifiers = Xapian::Query(Xapian::Query::OP_OR, arr_query_identifiers.begin(), arr_query_identifiers.end());
				}

				Xapian::QueryParser queryparser;
				queryparser.set_database(databases_ir); // Needed to enable specific query flags
				queryparser.set_stemmer(Xapian::Stem("en"));
				queryparser.set_stemming_strategy(queryparser.STEM_SOME);
				queryparser.add_boolean_prefix("identifier", "T");
				//queryparser.add_prefix("value", "SV");
				
				unsigned int count_queries = 0;
				
				for (const auto& str_query : arr_queries) {
					
					const auto query_id = count_queries;
					count_queries++;
						
					Xapian::Query query_ir = queryparser.parse_query(str_query, Xapian::QueryParser::FLAG_DEFAULT | Xapian::QueryParser::FLAG_WILDCARD);
					
					if (!arr_selection.empty()) {
						
						// Update main query
						
						query_ir = Xapian::Query(Xapian::Query::OP_FILTER, query_ir, query_ir_identifiers);
					}
					
					// Run query
					
					Xapian::Enquire enquire(databases_ir);
					enquire.set_query(query_ir);
					
					Xapian::MSet arr_msets = enquire.get_mset(num_offset, num_limit);

					for (Xapian::MSetIterator iterate_arr_mset = arr_msets.begin(); iterate_arr_mset != arr_msets.end(); iterate_arr_mset++) {
											
						//Xapian::docid did = *iterate_arr_mset;
						const int unsigned& nr_rank = iterate_arr_mset.get_rank();
						const int unsigned& nr_weight = iterate_arr_mset.get_weight();
						
						const Xapian::Document doc = iterate_arr_mset.get_document();
						const std::string& str_identifier = doc.get_value(0);
						
						if (map_query_results.find(str_identifier) == map_query_results.end()) {
							
							std::vector<unsigned int> arr_matches;
							
							arr_matches.push_back(query_id);

							const std::string& str_value = (include_value ? doc.get_data() : "");
							
							map_query_results[str_identifier] = std::make_tuple(nr_rank, nr_weight, arr_matches, str_value);
						} else {
							
							type_arr_query_result& arr_query_result = map_query_results[str_identifier];
							
							std::get<0>(arr_query_result) += nr_rank;
							std::get<1>(arr_query_result) += nr_weight;
							
							std::get<2>(arr_query_result).push_back(query_id);
						}
					}
				}
			}
Beispiel #8
0
/// Command-line search example.
///
/// Opens the Xapian database given as first argument, AND-s all remaining
/// arguments into a query, and prints the ID, percentage and data of up to
/// 10 matching documents.
///
/// Returns 0 on success and 1 when a Xapian error is caught; exits with 1
/// when fewer than two arguments are given.
int main(int argc, char **argv)
{
    // Simplest possible options parsing: we just require two or more
    // parameters.
    if (argc < 3) {
        cout << "usage: " << argv[0] << " <path to database> <search terms>" << endl;
        exit(1);
    }

    // Catch any Xapian::Error exceptions thrown
    try {
        // Make the database
        Xapian::Database db(argv[1]);

        // Start an enquire session
        Xapian::Enquire enquire(db);

        // Set percent and/or weight cutoffs
        enquire.set_cutoff(90,0.2);

        // Set weighting schema
        BM25Weight bm1(1.0,0.0,1.0,0.5,0.3);
        enquire.set_weighting_scheme(bm1);

        // Build the query object
        Xapian::Query query(Xapian::Query::OP_AND, argv + 2, argv + argc);
        cout << "Performing query `" << query.get_description() << "'" << endl;

        // Set Stopper
        // The stopper lives on the stack and is declared before the parser, so
        // it outlives it and is no longer leaked.
        // NOTE(review): the parser is configured but never used to build the
        // query above, so the stop words currently have no effect on the search.
        string stop[8]={"的","了","呵","吧","就","你","我","他"};
        SimpleStopper ss;
        for(int i=0;i<8;i++){
            ss.add(stop[i]);
        }
        QueryParser qparser;
        qparser.set_stopper(&ss);
        qparser.set_database(db);

        // Give the query object to the enquire session
        enquire.set_query(query);

        // Get the top 10 results of the query (at most 10 documents are returned)
        Xapian::MSet matches = enquire.get_mset(0, 10);

        // Display the results
        cout << matches.size() << " results found" << endl;

        for (Xapian::MSetIterator i = matches.begin();i != matches.end(); ++i) {
            Xapian::Document doc = i.get_document();
            cout << "Document ID " << *i << "\nPercent " <<i.get_percent() << "%\n" << doc.get_data() << "\n" << endl;
        }
        db.close();
    } catch(const Xapian::Error &error) {
        cout << "Exception: "  << error.get_msg() << endl;
        // Report the failure to the caller instead of exiting with success
        return 1;
    }

    return 0;
}
Beispiel #9
0
/**
 * Rebuilds the Xapian database from a list of components.
 *
 * A fresh database is written to "<m_dbPath>_rb"; every component of
 * cpt_list that has an installation candidate becomes one document carrying
 * its values, terms and indexed text. Once committed, the current database
 * is renamed to "<m_dbPath>_old", the rebuilt one is renamed to m_dbPath,
 * and the old one is deleted.
 *
 * @param cpt_list list whose data pointers are AsComponent instances.
 * @return true on success, false when the rebuild directory can't be
 *         created or when one of the two renames fails.
 */
bool
DatabaseWrite::rebuild (GList *cpt_list)
{
	string old_path = m_dbPath + "_old";
	string rebuild_path = m_dbPath + "_rb";
	string db_locale;

	// Create the rebuild directory
	if (g_mkdir_with_parents (rebuild_path.c_str (), 0755) != 0) {
		g_warning ("Unable to create database rebuild directory.");
		return false;
	}

	// check if old unrequired version of db still exists on filesystem
	if (g_file_test (old_path.c_str (), G_FILE_TEST_EXISTS)) {
		g_warning ("Existing xapian old db was not cleaned previously: '%s'.", old_path.c_str ());
		as_utils_delete_dir_recursive (old_path.c_str ());
	}

	// clear out the rebuild directory, including anything left over from a previous rebuild
	if (g_file_test (rebuild_path.c_str (), G_FILE_TEST_EXISTS)) {
		g_debug ("Removing old rebuild-dir from previous database rebuild.");
		as_utils_delete_dir_recursive (rebuild_path.c_str ());
	}

	Xapian::WritableDatabase db (rebuild_path, Xapian::DB_CREATE_OR_OVERWRITE);

	Xapian::TermGenerator term_generator;
	term_generator.set_database(db);
	try {
		/* this tests if we have spelling suggestions (there must be
		 * a better way?!?) - this is needed as inmemory does not have
		 * spelling corrections, but it allows setting the flag and will
		 * raise a exception much later
		 */
		db.add_spelling("test");
		db.remove_spelling("test");

		/* this enables the flag for it (we only reach this line if
		 * the db supports spelling suggestions)
		 */
		term_generator.set_flags(Xapian::TermGenerator::FLAG_SPELLING);
	} catch (const Xapian::UnimplementedError &error) {
		// Ignore
	}

	for (GList *list = cpt_list; list != NULL; list = list->next) {
		AsComponent *cpt = (AsComponent*) list->data;

		Xapian::Document doc;
		term_generator.set_document (doc);

		doc.set_data (as_component_get_name (cpt));

		// Sanity check
		if (!as_component_has_install_candidate (cpt)) {
			g_warning ("Skipped component '%s' from inclusion into database: Does not have an installation candidate.",
					   as_component_get_id (cpt));
			continue;
		}

		// Package name
		gchar **pkgs = as_component_get_pkgnames (cpt);
		if (pkgs != NULL) {
			gchar *pkgs_cstr = g_strjoinv (";", pkgs);
			string pkgs_str = pkgs_cstr;
			doc.add_value (XapianValues::PKGNAMES, pkgs_str);
			g_free (pkgs_cstr);

			for (uint i = 0; pkgs[i] != NULL; i++) {
				string pkgname = pkgs[i];
				doc.add_term("AP" + pkgname);
				if (pkgname.find ("-") != string::npos) {
					// we need this to work around xapian oddness
					string tmp = pkgname;
					replace (tmp.begin (), tmp.end (), '-', '_');
					doc.add_term (tmp);
				}
				// add packagename as meta-data too
				term_generator.index_text_without_positions (pkgname, WEIGHT_PKGNAME);
			}
		}

		// Source package name
		const gchar *spkgname_cstr = as_component_get_source_pkgname (cpt);
		if (spkgname_cstr != NULL) {
			string spkgname = spkgname_cstr;
			doc.add_value (XapianValues::SOURCE_PKGNAME, spkgname);
			if (!spkgname.empty()) {
				doc.add_term("AP" + spkgname);
				if (spkgname.find ("-") != string::npos) {
					// we need this to work around xapian oddness
					string tmp = spkgname;
					replace (tmp.begin (), tmp.end (), '-', '_');
					doc.add_term (tmp);
				}
				// add packagename as meta-data too
				term_generator.index_text_without_positions (spkgname, WEIGHT_PKGNAME);
			}
		}

		// Bundles
		Bundles bundles;
		GHashTable *bundle_ids = as_component_get_bundles_table (cpt);
		if (g_hash_table_size (bundle_ids) > 0) {
			string ostr;
			g_hash_table_foreach (bundle_ids,
						(GHFunc) bundles_hashtable_to_bundleentry,
						&bundles);
			if (bundles.SerializeToString (&ostr))
				doc.add_value (XapianValues::BUNDLES, ostr);
		}

		// Identifier
		string idname = as_component_get_id (cpt);
		doc.add_value (XapianValues::IDENTIFIER, idname);
		doc.add_term("AI" + idname);
		term_generator.index_text_without_positions (idname, WEIGHT_PKGNAME);

		// Component name
		string cptName = as_component_get_name (cpt);
		doc.add_value (XapianValues::CPTNAME, cptName);

		// Untranslated component name
		// (the active locale is switched to "C" for the lookup and restored right after)
		string clocale = as_component_get_active_locale (cpt);
		as_component_set_active_locale (cpt, "C");
		string cptNameGeneric = as_component_get_name (cpt);
		doc.add_value (XapianValues::CPTNAME_UNTRANSLATED, cptNameGeneric);
		as_component_set_active_locale (cpt, clocale.c_str());
		term_generator.index_text_without_positions (cptNameGeneric, WEIGHT_DESKTOP_GENERICNAME);

		// Type identifier
		string type_str = as_component_kind_to_string (as_component_get_kind (cpt));
		doc.add_value (XapianValues::TYPE, type_str);
		doc.add_term ("AT" + type_str);

		// Origin
		string cptOrigin = as_component_get_origin (cpt);
		doc.add_value (XapianValues::ORIGIN, cptOrigin);

		// URLs
		GHashTable *urls_table;
		urls_table = as_component_get_urls_table (cpt);
		if (g_hash_table_size (urls_table) > 0) {
			Urls urls;
			string ostr;

			g_hash_table_foreach (urls_table,
						(GHFunc) urls_hashtable_to_urlentry,
						&urls);
			if (urls.SerializeToString (&ostr))
				doc.add_value (XapianValues::URLS, ostr);
		}

		// Icons
		GPtrArray *icons = as_component_get_icons (cpt);
		Icons pbIcons;
		for (uint i = 0; i < icons->len; i++) {
			AsIcon *icon = AS_ICON (g_ptr_array_index (icons, i));

			Icons_Icon *pbIcon = pbIcons.add_icon ();
			pbIcon->set_width (as_icon_get_width (icon));
			pbIcon->set_height (as_icon_get_height (icon));

			if (as_icon_get_kind (icon) == AS_ICON_KIND_REMOTE) {
				pbIcon->set_type (Icons_IconType_REMOTE);
				pbIcon->set_url (as_icon_get_url (icon));
			} else {
				/* TODO: Properly support STOCK and LOCAL icons */
				pbIcon->set_type (Icons_IconType_CACHED);
				pbIcon->set_url (as_icon_get_filename (icon));
			}
		}
		string icons_ostr;
		if (pbIcons.SerializeToString (&icons_ostr))
			doc.add_value (XapianValues::ICONS, icons_ostr);


		// Summary
		string cptSummary = as_component_get_summary (cpt);
		doc.add_value (XapianValues::SUMMARY, cptSummary);
		term_generator.index_text_without_positions (cptSummary, WEIGHT_DESKTOP_SUMMARY);

		// Long description
		string description = as_component_get_description (cpt);
		doc.add_value (XapianValues::DESCRIPTION, description);
		term_generator.index_text_without_positions (description, WEIGHT_DESKTOP_SUMMARY);

		// Categories
		gchar **categories = as_component_get_categories (cpt);
		if (categories != NULL) {
			string categories_str = "";
			for (uint i = 0; categories[i] != NULL; i++) {
				if (as_str_empty (categories[i]))
					continue;

				// the term uses the lower-cased name, the stored value keeps the original case
				string cat = categories[i];
				string tmp = cat;
				transform (tmp.begin (), tmp.end (),
						tmp.begin (), ::tolower);
				doc.add_term ("AC" + tmp);
				categories_str += cat + ";";
			}
			doc.add_value (XapianValues::CATEGORIES, categories_str);
		}

		// Add our keywords (with high priority)
		gchar **keywords = as_component_get_keywords (cpt);
		if (keywords != NULL) {
			for (uint i = 0; keywords[i] != NULL; i++) {
				string kword = keywords[i];
				term_generator.index_text_without_positions (kword, WEIGHT_DESKTOP_KEYWORD);
			}
		}

		// Data of provided items
		ASCache::ProvidedItems pbPI;
		for (uint j = 0; j < AS_PROVIDED_KIND_LAST; j++) {
			AsProvidedKind kind = (AsProvidedKind) j;
			string kind_str;
			AsProvided *prov = as_component_get_provided_for_kind (cpt, kind);
			if (prov == NULL)
				continue;

			auto *pbProv = pbPI.add_provided ();
			pbProv->set_type ((ProvidedItems_ItemType) kind);

			kind_str = as_provided_kind_to_string (kind);
			gchar **items = as_provided_get_items (prov);
			for (uint k = 0; items[k] != NULL; k++) {
				string item = items[k];
				pbProv->add_item (item);
				doc.add_term ("AE" + kind_str + ";" + item);
			}
			g_free (items);
		}
		string pitems_ostr;
		if (pbPI.SerializeToString (&pitems_ostr))
			doc.add_value (XapianValues::PROVIDED_ITEMS, pitems_ostr);

		// Add screenshot information
		Screenshots screenshots;
		GPtrArray *sslist = as_component_get_screenshots (cpt);
		for (uint i = 0; i < sslist->len; i++) {
			AsScreenshot *sshot = (AsScreenshot*) g_ptr_array_index (sslist, i);
			Screenshots_Screenshot *pb_sshot = screenshots.add_screenshot ();

			pb_sshot->set_primary (false);
			if (as_screenshot_get_kind (sshot) == AS_SCREENSHOT_KIND_DEFAULT)
				pb_sshot->set_primary (true);

			if (as_screenshot_get_caption (sshot) != NULL)
				pb_sshot->set_caption (as_screenshot_get_caption (sshot));

			g_ptr_array_foreach (as_screenshot_get_images (sshot),
						(GFunc) images_array_to_imageentry,
						pb_sshot);
		}
		string scr_ostr;
		if (screenshots.SerializeToString (&scr_ostr))
			doc.add_value (XapianValues::SCREENSHOTS, scr_ostr);

		// Add compulsory-for-desktop information
		gchar **compulsory = as_component_get_compulsory_for_desktops (cpt);
		string compulsory_str;
		if (compulsory != NULL) {
			gchar *str;
			str = g_strjoinv (";", compulsory);
			compulsory_str = string(str);
			g_free (str);
		}
		doc.add_value (XapianValues::COMPULSORY_FOR, compulsory_str);

		// Add project-license
		const gchar *project_license = as_component_get_project_license (cpt);
		if (project_license != NULL)
			doc.add_value (XapianValues::LICENSE, project_license);

		// Add project group
		const gchar *project_group = as_component_get_project_group (cpt);
		if (project_group != NULL)
			doc.add_value (XapianValues::PROJECT_GROUP, project_group);

		// Add developer name
		const gchar *developer_name = as_component_get_developer_name (cpt);
		if (developer_name != NULL)
			doc.add_value (XapianValues::DEVELOPER_NAME, developer_name);

		// Add releases information
		Releases pb_rels;
		GPtrArray *releases = as_component_get_releases (cpt);
		for (uint i = 0; i < releases->len; i++) {
			AsRelease *rel = (AsRelease*) g_ptr_array_index (releases, i);
			Releases_Release *pb_rel = pb_rels.add_release ();

			// version
			pb_rel->set_version (as_release_get_version (rel));
			// UNIX timestamp
			pb_rel->set_unix_timestamp (as_release_get_timestamp (rel));
			// release urgency (if set)
			if (as_release_get_urgency (rel) != AS_URGENCY_KIND_UNKNOWN)
				pb_rel->set_urgency ((Releases_UrgencyType) as_release_get_urgency (rel));

			// add location urls
			GPtrArray *locations = as_release_get_locations (rel);
			for (uint j = 0; j < locations->len; j++) {
				pb_rel->add_location ((gchar*) g_ptr_array_index (locations, j));
			}

			// add checksum info
			for (uint j = 0; j < AS_CHECKSUM_KIND_LAST; j++) {
				if (as_release_get_checksum (rel, (AsChecksumKind) j) != NULL) {
					Releases_Checksum *pb_cs = pb_rel->add_checksum ();
					pb_cs->set_type ((Releases_ChecksumType) j);
					pb_cs->set_value (as_release_get_checksum (rel, (AsChecksumKind) j));
				}
			}

			// add size info
			for (uint j = 0; j < AS_SIZE_KIND_LAST; j++) {
				if (as_release_get_size (rel, (AsSizeKind) j) > 0) {
					Releases_Size *pb_s = pb_rel->add_size ();
					pb_s->set_type ((Releases_SizeType) j);
					pb_s->set_value (as_release_get_size (rel, (AsSizeKind) j));
				}
			}

			// add description
			if (as_release_get_description (rel) != NULL)
				pb_rel->set_description (as_release_get_description (rel));
		}
		string rel_ostr;
		if (pb_rels.SerializeToString (&rel_ostr))
			doc.add_value (XapianValues::RELEASES, rel_ostr);

		// Languages
		GHashTable *langs_table;
		langs_table = as_component_get_languages_map (cpt);
		if (g_hash_table_size (langs_table) > 0) {
			Languages pb_langs;
			string ostr;

			g_hash_table_foreach (langs_table,
						(GHFunc) langs_hashtable_to_langentry,
						&pb_langs);

			// serialize the languages message that was just filled in (not the releases one)
			if (pb_langs.SerializeToString (&ostr))
				doc.add_value (XapianValues::LANGUAGES, ostr);
		}

		// Postprocess
		string docData = doc.get_data ();
		doc.add_term ("AA" + docData);
		term_generator.index_text_without_positions (docData, WEIGHT_DESKTOP_NAME);

		//! g_debug ("Adding component: %s", as_component_to_string (cpt));
		db.add_document (doc);

		// infer database locale from single component
		// TODO: Do that in a smarter way, if we support multiple databases later.
		if (db_locale.empty ())
			db_locale = as_component_get_active_locale (cpt);
	}

	db.set_metadata ("db-schema-version", to_string (AS_DB_SCHEMA_VERSION));
	db.set_metadata ("db-locale", db_locale);
	db.commit ();

	// Swap the databases: current -> old, rebuilt -> current, then drop the old one
	if (g_rename (m_dbPath.c_str (), old_path.c_str ()) < 0) {
		g_critical ("Error while moving old database out of the way.");
		return false;
	}
	if (g_rename (rebuild_path.c_str (), m_dbPath.c_str ()) < 0) {
		g_critical ("Error while moving rebuilt database.");
		return false;
	}
	as_utils_delete_dir_recursive (old_path.c_str ());

	return true;
}