Ejemplo n.º 1
0
int main(int argc, char **argv) {
	int c;
	string query, inputFile, outputFile;
	bool measure = false;

	while( (c = getopt(argc,argv,"hq:o:m"))!=-1) {
		switch(c) {
		case 'h':
			break;
		case 'q':
			query = optarg;
			break;
		case 'o':
			outputFile = optarg;
			break;
		case 'm':
			measure = true;
			break;
		default:
			cout << "ERROR: Unknown option" << endl;
			return 1;
		}
	}

	if(argc-optind<2) {
		cout << "ERROR: You must supply an input and HDT File" << endl << endl;
		return 1;
	}

	inputFile = argv[optind];
	outputFile = argv[optind+1];

	ConvertProgress progress;
	StopWatch st;

	try {
		// LOAD
		HDT *hdt = HDTManager::mapHDT(inputFile.c_str());

		// CONVERT triples to TripleList
		TriplesList tlist;
		Triples *triples = hdt->getTriples();
		cout << "Old Triples -> TriplesList" << endl;
		st.reset();
		IteratorTripleID *it = triples->searchAll();
		tlist.insert(it);
		delete it;
		cout << "         Old Triples -> TriplesList time" << st <<  endl;

		// Convert tlist to OPS
		cout << "TriplesList sort OPS" << endl;
		st.reset();
		tlist.sort(OPS, &progress);
		cout << "    TriplesList sort OPS time: " << st << endl;

		// Generate new OPS BitmapTriples
		cout << "TriplesList to new BitmapTriples" << endl;
		HDTSpecification spec;
		spec.set("triplesOrder", "OPS");
		BitmapTriples bt(spec);
		st.reset();
		bt.load(tlist, &progress);
		cout << "       TriplesList to new BitmapTriples time" << st << endl;

		// Update Header
#if 1
		cout << "Update Header" << endl;
		string rootNode("_:triples");
		TripleString ts (rootNode, "", "");
		hdt->getHeader()->remove(ts);
		bt.populateHeader(*hdt->getHeader(), "_:triples");
#endif

		// SAVE
		cout << "Save to " << outputFile << endl;
		ofstream out(outputFile.c_str(), ios::binary | ios::out);
		ControlInformation ci;

		ci.clear();
		ci.setType(GLOBAL);
		ci.setFormat(HDTVocabulary::HDT_CONTAINER);
		ci.save(out);

		// HEADER
		ci.clear();
		ci.setType(HEADER);
		hdt->getHeader()->save(out, ci, NULL);

		// DICTIONARY
		ci.clear();
		ci.setType(DICTIONARY);
		hdt->getDictionary()->save(out, ci, NULL);

		// NEW TRIPLES
		ci.clear();
		ci.setType(TRIPLES);
		bt.save(out, ci, NULL);

		out.close();

		delete hdt;
	} catch (char *e) {
		cout << "ERROR: " << e << endl;
	} catch (const char *e) {
		cout << "ERROR: " << e << endl;
	}
}
Ejemplo n.º 2
0
 bool isIndexed() const {
     return triples->isIndexed();
 }
Ejemplo n.º 3
0
int main(int argc, char *argv[]) {
	int c;
	string inputFile;
	string outputFile;

	while ((c = getopt(argc, argv, "hi:o:")) != -1) {
		switch (c) {
		case 'h':
			help();
			break;
		case 'o':
			outputFile = optarg;

			break;
		default:
			cout << "ERROR: Unknown option" << endl;
			help();
			return 1;
		}
	}



	if (argc < 2) {
		cout << "ERROR: You must supply an input HDT File" << endl << endl;
		help();
		return 1;
	}
	inputFile = argv[optind];

		cout<<inputFile<<endl;

	if (outputFile == "")
		outputFile = inputFile;

	// Load HDT file
	HDT *hdt = HDTManager::mapHDT(inputFile.c_str());

	TriplesList* tl = new TriplesList();

	Triples * trip = hdt->getTriples();
	cout<<" Number of triples: "<<(unsigned long)(hdt->getTriples()->getNumberOfElements())<<endl;
	fflush(stdout);
	IteratorTripleID *it = trip->searchAll();
	tl->insert(it);
	cout<<" Number of tripleslist: "<<tl->getNumberOfElements()<<endl;
	delete it; // Remember to delete iterator to avoid memory leaks!

	outputFile = outputFile + "_Statistics";

	//erase summary file content
	ofstream out_summary;
	out_summary.open((outputFile + "_Summary").c_str(), ios::trunc);

	ofstream out_header_stats;
	out_header_stats.open((outputFile + "_HeaderStats").c_str(), ios::trunc);

	out_summary << "* General statistics" << endl;
	out_summary << "# Number of Triples: "
			<< hdt->getTriples()->getNumberOfElements() << endl;
	out_summary << "# Number of Predicates: "
			<< hdt->getDictionary()->getNpredicates() << endl;
	out_summary << "# Number of Subjects: "
			<< hdt->getDictionary()->getNsubjects() << endl;
	out_summary << "# Number of Objects: "
			<< hdt->getDictionary()->getNobjects() << endl;
	out_summary << "# Number of Shared Subject-Objects: "
			<< hdt->getDictionary()->getNshared() << endl;

	double ratioSO = (double) hdt->getDictionary()->getNshared()
							/ (hdt->getDictionary()->getNsubjects()
									+ hdt->getDictionary()->getNobjects()
									- hdt->getDictionary()->getNshared());
	out_summary << "# Ratio Shared Subject-Objects => SO / (S U O) : "
			<< ratioSO << endl;

	out_header_stats<<vocabSubject<<" <"<<vocabPredicate<<"ratioSharedSO> "<<ratioSO;
	/*
	 * Compute over the dictionary to get shared subject-predicate and predicate-object
	 */
	IteratorUCharString *itPred = hdt->getDictionary()->getPredicates();

	int numSubjectPredicates = 0;
	int numPredicatesObjects = 0;
	while (itPred->hasNext()) {
		stringstream s;
		s << itPred->next();
		string pred = s.str();

		if (hdt->getDictionary()->stringToId(pred, SUBJECT) > 0) {
			//found
			numSubjectPredicates++;
		}
		if (hdt->getDictionary()->stringToId(pred, OBJECT) > 0) {
			//found
			numPredicatesObjects++;
		}
	}
	delete itPred; // Remember to delete iterator to avoid memory leaks!

	double ratioSP = (double) numSubjectPredicates
			/ (hdt->getDictionary()->getNsubjects()
					+ hdt->getDictionary()->getNpredicates()
					- numSubjectPredicates);
	out_summary << "# Ratio Shared Subject-Predicate => SP / (S U P) : "
			<< ratioSP << endl;

	out_header_stats<<vocabSubject<<" <"<<vocabPredicate<<"ratioSharedSP> "<<ratioSP;

	double ratioPO =(double) numPredicatesObjects
			/ (hdt->getDictionary()->getNobjects()
					+ hdt->getDictionary()->getNpredicates()
					- numPredicatesObjects);
	out_summary << "# Ratio Shared Predicate-Object => PO / (P U O) : "
			<< ratioPO << endl;

	out_header_stats<<vocabSubject<<" <"<<vocabPredicate<<"ratioSharedPO> "<<ratioPO;

	out_summary.close();
	out_header_stats.close();

	//erase summary file SO and Type content
	ofstream out_summarySO;
	out_summarySO.open((outputFile + "_SO_Summary").c_str(), ios::trunc);
	out_summarySO.close();

	ofstream out_summaryType;
	out_summaryType.open((outputFile + "_Typed_Summary").c_str(), ios::trunc);
	out_summaryType.close();

	//find rdf:type
	unsigned int IDrdftype = 0;
	string rdftype = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";

	if (hdt->getDictionary()->stringToId(rdftype, PREDICATE) > 0) {
		//found
		IDrdftype = hdt->getDictionary()->stringToId(rdftype, PREDICATE);
	} else {
		rdftype = "rdf:type";
		if (hdt->getDictionary()->stringToId(rdftype, PREDICATE) > 0) {
			//found
			IDrdftype = hdt->getDictionary()->stringToId(rdftype, PREDICATE);
		} else {
			rdftype = "a";
			if (hdt->getDictionary()->stringToId(rdftype, PREDICATE) > 0) {
				//found
				IDrdftype = hdt->getDictionary()->stringToId(rdftype,
						PREDICATE);
			}
		}
	}

	tl->calculateDegrees(outputFile, hdt->getDictionary()->getNshared(),
			IDrdftype);



	delete hdt;
}
bool
PluginRDFIndexer::reindex()
{
    Triples tt = m_index->match
        (Triple(Node(), Uri("a"), m_index->expand("vamp:Plugin")));
    Nodes plugins = tt.subjects();

    bool foundSomething = false;
    bool addedSomething = false;

    foreach (Node plugin, plugins) {
        
        if (plugin.type != Node::URI) {
            cerr << "PluginRDFIndexer::reindex: Plugin has no URI: node is "
                 << plugin << endl;
            continue;
        }
        
        Node idn = m_index->complete
            (Triple(plugin, m_index->expand("vamp:identifier"), Node()));

        if (idn.type != Node::Literal) {
            cerr << "PluginRDFIndexer::reindex: Plugin " << plugin
                 << " lacks vamp:identifier literal" << endl;
            continue;
        }

        Node libn = m_index->complete
            (Triple(Node(), m_index->expand("vamp:available_plugin"), plugin));

        if (libn.type != Node::URI) {
            cerr << "PluginRDFIndexer::reindex: Plugin " << plugin 
                 << " is not vamp:available_plugin in any library" << endl;
            continue;
        }

        Node son = m_index->complete
            (Triple(libn, m_index->expand("vamp:identifier"), Node()));

        if (son.type != Node::Literal) {
            cerr << "PluginRDFIndexer::reindex: Library " << libn
                 << " lacks vamp:identifier for soname" << endl;
            continue;
        }

        QString pluginUri = plugin.value;
        QString identifier = idn.value;
        QString soname = son.value;

        QString pluginId = PluginIdentifier::createIdentifier
            ("vamp", soname, identifier);

        foundSomething = true;

        if (m_idToUriMap.find(pluginId) != m_idToUriMap.end()) {
            continue;
        }

        m_idToUriMap[pluginId] = pluginUri;

        addedSomething = true;

        if (pluginUri != "") {
            if (m_uriToIdMap.find(pluginUri) != m_uriToIdMap.end()) {
                cerr << "PluginRDFIndexer::reindex: WARNING: Found multiple plugins with the same URI:" << endl;
                cerr << "  1. Plugin id \"" << m_uriToIdMap[pluginUri] << "\"" << endl;
                cerr << "  2. Plugin id \"" << pluginId << "\"" << endl;
                cerr << "both claim URI <" << pluginUri << ">" << endl;
            } else {
                m_uriToIdMap[pluginUri] = pluginId;
            }
        }
    }

    if (!foundSomething) {
        cerr << "PluginRDFIndexer::reindex: NOTE: Plugins found, but none sufficiently described" << endl;
    }
    
    return addedSomething;
}