int main(int argc, char **argv) { int c; string query, inputFile, outputFile; bool measure = false; while( (c = getopt(argc,argv,"hq:o:m"))!=-1) { switch(c) { case 'h': break; case 'q': query = optarg; break; case 'o': outputFile = optarg; break; case 'm': measure = true; break; default: cout << "ERROR: Unknown option" << endl; return 1; } } if(argc-optind<2) { cout << "ERROR: You must supply an input and HDT File" << endl << endl; return 1; } inputFile = argv[optind]; outputFile = argv[optind+1]; ConvertProgress progress; StopWatch st; try { // LOAD HDT *hdt = HDTManager::mapHDT(inputFile.c_str()); // CONVERT triples to TripleList TriplesList tlist; Triples *triples = hdt->getTriples(); cout << "Old Triples -> TriplesList" << endl; st.reset(); IteratorTripleID *it = triples->searchAll(); tlist.insert(it); delete it; cout << " Old Triples -> TriplesList time" << st << endl; // Convert tlist to OPS cout << "TriplesList sort OPS" << endl; st.reset(); tlist.sort(OPS, &progress); cout << " TriplesList sort OPS time: " << st << endl; // Generate new OPS BitmapTriples cout << "TriplesList to new BitmapTriples" << endl; HDTSpecification spec; spec.set("triplesOrder", "OPS"); BitmapTriples bt(spec); st.reset(); bt.load(tlist, &progress); cout << " TriplesList to new BitmapTriples time" << st << endl; // Update Header #if 1 cout << "Update Header" << endl; string rootNode("_:triples"); TripleString ts (rootNode, "", ""); hdt->getHeader()->remove(ts); bt.populateHeader(*hdt->getHeader(), "_:triples"); #endif // SAVE cout << "Save to " << outputFile << endl; ofstream out(outputFile.c_str(), ios::binary | ios::out); ControlInformation ci; ci.clear(); ci.setType(GLOBAL); ci.setFormat(HDTVocabulary::HDT_CONTAINER); ci.save(out); // HEADER ci.clear(); ci.setType(HEADER); hdt->getHeader()->save(out, ci, NULL); // DICTIONARY ci.clear(); ci.setType(DICTIONARY); hdt->getDictionary()->save(out, ci, NULL); // NEW TRIPLES ci.clear(); ci.setType(TRIPLES); bt.save(out, ci, NULL); 
out.close(); delete hdt; } catch (char *e) { cout << "ERROR: " << e << endl; } catch (const char *e) { cout << "ERROR: " << e << endl; } }
/**
 * Reports whether the underlying triples component is indexed.
 *
 * @return the value returned by triples->isIndexed().
 */
bool isIndexed() const
{
    const bool indexed = triples->isIndexed();
    return indexed;
}
int main(int argc, char *argv[]) { int c; string inputFile; string outputFile; while ((c = getopt(argc, argv, "hi:o:")) != -1) { switch (c) { case 'h': help(); break; case 'o': outputFile = optarg; break; default: cout << "ERROR: Unknown option" << endl; help(); return 1; } } if (argc < 2) { cout << "ERROR: You must supply an input HDT File" << endl << endl; help(); return 1; } inputFile = argv[optind]; cout<<inputFile<<endl; if (outputFile == "") outputFile = inputFile; // Load HDT file HDT *hdt = HDTManager::mapHDT(inputFile.c_str()); TriplesList* tl = new TriplesList(); Triples * trip = hdt->getTriples(); cout<<" Number of triples: "<<(unsigned long)(hdt->getTriples()->getNumberOfElements())<<endl; fflush(stdout); IteratorTripleID *it = trip->searchAll(); tl->insert(it); cout<<" Number of tripleslist: "<<tl->getNumberOfElements()<<endl; delete it; // Remember to delete iterator to avoid memory leaks! outputFile = outputFile + "_Statistics"; //erase summary file content ofstream out_summary; out_summary.open((outputFile + "_Summary").c_str(), ios::trunc); ofstream out_header_stats; out_header_stats.open((outputFile + "_HeaderStats").c_str(), ios::trunc); out_summary << "* General statistics" << endl; out_summary << "# Number of Triples: " << hdt->getTriples()->getNumberOfElements() << endl; out_summary << "# Number of Predicates: " << hdt->getDictionary()->getNpredicates() << endl; out_summary << "# Number of Subjects: " << hdt->getDictionary()->getNsubjects() << endl; out_summary << "# Number of Objects: " << hdt->getDictionary()->getNobjects() << endl; out_summary << "# Number of Shared Subject-Objects: " << hdt->getDictionary()->getNshared() << endl; double ratioSO = (double) hdt->getDictionary()->getNshared() / (hdt->getDictionary()->getNsubjects() + hdt->getDictionary()->getNobjects() - hdt->getDictionary()->getNshared()); out_summary << "# Ratio Shared Subject-Objects => SO / (S U O) : " << ratioSO << endl; out_header_stats<<vocabSubject<<" 
<"<<vocabPredicate<<"ratioSharedSO> "<<ratioSO; /* * Compute over the dictionary to get shared subject-predicate and predicate-object */ IteratorUCharString *itPred = hdt->getDictionary()->getPredicates(); int numSubjectPredicates = 0; int numPredicatesObjects = 0; while (itPred->hasNext()) { stringstream s; s << itPred->next(); string pred = s.str(); if (hdt->getDictionary()->stringToId(pred, SUBJECT) > 0) { //found numSubjectPredicates++; } if (hdt->getDictionary()->stringToId(pred, OBJECT) > 0) { //found numPredicatesObjects++; } } delete itPred; // Remember to delete iterator to avoid memory leaks! double ratioSP = (double) numSubjectPredicates / (hdt->getDictionary()->getNsubjects() + hdt->getDictionary()->getNpredicates() - numSubjectPredicates); out_summary << "# Ratio Shared Subject-Predicate => SP / (S U P) : " << ratioSP << endl; out_header_stats<<vocabSubject<<" <"<<vocabPredicate<<"ratioSharedSP> "<<ratioSP; double ratioPO =(double) numPredicatesObjects / (hdt->getDictionary()->getNobjects() + hdt->getDictionary()->getNpredicates() - numPredicatesObjects); out_summary << "# Ratio Shared Predicate-Object => PO / (P U O) : " << ratioPO << endl; out_header_stats<<vocabSubject<<" <"<<vocabPredicate<<"ratioSharedPO> "<<ratioPO; out_summary.close(); out_header_stats.close(); //erase summary file SO and Type content ofstream out_summarySO; out_summarySO.open((outputFile + "_SO_Summary").c_str(), ios::trunc); out_summarySO.close(); ofstream out_summaryType; out_summaryType.open((outputFile + "_Typed_Summary").c_str(), ios::trunc); out_summaryType.close(); //find rdf:type unsigned int IDrdftype = 0; string rdftype = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"; if (hdt->getDictionary()->stringToId(rdftype, PREDICATE) > 0) { //found IDrdftype = hdt->getDictionary()->stringToId(rdftype, PREDICATE); } else { rdftype = "rdf:type"; if (hdt->getDictionary()->stringToId(rdftype, PREDICATE) > 0) { //found IDrdftype = hdt->getDictionary()->stringToId(rdftype, 
PREDICATE); } else { rdftype = "a"; if (hdt->getDictionary()->stringToId(rdftype, PREDICATE) > 0) { //found IDrdftype = hdt->getDictionary()->stringToId(rdftype, PREDICATE); } } } tl->calculateDegrees(outputFile, hdt->getDictionary()->getNshared(), IDrdftype); delete hdt; }
bool PluginRDFIndexer::reindex() { Triples tt = m_index->match (Triple(Node(), Uri("a"), m_index->expand("vamp:Plugin"))); Nodes plugins = tt.subjects(); bool foundSomething = false; bool addedSomething = false; foreach (Node plugin, plugins) { if (plugin.type != Node::URI) { cerr << "PluginRDFIndexer::reindex: Plugin has no URI: node is " << plugin << endl; continue; } Node idn = m_index->complete (Triple(plugin, m_index->expand("vamp:identifier"), Node())); if (idn.type != Node::Literal) { cerr << "PluginRDFIndexer::reindex: Plugin " << plugin << " lacks vamp:identifier literal" << endl; continue; } Node libn = m_index->complete (Triple(Node(), m_index->expand("vamp:available_plugin"), plugin)); if (libn.type != Node::URI) { cerr << "PluginRDFIndexer::reindex: Plugin " << plugin << " is not vamp:available_plugin in any library" << endl; continue; } Node son = m_index->complete (Triple(libn, m_index->expand("vamp:identifier"), Node())); if (son.type != Node::Literal) { cerr << "PluginRDFIndexer::reindex: Library " << libn << " lacks vamp:identifier for soname" << endl; continue; } QString pluginUri = plugin.value; QString identifier = idn.value; QString soname = son.value; QString pluginId = PluginIdentifier::createIdentifier ("vamp", soname, identifier); foundSomething = true; if (m_idToUriMap.find(pluginId) != m_idToUriMap.end()) { continue; } m_idToUriMap[pluginId] = pluginUri; addedSomething = true; if (pluginUri != "") { if (m_uriToIdMap.find(pluginUri) != m_uriToIdMap.end()) { cerr << "PluginRDFIndexer::reindex: WARNING: Found multiple plugins with the same URI:" << endl; cerr << " 1. Plugin id \"" << m_uriToIdMap[pluginUri] << "\"" << endl; cerr << " 2. Plugin id \"" << pluginId << "\"" << endl; cerr << "both claim URI <" << pluginUri << ">" << endl; } else { m_uriToIdMap[pluginUri] = pluginId; } } } if (!foundSomething) { cerr << "PluginRDFIndexer::reindex: NOTE: Plugins found, but none sufficiently described" << endl; } return addedSomething; }