int main(int argc, char **argv) { int c; string query, inputFile, outputFile; bool measure = false; while( (c = getopt(argc,argv,"hq:o:m"))!=-1) { switch(c) { case 'h': break; case 'q': query = optarg; break; case 'o': outputFile = optarg; break; case 'm': measure = true; break; default: cout << "ERROR: Unknown option" << endl; return 1; } } if(argc-optind<2) { cout << "ERROR: You must supply an input and HDT File" << endl << endl; return 1; } inputFile = argv[optind]; outputFile = argv[optind+1]; ConvertProgress progress; StopWatch st; try { // LOAD HDT *hdt = HDTManager::mapHDT(inputFile.c_str()); // CONVERT triples to TripleList TriplesList tlist; Triples *triples = hdt->getTriples(); cout << "Old Triples -> TriplesList" << endl; st.reset(); IteratorTripleID *it = triples->searchAll(); tlist.insert(it); delete it; cout << " Old Triples -> TriplesList time" << st << endl; // Convert tlist to OPS cout << "TriplesList sort OPS" << endl; st.reset(); tlist.sort(OPS, &progress); cout << " TriplesList sort OPS time: " << st << endl; // Generate new OPS BitmapTriples cout << "TriplesList to new BitmapTriples" << endl; HDTSpecification spec; spec.set("triplesOrder", "OPS"); BitmapTriples bt(spec); st.reset(); bt.load(tlist, &progress); cout << " TriplesList to new BitmapTriples time" << st << endl; // Update Header #if 1 cout << "Update Header" << endl; string rootNode("_:triples"); TripleString ts (rootNode, "", ""); hdt->getHeader()->remove(ts); bt.populateHeader(*hdt->getHeader(), "_:triples"); #endif // SAVE cout << "Save to " << outputFile << endl; ofstream out(outputFile.c_str(), ios::binary | ios::out); ControlInformation ci; ci.clear(); ci.setType(GLOBAL); ci.setFormat(HDTVocabulary::HDT_CONTAINER); ci.save(out); // HEADER ci.clear(); ci.setType(HEADER); hdt->getHeader()->save(out, ci, NULL); // DICTIONARY ci.clear(); ci.setType(DICTIONARY); hdt->getDictionary()->save(out, ci, NULL); // NEW TRIPLES ci.clear(); ci.setType(TRIPLES); bt.save(out, ci, NULL); out.close(); delete hdt; } catch (char *e) { cout << "ERROR: " << e << endl; } catch (const char *e) { cout << "ERROR: " << e << endl; } }
int main(int argc, char *argv[]) { int c; string inputFile; string outputFile; while ((c = getopt(argc, argv, "hi:o:")) != -1) { switch (c) { case 'h': help(); break; case 'o': outputFile = optarg; break; default: cout << "ERROR: Unknown option" << endl; help(); return 1; } } if (argc < 2) { cout << "ERROR: You must supply an input HDT File" << endl << endl; help(); return 1; } inputFile = argv[optind]; cout<<inputFile<<endl; if (outputFile == "") outputFile = inputFile; // Load HDT file HDT *hdt = HDTManager::mapHDT(inputFile.c_str()); TriplesList* tl = new TriplesList(); Triples * trip = hdt->getTriples(); cout<<" Number of triples: "<<(unsigned long)(hdt->getTriples()->getNumberOfElements())<<endl; fflush(stdout); IteratorTripleID *it = trip->searchAll(); tl->insert(it); cout<<" Number of tripleslist: "<<tl->getNumberOfElements()<<endl; delete it; // Remember to delete iterator to avoid memory leaks! outputFile = outputFile + "_Statistics"; //erase summary file content ofstream out_summary; out_summary.open((outputFile + "_Summary").c_str(), ios::trunc); ofstream out_header_stats; out_header_stats.open((outputFile + "_HeaderStats").c_str(), ios::trunc); out_summary << "* General statistics" << endl; out_summary << "# Number of Triples: " << hdt->getTriples()->getNumberOfElements() << endl; out_summary << "# Number of Predicates: " << hdt->getDictionary()->getNpredicates() << endl; out_summary << "# Number of Subjects: " << hdt->getDictionary()->getNsubjects() << endl; out_summary << "# Number of Objects: " << hdt->getDictionary()->getNobjects() << endl; out_summary << "# Number of Shared Subject-Objects: " << hdt->getDictionary()->getNshared() << endl; double ratioSO = (double) hdt->getDictionary()->getNshared() / (hdt->getDictionary()->getNsubjects() + hdt->getDictionary()->getNobjects() - hdt->getDictionary()->getNshared()); out_summary << "# Ratio Shared Subject-Objects => SO / (S U O) : " << ratioSO << endl; out_header_stats<<vocabSubject<<" <"<<vocabPredicate<<"ratioSharedSO> "<<ratioSO; /* * Compute over the dictionary to get shared subject-predicate and predicate-object */ IteratorUCharString *itPred = hdt->getDictionary()->getPredicates(); int numSubjectPredicates = 0; int numPredicatesObjects = 0; while (itPred->hasNext()) { stringstream s; s << itPred->next(); string pred = s.str(); if (hdt->getDictionary()->stringToId(pred, SUBJECT) > 0) { //found numSubjectPredicates++; } if (hdt->getDictionary()->stringToId(pred, OBJECT) > 0) { //found numPredicatesObjects++; } } delete itPred; // Remember to delete iterator to avoid memory leaks! double ratioSP = (double) numSubjectPredicates / (hdt->getDictionary()->getNsubjects() + hdt->getDictionary()->getNpredicates() - numSubjectPredicates); out_summary << "# Ratio Shared Subject-Predicate => SP / (S U P) : " << ratioSP << endl; out_header_stats<<vocabSubject<<" <"<<vocabPredicate<<"ratioSharedSP> "<<ratioSP; double ratioPO =(double) numPredicatesObjects / (hdt->getDictionary()->getNobjects() + hdt->getDictionary()->getNpredicates() - numPredicatesObjects); out_summary << "# Ratio Shared Predicate-Object => PO / (P U O) : " << ratioPO << endl; out_header_stats<<vocabSubject<<" <"<<vocabPredicate<<"ratioSharedPO> "<<ratioPO; out_summary.close(); out_header_stats.close(); //erase summary file SO and Type content ofstream out_summarySO; out_summarySO.open((outputFile + "_SO_Summary").c_str(), ios::trunc); out_summarySO.close(); ofstream out_summaryType; out_summaryType.open((outputFile + "_Typed_Summary").c_str(), ios::trunc); out_summaryType.close(); //find rdf:type unsigned int IDrdftype = 0; string rdftype = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"; if (hdt->getDictionary()->stringToId(rdftype, PREDICATE) > 0) { //found IDrdftype = hdt->getDictionary()->stringToId(rdftype, PREDICATE); } else { rdftype = "rdf:type"; if (hdt->getDictionary()->stringToId(rdftype, PREDICATE) > 0) { //found IDrdftype = hdt->getDictionary()->stringToId(rdftype, PREDICATE); } else { rdftype = "a"; if (hdt->getDictionary()->stringToId(rdftype, PREDICATE) > 0) { //found IDrdftype = hdt->getDictionary()->stringToId(rdftype, PREDICATE); } } } tl->calculateDegrees(outputFile, hdt->getDictionary()->getNshared(), IDrdftype); delete hdt; }
int main(int argc, char **argv) { int c; string query, inputFile, outputFile, filter1, filter2; bool measure = false; while ((c = getopt(argc, argv, "hq:o:mf:F:")) != -1) { switch (c) { case 'h': help(); break; case 'q': query = optarg; break; case 'o': outputFile = optarg; break; case 'm': measure = true; break; case 'f': filter1 = optarg; break; case 'F': filter2 = optarg; break; default: cout << "ERROR: Unknown option" << endl; help(); return 1; } } if (argc - optind < 1) { cout << "ERROR: You must supply an HDT File" << endl << endl; help(); return 1; } inputFile = argv[optind]; try { HDT *hdt = HDTManager::mapIndexedHDT(inputFile.c_str()); if (filter1 != "" || filter2 != "") { ostream *out; ofstream outF; if (outputFile != "") { outF.open(outputFile.c_str()); out = &outF; } else { out = &cout; } string infile; if (filter1 != "") infile = filter1; else infile = filter2; std::ifstream file(infile.c_str()); if (!file.good()) throw "unable to open filter file"; string linea = ""; string property = ""; string value = ""; StopWatch st_total; size_t totalQueryResults = 0; size_t numQuery=0; while (!file.eof()) { getline(file, linea); if(linea.length()==0) continue; size_t pos = linea.find(';'); if (pos != std::string::npos) { property = linea.substr(0, pos); value = linea.substr(pos + 1); cerr<<"Query "<<numQuery << ": "<<linea<<endl; cout<<">>> Query "<<numQuery << ": "<<linea<<endl; cout<<"property:"<<property<<endl; cout<<"value:"<<value<<endl<<endl; StopWatch st; hdt::LiteralDictionary *dict = dynamic_cast<hdt::LiteralDictionary *>(hdt->getDictionary()); if(dict==NULL) { cerr << "This dictionary does not support substring search" << endl; break; } hdt::Triples *triples = hdt->getTriples(); uint32_t *results = NULL; size_t numResults = dict->substringToId((unsigned char *) value.c_str(), value.length(), &results); TripleID pattern(0, dict->stringToId(property, PREDICATE), 0); for (size_t i = 0; i < numResults; i++) { pattern.setObject(results[i]); string objStr = dict->idToString(results[i], OBJECT); IteratorTripleID *it = triples->search(pattern); unsigned int numTriples = 0; //iterate over the first pattern while (it->hasNext()) { TripleID *ts = it->next(); if (filter1 != "") { // QUERY Q3 cout << dict->idToString(ts->getSubject(), SUBJECT) << " " << objStr << endl; totalQueryResults++; } else { // QUERY Q4 TripleID pat2(ts->getSubject(), 0, 0); TripleString out; string subjStr = dict->idToString(ts->getSubject(), SUBJECT); IteratorTripleID *it2 = triples->search(pat2); while(it2->hasNext()) { TripleID *inner = it2->next(); cout << subjStr << " " << dict->idToString(inner->getPredicate(), PREDICATE) <<" "; if(inner->getObject()==results[i]) { cout << objStr << endl; } else { cout << dict->idToString(inner->getObject(), OBJECT) << endl; } totalQueryResults++; } delete it2; } } delete it; } cout << ">>> Results: " << totalQueryResults << endl; cerr << "Query " << numQuery << " Results: " << totalQueryResults << " in " << st << endl << endl; numQuery++; } } cerr << "Total time: " << st_total << endl; if (outputFile != "") { outF.close(); } file.close(); } delete hdt; } catch (char *e) { cout << "ERROR: " << e << endl; } catch (const char *e) { cout << "ERROR: " << e << endl; } }
int main(int argc, char **argv) { int c; string query, inputFile, outputFile; bool measure = false; while( (c = getopt(argc,argv,"hq:o:m"))!=-1) { switch(c) { case 'h': help(); break; case 'q': query = optarg; break; case 'o': outputFile = optarg; break; case 'm': measure = true; break; default: cout << "ERROR: Unknown option" << endl; help(); return 1; } } if(argc-optind<1) { cout << "ERROR: You must supply an HDT File" << endl << endl; help(); return 1; } inputFile = argv[optind]; try { HDT *hdt = HDTManager::mapHDT(inputFile.c_str()); TripleID pat(0,1,0); #if 0 IteratorTripleID *it = hdt->getTriples()->search(pat); StopWatch st; unsigned int numTriples = 0; while(it->hasNext() && numTriples < 4) { TripleID *ts = it->next(); cout << *ts << endl; numTriples++; } cout << "------" << endl; while(it->hasPrevious()) { TripleID *ts = it->previous(); cout << *ts << endl; } cout << "------" << endl; numTriples=0; while(it->hasNext() && numTriples<10) { TripleID *ts = it->next(); cout << *ts << endl; numTriples++; } delete it; cout << numTriples << " results in " << st << endl; #else IteratorTripleID *it = hdt->getTriples()->search(pat); #if 1 while(it->hasNext()) { cout << *it->next() << endl; } it->goToStart(); cout << "------" << endl; #endif RandomAccessIterator rit(it); int i; for(i=0;i<rit.getNumElements() && i < 20;i++) { TripleID *tid = rit.get(i); cout << i << " => " << *tid << endl; } cout << "------" << endl; for(i-- ; i>=0; i--) { TripleID *tid = rit.get(i); cout << i << " => " << *tid << endl; } cout << "------" << endl; for(i=0;i<rit.getNumElements() && i<20;i++) { TripleID *tid = rit.get(i); cout << i << " => " << *tid << endl; } cout << "------" << endl; cout << 2 << " => " << *rit.get(2) << endl; cout << 7 << " => " << *rit.get(7) << endl; cout << 1 << " => " << *rit.get(1) << endl; cout << 5 << " => " << *rit.get(5) << endl; cout << 0 << " => " << *rit.get(0) << endl; cout << 9 << " => " << *rit.get(9) << endl; cout << 8 << " => " << *rit.get(8) << endl; delete it; #endif delete hdt; } catch (char *e) { cout << "ERROR: " << e << endl; } catch (const char *e) { cout << "ERROR: " << e << endl; } }
int main(int argc, char **argv) { int c; string query, inputFile, outputFile; bool measure = false; while( (c = getopt(argc,argv,"hq:o:m"))!=-1) { switch(c) { case 'h': break; case 'q': query = optarg; break; case 'o': outputFile = optarg; break; case 'm': measure = true; break; default: cout << "ERROR: Unknown option" << endl; return 1; } } if(argc-optind<1) { cout << "ERROR: You must supply an input and HDT File" << endl << endl; return 1; } inputFile = argv[optind]; try { // LOAD HDT *hdt = HDTManager::mapHDT(inputFile.c_str()); // CONVERT Dictionary *dict = hdt->getDictionary(); //LiteralDictionary litDict; FourSectionDictionary litDict; StdoutProgressListener progress; litDict.import(dict, &progress); // SAVE ofstream out(outputFile.c_str(), ios::binary | ios::out); ControlInformation ci; // GLOBAL ci.clear(); ci.setType(GLOBAL); ci.setFormat(HDTVocabulary::HDT_CONTAINER); ci.save(out); // HEADER ci.clear(); ci.setType(HEADER); hdt->getHeader()->save(out, ci, NULL); // NEW DICTIONARY ci.clear(); ci.setType(DICTIONARY); litDict.save(out, ci, NULL); // TRIPLES ci.clear(); ci.setType(TRIPLES); hdt->getTriples()->save(out, ci, NULL); out.close(); delete hdt; } catch (std::exception& e) { cout << "ERROR: " << e.what() << endl; } }