int main(int argc, char **argv) { try { HDT *hdt = HDTManager::mapHDT(argv[1]); IteratorUCharString *it =hdt->getDictionary()->getObjects(); ofstream lit("lit.txt"); ofstream blk("blank.txt"); ofstream uri("uri.txt"); while(it->hasNext()) { unsigned char *str = it->next(); if(*str=='"') { // Literal lit << (char*)str << endl; } else if(*str=='_'){ // Blanco blk << (char*)str << endl; } else { // URI uri << (char*)str << endl; } } lit.close(); blk.close(); uri.close(); delete it; delete hdt; } catch(const char *str) { cerr << str << endl; } catch(char *str) { cerr << str << endl; } }
int main(int argc, char **argv) { int c; string query, inputFile, outputFile; bool measure = false; while( (c = getopt(argc,argv,"hq:o:m"))!=-1) { switch(c) { case 'h': help(); break; case 'q': query = optarg; break; case 'o': outputFile = optarg; break; case 'm': measure = true; break; default: cout << "ERROR: Unknown option" << endl; help(); return 1; } } if(argc-optind<2) { cout << "ERROR: You must supply an HDT File" << endl << endl; help(); return 1; } inputFile = argv[optind]; outputFile = argv[optind+1]; try { HDT *hdt = HDTManager::mapHDT(inputFile.c_str()); hdt->saveToHDT(outputFile.c_str()); cout << "IN: " << inputFile << " Out: " << outputFile << endl; delete hdt; } catch (std::exception& e) { cout << "ERROR: " << e.what() << endl; } }
int main(int argc, char **argv) { int c; string query, inputFile, outputFile; bool measure = false; while( (c = getopt(argc,argv,"hq:o:m"))!=-1) { switch(c) { case 'h': break; case 'q': query = optarg; break; case 'o': outputFile = optarg; break; case 'm': measure = true; break; default: cout << "ERROR: Unknown option" << endl; return 1; } } if(argc-optind<2) { cout << "ERROR: You must supply an input and HDT File" << endl << endl; return 1; } inputFile = argv[optind]; outputFile = argv[optind+1]; ConvertProgress progress; StopWatch st; try { // LOAD HDT *hdt = HDTManager::mapHDT(inputFile.c_str()); // CONVERT triples to TripleList TriplesList tlist; Triples *triples = hdt->getTriples(); cout << "Old Triples -> TriplesList" << endl; st.reset(); IteratorTripleID *it = triples->searchAll(); tlist.insert(it); delete it; cout << " Old Triples -> TriplesList time" << st << endl; // Convert tlist to OPS cout << "TriplesList sort OPS" << endl; st.reset(); tlist.sort(OPS, &progress); cout << " TriplesList sort OPS time: " << st << endl; // Generate new OPS BitmapTriples cout << "TriplesList to new BitmapTriples" << endl; HDTSpecification spec; spec.set("triplesOrder", "OPS"); BitmapTriples bt(spec); st.reset(); bt.load(tlist, &progress); cout << " TriplesList to new BitmapTriples time" << st << endl; // Update Header #if 1 cout << "Update Header" << endl; string rootNode("_:triples"); TripleString ts (rootNode, "", ""); hdt->getHeader()->remove(ts); bt.populateHeader(*hdt->getHeader(), "_:triples"); #endif // SAVE cout << "Save to " << outputFile << endl; ofstream out(outputFile.c_str(), ios::binary | ios::out); ControlInformation ci; ci.clear(); ci.setType(GLOBAL); ci.setFormat(HDTVocabulary::HDT_CONTAINER); ci.save(out); // HEADER ci.clear(); ci.setType(HEADER); hdt->getHeader()->save(out, ci, NULL); // DICTIONARY ci.clear(); ci.setType(DICTIONARY); hdt->getDictionary()->save(out, ci, NULL); // NEW TRIPLES ci.clear(); ci.setType(TRIPLES); bt.save(out, ci, NULL); out.close(); delete hdt; } catch (char *e) { cout << "ERROR: " << e << endl; } catch (const char *e) { cout << "ERROR: " << e << endl; } }
int main(int argc, char *argv[]) { int c; string inputFile; string outputFile; while ((c = getopt(argc, argv, "hi:o:")) != -1) { switch (c) { case 'h': help(); break; case 'o': outputFile = optarg; break; default: cout << "ERROR: Unknown option" << endl; help(); return 1; } } if (argc < 2) { cout << "ERROR: You must supply an input HDT File" << endl << endl; help(); return 1; } inputFile = argv[optind]; cout<<inputFile<<endl; if (outputFile == "") outputFile = inputFile; // Load HDT file HDT *hdt = HDTManager::mapHDT(inputFile.c_str()); TriplesList* tl = new TriplesList(); Triples * trip = hdt->getTriples(); cout<<" Number of triples: "<<(unsigned long)(hdt->getTriples()->getNumberOfElements())<<endl; fflush(stdout); IteratorTripleID *it = trip->searchAll(); tl->insert(it); cout<<" Number of tripleslist: "<<tl->getNumberOfElements()<<endl; delete it; // Remember to delete iterator to avoid memory leaks! outputFile = outputFile + "_Statistics"; //erase summary file content ofstream out_summary; out_summary.open((outputFile + "_Summary").c_str(), ios::trunc); ofstream out_header_stats; out_header_stats.open((outputFile + "_HeaderStats").c_str(), ios::trunc); out_summary << "* General statistics" << endl; out_summary << "# Number of Triples: " << hdt->getTriples()->getNumberOfElements() << endl; out_summary << "# Number of Predicates: " << hdt->getDictionary()->getNpredicates() << endl; out_summary << "# Number of Subjects: " << hdt->getDictionary()->getNsubjects() << endl; out_summary << "# Number of Objects: " << hdt->getDictionary()->getNobjects() << endl; out_summary << "# Number of Shared Subject-Objects: " << hdt->getDictionary()->getNshared() << endl; double ratioSO = (double) hdt->getDictionary()->getNshared() / (hdt->getDictionary()->getNsubjects() + hdt->getDictionary()->getNobjects() - hdt->getDictionary()->getNshared()); out_summary << "# Ratio Shared Subject-Objects => SO / (S U O) : " << ratioSO << endl; out_header_stats<<vocabSubject<<" <"<<vocabPredicate<<"ratioSharedSO> "<<ratioSO; /* * Compute over the dictionary to get shared subject-predicate and predicate-object */ IteratorUCharString *itPred = hdt->getDictionary()->getPredicates(); int numSubjectPredicates = 0; int numPredicatesObjects = 0; while (itPred->hasNext()) { stringstream s; s << itPred->next(); string pred = s.str(); if (hdt->getDictionary()->stringToId(pred, SUBJECT) > 0) { //found numSubjectPredicates++; } if (hdt->getDictionary()->stringToId(pred, OBJECT) > 0) { //found numPredicatesObjects++; } } delete itPred; // Remember to delete iterator to avoid memory leaks! double ratioSP = (double) numSubjectPredicates / (hdt->getDictionary()->getNsubjects() + hdt->getDictionary()->getNpredicates() - numSubjectPredicates); out_summary << "# Ratio Shared Subject-Predicate => SP / (S U P) : " << ratioSP << endl; out_header_stats<<vocabSubject<<" <"<<vocabPredicate<<"ratioSharedSP> "<<ratioSP; double ratioPO =(double) numPredicatesObjects / (hdt->getDictionary()->getNobjects() + hdt->getDictionary()->getNpredicates() - numPredicatesObjects); out_summary << "# Ratio Shared Predicate-Object => PO / (P U O) : " << ratioPO << endl; out_header_stats<<vocabSubject<<" <"<<vocabPredicate<<"ratioSharedPO> "<<ratioPO; out_summary.close(); out_header_stats.close(); //erase summary file SO and Type content ofstream out_summarySO; out_summarySO.open((outputFile + "_SO_Summary").c_str(), ios::trunc); out_summarySO.close(); ofstream out_summaryType; out_summaryType.open((outputFile + "_Typed_Summary").c_str(), ios::trunc); out_summaryType.close(); //find rdf:type unsigned int IDrdftype = 0; string rdftype = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"; if (hdt->getDictionary()->stringToId(rdftype, PREDICATE) > 0) { //found IDrdftype = hdt->getDictionary()->stringToId(rdftype, PREDICATE); } else { rdftype = "rdf:type"; if (hdt->getDictionary()->stringToId(rdftype, PREDICATE) > 0) { //found IDrdftype = hdt->getDictionary()->stringToId(rdftype, PREDICATE); } else { rdftype = "a"; if (hdt->getDictionary()->stringToId(rdftype, PREDICATE) > 0) { //found IDrdftype = hdt->getDictionary()->stringToId(rdftype, PREDICATE); } } } tl->calculateDegrees(outputFile, hdt->getDictionary()->getNshared(), IDrdftype); delete hdt; }
int main(int argc, char **argv) { int c; string query, inputFile, outputFile; bool measure = false; while( (c = getopt(argc,argv,"hq:o:m"))!=-1) { switch(c) { case 'h': help(); break; case 'q': query = optarg; break; case 'o': outputFile = optarg; break; case 'm': measure = true; break; default: cout << "ERROR: Unknown option" << endl; help(); return 1; } } if(argc-optind<1) { cout << "ERROR: You must supply an HDT File" << endl << endl; help(); return 1; } inputFile = argv[optind]; try { HDT *hdt = HDTManager::mapHDT(inputFile.c_str()); TripleID pat(0,1,0); #if 0 IteratorTripleID *it = hdt->getTriples()->search(pat); StopWatch st; unsigned int numTriples = 0; while(it->hasNext() && numTriples < 4) { TripleID *ts = it->next(); cout << *ts << endl; numTriples++; } cout << "------" << endl; while(it->hasPrevious()) { TripleID *ts = it->previous(); cout << *ts << endl; } cout << "------" << endl; numTriples=0; while(it->hasNext() && numTriples<10) { TripleID *ts = it->next(); cout << *ts << endl; numTriples++; } delete it; cout << numTriples << " results in " << st << endl; #else IteratorTripleID *it = hdt->getTriples()->search(pat); #if 1 while(it->hasNext()) { cout << *it->next() << endl; } it->goToStart(); cout << "------" << endl; #endif RandomAccessIterator rit(it); int i; for(i=0;i<rit.getNumElements() && i < 20;i++) { TripleID *tid = rit.get(i); cout << i << " => " << *tid << endl; } cout << "------" << endl; for(i-- ; i>=0; i--) { TripleID *tid = rit.get(i); cout << i << " => " << *tid << endl; } cout << "------" << endl; for(i=0;i<rit.getNumElements() && i<20;i++) { TripleID *tid = rit.get(i); cout << i << " => " << *tid << endl; } cout << "------" << endl; cout << 2 << " => " << *rit.get(2) << endl; cout << 7 << " => " << *rit.get(7) << endl; cout << 1 << " => " << *rit.get(1) << endl; cout << 5 << " => " << *rit.get(5) << endl; cout << 0 << " => " << *rit.get(0) << endl; cout << 9 << " => " << *rit.get(9) << endl; cout << 8 << " => " << *rit.get(8) << endl; delete it; #endif delete hdt; } catch (char *e) { cout << "ERROR: " << e << endl; } catch (const char *e) { cout << "ERROR: " << e << endl; } }
int main(int argc, char **argv) { int c; string query, inputFile, outputFile, filter1, filter2; bool measure = false; while ((c = getopt(argc, argv, "hq:o:mf:F:")) != -1) { switch (c) { case 'h': help(); break; case 'q': query = optarg; break; case 'o': outputFile = optarg; break; case 'm': measure = true; break; case 'f': filter1 = optarg; break; case 'F': filter2 = optarg; break; default: cout << "ERROR: Unknown option" << endl; help(); return 1; } } if (argc - optind < 1) { cout << "ERROR: You must supply an HDT File" << endl << endl; help(); return 1; } inputFile = argv[optind]; try { HDT *hdt = HDTManager::mapIndexedHDT(inputFile.c_str()); if (filter1 != "" || filter2 != "") { ostream *out; ofstream outF; if (outputFile != "") { outF.open(outputFile.c_str()); out = &outF; } else { out = &cout; } string infile; if (filter1 != "") infile = filter1; else infile = filter2; std::ifstream file(infile.c_str()); if (!file.good()) throw "unable to open filter file"; string linea = ""; string property = ""; string value = ""; StopWatch st_total; size_t totalQueryResults = 0; size_t numQuery=0; while (!file.eof()) { getline(file, linea); if(linea.length()==0) continue; size_t pos = linea.find(';'); if (pos != std::string::npos) { property = linea.substr(0, pos); value = linea.substr(pos + 1); cerr<<"Query "<<numQuery << ": "<<linea<<endl; cout<<">>> Query "<<numQuery << ": "<<linea<<endl; cout<<"property:"<<property<<endl; cout<<"value:"<<value<<endl<<endl; StopWatch st; hdt::LiteralDictionary *dict = dynamic_cast<hdt::LiteralDictionary *>(hdt->getDictionary()); if(dict==NULL) { cerr << "This dictionary does not support substring search" << endl; break; } hdt::Triples *triples = hdt->getTriples(); uint32_t *results = NULL; size_t numResults = dict->substringToId((unsigned char *) value.c_str(), value.length(), &results); TripleID pattern(0, dict->stringToId(property, PREDICATE), 0); for (size_t i = 0; i < numResults; i++) { pattern.setObject(results[i]); string objStr = dict->idToString(results[i], OBJECT); IteratorTripleID *it = triples->search(pattern); unsigned int numTriples = 0; //iterate over the first pattern while (it->hasNext()) { TripleID *ts = it->next(); if (filter1 != "") { // QUERY Q3 cout << dict->idToString(ts->getSubject(), SUBJECT) << " " << objStr << endl; totalQueryResults++; } else { // QUERY Q4 TripleID pat2(ts->getSubject(), 0, 0); TripleString out; string subjStr = dict->idToString(ts->getSubject(), SUBJECT); IteratorTripleID *it2 = triples->search(pat2); while(it2->hasNext()) { TripleID *inner = it2->next(); cout << subjStr << " " << dict->idToString(inner->getPredicate(), PREDICATE) <<" "; if(inner->getObject()==results[i]) { cout << objStr << endl; } else { cout << dict->idToString(inner->getObject(), OBJECT) << endl; } totalQueryResults++; } delete it2; } } delete it; } cout << ">>> Results: " << totalQueryResults << endl; cerr << "Query " << numQuery << " Results: " << totalQueryResults << " in " << st << endl << endl; numQuery++; } } cerr << "Total time: " << st_total << endl; if (outputFile != "") { outF.close(); } file.close(); } delete hdt; } catch (char *e) { cout << "ERROR: " << e << endl; } catch (const char *e) { cout << "ERROR: " << e << endl; } }
int main(int argc, char **argv) { int c; string rdfFormat, inputFile, outputFile; RDFNotation notation = NTRIPLES; while( (c = getopt(argc,argv,"f:"))!=-1) { switch(c) { case 'f': rdfFormat = optarg; cout << "Format: " << rdfFormat << endl; break; default: cout << "ERROR: Unknown option" << endl; help(); return 1; } } if(argc-optind<2) { cout << "ERROR: You must supply an input and output" << endl << endl; help(); return 1; } if(rdfFormat!="") { if(rdfFormat=="ntriples") { notation = NTRIPLES; } else if(rdfFormat=="n3") { notation = N3; } else if(rdfFormat=="turtle") { notation = TURTLE; } else if(rdfFormat=="rdfxml") { notation = XML; } else { cout << "ERROR: The RDF output format must be one of: (ntriples, n3, turtle, rdfxml)" << endl; help(); return 1; } } inputFile = argv[optind]; outputFile = argv[optind+1]; if(inputFile=="") { cout << "ERROR: You must supply an HDT input file" << endl << endl; help(); return 1; } if(outputFile=="") { cout << "ERROR: You must supply an RDF output file" << endl << endl; help(); return 1; } try { StdoutProgressListener progress; HDT *hdt = HDTManager::mapHDT(inputFile.c_str(), &progress); if(outputFile!="-") { RDFSerializer *serializer = RDFSerializer::getSerializer(outputFile.c_str(), notation); hdt->saveToRDF(*serializer); delete serializer; } else { RDFSerializer *serializer = RDFSerializer::getSerializer(cout, notation); hdt->saveToRDF(*serializer); delete serializer; } delete hdt; } catch (std::exception& e) { cerr << "ERROR: " << e.what() << endl; } }
int main(int argc, char **argv) { int c; string query, inputFile, outputFile; bool measure = false; while( (c = getopt(argc,argv,"hq:o:m"))!=-1) { switch(c) { case 'h': break; case 'q': query = optarg; break; case 'o': outputFile = optarg; break; case 'm': measure = true; break; default: cout << "ERROR: Unknown option" << endl; return 1; } } if(argc-optind<1) { cout << "ERROR: You must supply an input and HDT File" << endl << endl; return 1; } inputFile = argv[optind]; try { // LOAD HDT *hdt = HDTManager::mapHDT(inputFile.c_str()); // CONVERT Dictionary *dict = hdt->getDictionary(); //LiteralDictionary litDict; FourSectionDictionary litDict; StdoutProgressListener progress; litDict.import(dict, &progress); // SAVE ofstream out(outputFile.c_str(), ios::binary | ios::out); ControlInformation ci; // GLOBAL ci.clear(); ci.setType(GLOBAL); ci.setFormat(HDTVocabulary::HDT_CONTAINER); ci.save(out); // HEADER ci.clear(); ci.setType(HEADER); hdt->getHeader()->save(out, ci, NULL); // NEW DICTIONARY ci.clear(); ci.setType(DICTIONARY); litDict.save(out, ci, NULL); // TRIPLES ci.clear(); ci.setType(TRIPLES); hdt->getTriples()->save(out, ci, NULL); out.close(); delete hdt; } catch (std::exception& e) { cout << "ERROR: " << e.what() << endl; } }
int main(int argc, char **argv) { string inputFile; string outputFile; bool verbose=false; bool showProgress=false; bool generateIndex=false; string configFile; string options; string rdfFormat; string baseUri; /** * Input file format. If no -f is specified and we can't guess which * format it is, we will use NTRIPLES by default. */ RDFNotation notation = NTRIPLES; int flag; while ((flag = getopt (argc, argv, "c:o:vpf:B:iVh")) != -1) { switch (flag) { case 'c': configFile = optarg; break; case 'o': options = optarg; break; case 'v': verbose = true; break; case 'p': showProgress = true; break; case 'f': rdfFormat = optarg; break; case 'B': baseUri = optarg; break; case 'i': generateIndex=true; break; case 'V': cout << HDTVersion::get_version_string(".") << endl; return 0; case 'h': help(); return 0; default: cerr << "ERROR: Unknown option" << endl; help(); return 1; } } #define vout if (!verbose) {} else std::cerr /* Verbose output */ if (!configFile.empty()) { vout << "Configfile: " << configFile << endl; } if (!options.empty()) { vout << "Options: " << options << endl; } if(argc-optind<2) { cerr << "ERROR: You must supply an input and output" << endl << endl; help(); return 1; } inputFile = argv[optind]; outputFile = argv[optind+1]; if(inputFile=="") { cerr << "ERROR: You must supply an RDF input file" << endl << endl; help(); return 1; } if(outputFile=="") { cerr << "ERROR: You must supply an HDT output file" << endl << endl; help(); return 1; } if(baseUri=="") { baseUri="<file://"+inputFile+">"; } /** * If -f flag (input format) was not specified, we try to guess it * by reading the file extension. */ if (rdfFormat == "") { vout << "Input format not given. Guessing from file extension..." << endl; // Get position of right-most '.' to find file extension. size_t dot_position = inputFile.rfind ('.', inputFile.length ()); if (dot_position != string::npos) // Extract extension from file name rdfFormat = inputFile.substr (dot_position + 1, string::npos); /** * If rdfFormat is still "", it means -f was not specified and the file * didn't have any extension. The default format is defined at the top * of this file: RDFNotation notation = NTRIPLES; */ if (rdfFormat == "" || rdfFormat == "gz") { rdfFormat = "nt"; vout << "No input format detected: using N-Triples by default." << endl; } } // ASSERT: here rdfFormat must be != "" // Lower-case rdfFormat transform (rdfFormat.begin (), rdfFormat.end (), rdfFormat.begin (), ::tolower); // Detect input format if (rdfFormat=="nquads" || rdfFormat=="nq") { notation = NQUADS; } else if (rdfFormat== "ntriples" || rdfFormat=="nt") { notation = NTRIPLES; } else if (rdfFormat=="trig") { notation = TRIG; } else if (rdfFormat=="turtle" || rdfFormat=="ttl") { notation = TURTLE; // -f or file extension detected, but didn't match any valid format. } else { cerr << "ERROR: Input format `" << rdfFormat << "' is not supported.\n" << "Use either of the following:\n" << "\t- `ntriples' or `nt' for N-Triples\n" << "\t- `nquads' or `nq' for N-Quads\n" << "\t- `turtle' or `ttl' for Turtle\n" << "\t- `trig' for TriG" << endl; return 1; } vout << "Detected RDF input format: " << rdfFormat << endl; // Process HDTSpecification spec(configFile); spec.setOptions(options); try { // Read RDF StopWatch globalTimer; ProgressListener* progress = showProgress ? new StdoutProgressListener() : NULL; HDT *hdt = HDTManager::generateHDT(inputFile.c_str(), baseUri.c_str(), notation, spec, progress); ofstream out; // Save HDT hdt->saveToHDT(outputFile.c_str(), progress); globalTimer.stop(); vout << "HDT Successfully generated." << endl; vout << "Total processing time: "; vout << "Clock(" << globalTimer.getRealStr(); vout << ") User(" << globalTimer.getUserStr(); vout << ") System(" << globalTimer.getSystemStr() << ")" << endl; if(generateIndex) { hdt = HDTManager::indexedHDT(hdt, progress); } delete hdt; delete progress; } catch (std::exception& e) { cerr << "ERROR: " << e.what() << endl; return 1; } }
int main(int argc, char **argv) { int c; char *inputFile=NULL, *insertFile=NULL, *removeFile=NULL, *outputFile=NULL; char *insertSubject=NULL, *insertPredicate=NULL, *insertObject=NULL; char *removeSubject=NULL, *removePredicate=NULL, *removeObject=NULL; bool insertSingle = false; bool removeSingle = false; bool insertMultiple = false; bool removeMultiple = false; while ((c = getopt(argc, argv, "hO:i:r:I:R:")) != -1) { switch (c) { case 'h': help(); break; case 'O': outputFile = optarg; break; case 'i': insertSingle = true; insertSubject = optarg; insertPredicate = argv[optind++]; insertObject = argv[optind++]; break; case 'r': removeSingle = true; removeSubject = optarg; removePredicate = argv[optind++]; removeObject = argv[optind++]; break; case 'I': insertMultiple = true; insertFile = optarg; break; case 'R': removeMultiple = true; removeFile = optarg; break; default: cout << "ERROR: Unknown option" << endl; help(); return 1; } } if (argc - optind < 2) { cout << "ERROR: You must supply an input and output HDT File" << endl << endl; help(); return 1; } inputFile = argv[optind]; outputFile = argv[optind+1]; if (strcmp(inputFile,outputFile)==0){ cerr<< "ERROR: input and output files must me different" << endl <<endl; return 1; } try { // LOAD HDT *hdt = HDTManager::mapHDT(inputFile); // Replace header Header *head = hdt->getHeader(); if (insertSingle) { TripleString ti(insertSubject, insertPredicate, insertObject); head->insert(ti); } if (removeSingle) { TripleString ti(removeSubject, removePredicate, removeObject); head->remove(ti); } if (insertMultiple) { string line; std::ifstream infile(insertFile); while (getline(infile, line)) { TripleString ti; ti.read(line); head->insert(ti); } } if (removeMultiple) { string line; std::ifstream infile(removeFile); while (getline(infile, line)) { TripleString ti; ti.read(line); head->remove(ti); } } // SAVE hdt->saveToHDT(outputFile); delete hdt; } catch (std::exception& e) { cerr << "ERROR: " << e.what() << endl; return 1; } }