void iterate(HDT *hdt, char *query, ostream &out, bool measure) { TripleString tripleString; tripleString.read(query); const char *subj = tripleString.getSubject().c_str(); const char *pred = tripleString.getPredicate().c_str(); const char *obj = tripleString.getObject().c_str(); if(strcmp(subj, "?")==0) { subj=""; } if(strcmp(pred, "?")==0) { pred=""; } if(strcmp(obj, "?")==0) { obj=""; } #if 0 cout << "Subject: |" << subj <<"|"<< endl; cout << "Predicate: |" << pred <<"|"<< endl; cout << "Object: |" << obj << "|"<<endl; #endif try { IteratorTripleString *it = hdt->search(subj, pred, obj); StopWatch st; unsigned int numTriples=0; while(it->hasNext() && interruptSignal==0) { TripleString *ts = it->next(); if(!measure) out << *ts << endl; numTriples++; } cout << numTriples << " results in " << st << endl; delete it; interruptSignal=0; // Interrupt caught, enable again. } catch (char *e) { cerr << e << endl; } }
int main(int argc, char *argv[]) { int c; string inputFile, outputFile, limit; string type = "null"; string dir = "data/hdt/"; while ((c = getopt(argc, argv, "hi:t:l:o:d:")) != -1) { switch (c) { case 'h': help(); break; case 'd': dir = optarg; break; case 'i': inputFile = optarg; break; case 'o': outputFile = optarg; break; case 't': type = optarg; break; case 'l': limit = optarg; break; default: cout << "ERROR: Unknown option" << endl; help(); return 1; } } // Load HDT file vector<HDT*> HDTversions; int numVersions = 0; if (limit.length() > 0) { numVersions = atoi((char*) limit.c_str()); } else { cerr << "[WARNING] limit not provided, trying to load 58 versions" << endl; numVersions = 58; } ostream *out; ofstream outF; if (outputFile != "") { outF.open(outputFile.c_str()); out = &outF; } else { out = &cout; } for (int i = 0; i < numVersions; i++) { std::stringstream sstm; sstm << dir << (i + 1) << ".hdt"; cout << "Loading " << sstm.str() << endl; HDTversions.push_back( HDTManager::mapIndexedHDT((char*) sstm.str().c_str())); //cout<<"loaded 1.hdt! Press any key to load 2.hdt"<<endl; //int c = getchar(); } cout << "WARMUP... " << endl; for (int i = 0; i < numVersions; i++) { // Enumerate all different predicates cout << "Dataset " << (i + 1) << " contains " << HDTversions[i]->getDictionary()->getNpredicates() << " predicates." << endl; // Enumerate all triples matching a pattern ("" means any) IteratorTripleString *it = HDTversions[i]->search("", "", ""); int count = 0; while (it->hasNext() && count < 100) { TripleString *triple = it->next(); //cout << "Result Warmup: " << triple->getSubject() << ", " << triple->getPredicate() << ", " << triple->getObject() << endl; count++; } delete it; // Remember to delete iterator to avoid memory leaks! /*IteratorUCharString *itPred = HDTversions[i]->getDictionary()->getPredicates(); while(itPred->hasNext()) { unsigned char *str = itPred->next(); // Warning this pointer is only valid until next call to next(); cout << str << endl; itPred->freeStr(str); } delete itPred; // Remember to delete iterator to avoid memory leaks! */ } cout << "... WARMUP finished!" << endl; if (type == "null") { cerr << "[ERROR] Please provide a type of query (-t [s,p,o])" << endl; help(); exit(0); } //read queries cout << "opening file:" << inputFile << endl; std::ifstream file((char*) inputFile.c_str()); cout << "opened! " << endl; if (!file.good()) throw "unable to open filter file"; string linea = ""; vector<double> times(numVersions, 0); int num_queries = 0; while (!file.eof()) { getline(file, linea); cout << "Reading line:" << linea << endl; if (linea.length() == 0) continue; size_t pos = linea.find(' '); if (pos != std::string::npos) { string query = linea.substr(0, pos); string subject = "", predicate = "", object = ""; if (type == "s") { subject = query; } else if (type == "p") { predicate = query; } else if (type == "o") { object = query; } else { vector<string> elements = split(linea, " "); if (type == "sp") { subject = elements[0]; predicate = elements[1]; } else if (type == "so") { subject = elements[0]; object = elements[1]; } else if (type == "po") { predicate = elements[0]; object = elements[1]; } else if (type == "spo") { subject = elements[0]; predicate = elements[1]; object = elements[2]; } } for (int i = 0; i < numVersions; i++) { StopWatch st; IteratorTripleString *it = HDTversions[i]->search( subject.c_str(), predicate.c_str(), object.c_str()); int numResults = 0; while (it->hasNext()) { TripleString *triple = it->next(); //cout << "Result: " << triple->getSubject() << ", " << triple->getPredicate() << ", " << triple->getObject() << endl; numResults++; } delete it; double time = st.toMillis(); cout << numResults << " Results in " << time << " ms" << endl; times[i] = times[i] + time; } num_queries++; } } //compute mean of queries *out << "<version>,<mean_time>,<total>" << endl; for (int i = 0; i < numVersions; i++) { *out << (i + 1) << "," << times[i] / num_queries<<","<<times[i] << endl; } for (int i = 0; i < numVersions; i++) { delete HDTversions[i]; // Remember to delete instance when no longer needed! } if (outputFile != "") { outF.close(); } }
int main(int argc, char **argv) { int c; string outputFile; while( (c = getopt(argc,argv,"ho:"))!=-1) { switch(c) { case 'h': help(); break; case 'o': outputFile = optarg; break; default: cout << "ERROR: Unknown option" << endl; help(); return 1; } } if(argc-optind<1) { cout << "ERROR: You must supply an HDT File" << endl << endl; help(); return 1; } try { #ifdef HAVE_LIBZ igzstream *inGz=NULL; #endif ifstream *inF=NULL; istream *in=NULL; string inputFile = argv[optind]; std::string suffix = inputFile.substr(inputFile.find_last_of(".") + 1); std::string pipeCommand; if( suffix == "gz"){ #ifdef HAVE_LIBZ in = inGz = new igzstream(inputFile.c_str()); #else throw std::runtime_error("Support for GZIP was not compiled in this version. Please Decompress the file before importing it."); #endif } else { in = inF = new ifstream(inputFile.c_str(), ios::binary); } if (!in->good()) { cerr << "Error opening file " << inputFile << endl; throw std::runtime_error("Error opening file for reading"); } ControlInformation controlInformation; // Load Global Control Information controlInformation.load(*in); // Load header controlInformation.load(*in); Header *header = HDTFactory::readHeader(controlInformation); header->load(*in, controlInformation); if( suffix == "gz") { #ifdef HAVE_LIBZ inGz->close(); #endif } else { inF->close(); } // Save IteratorTripleString *it = header->search("","",""); while(it->hasNext()) { TripleString *ts = it->next(); cout << *ts << " ."<< endl; } if(outputFile!="") { ofstream out(outputFile.c_str()); if(!out.good()){ throw std::runtime_error("Could not open output file."); } RDFSerializerNTriples serializer(out, NTRIPLES); serializer.serialize(it); out.close(); } else { RDFSerializerNTriples serializer(cout, NTRIPLES); serializer.serialize(it); } delete it; delete header; } catch (std::exception& e) { cerr << "ERROR: " << e.what() << endl; } }