/**
 * Inserts into this container every triple produced by input.searchAll().
 *
 * @param input    Source of the triples; it is walked once, front to back.
 * @param listener Not used by this implementation.
 */
void TriplesKyoto::load(ModifiableTriples &input, ProgressListener *listener) {
    IteratorTripleID *cursor = input.searchAll();

    for (; cursor->hasNext();) {
        this->insert(*cursor->next());
    }

    // The iterator obtained from searchAll() is released by this function.
    delete cursor;
}
int main(int argc, char **argv) { int c; string query, inputFile, outputFile; bool measure = false; while( (c = getopt(argc,argv,"hq:o:m"))!=-1) { switch(c) { case 'h': help(); break; case 'q': query = optarg; break; case 'o': outputFile = optarg; break; case 'm': measure = true; break; default: cout << "ERROR: Unknown option" << endl; help(); return 1; } } if(argc-optind<1) { cout << "ERROR: You must supply an HDT File" << endl << endl; help(); return 1; } inputFile = argv[optind]; try { HDT *hdt = HDTManager::mapHDT(inputFile.c_str()); TripleID pat(0,1,0); #if 0 IteratorTripleID *it = hdt->getTriples()->search(pat); StopWatch st; unsigned int numTriples = 0; while(it->hasNext() && numTriples < 4) { TripleID *ts = it->next(); cout << *ts << endl; numTriples++; } cout << "------" << endl; while(it->hasPrevious()) { TripleID *ts = it->previous(); cout << *ts << endl; } cout << "------" << endl; numTriples=0; while(it->hasNext() && numTriples<10) { TripleID *ts = it->next(); cout << *ts << endl; numTriples++; } delete it; cout << numTriples << " results in " << st << endl; #else IteratorTripleID *it = hdt->getTriples()->search(pat); #if 1 while(it->hasNext()) { cout << *it->next() << endl; } it->goToStart(); cout << "------" << endl; #endif RandomAccessIterator rit(it); int i; for(i=0;i<rit.getNumElements() && i < 20;i++) { TripleID *tid = rit.get(i); cout << i << " => " << *tid << endl; } cout << "------" << endl; for(i-- ; i>=0; i--) { TripleID *tid = rit.get(i); cout << i << " => " << *tid << endl; } cout << "------" << endl; for(i=0;i<rit.getNumElements() && i<20;i++) { TripleID *tid = rit.get(i); cout << i << " => " << *tid << endl; } cout << "------" << endl; cout << 2 << " => " << *rit.get(2) << endl; cout << 7 << " => " << *rit.get(7) << endl; cout << 1 << " => " << *rit.get(1) << endl; cout << 5 << " => " << *rit.get(5) << endl; cout << 0 << " => " << *rit.get(0) << endl; cout << 9 << " => " << *rit.get(9) << endl; cout << 8 << " => " << *rit.get(8) << endl; 
delete it; #endif delete hdt; } catch (char *e) { cout << "ERROR: " << e << endl; } catch (const char *e) { cout << "ERROR: " << e << endl; } }
/**
 * Builds the Y and Z sequences of this CompactTriples from `triples`.
 *
 * The input is first sorted by this object's `order`. Every triple is then
 * passed through swapComponentOrder(triple, SPO, order) and read as (x, y, z):
 *   - when x differs from the previous triple, a 0 entry is appended to both
 *     sequences before the new y and z;
 *   - when only y differs, y is appended to the Y sequence and a 0 entry
 *     followed by z to the Z sequence;
 *   - otherwise only z is appended to the Z sequence.
 * The finished sequences are handed to streamY and streamZ, and numTriples
 * is incremented once per triple read.
 *
 * @param triples  Triples to convert; sorted in place by this call.
 * @param listener Progress sink forwarded to NOTIFYCOND.
 */
void CompactTriples::load(ModifiableTriples &triples, ProgressListener *listener) {
    triples.sort(order);

    IteratorTripleID *cursor = triples.searchAll();

    vector<unsigned int> vectorY;
    vector<unsigned int> vectorZ;
    unsigned int prevX = 0;
    unsigned int prevY = 0;

    // The very first triple opens the sequences: nothing precedes it, so no
    // 0 entry is written and no progress notification is issued for it.
    if (cursor->hasNext()) {
        TripleID *first = cursor->next();
        swapComponentOrder(first, SPO, order);

        prevX = first->getSubject();
        prevY = first->getPredicate();

        vectorY.push_back(prevY);
        vectorZ.push_back(first->getObject());
        numTriples++;
    }

    // Every following triple is compared against its predecessor.
    while (cursor->hasNext()) {
        TripleID *current = cursor->next();
        swapComponentOrder(current, SPO, order);

        const unsigned int x = current->getSubject();
        const unsigned int y = current->getPredicate();
        const unsigned int z = current->getObject();

        const bool xChanged = (x != prevX);
        const bool yListChanged = xChanged || (y != prevY);

        if (xChanged) {
            vectorY.push_back(0);
        }
        if (yListChanged) {
            vectorY.push_back(y);
            vectorZ.push_back(0);
        }
        vectorZ.push_back(z);

        prevX = x;
        prevY = y;

        NOTIFYCOND(listener, "Converting to CompactTriples.", numTriples, triples.getNumberOfElements());
        numTriples++;
    }

    delete cursor;

    VectorUIntIterator itY(vectorY);
    VectorUIntIterator itZ(vectorZ);

    streamY->add(itY);
    streamZ->add(itZ);

#if 0
    // Debug Adjacency Lists
    cout << "Y" << vectorY.size() << "): ";
    for(unsigned int i=0;i<arrayY->getNumberOfElements();i++){
        cout << arrayY->get(i) << " ";
    }
    cout << endl;

    cout << "Z" << vectorZ.size() << "): ";
    for(unsigned int i=0;i<arrayZ->getNumberOfElements();i++){
        cout << arrayZ->get(i) << " ";
    }
    cout << endl;
#endif
}
void BasicHDT::loadTriplesFromHDTs(const char** fileNames, size_t numFiles, const char* baseUri, ProgressListener* listener) { // Generate Triples ModifiableTriples* triplesList = new TriplesList(spec); //ModifiableTriples *triplesList = new TriplesKyoto(spec); //ModifiableTriples *triplesList = new TripleListDisk(); StopWatch st; IntermediateListener iListener(listener); try { NOTIFY(listener, "Loading Triples", 0, 100); iListener.setRange(0, 60); triplesList->startProcessing(&iListener); TriplesLoader tripLoader(dictionary, triplesList, &iListener); // FIXME: Import from files uint64_t totalOriginalSize=0; BasicHDT hdt; for(size_t i=0;i<numFiles;i++) { const char *fileName = fileNames[i]; cout << endl << "Load triples from " << fileName << endl; hdt.mapHDT(fileName); Dictionary *dict = hdt.getDictionary(); // Create mapping arrays cout << "Generating mapping subjects" << endl; unsigned int nsubjects = dict->getNsubjects(); LogSequence2 subjectMap(bits(dictionary->getNsubjects()), nsubjects); subjectMap.resize(nsubjects); for(unsigned int i=0;i<nsubjects;i++) { string str = dict->idToString(i+1, SUBJECT); unsigned int newid = dictionary->stringToId(str, SUBJECT); subjectMap.set(i, newid); } cout << "Generating mapping predicates" << endl; unsigned int npredicates = dict->getNpredicates(); LogSequence2 predicateMap(bits(dictionary->getNpredicates()), npredicates); predicateMap.resize(npredicates); for(unsigned int i=0;i<npredicates;i++) { string str = dict->idToString(i+1, PREDICATE); unsigned int newid = dictionary->stringToId(str, PREDICATE); predicateMap.set(i, newid); } cout << "Generating mapping objects" << endl; unsigned int nobjects = dict->getNobjects(); LogSequence2 objectMap(bits(dictionary->getNobjects()), nobjects); objectMap.resize(nobjects); for(unsigned int i=0;i<nobjects;i++) { string str = dict->idToString(i+1, OBJECT); unsigned int newid = dictionary->stringToId(str, OBJECT); objectMap.set(i, newid); } totalOriginalSize += 
hdt.getHeader()->getPropertyLong("_:statistics", HDTVocabulary::ORIGINAL_SIZE.c_str()); size_t numtriples = hdt.getTriples()->getNumberOfElements(); IteratorTripleID *it = hdt.getTriples()->searchAll(); TripleID newTid; char str[100]; long long int j = 0; while(it->hasNext()) { TripleID *tid = it->next(); newTid.setAll( (unsigned int)subjectMap.get(tid->getSubject()-1), (unsigned int)predicateMap.get(tid->getPredicate()-1), (unsigned int)objectMap.get(tid->getObject()-1) ); triplesList->insert(newTid); if ((listener != NULL) && (j % 100000) == 0) { sprintf(str, "%lld triples added.", j); listener->notifyProgress((j*100)/numtriples, str); } j++; } delete it; } triplesList->stopProcessing(&iListener); // SORT & Duplicates TripleComponentOrder order = parseOrder(spec.get("triplesOrder").c_str()); if (order == Unknown) { order = SPO; } iListener.setRange(80, 85); triplesList->sort(order, &iListener); iListener.setRange(85, 90); triplesList->removeDuplicates(&iListener); header->insert("_:statistics", HDTVocabulary::ORIGINAL_SIZE, totalOriginalSize); } catch (const char *e) { cout << "Catch exception triples" << e << endl; delete triplesList; throw e; } catch (char *e) { cout << "Catch exception triples" << e << endl; delete triplesList; throw e; } if (triples->getType() == triplesList->getType()) { delete triples; triples = triplesList; } else { iListener.setRange(90, 100); try { triples->load(*triplesList, &iListener); } catch (const char* e) { delete triplesList; throw e; } delete triplesList; } //cout << triples->getNumberOfElements() << " triples added in " << st << endl << endl; }
int main(int argc, char **argv) { int c; string query, inputFile, outputFile, filter1, filter2; bool measure = false; while ((c = getopt(argc, argv, "hq:o:mf:F:")) != -1) { switch (c) { case 'h': help(); break; case 'q': query = optarg; break; case 'o': outputFile = optarg; break; case 'm': measure = true; break; case 'f': filter1 = optarg; break; case 'F': filter2 = optarg; break; default: cout << "ERROR: Unknown option" << endl; help(); return 1; } } if (argc - optind < 1) { cout << "ERROR: You must supply an HDT File" << endl << endl; help(); return 1; } inputFile = argv[optind]; try { HDT *hdt = HDTManager::mapIndexedHDT(inputFile.c_str()); if (filter1 != "" || filter2 != "") { ostream *out; ofstream outF; if (outputFile != "") { outF.open(outputFile.c_str()); out = &outF; } else { out = &cout; } string infile; if (filter1 != "") infile = filter1; else infile = filter2; std::ifstream file(infile.c_str()); if (!file.good()) throw "unable to open filter file"; string linea = ""; string property = ""; string value = ""; StopWatch st_total; size_t totalQueryResults = 0; size_t numQuery=0; while (!file.eof()) { getline(file, linea); if(linea.length()==0) continue; size_t pos = linea.find(';'); if (pos != std::string::npos) { property = linea.substr(0, pos); value = linea.substr(pos + 1); cerr<<"Query "<<numQuery << ": "<<linea<<endl; cout<<">>> Query "<<numQuery << ": "<<linea<<endl; cout<<"property:"<<property<<endl; cout<<"value:"<<value<<endl<<endl; StopWatch st; hdt::LiteralDictionary *dict = dynamic_cast<hdt::LiteralDictionary *>(hdt->getDictionary()); if(dict==NULL) { cerr << "This dictionary does not support substring search" << endl; break; } hdt::Triples *triples = hdt->getTriples(); uint32_t *results = NULL; size_t numResults = dict->substringToId((unsigned char *) value.c_str(), value.length(), &results); TripleID pattern(0, dict->stringToId(property, PREDICATE), 0); for (size_t i = 0; i < numResults; i++) { pattern.setObject(results[i]); string objStr = 
dict->idToString(results[i], OBJECT); IteratorTripleID *it = triples->search(pattern); unsigned int numTriples = 0; //iterate over the first pattern while (it->hasNext()) { TripleID *ts = it->next(); if (filter1 != "") { // QUERY Q3 cout << dict->idToString(ts->getSubject(), SUBJECT) << " " << objStr << endl; totalQueryResults++; } else { // QUERY Q4 TripleID pat2(ts->getSubject(), 0, 0); TripleString out; string subjStr = dict->idToString(ts->getSubject(), SUBJECT); IteratorTripleID *it2 = triples->search(pat2); while(it2->hasNext()) { TripleID *inner = it2->next(); cout << subjStr << " " << dict->idToString(inner->getPredicate(), PREDICATE) <<" "; if(inner->getObject()==results[i]) { cout << objStr << endl; } else { cout << dict->idToString(inner->getObject(), OBJECT) << endl; } totalQueryResults++; } delete it2; } } delete it; } cout << ">>> Results: " << totalQueryResults << endl; cerr << "Query " << numQuery << " Results: " << totalQueryResults << " in " << st << endl << endl; numQuery++; } } cerr << "Total time: " << st_total << endl; if (outputFile != "") { outF.close(); } file.close(); } delete hdt; } catch (char *e) { cout << "ERROR: " << e << endl; } catch (const char *e) { cout << "ERROR: " << e << endl; } }