void BasicHDT::loadTriples(const char* fileName, const char* baseUri, RDFNotation notation, ProgressListener* listener) { // Generate Triples ModifiableTriples* triplesList = new TriplesList(spec); //ModifiableTriples *triplesList = new TriplesKyoto(spec); //ModifiableTriples *triplesList = new TripleListDisk(); StopWatch st; IntermediateListener iListener(listener); try { NOTIFY(listener, "Loading Triples", 0, 100); iListener.setRange(0, 60); triplesList->startProcessing(&iListener); TriplesLoader tripLoader(dictionary, triplesList, &iListener); RDFParserCallback *pars = RDFParserCallback::getParserCallback( notation); pars->doParse(fileName, baseUri, notation, &tripLoader); delete pars; header->insert("_:statistics", HDTVocabulary::ORIGINAL_SIZE, tripLoader.getSize()); triplesList->stopProcessing(&iListener); // SORT & Duplicates TripleComponentOrder order = parseOrder( spec.get("triplesOrder").c_str()); if (order == Unknown) { order = SPO; } iListener.setRange(80, 85); triplesList->sort(order, &iListener); iListener.setRange(85, 90); triplesList->removeDuplicates(&iListener); } catch (const char *e) { cout << "Catch exception triples" << e << endl; delete triplesList; throw e; } catch (char *e) { cout << "Catch exception triples" << e << endl; delete triplesList; throw e; } if (triples->getType() == triplesList->getType()) { delete triples; triples = triplesList; } else { iListener.setRange(90, 100); try { triples->load(*triplesList, &iListener); } catch (const char* e) { delete triplesList; throw e; } delete triplesList; } //cout << triples->getNumberOfElements() << " triples added in " << st << endl << endl; }
void BasicHDT::loadTriplesFromHDTs(const char** fileNames, size_t numFiles, const char* baseUri, ProgressListener* listener) { // Generate Triples ModifiableTriples* triplesList = new TriplesList(spec); //ModifiableTriples *triplesList = new TriplesKyoto(spec); //ModifiableTriples *triplesList = new TripleListDisk(); StopWatch st; IntermediateListener iListener(listener); try { NOTIFY(listener, "Loading Triples", 0, 100); iListener.setRange(0, 60); triplesList->startProcessing(&iListener); TriplesLoader tripLoader(dictionary, triplesList, &iListener); // FIXME: Import from files uint64_t totalOriginalSize=0; BasicHDT hdt; for(size_t i=0;i<numFiles;i++) { const char *fileName = fileNames[i]; cout << endl << "Load triples from " << fileName << endl; hdt.mapHDT(fileName); Dictionary *dict = hdt.getDictionary(); // Create mapping arrays cout << "Generating mapping subjects" << endl; unsigned int nsubjects = dict->getNsubjects(); LogSequence2 subjectMap(bits(dictionary->getNsubjects()), nsubjects); subjectMap.resize(nsubjects); for(unsigned int i=0;i<nsubjects;i++) { string str = dict->idToString(i+1, SUBJECT); unsigned int newid = dictionary->stringToId(str, SUBJECT); subjectMap.set(i, newid); } cout << "Generating mapping predicates" << endl; unsigned int npredicates = dict->getNpredicates(); LogSequence2 predicateMap(bits(dictionary->getNpredicates()), npredicates); predicateMap.resize(npredicates); for(unsigned int i=0;i<npredicates;i++) { string str = dict->idToString(i+1, PREDICATE); unsigned int newid = dictionary->stringToId(str, PREDICATE); predicateMap.set(i, newid); } cout << "Generating mapping objects" << endl; unsigned int nobjects = dict->getNobjects(); LogSequence2 objectMap(bits(dictionary->getNobjects()), nobjects); objectMap.resize(nobjects); for(unsigned int i=0;i<nobjects;i++) { string str = dict->idToString(i+1, OBJECT); unsigned int newid = dictionary->stringToId(str, OBJECT); objectMap.set(i, newid); } totalOriginalSize += hdt.getHeader()->getPropertyLong("_:statistics", HDTVocabulary::ORIGINAL_SIZE.c_str()); size_t numtriples = hdt.getTriples()->getNumberOfElements(); IteratorTripleID *it = hdt.getTriples()->searchAll(); TripleID newTid; char str[100]; long long int j = 0; while(it->hasNext()) { TripleID *tid = it->next(); newTid.setAll( (unsigned int)subjectMap.get(tid->getSubject()-1), (unsigned int)predicateMap.get(tid->getPredicate()-1), (unsigned int)objectMap.get(tid->getObject()-1) ); triplesList->insert(newTid); if ((listener != NULL) && (j % 100000) == 0) { sprintf(str, "%lld triples added.", j); listener->notifyProgress((j*100)/numtriples, str); } j++; } delete it; } triplesList->stopProcessing(&iListener); // SORT & Duplicates TripleComponentOrder order = parseOrder(spec.get("triplesOrder").c_str()); if (order == Unknown) { order = SPO; } iListener.setRange(80, 85); triplesList->sort(order, &iListener); iListener.setRange(85, 90); triplesList->removeDuplicates(&iListener); header->insert("_:statistics", HDTVocabulary::ORIGINAL_SIZE, totalOriginalSize); } catch (const char *e) { cout << "Catch exception triples" << e << endl; delete triplesList; throw e; } catch (char *e) { cout << "Catch exception triples" << e << endl; delete triplesList; throw e; } if (triples->getType() == triplesList->getType()) { delete triples; triples = triplesList; } else { iListener.setRange(90, 100); try { triples->load(*triplesList, &iListener); } catch (const char* e) { delete triplesList; throw e; } delete triplesList; } //cout << triples->getNumberOfElements() << " triples added in " << st << endl << endl; }