void PlainTriples::load(std::istream &input, ControlInformation &controlInformation, ProgressListener *listener) { std::string format = controlInformation.getFormat(); if(format!=getType()) { throw std::runtime_error("Trying to read PlainTriples but the data is not PlainTriples"); } //unsigned int numTriples = controlInformation.getUint("numTriples"); order = (TripleComponentOrder) controlInformation.getUint("order"); IntermediateListener iListener(listener); iListener.setRange(0,33); iListener.notifyProgress(0, "PlainTriples loading subjects"); delete streamX; streamX = IntSequence::getArray(input); streamX->load(input); iListener.setRange(33, 66); iListener.notifyProgress(0, "PlainTriples loading predicates"); delete streamY; streamY = IntSequence::getArray(input); streamY->load(input); iListener.setRange(66, 100); iListener.notifyProgress(0, "PlainTriples loading objects"); delete streamZ; streamZ = IntSequence::getArray(input); streamZ->load(input); }
void FourSectionDictionary::save(std::ostream & output, ControlInformation & controlInformation, ProgressListener *listener) { controlInformation.setFormat(HDTVocabulary::DICTIONARY_TYPE_FOUR); controlInformation.setUint("mapping", this->mapping); controlInformation.setUint("sizeStrings", this->sizeStrings); controlInformation.save(output); IntermediateListener iListener(listener); iListener.setRange(0,10); iListener.notifyProgress(0, "Dictionary save shared area."); shared->save(output); iListener.setRange(10,45); iListener.notifyProgress(0, "Dictionary save subjects."); subjects->save(output); iListener.setRange(45,60); iListener.notifyProgress(0, "Dictionary save predicates."); predicates->save(output); iListener.setRange(60,100); iListener.notifyProgress(0, "Dictionary save objects."); objects->save(output); }
size_t PlainHeader::load(unsigned char *ptr, unsigned char *ptrMax, ProgressListener *listener) { size_t count = 0; // Read ControlInformation ControlInformation controlInformation; count += controlInformation.load(&ptr[count], ptrMax); std::string format = controlInformation.getFormat(); uint32_t headerSize = controlInformation.getUint("length"); // FIXME: Use format to create custom parser. if(format!=HDTVocabulary::HEADER_NTRIPLES) { throw "This Header format is not supported"; } string str(&ptr[count], &ptr[count+headerSize]); // Convert into a stringstream stringstream strstream(str, stringstream::in); triples.clear(); // Parse header RDFParserNtriples parser(strstream, NTRIPLES); while(parser.hasNext()) { TripleString *ts = parser.next(); triples.push_back(*ts); } count+=headerSize; return count; }
void PlainHeader::load(std::istream & input, ControlInformation &controlInformation, ProgressListener *listener) { std::string format = controlInformation.getFormat(); uint32_t headerSize = controlInformation.getUint("length"); // FIXME: Use format to create custom parser. if(format!=HDTVocabulary::HEADER_NTRIPLES) { throw "This Header format is not supported"; } // Read all header into a string string str(headerSize,'\0'); input.read(&str[0], headerSize); if(input.gcount()!=headerSize) { throw "Error reading header"; } // Convert into a stringstream stringstream strstream(str, stringstream::in); triples.clear(); // Parse header RDFParserNtriples parser(strstream, NTRIPLES); while(parser.hasNext()) { TripleString *ts = parser.next(); triples.push_back(*ts); } }
/**
 * Load the triples index stored next to the HDT file ("<fileName>.index"),
 * or generate and save it when that file cannot be opened.
 *
 * @param listener Progress listener for the load or generate/save steps.
 */
void BasicHDT::loadOrCreateIndex(ProgressListener *listener)
{
    const std::string indexPath = this->fileName + ".index";
    std::ifstream indexFile(indexPath.c_str(), std::ios::binary);

    if (!indexFile.good()) {
        // No usable index file: build the index (0-90%) and save it (90-100%).
        IntermediateListener progress(listener);
        progress.setRange(0, 90);
        triples->generateIndex(&progress);

        progress.setRange(90, 100);
        this->saveIndex(&progress);
        return;
    }

    if (mappedHDT) {
        // Map
        this->loadMMapIndex(listener);
    } else {
        // Read from file
        ControlInformation ci;
        ci.load(indexFile);
        triples->loadIndex(indexFile, ci, listener);
    }

    indexFile.close();
}
size_t FourSectionDictionary::load(unsigned char *ptr, unsigned char *ptrMax, ProgressListener *listener) { size_t count=0; IntermediateListener iListener(listener); ControlInformation ci; count += ci.load(&ptr[count], ptrMax); //this->mapping = ci.getUint("mapping"); this->mapping = MAPPING2; this->sizeStrings = ci.getUint("sizeStrings"); iListener.setRange(0,25); iListener.notifyProgress(0, "Dictionary read shared area."); delete shared; shared = csd::CSD::create(ptr[count]); if(shared==NULL){ shared = new csd::CSD_PFC(); throw std::runtime_error("Could not read shared."); } count += shared->load(&ptr[count], ptrMax); //shared = new csd::CSD_Cache(shared); iListener.setRange(25,50); iListener.notifyProgress(0, "Dictionary read subjects."); delete subjects; subjects = csd::CSD::create(ptr[count]); if(subjects==NULL){ subjects = new csd::CSD_PFC(); throw std::runtime_error("Could not read subjects."); } count += subjects->load(&ptr[count], ptrMax); //subjects = new csd::CSD_Cache(subjects); iListener.setRange(50,75); iListener.notifyProgress(0, "Dictionary read predicates."); delete predicates; predicates = csd::CSD::create(ptr[count]); if(predicates==NULL){ predicates = new csd::CSD_PFC(); throw std::runtime_error("Could not read predicates."); } count += predicates->load(&ptr[count], ptrMax); predicates = new csd::CSD_Cache2(predicates); iListener.setRange(75,100); iListener.notifyProgress(0, "Dictionary read objects."); delete objects; objects = csd::CSD::create(ptr[count]); if(objects==NULL){ objects = new csd::CSD_PFC(); throw std::runtime_error("Could not read objects."); } count += objects->load(&ptr[count], ptrMax); //objects = new csd::CSD_Cache(objects); return count; }
void PlainDictionary::save(std::ostream &output, ControlInformation &controlInformation, ProgressListener *listener) { controlInformation.setFormat(HDTVocabulary::DICTIONARY_TYPE_PLAIN); controlInformation.setUint("mapping", this->mapping); controlInformation.setUint("sizeStrings", this->sizeStrings); controlInformation.setUint("numEntries", this->getNumberOfElements()); controlInformation.save(output); unsigned int i = 0; unsigned int counter=0; const char marker = '\1'; //shared subjects-objects from subjects for (i = 0; i < shared.size(); i++) { output << shared[i]->str; output.put(marker); //character to split file counter++; NOTIFYCOND(listener, "PlainDictionary saving shared", counter, getNumberOfElements()); } output.put(marker); //extra line to set the begining of next part of dictionary //not shared subjects for (i = 0; i < subjects.size(); i++) { output << subjects[i]->str; output.put(marker); //character to split file counter++; NOTIFYCOND(listener, "PlainDictionary saving subjects", counter, getNumberOfElements()); } output.put(marker); //extra line to set the begining of next part of dictionary //not shared objects for (i = 0; i < objects.size(); i++) { output << objects[i]->str; output.put(marker); //character to split file counter++; NOTIFYCOND(listener, "PlainDictionary saving objects", counter, getNumberOfElements()); } output.put(marker); //extra line to set the begining of next part of dictionary //predicates for (i = 0; i < predicates.size(); i++) { output << predicates[i]->str; output.put(marker); //character to split file counter++; NOTIFYCOND(listener, "PlainDictionary saving predicates", counter, getNumberOfElements()); } output.put(marker); }
void FourSectionDictionary::load(std::istream & input, ControlInformation & ci, ProgressListener *listener) { std::string format = ci.getFormat(); if(format!=getType()) { throw std::runtime_error("Trying to read a FourSectionDictionary but the data is not FourSectionDictionary"); } //this->mapping = ci.getUint("mapping"); this->mapping = MAPPING2; this->sizeStrings = ci.getUint("sizeStrings"); IntermediateListener iListener(listener); iListener.setRange(0,25); iListener.notifyProgress(0, "Dictionary read shared area."); delete shared; shared = csd::CSD::load(input); if(shared==NULL){ shared = new csd::CSD_PFC(); throw std::runtime_error("Could not read shared."); } //shared = new csd::CSD_Cache(shared); iListener.setRange(25,50); iListener.notifyProgress(0, "Dictionary read subjects."); delete subjects; subjects = csd::CSD::load(input); if(subjects==NULL){ subjects = new csd::CSD_PFC(); throw std::runtime_error("Could not read subjects."); } //subjects = new csd::CSD_Cache(subjects); iListener.setRange(50,75); iListener.notifyProgress(0, "Dictionary read predicates."); delete predicates; predicates = csd::CSD::load(input); if(predicates==NULL){ predicates = new csd::CSD_PFC(); throw std::runtime_error("Could not read predicates."); } predicates = new csd::CSD_Cache2(predicates); iListener.setRange(75,100); iListener.notifyProgress(0, "Dictionary read objects."); delete objects; objects = csd::CSD::load(input); if(objects==NULL){ objects = new csd::CSD_PFC(); throw std::runtime_error("Could not read objects."); } //objects = new csd::CSD_Cache(objects); }
void TripleListDisk::save(std::ostream & output, ControlInformation &controlInformation, ProgressListener *listener) { controlInformation.setFormat(getType()); controlInformation.save(output); for(unsigned int i=0; i<numTotalTriples; i++) { TripleID *tid = getTripleID(i); if(tid->isValid()) { //cout << "Write: " << tid << " " << *tid << endl; output.write((char *)tid, sizeof(TripleID)); } } }
void PlainDictionary::load(std::istream & input, ControlInformation &ci, ProgressListener *listener) { std::string line; unsigned char region = 1; startProcessing(); std::string format = ci.getFormat(); if(format!=getType()) { throw "Trying to read a PlainDictionary but the data is not PlainDictionary"; } this->mapping = ci.getUint("mapping"); this->sizeStrings = ci.getUint("sizeStrings"); unsigned int numElements = ci.getUint("numEntries"); unsigned int numLine = 0; IntermediateListener iListener(listener); iListener.setRange(0,25); while(region<5 && getline(input, line,'\1')) { //std::cout << line << std::endl; if(line!="") { if (region == 1) { //shared SO NOTIFYCOND(&iListener, "Dictionary loading shared area.", numLine, numElements); insert(line, SHARED_SUBJECT); } else if (region == 2) { //not shared Subjects NOTIFYCOND(&iListener, "Dictionary loading subjects.", numLine, numElements); insert(line, NOT_SHARED_SUBJECT); NOTIFYCOND(&iListener, "Dictionary loading objects.", numLine, numElements); } else if (region == 3) { //not shared Objects insert(line, NOT_SHARED_OBJECT); NOTIFYCOND(&iListener, "Dictionary loading predicates.", numLine, numElements); } else if (region == 4) { //predicates insert(line, NOT_SHARED_PREDICATE); } } else { region++; } numLine++; } // No stopProcessing() Needed. Dictionary already split and sorted in file. updateIDs(); }
void CompactTriples::save(std::ostream & output, ControlInformation &controlInformation, ProgressListener *listener) { controlInformation.clear(); controlInformation.setUint("numTriples", getNumberOfElements()); controlInformation.setFormat(HDTVocabulary::TRIPLES_TYPE_COMPACT); controlInformation.setUint("order", order); controlInformation.save(output); IntermediateListener iListener(listener); iListener.setRange(0,30); iListener.notifyProgress(0, "CompactTriples saving Stream Y"); streamY->save(output); iListener.setRange(30,100); iListener.notifyProgress(0, "CompactTriples saving Stream Z"); streamZ->save(output); }
void PlainHeader::save(std::ostream & output, ControlInformation &controlInformation, ProgressListener *listener) { // TODO: Choose format from spec (NTRIPLES, RDFXML...) and implement. // Dump header into a stringbuffer to know size. stringstream strbuf(stringstream::out); for(vector<TripleString>::iterator it = triples.begin(); it!=triples.end(); it++){ strbuf << *it << " ." << endl; } string str = strbuf.str(); // Dump header controlInformation.setFormat(HDTVocabulary::HEADER_NTRIPLES); controlInformation.setUint("length", str.length()); controlInformation.save(output); // Dump data output << str; }
size_t BasicHDT::loadMMap(unsigned char *ptr, unsigned char *ptrMax, ProgressListener *listener) { size_t count=0; ControlInformation controlInformation; IntermediateListener iListener(listener); // Load Global ControlInformation count+=controlInformation.load(&ptr[count], ptrMax); std::string hdtFormat = controlInformation.getFormat(); if(hdtFormat!=HDTVocabulary::HDT_CONTAINER) { throw "This software cannot open this version of HDT File."; } // Load Header iListener.setRange(0,5); controlInformation.load(&ptr[count], ptrMax); delete header; header = HDTFactory::readHeader(controlInformation); count+= header->load(&ptr[count], ptrMax, &iListener); // Load dictionary iListener.setRange(5, 60); controlInformation.load(&ptr[count], ptrMax); delete dictionary; dictionary = HDTFactory::readDictionary(controlInformation); count += dictionary->load(&ptr[count], ptrMax, &iListener); // Load triples iListener.setRange(60,100); controlInformation.load(&ptr[count], ptrMax); delete triples; triples = HDTFactory::readTriples(controlInformation); count += triples->load(&ptr[count], ptrMax, &iListener); return count; }
void PlainTriples::save(std::ostream & output, ControlInformation &controlInformation, ProgressListener *listener) { controlInformation.clear(); controlInformation.setUint("numTriples", getNumberOfElements()); controlInformation.setFormat(HDTVocabulary::TRIPLES_TYPE_PLAIN); controlInformation.setUint("order", order); controlInformation.save(output); IntermediateListener iListener(listener); iListener.setRange(0,33); iListener.notifyProgress(0, "PlainTriples saving subjects"); streamX->save(output); iListener.setRange(33, 66); iListener.notifyProgress(0, "PlainTriples saving predicates"); streamY->save(output); iListener.setRange(66, 100); iListener.notifyProgress(0, "PlainTriples saving objects"); streamZ->save(output); }
void CompactTriples::load(std::istream &input, ControlInformation &controlInformation, ProgressListener *listener) { std::string format = controlInformation.getFormat(); if(format != HDTVocabulary::TRIPLES_TYPE_COMPACT) { throw "Trying to read CompactTriples but data is not CompactTriples"; } numTriples = controlInformation.getUint("numTriples"); order = (TripleComponentOrder) controlInformation.getUint("order"); IntermediateListener iListener(listener); iListener.setRange(0,30); iListener.notifyProgress(0, "CompactTriples loading Stream Y"); delete streamY; streamY = IntSequence::getArray(input); streamY->load(input); iListener.setRange(30,100); iListener.notifyProgress(0, "CompactTriples saving Stream Y"); delete streamZ; streamZ = IntSequence::getArray(input); streamZ->load(input); }
void TripleListDisk::load(std::istream & input, ControlInformation &controlInformation, ProgressListener *listener) { // FIXME: Read controlInformation std::string format = controlInformation.getFormat(); if(format!=getType()) { throw "Trying to read a FourSectionDictionary but the data is not FourSectionDictionary"; } this->ensureSize(numTotalTriples); unsigned int numRead=0; while(input.good() && numRead<numTotalTriples) { input.read((char *)&arrayTriples[numRead], sizeof(TripleID)); numRead++; } cout << "Succesfully read triples: " << numRead << endl; }
void BasicHDT::loadFromHDT(std::istream & input, ProgressListener *listener) { try { ControlInformation controlInformation; IntermediateListener iListener(listener); // Load Global ControlInformation. controlInformation.load(input); std::string hdtFormat = controlInformation.getFormat(); if(hdtFormat!=HDTVocabulary::HDT_CONTAINER) { throw "This software cannot open this version of HDT File."; } // Load header iListener.setRange(0,5); controlInformation.load(input); delete header; header = HDTFactory::readHeader(controlInformation); header->load(input, controlInformation, &iListener); //Load Dictionary. iListener.setRange(5, 60); controlInformation.load(input); delete dictionary; dictionary = HDTFactory::readDictionary(controlInformation); dictionary->load(input, controlInformation, &iListener); // Load Triples iListener.setRange(60,100); controlInformation.load(input); delete triples; triples = HDTFactory::readTriples(controlInformation); triples->load(input, controlInformation, &iListener); } catch (const char *ex) { cout << "Exception loading HDT: " << ex; deleteComponents(); createComponents(); throw ex; } catch (char *ex) { cout << "Exception loading HDT: " << ex; deleteComponents(); createComponents(); throw ex; } }
void BasicHDT::saveToHDT(std::ostream & output, ProgressListener *listener) { ControlInformation controlInformation; IntermediateListener iListener(listener); controlInformation.clear(); controlInformation.setType(GLOBAL); controlInformation.setFormat(HDTVocabulary::HDT_CONTAINER); controlInformation.save(output); controlInformation.clear(); controlInformation.setType(HEADER); iListener.setRange(0,5); header->save(output, controlInformation, &iListener); controlInformation.clear(); controlInformation.setType(DICTIONARY); iListener.setRange(5,70); dictionary->save(output, controlInformation, &iListener); controlInformation.clear(); controlInformation.setType(TRIPLES); iListener.setRange(70,100); triples->save(output, controlInformation, &iListener); }
int main(int argc, char **argv) { int c; string outputFile; while( (c = getopt(argc,argv,"ho:"))!=-1) { switch(c) { case 'h': help(); break; case 'o': outputFile = optarg; break; default: cout << "ERROR: Unknown option" << endl; help(); return 1; } } if(argc-optind<1) { cout << "ERROR: You must supply an HDT File" << endl << endl; help(); return 1; } try { #ifdef HAVE_LIBZ igzstream *inGz=NULL; #endif ifstream *inF=NULL; istream *in=NULL; string inputFile = argv[optind]; std::string suffix = inputFile.substr(inputFile.find_last_of(".") + 1); std::string pipeCommand; if( suffix == "gz"){ #ifdef HAVE_LIBZ in = inGz = new igzstream(inputFile.c_str()); #else throw std::runtime_error("Support for GZIP was not compiled in this version. Please Decompress the file before importing it."); #endif } else { in = inF = new ifstream(inputFile.c_str(), ios::binary); } if (!in->good()) { cerr << "Error opening file " << inputFile << endl; throw std::runtime_error("Error opening file for reading"); } ControlInformation controlInformation; // Load Global Control Information controlInformation.load(*in); // Load header controlInformation.load(*in); Header *header = HDTFactory::readHeader(controlInformation); header->load(*in, controlInformation); if( suffix == "gz") { #ifdef HAVE_LIBZ inGz->close(); #endif } else { inF->close(); } // Save IteratorTripleString *it = header->search("","",""); while(it->hasNext()) { TripleString *ts = it->next(); cout << *ts << " ."<< endl; } if(outputFile!="") { ofstream out(outputFile.c_str()); if(!out.good()){ throw std::runtime_error("Could not open output file."); } RDFSerializerNTriples serializer(out, NTRIPLES); serializer.serialize(it); out.close(); } else { RDFSerializerNTriples serializer(cout, NTRIPLES); serializer.serialize(it); } delete it; delete header; } catch (std::exception& e) { cerr << "ERROR: " << e.what() << endl; } }
int main(int argc, char **argv) { int c; string query, inputFile, outputFile; bool measure = false; while( (c = getopt(argc,argv,"hq:o:m"))!=-1) { switch(c) { case 'h': break; case 'q': query = optarg; break; case 'o': outputFile = optarg; break; case 'm': measure = true; break; default: cout << "ERROR: Unknown option" << endl; return 1; } } if(argc-optind<2) { cout << "ERROR: You must supply an input and HDT File" << endl << endl; return 1; } inputFile = argv[optind]; outputFile = argv[optind+1]; ConvertProgress progress; StopWatch st; try { // LOAD HDT *hdt = HDTManager::mapHDT(inputFile.c_str()); // CONVERT triples to TripleList TriplesList tlist; Triples *triples = hdt->getTriples(); cout << "Old Triples -> TriplesList" << endl; st.reset(); IteratorTripleID *it = triples->searchAll(); tlist.insert(it); delete it; cout << " Old Triples -> TriplesList time" << st << endl; // Convert tlist to OPS cout << "TriplesList sort OPS" << endl; st.reset(); tlist.sort(OPS, &progress); cout << " TriplesList sort OPS time: " << st << endl; // Generate new OPS BitmapTriples cout << "TriplesList to new BitmapTriples" << endl; HDTSpecification spec; spec.set("triplesOrder", "OPS"); BitmapTriples bt(spec); st.reset(); bt.load(tlist, &progress); cout << " TriplesList to new BitmapTriples time" << st << endl; // Update Header #if 1 cout << "Update Header" << endl; string rootNode("_:triples"); TripleString ts (rootNode, "", ""); hdt->getHeader()->remove(ts); bt.populateHeader(*hdt->getHeader(), "_:triples"); #endif // SAVE cout << "Save to " << outputFile << endl; ofstream out(outputFile.c_str(), ios::binary | ios::out); ControlInformation ci; ci.clear(); ci.setType(GLOBAL); ci.setFormat(HDTVocabulary::HDT_CONTAINER); ci.save(out); // HEADER ci.clear(); ci.setType(HEADER); hdt->getHeader()->save(out, ci, NULL); // DICTIONARY ci.clear(); ci.setType(DICTIONARY); hdt->getDictionary()->save(out, ci, NULL); // NEW TRIPLES ci.clear(); ci.setType(TRIPLES); bt.save(out, ci, NULL); 
out.close(); delete hdt; } catch (char *e) { cout << "ERROR: " << e << endl; } catch (const char *e) { cout << "ERROR: " << e << endl; } }
int main(int argc, char **argv) { int c; string query, inputFile, outputFile; bool measure = false; while( (c = getopt(argc,argv,"hq:o:m"))!=-1) { switch(c) { case 'h': break; case 'q': query = optarg; break; case 'o': outputFile = optarg; break; case 'm': measure = true; break; default: cout << "ERROR: Unknown option" << endl; return 1; } } if(argc-optind<1) { cout << "ERROR: You must supply an input and HDT File" << endl << endl; return 1; } inputFile = argv[optind]; try { // LOAD HDT *hdt = HDTManager::mapHDT(inputFile.c_str()); // CONVERT Dictionary *dict = hdt->getDictionary(); //LiteralDictionary litDict; FourSectionDictionary litDict; StdoutProgressListener progress; litDict.import(dict, &progress); // SAVE ofstream out(outputFile.c_str(), ios::binary | ios::out); ControlInformation ci; // GLOBAL ci.clear(); ci.setType(GLOBAL); ci.setFormat(HDTVocabulary::HDT_CONTAINER); ci.save(out); // HEADER ci.clear(); ci.setType(HEADER); hdt->getHeader()->save(out, ci, NULL); // NEW DICTIONARY ci.clear(); ci.setType(DICTIONARY); litDict.save(out, ci, NULL); // TRIPLES ci.clear(); ci.setType(TRIPLES); hdt->getTriples()->save(out, ci, NULL); out.close(); delete hdt; } catch (std::exception& e) { cout << "ERROR: " << e.what() << endl; } }