Esempio n. 1
0
/**
 * Fill the three component streams from a modifiable triple container.
 *
 * The container is sorted by this object's `order`, then traversed three
 * times. Each traversal wraps a fresh searchAll() iterator in a
 * ComponentIterator for one role and hands it to the matching stream's add().
 *
 * @param triples  Source container; it is sorted in place.
 * @param listener Progress sink, wrapped in an IntermediateListener.
 */
void PlainTriples::load(ModifiableTriples &triples, ProgressListener *listener) {
	triples.sort(order);

	IntermediateListener progress(listener);

	// Pass 1 of 3: subjects go to streamX.
	progress.setRange(0,33);
	progress.notifyProgress(0, "PlainTriples Importing subjects");
	IteratorTripleID *subjectSource = triples.searchAll();
	ComponentIterator subjectIds(subjectSource, SUBJECT);
	streamX->add(subjectIds);
	delete subjectSource;

	// Pass 2 of 3: predicates go to streamY.
	progress.setRange(33, 66);
	progress.notifyProgress(0, "PlainTriples Importing predicates");
	IteratorTripleID *predicateSource = triples.searchAll();
	ComponentIterator predicateIds(predicateSource, PREDICATE);
	streamY->add(predicateIds);
	delete predicateSource;

	// Pass 3 of 3: objects go to streamZ.
	progress.setRange(66, 100);
	progress.notifyProgress(0, "PlainTriples Importing objects");
	IteratorTripleID *objectSource = triples.searchAll();
	ComponentIterator objectIds(objectSource, OBJECT);
	streamZ->add(objectIds);
	delete objectSource;
}
Esempio n. 2
0
void PlainTriples::load(std::istream &input, ControlInformation &controlInformation, ProgressListener *listener)
{
	std::string format = controlInformation.getFormat();
	if(format!=getType()) {
		throw std::runtime_error("Trying to read PlainTriples but the data is not PlainTriples");
	}

	//unsigned int numTriples = controlInformation.getUint("numTriples");
	order = (TripleComponentOrder) controlInformation.getUint("order");

	IntermediateListener iListener(listener);

	iListener.setRange(0,33);
	iListener.notifyProgress(0, "PlainTriples loading subjects");
	delete streamX;
	streamX = IntSequence::getArray(input);
	streamX->load(input);

	iListener.setRange(33, 66);
	iListener.notifyProgress(0, "PlainTriples loading predicates");
	delete streamY;
	streamY = IntSequence::getArray(input);
	streamY->load(input);

	iListener.setRange(66, 100);
	iListener.notifyProgress(0, "PlainTriples loading objects");
	delete streamZ;
	streamZ = IntSequence::getArray(input);
    streamZ->load(input);
}
Esempio n. 3
0
void FourSectionDictionary::save(std::ostream & output, ControlInformation & controlInformation, ProgressListener *listener)
{
	controlInformation.setFormat(HDTVocabulary::DICTIONARY_TYPE_FOUR);

	controlInformation.setUint("mapping", this->mapping);
	controlInformation.setUint("sizeStrings", this->sizeStrings);

	controlInformation.save(output);

	IntermediateListener iListener(listener);

	iListener.setRange(0,10);
	iListener.notifyProgress(0, "Dictionary save shared area.");
	shared->save(output);

	iListener.setRange(10,45);
	iListener.notifyProgress(0, "Dictionary save subjects.");
	subjects->save(output);

	iListener.setRange(45,60);
	iListener.notifyProgress(0, "Dictionary save predicates.");
	predicates->save(output);

	iListener.setRange(60,100);
	iListener.notifyProgress(0, "Dictionary save objects.");
	objects->save(output);
}
Esempio n. 4
0
/**
 * Make the triples index available, from disk when possible.
 *
 * Tries to open "<fileName>.index". If the stream is good, the index is
 * loaded: through loadMMapIndex() when this HDT is memory mapped, otherwise
 * by reading the control information and calling triples->loadIndex(). If the
 * stream is not good, the index is generated and then passed to saveIndex().
 *
 * @param listener Progress sink.
 */
void BasicHDT::loadOrCreateIndex(ProgressListener *listener) {

	string indexPath = fileName + ".index";
	ifstream indexFile(indexPath.c_str(), ios::binary);

	if(!indexFile.good()) {
		// No readable index file: generate (0-90%), then save (90-100%).
		IntermediateListener progress(listener);
		progress.setRange(0,90);
		triples->generateIndex(&progress);

		progress.setRange(90,100);
		saveIndex(&progress);
		return;
	}

	if(mappedHDT) {
		// Memory-mapped HDT: map the index as well.
		loadMMapIndex(listener);
	} else {
		// Stream-based HDT: read the index from the opened file.
		ControlInformation indexInfo;
		indexInfo.load(indexFile);
		triples->loadIndex(indexFile, indexInfo, listener);
	}
	indexFile.close();
}
Esempio n. 5
0
/**
 * Load header, dictionary and triples from a memory region.
 *
 * The region starts with a global control information block, followed by the
 * three components. For each component the control information is parsed
 * first (only to select the implementation through HDTFactory); the
 * component's own load() then consumes the bytes and returns how many it used.
 *
 * Each component pointer is reset to NULL right after being deleted, so that
 * a throwing factory call cannot leave a dangling pointer behind for a later
 * delete.
 *
 * @param ptr      First byte of the region.
 * @param ptrMax   Upper bound of the region, forwarded to every load() call.
 * @param listener Progress sink, wrapped in an IntermediateListener.
 * @return Number of bytes consumed from `ptr`.
 * @throws const char* if the global format is not HDTVocabulary::HDT_CONTAINER.
 */
size_t BasicHDT::loadMMap(unsigned char *ptr, unsigned char *ptrMax, ProgressListener *listener) {
	size_t count=0;
	ControlInformation controlInformation;
	IntermediateListener iListener(listener);

	// Load Global ControlInformation
	count+=controlInformation.load(&ptr[count], ptrMax);
	std::string hdtFormat = controlInformation.getFormat();
	if(hdtFormat!=HDTVocabulary::HDT_CONTAINER) {
		throw "This software cannot open this version of HDT File.";
	}

	// Load Header. The control information is only peeked at here (count is
	// not advanced); header->load() starts from the same offset.
	iListener.setRange(0,5);
	controlInformation.load(&ptr[count], ptrMax);
	delete header;
	header = NULL;
	header = HDTFactory::readHeader(controlInformation);
	count+= header->load(&ptr[count], ptrMax, &iListener);

	// Load dictionary
	iListener.setRange(5, 60);
	controlInformation.load(&ptr[count], ptrMax);
	delete dictionary;
	dictionary = NULL;
	dictionary = HDTFactory::readDictionary(controlInformation);
	count += dictionary->load(&ptr[count], ptrMax, &iListener);

	// Load triples
	iListener.setRange(60,100);
	controlInformation.load(&ptr[count], ptrMax);
	delete triples;
	triples = NULL;
	triples = HDTFactory::readTriples(controlInformation);
	count += triples->load(&ptr[count], ptrMax,  &iListener);

	return count;
}
Esempio n. 6
0
/**
 * Build this HDT by merging several existing HDT files.
 *
 * The dictionary is loaded from all files first (progress 0-50), then the
 * triples (50-99), and finally the header is filled in. If loading fails, the
 * components are deleted and re-created before the exception is rethrown.
 *
 * @param fileNames Array of `numFiles` HDT file names.
 * @param numFiles  Number of entries in `fileNames`.
 * @param baseUri   Base URI; wrapped in '<' ... '>' if not already.
 * @param listener  Progress sink, wrapped in an IntermediateListener.
 * @throws std::out_of_range if `baseUri` is empty (from std::string::at).
 */
void BasicHDT::loadFromSeveralHDT(const char **fileNames, size_t numFiles, string baseUri, ProgressListener *listener)
{
	// Make sure that URI starts and ends with <>.
	// Done before the try block so that an empty URI fails without touching
	// the current components.
	if(baseUri.at(0)!='<')
		baseUri = '<'+baseUri;
	if(baseUri.at(baseUri.length()-1)!='>')
		baseUri.append(">");

	try {
		IntermediateListener iListener(listener);

		iListener.setRange(0,50);
		loadDictionaryFromHDTs(fileNames, numFiles, baseUri.c_str(), &iListener);

		iListener.setRange(50,99);
		loadTriplesFromHDTs(fileNames, numFiles, baseUri.c_str(), &iListener);

		fillHeader(baseUri);

	} catch (const char *e) {
		// A `const char*` handler also matches a thrown `char*`, so a
		// separate `char*` handler would never run.
		cout << "Catch exception load: " << e << endl;
		deleteComponents();
		createComponents();
		throw;
	} catch (...) {
		// Any other exception type: restore the components the same way.
		deleteComponents();
		createComponents();
		throw;
	}
}
Esempio n. 7
0
void BasicHDT::saveToHDT(std::ostream & output, ProgressListener *listener)
{
	ControlInformation controlInformation;
	IntermediateListener iListener(listener);

	controlInformation.clear();
	controlInformation.setType(GLOBAL);
	controlInformation.setFormat(HDTVocabulary::HDT_CONTAINER);
	controlInformation.save(output);

	controlInformation.clear();
    controlInformation.setType(HEADER);
	iListener.setRange(0,5);
	header->save(output, controlInformation, &iListener);

	controlInformation.clear();
    controlInformation.setType(DICTIONARY);
	iListener.setRange(5,70);
	dictionary->save(output, controlInformation, &iListener);

	controlInformation.clear();
    controlInformation.setType(TRIPLES);
	iListener.setRange(70,100);
	triples->save(output, controlInformation, &iListener);
}
Esempio n. 8
0
/**
 * Parse an RDF file and build the triples component from it.
 *
 * Triples are collected into a temporary TriplesList, sorted by the order
 * named in the "triplesOrder" spec entry (SPO when unknown) and deduplicated.
 * If the current `triples` component has the same type as the temporary list,
 * the list replaces it; otherwise `triples` loads from the list and the list
 * is discarded.
 *
 * The temporary list and the parser are released on every exit path,
 * whatever the type of the exception.
 *
 * @param fileName RDF input file.
 * @param baseUri  Base URI handed to the parser.
 * @param notation RDF serialization of the input.
 * @param listener Progress sink.
 */
void BasicHDT::loadTriples(const char* fileName, const char* baseUri, RDFNotation notation, ProgressListener* listener) {
	// Generate Triples
	ModifiableTriples* triplesList = new TriplesList(spec);
	//ModifiableTriples *triplesList = new TriplesKyoto(spec);
	//ModifiableTriples *triplesList = new TripleListDisk();
	StopWatch st;
	IntermediateListener iListener(listener);
	try {
		NOTIFY(listener, "Loading Triples", 0, 100);
		iListener.setRange(0, 60);

		triplesList->startProcessing(&iListener);

		TriplesLoader tripLoader(dictionary, triplesList, &iListener);

		RDFParserCallback *pars = RDFParserCallback::getParserCallback(
				notation);
		try {
			pars->doParse(fileName, baseUri, notation, &tripLoader);
		} catch (...) {
			// Do not leak the parser when parsing fails.
			delete pars;
			throw;
		}
		delete pars;
		header->insert("_:statistics", HDTVocabulary::ORIGINAL_SIZE, tripLoader.getSize());

		triplesList->stopProcessing(&iListener);

		// SORT & Duplicates
		TripleComponentOrder order = parseOrder(
				spec.get("triplesOrder").c_str());
		if (order == Unknown) {
			order = SPO;
		}

		iListener.setRange(80, 85);
		triplesList->sort(order, &iListener);

		iListener.setRange(85, 90);
		triplesList->removeDuplicates(&iListener);
	} catch (const char *e) {
		// Also matches a thrown `char*`; no separate handler is needed.
		cout << "Catch exception triples" << e << endl;
		delete triplesList;
		throw;
	} catch (...) {
		// Other exception types must not leak the temporary list either.
		delete triplesList;
		throw;
	}
	if (triples->getType() == triplesList->getType()) {
		// Same representation: adopt the temporary list directly.
		delete triples;
		triples = triplesList;
	} else {
		iListener.setRange(90, 100);
		try {
			triples->load(*triplesList, &iListener);
		} catch (...) {
			delete triplesList;
			throw;
		}
		delete triplesList;
	}

	//cout << triples->getNumberOfElements() << " triples added in " << st << endl << endl;
}
Esempio n. 9
0
/**
 * Build the dictionary by merging the dictionaries of several HDT files.
 *
 * A temporary dictionary obtained from getLoadDictionary() receives the
 * entries of every file, is finalized with stopProcessing(), and is then
 * imported into the `dictionary` component. The temporary dictionary is
 * released on every exit path.
 *
 * @param fileName Array of `numFiles` HDT file names.
 * @param numFiles Number of entries in `fileName`.
 * @param baseUri  Not used by this function.
 * @param listener Progress sink.
 */
void BasicHDT::loadDictionaryFromHDTs(const char** fileName, size_t numFiles, const char* baseUri, ProgressListener* listener) {

	StopWatch st;
	IntermediateListener iListener(listener);

	// Create temporary dictionary
	ModifiableDictionary *dict = getLoadDictionary();
	try {
		// Inside the try block so that a failure here also frees `dict`.
		dict->startProcessing();

		NOTIFY(listener, "Loading Dictionary", 0, 100);
		iListener.setRange(0, 80);

		for(size_t i=0;i<numFiles;i++) {
			addDictionaryFromHDT(fileName[i], dict, &iListener);
		}

		iListener.setRange(80, 90);
		dict->stopProcessing(&iListener);

		// Convert to final format
		dictionary->import(dict);
	} catch (const char *e) {
		// Also matches a thrown `char*`; no separate handler is needed.
		cout << "Catch exception dictionary: " << e << endl;
		delete dict;
		throw;
	} catch (...) {
		delete dict;
		throw;
	}

	delete dict;
}
Esempio n. 10
0
/** True when offset `count` addresses a byte inside the region [ptr, ptrMax). */
static bool fourSectionHasByteAt(const unsigned char *ptr, const unsigned char *ptrMax, size_t count)
{
    return ptr < ptrMax && count < static_cast<size_t>(ptrMax - ptr);
}

/**
 * Load the dictionary from a memory region.
 *
 * Reads the control information, then the four sections in the order shared,
 * subjects, predicates, objects. The byte at the current offset is passed to
 * csd::CSD::create() to select each section's implementation. That byte is
 * only read after checking it lies before `ptrMax`, so a truncated region is
 * reported as an unreadable section instead of being read out of bounds.
 *
 * When a section cannot be created, the member is set to an empty
 * csd::CSD_PFC before throwing, so it never stays NULL or dangling.
 *
 * @param ptr      First byte of the dictionary data.
 * @param ptrMax   Upper bound of the region (one past the last readable byte
 *                 in the caller visible in this file).
 * @param listener Progress sink, wrapped in an IntermediateListener.
 * @return Number of bytes consumed from `ptr`.
 * @throws std::runtime_error if a section cannot be read.
 */
size_t FourSectionDictionary::load(unsigned char *ptr, unsigned char *ptrMax, ProgressListener *listener)
{
    size_t count=0;

    IntermediateListener iListener(listener);
    ControlInformation ci;
    count += ci.load(&ptr[count], ptrMax);

    //this->mapping = ci.getUint("mapping");
    this->mapping = MAPPING2;
    this->sizeStrings = ci.getUint("sizeStrings");

    iListener.setRange(0,25);
    iListener.notifyProgress(0, "Dictionary read shared area.");
    delete shared;
    shared = NULL;
    if(fourSectionHasByteAt(ptr, ptrMax, count)) {
        shared = csd::CSD::create(ptr[count]);
    }
    if(shared==NULL){
        shared = new csd::CSD_PFC();
        throw std::runtime_error("Could not read shared.");
    }
    count += shared->load(&ptr[count], ptrMax);
    //shared = new csd::CSD_Cache(shared);

    iListener.setRange(25,50);
    iListener.notifyProgress(0, "Dictionary read subjects.");
    delete subjects;
    subjects = NULL;
    if(fourSectionHasByteAt(ptr, ptrMax, count)) {
        subjects = csd::CSD::create(ptr[count]);
    }
    if(subjects==NULL){
        subjects = new csd::CSD_PFC();
        throw std::runtime_error("Could not read subjects.");
    }
    count += subjects->load(&ptr[count], ptrMax);
    //subjects = new csd::CSD_Cache(subjects);

    iListener.setRange(50,75);
    iListener.notifyProgress(0, "Dictionary read predicates.");
    delete predicates;
    predicates = NULL;
    if(fourSectionHasByteAt(ptr, ptrMax, count)) {
        predicates = csd::CSD::create(ptr[count]);
    }
    if(predicates==NULL){
        predicates = new csd::CSD_PFC();
        throw std::runtime_error("Could not read predicates.");
    }
    count += predicates->load(&ptr[count], ptrMax);
    // Only the predicates section is wrapped in a cache.
    predicates = new csd::CSD_Cache2(predicates);

    iListener.setRange(75,100);
    iListener.notifyProgress(0, "Dictionary read objects.");
    delete objects;
    objects = NULL;
    if(fourSectionHasByteAt(ptr, ptrMax, count)) {
        objects = csd::CSD::create(ptr[count]);
    }
    if(objects==NULL){
        objects = new csd::CSD_PFC();
        throw std::runtime_error("Could not read objects.");
    }
    count += objects->load(&ptr[count], ptrMax);
    //objects = new csd::CSD_Cache(objects);

    return count;
}
Esempio n. 11
0
/**
 * Finish building the dictionary: split the entries, then sort them
 * lexicographically.
 *
 * Progress is reported through an IntermediateListener so the two phases
 * occupy consecutive halves of the caller's range: split() in 0-50 and
 * lexicographicSort() in 50-100. Previously split() was handed the raw
 * listener, so the 0-50 range had no effect, and the sort reported over
 * 0-100 again.
 *
 * @param listener Progress sink, wrapped in an IntermediateListener.
 */
void PlainDictionary::stopProcessing(ProgressListener *listener)
{
    IntermediateListener iListener(listener);

    iListener.setRange(0,50);
    this->split(&iListener);

    iListener.setRange(50,100);
    this->lexicographicSort(&iListener);

    //dumpSizes(cout);
}
Esempio n. 12
0
/**
 * Load the dictionary from a stream.
 *
 * After checking the declared format, the four sections are read with
 * csd::CSD::load() in the order shared, subjects, predicates, objects.
 * Only the predicates section is wrapped in a csd::CSD_Cache2.
 *
 * Each member is reset to NULL right after being deleted, so a throwing
 * csd::CSD::load() cannot leave a dangling pointer. When load() returns NULL
 * the member is set to an empty csd::CSD_PFC before throwing.
 *
 * @param input    Stream positioned at the first section.
 * @param ci       Already-parsed control information; supplies the format
 *                 name and "sizeStrings".
 * @param listener Progress sink, wrapped in an IntermediateListener.
 * @throws std::runtime_error on format mismatch or when a section cannot be read.
 */
void FourSectionDictionary::load(std::istream & input, ControlInformation & ci, ProgressListener *listener)
{
	std::string format = ci.getFormat();
	if(format!=getType()) {
		throw std::runtime_error("Trying to read a FourSectionDictionary but the data is not FourSectionDictionary");
	}
	//this->mapping = ci.getUint("mapping");
	this->mapping = MAPPING2;
	this->sizeStrings = ci.getUint("sizeStrings");

	IntermediateListener iListener(listener);

	iListener.setRange(0,25);
	iListener.notifyProgress(0, "Dictionary read shared area.");
	delete shared;
	shared = NULL;
	shared = csd::CSD::load(input);
	if(shared==NULL){
		shared = new csd::CSD_PFC();
		throw std::runtime_error("Could not read shared.");
	}
	//shared = new csd::CSD_Cache(shared);

	iListener.setRange(25,50);
	iListener.notifyProgress(0, "Dictionary read subjects.");
	delete subjects;
	subjects = NULL;
	subjects = csd::CSD::load(input);
	if(subjects==NULL){
		subjects = new csd::CSD_PFC();
		throw std::runtime_error("Could not read subjects.");
	}
	//subjects = new csd::CSD_Cache(subjects);

	iListener.setRange(50,75);
	iListener.notifyProgress(0, "Dictionary read predicates.");
	delete predicates;
	predicates = NULL;
	predicates = csd::CSD::load(input);
	if(predicates==NULL){
		predicates = new csd::CSD_PFC();
		throw std::runtime_error("Could not read predicates.");
	}
	predicates = new csd::CSD_Cache2(predicates);

	iListener.setRange(75,100);
	iListener.notifyProgress(0, "Dictionary read objects.");
	delete objects;
	objects = NULL;
	objects = csd::CSD::load(input);
	if(objects==NULL){
		objects = new csd::CSD_PFC();
		throw std::runtime_error("Could not read objects.");
	}
	//objects = new csd::CSD_Cache(objects);
}
Esempio n. 13
0
/**
 * Rebuild this dictionary from the contents of another dictionary.
 *
 * Each section (subjects, predicates, objects, shared) is rebuilt by passing
 * the matching iterator of `other` to loadSection(). The old section is
 * deleted before the new one is built, then `sizeStrings` and `mapping` are
 * copied from `other`.
 *
 * On any exception all four sections are replaced by empty csd::CSD_PFC
 * objects and the exception is rethrown unchanged. Section pointers are reset
 * to NULL right after deletion, so the handler cannot delete a section twice
 * when loadSection() throws. The iterators live outside the try block so the
 * handler can release them.
 *
 * @param other    Source dictionary.
 * @param listener Progress sink.
 */
void FourSectionDictionary::import(Dictionary *other, ProgressListener *listener) {

	IteratorUCharString *itSubj = NULL;
	IteratorUCharString *itPred = NULL;
	IteratorUCharString *itObj = NULL;
	IteratorUCharString *itShared = NULL;

	try {
		IntermediateListener iListener(listener);

		NOTIFY(listener, "DictionaryPFC loading subjects", 0, 100);
		iListener.setRange(0, 20);
		itSubj = other->getSubjects();
		delete subjects;
		subjects = NULL;
		subjects = loadSection(itSubj, blocksize, &iListener);
		delete itSubj;
		itSubj = NULL;

		NOTIFY(listener, "DictionaryPFC loading predicates", 25, 30);
		iListener.setRange(20, 21);
		itPred = other->getPredicates();
		delete predicates;
		predicates = NULL;
		predicates = loadSection(itPred, blocksize, &iListener);
		delete itPred;
		itPred = NULL;

		NOTIFY(listener, "DictionaryPFC loading objects", 30, 90);
		iListener.setRange(21, 90);
		itObj = other->getObjects();
		delete objects;
		objects = NULL;
		objects = loadSection(itObj, blocksize, &iListener);
		delete itObj;
		itObj = NULL;

		NOTIFY(listener, "DictionaryPFC loading shared", 90, 100);
		iListener.setRange(90, 100);
		itShared = other->getShared();
		delete shared;
		shared = NULL;
		shared = loadSection(itShared, blocksize, &iListener);
		delete itShared;
		itShared = NULL;

		this->sizeStrings = other->size();
		this->mapping = other->getMapping();
	} catch (...) {
		// Release whichever iterator was in use; deleting NULL is a no-op.
		delete itSubj;
		delete itPred;
		delete itObj;
		delete itShared;

		// Drop whatever was built so far and fall back to empty sections.
		delete subjects;
		subjects = NULL;
		delete predicates;
		predicates = NULL;
		delete objects;
		objects = NULL;
		delete shared;
		shared = NULL;
		subjects = new csd::CSD_PFC();
		predicates = new csd::CSD_PFC();
		objects = new csd::CSD_PFC();
		shared = new csd::CSD_PFC();
		throw;
	}
}
Esempio n. 14
0
/**
 * Load a plain dictionary from a stream.
 *
 * The stream holds entries separated by the byte '\1'. An empty entry ends
 * the current region and moves to the next one. The regions are, in order:
 * shared subject/objects, non-shared subjects, non-shared objects,
 * predicates. Reading stops after the fourth region or at end of input.
 *
 * Each region reports progress under its own name before inserting the
 * entry. Previously the "objects" and "predicates" messages were emitted
 * from the preceding region, and the predicates region reported nothing.
 *
 * @param input    Stream positioned at the first entry.
 * @param ci       Already-parsed control information; supplies the format
 *                 name, "mapping", "sizeStrings" and "numEntries".
 * @param listener Progress sink, wrapped in an IntermediateListener.
 * @throws const char* if the declared format differs from getType().
 */
void PlainDictionary::load(std::istream & input, ControlInformation &ci, ProgressListener *listener)
{
    std::string line;
    unsigned char region = 1;

    startProcessing();

    std::string format = ci.getFormat();
    if(format!=getType()) {
        throw "Trying to read a PlainDictionary but the data is not PlainDictionary";
    }

    this->mapping = ci.getUint("mapping");
    this->sizeStrings = ci.getUint("sizeStrings");
    unsigned int numElements = ci.getUint("numEntries");
    unsigned int numLine = 0;

    IntermediateListener iListener(listener);
    iListener.setRange(0,25);
    while(region<5 && getline(input, line,'\1')) {
        if(!line.empty()) {
            if (region == 1) { //shared SO
                NOTIFYCOND(&iListener, "Dictionary loading shared area.", numLine, numElements);
                insert(line, SHARED_SUBJECT);
            } else if (region == 2) { //not shared Subjects
                NOTIFYCOND(&iListener, "Dictionary loading subjects.", numLine, numElements);
                insert(line, NOT_SHARED_SUBJECT);
            } else if (region == 3) { //not shared Objects
                NOTIFYCOND(&iListener, "Dictionary loading objects.", numLine, numElements);
                insert(line, NOT_SHARED_OBJECT);
            } else if (region == 4) { //predicates
                NOTIFYCOND(&iListener, "Dictionary loading predicates.", numLine, numElements);
                insert(line, NOT_SHARED_PREDICATE);
            }
        } else {
            // Empty entry: region separator.
            region++;
        }

        // Separators are counted too, as before.
        numLine++;
    }

    // No stopProcessing() Needed. Dictionary already split and sorted in file.
    updateIDs();
}
Esempio n. 15
0
/**
 * Load a complete HDT from a stream.
 *
 * Reads the global control information, then for header, dictionary and
 * triples: reads the component's control information, replaces the component
 * with the implementation chosen by HDTFactory, and lets it load itself.
 *
 * On any exception the components are deleted and re-created before the
 * exception is rethrown. Component pointers are reset to NULL right after
 * deletion, so that a throwing factory call cannot make deleteComponents()
 * free the same object a second time.
 *
 * @param input    Stream positioned at the start of the HDT container.
 * @param listener Progress sink, wrapped in an IntermediateListener.
 * @throws const char* if the container format is not HDTVocabulary::HDT_CONTAINER.
 */
void BasicHDT::loadFromHDT(std::istream & input, ProgressListener *listener)
{
	try {
		ControlInformation controlInformation;
		IntermediateListener iListener(listener);

		// Load Global ControlInformation.
		controlInformation.load(input);
		std::string hdtFormat = controlInformation.getFormat();
		if(hdtFormat!=HDTVocabulary::HDT_CONTAINER) {
			throw "This software cannot open this version of HDT File.";
		}

		// Load header
		iListener.setRange(0,5);
		controlInformation.load(input);
		delete header;
		header = NULL;
		header = HDTFactory::readHeader(controlInformation);
		header->load(input, controlInformation, &iListener);

		//Load Dictionary.
		iListener.setRange(5, 60);
		controlInformation.load(input);
		delete dictionary;
		dictionary = NULL;
		dictionary = HDTFactory::readDictionary(controlInformation);
		dictionary->load(input, controlInformation, &iListener);

		// Load Triples
		iListener.setRange(60,100);
		controlInformation.load(input);
		delete triples;
		triples = NULL;
		triples = HDTFactory::readTriples(controlInformation);
		triples->load(input, controlInformation, &iListener);
	} catch (const char *ex) {
		// Also matches a thrown `char*`; no separate handler is needed.
		cout << "Exception loading HDT: " << ex;
		deleteComponents();
		createComponents();
		throw;
	} catch (...) {
		// Other exception types: restore the components the same way.
		deleteComponents();
		createComponents();
		throw;
	}
}
Esempio n. 16
0
void CompactTriples::save(std::ostream & output, ControlInformation &controlInformation, ProgressListener *listener)
{
	controlInformation.clear();
	controlInformation.setUint("numTriples", getNumberOfElements());
	controlInformation.setFormat(HDTVocabulary::TRIPLES_TYPE_COMPACT);
	controlInformation.setUint("order", order);
	controlInformation.save(output);

	IntermediateListener iListener(listener);

	iListener.setRange(0,30);
	iListener.notifyProgress(0, "CompactTriples saving Stream Y");
	streamY->save(output);

	iListener.setRange(30,100);
	iListener.notifyProgress(0, "CompactTriples saving Stream Z");
	streamZ->save(output);
}
Esempio n. 17
0
/**
 * Parse an RDF file and build the dictionary component from it.
 *
 * A temporary dictionary obtained from getLoadDictionary() is filled by the
 * parser and finalized with stopProcessing(). If the current `dictionary` is
 * not of type HDTVocabulary::DICTIONARY_TYPE_PLAIN, it imports the temporary
 * one, which is then discarded. Otherwise the temporary dictionary replaces
 * the current one, and the previous dictionary is now deleted instead of
 * being leaked.
 *
 * The parser and the temporary dictionary are released on every failure
 * path, whatever the type of the exception.
 *
 * @param fileName RDF input file.
 * @param baseUri  Base URI handed to the parser.
 * @param notation RDF serialization of the input.
 * @param listener Progress sink.
 */
void BasicHDT::loadDictionary(const char* fileName, const char* baseUri, RDFNotation notation, ProgressListener* listener) {

	StopWatch st;
	IntermediateListener iListener(listener);

	// Create temporary dictionary
	ModifiableDictionary *dict = getLoadDictionary();

	try {
		// Inside the try block so that a failure here also frees `dict`.
		dict->startProcessing();

		NOTIFY(listener, "Loading Dictionary", 0, 100);
		iListener.setRange(0, 80);

		// Load data
		DictionaryLoader dictLoader(dict, &iListener);

		RDFParserCallback *parser = RDFParserCallback::getParserCallback(notation);
		try {
			parser->doParse(fileName, baseUri, notation, &dictLoader);
		} catch (...) {
			// Do not leak the parser when parsing fails.
			delete parser;
			throw;
		}
		delete parser;

		iListener.setRange(80, 90);
		dict->stopProcessing(&iListener);

		// Convert to final format
		if (dictionary->getType()!=HDTVocabulary::DICTIONARY_TYPE_PLAIN){
			dictionary->import(dict);
			delete dict;
			dict = NULL;
		}
		else if (dictionary != dict) {
			// Adopt the temporary dictionary; free the one it replaces.
			delete dictionary;
			dictionary = dict;
			dict = NULL;
		}
		else {
			// Already the same object: nothing to release.
			dict = NULL;
		}

	} catch (const char *e) {
		// Also matches a thrown `char*`; no separate handler is needed.
		cout << "Catch exception dictionary: " << e << endl;
		delete dict;
		throw;
	} catch (...) {
		delete dict;
		throw;
	}
}
Esempio n. 18
0
/**
 * Load an HDT from a file, using memory mapping.
 *
 * If the file name ends in ".gz" and USE_LIBZ is defined, the name without
 * that suffix is used as the target. The archive is decompressed to it
 * unless a file with that name can already be opened. The target file is
 * then mapped and its contents are handed to loadMMap().
 *
 * @param fileNameChar Path of the HDT file, optionally ending in ".gz".
 * @param listener     Progress sink, wrapped in an IntermediateListener.
 * @throws const char* if the file ends in ".gz" and USE_LIBZ is not defined.
 */
void BasicHDT::mapHDT(const char *fileNameChar, ProgressListener *listener) {

	IntermediateListener iListener(listener);

    std::string fileStr(fileNameChar);
    size_t pos = fileStr.find_last_of(".");
    // A name without any '.' has no suffix. Without this check `pos + 1`
    // wraps to 0 and the whole name would be treated as the suffix.
    std::string suffix;
    if(pos != std::string::npos) {
        suffix = fileStr.substr(pos + 1);
    }

    if( suffix == "gz") {
        #ifdef USE_LIBZ
            this->fileName.assign(fileStr.substr(0, pos));
            ifstream test(fileName.c_str());
            if(test.good()) {
               // Target file can already be opened: reuse it.
               test.close();
            } else {
                test.close();

                iListener.setRange(0,80);
                fileUtil::decompress(fileNameChar, fileName.c_str(), &iListener);
                iListener.setRange(80,100);
            }
        #else
            throw "Support for GZIP was not compiled in this version. Please decompress the file before opening it.";
        #endif
    } else {
        this->fileName.assign(fileNameChar);
    }

    // Clean previous mapping. Deleting NULL is a no-op; the pointer is reset
    // so a throwing FileMap constructor cannot leave it dangling.
    delete mappedHDT;
    mappedHDT = NULL;

    mappedHDT = new FileMap(fileName.c_str());

    unsigned char *ptr = mappedHDT->getPtr();
    size_t mappedSize = mappedHDT->getMappedSize();

    // Load
    this->loadMMap(ptr, ptr+mappedSize, &iListener);
}
Esempio n. 19
0
void PlainTriples::save(std::ostream & output, ControlInformation &controlInformation, ProgressListener *listener)
{
	controlInformation.clear();
	controlInformation.setUint("numTriples", getNumberOfElements());
	controlInformation.setFormat(HDTVocabulary::TRIPLES_TYPE_PLAIN);
	controlInformation.setUint("order", order);
	controlInformation.save(output);

	IntermediateListener iListener(listener);

	iListener.setRange(0,33);
	iListener.notifyProgress(0, "PlainTriples saving subjects");
	streamX->save(output);

	iListener.setRange(33, 66);
	iListener.notifyProgress(0, "PlainTriples saving predicates");
	streamY->save(output);

	iListener.setRange(66, 100);
	iListener.notifyProgress(0, "PlainTriples saving objects");
	streamZ->save(output);
}
Esempio n. 20
0
/**
 * Read streamY and streamZ from a serialized CompactTriples section.
 *
 * The progress message for the second stream now names the operation and
 * stream actually in progress ("loading Stream Z"); it used to repeat
 * "saving Stream Y".
 *
 * @param input              Stream positioned at the first sequence.
 * @param controlInformation Already-parsed control information; supplies the
 *                           format name, "numTriples" and "order".
 * @param listener           Progress sink, wrapped in an IntermediateListener.
 * @throws const char* if the declared format is not
 *         HDTVocabulary::TRIPLES_TYPE_COMPACT.
 */
void CompactTriples::load(std::istream &input, ControlInformation &controlInformation, ProgressListener *listener)
{
	std::string format = controlInformation.getFormat();
	if(format != HDTVocabulary::TRIPLES_TYPE_COMPACT) {
		throw "Trying to read CompactTriples but data is not CompactTriples";
	}

	numTriples = controlInformation.getUint("numTriples");
	order = (TripleComponentOrder) controlInformation.getUint("order");

	IntermediateListener iListener(listener);

	// Stream Y: 0-30%.
	iListener.setRange(0,30);
	iListener.notifyProgress(0, "CompactTriples loading Stream Y");
	delete streamY;
	streamY = IntSequence::getArray(input);
	streamY->load(input);

	// Stream Z: 30-100%.
	iListener.setRange(30,100);
	iListener.notifyProgress(0, "CompactTriples loading Stream Z");
	delete streamZ;
	streamZ = IntSequence::getArray(input);
	streamZ->load(input);
}
Esempio n. 21
0
/**
 * Build the predicate index from triples->arrayY.
 *
 * Three passes are made:
 *  1. Count how often each value of arrayY occurs (slot `value-1` of a
 *     temporary sequence).
 *  2. Turn the counts into `bitmap`: a bit is set at the last position of
 *     each predicate's run of slots (cumulative count minus one).
 *  3. Fill `array` so that each predicate's run of slots holds the positions
 *     in arrayY where that predicate occurs, in increasing position order.
 *
 * Elapsed times for the bitmap phase and for the index phase are printed
 * to cerr.
 *
 * NOTE(review): the `val-1` / `predicateValue-1` indexing assumes values in
 * arrayY are 1-based and never 0 — confirm against the producer of arrayY.
 * NOTE(review): `tempCountPred-1` wraps if the first predicate has a count
 * of 0 — presumably every predicate id occurs at least once; verify.
 *
 * @param listener Progress sink, wrapped in an IntermediateListener.
 */
void PredicateIndexArray::generate(ProgressListener *listener) {
    // Count predicates


    StopWatch st;
    IntermediateListener iListener(listener);
    iListener.setRange(0,20);
    LogSequence2 *predCount = new LogSequence2(bits(triples->arrayY->getNumberOfElements()));

    size_t maxCount = 0;
    for(size_t i=0;i<triples->arrayY->getNumberOfElements(); i++) {
        // Read value
        size_t val = triples->arrayY->get(i);

        // Grow if necessary
        if(predCount->getNumberOfElements()<val) {
            predCount->resize(val);
        }

        // Increment
        size_t count = predCount->get(val-1)+1;
        maxCount = count>maxCount ? count : maxCount;
        predCount->set(val-1, count);

        NOTIFYCOND3(&iListener, "Counting appearances of predicates", i, triples->arrayY->getNumberOfElements(), 20000);
    }
    predCount->reduceBits();

#if 0
    for(size_t i=0;i<predCount->getNumberOfElements();i++) {
        cout << "Predicate " << i << " appears " << predCount->get(i) << " times." << endl;
    }
#endif

    // Convert predicate count to bitmap
    bitmap = new BitSequence375(triples->arrayY->getNumberOfElements());
    size_t tempCountPred=0;
    iListener.setRange(20,25);
    for(size_t i=0;i<predCount->getNumberOfElements();i++) {
        // Running total of occurrences; the bit marks the end of this predicate's run.
        tempCountPred += predCount->get(i);
        bitmap->set(tempCountPred-1, true);
        NOTIFYCOND3(&iListener, "Creating Predicate bitmap", i, predCount->getNumberOfElements(), 100000);
    }
    // Make sure the last position is marked whenever arrayY is not empty.
    if(triples->arrayY->getNumberOfElements())
        bitmap->set(triples->arrayY->getNumberOfElements()-1, true);
    cerr << "Predicate Bitmap in " << st << endl;
    st.reset();

    delete predCount;

    // Create predicate index
    LogSequence2 *array = new LogSequence2(bits(triples->arrayY->getNumberOfElements()), triples->arrayY->getNumberOfElements());
    array->resize(triples->arrayY->getNumberOfElements());

    // insertArray holds, per predicate, how many slots of its run are already filled.
    LogSequence2 *insertArray = new LogSequence2(bits(triples->arrayY->getNumberOfElements()), bitmap->countOnes());
    insertArray->resize(bitmap->countOnes());

    iListener.setRange(25,100);
    for(size_t i=0;i<triples->arrayY->getNumberOfElements(); i++) {
            size_t predicateValue = triples->arrayY->get(i);

            // Start of this predicate's run: one past the end mark of the previous predicate.
            size_t insertBase = predicateValue==1 ? 0 : bitmap->select1(predicateValue-1)+1;
            size_t insertOffset = insertArray->get(predicateValue-1);
            insertArray->set(predicateValue-1, insertOffset+1);

            // Record the position i in the next free slot of the run.
            array->set(insertBase+insertOffset, i);
            NOTIFYCOND3(&iListener, "Generating predicate references", i, triples->arrayY->getNumberOfElements(), 100000);
    }

    // Publish the finished index in the member of the same name.
    this->array = array;

    delete insertArray;

    cerr << "Count predicates in " << st << endl;
}
Esempio n. 22
0
void BasicHDT::loadTriplesFromHDTs(const char** fileNames, size_t numFiles, const char* baseUri, ProgressListener* listener) {
	// Generate Triples
	ModifiableTriples* triplesList = new TriplesList(spec);
	//ModifiableTriples *triplesList = new TriplesKyoto(spec);
	//ModifiableTriples *triplesList = new TripleListDisk();
	StopWatch st;
	IntermediateListener iListener(listener);
	try {
		NOTIFY(listener, "Loading Triples", 0, 100);
		iListener.setRange(0, 60);

		triplesList->startProcessing(&iListener);

		TriplesLoader tripLoader(dictionary, triplesList, &iListener);

		// FIXME: Import from files

		uint64_t totalOriginalSize=0;
		BasicHDT hdt;

		for(size_t i=0;i<numFiles;i++) {
			const char *fileName = fileNames[i];
	        cout << endl << "Load triples from " << fileName << endl;
	        hdt.mapHDT(fileName);
	        Dictionary *dict = hdt.getDictionary();

	        // Create mapping arrays
	        cout << "Generating mapping subjects" << endl;
	        unsigned int nsubjects = dict->getNsubjects();
	        LogSequence2 subjectMap(bits(dictionary->getNsubjects()), nsubjects);
	        subjectMap.resize(nsubjects);
	        for(unsigned int i=0;i<nsubjects;i++) {
	        	string str = dict->idToString(i+1, SUBJECT);
	        	unsigned int newid = dictionary->stringToId(str, SUBJECT);
	        	subjectMap.set(i, newid);
	        }

	        cout << "Generating mapping predicates" << endl;
	        unsigned int npredicates = dict->getNpredicates();
	        LogSequence2 predicateMap(bits(dictionary->getNpredicates()), npredicates);
	        predicateMap.resize(npredicates);
	        for(unsigned int i=0;i<npredicates;i++) {
	        	string str = dict->idToString(i+1, PREDICATE);
	        	unsigned int newid = dictionary->stringToId(str, PREDICATE);
	        	predicateMap.set(i, newid);
	        }

	        cout << "Generating mapping objects" << endl;
	        unsigned int nobjects = dict->getNobjects();
	        LogSequence2 objectMap(bits(dictionary->getNobjects()), nobjects);
	        objectMap.resize(nobjects);
	        for(unsigned int i=0;i<nobjects;i++) {
	        	string str = dict->idToString(i+1, OBJECT);
	        	unsigned int newid = dictionary->stringToId(str, OBJECT);
	        	objectMap.set(i, newid);
	        }

	        totalOriginalSize += hdt.getHeader()->getPropertyLong("_:statistics", HDTVocabulary::ORIGINAL_SIZE.c_str());

	        size_t numtriples = hdt.getTriples()->getNumberOfElements();
	        IteratorTripleID *it = hdt.getTriples()->searchAll();

	        TripleID newTid;
	        char str[100];
	        long long int j = 0;
	        while(it->hasNext()) {
	        	TripleID *tid = it->next();

	        	newTid.setAll(
	        			(unsigned int)subjectMap.get(tid->getSubject()-1),
	        			(unsigned int)predicateMap.get(tid->getPredicate()-1),
	        			(unsigned int)objectMap.get(tid->getObject()-1)
	        			);

	        	triplesList->insert(newTid);

	        	if ((listener != NULL) && (j % 100000) == 0) {
	        		sprintf(str, "%lld triples added.", j);
	        		listener->notifyProgress((j*100)/numtriples, str);
	        	}
	            j++;
	        }
	        delete it;
		}

		triplesList->stopProcessing(&iListener);

		// SORT & Duplicates
		TripleComponentOrder order = parseOrder(spec.get("triplesOrder").c_str());
		if (order == Unknown) {
			order = SPO;
		}

		iListener.setRange(80, 85);
		triplesList->sort(order, &iListener);

		iListener.setRange(85, 90);
		triplesList->removeDuplicates(&iListener);

		header->insert("_:statistics", HDTVocabulary::ORIGINAL_SIZE, totalOriginalSize);
	} catch (const char *e) {
		cout << "Catch exception triples" << e << endl;
		delete triplesList;
		throw e;
	} catch (char *e) {
		cout << "Catch exception triples" << e << endl;
		delete triplesList;
		throw e;
	}
	if (triples->getType() == triplesList->getType()) {
		delete triples;
		triples = triplesList;
	} else {
		iListener.setRange(90, 100);
		try {
			triples->load(*triplesList, &iListener);
		} catch (const char* e) {
			delete triplesList;
			throw e;
		}
		delete triplesList;
	}

	//cout << triples->getNumberOfElements() << " triples added in " << st << endl << endl;

}