Esempio n. 1
0
/**
 * Distribute the hashed dictionary entries over the output sections.
 *
 * Empties `subjects`, `shared` and `objects`, then:
 *  - walks `hashSubject`, sending each entry to `shared` when its key is also
 *    present in `hashObject`, and to `subjects` otherwise;
 *  - walks `hashObject`, sending to `objects` every entry whose key is not
 *    present in `hashSubject`.
 *
 * @param listener Progress listener passed to NOTIFYCOND after each entry.
 * @return void
 */
void PlainDictionary::split(ProgressListener *listener) {
    subjects.clear();
    shared.clear();
    objects.clear();

    // Progress is reported against the combined size of both hash tables.
    unsigned int total = hashSubject.size()+hashObject.size();
    unsigned int processed = 0;

    // Subject pass. Note the walk also ends as soon as an entry with a
    // null/zero key is reached.
    DictEntryIt subjIt = hashSubject.begin();
    while(subjIt!=hashSubject.end() && subjIt->first) {
        if(hashObject.find(subjIt->first)!=hashObject.end()) {
            // Key appears as subject and as object
            shared.push_back(subjIt->second);
        } else {
            // Key appears as subject only
            subjects.push_back(subjIt->second);
        }
        processed++;
        NOTIFYCOND(listener, "Extracting shared subjects", processed, total);
        ++subjIt;
    }

    // Object pass: entries already stored as shared are skipped.
    DictEntryIt objIt = hashObject.begin();
    while(objIt!=hashObject.end()) {
        const bool alsoSubject = hashSubject.find(objIt->first)!=hashSubject.end();
        if(!alsoSubject) {
            // Key appears as object only
            objects.push_back(objIt->second);
        }
        processed++;
        NOTIFYCOND(listener, "Extracting shared objects", processed, total);
        ++objIt;
    }
}
Esempio n. 2
0
/**
 * Write the dictionary to a stream in the plain layout.
 *
 * The control information is filled in (format, "mapping", "sizeStrings",
 * "numEntries") and saved first. Then the four sections are written in the
 * order shared, subjects, objects, predicates: every string is followed by
 * one '\1' byte, and one additional '\1' byte closes each section.
 *
 * @param output             Destination stream.
 * @param controlInformation Header object that is updated and written before the entries.
 * @param listener           Progress listener passed to NOTIFYCOND after each entry.
 */
void PlainDictionary::save(std::ostream &output, ControlInformation &controlInformation, ProgressListener *listener)
{
    controlInformation.setFormat(HDTVocabulary::DICTIONARY_TYPE_PLAIN);
    controlInformation.setUint("mapping", this->mapping);
    controlInformation.setUint("sizeStrings", this->sizeStrings);
    controlInformation.setUint("numEntries", this->getNumberOfElements());

    controlInformation.save(output);

    const char separator = '\1';   // terminates every entry and every section
    unsigned int written = 0;      // entries emitted so far, across all sections

    // Section 1: entries used both as subject and as object
    for (unsigned int pos = 0; pos < shared.size(); ++pos) {
        output << shared[pos]->str;
        output.put(separator);
        ++written;
        NOTIFYCOND(listener, "PlainDictionary saving shared", written, getNumberOfElements());
    }
    output.put(separator); // end of section

    // Section 2: entries used only as subject
    for (unsigned int pos = 0; pos < subjects.size(); ++pos) {
        output << subjects[pos]->str;
        output.put(separator);
        ++written;
        NOTIFYCOND(listener, "PlainDictionary saving subjects", written, getNumberOfElements());
    }
    output.put(separator); // end of section

    // Section 3: entries used only as object
    for (unsigned int pos = 0; pos < objects.size(); ++pos) {
        output << objects[pos]->str;
        output.put(separator);
        ++written;
        NOTIFYCOND(listener, "PlainDictionary saving objects", written, getNumberOfElements());
    }
    output.put(separator); // end of section

    // Section 4: predicates
    for (unsigned int pos = 0; pos < predicates.size(); ++pos) {
        output << predicates[pos]->str;
        output.put(separator);
        ++written;
        NOTIFYCOND(listener, "PlainDictionary saving predicates", written, getNumberOfElements());
    }
    output.put(separator); // end of section
}
Esempio n. 3
0
/**
 * Load a plain dictionary from a stream.
 *
 * Checks that the format announced by `ci` equals getType(), restores
 * `mapping` and `sizeStrings` from it, and then reads '\1'-delimited tokens
 * from `input`. Tokens are grouped in four consecutive regions (shared,
 * subjects, objects, predicates): each non-empty token is inserted into the
 * section of the current region, and an empty token switches to the next
 * region. Reading stops once the fourth region has been closed or the stream
 * has no more tokens. Finally updateIDs() is called.
 *
 * @param input    Stream to read the entries from.
 * @param ci       Control information describing the stored dictionary.
 * @param listener Progress listener; wrapped in an IntermediateListener with range 0-25.
 * @throws const char* when the format in `ci` is not the one returned by getType().
 */
void PlainDictionary::load(std::istream & input, ControlInformation &ci, ProgressListener *listener)
{
    std::string line;
    unsigned char region = 1;

    startProcessing();

    std::string format = ci.getFormat();
    if(format!=getType()) {
        throw "Trying to read a PlainDictionary but the data is not PlainDictionary";
    }

    this->mapping = ci.getUint("mapping");
    this->sizeStrings = ci.getUint("sizeStrings");
    unsigned int numElements = ci.getUint("numEntries");
    unsigned int numLine = 0;   // counts every token read, region separators included

    IntermediateListener iListener(listener);
    iListener.setRange(0,25);
    while(region<5 && getline(input, line,'\1')) {
        if(line.empty()) {
            // An empty token marks the end of the current region.
            region++;
        } else if (region == 1) { //shared SO
            NOTIFYCOND(&iListener, "Dictionary loading shared area.", numLine, numElements);
            insert(line, SHARED_SUBJECT);
        } else if (region == 2) { //not shared Subjects
            NOTIFYCOND(&iListener, "Dictionary loading subjects.", numLine, numElements);
            insert(line, NOT_SHARED_SUBJECT);
        } else if (region == 3) { //not shared Objects
            NOTIFYCOND(&iListener, "Dictionary loading objects.", numLine, numElements);
            insert(line, NOT_SHARED_OBJECT);
        } else if (region == 4) { //predicates
            NOTIFYCOND(&iListener, "Dictionary loading predicates.", numLine, numElements);
            insert(line, NOT_SHARED_PREDICATE);
        }

        numLine++;
    }

    // No stopProcessing() Needed. Dictionary already split and sorted in file.
    updateIDs();
}
/**
 * Fill streamY and streamZ from a set of triples.
 *
 * The triples are sorted by `order` and visited once. Every triple is first
 * rearranged with swapComponentOrder(triple, SPO, order); its components are
 * then read as x (getSubject), y (getPredicate) and z (getObject). Two
 * sequences are produced:
 *  - vectorY receives each new y value, preceded by a 0 whenever x changes;
 *  - vectorZ receives every z value, preceded by a 0 whenever x or y changes.
 * `numTriples` is incremented once per visited triple.
 *
 * @param triples  Source triples; sorted in place by this call.
 * @param listener Progress listener passed to NOTIFYCOND for every triple after the first.
 */
void CompactTriples::load(ModifiableTriples &triples, ProgressListener *listener) {
	triples.sort(order);

	IteratorTripleID *it = triples.searchAll();

	vector<unsigned int> vectorY, vectorZ;

	// x and y of the previously visited triple; only read after the first
	// triple has assigned them.
	unsigned int prevX = 0;
	unsigned int prevY = 0;

	// The first triple opens both sequences without any 0 separator.
	if(it->hasNext()) {
		TripleID *first = it->next();

		swapComponentOrder(first, SPO, order);

		prevX = first->getSubject();
		prevY = first->getPredicate();

		vectorY.push_back(prevY);
		vectorZ.push_back(first->getObject());

		numTriples++;
	}

	// Remaining triples
	while(it->hasNext()) {
		TripleID *triple = it->next();

		swapComponentOrder(triple, SPO, order);

		const unsigned int x = triple->getSubject();
		const unsigned int y = triple->getPredicate();
		const unsigned int z = triple->getObject();

		const bool xChanged = (x!=prevX);
		const bool yListChanged = xChanged || (y!=prevY);

		if(xChanged) {
			// Close the Y list of the previous x
			vectorY.push_back(0);
		}
		if(yListChanged) {
			// New (x,y) pair: record y and close the previous Z list
			vectorY.push_back(y);
			vectorZ.push_back(0);
		}
		vectorZ.push_back(z);

		prevX = x;
		prevY = y;

		NOTIFYCOND(listener, "Converting to CompactTriples.", numTriples, triples.getNumberOfElements());
		numTriples++;
	}

	delete it;

	VectorUIntIterator itY(vectorY);
	VectorUIntIterator itZ(vectorZ);

	streamY->add(itY);
	streamZ->add(itZ);

#if 0
	// Debug Adjacency Lists
	cout << "Y" << vectorY.size() << "): ";
	for(unsigned int i=0;i<arrayY->getNumberOfElements();i++){
		cout << arrayY->get(i) << " ";
	}
	cout << endl;

	cout << "Z" << vectorZ.size() << "): ";
	for(unsigned int i=0;i<arrayZ->getNumberOfElements();i++){
		cout << arrayZ->get(i) << " ";
	}
	cout << endl;
#endif

}
Esempio n. 5
0
/**
 * Build a front-coded string sequence from an iterator of C strings.
 *
 * Strings are consumed in iterator order and grouped in blocks of `blocksize`
 * strings. The first string of every block is copied verbatim into `text`;
 * every other string is stored as the VByte-encoded length of the prefix it
 * shares with the previous string, followed by the remaining suffix. Each
 * stored string is terminated with a '\0' byte. `blocks` records the byte
 * offset in `text` at which each block starts, plus the final value of `bytes`
 * as a closing entry.
 *
 * @param it        Source of NUL-terminated strings; each string is released
 *                  through it->freeStr() once it has been encoded.
 * @param blocksize Number of strings per block; must be greater than zero.
 * @param listener  Progress listener passed to NOTIFYCOND after each string.
 * @throws const char* if `blocksize` is zero or the text buffer cannot be
 *         allocated or grown.
 */
CSD_PFC::CSD_PFC(hdt::IteratorUCharString *it, uint32_t blocksize, hdt::ProgressListener *listener) : isMapped(false)
{
    this->type = PFC;
    this->numstrings = 0;
    this->bytes = 0;
    this->blocksize = blocksize;
    this->nblocks = 0;

    // A zero block size would make the `numstrings % blocksize` test below
    // divide by zero; reject it before anything is allocated.
    if (blocksize == 0)
    {
        throw "CSD_PFC: blocksize must be greater than zero";
    }

    // Pointers to the first string of each block.
    blocks = new hdt::LogSequence2(sizeof(size_t)==8 ? 34 : 32);

    uint64_t reservedSize = 1024;
    text = (unsigned char*)malloc(reservedSize*sizeof(unsigned char));
    if (text == NULL)
    {
        delete blocks;
        blocks = NULL;
        throw "CSD_PFC: unable to allocate the text buffer";
    }

    unsigned char *currentStr = NULL;
    size_t currentLength = 0;
    string previousStr;

    while (it->hasNext())
    {
        currentStr = it->next();
        currentLength = strlen( (char*) currentStr);

        // Realloc size of the buffer if necessary.
        // +1 for string terminator +10 for VByte encoding (worst case)
        if ((bytes+currentLength+11) > reservedSize)
        {
            uint64_t grownSize = (bytes+currentLength+10)*2;

            // Keep the old pointer until realloc succeeds: on failure realloc
            // returns NULL and leaves the original buffer allocated.
            unsigned char *grown = (unsigned char*)realloc(text, grownSize*sizeof(unsigned char));
            if (grown == NULL)
            {
                it->freeStr(currentStr);
                free(text);
                text = NULL;
                delete blocks;
                blocks = NULL;
                throw "CSD_PFC: unable to grow the text buffer";
            }
            text = grown;
            reservedSize = grownSize;
        }

        if ((numstrings % blocksize) == 0)
        {
            // First string in the current block!
            blocks->push_back(bytes);
            nblocks++;

            // The string is explicitly copied to the encoded sequence
            // (terminator is appended below).
            memcpy(text+bytes, currentStr, currentLength);
            bytes+=currentLength;
        } else {
            // Regular string

            // Calculate the length of the common prefix
            unsigned int delta = longest_common_prefix((unsigned char *)previousStr.c_str(), currentStr, previousStr.length(), currentLength);

            // The prefix is differentially encoded
            bytes += VByte::encode(text+bytes, delta);

            // The suffix is copied to the sequence
            memcpy(text+bytes, currentStr+delta, currentLength-delta);
            bytes+=currentLength-delta;
        }

        text[bytes] = '\0';
        bytes++;

        // New string processed
        numstrings++;

        // Save previous (length already known, no need to rescan the string)
        previousStr.assign((char*)currentStr, currentLength);

        NOTIFYCOND(listener, "Converting dictionary to PFC", numstrings, it->getNumberOfElements());

        it->freeStr(currentStr);
    }

    // Storing the final byte position in the vector of positions
    blocks->push_back(bytes);

    // Trunc encoded sequence to save unused memory. Skipped when nothing was
    // written, because realloc with size 0 is implementation-defined and may
    // release the buffer. If shrinking fails the larger buffer stays valid.
    if (bytes > 0)
    {
        unsigned char *shrunk = (unsigned char *) realloc(text, bytes*sizeof(unsigned char));
        if (shrunk != NULL)
        {
            text = shrunk;
        }
    }

    blocks->reduceBits();
}