Exemplo n.º 1
0
void PlainTriples::load(ModifiableTriples &triples, ProgressListener *listener) {
	triples.sort(order);

	IntermediateListener iListener(listener);

	iListener.setRange(0,33);
	iListener.notifyProgress(0, "PlainTriples Importing subjects");
	IteratorTripleID *itS = triples.searchAll();
	ComponentIterator subjIt(itS, SUBJECT);
	streamX->add(subjIt);
	delete itS;

	iListener.setRange(33, 66);
	iListener.notifyProgress(0, "PlainTriples Importing predicates");
	IteratorTripleID *itP = triples.searchAll();
	ComponentIterator predIt(itP, PREDICATE);
	streamY->add(predIt);
	delete itP;

	iListener.setRange(66, 100);
	iListener.notifyProgress(0, "PlainTriples Importing objects");
	IteratorTripleID *itO = triples.searchAll();
	ComponentIterator objIt(itO, OBJECT);
	streamZ->add(objIt);
	delete itO;
}
Exemplo n.º 2
0
void TripleListDisk::load(ModifiableTriples & input, ProgressListener *listener)
{
	TripleID all(0,0,0);
	IteratorTripleID *it = input.search(all);

	startProcessing();
	ensureSize(input.getNumberOfElements());
	insert(it);
	stopProcessing();
}
Exemplo n.º 3
0
void TriplesKyoto::load(ModifiableTriples &input, ProgressListener *listener)
{
	IteratorTripleID *it = input.searchAll();

	while(it->hasNext()) {
		TripleID *triple = it->next();

		this->insert(*triple);
	}

	delete it;
}
Exemplo n.º 4
0
void BasicHDT::loadTriples(const char* fileName, const char* baseUri, RDFNotation notation, ProgressListener* listener) {
	// Generate Triples
	ModifiableTriples* triplesList = new TriplesList(spec);
	//ModifiableTriples *triplesList = new TriplesKyoto(spec);
	//ModifiableTriples *triplesList = new TripleListDisk();
	StopWatch st;
	IntermediateListener iListener(listener);
	try {
		NOTIFY(listener, "Loading Triples", 0, 100);
		iListener.setRange(0, 60);

		triplesList->startProcessing(&iListener);

		TriplesLoader tripLoader(dictionary, triplesList, &iListener);

		RDFParserCallback *pars = RDFParserCallback::getParserCallback(
				notation);
		pars->doParse(fileName, baseUri, notation, &tripLoader);
		delete pars;
		header->insert("_:statistics", HDTVocabulary::ORIGINAL_SIZE, tripLoader.getSize());

		triplesList->stopProcessing(&iListener);

		// SORT & Duplicates
		TripleComponentOrder order = parseOrder(
				spec.get("triplesOrder").c_str());
		if (order == Unknown) {
			order = SPO;
		}

		iListener.setRange(80, 85);
		triplesList->sort(order, &iListener);

		iListener.setRange(85, 90);
		triplesList->removeDuplicates(&iListener);
	} catch (const char *e) {
		cout << "Catch exception triples" << e << endl;
		delete triplesList;
		throw e;
	} catch (char *e) {
		cout << "Catch exception triples" << e << endl;
		delete triplesList;
		throw e;
	}
	if (triples->getType() == triplesList->getType()) {
		delete triples;
		triples = triplesList;
	} else {
		iListener.setRange(90, 100);
		try {
			triples->load(*triplesList, &iListener);
		} catch (const char* e) {
			delete triplesList;
			throw e;
		}
		delete triplesList;
	}

	//cout << triples->getNumberOfElements() << " triples added in " << st << endl << endl;
}
Exemplo n.º 5
0
void CompactTriples::load(ModifiableTriples &triples, ProgressListener *listener) {
	triples.sort(order);

	IteratorTripleID *it = triples.searchAll();

	vector<unsigned int> vectorY, vectorZ;
	unsigned int lastX, lastY, lastZ;
	unsigned int x, y, z;

	// First triple
	if(it->hasNext()) {
		TripleID *triple = it->next();

		swapComponentOrder(triple, SPO, order);

		lastX = x = triple->getSubject();
		lastY = y = triple->getPredicate();
		lastZ = z = triple->getObject();

		vectorY.push_back(y);
		vectorZ.push_back(z);

		numTriples++;
	}

	// Rest of the triples
	while(it->hasNext()) {
		TripleID *triple = it->next();
		//cout << "111> " << triple << endl;

		swapComponentOrder(triple, SPO, order);
		//cout << "222> " << triple << endl;

		x = triple->getSubject();
		y = triple->getPredicate();
		z = triple->getObject();

		if(x!=lastX) {
			vectorY.push_back(0);
			vectorY.push_back(y);

			vectorZ.push_back(0);
			vectorZ.push_back(z);
		} else if(y!=lastY) {
			vectorY.push_back(y);
			vectorZ.push_back(0);
			vectorZ.push_back(z);
		} else {
			vectorZ.push_back(z);
		}

		lastX = x;
		lastY = y;
		lastZ = z;

		NOTIFYCOND(listener, "Converting to CompactTriples.", numTriples, triples.getNumberOfElements());
		numTriples++;
	}

	delete it;

	VectorUIntIterator itY(vectorY);
	VectorUIntIterator itZ(vectorZ);

	streamY->add(itY);
	streamZ->add(itZ);

#if 0
	// Debug Adjacency Lists
	cout << "Y" << vectorY.size() << "): ";
	for(unsigned int i=0;i<arrayY->getNumberOfElements();i++){
		cout << arrayY->get(i) << " ";
	}
	cout << endl;

	cout << "Z" << vectorZ.size() << "): ";
	for(unsigned int i=0;i<arrayZ->getNumberOfElements();i++){
		cout << arrayZ->get(i) << " ";
	}
	cout << endl;
#endif

}
Exemplo n.º 6
0
void BasicHDT::loadTriplesFromHDTs(const char** fileNames, size_t numFiles, const char* baseUri, ProgressListener* listener) {
	// Generate Triples
	ModifiableTriples* triplesList = new TriplesList(spec);
	//ModifiableTriples *triplesList = new TriplesKyoto(spec);
	//ModifiableTriples *triplesList = new TripleListDisk();
	StopWatch st;
	IntermediateListener iListener(listener);
	try {
		NOTIFY(listener, "Loading Triples", 0, 100);
		iListener.setRange(0, 60);

		triplesList->startProcessing(&iListener);

		TriplesLoader tripLoader(dictionary, triplesList, &iListener);

		// FIXME: Import from files

		uint64_t totalOriginalSize=0;
		BasicHDT hdt;

		for(size_t i=0;i<numFiles;i++) {
			const char *fileName = fileNames[i];
	        cout << endl << "Load triples from " << fileName << endl;
	        hdt.mapHDT(fileName);
	        Dictionary *dict = hdt.getDictionary();

	        // Create mapping arrays
	        cout << "Generating mapping subjects" << endl;
	        unsigned int nsubjects = dict->getNsubjects();
	        LogSequence2 subjectMap(bits(dictionary->getNsubjects()), nsubjects);
	        subjectMap.resize(nsubjects);
	        for(unsigned int i=0;i<nsubjects;i++) {
	        	string str = dict->idToString(i+1, SUBJECT);
	        	unsigned int newid = dictionary->stringToId(str, SUBJECT);
	        	subjectMap.set(i, newid);
	        }

	        cout << "Generating mapping predicates" << endl;
	        unsigned int npredicates = dict->getNpredicates();
	        LogSequence2 predicateMap(bits(dictionary->getNpredicates()), npredicates);
	        predicateMap.resize(npredicates);
	        for(unsigned int i=0;i<npredicates;i++) {
	        	string str = dict->idToString(i+1, PREDICATE);
	        	unsigned int newid = dictionary->stringToId(str, PREDICATE);
	        	predicateMap.set(i, newid);
	        }

	        cout << "Generating mapping objects" << endl;
	        unsigned int nobjects = dict->getNobjects();
	        LogSequence2 objectMap(bits(dictionary->getNobjects()), nobjects);
	        objectMap.resize(nobjects);
	        for(unsigned int i=0;i<nobjects;i++) {
	        	string str = dict->idToString(i+1, OBJECT);
	        	unsigned int newid = dictionary->stringToId(str, OBJECT);
	        	objectMap.set(i, newid);
	        }

	        totalOriginalSize += hdt.getHeader()->getPropertyLong("_:statistics", HDTVocabulary::ORIGINAL_SIZE.c_str());

	        size_t numtriples = hdt.getTriples()->getNumberOfElements();
	        IteratorTripleID *it = hdt.getTriples()->searchAll();

	        TripleID newTid;
	        char str[100];
	        long long int j = 0;
	        while(it->hasNext()) {
	        	TripleID *tid = it->next();

	        	newTid.setAll(
	        			(unsigned int)subjectMap.get(tid->getSubject()-1),
	        			(unsigned int)predicateMap.get(tid->getPredicate()-1),
	        			(unsigned int)objectMap.get(tid->getObject()-1)
	        			);

	        	triplesList->insert(newTid);

	        	if ((listener != NULL) && (j % 100000) == 0) {
	        		sprintf(str, "%lld triples added.", j);
	        		listener->notifyProgress((j*100)/numtriples, str);
	        	}
	            j++;
	        }
	        delete it;
		}

		triplesList->stopProcessing(&iListener);

		// SORT & Duplicates
		TripleComponentOrder order = parseOrder(spec.get("triplesOrder").c_str());
		if (order == Unknown) {
			order = SPO;
		}

		iListener.setRange(80, 85);
		triplesList->sort(order, &iListener);

		iListener.setRange(85, 90);
		triplesList->removeDuplicates(&iListener);

		header->insert("_:statistics", HDTVocabulary::ORIGINAL_SIZE, totalOriginalSize);
	} catch (const char *e) {
		cout << "Catch exception triples" << e << endl;
		delete triplesList;
		throw e;
	} catch (char *e) {
		cout << "Catch exception triples" << e << endl;
		delete triplesList;
		throw e;
	}
	if (triples->getType() == triplesList->getType()) {
		delete triples;
		triples = triplesList;
	} else {
		iListener.setRange(90, 100);
		try {
			triples->load(*triplesList, &iListener);
		} catch (const char* e) {
			delete triplesList;
			throw e;
		}
		delete triplesList;
	}

	//cout << triples->getNumberOfElements() << " triples added in " << st << endl << endl;

}