예제 #1
0
/**
 * Copy every triple provided by `input` into this container.
 *
 * Walks the iterator returned by input.searchAll() and hands each triple
 * to insert(). The iterator is deleted once it has been exhausted.
 *
 * @param input    Source of the triples to copy.
 * @param listener Not used by this implementation.
 */
void TriplesKyoto::load(ModifiableTriples &input, ProgressListener *listener)
{
	IteratorTripleID *cursor = input.searchAll();

	for( ; cursor->hasNext(); ) {
		TripleID *current = cursor->next();
		insert(*current);
	}

	delete cursor;
}
예제 #2
0
/**
 * Test driver that exercises RandomAccessIterator on an HDT file.
 *
 * Usage: <program> [-h] [-q <query>] [-o <output>] [-m] <hdtfile>
 *
 * The -q, -o and -m values are parsed and stored but not used by this
 * program. The first positional argument is mapped with
 * HDTManager::mapHDT(), the pattern TripleID(0,1,0) is searched, and the
 * results are printed first sequentially and then through a
 * RandomAccessIterator (forwards, backwards, forwards again and in a
 * fixed scattered order). The `#if 0` branch is an older, disabled
 * variant of the test that uses hasPrevious()/previous() instead.
 *
 * @return 0 on success or when help was requested with -h; 1 on a usage
 *         error or when a string exception thrown by the library is caught.
 */
int main(int argc, char **argv) {
	int c;
	string query, inputFile, outputFile;
	bool measure = false;

	while( (c = getopt(argc,argv,"hq:o:m"))!=-1) {
		switch(c) {
		case 'h':
			// Asking for help is not an error: print the usage and stop,
			// instead of falling through to the "missing HDT file" check.
			help();
			return 0;
		case 'q':
			query = optarg;
			break;
		case 'o':
			outputFile = optarg;
			break;
		case 'm':
			measure = true;
			break;
		default:
			cout << "ERROR: Unknown option" << endl;
			help();
			return 1;
		}
	}

	if(argc-optind<1) {
		cout << "ERROR: You must supply an HDT File" << endl << endl;
		help();
		return 1;
	}

	inputFile = argv[optind];


	try {
		HDT *hdt = HDTManager::mapHDT(inputFile.c_str());

		TripleID pat(0,1,0);
#if 0
		IteratorTripleID *it = hdt->getTriples()->search(pat);

		StopWatch st;
		unsigned int numTriples = 0;
		while(it->hasNext() && numTriples < 4) {
			TripleID *ts = it->next();
			cout << *ts << endl;

			numTriples++;
		}

		cout << "------" << endl;

		while(it->hasPrevious()) {
			TripleID *ts = it->previous();
			cout << *ts << endl;
		}

		cout << "------" << endl;

		numTriples=0;
		while(it->hasNext() && numTriples<10) {
			TripleID *ts = it->next();
			cout << *ts << endl;
			numTriples++;
		}

		delete it;
		cout << numTriples << " results in " << st << endl;

#else
		IteratorTripleID *it = hdt->getTriples()->search(pat);

#if 1
		// Reference listing: dump everything sequentially, then rewind so
		// the random-access wrapper starts from the first result.
		while(it->hasNext()) {
			cout << *it->next() << endl;
		}
		it->goToStart();
		cout << "------" << endl;
#endif

		RandomAccessIterator rit(it);

		// Forward pass over (at most) the first 20 elements
		int i;
		for(i=0;i<rit.getNumElements() && i < 20;i++) {
			TripleID *tid = rit.get(i);
			cout << i << " => " << *tid << endl;
		}

		cout << "------" << endl;

		// Backward pass, starting from the last index visited above
		for(i-- ; i>=0; i--) {
			TripleID *tid = rit.get(i);
			cout << i << " => " << *tid << endl;
		}

		cout << "------" << endl;

		// Forward pass again
		for(i=0;i<rit.getNumElements() && i<20;i++) {
			TripleID *tid = rit.get(i);
			cout << i << " => " << *tid << endl;
		}

		cout << "------" << endl;

		// Scattered accesses.
		// NOTE(review): these indexes are not checked against
		// getNumElements(); the test presumably expects at least 10 results.
		cout << 2 << " => " << *rit.get(2) << endl;
		cout << 7 << " => " << *rit.get(7) << endl;
		cout << 1 << " => " << *rit.get(1) << endl;
		cout << 5 << " => " << *rit.get(5) << endl;
		cout << 0 << " => " << *rit.get(0) << endl;
		cout << 9 << " => " << *rit.get(9) << endl;
		cout << 8 << " => " << *rit.get(8) << endl;

		delete it;

#endif
		delete hdt;
	} catch (char *e) {
		cout << "ERROR: " << e << endl;
		// Report the failure to the caller instead of exiting with 0
		return 1;
	} catch (const char *e) {
		cout << "ERROR: " << e << endl;
		return 1;
	}

	return 0;
}
예제 #3
0
/**
 * Build the Y and Z sequences of this CompactTriples from a triple list.
 *
 * The input is sorted with triples.sort(order) and then traversed once.
 * Every triple is rearranged with swapComponentOrder(triple, SPO, order)
 * and read as (x, y, z) through getSubject()/getPredicate()/getObject().
 * A 0 is appended to the Y sequence each time x changes, and a 0 is
 * appended to the Z sequence each time x or y changes; the y and z values
 * themselves are appended after those separators. The two sequences are
 * finally added to streamY and streamZ. numTriples is incremented once
 * per triple read.
 *
 * @param triples  Triples to import; this call sorts them.
 * @param listener Progress listener handed to NOTIFYCOND.
 */
void CompactTriples::load(ModifiableTriples &triples, ProgressListener *listener) {
	triples.sort(order);

	vector<unsigned int> vectorY, vectorZ;
	unsigned int prevX = 0, prevY = 0;

	IteratorTripleID *iter = triples.searchAll();

	// The first triple has no predecessor, so it never emits a separator
	if(iter->hasNext()) {
		TripleID *first = iter->next();

		swapComponentOrder(first, SPO, order);

		prevX = first->getSubject();
		prevY = first->getPredicate();

		vectorY.push_back(prevY);
		vectorZ.push_back(first->getObject());

		numTriples++;
	}

	// Remaining triples: compare against the previous one to place separators
	while(iter->hasNext()) {
		TripleID *current = iter->next();

		swapComponentOrder(current, SPO, order);

		const unsigned int curX = current->getSubject();
		const unsigned int curY = current->getPredicate();
		const unsigned int curZ = current->getObject();

		const bool xChanged = (curX != prevX);
		const bool yListChanged = xChanged || (curY != prevY);

		if(xChanged) {
			// New x: close the previous list of y values
			vectorY.push_back(0);
		}
		if(yListChanged) {
			// New (x,y) pair: record y and close the previous list of z values
			vectorY.push_back(curY);
			vectorZ.push_back(0);
		}
		vectorZ.push_back(curZ);

		prevX = curX;
		prevY = curY;

		NOTIFYCOND(listener, "Converting to CompactTriples.", numTriples, triples.getNumberOfElements());
		numTriples++;
	}

	delete iter;

	VectorUIntIterator itY(vectorY);
	VectorUIntIterator itZ(vectorZ);

	streamY->add(itY);
	streamZ->add(itZ);

#if 0
	// Debug Adjacency Lists
	cout << "Y" << vectorY.size() << "): ";
	for(unsigned int i=0;i<arrayY->getNumberOfElements();i++){
		cout << arrayY->get(i) << " ";
	}
	cout << endl;

	cout << "Z" << vectorZ.size() << "): ";
	for(unsigned int i=0;i<arrayZ->getNumberOfElements();i++){
		cout << arrayZ->get(i) << " ";
	}
	cout << endl;
#endif

}
예제 #4
0
파일: BasicHDT.cpp 프로젝트: akjoshi/hdt-it
void BasicHDT::loadTriplesFromHDTs(const char** fileNames, size_t numFiles, const char* baseUri, ProgressListener* listener) {
	// Generate Triples
	ModifiableTriples* triplesList = new TriplesList(spec);
	//ModifiableTriples *triplesList = new TriplesKyoto(spec);
	//ModifiableTriples *triplesList = new TripleListDisk();
	StopWatch st;
	IntermediateListener iListener(listener);
	try {
		NOTIFY(listener, "Loading Triples", 0, 100);
		iListener.setRange(0, 60);

		triplesList->startProcessing(&iListener);

		TriplesLoader tripLoader(dictionary, triplesList, &iListener);

		// FIXME: Import from files

		uint64_t totalOriginalSize=0;
		BasicHDT hdt;

		for(size_t i=0;i<numFiles;i++) {
			const char *fileName = fileNames[i];
	        cout << endl << "Load triples from " << fileName << endl;
	        hdt.mapHDT(fileName);
	        Dictionary *dict = hdt.getDictionary();

	        // Create mapping arrays
	        cout << "Generating mapping subjects" << endl;
	        unsigned int nsubjects = dict->getNsubjects();
	        LogSequence2 subjectMap(bits(dictionary->getNsubjects()), nsubjects);
	        subjectMap.resize(nsubjects);
	        for(unsigned int i=0;i<nsubjects;i++) {
	        	string str = dict->idToString(i+1, SUBJECT);
	        	unsigned int newid = dictionary->stringToId(str, SUBJECT);
	        	subjectMap.set(i, newid);
	        }

	        cout << "Generating mapping predicates" << endl;
	        unsigned int npredicates = dict->getNpredicates();
	        LogSequence2 predicateMap(bits(dictionary->getNpredicates()), npredicates);
	        predicateMap.resize(npredicates);
	        for(unsigned int i=0;i<npredicates;i++) {
	        	string str = dict->idToString(i+1, PREDICATE);
	        	unsigned int newid = dictionary->stringToId(str, PREDICATE);
	        	predicateMap.set(i, newid);
	        }

	        cout << "Generating mapping objects" << endl;
	        unsigned int nobjects = dict->getNobjects();
	        LogSequence2 objectMap(bits(dictionary->getNobjects()), nobjects);
	        objectMap.resize(nobjects);
	        for(unsigned int i=0;i<nobjects;i++) {
	        	string str = dict->idToString(i+1, OBJECT);
	        	unsigned int newid = dictionary->stringToId(str, OBJECT);
	        	objectMap.set(i, newid);
	        }

	        totalOriginalSize += hdt.getHeader()->getPropertyLong("_:statistics", HDTVocabulary::ORIGINAL_SIZE.c_str());

	        size_t numtriples = hdt.getTriples()->getNumberOfElements();
	        IteratorTripleID *it = hdt.getTriples()->searchAll();

	        TripleID newTid;
	        char str[100];
	        long long int j = 0;
	        while(it->hasNext()) {
	        	TripleID *tid = it->next();

	        	newTid.setAll(
	        			(unsigned int)subjectMap.get(tid->getSubject()-1),
	        			(unsigned int)predicateMap.get(tid->getPredicate()-1),
	        			(unsigned int)objectMap.get(tid->getObject()-1)
	        			);

	        	triplesList->insert(newTid);

	        	if ((listener != NULL) && (j % 100000) == 0) {
	        		sprintf(str, "%lld triples added.", j);
	        		listener->notifyProgress((j*100)/numtriples, str);
	        	}
	            j++;
	        }
	        delete it;
		}

		triplesList->stopProcessing(&iListener);

		// SORT & Duplicates
		TripleComponentOrder order = parseOrder(spec.get("triplesOrder").c_str());
		if (order == Unknown) {
			order = SPO;
		}

		iListener.setRange(80, 85);
		triplesList->sort(order, &iListener);

		iListener.setRange(85, 90);
		triplesList->removeDuplicates(&iListener);

		header->insert("_:statistics", HDTVocabulary::ORIGINAL_SIZE, totalOriginalSize);
	} catch (const char *e) {
		cout << "Catch exception triples" << e << endl;
		delete triplesList;
		throw e;
	} catch (char *e) {
		cout << "Catch exception triples" << e << endl;
		delete triplesList;
		throw e;
	}
	if (triples->getType() == triplesList->getType()) {
		delete triples;
		triples = triplesList;
	} else {
		iListener.setRange(90, 100);
		try {
			triples->load(*triplesList, &iListener);
		} catch (const char* e) {
			delete triplesList;
			throw e;
		}
		delete triplesList;
	}

	//cout << triples->getNumberOfElements() << " triples added in " << st << endl << endl;

}
예제 #5
0
/**
 * Runs substring-filter queries, read from a file, against an indexed HDT.
 *
 * Usage: <program> [-h] [-q <query>] [-o <output>] [-m]
 *                  [-f <filterfile> | -F <filterfile>] <hdtfile>
 *
 * Each non-empty line of the filter file has the form "property;value";
 * lines without ';' are ignored. For every line, the object IDs returned by
 * substringToId(value) are combined with the predicate ID of `property`
 * and searched in the triples:
 *   -f (Q3): prints "<subject> <object>" for every match.
 *   -F (Q4): for every matching subject, prints all of its triples.
 * When both are given, the -f file and the Q3 output are used.
 * Results go to the file given with -o, or to standard output otherwise;
 * timing and progress information goes to standard error.
 * The -q and -m values are parsed but not used by this program.
 *
 * @return 0 on success or when help was requested with -h; 1 on a usage
 *         error or when a string exception is caught.
 */
int main(int argc, char **argv) {
	int c;
	string query, inputFile, outputFile, filter1, filter2;
	bool measure = false;

	while ((c = getopt(argc, argv, "hq:o:mf:F:")) != -1) {
		switch (c) {
		case 'h':
			// Asking for help is not an error: print the usage and stop,
			// instead of falling through to the "missing HDT file" check.
			help();
			return 0;
		case 'q':
			query = optarg;
			break;
		case 'o':
			outputFile = optarg;
			break;
		case 'm':
			measure = true;
			break;
		case 'f':
			filter1 = optarg;
			break;
		case 'F':
			filter2 = optarg;
			break;
		default:
			cout << "ERROR: Unknown option" << endl;
			help();
			return 1;
		}
	}

	if (argc - optind < 1) {
		cout << "ERROR: You must supply an HDT File" << endl << endl;
		help();
		return 1;
	}

	inputFile = argv[optind];


	try {
		HDT *hdt = HDTManager::mapIndexedHDT(inputFile.c_str());

		if (filter1 != "" || filter2 != "") {
			// Destination of the query results: the -o file when given,
			// standard output otherwise.
			ostream *out;
			ofstream outF;

			if (outputFile != "") {
				outF.open(outputFile.c_str());
				if (!outF.good())
					throw "unable to open output file";
				out = &outF;
			} else {
				out = &cout;
			}

			string infile;
			if (filter1 != "")
				infile = filter1;
			else
				infile = filter2;

			std::ifstream file(infile.c_str());
			if (!file.good())
				throw "unable to open filter file";

			string linea = "";
			string property = "";
			string value = "";

			StopWatch st_total;

			// Accumulated over all queries (it is not reset between them)
			size_t totalQueryResults = 0;

			size_t numQuery=0;

			// Loop on the result of getline() so that any read failure,
			// not only end-of-file, terminates the loop.
			while (getline(file, linea)) {
				if(linea.length()==0)
					continue;
				size_t pos = linea.find(';');

				if (pos != std::string::npos) {
					property = linea.substr(0, pos);
					value = linea.substr(pos + 1);

					cerr<<"Query "<<numQuery << ": "<<linea<<endl;
					(*out)<<">>> Query "<<numQuery << ": "<<linea<<endl;
					(*out)<<"property:"<<property<<endl;
					(*out)<<"value:"<<value<<endl<<endl;

					StopWatch st;

					hdt::LiteralDictionary *dict = dynamic_cast<hdt::LiteralDictionary *>(hdt->getDictionary());
					if(dict==NULL) {
						cerr << "This dictionary does not support substring search" << endl;
						break;
					}
					hdt::Triples *triples = hdt->getTriples();

					// NOTE(review): `results` is filled in by substringToId()
					// and is never released here. How it is allocated is not
					// visible in this file - confirm ownership and free it
					// accordingly.
					uint32_t *results = NULL;
					size_t numResults = dict->substringToId((unsigned char *) value.c_str(), value.length(), &results);

					TripleID pattern(0, dict->stringToId(property, PREDICATE), 0);

					for (size_t i = 0; i < numResults; i++) {

						pattern.setObject(results[i]);

						string objStr = dict->idToString(results[i], OBJECT);

						IteratorTripleID *it = triples->search(pattern);

						//iterate over the first pattern
						while (it->hasNext()) {
							TripleID *ts = it->next();

							if (filter1 != "") {
								// QUERY Q3
								(*out) << dict->idToString(ts->getSubject(), SUBJECT) << " " << objStr << endl;
								totalQueryResults++;

							} else {
								// QUERY Q4
								TripleID pat2(ts->getSubject(), 0, 0);
								string subjStr = dict->idToString(ts->getSubject(), SUBJECT);

								IteratorTripleID *it2 = triples->search(pat2);
								while(it2->hasNext()) {
									TripleID *inner = it2->next();

									(*out) << subjStr << " " << dict->idToString(inner->getPredicate(), PREDICATE) <<" ";

									// Reuse the already converted string for the matched object
									if(inner->getObject()==results[i]) {
										(*out) << objStr << endl;
									} else {
										(*out) << dict->idToString(inner->getObject(), OBJECT) << endl;
									}
									totalQueryResults++;
								}
								delete it2;
							}
						}
						delete it;
					}

					(*out) << ">>> Results: " << totalQueryResults << endl;
					cerr << "Query " << numQuery << " Results: " << totalQueryResults << " in " << st << endl << endl;
					numQuery++;
				}
			}
			cerr << "Total time: " << st_total << endl;

			if (outputFile != "") {
				outF.close();
			}

			file.close();

		}

		delete hdt;
	} catch (char *e) {
		cout << "ERROR: " << e << endl;
		// Report the failure to the caller instead of exiting with 0
		return 1;
	} catch (const char *e) {
		cout << "ERROR: " << e << endl;
		return 1;
	}

	return 0;
}