Пример #1
0
/**
 * Run one triple-pattern query against an HDT and report the matches.
 *
 * The query text is parsed with TripleString::read(). A component that is
 * exactly "?" is treated as a wildcard and handed to HDT::search() as an
 * empty string.
 *
 * @param hdt     HDT instance to search.
 * @param query   Textual triple pattern.
 * @param out     Receives each matching triple (one per line) unless
 *                measure is set.
 * @param measure When true the results are only counted, not printed.
 *
 * The number of results and the StopWatch value are written to cout.
 * Iteration stops early once the global interruptSignal is non-zero; the
 * flag is cleared again before returning. Errors thrown as C strings are
 * printed to cerr.
 */
void iterate(HDT *hdt, char *query, ostream &out, bool measure) {
	TripleString tripleString;
	tripleString.read(query);

	// Own copies of the three components: the pattern then stays valid no
	// matter whether the getters return references or temporaries.
	std::string subj(tripleString.getSubject().c_str());
	std::string pred(tripleString.getPredicate().c_str());
	std::string obj(tripleString.getObject().c_str());
	if(subj=="?") {
		subj.clear();
	}
	if(pred=="?") {
		pred.clear();
	}
	if(obj=="?") {
		obj.clear();
	}

#if 0
	cout << "Subject: |" << subj <<"|"<< endl;
	cout << "Predicate: |" << pred <<"|"<< endl;
	cout << "Object: |" << obj << "|"<<endl;
#endif

	// Declared outside the try block so it is released on the error path too.
	IteratorTripleString *it = NULL;
	try {
		it = hdt->search(subj.c_str(), pred.c_str(), obj.c_str());

		StopWatch st;
		unsigned int numTriples=0;
		while(it->hasNext() && interruptSignal==0) {
			TripleString *ts = it->next();
			if(!measure)
				out << *ts << endl;
			numTriples++;
		}
		cout << numTriples << " results in " << st << endl;
	} catch (const char *e) {
		// A handler for `char *` does not match a thrown string literal
		// (type `const char *`); `const char *` matches both.
		cerr << e << endl;
	}
	delete it;

	interruptSignal=0;	// Interrupt caught, enable again.
}
Пример #2
0
int main(int argc, char *argv[]) {

	int c;
	string inputFile, outputFile, limit;
	string type = "null";
	string dir = "data/hdt/";
	while ((c = getopt(argc, argv, "hi:t:l:o:d:")) != -1) {
		switch (c) {
		case 'h':
			help();
			break;
		case 'd':
			dir = optarg;
			break;
		case 'i':
			inputFile = optarg;
			break;
		case 'o':
			outputFile = optarg;
			break;
		case 't':
			type = optarg;
			break;
		case 'l':
			limit = optarg;
			break;
		default:
			cout << "ERROR: Unknown option" << endl;
			help();
			return 1;
		}
	}
	// Load HDT file
	vector<HDT*> HDTversions;

	int numVersions = 0;
	if (limit.length() > 0) {
		numVersions = atoi((char*) limit.c_str());
	} else {
		cerr << "[WARNING] limit not provided, trying to load 58 versions"
				<< endl;
		numVersions = 58;
	}
	ostream *out;
	ofstream outF;

	if (outputFile != "") {
		outF.open(outputFile.c_str());
		out = &outF;
	} else {
		out = &cout;
	}

	for (int i = 0; i < numVersions; i++) {
		std::stringstream sstm;
		sstm << dir << (i + 1) << ".hdt";
		cout << "Loading " << sstm.str() << endl;
		HDTversions.push_back(
				HDTManager::mapIndexedHDT((char*) sstm.str().c_str()));
		//cout<<"loaded 1.hdt! Press any key to load 2.hdt"<<endl;
		//int c = getchar();

	}

	cout << "WARMUP... " << endl;
	for (int i = 0; i < numVersions; i++) {
		// Enumerate all different predicates
		cout << "Dataset " << (i + 1) << " contains "
				<< HDTversions[i]->getDictionary()->getNpredicates()
				<< " predicates." << endl;

		// Enumerate all triples matching a pattern ("" means any)
		IteratorTripleString *it = HDTversions[i]->search("", "", "");
		int count = 0;
		while (it->hasNext() && count < 100) {
			TripleString *triple = it->next();
			//cout << "Result Warmup: " << triple->getSubject() << ", " << triple->getPredicate() << ", " << triple->getObject() << endl;
			count++;
		}
		delete it; // Remember to delete iterator to avoid memory leaks!

		/*IteratorUCharString *itPred = HDTversions[i]->getDictionary()->getPredicates();
		 while(itPred->hasNext()) {
		 unsigned char *str = itPred->next(); // Warning this pointer is only valid until next call to next();
		 cout << str << endl;
		 itPred->freeStr(str);
		 }
		 delete itPred;  // Remember to delete iterator to avoid memory leaks!
		 */
	}
	cout << "... WARMUP finished!" << endl;

	if (type == "null") {
		cerr << "[ERROR] Please provide a type of query (-t [s,p,o])" << endl;
		help();
		exit(0);
	}

	//read queries
	cout << "opening file:" << inputFile << endl;
	std::ifstream file((char*) inputFile.c_str());
	cout << "opened! " << endl;

	if (!file.good())
		throw "unable to open filter file";
	string linea = "";

	vector<double> times(numVersions, 0);
	int num_queries = 0;
	while (!file.eof()) {
		getline(file, linea);
		cout << "Reading line:" << linea << endl;

		if (linea.length() == 0)
			continue;
		size_t pos = linea.find(' ');

		if (pos != std::string::npos) {
			string query = linea.substr(0, pos);
			string subject = "", predicate = "", object = "";
			if (type == "s") {
				subject = query;
			} else if (type == "p") {
				predicate = query;
			} else if (type == "o") {
				object = query;
			} else {
				vector<string> elements = split(linea, " ");
				if (type == "sp") {
					subject = elements[0];
					predicate = elements[1];
				} else if (type == "so") {
					subject = elements[0];
					object = elements[1];
				} else if (type == "po") {
					predicate = elements[0];
					object = elements[1];
				} else if (type == "spo") {
					subject = elements[0];
					predicate = elements[1];
					object = elements[2];
				}
			}

			for (int i = 0; i < numVersions; i++) {
				StopWatch st;
				IteratorTripleString *it = HDTversions[i]->search(
						subject.c_str(), predicate.c_str(), object.c_str());
				int numResults = 0;
				while (it->hasNext()) {
					TripleString *triple = it->next();
					//cout << "Result: " << triple->getSubject() << ", " << triple->getPredicate() << ", " << triple->getObject() << endl;
					numResults++;
				}
				delete it;
				double time = st.toMillis();
				cout << numResults << " Results in " << time << " ms" << endl;
				times[i] = times[i] + time;
			}
			num_queries++;
		}
	}
	//compute mean of queries
	*out << "<version>,<mean_time>,<total>" << endl;
	for (int i = 0; i < numVersions; i++) {
		*out << (i + 1) << "," << times[i] / num_queries<<","<<times[i] << endl;
	}

	for (int i = 0; i < numVersions; i++) {
		delete HDTversions[i]; // Remember to delete instance when no longer needed!
	}
	if (outputFile != "") {
		outF.close();
	}
}
Пример #3
0
/**
 * Dump the header section of an HDT file as triples.
 *
 * Usage: <program> [-h] [-o <output file>] <HDT file>
 *
 * The header triples are always printed to standard output, one per line and
 * followed by " .". With -o they are additionally serialized to the given
 * file through RDFSerializerNTriples. Input files whose name ends in ".gz"
 * are read through igzstream when HAVE_LIBZ is defined.
 *
 * @return 0 on success, 1 on a usage error or when an error was reported.
 */
int main(int argc, char **argv) {
	int c;
	string outputFile;

	while( (c = getopt(argc,argv,"ho:"))!=-1) {
		switch(c) {
		case 'h':
			help();
			break;
		case 'o':
			outputFile = optarg;
			break;
		default:
			cout << "ERROR: Unknown option" << endl;
			help();
			return 1;
		}
	}

	if(argc-optind<1) {
		cout << "ERROR: You must supply an HDT File" << endl << endl;
		help();
		return 1;
	}

	// Owned objects live outside the try block so that they are released on
	// the error paths as well.
	istream *in=NULL;
	Header *header=NULL;
	IteratorTripleString *it=NULL;
	int exitCode=0;

	try {
#ifdef HAVE_LIBZ
		igzstream *inGz=NULL;
#endif
		ifstream *inF=NULL;

		string inputFile = argv[optind];
		// A name without any '.' has no suffix (the unchecked npos+1 would
		// otherwise select the whole file name).
		const std::string::size_type dot = inputFile.find_last_of('.');
		const std::string suffix =
				(dot==std::string::npos) ? std::string() : inputFile.substr(dot + 1);

		if( suffix == "gz"){
			#ifdef HAVE_LIBZ
				in = inGz = new igzstream(inputFile.c_str());
			#else
				throw std::runtime_error("Support for GZIP was not compiled in this version. Please Decompress the file before importing it.");
			#endif
		} else {
			in = inF = new ifstream(inputFile.c_str(), ios::binary);
		}

		if (!in->good())
		{
			cerr << "Error opening file " << inputFile << endl;
			throw std::runtime_error("Error opening file for reading");
		}

		ControlInformation controlInformation;

		// Load Global Control Information
		controlInformation.load(*in);

		// Load header
		controlInformation.load(*in);
		header = HDTFactory::readHeader(controlInformation);
		header->load(*in, controlInformation);

		if( suffix == "gz") {
#ifdef HAVE_LIBZ
			inGz->close();
#endif
		} else {
			inF->close();
		}

		// Print the header triples to standard output.
		it = header->search("","","");

		while(it->hasNext()) {
			TripleString *ts = it->next();
			cout << *ts << " ."<< endl;
		}

		// The loop above has drained `it`. The original handed that same
		// iterator to the serializer, which therefore had nothing left to
		// read. For the file we start a fresh search; for standard output the
		// triples have already been printed above.
		// NOTE(review): assumes serialize() emits nothing for an iterator
		// without remaining elements.
		if(outputFile!="") {
			ofstream out(outputFile.c_str());
			if(!out.good()){
				throw std::runtime_error("Could not open output file.");
			}
			delete it;
			it = NULL;	// avoid a double delete should search() throw
			it = header->search("","","");
			RDFSerializerNTriples serializer(out, NTRIPLES);
			serializer.serialize(it);
			out.close();
		}
	} catch (const std::exception& e) {
		cerr << "ERROR: " << e.what() << endl;
		exitCode = 1;
	} catch (const char *e) {
		// Errors raised as plain C strings.
		cerr << "ERROR: " << e << endl;
		exitCode = 1;
	}

	delete it;
	delete header;
	delete in;	// std::istream has a virtual destructor

	return exitCode;
}