예제 #1
0
/**
 * Splits the object terms of an HDT dictionary into three text files.
 *
 * Maps the HDT file named by argv[1], walks every string produced by the
 * dictionary's getObjects() iterator and writes it, one per line, to:
 *   - lit.txt    when the term starts with '"'
 *   - blank.txt  when the term starts with '_'
 *   - uri.txt    for everything else
 *
 * @param argc Number of command-line arguments.
 * @param argv argv[1] is the path of the HDT file to read.
 * @return 0 on success, 1 when the argument is missing or an error string
 *         was thrown while processing.
 */
int main(int argc, char **argv) {

	// argv[1] is dereferenced below, so refuse to run without it.
	if(argc<2) {
		cerr << "ERROR: You must supply an HDT File" << endl;
		return 1;
	}

	// Declared outside the try block so both are released on the error path too.
	HDT *hdt = NULL;
	IteratorUCharString *it = NULL;
	int status = 0;

	try {
		hdt = HDTManager::mapHDT(argv[1]);
		it = hdt->getDictionary()->getObjects();

		ofstream lit("lit.txt");
		ofstream blk("blank.txt");
		ofstream uri("uri.txt");
		while(it->hasNext()) {
			// NOTE(review): ownership of the returned buffer is not visible
			// here; confirm whether the iterator expects it to be released.
			unsigned char *str = it->next();
			const char *text = reinterpret_cast<const char *>(str);
			if(*str=='"') {
				// Literal
				lit << text << endl;
			} else if(*str=='_') {
				// Blank node
				blk << text << endl;
			} else {
				// URI
				uri << text << endl;
			}
		}
		lit.close();
		blk.close();
		uri.close();
	} catch(const char *str) {
		// A thrown `char *` is matched by this handler as well, so a
		// separate catch(char *) after it would never be reached.
		cerr << str << endl;
		status = 1;
	}

	// Deleting a null pointer is a no-op, so this is safe after a failure.
	delete it;
	delete hdt;

	return status;
}
예제 #2
0
파일: conops.cpp 프로젝트: akjoshi/hdt-it
/**
 * Rewrites an HDT file with its triples re-encoded in OPS order.
 *
 * Usage: <program> [options] <input HDT> <output HDT>
 *
 * The input is mapped, all triples are copied into a TriplesList, sorted in
 * OPS order and loaded into a new BitmapTriples. The output file then gets
 * the original header (with its "_:triples" node repopulated from the new
 * triples), the original dictionary and the new triples section.
 *
 * @param argc Number of command-line arguments.
 * @param argv Options followed by the input path and the output path.
 * @return 0 on success, 1 on bad usage or when an error string was thrown.
 */
int main(int argc, char **argv) {
	int c;
	string inputFile, outputFile;

	while( (c = getopt(argc,argv,"hq:o:m"))!=-1) {
		switch(c) {
		case 'h':
		case 'q':
		case 'o':
		case 'm':
			// Accepted for command-line compatibility but without effect:
			// nothing reads the query/measure settings, and the output path
			// always comes from the second positional argument.
			break;
		default:
			cout << "ERROR: Unknown option" << endl;
			return 1;
		}
	}

	if(argc-optind<2) {
		cout << "ERROR: You must supply an input and HDT File" << endl << endl;
		return 1;
	}

	inputFile = argv[optind];
	outputFile = argv[optind+1];

	ConvertProgress progress;
	StopWatch st;

	// Declared outside the try block so it is released on the error path too.
	HDT *hdt = NULL;
	int status = 0;

	try {
		// LOAD
		hdt = HDTManager::mapHDT(inputFile.c_str());

		// CONVERT triples to TripleList
		TriplesList tlist;
		Triples *triples = hdt->getTriples();
		cout << "Old Triples -> TriplesList" << endl;
		st.reset();
		IteratorTripleID *it = triples->searchAll();
		tlist.insert(it);
		delete it;
		cout << "         Old Triples -> TriplesList time" << st <<  endl;

		// Convert tlist to OPS
		cout << "TriplesList sort OPS" << endl;
		st.reset();
		tlist.sort(OPS, &progress);
		cout << "    TriplesList sort OPS time: " << st << endl;

		// Generate new OPS BitmapTriples
		cout << "TriplesList to new BitmapTriples" << endl;
		HDTSpecification spec;
		spec.set("triplesOrder", "OPS");
		BitmapTriples bt(spec);
		st.reset();
		bt.load(tlist, &progress);
		cout << "       TriplesList to new BitmapTriples time" << st << endl;

		// Update Header: drop the old "_:triples" statements and let the new
		// triples object describe itself.
#if 1
		cout << "Update Header" << endl;
		string rootNode("_:triples");
		TripleString ts (rootNode, "", "");
		hdt->getHeader()->remove(ts);
		bt.populateHeader(*hdt->getHeader(), "_:triples");
#endif

		// SAVE
		// The file is opened only now, after all reading from the input is
		// done and the new triples are built.
		cout << "Save to " << outputFile << endl;
		ofstream out(outputFile.c_str(), ios::binary | ios::out);
		if(!out.good()) {
			// Without this check every save() below would silently write
			// into a failed stream.
			throw "Could not open output file";
		}
		ControlInformation ci;

		ci.clear();
		ci.setType(GLOBAL);
		ci.setFormat(HDTVocabulary::HDT_CONTAINER);
		ci.save(out);

		// HEADER
		ci.clear();
		ci.setType(HEADER);
		hdt->getHeader()->save(out, ci, NULL);

		// DICTIONARY
		ci.clear();
		ci.setType(DICTIONARY);
		hdt->getDictionary()->save(out, ci, NULL);

		// NEW TRIPLES
		ci.clear();
		ci.setType(TRIPLES);
		bt.save(out, ci, NULL);

		out.close();
	} catch (const char *e) {
		// Also matches a thrown `char *`, so one handler covers both.
		cout << "ERROR: " << e << endl;
		status = 1;
	}

	// Deleting a null pointer is a no-op, so this is safe if mapHDT threw.
	delete hdt;

	return status;
}
예제 #3
0
/**
 * Writes dictionary and degree statistics for an HDT file.
 *
 * Usage: <program> [-h] [-i <input HDT>] [-o <output prefix>] [<input HDT>]
 *
 * The input file may be given with -i or as the first positional argument.
 * When -o is absent the input path is used as the output prefix. Files are
 * created under "<prefix>_Statistics":
 *   - "..._Summary"        element counts and the shared-term ratios
 *   - "..._HeaderStats"    the same ratios written with vocabSubject /
 *                          vocabPredicate
 *   - "..._SO_Summary" and "..._Typed_Summary" are truncated here and the
 *     prefix is then passed to TriplesList::calculateDegrees().
 *
 * NOTE(review): exceptions thrown by the HDT library are not caught here.
 *
 * @param argc Number of command-line arguments.
 * @param argv Options and the input HDT path.
 * @return 0 on success, 1 on bad usage.
 */
int main(int argc, char *argv[]) {
	int c;
	string inputFile;
	string outputFile;

	while ((c = getopt(argc, argv, "hi:o:")) != -1) {
		switch (c) {
		case 'h':
			help();
			break;
		case 'i':
			// "i:" is declared in the option string; without this case
			// "-i file" was rejected as an unknown option.
			inputFile = optarg;
			break;
		case 'o':
			outputFile = optarg;
			break;
		default:
			cout << "ERROR: Unknown option" << endl;
			help();
			return 1;
		}
	}

	if (inputFile.empty()) {
		// Compare against optind rather than argc alone: after "-o out"
		// argc is 3 but no positional argument is left, and argv[optind]
		// is then a null pointer.
		if (optind >= argc) {
			cout << "ERROR: You must supply an input HDT File" << endl << endl;
			help();
			return 1;
		}
		inputFile = argv[optind];
	}

	cout<<inputFile<<endl;

	if (outputFile == "")
		outputFile = inputFile;

	// Load HDT file
	HDT *hdt = HDTManager::mapHDT(inputFile.c_str());

	// Copy every triple into an in-memory list for the degree computation.
	TriplesList* tl = new TriplesList();

	Triples * trip = hdt->getTriples();
	cout<<" Number of triples: "<<(unsigned long)(hdt->getTriples()->getNumberOfElements())<<endl;
	fflush(stdout);
	IteratorTripleID *it = trip->searchAll();
	tl->insert(it);
	cout<<" Number of tripleslist: "<<tl->getNumberOfElements()<<endl;
	delete it; // Remember to delete iterator to avoid memory leaks!

	outputFile = outputFile + "_Statistics";

	// Erase summary file content
	ofstream out_summary;
	out_summary.open((outputFile + "_Summary").c_str(), ios::trunc);

	ofstream out_header_stats;
	out_header_stats.open((outputFile + "_HeaderStats").c_str(), ios::trunc);

	out_summary << "* General statistics" << endl;
	out_summary << "# Number of Triples: "
			<< hdt->getTriples()->getNumberOfElements() << endl;
	out_summary << "# Number of Predicates: "
			<< hdt->getDictionary()->getNpredicates() << endl;
	out_summary << "# Number of Subjects: "
			<< hdt->getDictionary()->getNsubjects() << endl;
	out_summary << "# Number of Objects: "
			<< hdt->getDictionary()->getNobjects() << endl;
	out_summary << "# Number of Shared Subject-Objects: "
			<< hdt->getDictionary()->getNshared() << endl;

	// Shared / (subjects + objects - shared)
	double ratioSO = (double) hdt->getDictionary()->getNshared()
							/ (hdt->getDictionary()->getNsubjects()
									+ hdt->getDictionary()->getNobjects()
									- hdt->getDictionary()->getNshared());
	out_summary << "# Ratio Shared Subject-Objects => SO / (S U O) : "
			<< ratioSO << endl;

	// NOTE(review): the header-stats entries are written with no separator
	// or newline between them; confirm the intended format.
	out_header_stats<<vocabSubject<<" <"<<vocabPredicate<<"ratioSharedSO> "<<ratioSO;
	/*
	 * Compute over the dictionary to get shared subject-predicate and predicate-object
	 */
	IteratorUCharString *itPred = hdt->getDictionary()->getPredicates();

	int numSubjectPredicates = 0;
	int numPredicatesObjects = 0;
	while (itPred->hasNext()) {
		stringstream s;
		s << itPred->next();
		string pred = s.str();

		// A predicate string that also resolves as a subject is shared S-P.
		if (hdt->getDictionary()->stringToId(pred, SUBJECT) > 0) {
			numSubjectPredicates++;
		}
		// A predicate string that also resolves as an object is shared P-O.
		if (hdt->getDictionary()->stringToId(pred, OBJECT) > 0) {
			numPredicatesObjects++;
		}
	}
	delete itPred; // Remember to delete iterator to avoid memory leaks!

	double ratioSP = (double) numSubjectPredicates
			/ (hdt->getDictionary()->getNsubjects()
					+ hdt->getDictionary()->getNpredicates()
					- numSubjectPredicates);
	out_summary << "# Ratio Shared Subject-Predicate => SP / (S U P) : "
			<< ratioSP << endl;

	out_header_stats<<vocabSubject<<" <"<<vocabPredicate<<"ratioSharedSP> "<<ratioSP;

	double ratioPO =(double) numPredicatesObjects
			/ (hdt->getDictionary()->getNobjects()
					+ hdt->getDictionary()->getNpredicates()
					- numPredicatesObjects);
	out_summary << "# Ratio Shared Predicate-Object => PO / (P U O) : "
			<< ratioPO << endl;

	out_header_stats<<vocabSubject<<" <"<<vocabPredicate<<"ratioSharedPO> "<<ratioPO;

	out_summary.close();
	out_header_stats.close();

	// Erase summary file SO and Type content
	ofstream out_summarySO;
	out_summarySO.open((outputFile + "_SO_Summary").c_str(), ios::trunc);
	out_summarySO.close();

	ofstream out_summaryType;
	out_summaryType.open((outputFile + "_Typed_Summary").c_str(), ios::trunc);
	out_summaryType.close();

	// Find the predicate ID of rdf:type, trying the full IRI first and then
	// the two abbreviated spellings; stays 0 when none is in the dictionary.
	const char *rdfTypeSpellings[] = {
			"http://www.w3.org/1999/02/22-rdf-syntax-ns#type",
			"rdf:type",
			"a" };
	const size_t numSpellings = sizeof(rdfTypeSpellings) / sizeof(rdfTypeSpellings[0]);

	unsigned int IDrdftype = 0;
	string rdftype;
	for (size_t i = 0; i < numSpellings; i++) {
		rdftype = rdfTypeSpellings[i];
		if (hdt->getDictionary()->stringToId(rdftype, PREDICATE) > 0) {
			IDrdftype = hdt->getDictionary()->stringToId(rdftype, PREDICATE);
			break;
		}
	}

	tl->calculateDegrees(outputFile, hdt->getDictionary()->getNshared(),
			IDrdftype);

	// The list was allocated with new above and was never released.
	delete tl;
	delete hdt;

	return 0;
}
예제 #4
0
int main(int argc, char **argv) {
	int c;
	string query, inputFile, outputFile, filter1, filter2;
	bool measure = false;

	while ((c = getopt(argc, argv, "hq:o:mf:F:")) != -1) {
		switch (c) {
		case 'h':
			help();
			break;
		case 'q':
			query = optarg;
			break;
		case 'o':
			outputFile = optarg;
			break;
		case 'm':
			measure = true;
			break;
		case 'f':
			filter1 = optarg;
			break;
		case 'F':
			filter2 = optarg;
			break;
		default:
			cout << "ERROR: Unknown option" << endl;
			help();
			return 1;
		}
	}

	if (argc - optind < 1) {
		cout << "ERROR: You must supply an HDT File" << endl << endl;
		help();
		return 1;
	}

	inputFile = argv[optind];


	try {
		HDT *hdt = HDTManager::mapIndexedHDT(inputFile.c_str());

		if (filter1 != "" || filter2 != "") {
			ostream *out;
			ofstream outF;

			if (outputFile != "") {
				outF.open(outputFile.c_str());
				out = &outF;
			} else {
				out = &cout;
			}

			string infile;
			if (filter1 != "")
				infile = filter1;
			else
				infile = filter2;

			std::ifstream file(infile.c_str());
			if (!file.good())
				throw "unable to open filter file";

			string linea = "";
			string property = "";
			string value = "";

			StopWatch st_total;

			size_t totalQueryResults = 0;

			size_t numQuery=0;

			while (!file.eof()) {
				getline(file, linea);
				if(linea.length()==0) 
					continue;
				size_t pos = linea.find(';');

				if (pos != std::string::npos) {
					property = linea.substr(0, pos);
					value = linea.substr(pos + 1);

					cerr<<"Query "<<numQuery << ": "<<linea<<endl;
					cout<<">>> Query "<<numQuery << ": "<<linea<<endl;
					cout<<"property:"<<property<<endl;
					cout<<"value:"<<value<<endl<<endl;

					StopWatch st;

					hdt::LiteralDictionary *dict = dynamic_cast<hdt::LiteralDictionary *>(hdt->getDictionary());
					if(dict==NULL) {
						cerr << "This dictionary does not support substring search" << endl;
						break;
					}
					hdt::Triples *triples = hdt->getTriples();

					uint32_t *results = NULL;
					size_t numResults = dict->substringToId((unsigned char *) value.c_str(), value.length(), &results);

					TripleID pattern(0, dict->stringToId(property, PREDICATE), 0);

					for (size_t i = 0; i < numResults; i++) {

						pattern.setObject(results[i]);

						string objStr = dict->idToString(results[i], OBJECT);

						IteratorTripleID *it = triples->search(pattern);

						unsigned int numTriples = 0;

						//iterate over the first pattern
						while (it->hasNext()) {
							TripleID *ts = it->next();

							if (filter1 != "") {
								// QUERY Q3
								cout << dict->idToString(ts->getSubject(), SUBJECT) << " " << objStr << endl;
								totalQueryResults++;

							} else {
								// QUERY Q4
								TripleID pat2(ts->getSubject(), 0, 0);
								TripleString out;
								string subjStr = dict->idToString(ts->getSubject(), SUBJECT);

								IteratorTripleID *it2 = triples->search(pat2);
								while(it2->hasNext()) {
									TripleID *inner = it2->next();

									cout << subjStr << " " << dict->idToString(inner->getPredicate(), PREDICATE) <<" ";

									if(inner->getObject()==results[i]) {
										cout << objStr << endl;
									} else {
										cout << dict->idToString(inner->getObject(), OBJECT) << endl;
									}
									totalQueryResults++;
								}
								delete it2;
							}
						}
						delete it;
					}
					
					cout << ">>> Results: " << totalQueryResults << endl;
					cerr << "Query " << numQuery << " Results: " << totalQueryResults << " in " << st << endl << endl;
					numQuery++;
				}
			}
			cerr << "Total time: " << st_total << endl;

			if (outputFile != "") {
				outF.close();
			}

			file.close();

		}

		delete hdt;
	} catch (char *e) {
		cout << "ERROR: " << e << endl;
	} catch (const char *e) {
		cout << "ERROR: " << e << endl;
	}
}
예제 #5
0
/**
 * Rewrites an HDT file with its dictionary re-imported into a
 * FourSectionDictionary.
 *
 * Usage: <program> -o <output HDT> <input HDT>
 *
 * The input is mapped, its dictionary is imported into a new
 * FourSectionDictionary, and the output file receives the original header,
 * the new dictionary and the original triples.
 *
 * @param argc Number of command-line arguments.
 * @param argv Options followed by the input HDT path.
 * @return 0 on success, 1 on bad usage, when the output cannot be opened,
 *         or when a std::exception was thrown.
 */
int main(int argc, char **argv) {
	int c;
	string inputFile, outputFile;

	while( (c = getopt(argc,argv,"hq:o:m"))!=-1) {
		switch(c) {
		case 'h':
		case 'q':
		case 'm':
			// Accepted for command-line compatibility; nothing reads them.
			break;
		case 'o':
			outputFile = optarg;
			break;
		default:
			cout << "ERROR: Unknown option" << endl;
			return 1;
		}
	}

	if(argc-optind<1) {
		cout << "ERROR: You must supply an input and HDT File" << endl << endl;
		return 1;
	}

	// The output path only ever comes from -o. Without it the program used
	// to run the whole import and then write into a stream that failed to
	// open, so reject the call up front.
	if(outputFile.empty()) {
		cout << "ERROR: You must supply an output file with -o" << endl << endl;
		return 1;
	}

	inputFile = argv[optind];

	// Declared outside the try block so it is released on the error path too.
	HDT *hdt = NULL;
	int status = 0;

	try {
		// LOAD
		hdt = HDTManager::mapHDT(inputFile.c_str());

		// CONVERT
		Dictionary *dict = hdt->getDictionary();
		//LiteralDictionary litDict;
		FourSectionDictionary litDict;
		StdoutProgressListener progress;
		litDict.import(dict, &progress);

		// SAVE
		ofstream out(outputFile.c_str(), ios::binary | ios::out);
		if(!out.good()) {
			cout << "ERROR: Could not open output file " << outputFile << endl;
			delete hdt;
			return 1;
		}
		ControlInformation ci;

		// GLOBAL
		ci.clear();
		ci.setType(GLOBAL);
		ci.setFormat(HDTVocabulary::HDT_CONTAINER);
		ci.save(out);

		// HEADER
		ci.clear();
		ci.setType(HEADER);
		hdt->getHeader()->save(out, ci, NULL);

		// NEW DICTIONARY
		ci.clear();
		ci.setType(DICTIONARY);
		litDict.save(out, ci, NULL);

		// TRIPLES
		ci.clear();
		ci.setType(TRIPLES);
		hdt->getTriples()->save(out, ci, NULL);

		out.close();
	} catch (const std::exception& e) {
		cout << "ERROR: " << e.what() << endl;
		status = 1;
	}

	// Deleting a null pointer is a no-op, so this is safe if mapHDT threw.
	delete hdt;

	return status;
}