Пример #1
0
int main(int argc, char **argv) {
	int c;
	string query, inputFile, outputFile;
	bool measure = false;

	while( (c = getopt(argc,argv,"hq:o:m"))!=-1) {
		switch(c) {
		case 'h':
			break;
		case 'q':
			query = optarg;
			break;
		case 'o':
			outputFile = optarg;
			break;
		case 'm':
			measure = true;
			break;
		default:
			cout << "ERROR: Unknown option" << endl;
			return 1;
		}
	}

	if(argc-optind<2) {
		cout << "ERROR: You must supply an input and HDT File" << endl << endl;
		return 1;
	}

	inputFile = argv[optind];
	outputFile = argv[optind+1];

	ConvertProgress progress;
	StopWatch st;

	try {
		// LOAD
		HDT *hdt = HDTManager::mapHDT(inputFile.c_str());

		// CONVERT triples to TripleList
		TriplesList tlist;
		Triples *triples = hdt->getTriples();
		cout << "Old Triples -> TriplesList" << endl;
		st.reset();
		IteratorTripleID *it = triples->searchAll();
		tlist.insert(it);
		delete it;
		cout << "         Old Triples -> TriplesList time" << st <<  endl;

		// Convert tlist to OPS
		cout << "TriplesList sort OPS" << endl;
		st.reset();
		tlist.sort(OPS, &progress);
		cout << "    TriplesList sort OPS time: " << st << endl;

		// Generate new OPS BitmapTriples
		cout << "TriplesList to new BitmapTriples" << endl;
		HDTSpecification spec;
		spec.set("triplesOrder", "OPS");
		BitmapTriples bt(spec);
		st.reset();
		bt.load(tlist, &progress);
		cout << "       TriplesList to new BitmapTriples time" << st << endl;

		// Update Header
#if 1
		cout << "Update Header" << endl;
		string rootNode("_:triples");
		TripleString ts (rootNode, "", "");
		hdt->getHeader()->remove(ts);
		bt.populateHeader(*hdt->getHeader(), "_:triples");
#endif

		// SAVE
		cout << "Save to " << outputFile << endl;
		ofstream out(outputFile.c_str(), ios::binary | ios::out);
		ControlInformation ci;

		ci.clear();
		ci.setType(GLOBAL);
		ci.setFormat(HDTVocabulary::HDT_CONTAINER);
		ci.save(out);

		// HEADER
		ci.clear();
		ci.setType(HEADER);
		hdt->getHeader()->save(out, ci, NULL);

		// DICTIONARY
		ci.clear();
		ci.setType(DICTIONARY);
		hdt->getDictionary()->save(out, ci, NULL);

		// NEW TRIPLES
		ci.clear();
		ci.setType(TRIPLES);
		bt.save(out, ci, NULL);

		out.close();

		delete hdt;
	} catch (char *e) {
		cout << "ERROR: " << e << endl;
	} catch (const char *e) {
		cout << "ERROR: " << e << endl;
	}
}
Пример #2
0
int main(int argc, char *argv[]) {
	int c;
	string inputFile;
	string outputFile;

	while ((c = getopt(argc, argv, "hi:o:")) != -1) {
		switch (c) {
		case 'h':
			help();
			break;
		case 'o':
			outputFile = optarg;

			break;
		default:
			cout << "ERROR: Unknown option" << endl;
			help();
			return 1;
		}
	}



	if (argc < 2) {
		cout << "ERROR: You must supply an input HDT File" << endl << endl;
		help();
		return 1;
	}
	inputFile = argv[optind];

		cout<<inputFile<<endl;

	if (outputFile == "")
		outputFile = inputFile;

	// Load HDT file
	HDT *hdt = HDTManager::mapHDT(inputFile.c_str());

	TriplesList* tl = new TriplesList();

	Triples * trip = hdt->getTriples();
	cout<<" Number of triples: "<<(unsigned long)(hdt->getTriples()->getNumberOfElements())<<endl;
	fflush(stdout);
	IteratorTripleID *it = trip->searchAll();
	tl->insert(it);
	cout<<" Number of tripleslist: "<<tl->getNumberOfElements()<<endl;
	delete it; // Remember to delete iterator to avoid memory leaks!

	outputFile = outputFile + "_Statistics";

	//erase summary file content
	ofstream out_summary;
	out_summary.open((outputFile + "_Summary").c_str(), ios::trunc);

	ofstream out_header_stats;
	out_header_stats.open((outputFile + "_HeaderStats").c_str(), ios::trunc);

	out_summary << "* General statistics" << endl;
	out_summary << "# Number of Triples: "
			<< hdt->getTriples()->getNumberOfElements() << endl;
	out_summary << "# Number of Predicates: "
			<< hdt->getDictionary()->getNpredicates() << endl;
	out_summary << "# Number of Subjects: "
			<< hdt->getDictionary()->getNsubjects() << endl;
	out_summary << "# Number of Objects: "
			<< hdt->getDictionary()->getNobjects() << endl;
	out_summary << "# Number of Shared Subject-Objects: "
			<< hdt->getDictionary()->getNshared() << endl;

	double ratioSO = (double) hdt->getDictionary()->getNshared()
							/ (hdt->getDictionary()->getNsubjects()
									+ hdt->getDictionary()->getNobjects()
									- hdt->getDictionary()->getNshared());
	out_summary << "# Ratio Shared Subject-Objects => SO / (S U O) : "
			<< ratioSO << endl;

	out_header_stats<<vocabSubject<<" <"<<vocabPredicate<<"ratioSharedSO> "<<ratioSO;
	/*
	 * Compute over the dictionary to get shared subject-predicate and predicate-object
	 */
	IteratorUCharString *itPred = hdt->getDictionary()->getPredicates();

	int numSubjectPredicates = 0;
	int numPredicatesObjects = 0;
	while (itPred->hasNext()) {
		stringstream s;
		s << itPred->next();
		string pred = s.str();

		if (hdt->getDictionary()->stringToId(pred, SUBJECT) > 0) {
			//found
			numSubjectPredicates++;
		}
		if (hdt->getDictionary()->stringToId(pred, OBJECT) > 0) {
			//found
			numPredicatesObjects++;
		}
	}
	delete itPred; // Remember to delete iterator to avoid memory leaks!

	double ratioSP = (double) numSubjectPredicates
			/ (hdt->getDictionary()->getNsubjects()
					+ hdt->getDictionary()->getNpredicates()
					- numSubjectPredicates);
	out_summary << "# Ratio Shared Subject-Predicate => SP / (S U P) : "
			<< ratioSP << endl;

	out_header_stats<<vocabSubject<<" <"<<vocabPredicate<<"ratioSharedSP> "<<ratioSP;

	double ratioPO =(double) numPredicatesObjects
			/ (hdt->getDictionary()->getNobjects()
					+ hdt->getDictionary()->getNpredicates()
					- numPredicatesObjects);
	out_summary << "# Ratio Shared Predicate-Object => PO / (P U O) : "
			<< ratioPO << endl;

	out_header_stats<<vocabSubject<<" <"<<vocabPredicate<<"ratioSharedPO> "<<ratioPO;

	out_summary.close();
	out_header_stats.close();

	//erase summary file SO and Type content
	ofstream out_summarySO;
	out_summarySO.open((outputFile + "_SO_Summary").c_str(), ios::trunc);
	out_summarySO.close();

	ofstream out_summaryType;
	out_summaryType.open((outputFile + "_Typed_Summary").c_str(), ios::trunc);
	out_summaryType.close();

	//find rdf:type
	unsigned int IDrdftype = 0;
	string rdftype = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";

	if (hdt->getDictionary()->stringToId(rdftype, PREDICATE) > 0) {
		//found
		IDrdftype = hdt->getDictionary()->stringToId(rdftype, PREDICATE);
	} else {
		rdftype = "rdf:type";
		if (hdt->getDictionary()->stringToId(rdftype, PREDICATE) > 0) {
			//found
			IDrdftype = hdt->getDictionary()->stringToId(rdftype, PREDICATE);
		} else {
			rdftype = "a";
			if (hdt->getDictionary()->stringToId(rdftype, PREDICATE) > 0) {
				//found
				IDrdftype = hdt->getDictionary()->stringToId(rdftype,
						PREDICATE);
			}
		}
	}

	tl->calculateDegrees(outputFile, hdt->getDictionary()->getNshared(),
			IDrdftype);



	delete hdt;
}