예제 #1
0
int main(int argc, char **argv) {

	try {
		HDT *hdt = HDTManager::mapHDT(argv[1]);
		IteratorUCharString *it =hdt->getDictionary()->getObjects();

		ofstream lit("lit.txt");
		ofstream blk("blank.txt");
		ofstream uri("uri.txt");
		while(it->hasNext()) {
		    unsigned char *str = it->next();
		    if(*str=='"') {
			  // Literal
			lit << (char*)str << endl;
		    } else if(*str=='_'){
			 // Blanco
			blk << (char*)str << endl;
		    } else {
			 // URI
			uri << (char*)str << endl;
		    }
		}
		lit.close();
		blk.close();
		uri.close();

		delete it;
		delete hdt;
	 } catch(const char *str) {
		cerr << str << endl;
	 } catch(char *str) {
		cerr << str << endl;
	 }

}
예제 #2
0
int main(int argc, char *argv[]) {
	int c;
	string inputFile;
	string outputFile;

	while ((c = getopt(argc, argv, "hi:o:")) != -1) {
		switch (c) {
		case 'h':
			help();
			break;
		case 'o':
			outputFile = optarg;

			break;
		default:
			cout << "ERROR: Unknown option" << endl;
			help();
			return 1;
		}
	}



	if (argc < 2) {
		cout << "ERROR: You must supply an input HDT File" << endl << endl;
		help();
		return 1;
	}
	inputFile = argv[optind];

		cout<<inputFile<<endl;

	if (outputFile == "")
		outputFile = inputFile;

	// Load HDT file
	HDT *hdt = HDTManager::mapHDT(inputFile.c_str());

	TriplesList* tl = new TriplesList();

	Triples * trip = hdt->getTriples();
	cout<<" Number of triples: "<<(unsigned long)(hdt->getTriples()->getNumberOfElements())<<endl;
	fflush(stdout);
	IteratorTripleID *it = trip->searchAll();
	tl->insert(it);
	cout<<" Number of tripleslist: "<<tl->getNumberOfElements()<<endl;
	delete it; // Remember to delete iterator to avoid memory leaks!

	outputFile = outputFile + "_Statistics";

	//erase summary file content
	ofstream out_summary;
	out_summary.open((outputFile + "_Summary").c_str(), ios::trunc);

	ofstream out_header_stats;
	out_header_stats.open((outputFile + "_HeaderStats").c_str(), ios::trunc);

	out_summary << "* General statistics" << endl;
	out_summary << "# Number of Triples: "
			<< hdt->getTriples()->getNumberOfElements() << endl;
	out_summary << "# Number of Predicates: "
			<< hdt->getDictionary()->getNpredicates() << endl;
	out_summary << "# Number of Subjects: "
			<< hdt->getDictionary()->getNsubjects() << endl;
	out_summary << "# Number of Objects: "
			<< hdt->getDictionary()->getNobjects() << endl;
	out_summary << "# Number of Shared Subject-Objects: "
			<< hdt->getDictionary()->getNshared() << endl;

	double ratioSO = (double) hdt->getDictionary()->getNshared()
							/ (hdt->getDictionary()->getNsubjects()
									+ hdt->getDictionary()->getNobjects()
									- hdt->getDictionary()->getNshared());
	out_summary << "# Ratio Shared Subject-Objects => SO / (S U O) : "
			<< ratioSO << endl;

	out_header_stats<<vocabSubject<<" <"<<vocabPredicate<<"ratioSharedSO> "<<ratioSO;
	/*
	 * Compute over the dictionary to get shared subject-predicate and predicate-object
	 */
	IteratorUCharString *itPred = hdt->getDictionary()->getPredicates();

	int numSubjectPredicates = 0;
	int numPredicatesObjects = 0;
	while (itPred->hasNext()) {
		stringstream s;
		s << itPred->next();
		string pred = s.str();

		if (hdt->getDictionary()->stringToId(pred, SUBJECT) > 0) {
			//found
			numSubjectPredicates++;
		}
		if (hdt->getDictionary()->stringToId(pred, OBJECT) > 0) {
			//found
			numPredicatesObjects++;
		}
	}
	delete itPred; // Remember to delete iterator to avoid memory leaks!

	double ratioSP = (double) numSubjectPredicates
			/ (hdt->getDictionary()->getNsubjects()
					+ hdt->getDictionary()->getNpredicates()
					- numSubjectPredicates);
	out_summary << "# Ratio Shared Subject-Predicate => SP / (S U P) : "
			<< ratioSP << endl;

	out_header_stats<<vocabSubject<<" <"<<vocabPredicate<<"ratioSharedSP> "<<ratioSP;

	double ratioPO =(double) numPredicatesObjects
			/ (hdt->getDictionary()->getNobjects()
					+ hdt->getDictionary()->getNpredicates()
					- numPredicatesObjects);
	out_summary << "# Ratio Shared Predicate-Object => PO / (P U O) : "
			<< ratioPO << endl;

	out_header_stats<<vocabSubject<<" <"<<vocabPredicate<<"ratioSharedPO> "<<ratioPO;

	out_summary.close();
	out_header_stats.close();

	//erase summary file SO and Type content
	ofstream out_summarySO;
	out_summarySO.open((outputFile + "_SO_Summary").c_str(), ios::trunc);
	out_summarySO.close();

	ofstream out_summaryType;
	out_summaryType.open((outputFile + "_Typed_Summary").c_str(), ios::trunc);
	out_summaryType.close();

	//find rdf:type
	unsigned int IDrdftype = 0;
	string rdftype = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";

	if (hdt->getDictionary()->stringToId(rdftype, PREDICATE) > 0) {
		//found
		IDrdftype = hdt->getDictionary()->stringToId(rdftype, PREDICATE);
	} else {
		rdftype = "rdf:type";
		if (hdt->getDictionary()->stringToId(rdftype, PREDICATE) > 0) {
			//found
			IDrdftype = hdt->getDictionary()->stringToId(rdftype, PREDICATE);
		} else {
			rdftype = "a";
			if (hdt->getDictionary()->stringToId(rdftype, PREDICATE) > 0) {
				//found
				IDrdftype = hdt->getDictionary()->stringToId(rdftype,
						PREDICATE);
			}
		}
	}

	tl->calculateDegrees(outputFile, hdt->getDictionary()->getNshared(),
			IDrdftype);



	delete hdt;
}