int main(int argc, char **argv) { try { HDT *hdt = HDTManager::mapHDT(argv[1]); IteratorUCharString *it =hdt->getDictionary()->getObjects(); ofstream lit("lit.txt"); ofstream blk("blank.txt"); ofstream uri("uri.txt"); while(it->hasNext()) { unsigned char *str = it->next(); if(*str=='"') { // Literal lit << (char*)str << endl; } else if(*str=='_'){ // Blanco blk << (char*)str << endl; } else { // URI uri << (char*)str << endl; } } lit.close(); blk.close(); uri.close(); delete it; delete hdt; } catch(const char *str) { cerr << str << endl; } catch(char *str) { cerr << str << endl; } }
int main(int argc, char *argv[]) { int c; string inputFile; string outputFile; while ((c = getopt(argc, argv, "hi:o:")) != -1) { switch (c) { case 'h': help(); break; case 'o': outputFile = optarg; break; default: cout << "ERROR: Unknown option" << endl; help(); return 1; } } if (argc < 2) { cout << "ERROR: You must supply an input HDT File" << endl << endl; help(); return 1; } inputFile = argv[optind]; cout<<inputFile<<endl; if (outputFile == "") outputFile = inputFile; // Load HDT file HDT *hdt = HDTManager::mapHDT(inputFile.c_str()); TriplesList* tl = new TriplesList(); Triples * trip = hdt->getTriples(); cout<<" Number of triples: "<<(unsigned long)(hdt->getTriples()->getNumberOfElements())<<endl; fflush(stdout); IteratorTripleID *it = trip->searchAll(); tl->insert(it); cout<<" Number of tripleslist: "<<tl->getNumberOfElements()<<endl; delete it; // Remember to delete iterator to avoid memory leaks! outputFile = outputFile + "_Statistics"; //erase summary file content ofstream out_summary; out_summary.open((outputFile + "_Summary").c_str(), ios::trunc); ofstream out_header_stats; out_header_stats.open((outputFile + "_HeaderStats").c_str(), ios::trunc); out_summary << "* General statistics" << endl; out_summary << "# Number of Triples: " << hdt->getTriples()->getNumberOfElements() << endl; out_summary << "# Number of Predicates: " << hdt->getDictionary()->getNpredicates() << endl; out_summary << "# Number of Subjects: " << hdt->getDictionary()->getNsubjects() << endl; out_summary << "# Number of Objects: " << hdt->getDictionary()->getNobjects() << endl; out_summary << "# Number of Shared Subject-Objects: " << hdt->getDictionary()->getNshared() << endl; double ratioSO = (double) hdt->getDictionary()->getNshared() / (hdt->getDictionary()->getNsubjects() + hdt->getDictionary()->getNobjects() - hdt->getDictionary()->getNshared()); out_summary << "# Ratio Shared Subject-Objects => SO / (S U O) : " << ratioSO << endl; out_header_stats<<vocabSubject<<" <"<<vocabPredicate<<"ratioSharedSO> "<<ratioSO; /* * Compute over the dictionary to get shared subject-predicate and predicate-object */ IteratorUCharString *itPred = hdt->getDictionary()->getPredicates(); int numSubjectPredicates = 0; int numPredicatesObjects = 0; while (itPred->hasNext()) { stringstream s; s << itPred->next(); string pred = s.str(); if (hdt->getDictionary()->stringToId(pred, SUBJECT) > 0) { //found numSubjectPredicates++; } if (hdt->getDictionary()->stringToId(pred, OBJECT) > 0) { //found numPredicatesObjects++; } } delete itPred; // Remember to delete iterator to avoid memory leaks! double ratioSP = (double) numSubjectPredicates / (hdt->getDictionary()->getNsubjects() + hdt->getDictionary()->getNpredicates() - numSubjectPredicates); out_summary << "# Ratio Shared Subject-Predicate => SP / (S U P) : " << ratioSP << endl; out_header_stats<<vocabSubject<<" <"<<vocabPredicate<<"ratioSharedSP> "<<ratioSP; double ratioPO =(double) numPredicatesObjects / (hdt->getDictionary()->getNobjects() + hdt->getDictionary()->getNpredicates() - numPredicatesObjects); out_summary << "# Ratio Shared Predicate-Object => PO / (P U O) : " << ratioPO << endl; out_header_stats<<vocabSubject<<" <"<<vocabPredicate<<"ratioSharedPO> "<<ratioPO; out_summary.close(); out_header_stats.close(); //erase summary file SO and Type content ofstream out_summarySO; out_summarySO.open((outputFile + "_SO_Summary").c_str(), ios::trunc); out_summarySO.close(); ofstream out_summaryType; out_summaryType.open((outputFile + "_Typed_Summary").c_str(), ios::trunc); out_summaryType.close(); //find rdf:type unsigned int IDrdftype = 0; string rdftype = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"; if (hdt->getDictionary()->stringToId(rdftype, PREDICATE) > 0) { //found IDrdftype = hdt->getDictionary()->stringToId(rdftype, PREDICATE); } else { rdftype = "rdf:type"; if (hdt->getDictionary()->stringToId(rdftype, PREDICATE) > 0) { //found IDrdftype = hdt->getDictionary()->stringToId(rdftype, PREDICATE); } else { rdftype = "a"; if (hdt->getDictionary()->stringToId(rdftype, PREDICATE) > 0) { //found IDrdftype = hdt->getDictionary()->stringToId(rdftype, PREDICATE); } } } tl->calculateDegrees(outputFile, hdt->getDictionary()->getNshared(), IDrdftype); delete hdt; }