예제 #1
0
int main(int argc, char **argv) {

	try {
		HDT *hdt = HDTManager::mapHDT(argv[1]);
		IteratorUCharString *it =hdt->getDictionary()->getObjects();

		ofstream lit("lit.txt");
		ofstream blk("blank.txt");
		ofstream uri("uri.txt");
		while(it->hasNext()) {
		    unsigned char *str = it->next();
		    if(*str=='"') {
			  // Literal
			lit << (char*)str << endl;
		    } else if(*str=='_'){
			 // Blanco
			blk << (char*)str << endl;
		    } else {
			 // URI
			uri << (char*)str << endl;
		    }
		}
		lit.close();
		blk.close();
		uri.close();

		delete it;
		delete hdt;
	 } catch(const char *str) {
		cerr << str << endl;
	 } catch(char *str) {
		cerr << str << endl;
	 }

}
예제 #2
0
int main(int argc, char **argv) {
	int c;
	string query, inputFile, outputFile;
	bool measure = false;

	while( (c = getopt(argc,argv,"hq:o:m"))!=-1) {
		switch(c) {
		case 'h':
			help();
			break;
		case 'q':
			query = optarg;
			break;
		case 'o':
			outputFile = optarg;
			break;
		case 'm':
			measure = true;
			break;
		default:
			cout << "ERROR: Unknown option" << endl;
			help();
			return 1;
		}
	}

	if(argc-optind<2) {
		cout << "ERROR: You must supply an HDT File" << endl << endl;
		help();
		return 1;
	}

	inputFile = argv[optind];
	outputFile = argv[optind+1];

	try {
		HDT *hdt = HDTManager::mapHDT(inputFile.c_str());
		hdt->saveToHDT(outputFile.c_str());

		cout << "IN: " << inputFile << " Out: " << outputFile << endl;

		delete hdt;
	} catch (std::exception& e) {
		cout << "ERROR: " << e.what() << endl;
	}
}
예제 #3
0
파일: conops.cpp 프로젝트: akjoshi/hdt-it
/**
 * Re-sorts the triples of an HDT file into OPS order and writes the result
 * as a new HDT container (original header and dictionary, new triples).
 *
 * Usage: <program> [-h] [-q <query>] [-o <file>] [-m] <input.hdt> <output.hdt>
 *
 * The options are parsed but have no effect: -h prints nothing and the
 * value given to -o is replaced by the second positional argument.
 *
 * Steps: map the input, copy its triples into a TriplesList, sort that list
 * in OPS order, build a BitmapTriples from it, refresh the "_:triples"
 * header node and finally save control information, header, dictionary and
 * the new triples to the output file.
 *
 * @return 0 on success; 1 on a usage error, when the output file cannot be
 *         opened, or when an error string is thrown during processing.
 */
int main(int argc, char **argv) {
	int c;
	string query, inputFile, outputFile;
	bool measure = false;

	while( (c = getopt(argc,argv,"hq:o:m"))!=-1) {
		switch(c) {
		case 'h':
			break;
		case 'q':
			query = optarg;
			break;
		case 'o':
			outputFile = optarg;
			break;
		case 'm':
			measure = true;
			break;
		default:
			cout << "ERROR: Unknown option" << endl;
			return 1;
		}
	}

	if(argc-optind<2) {
		cout << "ERROR: You must supply an input and HDT File" << endl << endl;
		return 1;
	}

	inputFile = argv[optind];
	outputFile = argv[optind+1];

	ConvertProgress progress;
	StopWatch st;

	try {
		// LOAD
		HDT *hdt = HDTManager::mapHDT(inputFile.c_str());

		// CONVERT triples to TripleList
		TriplesList tlist;
		Triples *triples = hdt->getTriples();
		cout << "Old Triples -> TriplesList" << endl;
		st.reset();
		IteratorTripleID *it = triples->searchAll();
		tlist.insert(it);
		delete it;
		cout << "         Old Triples -> TriplesList time" << st <<  endl;

		// Convert tlist to OPS
		cout << "TriplesList sort OPS" << endl;
		st.reset();
		tlist.sort(OPS, &progress);
		cout << "    TriplesList sort OPS time: " << st << endl;

		// Generate new OPS BitmapTriples
		cout << "TriplesList to new BitmapTriples" << endl;
		HDTSpecification spec;
		spec.set("triplesOrder", "OPS");
		BitmapTriples bt(spec);
		st.reset();
		bt.load(tlist, &progress);
		cout << "       TriplesList to new BitmapTriples time" << st << endl;

		// Update Header: drop the old "_:triples" statements and let the new
		// triples describe themselves.
#if 1
		cout << "Update Header" << endl;
		string rootNode("_:triples");
		TripleString ts (rootNode, "", "");
		hdt->getHeader()->remove(ts);
		bt.populateHeader(*hdt->getHeader(), "_:triples");
#endif

		// SAVE
		cout << "Save to " << outputFile << endl;
		ofstream out(outputFile.c_str(), ios::binary | ios::out);
		// Without this check an unwritable path would go unreported and the
		// program would claim success without producing a file.
		if(!out.good()) {
			cout << "ERROR: Could not open output file " << outputFile << endl;
			delete hdt;
			return 1;
		}
		ControlInformation ci;

		ci.clear();
		ci.setType(GLOBAL);
		ci.setFormat(HDTVocabulary::HDT_CONTAINER);
		ci.save(out);

		// HEADER
		ci.clear();
		ci.setType(HEADER);
		hdt->getHeader()->save(out, ci, NULL);

		// DICTIONARY
		ci.clear();
		ci.setType(DICTIONARY);
		hdt->getDictionary()->save(out, ci, NULL);

		// NEW TRIPLES
		ci.clear();
		ci.setType(TRIPLES);
		bt.save(out, ci, NULL);

		out.close();

		delete hdt;
	} catch (char *e) {
		cout << "ERROR: " << e << endl;
		return 1;
	} catch (const char *e) {
		cout << "ERROR: " << e << endl;
		return 1;
	}

	return 0;
}
예제 #4
0
/**
 * Computes statistics about an HDT file and writes them to a set of files
 * named "<output>_Statistics_*".
 *
 * Usage: <program> [-h] [-i <input.hdt>] [-o <output prefix>] [<input.hdt>]
 *
 * The input file is taken from the positional argument when one is present,
 * otherwise from -i. When -o is not given the input path is used as the
 * output prefix.
 *
 * Files written (all truncated first):
 *   <prefix>_Statistics_Summary        general counts and sharing ratios
 *   <prefix>_Statistics_HeaderStats    the three sharing ratios as statements
 *   <prefix>_Statistics_SO_Summary     emptied here
 *   <prefix>_Statistics_Typed_Summary  emptied here
 * After that TriplesList::calculateDegrees() is called with the same prefix,
 * the number of shared subject-objects and the id of the rdf:type predicate
 * (0 when none of the known spellings is in the dictionary).
 *
 * @return 0 on success, 1 on a usage error.
 */
int main(int argc, char *argv[]) {
	int c;
	string inputFile;
	string outputFile;

	while ((c = getopt(argc, argv, "hi:o:")) != -1) {
		switch (c) {
		case 'h':
			help();
			break;
		case 'i':
			// The option string declares -i, so honour it instead of letting
			// it fall through to the "Unknown option" branch.
			inputFile = optarg;
			break;
		case 'o':
			outputFile = optarg;
			break;
		default:
			cout << "ERROR: Unknown option" << endl;
			help();
			return 1;
		}
	}

	// argv[optind] only exists when a positional argument is left after the
	// options; checking argc alone is not enough (e.g. "-o prefix" only).
	if (optind < argc) {
		inputFile = argv[optind];
	}
	if (inputFile.empty()) {
		cout << "ERROR: You must supply an input HDT File" << endl << endl;
		help();
		return 1;
	}

	cout<<inputFile<<endl;

	if (outputFile == "")
		outputFile = inputFile;

	// Load HDT file
	HDT *hdt = HDTManager::mapHDT(inputFile.c_str());

	// Copy every triple into an in-memory list used for the degree statistics.
	TriplesList* tl = new TriplesList();

	Triples * trip = hdt->getTriples();
	cout<<" Number of triples: "<<(unsigned long)(hdt->getTriples()->getNumberOfElements())<<endl;
	fflush(stdout);
	IteratorTripleID *it = trip->searchAll();
	tl->insert(it);
	cout<<" Number of tripleslist: "<<tl->getNumberOfElements()<<endl;
	delete it; // Remember to delete iterator to avoid memory leaks!

	outputFile = outputFile + "_Statistics";

	//erase summary file content
	ofstream out_summary;
	out_summary.open((outputFile + "_Summary").c_str(), ios::trunc);

	ofstream out_header_stats;
	out_header_stats.open((outputFile + "_HeaderStats").c_str(), ios::trunc);

	out_summary << "* General statistics" << endl;
	out_summary << "# Number of Triples: "
			<< hdt->getTriples()->getNumberOfElements() << endl;
	out_summary << "# Number of Predicates: "
			<< hdt->getDictionary()->getNpredicates() << endl;
	out_summary << "# Number of Subjects: "
			<< hdt->getDictionary()->getNsubjects() << endl;
	out_summary << "# Number of Objects: "
			<< hdt->getDictionary()->getNobjects() << endl;
	out_summary << "# Number of Shared Subject-Objects: "
			<< hdt->getDictionary()->getNshared() << endl;

	// Ratio of shared terms over the union of both sets: |S n O| / |S u O|.
	double ratioSO = (double) hdt->getDictionary()->getNshared()
							/ (hdt->getDictionary()->getNsubjects()
									+ hdt->getDictionary()->getNobjects()
									- hdt->getDictionary()->getNshared());
	out_summary << "# Ratio Shared Subject-Objects => SO / (S U O) : "
			<< ratioSO << endl;

	// NOTE(review): no separator or newline is written between the three
	// header statistics below - confirm the consumer expects that.
	out_header_stats<<vocabSubject<<" <"<<vocabPredicate<<"ratioSharedSO> "<<ratioSO;
	/*
	 * Compute over the dictionary to get shared subject-predicate and predicate-object
	 */
	IteratorUCharString *itPred = hdt->getDictionary()->getPredicates();

	int numSubjectPredicates = 0;
	int numPredicatesObjects = 0;
	while (itPred->hasNext()) {
		stringstream s;
		s << itPred->next();
		string pred = s.str();

		// A positive id means the predicate string also exists in that role.
		if (hdt->getDictionary()->stringToId(pred, SUBJECT) > 0) {
			numSubjectPredicates++;
		}
		if (hdt->getDictionary()->stringToId(pred, OBJECT) > 0) {
			numPredicatesObjects++;
		}
	}
	delete itPred; // Remember to delete iterator to avoid memory leaks!

	double ratioSP = (double) numSubjectPredicates
			/ (hdt->getDictionary()->getNsubjects()
					+ hdt->getDictionary()->getNpredicates()
					- numSubjectPredicates);
	out_summary << "# Ratio Shared Subject-Predicate => SP / (S U P) : "
			<< ratioSP << endl;

	out_header_stats<<vocabSubject<<" <"<<vocabPredicate<<"ratioSharedSP> "<<ratioSP;

	double ratioPO =(double) numPredicatesObjects
			/ (hdt->getDictionary()->getNobjects()
					+ hdt->getDictionary()->getNpredicates()
					- numPredicatesObjects);
	out_summary << "# Ratio Shared Predicate-Object => PO / (P U O) : "
			<< ratioPO << endl;

	out_header_stats<<vocabSubject<<" <"<<vocabPredicate<<"ratioSharedPO> "<<ratioPO;

	out_summary.close();
	out_header_stats.close();

	//erase summary file SO and Type content
	ofstream out_summarySO;
	out_summarySO.open((outputFile + "_SO_Summary").c_str(), ios::trunc);
	out_summarySO.close();

	ofstream out_summaryType;
	out_summaryType.open((outputFile + "_Typed_Summary").c_str(), ios::trunc);
	out_summaryType.close();

	// Find the id of rdf:type: try the known spellings in order of preference
	// and stop at the first one present in the dictionary, looking each one
	// up only once.
	static const char *const rdfTypeSpellings[] = {
		"http://www.w3.org/1999/02/22-rdf-syntax-ns#type",
		"rdf:type",
		"a"
	};
	unsigned int IDrdftype = 0;
	string rdftype;
	for (int k = 0; k < 3 && IDrdftype == 0; k++) {
		rdftype = rdfTypeSpellings[k];
		unsigned int id = hdt->getDictionary()->stringToId(rdftype, PREDICATE);
		if (id > 0) {
			IDrdftype = id;
		}
	}

	tl->calculateDegrees(outputFile, hdt->getDictionary()->getNshared(),
			IDrdftype);

	// The triples list is heap-allocated above and must be released too.
	delete tl;
	delete hdt;

	return 0;
}
예제 #5
0
/**
 * Exercise program for triple iterators over a mapped HDT file.
 *
 * Usage: <program> [-h] [-q <query>] [-o <file>] [-m] <input.hdt>
 *
 * The -q, -o and -m values are stored but not used in this function.
 *
 * With the active preprocessor branch the program searches the triples with
 * the ID pattern (0,1,0), prints every match, rewinds the iterator, wraps it
 * in a RandomAccessIterator and then prints elements by index: forwards
 * (at most 20), backwards, forwards again, and finally a fixed sequence of
 * indices. The "#if 0" branch holds a disabled variant based on
 * hasNext()/hasPrevious().
 *
 * Returns 1 on a usage error; otherwise falls off the end of main (status 0),
 * also after a caught error string.
 */
int main(int argc, char **argv) {
	int c;
	string query, inputFile, outputFile;
	bool measure = false;

	while( (c = getopt(argc,argv,"hq:o:m"))!=-1) {
		switch(c) {
		case 'h':
			help();
			break;
		case 'q':
			query = optarg;
			break;
		case 'o':
			outputFile = optarg;
			break;
		case 'm':
			measure = true;
			break;
		default:
			cout << "ERROR: Unknown option" << endl;
			help();
			return 1;
		}
	}

	// Exactly one positional argument (the HDT file) is needed.
	if(argc-optind<1) {
		cout << "ERROR: You must supply an HDT File" << endl << endl;
		help();
		return 1;
	}

	inputFile = argv[optind];


	try {
		HDT *hdt = HDTManager::mapHDT(inputFile.c_str());

		// Search pattern given as (subject, predicate, object) IDs.
		// NOTE(review): presumably 0 acts as a wildcard, so this selects all
		// triples whose predicate has ID 1 - confirm against TripleID.
		TripleID pat(0,1,0);
#if 0
		// Disabled variant: step forwards, then backwards, then forwards again
		// on the same iterator.
		IteratorTripleID *it = hdt->getTriples()->search(pat);

		StopWatch st;
		unsigned int numTriples = 0;
		while(it->hasNext() && numTriples < 4) {
			TripleID *ts = it->next();
			cout << *ts << endl;

			numTriples++;
		}

		cout << "------" << endl;

		while(it->hasPrevious()) {
			TripleID *ts = it->previous();
			cout << *ts << endl;
		}

		cout << "------" << endl;

		numTriples=0;
		while(it->hasNext() && numTriples<10) {
			TripleID *ts = it->next();
			cout << *ts << endl;
			numTriples++;
		}

		delete it;
		cout << numTriples << " results in " << st << endl;

#else
		IteratorTripleID *it = hdt->getTriples()->search(pat);

#if 1
		// Print every match once, then rewind so the random-access wrapper
		// below starts from the beginning.
		while(it->hasNext()) {
			cout << *it->next() << endl;
		}
		it->goToStart();
		cout << "------" << endl;
#endif

		RandomAccessIterator rit(it);

		// Forward pass over the first (at most) 20 elements.
		int i;
		for(i=0;i<rit.getNumElements() && i < 20;i++) {
			TripleID *tid = rit.get(i);
			cout << i << " => " << *tid << endl;
		}

		cout << "------" << endl;

		// Backward pass: i is one past the last printed index here, so step
		// back once before starting.
		for(i-- ; i>=0; i--) {
			TripleID *tid = rit.get(i);
			cout << i << " => " << *tid << endl;
		}

		cout << "------" << endl;

		// Second forward pass over the same range.
		for(i=0;i<rit.getNumElements() && i<20;i++) {
			TripleID *tid = rit.get(i);
			cout << i << " => " << *tid << endl;
		}

		cout << "------" << endl;

		// Jump around with fixed indices.
		// NOTE(review): these calls are not guarded by getNumElements(); they
		// appear to assume at least 10 matches - confirm what get() does for
		// an index past the end.
		cout << 2 << " => " << *rit.get(2) << endl;
		cout << 7 << " => " << *rit.get(7) << endl;
		cout << 1 << " => " << *rit.get(1) << endl;
		cout << 5 << " => " << *rit.get(5) << endl;
		cout << 0 << " => " << *rit.get(0) << endl;
		cout << 9 << " => " << *rit.get(9) << endl;
		cout << 8 << " => " << *rit.get(8) << endl;

		delete it;

#endif
		delete hdt;
	} catch (char *e) {
		cout << "ERROR: " << e << endl;
	} catch (const char *e) {
		cout << "ERROR: " << e << endl;
	}
}
예제 #6
0
int main(int argc, char **argv) {
	int c;
	string query, inputFile, outputFile, filter1, filter2;
	bool measure = false;

	while ((c = getopt(argc, argv, "hq:o:mf:F:")) != -1) {
		switch (c) {
		case 'h':
			help();
			break;
		case 'q':
			query = optarg;
			break;
		case 'o':
			outputFile = optarg;
			break;
		case 'm':
			measure = true;
			break;
		case 'f':
			filter1 = optarg;
			break;
		case 'F':
			filter2 = optarg;
			break;
		default:
			cout << "ERROR: Unknown option" << endl;
			help();
			return 1;
		}
	}

	if (argc - optind < 1) {
		cout << "ERROR: You must supply an HDT File" << endl << endl;
		help();
		return 1;
	}

	inputFile = argv[optind];


	try {
		HDT *hdt = HDTManager::mapIndexedHDT(inputFile.c_str());

		if (filter1 != "" || filter2 != "") {
			ostream *out;
			ofstream outF;

			if (outputFile != "") {
				outF.open(outputFile.c_str());
				out = &outF;
			} else {
				out = &cout;
			}

			string infile;
			if (filter1 != "")
				infile = filter1;
			else
				infile = filter2;

			std::ifstream file(infile.c_str());
			if (!file.good())
				throw "unable to open filter file";

			string linea = "";
			string property = "";
			string value = "";

			StopWatch st_total;

			size_t totalQueryResults = 0;

			size_t numQuery=0;

			while (!file.eof()) {
				getline(file, linea);
				if(linea.length()==0) 
					continue;
				size_t pos = linea.find(';');

				if (pos != std::string::npos) {
					property = linea.substr(0, pos);
					value = linea.substr(pos + 1);

					cerr<<"Query "<<numQuery << ": "<<linea<<endl;
					cout<<">>> Query "<<numQuery << ": "<<linea<<endl;
					cout<<"property:"<<property<<endl;
					cout<<"value:"<<value<<endl<<endl;

					StopWatch st;

					hdt::LiteralDictionary *dict = dynamic_cast<hdt::LiteralDictionary *>(hdt->getDictionary());
					if(dict==NULL) {
						cerr << "This dictionary does not support substring search" << endl;
						break;
					}
					hdt::Triples *triples = hdt->getTriples();

					uint32_t *results = NULL;
					size_t numResults = dict->substringToId((unsigned char *) value.c_str(), value.length(), &results);

					TripleID pattern(0, dict->stringToId(property, PREDICATE), 0);

					for (size_t i = 0; i < numResults; i++) {

						pattern.setObject(results[i]);

						string objStr = dict->idToString(results[i], OBJECT);

						IteratorTripleID *it = triples->search(pattern);

						unsigned int numTriples = 0;

						//iterate over the first pattern
						while (it->hasNext()) {
							TripleID *ts = it->next();

							if (filter1 != "") {
								// QUERY Q3
								cout << dict->idToString(ts->getSubject(), SUBJECT) << " " << objStr << endl;
								totalQueryResults++;

							} else {
								// QUERY Q4
								TripleID pat2(ts->getSubject(), 0, 0);
								TripleString out;
								string subjStr = dict->idToString(ts->getSubject(), SUBJECT);

								IteratorTripleID *it2 = triples->search(pat2);
								while(it2->hasNext()) {
									TripleID *inner = it2->next();

									cout << subjStr << " " << dict->idToString(inner->getPredicate(), PREDICATE) <<" ";

									if(inner->getObject()==results[i]) {
										cout << objStr << endl;
									} else {
										cout << dict->idToString(inner->getObject(), OBJECT) << endl;
									}
									totalQueryResults++;
								}
								delete it2;
							}
						}
						delete it;
					}
					
					cout << ">>> Results: " << totalQueryResults << endl;
					cerr << "Query " << numQuery << " Results: " << totalQueryResults << " in " << st << endl << endl;
					numQuery++;
				}
			}
			cerr << "Total time: " << st_total << endl;

			if (outputFile != "") {
				outF.close();
			}

			file.close();

		}

		delete hdt;
	} catch (char *e) {
		cout << "ERROR: " << e << endl;
	} catch (const char *e) {
		cout << "ERROR: " << e << endl;
	}
}
예제 #7
0
파일: hdt2rdf.cpp 프로젝트: rdmpage/hdt-cpp
int main(int argc, char **argv) {
	int c;
	string rdfFormat, inputFile, outputFile;
	RDFNotation notation = NTRIPLES;

	while( (c = getopt(argc,argv,"f:"))!=-1) {
		switch(c) {
		case 'f':
			rdfFormat = optarg;
			cout << "Format: " << rdfFormat << endl;
			break;
		default:
			cout << "ERROR: Unknown option" << endl;
			help();
			return 1;
		}
	}

	if(argc-optind<2) {
		cout << "ERROR: You must supply an input and output" << endl << endl;
		help();
		return 1;
	}

	if(rdfFormat!="") {
		if(rdfFormat=="ntriples") {
			notation = NTRIPLES;
		} else if(rdfFormat=="n3") {
			notation = N3;
		} else if(rdfFormat=="turtle") {
			notation = TURTLE;
		} else if(rdfFormat=="rdfxml") {
			notation = XML;
		} else {
			cout << "ERROR: The RDF output format must be one of: (ntriples, n3, turtle, rdfxml)" << endl;
			help();
			return 1;
		}
	}

	inputFile = argv[optind];
	outputFile = argv[optind+1];

	if(inputFile=="") {
		cout << "ERROR: You must supply an HDT input file" << endl << endl;
		help();
		return 1;
	}

	if(outputFile=="") {
		cout << "ERROR: You must supply an RDF output file" << endl << endl;
		help();
		return 1;
	}

	try {
		StdoutProgressListener progress;
		HDT *hdt = HDTManager::mapHDT(inputFile.c_str(), &progress);

		if(outputFile!="-") {
			RDFSerializer *serializer = RDFSerializer::getSerializer(outputFile.c_str(), notation);
			hdt->saveToRDF(*serializer);
			delete serializer;
		} else {
			RDFSerializer *serializer = RDFSerializer::getSerializer(cout, notation);
			hdt->saveToRDF(*serializer);
			delete serializer;
		}
		delete hdt;
	} catch (std::exception& e) {
		cerr << "ERROR: " << e.what() << endl;
	}

}
예제 #8
0
int main(int argc, char **argv) {
	int c;
	string query, inputFile, outputFile;
	bool measure = false;

	while( (c = getopt(argc,argv,"hq:o:m"))!=-1) {
		switch(c) {
		case 'h':
			break;
		case 'q':
			query = optarg;
			break;
		case 'o':
			outputFile = optarg;
			break;
		case 'm':
			measure = true;
			break;
		default:
			cout << "ERROR: Unknown option" << endl;
			return 1;
		}
	}

	if(argc-optind<1) {
		cout << "ERROR: You must supply an input and HDT File" << endl << endl;
		return 1;
	}

	inputFile = argv[optind];

	try {
		// LOAD
		HDT *hdt = HDTManager::mapHDT(inputFile.c_str());

		// CONVERT
		Dictionary *dict = hdt->getDictionary();
		//LiteralDictionary litDict;
		FourSectionDictionary litDict;
		StdoutProgressListener progress;
		litDict.import(dict, &progress);

		// SAVE
		ofstream out(outputFile.c_str(), ios::binary | ios::out);
		ControlInformation ci;

		// GLOBAL
		ci.clear();
		ci.setType(GLOBAL);
		ci.setFormat(HDTVocabulary::HDT_CONTAINER);
		ci.save(out);

		// HEADER
		ci.clear();
		ci.setType(HEADER);
		hdt->getHeader()->save(out, ci, NULL);

		// NEW DICTIONARY
		ci.clear();
		ci.setType(DICTIONARY);
		litDict.save(out, ci, NULL);

		// TRIPLES
		ci.clear();
		ci.setType(TRIPLES);
		hdt->getTriples()->save(out, ci, NULL);

		out.close();

		delete hdt;
	} catch (std::exception& e) {
		cout << "ERROR: " << e.what() << endl;
	}
}
예제 #9
0
/**
 * rdf2hdt: generates an HDT file from an RDF input file.
 *
 * Usage: <program> [options] <input RDF> <output HDT>
 *   -c <file>    configuration file handed to HDTSpecification
 *   -o <options> option string applied with HDTSpecification::setOptions()
 *   -v           verbose messages on stderr
 *   -p           show progress (StdoutProgressListener)
 *   -f <format>  input format: ntriples/nt, nquads/nq, turtle/ttl, trig
 *   -B <uri>     base URI (default: "<file://" + input path + ">")
 *   -i           also build the index after saving
 *   -V           print the version and exit
 *   -h           print the help and exit
 *
 * When -f is missing the format is guessed from the input file extension;
 * no extension, or "gz", falls back to N-Triples.
 *
 * @return 0 on success (and for -V / -h); 1 on a usage error, an
 *         unsupported format, or a caught exception.
 */
int main(int argc, char **argv) {
	string inputFile;
	string outputFile;
	bool verbose=false;
	bool showProgress=false;
	bool generateIndex=false;
	string configFile;
	string options;
	string rdfFormat;
	string baseUri;

    /**
     * Input file format. If no -f is specified and we can't guess which
     * format it is, we will use NTRIPLES by default.
     */
    RDFNotation notation = NTRIPLES;

    int flag;
    while ((flag = getopt (argc, argv, "c:o:vpf:B:iVh")) != -1)
    {
        switch (flag)
        {
            case 'c':
                configFile = optarg;
                break;
            case 'o':
                options = optarg;
                break;
            case 'v':
                verbose = true;
                break;
            case 'p':
                showProgress = true;
                break;
            case 'f':
                rdfFormat = optarg;
                break;
            case 'B':
                baseUri = optarg;
                break;
            case 'i':
                generateIndex=true;
                break;
            case 'V':
                cout << HDTVersion::get_version_string(".") << endl;
                return 0;
            case 'h':
                help();
                return 0;
            default:
                cerr << "ERROR: Unknown option" << endl;
                
                help();
                
                return 1;
        }
    }

// Stream that writes to stderr only when -v was given. It expands to an
// if/else, so use it as a complete statement inside braces.
#define vout if (!verbose) {} else std::cerr /* Verbose output */

	if (!configFile.empty()) {
		vout << "Configfile: " << configFile << endl;
	}
	if (!options.empty()) {
		vout << "Options: " << options << endl;
	}

	if(argc-optind<2) {
		cerr << "ERROR: You must supply an input and output" << endl << endl;
		help();
		return 1;
	}

	inputFile = argv[optind];
	outputFile = argv[optind+1];

	if(inputFile=="") {
		cerr << "ERROR: You must supply an RDF input file" << endl << endl;
		help();
		return 1;
	}

	if(outputFile=="") {
		cerr << "ERROR: You must supply an HDT output file" << endl << endl;
		help();
		return 1;
	}

	if(baseUri=="") {
		baseUri="<file://"+inputFile+">";
	}

    /**
     * If -f flag (input format) was not specified, we try to guess it
     * by reading the file extension.
     */
    if (rdfFormat == "")
    {
        vout << "Input format not given. Guessing from file extension..." << endl;
        
        // Get position of right-most '.' to find file extension.
        size_t dot_position = inputFile.rfind ('.', inputFile.length ());
        
        if (dot_position != string::npos)
            // Extract extension from file name
            rdfFormat = inputFile.substr (dot_position + 1, string::npos);
        
        /**
         * If rdfFormat is still "", it means -f was not specified and the file
         * didn't have any extension. The default format is defined at the top
         * of this file: RDFNotation notation = NTRIPLES;
         */
        if (rdfFormat == "" || rdfFormat == "gz")
        {
            rdfFormat = "nt";
            vout << "No input format detected: using N-Triples by default." << endl;
        }
    }
    
    // ASSERT: here rdfFormat must be != ""
    
    // Lower-case rdfFormat. tolower() is only defined for values that fit in
    // an unsigned char (or EOF), so each byte is converted first; a plain
    // char may be negative for non-ASCII bytes in a file name.
    for (size_t i = 0; i < rdfFormat.size (); ++i)
    {
        rdfFormat[i] = static_cast<char> (::tolower (static_cast<unsigned char> (rdfFormat[i])));
    }

    // Detect input format
    if (rdfFormat=="nquads" || rdfFormat=="nq") {
        notation = NQUADS;
    } else if (rdfFormat== "ntriples" || rdfFormat=="nt") {
        notation = NTRIPLES;
    } else if (rdfFormat=="trig") {
        notation = TRIG;
    } else if (rdfFormat=="turtle" || rdfFormat=="ttl") {
        notation = TURTLE;
    // -f or file extension detected, but didn't match any valid format.
    } else {
        cerr << "ERROR: Input format `" << rdfFormat << "' is not supported.\n"
             << "Use either of the following:\n"
             << "\t- `ntriples' or `nt' for N-Triples\n"
             << "\t- `nquads' or `nq' for N-Quads\n"
             << "\t- `turtle' or `ttl' for Turtle\n"
             << "\t- `trig' for TriG" << endl;
        return 1;
    }

    vout << "Detected RDF input format: " << rdfFormat << endl;

	// Process
	HDTSpecification spec(configFile);

	spec.setOptions(options);

	try {
		// Read RDF
		StopWatch globalTimer;

		// A listener is only created when -p was given.
		ProgressListener* progress = showProgress ? new StdoutProgressListener() : NULL;
		HDT *hdt = HDTManager::generateHDT(inputFile.c_str(), baseUri.c_str(), notation, spec, progress);

		// Save HDT
		hdt->saveToHDT(outputFile.c_str(), progress);

		globalTimer.stop();
		vout << "HDT Successfully generated." << endl;
		vout << "Total processing time: ";
		vout << "Clock(" << globalTimer.getRealStr();
		vout << ")  User(" << globalTimer.getUserStr();
		vout << ")  System(" << globalTimer.getSystemStr() << ")" << endl;

		// The index is built after the file has been written and timed.
		if(generateIndex) {
			hdt = HDTManager::indexedHDT(hdt, progress);
		}

		delete hdt;
		delete progress;
	} catch (std::exception& e) {
		cerr << "ERROR: " << e.what() << endl;
		return 1;
	}

}
예제 #10
0
int main(int argc, char **argv) {
	int c;
	char *inputFile=NULL, *insertFile=NULL, *removeFile=NULL, *outputFile=NULL;
	char *insertSubject=NULL, *insertPredicate=NULL, *insertObject=NULL;
	char *removeSubject=NULL, *removePredicate=NULL, *removeObject=NULL;
	bool insertSingle = false;
	bool removeSingle = false;

	bool insertMultiple = false;
	bool removeMultiple = false;

	while ((c = getopt(argc, argv, "hO:i:r:I:R:")) != -1) {
		switch (c) {
		case 'h':
			help();
			break;
		case 'O':
			outputFile = optarg;
			break;
		case 'i':
			insertSingle = true;
			insertSubject = optarg;
			insertPredicate = argv[optind++];
			insertObject = argv[optind++];
			break;
		case 'r':
			removeSingle = true;
			removeSubject = optarg;
			removePredicate = argv[optind++];
			removeObject = argv[optind++];
			break;
		case 'I':
			insertMultiple = true;
			insertFile = optarg;
			break;
		case 'R':
			removeMultiple = true;
			removeFile = optarg;
			break;
		default:
			cout << "ERROR: Unknown option" << endl;
			help();
			return 1;
		}
	}

	if (argc - optind < 2) {
		cout << "ERROR: You must supply an input and output HDT File" << endl << endl;
		help();
		return 1;
	}
	inputFile = argv[optind];
	outputFile = argv[optind+1];
	if (strcmp(inputFile,outputFile)==0){
		cerr<< "ERROR: input and output files must me different" << endl <<endl;
		return 1;
	}

	try {
		// LOAD
		HDT *hdt = HDTManager::mapHDT(inputFile);

		// Replace header
		Header *head = hdt->getHeader();

		if (insertSingle) {
			TripleString ti(insertSubject, insertPredicate, insertObject);
			head->insert(ti);
		}
		if (removeSingle) {
			TripleString ti(removeSubject, removePredicate, removeObject);
			head->remove(ti);
		}
		if (insertMultiple) {
			string line;
			std::ifstream infile(insertFile);
			while (getline(infile, line)) {
				TripleString ti;
				ti.read(line);
				head->insert(ti);
			}
		}
		if (removeMultiple) {
			string line;
			std::ifstream infile(removeFile);
			while (getline(infile, line)) {
				TripleString ti;
				ti.read(line);
				head->remove(ti);
			}
		}
		// SAVE
		hdt->saveToHDT(outputFile);

		delete hdt;
	} catch (std::exception& e) {
		cerr << "ERROR: " << e.what() << endl;
		return 1;
	}
}