int main(int argc, char** argv) { // handle parameters po::variables_map cfg; if (!init_params(argc,argv, &cfg)) exit(1); // something is wrong DfTable dft (cfg["dftable"].as<string>()); cerr << "DF table loaded (" << dft.size() << " entries)." << endl; std::string metric; bool normalize = false; Scorer* scorer = setup_metric(cfg, metric, normalize); if (cfg.count("N")) scorer->setDocCount(cfg["N"].as<double>()); else scorer->setDocCount(dft.mMaxDf); cerr << "Term weighting metric: " << metric << " " << scorer->print() << " [normalization=" << normalize << "]" << endl; bool verbose = false; if (cfg.count("verbose")) verbose = true; string docid; int len = -1; string v_str; TermVector v; int c = 0; int skipped = 0; int empty_input = 0; int empty_output = 0; while (cin >> docid) { cin.ignore(1, '\t'); cin >> len; cin.ignore(1, '\t'); getline(cin, v_str); if (docid.size() == 0 || len <= 0) continue; if (v_str.size() == 0) { cerr << "WARNING: input vector (id=" << docid << ",len=" << len << ") empty!" << endl; cout << docid << "\t" << len << "\t" << endl; empty_input++; continue; } v = vutils::read_vector(v_str); Document d(v, scorer, docid, len); skipped += d.calc_wvec(dft, normalize, verbose); if (d.wvec_.size() == 0) { if (verbose) cerr << "WARNING! " << d.docid_ << " output vector is empty!" << endl; empty_output++; } // write to output cout << d.docid_ << "\t" << d.len_ << "\t"; vutils::write_vector(d.wvec_, cout); cout << endl; c++; c%1000==0 ? cerr << "." << c << "." : cerr ; } delete scorer; cerr << "done." << endl << endl << c << "\tlines read/written." << endl << empty_input << "\tempty input vectors." << endl << empty_output << "\tempty output vectors." << endl << skipped << "\tterms skipped due missing df value." << endl; }