/**
 * Dumps this BuildGraph's configuration to standard output.
 *
 * Printed in order: filename_, sentences_, the map pointed to by relscore_,
 * winsize_, the lambda array and the cutoff array. Container values are
 * delegated to the `print` helper; scalars are written directly to cout.
 */
void BuildGraph::print_setting (void)
{
    // Source file name.
    cout << "filename : ";
    cout << filename_;
    cout << endl;

    // Input sentences and the relevance-score table (dereferenced pointer).
    print.Vector("sentences", sentences_);
    print.Dict("relscore", *relscore_);

    // Window size.
    cout << "winsize : ";
    cout << winsize_;
    cout << endl;

    // Parameter arrays together with their element counts.
    print.Array("lambda", lamb_list_, lamb_size_);
    // The label below reads "cutoff_path" although the data is cutoff_list_;
    // it is kept verbatim so the output format does not change.
    print.Array("cutoff_path", cutoff_list_, cutoff_size_);
}
int main (void) { string text_dirpath = string("/Users/KimKR/Desktop/NEXT_LAB/keyword/test"); string relscore_path = string("/Users/KimKR/Desktop/NEXT_LAB/keyword/rel_score/test/bm25_3_3"); vector<string> textfiles = vector<string>(); getdir(text_dirpath, textfiles); int winsize = 2; int cutoff_list[] = {3}; int cutoff_size = 1; double lambda_list[1]; int lambda_size = 1; Converter cv; Print print; // for(int i = 0; i < 21; ++i) { // lambda_list[i] = i * 0.05; // } lambda_list[0] = 0.85; clock_t all_begin = clock(); for(int fidx = textfiles.size() - 1; fidx >= 0; --fidx) { string filename = textfiles[fidx]; vector<string> sentences = TextRead(text_dirpath + "/" + filename); clock_t begin = clock(); for(int k = K_STR; k < K_STEP; ++k) { double k1 = k * 0.1; for(int b1 = 0; b1 < 1; ++b1) { double b = 0.05 * b1; string relfile_path = relscore_path + "_" + cv.to_string(k1, 2) + "_" + cv.to_string(b, 3) + "/" + filename; map<string, double> relscore; if(RelscoreRead(relfile_path, &relscore) == 0) continue; BuildGraph bg(filename, sentences, &relscore, winsize, k1, b, lambda_list, lambda_size, cutoff_list, cutoff_size); Graph G; G.null_ = true; bg.run(true, false, false, false); print.Vector("top_score_candidates", bg.top_score_candidates_.at(3).at(0.85)); SaveGraph(bg.top_score_candidates_, bg.filename_, bg.k1_, bg.b_, bg.winsize_); } } clock_t end = clock(); double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC; cout << filename << " : " << elapsed_secs << endl; } clock_t end = clock(); double elapsed_secs = double(end - all_begin) / CLOCKS_PER_SEC; cout << "all finished" << " : " << elapsed_secs << endl; getchar(); }
void BuildGraph::generate_candidates(void) { for(int cidx = 0; cidx < cutoff_size_; ++cidx) { int cutoff = cutoff_list_[cidx]; for(int lidx = 0; lidx < lamb_size_; ++lidx) { double lamb = lamb_list_[lidx]; for(int sidx = 0; sidx < num_of_sentence_; ++sidx) { vector<string> words = Split(sentences_[sidx], ';'); vector<string> candidate; for(int widx = 0; widx < words.size(); ++widx) { if (node_scores_[cutoff][lamb].count(words[widx])) { candidate.push_back(words[widx]); } else if (candidate.size() > 0) { keywords_candidates_[cutoff][lamb].push_back(Join(candidate, ' ')); candidate.clear(); } } if (candidate.size() > 0) { keywords_candidates_[cutoff][lamb].push_back(Join(candidate, ' ')); } print.Vector("keywords_candidates_ : ", keywords_candidates_[cutoff][lamb]); } } } };