void getNB(HashGraph* G, int v, int distance, hash_set<int>& result, vector<bool>& mark) { assert(distance==1 || distance==2); EdgeMap* p_neighbors=G->getNeighbors(v); EdgeMap::iterator pnb; for (pnb=p_neighbors->begin(); pnb!=p_neighbors->end(); pnb++) { UINT w=pnb->first; if(!mark[w]) result.insert(w); } if(distance==1) return; hash_set<int>::iterator p; vector<int> temp; for(p=result.begin(); p!=result.end(); p++) temp.push_back(*p); int imnb_size=result.size(); //result.clear(); for(int i=0; i<imnb_size; i++) { p_neighbors=G->getNeighbors(temp[i]); for (pnb=p_neighbors->begin(); pnb!=p_neighbors->end(); pnb++) { UINT w=pnb->first; if(!mark[w]) result.insert(w); //automatically handel duplication. } } for(int i=0; i<imnb_size; i++) result.erase(temp[i]); }
// Appends to *keep_words every word of cad_words_set that
//   (a) has an entry in wordinfo_map,
//   (b) has a key length of at most WORD_LEN - 4, and
//   (c) whose entry reports true from calc_is_keep().
// Words missing from wordinfo_map are reported on stderr and skipped.
//
// keep_words must be non-null; it is appended to, not cleared.
// Always returns 0.
int trim_entropy_filter(vector<string>* keep_words, hash_set<string>& cad_words_set, WordInfoMap& wordinfo_map)
{
    // At most one word per candidate is appended.
    keep_words->reserve(cad_words_set.size());

    for (hash_set<string>::iterator it = cad_words_set.begin(); it != cad_words_set.end(); ++it) {
        WordInfoMap::iterator it_map = wordinfo_map.find(*it);
        if (it_map == wordinfo_map.end()) {
            // Terminate the message with a newline so that successive
            // warnings do not run together on one line.
            fprintf(stderr, "WARNING, word[%s] in cad_word, not in word_info\n", it->c_str());
            continue;
        }

        // NOTE(review): WORD_LEN is defined outside this block; size() is
        // unsigned, so if WORD_LEN - 4 could ever be negative this
        // comparison would not behave as written — confirm.
        if (it_map->first.size() <= WORD_LEN - 4 && it_map->second.calc_is_keep()) {
            keep_words->push_back(*it);
        }
    }
    return 0;
}