// Reads test cases from stdin and prints one "Case k: v" line per case.
//
// Each case starts with three integers "g b m" followed by m pairs "x y"
// (1-based). Every cell (x, y) of the g-by-b table that is NOT listed becomes
// an edge of a Bipartite(g, b) graph, and the value printed for the case is
// g + b - B->result(). Input ends with a case whose g is 0, or at end of file.
//
// Returns 0 on normal termination and 1 when the input is malformed.
int main() {
    const int kMaxSide = 200;  // capacity of tab in each dimension
    char tab[kMaxSide][kMaxSide];
    int caseNo = 0;
    int g, b, m;

    // Checking the scanf result matters: on EOF or unparsable input the
    // variables would otherwise keep stale values and the loop would never end.
    while (scanf("%d %d %d", &g, &b, &m) == 3 && g != 0) {
        if (g < 0 || g > kMaxSide || b < 0 || b > kMaxSide) {
            fprintf(stderr, "unsupported graph size %d x %d (each side must be in 0..%d)\n",
                    g, b, kMaxSide);
            return 1;
        }

        // clr() is defined elsewhere; presumably it marks every cell as set
        // (non-zero), since the listed pairs are the ones reset to 0 below.
        clr(tab);

        // "m-- > 0" (rather than "m--") so that a negative count reads nothing.
        while (m-- > 0) {
            int x, y;
            if (scanf("%d %d", &x, &y) != 2) return 1;  // truncated case
            // Only cells inside the g-by-b area are ever inspected below, so
            // pairs outside it are ignored instead of written out of bounds.
            if (x >= 1 && x <= g && y >= 1 && y <= b) tab[x - 1][y - 1] = 0;
        }

        Bipartite* B = new Bipartite(g, b);
        for (int x = 0; x < g; x++)
            for (int y = 0; y < b; y++)
                if (tab[x][y]) B->addedge(x, y);

        printf("Case %d: %d\n", ++caseNo, g + b - B->result());
        delete B;
    }
    return 0;
}
// Reads a Bipartiteite graph from stdin, then returns a copy of it. In the future we may want to implement it without any copying. Bipartite *readBipartiteGraph() { unsigned numberOfXVertices, numberOfYVertices, edges; // Läs antal hörn X, Y och kanter scanf("%d %d %d", &numberOfXVertices, &numberOfYVertices, &edges); Bipartite *graph = new Bipartite(numberOfXVertices,numberOfYVertices); // Läs in kanterna for (unsigned i = 0; i < edges; ++i) { unsigned from, to; scanf("%d %d", &from, &to); graph->addEdge(from-1,to-1); } return graph; }
void solve1( int M1, int L1, const Bipartite *m_to_l1, const double *P1, double *q1 ) { M = M1; L = L1; P = P1; q = q1; m_to_l = m_to_l1; double *qp = q; for ( int l=0; l<L; l++ ) { auto &_to_m = m_to_l->getFromSet( l ); memset( qp, 0, sizeof(double) * LabelSet::classes ); for ( auto& ele : _to_m ) { int m = ele.first; double alpha = ele.second; addScaledTo( qp, P + m * LabelSet::classes, LabelSet::classes, alpha ); } double s = sum_vec( qp, LabelSet::classes ); scale( qp, LabelSet::classes, 1.0 / s ); qp += LabelSet::classes; } }
/* * Restricted Energy on q(l) is * sum_m ( D(m) - alpha(l,m) * q(l) + alpha(l,m) * q'(l) )^2 * + sum_j w(i,j) * ( q'(l) - q(j) )^2 */ inline double restrict_energy( int l, double *q_l = nullptr ) { auto& _to_m = m_to_l->getFromSet( l ); auto& _to_j = forest->GetWeights( l ); double energy = 0.0; double t0[LabelSet::classes]; if ( nullptr == q_l ) { for ( auto& ele : _to_m ) { int m = ele.first; energy += norm2( D + m * LabelSet::classes, LabelSet::classes ); } for ( auto& ele : _to_j ) { int j = ele.first; double wt = static_cast<double>( ele.second ); minus( q + l * LabelSet::classes, q + j * LabelSet::classes, t0, LabelSet::classes ); energy += options.beta * wt * norm2( t0, LabelSet::classes ); } } else { double t0[LabelSet::classes]; for ( auto& ele : _to_m ) { int m = ele.first; double alpha = ele.second; memcpy( t0, D + m * LabelSet::classes, sizeof(double) * LabelSet::classes ); minusScaledFrom( t0, q + l * LabelSet::classes, LabelSet::classes, alpha ); addScaledTo( t0, q_l, LabelSet::classes, alpha ); energy += norm2( t0, LabelSet::classes ); } for ( auto& ele : _to_j ) { int j = ele.first; double wt = static_cast<double>( ele.second ); minus( q_l, q + j * LabelSet::classes, t0, LabelSet::classes ); energy += options.beta * wt * norm2( t0, LabelSet::classes ); } } return energy; }
// D(m) = sum alpha(l,m)*q(l,m) - P(m) inline void update_D( int m ) { auto& _to_l = m_to_l->getToSet( m ); double *t = D + m * LabelSet::classes; memset( t, 0, sizeof(double) * LabelSet::classes ); for ( auto& ele : _to_l ) { int l = ele.first; double alpha = ele.second; addScaledTo( t, q + l * LabelSet::classes, LabelSet::classes, alpha ); } minusFrom( t, P + m * LabelSet::classes, LabelSet::classes ); }
/* * Construct the sorted features (words) for each dataset. * */ int main(int argc, char** argv) { if(argc != 2){ cout<<"please specify scale.."<<endl; exit(0); } int scale = atoi(argv[1]); // read the data source file string dataset = "restaurant"; vector<string> source1 = FileIO::readFileLines("data/"+dataset+"/source_1.txt"); vector<string> source2 = FileIO::readFileLines("data/"+dataset+"/source_2.txt"); int N1 = source1.size(); int N2 = source2.size(); int N = N1+N2; int pair_num = N*N; srand (time(NULL)); cout<<"finish loading source fle..."<<endl; // scan the source file and hash the word from string to int map<string, int> word_id; map<int, string> id_word; for(size_t i=0;i < source1.size();i++){ vector<string> segs = split(source1[i], ' '); for(size_t j=0;j < segs.size();j++){ string word = format(segs[j]); int id = getWordId(word, word_id); word_id[word] = id; id_word[id] = word; } } for(size_t i=0;i < source2.size();i++){ vector<string> segs = split(source2[i], ' '); for(size_t j=0;j < segs.size();j++){ string word = format(segs[j]); int id = getWordId(word, word_id); word_id[word] = id; id_word[id] = word; } } cout<<"finish constructing word id..."<<endl; // construct the inverted lists. Each list is sorted by entity id. vector<set<int> > combine_inv_lists(word_id.size()); vector<set<int> > inv_lists1(word_id.size()); for(size_t i=0;i < source1.size();i++){ vector<string> segs = split(source1[i], ' '); for(size_t j=0;j < segs.size();j++){ string word = format(segs[j]); int id = word_id[word]; inv_lists1[id].insert(i); combine_inv_lists[id].insert(i); } } vector<set<int> > inv_lists2(word_id.size()); for(size_t i=0;i < source2.size();i++){ vector<string> segs = split(source2[i], ' '); for(size_t j=0;j < segs.size();j++){ string word = format(segs[j]); int id = word_id[word]; inv_lists2[id].insert(i); combine_inv_lists[id].insert(i); } } cout<<"finish constructing inverted lists..."<<endl; // refine the inverted lists by removing stop-words. 
for(size_t wid=0;wid < word_id.size();wid++){ int len = combine_inv_lists[wid].size(); if(len == 1 || len > 0.1*scale*N){ inv_lists1[wid].clear(); inv_lists2[wid].clear(); combine_inv_lists[wid].clear(); } } // construct the bipartite graph between entity-pairs and terms int word_num = word_id.size(); Bipartite *bigraph = new Bipartite(pair_num, word_num); cout<<"finish init bigraph"<<endl; for(size_t wid=0;wid < word_id.size();wid++){ for(set<int>::iterator id1=inv_lists1[wid].begin();id1!=inv_lists1[wid].end();id1++){ for(set<int>::iterator id2=inv_lists2[wid].begin();id2!=inv_lists2[wid].end();id2++){ int pid = (*id1)*N+N1+(*id2); bigraph->addEdge(pid, wid); //cout<<(*id1)<<"\t"<<(*id2)<<"\t"<<pid<<"\t"<<wid<<"\t"<<id_word[wid]<<endl; } } } bigraph->init(); bigraph->iterate(); bigraph->output(id_word, combine_inv_lists); //exit(0); cout<<"edge num: "<<bigraph->activePairNum()<<endl; RandomWalk* walker; for(int iter=0;;iter++){ walker = new RandomWalk(bigraph->p_score, N1+N2, 20, 60, 10); for(int i=0;i < N1;i++){ int id1=i; for(int j=0;j < N2;j++){ int id2=j+N1; if(bigraph->p_score[id1*N+id2]>0){ bigraph->p_score[id2*N+id1]=bigraph->p_score[id1*N+id2]; walker->addEdge(id1,id2); } } } cout<<"edge num: "<<bigraph->activePairNum()<<endl; walker->iterate(); if(iter==5){ break; } bigraph->updatePScore(walker->p_conf); bigraph->output(id_word, combine_inv_lists); } /* RandomWalk* walker = new RandomWalk(bigraph->p_score, N); for(int iter=1;iter <= 200;iter++){ cout<<"iteration "<<iter++<<endl; if(walker->iterate()==0){ break; } } */ set<string> matches = FileIO::readMatch("data/"+dataset+"/match.txt"); int Num=1000; double max_weight=-1; vector<vector<int> > buckets(Num); for(int i=0;i < N1;i++){ int id1=i; for(int j=0;j < N2;j++){ int id2=j+N1; double weight = walker->p_conf[id1*N+id2]*bigraph->p_score[id1*N+id2]; if(weight > max_weight){ max_weight=weight; } } } double seg = max_weight/Num+0.001; for(int i=0;i < N1;i++){ int id1=i; for(int j=0;j < N2;j++){ int 
id2=j+N1; double weight = walker->p_conf[id1*N+id2]*bigraph->p_score[id1*N+id2]; int idx=(int)(weight/seg); buckets[idx].push_back(id1*N+id2); } } vector<double> conf_vec; int count=0,total_pair=0; for(int i=Num-1;i>0;i--){ for(size_t j=0;j < buckets[i].size();j++){ int key = buckets[i][j]; int id1 = key/N; int id2 = key%N; if(id1 < id2){ stringstream ss; ss<<id1<<"_"<<(id2-N1); cout<<id1<<"\t"<<id2<<"("<<id2-N1<<")\t"<<walker->p_conf[id1*N+id2]<<"\t"<<bigraph->p_score[id1*N+id2]<<"\t"; if(matches.find(ss.str()) != matches.end()){ cout<<"true"; }else{ cout<<"false"; } conf_vec.push_back(walker->p_conf[id1*N+id2]); size_t C=10; if(conf_vec.size() > C){ double avg_conf=0.0; for(size_t z=conf_vec.size()-1;z >= conf_vec.size()-C;z--){ avg_conf += conf_vec[z]; } if(walker->p_conf[id1*N+id2]>0.9999 && avg_conf/C >= 0.98){ total_pair++; if(matches.find(ss.str()) != matches.end()){ count++; } } cout<<"\t"<<avg_conf/C; }else{ total_pair++; if(matches.find(ss.str()) != matches.end()){ count++; } } cout<<endl; } } } /* int count=0, total_pair=0; set<string> results; for(int i=0;i < N1;i++){ int id1=i; for(int j=0;j < N2;j++){ int id2=j+N1; if(walker->p_conf[id1*N+id2]>0.95){ stringstream ss; ss<<id1<<"_"<<j; results.insert(ss.str()); total_pair++; if(matches.find(ss.str()) != matches.end()){ count++; }else{ cout<<"not_match: "<<id1<<"\t"<<j<<"("<<(j+N1)<<")\t"<<bigraph->p_score[id1*N+id2]<<endl; } } } } for(set<string>::iterator iter=matches.begin();iter != matches.end();iter++){ if(results.find(*iter) == results.end()){ cout<<"miss\t"<<*iter<<endl; } } */ double precison = 1.0*count/total_pair; double recall = 1.0*count/matches.size(); double f1 = 2*precison*recall/(precison+recall); cout<<"precision: "<<count<<"\t"<<total_pair<<"\t"<<precison<<"\t"<<recall<<"\t"<<f1<<endl; return 1; }
inline void update_q( int l ) { auto& _to_m = m_to_l->getFromSet( l ); auto& _to_j = forest->GetWeights( l ); double t0[LabelSet::classes]; memset( t0, 0, sizeof(double) * LabelSet::classes ); double t1[LabelSet::classes]; // t0 = sum_m alpha(l,m) * D(m) for ( auto& ele : _to_m ) { int m = ele.first; double alpha = ele.second; addScaledTo( t0, D + m * LabelSet::classes, LabelSet::classes, alpha ); } // t0 += sum_m wt(l,j) (q(l) - q(j) ) for ( auto& ele : _to_j ) { int j = ele.first; double wt = static_cast<double>( ele.second ); minus( q + l * LabelSet::classes, q + j * LabelSet::classes, t1, LabelSet::classes ); addScaledTo( t0, t1, LabelSet::classes, wt ); } // negate t0 to get negative gradient direction negate( t0, LabelSet::classes ); // Line Search double energy_old = restrict_energy( l ) * options.wolf; bool updated = false; normalize_vec( t0, t0, LabelSet::classes ); double energy_new = 0.0; for ( int i=0; i<40; i++ ) { scale( t0, LabelSet::classes, options.shrinkRatio ); add( t0, q + l * LabelSet::classes, t1, LabelSet::classes ); // Simplex Projection watershed( t1, t0, LabelSet::classes ); energy_new = restrict_energy( l, t0 ); if ( energy_new < energy_old ) { updated = true; break; } } if ( updated ) { for ( auto& ele : _to_m ) { int m = ele.first; double alpha = ele.second; update_D( m, l, t0, alpha ); } memcpy( q + l * LabelSet::classes, t0, sizeof(double) * LabelSet::classes ); } }