Example #1
int main()
{
	Bipartite* B;
	int N = 0,g,b,m,x,y;
	char tab[200][200];	// 0/1 adjacency grid between the two vertex sets
	while(1)
	{
		clr(tab);
		scanf("%d %d %d",&g,&b,&m);
		if(g == 0)
			return 0;
		B = new Bipartite(g,b);
		while(m--)
		{
			scanf("%d %d",&x,&y);
			tab[x - 1][y - 1] = 1;	// mark edge (x, y); input is 1-based
		}
		for(x = 0;x < g;x++)
			for(y = 0;y < b;y++)
				if(tab[x][y])
					B->addedge(x,y);
		printf("Case %d: %d\n",++N,g + b - B->result());
		delete B;
	}
}
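A note on Example #1: by König's theorem, in a bipartite graph a minimum vertex cover has the same size as a maximum matching, so g + b - B->result() is the size of a maximum independent set, assuming result() returns the maximum matching size. A minimal sketch of a Bipartite class consistent with that usage (Kuhn's augmenting-path algorithm; only the constructor, addedge and result names come from the example, everything else is an assumption):

#include <vector>

struct Bipartite {
    int left, right;
    std::vector<std::vector<int>> adj;  // adjacency lists of the left side
    std::vector<int> matchR;            // matchR[y] = left vertex matched to y, or -1
    std::vector<char> used;             // visited marks for one augmentation pass

    Bipartite(int g, int b) : left(g), right(b), adj(g), matchR(b, -1) {}

    void addedge(int x, int y) { adj[x].push_back(y); }

    // Try to find an augmenting path starting from left vertex x.
    bool augment(int x) {
        for (int y : adj[x]) {
            if (used[y]) continue;
            used[y] = 1;
            if (matchR[y] < 0 || augment(matchR[y])) {
                matchR[y] = x;
                return true;
            }
        }
        return false;
    }

    // Size of a maximum matching.
    int result() {
        int matching = 0;
        for (int x = 0; x < left; ++x) {
            used.assign(right, 0);
            if (augment(x)) ++matching;
        }
        return matching;
    }
};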
Example #2
// Reads a bipartite graph from stdin and returns it as a newly allocated Bipartite. In the future we may want to implement it without any copying.
Bipartite *readBipartiteGraph() {
	unsigned numberOfXVertices, numberOfYVertices, edges;
	// Read the number of X vertices, Y vertices, and edges
	scanf("%u %u %u", &numberOfXVertices, &numberOfYVertices, &edges);
	Bipartite *graph = new Bipartite(numberOfXVertices, numberOfYVertices);
	// Read the edges (1-based in the input, converted to 0-based here)
	for (unsigned i = 0; i < edges; ++i) {
		unsigned from, to;
		scanf("%u %u", &from, &to);
		graph->addEdge(from - 1, to - 1);
	}
	return graph;
}
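A hypothetical caller, assuming the same matching interface as Example #1; the caller owns the returned graph and must delete it:

	Bipartite *graph = readBipartiteGraph();
	printf("%d\n", graph->result());  // e.g., query the maximum matching
	delete graph;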
Example #3
  void solve1( int M1, int L1,
               const Bipartite *m_to_l1,
               const double *P1, double *q1 )
  {
    M = M1;
    L = L1;
    P = P1;
    q = q1;
    m_to_l = m_to_l1;

    
    // q(l) = normalized alpha-weighted sum of neighbor distributions P(m),
    // written block by block into q
    double *qp = q;
    for ( int l=0; l<L; l++ ) {
      auto &_to_m = m_to_l->getFromSet( l );
      memset( qp, 0, sizeof(double) * LabelSet::classes );
      for ( auto& ele : _to_m ) {
        int m = ele.first;
        double alpha = ele.second;
        addScaledTo( qp, P + m * LabelSet::classes, LabelSet::classes, alpha );
      }

      double s = sum_vec( qp, LabelSet::classes );
      // renormalize so that q(l) sums to one (assumes s > 0)
      scale( qp, LabelSet::classes, 1.0 / s );
      qp += LabelSet::classes;
    }
    
  }
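solve1 therefore sets each q(l) to the normalized alpha-weighted sum of its neighbors' distributions, q(l) = ( sum_m alpha(l,m) * P(m) ) / its L1 norm. A self-contained sketch of that computation for a single l on plain arrays (the flat layout and the zero-sum guard are assumptions; the original uses getFromSet and the helpers addScaledTo/sum_vec/scale):

#include <cstring>

// q_l = normalize( sum_e alphas[e] * P[ms[e]] ), with K classes per distribution.
void weighted_average(double *q_l, const double *P, const int *ms,
                      const double *alphas, int degree, int K) {
    std::memset(q_l, 0, sizeof(double) * K);
    for (int e = 0; e < degree; ++e)
        for (int k = 0; k < K; ++k)
            q_l[k] += alphas[e] * P[ms[e] * K + k];
    double s = 0.0;
    for (int k = 0; k < K; ++k) s += q_l[k];
    if (s > 0.0)  // the example divides unconditionally; this guard is an addition
        for (int k = 0; k < K; ++k) q_l[k] /= s;
}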
Example #4
  /*
   * Restricted energy at a candidate q'(l) is
   *     sum_m ( D(m) - alpha(l,m) * q(l) + alpha(l,m) * q'(l) )^2
   *   + beta * sum_j w(l,j) * ( q'(l) - q(j) )^2
   */
  inline double restrict_energy( int l, double *q_l = nullptr )
  {

    auto& _to_m = m_to_l->getFromSet( l );
    auto& _to_j = forest->GetWeights( l );
    
    double energy = 0.0;
    double t0[LabelSet::classes];

    if ( nullptr == q_l ) {
      for ( auto& ele : _to_m ) {
        int m = ele.first;
        energy += norm2( D + m * LabelSet::classes, LabelSet::classes );
      }
      
      for ( auto& ele : _to_j ) {
        int j = ele.first;
        double wt = static_cast<double>( ele.second );

        minus( q + l * LabelSet::classes, q + j * LabelSet::classes, t0, LabelSet::classes );
        energy += options.beta * wt * norm2( t0, LabelSet::classes );
      }
    } else {

      for ( auto& ele : _to_m ) {
        int m = ele.first;
        double alpha = ele.second;

        memcpy( t0, D + m * LabelSet::classes, sizeof(double) * LabelSet::classes );
        minusScaledFrom( t0, q + l * LabelSet::classes, LabelSet::classes, alpha );
        addScaledTo( t0, q_l, LabelSet::classes, alpha );
        energy += norm2( t0, LabelSet::classes );
      }

      
      for ( auto& ele : _to_j ) {
        int j = ele.first;
        double wt = static_cast<double>( ele.second );
        minus( q_l, q + j * LabelSet::classes, t0, LabelSet::classes );
        energy += options.beta * wt * norm2( t0, LabelSet::classes );
      }

    }

    return energy;

  }
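The descent direction built in update_q (Example #7) follows from differentiating this energy with respect to q'(l). Using D(m) = sum_l alpha(l,m) * q(l) - P(m) from Example #5:

\nabla_{q'(l)} E
  = 2 \sum_m \alpha(l,m) \Bigl( D(m) + \alpha(l,m)\bigl(q'(l) - q(l)\bigr) \Bigr)
  + 2 \beta \sum_j w(l,j) \bigl( q'(l) - q(j) \bigr),

so at the current iterate q'(l) = q(l) the gradient reduces to
2 \bigl( \sum_m \alpha(l,m)\, D(m) + \beta \sum_j w(l,j) ( q(l) - q(j) ) \bigr).
update_q accumulates exactly these two sums (without the constant factor 2, and without beta on the smoothness term) and negates the result.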
Example #5
  // D(m) = sum_l alpha(l,m) * q(l) - P(m)
  inline void update_D( int m )
  {

    auto& _to_l = m_to_l->getToSet( m );

    

    double *t = D + m * LabelSet::classes;
    memset( t, 0, sizeof(double) * LabelSet::classes );

    for ( auto& ele : _to_l ) {
      int l = ele.first;
      double alpha = ele.second;
      addScaledTo( t, q + l * LabelSet::classes, LabelSet::classes, alpha );
    }
    
    minusFrom( t, P + m * LabelSet::classes, LabelSet::classes );
  }
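Example #7 calls a four-argument overload update_D( m, l, t0, alpha ) that is not shown in these examples. Because D(m) is linear in each q(l), it can be maintained incrementally when a single q(l) changes. A hypothetical sketch of that overload, relying on the fact that update_q calls it before overwriting q(l), so q still holds the old value:

  // Hypothetical incremental form: D(m) += alpha * ( q_new(l) - q_old(l) ).
  inline void update_D( int m, int l, const double *q_new, double alpha )
  {
    double *t = D + m * LabelSet::classes;
    const double *q_old = q + l * LabelSet::classes;
    for ( int k = 0; k < LabelSet::classes; ++k )
      t[k] += alpha * ( q_new[k] - q_old[k] );
  }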
Example #6
/*
 * Construct the sorted features (words) for each dataset.
 */
int main(int argc, char** argv)
{
	if(argc != 2){
		cout<<"please specify the scale parameter"<<endl;
		exit(1);
	}
	int scale = atoi(argv[1]);

	// read the data source file
	string dataset = "restaurant";
	vector<string> source1 = FileIO::readFileLines("data/"+dataset+"/source_1.txt");
	vector<string> source2 = FileIO::readFileLines("data/"+dataset+"/source_2.txt");
	int N1 = source1.size();
	int N2 = source2.size();
	int N = N1+N2;
	int pair_num = N*N;
	srand (time(NULL));
	cout<<"finish loading source fle..."<<endl;


	// scan the source file and hash the word from string to int
	map<string, int> word_id;
	map<int, string> id_word;
	for(size_t i=0;i < source1.size();i++){
		vector<string> segs = split(source1[i], ' ');
		for(size_t j=0;j < segs.size();j++){
			string word = format(segs[j]);
			int id = getWordId(word, word_id);
			word_id[word] = id;
			id_word[id] = word;
		}
	}

	for(size_t i=0;i < source2.size();i++){
		vector<string> segs = split(source2[i], ' ');
		for(size_t j=0;j < segs.size();j++){
			string word = format(segs[j]);
			int id = getWordId(word, word_id);
			word_id[word] = id;
			id_word[id] = word;
		}
	}
	cout<<"finish constructing word id..."<<endl;


	// construct the inverted lists. Each list is sorted by entity id.
	vector<set<int> > combine_inv_lists(word_id.size());
	vector<set<int> > inv_lists1(word_id.size());
	for(size_t i=0;i < source1.size();i++){
		vector<string> segs = split(source1[i], ' ');
		for(size_t j=0;j < segs.size();j++){
			string word = format(segs[j]);
			int id = word_id[word];
			inv_lists1[id].insert(i);
			combine_inv_lists[id].insert(i);
		}
	}
	vector<set<int> > inv_lists2(word_id.size());
	for(size_t i=0;i < source2.size();i++){
		vector<string> segs = split(source2[i], ' ');
		for(size_t j=0;j < segs.size();j++){
			string word = format(segs[j]);
			int id = word_id[word];
			inv_lists2[id].insert(i);
			combine_inv_lists[id].insert(i);
		}
	}
	cout<<"finish constructing inverted lists..."<<endl;


	// refine the inverted lists by removing stop-words. 
	for(size_t wid=0;wid < word_id.size();wid++){
		int len = combine_inv_lists[wid].size(); 
		if(len == 1 || len > 0.1*scale*N){
			inv_lists1[wid].clear();
			inv_lists2[wid].clear();
			combine_inv_lists[wid].clear();
		}
	}
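	// Rationale for the filter above: a term appearing in exactly one record
	// cannot link anything across the two sources, and a term appearing in
	// more than 0.1*scale*N records behaves like a stop word; both kinds of
	// lists are cleared so they contribute no edges to the bipartite graph.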
	


	// construct the bipartite graph between entity-pairs and terms
	int word_num = word_id.size();
	Bipartite *bigraph = new Bipartite(pair_num, word_num);
	cout<<"finish init bigraph"<<endl;
	for(size_t wid=0;wid < word_id.size();wid++){
		for(set<int>::iterator id1=inv_lists1[wid].begin();id1!=inv_lists1[wid].end();id1++){
			for(set<int>::iterator id2=inv_lists2[wid].begin();id2!=inv_lists2[wid].end();id2++){
				int pid = (*id1)*N+N1+(*id2);
				bigraph->addEdge(pid, wid);
				//cout<<(*id1)<<"\t"<<(*id2)<<"\t"<<pid<<"\t"<<wid<<"\t"<<id_word[wid]<<endl;
			}
		}
	}

	bigraph->init();
	

	bigraph->iterate();

	bigraph->output(id_word, combine_inv_lists);
	//exit(0);
	cout<<"edge num: "<<bigraph->activePairNum()<<endl;

	RandomWalk* walker = nullptr;
	for(int iter=0;;iter++){
		delete walker;	// release the walker from the previous iteration
		walker = new RandomWalk(bigraph->p_score, N1+N2, 20, 60, 10);
		for(int i=0;i < N1;i++){
			int id1=i;
			for(int j=0;j < N2;j++){
				int id2=j+N1;
				if(bigraph->p_score[id1*N+id2]>0){
					bigraph->p_score[id2*N+id1]=bigraph->p_score[id1*N+id2];
					walker->addEdge(id1,id2);
				}
			}
		}
		cout<<"edge num: "<<bigraph->activePairNum()<<endl;

		walker->iterate();
		if(iter==5){
			break;
		}
		bigraph->updatePScore(walker->p_conf);
		bigraph->output(id_word, combine_inv_lists);
	}
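	// The loop alternates two propagation passes: the bipartite graph spreads
	// scores between entity pairs and their shared terms, and the random walk
	// spreads confidence among entities; each pass reseeds the other until the
	// fixed iteration budget (iter == 5) is reached.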

	/*
	RandomWalk* walker = new RandomWalk(bigraph->p_score, N);
	for(int iter=1;iter <= 200;iter++){
		cout<<"iteration "<<iter++<<endl;
		if(walker->iterate()==0){
			break;
		}
	}
	*/
	set<string> matches = FileIO::readMatch("data/"+dataset+"/match.txt");

	int Num=1000;
	double max_weight=-1;
	vector<vector<int> > buckets(Num);
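	// Bucket sort by combined weight (walk confidence x bigraph score):
	// first find the maximum weight, then bin each pair so the buckets can
	// be scanned in descending weight order below.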
	for(int i=0;i < N1;i++){
		int id1=i;
		for(int j=0;j < N2;j++){
			int id2=j+N1;
			double weight = walker->p_conf[id1*N+id2]*bigraph->p_score[id1*N+id2];
			if(weight > max_weight){
				max_weight=weight;
			}
		}
	}
	double seg = max_weight/Num+0.001;
	for(int i=0;i < N1;i++){
		int id1=i;
		for(int j=0;j < N2;j++){
			int id2=j+N1;
			double weight = walker->p_conf[id1*N+id2]*bigraph->p_score[id1*N+id2];
			int idx=(int)(weight/seg);
			buckets[idx].push_back(id1*N+id2);
		}
	}
	vector<double> conf_vec;
	int count=0,total_pair=0;
	for(int i=Num-1;i>0;i--){
		for(size_t j=0;j < buckets[i].size();j++){
			int key = buckets[i][j];
			int id1 = key/N;
			int id2 = key%N;
			if(id1 < id2){
				stringstream ss;
				ss<<id1<<"_"<<(id2-N1);
				cout<<id1<<"\t"<<id2<<"("<<id2-N1<<")\t"<<walker->p_conf[id1*N+id2]<<"\t"<<bigraph->p_score[id1*N+id2]<<"\t";
				if(matches.find(ss.str()) != matches.end()){
					cout<<"true";
				}else{
					cout<<"false";
				}
				conf_vec.push_back(walker->p_conf[id1*N+id2]);
				size_t C=10;
				if(conf_vec.size() > C){
					double avg_conf=0.0;
					for(size_t z=conf_vec.size()-1;z >= conf_vec.size()-C;z--){
						avg_conf += conf_vec[z];
					}
					if(walker->p_conf[id1*N+id2]>0.9999 && avg_conf/C >= 0.98){
						total_pair++;
						if(matches.find(ss.str()) != matches.end()){
							count++;
						}
					}
					cout<<"\t"<<avg_conf/C;
				}else{
					total_pair++;
					if(matches.find(ss.str()) != matches.end()){
						count++;
					}
				}
				cout<<endl;
			}

		}
	}




	/*
	int count=0, total_pair=0;
	set<string> results;
	for(int i=0;i < N1;i++){
		int id1=i;
		for(int j=0;j < N2;j++){
			int id2=j+N1;
			if(walker->p_conf[id1*N+id2]>0.95){
				stringstream ss;
				ss<<id1<<"_"<<j;
				results.insert(ss.str());
				total_pair++;
				if(matches.find(ss.str()) != matches.end()){
					count++;
				}else{
					cout<<"not_match: "<<id1<<"\t"<<j<<"("<<(j+N1)<<")\t"<<bigraph->p_score[id1*N+id2]<<endl;
				}
			}
		}
	}
	for(set<string>::iterator iter=matches.begin();iter != matches.end();iter++){
		if(results.find(*iter) == results.end()){
			cout<<"miss\t"<<*iter<<endl;
		}
	}
	*/
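	// Evaluation: count is the number of accepted pairs found in the gold
	// matches (true positives), so precision = count/total_pair,
	// recall = count/|matches|, and F1 is their harmonic mean.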
	double precision = 1.0*count/total_pair;
	double recall = 1.0*count/matches.size();
	double f1 = 2*precision*recall/(precision+recall);

	cout<<"precision: "<<count<<"\t"<<total_pair<<"\t"<<precision<<"\t"<<recall<<"\t"<<f1<<endl;


	delete walker;
	delete bigraph;
	return 0;
}
Example #7
  inline void update_q( int l )
  {

    auto& _to_m = m_to_l->getFromSet( l );
    auto& _to_j = forest->GetWeights( l );
    
    double t0[LabelSet::classes];
    memset( t0, 0, sizeof(double) * LabelSet::classes );
    double t1[LabelSet::classes];

    // t0 = sum_m alpha(l,m) * D(m)
    for ( auto& ele : _to_m ) {
      int m = ele.first;
      double alpha = ele.second;
      addScaledTo( t0, D + m * LabelSet::classes, LabelSet::classes, alpha );
    }


    
    // t0 += sum_j wt(l,j) * ( q(l) - q(j) )
    for ( auto& ele : _to_j ) {
      int j = ele.first;
      double wt = static_cast<double>( ele.second );
      minus( q + l * LabelSet::classes, q + j * LabelSet::classes, t1, LabelSet::classes );
      addScaledTo( t0, t1, LabelSet::classes, wt );
    }

    // negate t0 to get negative gradient direction
    negate( t0, LabelSet::classes );
    
    // Line Search
    double energy_old = restrict_energy( l ) * options.wolf;	// sufficient-decrease threshold

    bool updated = false;

    
    // search along the normalized direction with a geometrically shrinking step
    normalize_vec( t0, t0, LabelSet::classes );
    double energy_new = 0.0;
    for ( int i=0; i<40; i++ ) {
      scale( t0, LabelSet::classes, options.shrinkRatio );
      add( t0, q + l * LabelSet::classes, t1, LabelSet::classes );
      // Simplex Projection
      watershed( t1, t0, LabelSet::classes );

      energy_new = restrict_energy( l, t0 );
      if ( energy_new < energy_old ) {
        updated = true;
        break;
      }
    }

    if ( updated ) {
      // keep each affected D(m) consistent with the new q(l) before overwriting q
      for ( auto& ele : _to_m ) {
        int m = ele.first;
        double alpha = ele.second;
        update_D( m, l, t0, alpha );
      }
      memcpy( q + l * LabelSet::classes, t0, sizeof(double) * LabelSet::classes );
    }
  }
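update_q is thus a projected-gradient step with backtracking: the normalized negative-gradient direction is shrunk geometrically by options.shrinkRatio, added to q(l), projected back onto the probability simplex by watershed, and the first candidate whose restricted energy falls below options.wolf times the old energy is accepted (a sufficient-decrease test in the spirit of the Armijo condition). Schematically, with \Pi_\Delta the simplex projection:

q(l) \leftarrow \Pi_\Delta\bigl( q(l) + \eta\, d \bigr),
\qquad d = -\frac{\nabla E}{\lVert \nabla E \rVert},
\qquad \eta \text{ shrunk until } E_{\text{new}} < \text{wolf} \cdot E_{\text{old}}.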