Пример #1
0
/**
 * Work through the queue of local-minimum (LM) pairs and evaluate a path for each.
 *
 * For every entry taken from `pqueue` a path between LM[i] and LM[j] is computed
 * (get_path_light_pk() when `pknots` is set, get_path() otherwise), the path
 * entries are mapped to LM numbers by FindNumbers(), and a saddle is handed to
 * InsertUB() for each pair of consecutive distinct LM numbers on the path.
 *
 * The loop ends when the queue is empty, the time limit `stop_after` has passed,
 * more than `num_threshold` entries were processed, or get_first() returns an
 * entry with i == -1.
 *
 * @param pqueue          queue of LM pairs; entries are consumed and new ones may be inserted
 * @param maxkeep         forwarded to the path-finding routine
 * @param num_threshold   stop once the number of processed entries exceeds this value
 * @param outer           also insert a NOT_SURE saddle directly between tbd.i and tbd.j,
 *                        taken from the highest-energy entry of the path
 * @param noLP            forwarded to FindNumbers()
 * @param shifts          forwarded to FindNumbers()
 * @param debug           print the paths to stderr and update `histo`
 * @param output_saddles  if non-NULL, every saddle accepted by InsertUB() is appended here
 *                        (and the periodic progress line is suppressed)
 * @param conn_neighs     if > 0, every newly found minimum is queued (EXPERIM) against its
 *                        closest old minima by Hamming distance
 * @param no_new          forwarded to FindNumbers(); also disables queueing of new pairs
 */
void DSU::ComputeTBD(TBD &pqueue, int maxkeep, int num_threshold, bool outer, bool noLP, bool shifts, bool debug, vector<RNAsaddle> *output_saddles, int conn_neighs, bool no_new)
{
  // number of queue entries processed so far
  int cnt = 0;

  clock_t time_tbd = clock();

  // go through all pairs in queue
  while (pqueue.size()>0) {
    // check time (`time` and `stop_after` are defined outside this function)
    double time_secs = ((clock()  - time)/(double)CLOCKS_PER_SEC);
    if (stop_after && (time_secs > stop_after)) {
      fprintf(stderr, "Time threshold reached (%d secs.), processed %d/%d\n", stop_after, cnt, (int)pqueue.size()+cnt);
      break;
    }

    // just visualisation
    if (!output_saddles && cnt%100==0) {
      double tim = (clock()  - time_tbd)/(double)CLOCKS_PER_SEC;
      std::pair<int, int> mem = getValue();
      //double one = ((sizeof(char)*strlen(seq) + sizeof(short)*strlen(seq)) + sizeof(RNAsaddle)) / 1024.0;
      // remaining-time estimate; with nothing processed yet there is no rate to extrapolate (avoids 0/0)
      double estimate = (cnt > 0) ? tim/(double)cnt*pqueue.size() : 0.0;
      fprintf(stderr, "Finding path: %7d/%7d; Time: %6.2f; Est.:%6.2f Mem.:%6.1fMB VM %6.1fMB PM\n", cnt, (int)pqueue.size()+cnt, tim, estimate, mem.first/1024.0, mem.second/1024.0);
    }

    // apply threshold
    if (cnt>num_threshold) {
      fprintf(stderr, "Number threshold reached, processed %d/%d\n", cnt, (int)pqueue.size()+cnt);
      break;
    } else {
      cnt++;
    }

    // get next
    TBDentry tbd = pqueue.get_first();
    if (tbd.i==-1) {
      fprintf(stderr, "Ending the path-finding -- i = %5d ; j = %5d ; fiber = %c ; type = %s \n", tbd.i, tbd.j, tbd.fiber?'Y':'N', type1_str[tbd.type_clust]);
      break;
    }

    // check no-conn: skip non-fiber pairs that are already joined in `conectivity`
    if (conectivity.size() > 0 && !tbd.fiber && conectivity.joint(tbd.i, tbd.j)) continue;


    // get path
    if (debug) fprintf(stderr, "path between (%3d, %3d) type=%s fiber=%d:\n", tbd.i, tbd.j, type1_str[tbd.type_clust], tbd.fiber);
    //2fprintf(stderr, "depth: %d\n%s\n%s\n%s\n", maxkeep, seq, LM[tbd.i].str_ch, LM[tbd.j].str_ch);
    if (pknots) {
      path_pk *path = get_path_light_pk(seq, LM[tbd.i].structure, LM[tbd.j].structure, maxkeep);

      // highest-energy entry of the path (used for the outer insertion)
      path_pk *max_path = path;

      // get the length of path for speed up
      int length = 0;
      for (path_pk *tmp = path; tmp && tmp->structure; tmp++) {
        if (max_path->en < tmp->en) max_path = tmp;
        length ++;
      }

      // no path entries: nothing to evaluate, and indexing lm_numbers/max_path below would be invalid
      if (length == 0) {
        if (path) free_path_pk(path);
        continue;
      }

      // create vector of known LM numbers on path (where 0 and length-1 are known)
      vector<int> lm_numbers(length, -1);
      lm_numbers[0] = tbd.i;
      lm_numbers[length-1] = tbd.j;

      // debug
      if (debug) {
        for (int i=0; i<length; i++) {
          fprintf(stderr, "path[%3d] %s %6.2f\n", i, pt_to_str_pk(path[i].structure).c_str(), path[i].en/100.0);
        }
      }

      // bisect the path and find new LMs:
      unsigned int old_size = LM.size();
      FindNumbers(0, length-1, path, lm_numbers, shifts, noLP, debug, no_new);

      // if we have found new minima and we want to do more than simple reevaluation of path (--conn-neighs>0)
      if (LM.size() - old_size > 0 && conn_neighs > 0 && !no_new) {
        for (unsigned int j=old_size; j<LM.size(); j++) {

          // sort 'em according to Hamming D. and take first "conn_neighs"
          multimap<int, int> distances;
          for (unsigned int i=0; i<old_size; i++) {
            distances.insert(make_pair(HammingDist(LM[i].structure, LM[j].structure), i));
          }
          int queued = 0;
          int last_hd = -1;
          for (auto it=distances.begin(); it!=distances.end(); ++it) {
            // stop after enough neighbours, but never split a group of equal distances
            if (queued > conn_neighs && last_hd != it->first) {
              break;
            }

            pqueue.insert(it->second, j, EXPERIM, false);

            queued++;
            last_hd = it->first;
          }
        }

      }

      // debug
      if (debug) {
        int diff = 1;
        int last_num = lm_numbers[0];
        for (int i=0; i<length; i++) {
          fprintf(stderr, "path[%3d]= %4d (%s %6.2f)\n", i, lm_numbers[i], pt_to_str_pk(path[i].structure).c_str(), path[i].en/100.0);
          if (lm_numbers[i]!=last_num && lm_numbers[i]!=-1) {
            diff++;
            last_num=lm_numbers[i];
          }
        }
        histo[length][diff]++;
        histo[length][0]++;
      }

      // now process the array of found numbers:
      int last_num = lm_numbers[0];
      for (int i=1; i<length; i++) {
        if (lm_numbers[i]!=-1 && lm_numbers[i]!=last_num) {

          // get the highest saddle in case we traveled through many "-1" saddles:
          int j=i-1;
          int highest_num = i;
          while (j>0) {
            // check if not higher saddle:
            if (path[highest_num].en < path[j].en) highest_num = j;

            // we found first that is not -1
            if (lm_numbers[j]!=-1) break;
            j--;
          }

          // save saddle (DIRECT when the previous path entry already had a known LM number)
          SDtype typ = (j==i-1?DIRECT:REDUCED);
          RNAsaddle saddle(last_num, lm_numbers[i], typ);
          saddle.energy = path[highest_num].en;
          saddle.str_ch = NULL;
          saddle.structure = allocopy(path[highest_num].structure);
          bool inserted = InsertUB(saddle, debug);

          // report only saddles that InsertUB accepted
          if (output_saddles && inserted) {
            output_saddles->push_back(saddle);
          }

          // try to insert new things into TBD:
          // NOTE(review): lm_numbers[i-1] can still be -1 here when the path crossed
          // unassigned entries -- confirm that pqueue.insert() copes with that.
          if ((lm_numbers[i]!=lm_numbers[length-1] || lm_numbers[i-1]!=lm_numbers[0]) && !no_new) {
            // check no-conn
            if (conectivity.size() > 0) conectivity.union_set(tbd.i, tbd.j);
            pqueue.insert(lm_numbers[i-1], lm_numbers[i], NEW_FOUND, true);
          }
          last_num = lm_numbers[i];
        }
      }

      // insert saddle between outer structures
      if (outer) {
        RNAsaddle tmp(tbd.i, tbd.j, NOT_SURE);
        // energy of the highest path entry, stored the same way as the per-segment saddles above
        tmp.energy = max_path->en;
        tmp.str_ch = NULL;
        tmp.structure = allocopy(max_path->structure);

        bool inserted = InsertUB(tmp, debug);

        if (output_saddles && inserted) {
          output_saddles->push_back(tmp);
        }
      }

      free_path_pk(path);
    } else {
      //fprintf(stderr, "%s\n%s\n%s\n", seq, LM[tbd.i].str_ch, LM[tbd.j].str_ch);
      path_t *path = get_path(seq, LM[tbd.i].str_ch, LM[tbd.j].str_ch, maxkeep);

      // highest-energy entry of the path (used for the outer insertion)
      path_t *max_path = path;

      // get the length of path for speed up
      int length = 0;
      for (path_t *tmp = path; tmp && tmp->s; tmp++) {
        if (max_path->en < tmp->en) max_path = tmp;
        length ++;
      }

      // no path entries: nothing to evaluate, and indexing lm_numbers/max_path below would be invalid
      if (length == 0) {
        if (path) free_path(path);
        continue;
      }

      // create vector of known LM numbers on path (where 0 and length-1 are known)
      vector<int> lm_numbers(length, -1);
      lm_numbers[0] = tbd.i;
      lm_numbers[length-1] = tbd.j;

      // bisect the path and find new LMs:
      unsigned int old_size = LM.size();
      FindNumbers(0, length-1, path, lm_numbers, shifts, noLP, debug, no_new);

      // if we have found new minima and we want to do more than simple reevaluation of path (--conn-neighs>0)
      if (LM.size() - old_size > 0 && conn_neighs > 0 && !no_new) {
        for (unsigned int j=old_size; j<LM.size(); j++) {

          // sort 'em according to Hamming D. and take first "conn_neighs"
          multimap<int, int> distances;
          for (unsigned int i=0; i<old_size; i++) {
            distances.insert(make_pair(HammingDist(LM[i].structure, LM[j].structure), i));
          }
          int queued = 0;
          int last_hd = -1;
          for (auto it=distances.begin(); it!=distances.end(); ++it) {
            // stop after enough neighbours, but never split a group of equal distances
            if (queued > conn_neighs && last_hd != it->first) {
              break;
            }

            pqueue.insert(it->second, j, EXPERIM, false);

            queued++;
            last_hd = it->first;
          }
        }

      }

      // debug
      if (debug) {
        int diff = 1;
        int last_num = lm_numbers[0];
        for (int i=0; i<length; i++) {
          fprintf(stderr, "path[%3d]= %4d (%s %6.2f)\n", i, lm_numbers[i], path[i].s, path[i].en);
          if (lm_numbers[i]!=last_num && lm_numbers[i]!=-1) {
            diff++;
            last_num=lm_numbers[i];
          }
        }
        histo[length][diff]++;
        histo[length][0]++;
      }

      // now process the array of found numbers:
      int last_num = lm_numbers[0];
      for (int i=1; i<length; i++) {
        if (lm_numbers[i]!=-1 && lm_numbers[i]!=last_num) {

          // get the highest saddle in case we traveled through many "-1" saddles:
          int j=i-1;
          int highest_num = i;
          while (j>0) {
            // check if not higher saddle:
            if (path[highest_num].en < path[j].en) highest_num = j;

            // we found first that is not -1
            if (lm_numbers[j]!=-1) break;
            j--;
          }

          // save saddle (DIRECT when the previous path entry already had a known LM number)
          SDtype typ = (j==i-1?DIRECT:REDUCED);
          RNAsaddle saddle(last_num, lm_numbers[i], typ);
          saddle.energy = en_fltoi(path[highest_num].en);
          saddle.str_ch = NULL;
          saddle.structure = make_pair_table(path[highest_num].s);
          bool inserted = InsertUB(saddle, debug);

          // report only saddles that InsertUB accepted
          if (output_saddles && inserted) {
            output_saddles->push_back(saddle);
          }

          // try to insert new things into TBD:
          // NOTE(review): lm_numbers[i-1] can still be -1 here when the path crossed
          // unassigned entries -- confirm that pqueue.insert() copes with that.
          if ((lm_numbers[i]!=lm_numbers[length-1] || lm_numbers[i-1]!=lm_numbers[0]) && !no_new) {
            // check no-conn
            if (conectivity.size() > 0) conectivity.union_set(tbd.i, tbd.j);
            pqueue.insert(lm_numbers[i-1], lm_numbers[i], NEW_FOUND, true);
          }
          last_num = lm_numbers[i];
        }
      }

      // insert saddle between outer structures
      if (outer) {
        RNAsaddle tmp(tbd.i, tbd.j, NOT_SURE);
        // energy of the highest path entry, converted the same way as the per-segment saddles above
        tmp.energy = en_fltoi(max_path->en);
        tmp.str_ch = NULL;
        tmp.structure = make_pair_table(max_path->s);

        bool inserted = InsertUB(tmp, debug);

        if (output_saddles && inserted) {
          output_saddles->push_back(tmp);
        }
      }

      free_path(path);
    }

  } // all doing while
  fprintf(stderr, "The end of finding paths(%d). Size of pqueue = %d\n", cnt, (int)pqueue.size());
}
Пример #2
0
// ----------------------------------------------------------------------------
// Generate one test case.
//
// Runs the swarm for at most config.iteration rounds. Each round evaluates
// every particle with sut->FitnessValue(), tracks the best position seen
// (gBest), adapts inertia/factor1/factor2 from FCalculate()/FuzzyDicsion(),
// moves the particles, and perturbs one dimension of gBest (ELS step).
//
// Returns a new[]-allocated array of sut->parameter ints holding the best
// position found; the caller owns it and must delete[] it.
// ----------------------------------------------------------------------------
int* D_APSO::Evolve()
{
	double inertia = 0.9 ;
	double factor1 = 1.3 ;
	double factor2 = 1.3 ;
	double factor_max = 1.8 ;
	double factor_min = 0.8 ;

	int *best = new int[sut->parameter] ;  
	
	vector<DParticle> T ;
	// value-initialised so gBest is well defined even if the population is empty
	int *gBest = new int[sut->parameter]() ;
	int fitbest = 0 ;

	for( int i = 0 ; i < config.population ; i++ )
	{
		DParticle a( sut->parameter , sut->value , sut->tway ) ;
		a.RandomInit();

		T.push_back(a);
	}

	// start from the first particle's position (only if there is one)
	if( !T.empty() )
	{
		for( int c = 0 ; c < sut->parameter ; c++)
			gBest[c] = T.front().position[c] ;
	}

	int it = 1 ;

	// adaptive
	int state = 1 ;
	double f_value = 0 ;
	double sigma_max = 1.0 ;
	double sigma_min = 0.1 ;

	while( true )
	{		
		for( vector<DParticle>::iterator i = T.begin() ; i != T.end() ; i++ )
		{
			int fit = sut->FitnessValue( (*i).position , 0 ) ;

			// a particle already reaches the maximum coverage and no result exists yet: return it at once
			if( fit == sut->testcaseCoverMax && PSO_Result.size() == 0 )
			{
				for( int c = 0 ; c< sut->parameter ; c++)
					best[c] = (*i).position[c] ;

				delete[] gBest ;
				for( vector<DParticle>::iterator j = T.begin() ; j != T.end() ; j++ )
					j->clear();
				T.clear();

				return best ;
			}

			if ( fit > i->fitness_pbest )
				i->Setpbest( fit );
			
			if ( fit > fitbest )    
			{
				fitbest = fit ;
				for( int c = 0 ; c < sut->parameter ; c++)
					gBest[c] = (*i).position[c] ;
			}
			else if( fit == fitbest )
			{
				// tie on fitness: prefer the position with the smaller HammingDist()
				if( HammingDist((*i).position) < HammingDist(gBest) )
				{
					for( int c = 0 ; c< sut->parameter ; c++)
						gBest[c] = (*i).position[c] ;
				}
			}

		}

		if ( it >= config.iteration )
			break ;

		// adaptive parameter
		f_value = FCalculate( T , gBest ) ;
		state = FuzzyDicsion( f_value , state );
		
		inertia = 1 / ( 1 + 1.5 * exp( -2.6 * f_value ) ) ;

		if( state == 1 )
		{
			factor1 = factor1 + ( 0.05 + (double)(rand()%50)/1000.0 );
			factor2 = factor2 - ( 0.05 + (double)(rand()%50)/1000.0 );
		}
		if( state == 2 )
		{
			factor1 = factor1 + 0.5 * ( 0.05 + (double)(rand()%50)/1000.0 );
			factor2 = factor2 - 0.5 * ( 0.05 + (double)(rand()%50)/1000.0 );
		}
		if( state == 3 )
		{
			factor1 = factor1 + 0.5 * ( 0.05 + (double)(rand()%50)/1000.0 );
			factor2 = factor2 + 0.5 * ( 0.05 + (double)(rand()%50)/1000.0 );
		}
		if( state == 4 )
		{
			factor1 = factor1 - ( 0.05 + (double)(rand()%50)/1000.0 );
			factor2 = factor2 + ( 0.05 + (double)(rand()%50)/1000.0 );
		}

		// keep both factors inside [factor_min, factor_max]
		if( factor1 > factor_max )
			factor1 = factor_max ;
		if( factor1 < factor_min )
			factor1 = factor_min ;

		if( factor2 > factor_max )
			factor2 = factor_max ;
		if( factor2 < factor_min )
			factor2 = factor_min ;

		for( vector<DParticle>::iterator i = T.begin() ; i != T.end() ; i++ )  
		{
			i->velocityUpdate( inertia, factor1, factor2, pro1_threshold , gBest );
			i->positionUpdate( pro2_threshold , pro3_threshold );
		} 
		
		// ELS: perturb one randomly chosen dimension of a copy of gBest
		int *gbest_tmp = new int[sut->parameter];
		for( int k=0 ; k<sut->parameter ; k++ )
			gbest_tmp[k] = gBest[k] ;

		int dim = (int)( ((double)(rand()%1000)/1000.0) * sut->parameter ); 

		// sigma shrinks linearly from sigma_max towards sigma_min as `it` grows;
		// the cast keeps it/config.iteration from truncating to 0 in integer arithmetic
		double sigma = sigma_max - (sigma_max-sigma_min) * ( (double)it / config.iteration ) ;
		gbest_tmp[dim] = gbest_tmp[dim] + (int)((double)( sut->value[dim] - 1 ) * 
			Gaussrand(0, sigma * sigma ));
		// clamp to the valid value range [0, value[dim]-1]
		if( gbest_tmp[dim] >= sut->value[dim] )
			gbest_tmp[dim] = sut->value[dim] - 1 ;
		if( gbest_tmp[dim] < 0 )
			gbest_tmp[dim] = 0 ;

		int fit_tmp = sut->FitnessValue(gbest_tmp,0) ;
		if( fit_tmp > fitbest )
		{
			fitbest = fit_tmp ;
			for( int c = 0 ; c < sut->parameter ; c++)
				gBest[c] = gbest_tmp[c] ;
		}

		// the scratch copy is only needed for this iteration
		// NOTE(review): assumes FitnessValue() does not keep the pointer -- confirm
		delete[] gbest_tmp ;

		it++ ;

	}  // end while

	for( int k = 0 ; k < sut->parameter ; k++ ) 
		best[k] = gBest[k] ;

	delete[] gBest ;
	for( vector<DParticle>::iterator j = T.begin() ; j != T.end() ; j++ )
		j->clear();
	T.clear();

	return best ;
}
Пример #3
0
/**
 * Build the queue of local-minimum (LM) pairs to connect, run ComputeTBD() on it,
 * and move the collected saddles from `UBlist` into the sorted `saddles` vector.
 *
 * With kmax > 0 all LM pairs are sorted by Hamming distance and merged in a
 * union-find structure as long as the merged cluster would not exceed kmax
 * members; pairs that would exceed it are handed to JoinClusters(). With
 * kmax <= 0 every LM pair is queued directly as INTER_CLUSTER.
 *
 * @param opt   run options (no_conn, maxkeep, num_threshold, outer, noLP, shifts,
 *              debug, conn_neighs, no_new are read here)
 * @param kmax  maximal cluster size; values <= 0 disable clustering
 * @return always 0
 */
int DSU::Cluster(Opt &opt, int kmax)
{
  // pqueue for pairs of LM
  TBD output;

  // if no-conn flag:
  if (opt.no_conn) conectivity.enlarge_parent(LM.size());

  if (kmax>0) {
    // create data structures
    vector<lm_pair> to_cluster;
    // one entry per unordered pair: n*(n-1)/2
    const size_t lm_count = LM.size();
    to_cluster.reserve(lm_count > 1 ? lm_count*(lm_count-1)/2 : 0);
    UF_set_child ufset;
    ufset.enlarge_parent(LM.size());

    // representative nodes
    set<int> represents;

    // fill it
    for (unsigned int i=0; i<LM.size(); i++) {
      for (unsigned int j=i+1; j<LM.size(); j++) {
        to_cluster.push_back(lm_pair(i,j,HammingDist(LM[i].structure, LM[j].structure)));
      }
    }
    sort(to_cluster.begin(), to_cluster.end());

    // process pairs in sorted order
    for (unsigned int i=0; i<to_cluster.size(); i++) {
      lm_pair &cp = to_cluster[i];

      // already in the same cluster: nothing to do
      if (ufset.joint(cp.i, cp.j)) continue;

      //fprintf(stderr, "clustering %d %d (%d)\n", cp.i, cp.j, cp.d);
      // try to connect
      int father1 = ufset.find(cp.i);
      int father2 = ufset.find(cp.j);
      if (ufset.count(father1) + ufset.count(father2) > kmax) { // cannot connect them, need to insert all edges into the TBD

        // join clusters
        JoinClusters(opt, ufset, represents, output, cp.i, cp.j);

      } else {

        // connect them
        ufset.union_set(cp.i, cp.j);

      }
    }

    // add all connections inside the cluster that contains LM 0
    // (skipped when there are no minima, where find(0) would be out of range)
    if (!LM.empty()) {
      int father = ufset.find(0);
      set<int> first = ufset.get_children(father);
      // insert all inter edges:
      for (set<int>::iterator it=first.begin(); it!=first.end(); it++) {
        set<int>::iterator it2 = it; it2++;
        for (;it2!=first.end(); it2++) {
          output.insert(*it, *it2, INTER_CLUSTER, false);
        }
      }
      // and its represent node
      represents.insert(father);
    }

    // and finally add represent edges:
    for (set<int>::iterator it=represents.begin(); it!=represents.end(); it++) {
      set<int>::iterator it2 = it; it2++;
      for (;it2!=represents.end(); it2++) {
        output.insert(*it, *it2, REPRESENT, false);
      }
    }
  } else {
    // now we don't do clustering, we have to add all intercluster connections
    for (unsigned int i=0; i<LM.size(); i++) {
      for (unsigned int j=i+1; j<LM.size(); j++) {
        output.insert(i, j, INTER_CLUSTER, false);
      }
    }
  }

  fprintf(stderr, "output size = %d (%d, %d, %d)\n", (int)output.size(), output.sizes[0], output.sizes[1], output.sizes[2]);

  // now finish:
  ComputeTBD(output, opt.maxkeep, opt.num_threshold, opt.outer, opt.noLP, opt.shifts, opt.debug, NULL, opt.conn_neighs, opt.no_new);

  // now just resort UBlist to something sorted according energy
  saddles.reserve(UBlist.size());
  for (set<RNAsaddle, RNAsaddle_comp>::iterator it=UBlist.begin(); it!=UBlist.end(); it++) {
    RNAsaddle saddle = *it;
    // drop any old string form and regenerate it from the structure
    if (it->str_ch) free(it->str_ch);
    saddle.str_ch = pt_to_chars_pk(it->structure);
    //if (pknots) pt_to_str_pk(it->structure, saddle.str_ch);
    saddles.push_back(saddle);
  }
  sort(saddles.begin(), saddles.end());
  UBlist.clear();
/*
  for (int i=0; i<saddles.size(); i++) {
    fprintf(stderr, "%d %d %.2f\n", saddles[i].lm1, saddles[i].lm2, saddles[i].energy/100.0);
  }*/

  // debug
  if (opt.debug) {
    fprintf(stderr, "found %d, not found %d\n", debug_c, debug_c2);
    for (int i=0; i<(int)histo.size(); i++) {
      if (histo[i][0]) {
        fprintf(stderr, "%5d(%5d) |", i, histo[i][0]);
        for (int j=1; j<min(50, (int)histo[i].size()); j++) {
          fprintf(stderr, "%5d", histo[i][j]);
        }
        fprintf(stderr, "\n");
      }
    }
  }


  return 0;
}