void DSU::ComputeTBD(TBD &pqueue, int maxkeep, int num_threshold, bool outer, bool noLP, bool shifts, bool debug, vector<RNAsaddle> *output_saddles, int conn_neighs, bool no_new) { int cnt = 0; clock_t time_tbd = clock(); // go through all pairs in queue while (pqueue.size()>0) { // check time: double time_secs = ((clock() - time)/(double)CLOCKS_PER_SEC); if (stop_after && (time_secs > stop_after)) { fprintf(stderr, "Time threshold reached (%d secs.), processed %d/%d\n", stop_after, cnt, pqueue.size()+cnt); break; } // just visualisation if (!output_saddles && cnt%100==0) { double tim = (clock() - time_tbd)/(double)CLOCKS_PER_SEC; std::pair<int, int> mem = getValue(); //double one = ((sizeof(char)*strlen(seq) + sizeof(short)*strlen(seq)) + sizeof(RNAsaddle)) / 1024.0; fprintf(stderr, "Finding path: %7d/%7d; Time: %6.2f; Est.:%6.2f Mem.:%6.1fMB VM %6.1fMB PM\n", cnt, pqueue.size()+cnt, tim, tim/(double)cnt*pqueue.size(), mem.first/1024.0, mem.second/1024.0); } // apply threshold if (cnt>num_threshold) { fprintf(stderr, "Number threshold reached, processed %d/%d\n", cnt, pqueue.size()+cnt); break; } else { cnt++; } // get next TBDentry tbd = pqueue.get_first(); if (tbd.i==-1) { fprintf(stderr, "Ending the path-finding -- i = %5d ; j = %5d ; fiber = %c ; type = %s \n", tbd.i, tbd.j, tbd.fiber?'Y':'N', type1_str[tbd.type_clust]); break; } // check no-conn if (conectivity.size() > 0 && !tbd.fiber && conectivity.joint(tbd.i, tbd.j)) continue; // get path if (debug) fprintf(stderr, "path between (%3d, %3d) type=%s fiber=%d:\n", tbd.i, tbd.j, type1_str[tbd.type_clust], tbd.fiber); //2fprintf(stderr, "depth: %d\n%s\n%s\n%s\n", maxkeep, seq, LM[tbd.i].str_ch, LM[tbd.j].str_ch); if (pknots) { path_pk *path = get_path_light_pk(seq, LM[tbd.i].structure, LM[tbd.j].structure, maxkeep); // variables for outer insertion double max_energy= -1e8; path_pk *max_path = path; // variables for inner loops and insertions // get the length of path for speed up int length = 0; for (path_pk *tmp = path; tmp && tmp->structure; tmp++) { if (max_path->en < tmp->en) max_path = tmp; length ++; } // create vector of known LM numbers on path (where 0 and length-1 are known) vector<int> lm_numbers(length, -1); lm_numbers[0] = tbd.i; lm_numbers[length-1] = tbd.j; // debug if (debug) { for (int i=0; i<length; i++) { fprintf(stderr, "path[%3d] %s %6.2f\n", i, pt_to_str_pk(path[i].structure).c_str(), path[i].en/100.0); } } // bisect the path and find new LMs: unsigned int old_size = LM.size(); FindNumbers(0, length-1, path, lm_numbers, shifts, noLP, debug, no_new); // if we have found new minima and we want to do more than simple reevaluation of path (--conn-neighs>0) if (LM.size() - old_size > 0 && conn_neighs > 0 && !no_new) { for (unsigned int j=old_size; j<LM.size(); j++) { // sort 'em according to Hamming D. and take first "conn_neighs" multimap<int, int> distances; for (unsigned int i=0; i<old_size; i++) { distances.insert(make_pair(HammingDist(LM[i].structure, LM[j].structure), i)); } int cnt = 0; int last_hd = -1; for (auto it=distances.begin(); it!=distances.end(); it++) { if (cnt > conn_neighs && last_hd != it->first) { break; } pqueue.insert(it->second, j, EXPERIM, false); cnt++; last_hd = it->first; } } } // debug if (debug) { int diff = 1; int last_num = lm_numbers[0]; for (int i=0; i<length; i++) { fprintf(stderr, "path[%3d]= %4d (%s %6.2f)\n", i, lm_numbers[i], pt_to_str_pk(path[i].structure).c_str(), path[i].en/100.0); if (lm_numbers[i]!=last_num && lm_numbers[i]!=-1) { diff++; last_num=lm_numbers[i]; } } histo[length][diff]++; histo[length][0]++; } // now process the array of found numbers: int last_num = lm_numbers[0]; for (int i=1; i<length; i++) { if (lm_numbers[i]!=-1 && lm_numbers[i]!=last_num) { // get the highest saddle in case we traveled through many "-1" saddles: int j=i-1; int highest_num = i; while (j>0) { // check if not higher saddle: if (path[highest_num].en < path[j].en) highest_num = j; // we found first that is not -1 if (lm_numbers[j]!=-1) break; j--; } // save saddle SDtype typ = (j==i-1?DIRECT:REDUCED); RNAsaddle saddle(last_num, lm_numbers[i], typ); saddle.energy = path[highest_num].en; saddle.str_ch = NULL; saddle.structure = allocopy(path[highest_num].structure); bool inserted = InsertUB(saddle, debug); // ??? if (output_saddles && inserted) { output_saddles->push_back(saddle); } // try to insert new things into TBD: if ((lm_numbers[i]!=lm_numbers[length-1] || lm_numbers[i-1]!=lm_numbers[0]) && !no_new) { // check no-conn if (conectivity.size() > 0) conectivity.union_set(tbd.i, tbd.j); pqueue.insert(lm_numbers[i-1], lm_numbers[i], NEW_FOUND, true); } last_num = lm_numbers[i]; } } // insert saddle between outer structures if (outer) { RNAsaddle tmp(tbd.i, tbd.j, NOT_SURE); tmp.energy = en_fltoi(max_energy); tmp.str_ch = NULL; tmp.structure = allocopy(max_path->structure); bool inserted = InsertUB(tmp, debug); if (output_saddles && inserted) { output_saddles->push_back(tmp); } } free_path_pk(path); } else { //fprintf(stderr, "%s\n%s\n%s\n", seq, LM[tbd.i].str_ch, LM[tbd.j].str_ch); path_t *path = get_path(seq, LM[tbd.i].str_ch, LM[tbd.j].str_ch, maxkeep); // variables for outer insertion double max_energy= -1e8; path_t *max_path = path; // variables for inner loops and insertions // get the length of path for speed up int length = 0; for (path_t *tmp = path; tmp && tmp->s; tmp++) { if (max_path->en < tmp->en) max_path = tmp; length ++; } // create vector of known LM numbers on path (where 0 and length-1 are known) vector<int> lm_numbers(length, -1); lm_numbers[0] = tbd.i; lm_numbers[length-1] = tbd.j; // bisect the path and find new LMs: unsigned int old_size = LM.size(); FindNumbers(0, length-1, path, lm_numbers, shifts, noLP, debug, no_new); // if we have found new minima and we want to do more than simple reevaluation of path (--conn-neighs>0) if (LM.size() - old_size > 0 && conn_neighs > 0 && !no_new) { for (unsigned int j=old_size; j<LM.size(); j++) { // sort 'em according to Hamming D. and take first "conn_neighs" multimap<int, int> distances; for (unsigned int i=0; i<old_size; i++) { distances.insert(make_pair(HammingDist(LM[i].structure, LM[j].structure), i)); } int cnt = 0; int last_hd = -1; for (auto it=distances.begin(); it!=distances.end(); it++) { if (cnt > conn_neighs && last_hd != it->first) { break; } pqueue.insert(it->second, j, EXPERIM, false); cnt++; last_hd = it->first; } } } // debug if (debug) { int diff = 1; int last_num = lm_numbers[0]; for (int i=0; i<length; i++) { fprintf(stderr, "path[%3d]= %4d (%s %6.2f)\n", i, lm_numbers[i], path[i].s, path[i].en); if (lm_numbers[i]!=last_num && lm_numbers[i]!=-1) { diff++; last_num=lm_numbers[i]; } } histo[length][diff]++; histo[length][0]++; } // now process the array of found numbers: int last_num = lm_numbers[0]; for (int i=1; i<length; i++) { if (lm_numbers[i]!=-1 && lm_numbers[i]!=last_num) { // get the highest saddle in case we traveled through many "-1" saddles: int j=i-1; int highest_num = i; while (j>0) { // check if not higher saddle: if (path[highest_num].en < path[j].en) highest_num = j; // we found first that is not -1 if (lm_numbers[j]!=-1) break; j--; } // save saddle SDtype typ = (j==i-1?DIRECT:REDUCED); RNAsaddle saddle(last_num, lm_numbers[i], typ); saddle.energy = en_fltoi(path[highest_num].en); saddle.str_ch = NULL; saddle.structure = make_pair_table(path[highest_num].s); bool inserted = InsertUB(saddle, debug); // ??? if (output_saddles && inserted) { output_saddles->push_back(saddle); } // try to insert new things into TBD: if ((lm_numbers[i]!=lm_numbers[length-1] || lm_numbers[i-1]!=lm_numbers[0]) && !no_new) { // check no-conn if (conectivity.size() > 0) conectivity.union_set(tbd.i, tbd.j); pqueue.insert(lm_numbers[i-1], lm_numbers[i], NEW_FOUND, true); } last_num = lm_numbers[i]; } } // insert saddle between outer structures if (outer) { RNAsaddle tmp(tbd.i, tbd.j, NOT_SURE); tmp.energy = en_fltoi(max_energy); tmp.str_ch = NULL; tmp.structure = make_pair_table(max_path->s); bool inserted = InsertUB(tmp, debug); if (output_saddles && inserted) { output_saddles->push_back(tmp); } } free_path(path); } // free stuff //if (last_str) free(last_str); } // all doing while fprintf(stderr, "The end of finding paths(%d). Size of pqueue = %d\n", cnt, (int)pqueue.size()); }
// ---------------------------------------------------------------------------- // 生成一条测试用例 // ---------------------------------------------------------------------------- int* D_APSO::Evolve() { double inertia = 0.9 ; double factor1 = 1.3 ; double factor2 = 1.3 ; double factor_max = 1.8 ; double factor_min = 0.8 ; int *best = new int[sut->parameter] ; vector<DParticle> T ; int *gBest = new int[sut->parameter]; int fitbest = 0 ; for( int i = 0 ; i < config.population ; i++ ) { DParticle a( sut->parameter , sut->value , sut->tway ) ; a.RandomInit(); T.push_back(a); } vector<DParticle>::iterator x = T.begin(); for( int c = 0 ; c < sut->parameter ; c++) gBest[c] = (*x).position[c] ; int it = 1 ; // adaptive int state = 1 ; double f_value = 0 ; double sigma_max = 1.0 ; double sigma_min = 0.1 ; while( true ) { for( vector<DParticle>::iterator i = T.begin() ; i != T.end() ; i++ ) { int fit = sut->FitnessValue( (*i).position , 0 ) ; if( fit == sut->testcaseCoverMax && PSO_Result.size() == 0 ) { for( int c = 0 ; c< sut->parameter ; c++) best[c] = (*i).position[c] ; delete[] gBest ; for( vector<DParticle>::iterator j = T.begin() ; j != T.end() ; j++ ) j->clear(); T.clear(); return best ; } if ( fit > i->fitness_pbest ) i->Setpbest( fit ); if ( fit > fitbest ) { fitbest = fit ; for( int c = 0 ; c < sut->parameter ; c++) gBest[c] = (*i).position[c] ; } else if( fit == fitbest ) { if( HammingDist((*i).position) < HammingDist(gBest) ) { for( int c = 0 ; c< sut->parameter ; c++) gBest[c] = (*i).position[c] ; } } } if ( it >= config.iteration ) break ; // adaptive parameter f_value = FCalculate( T , gBest ) ; state = FuzzyDicsion( f_value , state ); inertia = 1 / ( 1 + 1.5 * exp( -2.6 * f_value ) ) ; if( state == 1 ) { factor1 = factor1 + ( 0.05 + (double)(rand()%50)/1000.0 ); factor2 = factor2 - ( 0.05 + (double)(rand()%50)/1000.0 ); } if( state == 2 ) { factor1 = factor1 + 0.5 * ( 0.05 + (double)(rand()%50)/1000.0 ); factor2 = factor2 - 0.5 * ( 0.05 + (double)(rand()%50)/1000.0 ); } if( state == 3 ) { factor1 = factor1 + 0.5 * ( 0.05 + (double)(rand()%50)/1000.0 ); factor2 = factor2 + 0.5 * ( 0.05 + (double)(rand()%50)/1000.0 ); } if( state == 4 ) { factor1 = factor1 - ( 0.05 + (double)(rand()%50)/1000.0 ); factor2 = factor2 + ( 0.05 + (double)(rand()%50)/1000.0 ); } if( factor1 > factor_max ) factor1 = factor_max ; if( factor1 < factor_min ) factor1 = factor_min ; if( factor2 > factor_max ) factor2 = factor_max ; if( factor2 < factor_min ) factor2 = factor_min ; for( vector<DParticle>::iterator i = T.begin() ; i != T.end() ; i++ ) { i->velocityUpdate( inertia, factor1, factor2, pro1_threshold , gBest ); i->positionUpdate( pro2_threshold , pro3_threshold ); } // ELS int *gbest_tmp = new int[sut->parameter]; for( int k=0 ; k<sut->parameter ; k++ ) gbest_tmp[k] = gBest[k] ; int dim = (int)( ((double)(rand()%1000)/1000.0) * sut->parameter ); gbest_tmp[dim] = gbest_tmp[dim] + (int)((double)( sut->value[dim] - 1 ) * Gaussrand(0, pow(sigma_max-(sigma_max-sigma_min)*(it/config.iteration),2) )); if( gbest_tmp[dim] >= sut->value[dim] ) gbest_tmp[dim] = sut->value[dim] - 1 ; if( gbest_tmp[dim] < 0 ) gbest_tmp[dim] = 0 ; int fit_tmp = sut->FitnessValue(gbest_tmp,0) ; if( fit_tmp > fitbest ) { fitbest = fit_tmp ; for( int c = 0 ; c < sut->parameter ; c++) gBest[c] = gbest_tmp[c] ; } it++ ; } // end while for( int k = 0 ; k < sut->parameter ; k++ ) best[k] = gBest[k] ; delete[] gBest ; for( vector<DParticle>::iterator j = T.begin() ; j != T.end() ; j++ ) j->clear(); T.clear(); return best ; }
int DSU::Cluster(Opt &opt, int kmax) { // pqueue for pairs of LM TBD output; // if no-conn flag: if (opt.no_conn) conectivity.enlarge_parent(LM.size()); if (kmax>0) { // create data structures vector<lm_pair> to_cluster; to_cluster.reserve(LM.size()); UF_set_child ufset; ufset.enlarge_parent(LM.size()); // representative nodes set<int> represents; // fill it for (unsigned int i=0; i<LM.size(); i++) { for (unsigned int j=i+1; j<LM.size(); j++) { to_cluster.push_back(lm_pair(i,j,HammingDist(LM[i].structure, LM[j].structure))); } } sort(to_cluster.begin(), to_cluster.end()); // process: int last_hd = to_cluster[0].hd; for (unsigned int i=0; i<to_cluster.size(); i++) { lm_pair &cp = to_cluster[i]; if (cp.hd!=last_hd) { // do something?, cause we are on higher level... } // see if we are not joint yet: if (!ufset.joint(cp.i, cp.j)) { //fprintf(stderr, "clustering %d %d (%d)\n", cp.i, cp.j, cp.d); // try to connect int father1 = ufset.find(cp.i); int father2 = ufset.find(cp.j); if (ufset.count(father1) + ufset.count(father2) > kmax) { // cannot connect them, need to insert all edges into the TBD // join clusters JoinClusters(opt, ufset, represents, output, cp.i, cp.j); } else { // connect them ufset.union_set(cp.i, cp.j); } } last_hd = cp.hd; } // now we have just one cluster, we have to add all intercluster connections that are left: int father = ufset.find(0); set<int> first = ufset.get_children(father); // insert all inter edges: for (set<int>::iterator it=first.begin(); it!=first.end(); it++) { set<int>::iterator it2 = it; it2++; for (;it2!=first.end(); it2++) { output.insert(*it, *it2, INTER_CLUSTER, false); } } // and its represent node represents.insert(father); // and finally add represent edges: for (set<int>::iterator it=represents.begin(); it!=represents.end(); it++) { set<int>::iterator it2 = it; it2++; for (;it2!=represents.end(); it2++) { output.insert(*it, *it2, REPRESENT, false); } } } else { // now we don't do clustering, we have to add all intercluster connections for (unsigned int i=0; i<LM.size(); i++) { for (unsigned int j=i+1; j<LM.size(); j++) { output.insert(i, j, INTER_CLUSTER, false); } } } fprintf(stderr, "output size = %d (%d, %d, %d)\n", output.size(), output.sizes[0], output.sizes[1], output.sizes[2]); // now finish: ComputeTBD(output, opt.maxkeep, opt.num_threshold, opt.outer, opt.noLP, opt.shifts, opt.debug, NULL, opt.conn_neighs, opt.no_new); // now just resort UBlist to something sorted according energy saddles.reserve(UBlist.size()); for (set<RNAsaddle, RNAsaddle_comp>::iterator it=UBlist.begin(); it!=UBlist.end(); it++) { RNAsaddle saddle = *it; if (it->str_ch) free(it->str_ch); saddle.str_ch = pt_to_chars_pk(it->structure); //if (pknots) pt_to_str_pk(it->structure, saddle.str_ch); saddles.push_back(saddle); } sort(saddles.begin(), saddles.end()); UBlist.clear(); /* for (int i=0; i<saddles.size(); i++) { fprintf(stderr, "%d %d %.2f\n", saddles[i].lm1, saddles[i].lm2, saddles[i].energy/100.0); }*/ // debug if (opt.debug) { fprintf(stderr, "found %d, not found %d\n", debug_c, debug_c2); for (int i=0; i<(int)histo.size(); i++) { if (histo[i][0]) { fprintf(stderr, "%5d(%5d) |", i, histo[i][0]); for (int j=1; j<min(50, (int)histo[i].size()); j++) { fprintf(stderr, "%5d", histo[i][j]); } fprintf(stderr, "\n"); } } } return 0; }