///////////////////////////////////////////////////////////////////////////////////// // Realign hits: compute F/B/MAC and MAC-backtrace algorithms ///////////////////////////////////////////////////////////////////////////////////// void PosteriorDecoder::realign(HMM &q, HMM &t, Hit &hit, PosteriorMatrix &p_mm, ViterbiMatrix &viterbi_matrix, std::vector<PosteriorDecoder::MACBacktraceResult> alignment_to_exclude, char * exclstr, char* template_exclstr, int par_min_overlap, float shift, float mact, float corr) { HMM & curr_q_hmm = q; HMM & curr_t_hmm = t; memorizeHitValues(hit); initializeForAlignment(curr_q_hmm, curr_t_hmm, hit, viterbi_matrix, 0, t.L, par_min_overlap); for (size_t ibt = 0; ibt < alignment_to_exclude.size(); ibt++) { // Mask out previous found MAC alignments excludeMACAlignment(q.L, hit.L, viterbi_matrix, 0, alignment_to_exclude.at(ibt)); } if(exclstr) { // Mask excluded regions exclude_regions(exclstr, curr_q_hmm, curr_t_hmm, viterbi_matrix); } if(template_exclstr) { // Mask excluded regions exclude_template_regions(template_exclstr, curr_q_hmm, curr_t_hmm, viterbi_matrix); } forwardAlgorithm(curr_q_hmm, curr_t_hmm, hit, p_mm, viterbi_matrix, shift, 0); //std::cout << hit->score << hit[elem]->Pforward << std::endl; backwardAlgorithm(curr_q_hmm, curr_t_hmm, hit, p_mm, viterbi_matrix, shift, 0); macAlgorithm(curr_q_hmm, curr_t_hmm, hit, p_mm, viterbi_matrix, mact, 0); backtraceMAC(curr_q_hmm, curr_t_hmm, p_mm, viterbi_matrix, 0, hit, corr); restoreHitValues(hit); writeProfilesToHits(curr_q_hmm, curr_t_hmm, p_mm, viterbi_matrix, hit); // add result to exclution paths (needed to align 2nd, 3rd, ... best alignment) }
std::vector<Hit> ViterbiRunner::alignment(Parameters& par, HMMSimd * q_simd, std::vector<HHEntry*> dbfiles, const float qsc, float* pb, const float S[20][20], const float Sim[20][20], const float R[20][20], const int ssm_mode, const float S73[NDSSP][NSSPRED][MAXCF], const float S33[NSSPRED][MAXCF][NSSPRED][MAXCF], const float S37[NSSPRED][MAXCF][NDSSP]) { HMM * q = q_simd->GetHMM(0); // Initialize memory std::vector<HMM*> t_hmm; for(size_t i = 0; i < HMMSimd::VEC_SIZE * thread_count; i++) { HMM* t = new HMM(MAXSEQDIS, par.maxres); t_hmm.push_back(t); } HMMSimd** t_hmm_simd = new HMMSimd*[thread_count]; std::vector<ViterbiConsumerThread *> threads; for (int thread_id = 0; thread_id < thread_count; thread_id++) { t_hmm_simd[thread_id] = new HMMSimd(par.maxres); ViterbiConsumerThread * thread = new ViterbiConsumerThread(thread_id, par, q_simd, t_hmm_simd[thread_id],viterbiMatrix[thread_id], ssm_mode, S73, S33, S37); threads.push_back(thread); } std::vector<Hit> ret_hits; std::vector<HHEntry*> dbfiles_to_align; std::map<std::string, std::vector<Viterbi::BacktraceResult> > excludeAlignments; // For all the databases comming through prefilter std::copy(dbfiles.begin(), dbfiles.end(), std::back_inserter(dbfiles_to_align)); // loop to detect second/thrid/... best alignemtns for (int alignment = 0; alignment < par.altali; alignment++) { HH_LOG(INFO) << "Alternative alignment: " << alignment << std::endl; unsigned int allElementToAlignCount = dbfiles_to_align.size(); unsigned int seqBlockSize = allElementToAlignCount; if(alignment == 0 && par.early_stopping_filter){ seqBlockSize = 2000; } for(unsigned int seqJunkStart = 0; seqJunkStart < allElementToAlignCount; seqJunkStart += seqBlockSize ){ //sort by length to improve performance. //desc sort (for better utilisation ofthreads) unsigned int seqJunkSize = imin(allElementToAlignCount - (seqJunkStart), seqBlockSize); sort(dbfiles_to_align.begin() + seqJunkStart, dbfiles_to_align.begin() + (seqJunkStart + seqJunkSize), HHDatabaseEntryCompare()); // read in data for thread #pragma omp parallel for schedule(dynamic, 1) for (unsigned int idb = seqJunkStart; idb < (seqJunkStart + seqJunkSize); idb += HMMSimd::VEC_SIZE) { int current_thread_id = 0; #ifdef OPENMP current_thread_id = omp_get_thread_num(); #endif const int current_t_index = (current_thread_id * HMMSimd::VEC_SIZE); std::vector<HMM *> templates_to_align; // read in alignment int maxResElem = imin((seqJunkStart + seqJunkSize) - (idb), HMMSimd::VEC_SIZE); for (int i = 0; i < maxResElem; i++) { HHEntry* entry = dbfiles_to_align.at(idb + i); int format_tmp = 0; char wg = 1; // performance reason entry->getTemplateHMM(par, wg, qsc, format_tmp, pb, S, Sim, t_hmm[current_t_index + i]); t_hmm[current_t_index + i]->entry = entry; PrepareTemplateHMM(par, q, t_hmm[current_t_index + i], format_tmp, false, pb, R); templates_to_align.push_back(t_hmm[current_t_index + i]); } t_hmm_simd[current_thread_id]->MapHMMVector(templates_to_align); exclude_alignments(maxResElem, q_simd, t_hmm_simd[current_thread_id], excludeAlignments, viterbiMatrix[current_thread_id]); if(par.exclstr) { // Mask excluded regions exclude_regions(par.exclstr, maxResElem, q_simd, t_hmm_simd[current_thread_id], viterbiMatrix[current_thread_id]); } if(par.template_exclstr) { // Mask excluded regions exclude_template_regions(par.template_exclstr, maxResElem, q_simd, t_hmm_simd[current_thread_id], viterbiMatrix[current_thread_id]); } // start next job threads[current_thread_id]->align(maxResElem, par.nseqdis, par.smin); } // idb loop // merge thread results // search hits for next alignment HH_LOG(INFO) << (seqJunkStart + seqJunkSize) << " alignments done" << std::endl; merge_thread_results(ret_hits, dbfiles_to_align, excludeAlignments, threads, alignment, par.smin); for (unsigned int thread = 0; thread < threads.size(); thread++) { threads[thread]->clear(); } if ( alignment == 0 && par.early_stopping_filter ) { float early_stopping_sum = calculateEarlyStop(par, q, ret_hits, seqJunkStart); float filter_cutoff = seqJunkSize * par.filter_thresh; if( early_stopping_sum < filter_cutoff){ HH_LOG(INFO) << "Stop after DB-HHM: " << (seqJunkStart + seqJunkSize) << " because early stop " << early_stopping_sum << " < filter cutoff " << filter_cutoff << "\n"; break; // stop junk loop and just find alternative alignments } } } // junk loop // earse first elements. These are the elements from alignment run before, // new elements are after + elementToAlignCount dbfiles_to_align.erase(dbfiles_to_align.begin(), dbfiles_to_align.begin() + allElementToAlignCount); } // Alignment loop // clean memory for (int thread_id = 0; thread_id < thread_count; thread_id++) { delete t_hmm_simd[thread_id]; delete threads[thread_id]; } threads.clear(); delete[] t_hmm_simd; for(size_t i = 0; i < HMMSimd::VEC_SIZE * thread_count; i++) { delete t_hmm[i]; } t_hmm.clear(); return ret_hits; }