/////////////////////////////////////////////////////////////////////////////////////
// Realign hits: compute F/B/MAC and MAC-backtrace algorithms
//
// Steps, in the order they are executed:
//   1. memorizeHitValues(hit)
//   2. initializeForAlignment(...) with the template length t.L
//   3. mask every entry of alignment_to_exclude via excludeMACAlignment
//   4. mask the regions described by exclstr / template_exclstr (each optional)
//   5. forwardAlgorithm, backwardAlgorithm, macAlgorithm, backtraceMAC
//   6. restoreHitValues(hit)
//   7. writeProfilesToHits(...)
//
// Parameters:
//   q, t                  query and template HMM handed to every stage
//   hit                   hit being realigned; passed by reference to the stages
//   p_mm                  posterior matrix passed to forward/backward/MAC/backtrace
//   viterbi_matrix        matrix that receives the masks and is passed to all stages
//   alignment_to_exclude  earlier MAC backtrace results to mask out (taken by value)
//   exclstr               exclusion string for exclude_regions; skipped when NULL
//   template_exclstr      exclusion string for exclude_template_regions; skipped when NULL
//   par_min_overlap       forwarded to initializeForAlignment
//   shift                 forwarded to forwardAlgorithm and backwardAlgorithm
//   mact                  forwarded to macAlgorithm
//   corr                  forwarded to backtraceMAC
//
// NOTE(review): the literal 0 passed to the helpers is presumably the element
// (lane) index of a single, non-vectorised alignment -- confirm against the
// helper declarations.
/////////////////////////////////////////////////////////////////////////////////////
void PosteriorDecoder::realign(HMM &q, HMM &t, Hit &hit,
							   PosteriorMatrix &p_mm, ViterbiMatrix &viterbi_matrix,
							   std::vector<PosteriorDecoder::MACBacktraceResult> alignment_to_exclude,
							   char * exclstr, char* template_exclstr, int par_min_overlap, float shift, float mact, float corr) {

	memorizeHitValues(hit);
	initializeForAlignment(q, t, hit, viterbi_matrix, 0, t.L, par_min_overlap);

	// Mask out previously found MAC alignments. The template length is taken
	// from t.L -- the same value initializeForAlignment received above -- and
	// not from hit.L, so the mask always matches the template being realigned.
	for (size_t ibt = 0; ibt < alignment_to_exclude.size(); ibt++) {
		excludeMACAlignment(q.L, t.L, viterbi_matrix, 0, alignment_to_exclude[ibt]);
	}

	if (exclstr) {
		// Mask excluded regions
		exclude_regions(exclstr, q, t, viterbi_matrix);
	}

	if (template_exclstr) {
		// Mask excluded template regions
		exclude_template_regions(template_exclstr, q, t, viterbi_matrix);
	}

	forwardAlgorithm(q, t, hit, p_mm, viterbi_matrix, shift, 0);
	backwardAlgorithm(q, t, hit, p_mm, viterbi_matrix, shift, 0);
	macAlgorithm(q, t, hit, p_mm, viterbi_matrix, mact, 0);
	backtraceMAC(q, t, p_mm, viterbi_matrix, 0, hit, corr);

	// Hit values are restored before the profiles are written to the hit.
	restoreHitValues(hit);
	writeProfilesToHits(q, t, p_mm, viterbi_matrix, hit);

	// NOTE(review): nothing is added to the exclusion paths here
	// (alignment_to_exclude is a by-value copy); presumably the caller records
	// the result so that the 2nd, 3rd, ... best alignment can be found.
}
// Aligns the query (q_simd) against the database entries in dbfiles and
// returns the collected hits. par.altali rounds are run; every round after the
// first works on the entries that merge_thread_results queued for it. In the
// first round, when par.early_stopping_filter is set, entries are processed in
// blocks of 2000 and the round is cut short once calculateEarlyStop drops
// below blockLen * par.filter_thresh.
std::vector<Hit> ViterbiRunner::alignment(Parameters& par, HMMSimd * q_simd,
    std::vector<HHEntry*> dbfiles, const float qsc, float* pb,
    const float S[20][20], const float Sim[20][20], const float R[20][20], const int ssm_mode,
    const float S73[NDSSP][NSSPRED][MAXCF], const float S33[NSSPRED][MAXCF][NSSPRED][MAXCF],
    const float S37[NSSPRED][MAXCF][NDSSP]) {

    HMM * query = q_simd->GetHMM(0);

    // Scratch template HMMs: HMMSimd::VEC_SIZE of them for each worker.
    std::vector<HMM*> templateHmms;
    for (size_t slot = 0; slot < HMMSimd::VEC_SIZE * thread_count; ++slot) {
        templateHmms.push_back(new HMM(MAXSEQDIS, par.maxres));
    }

    // Per worker: one template HMMSimd and the consumer that aligns against it.
    HMMSimd** templateSimd = new HMMSimd*[thread_count];
    std::vector<ViterbiConsumerThread *> workers;
    for (int w = 0; w < thread_count; ++w) {
        templateSimd[w] = new HMMSimd(par.maxres);
        workers.push_back(new ViterbiConsumerThread(w, par, q_simd, templateSimd[w],
                                                    viterbiMatrix[w], ssm_mode, S73, S33, S37));
    }

    std::vector<Hit> hits;
    // Work list, seeded with all database entries coming through the prefilter.
    std::vector<HHEntry*> pending(dbfiles.begin(), dbfiles.end());
    std::map<std::string, std::vector<Viterbi::BacktraceResult> > excludeAlignments;

    // One round per requested alignment (best, second best, third best, ...).
    for (int round = 0; round < par.altali; ++round) {
        HH_LOG(INFO) << "Alternative alignment: " << round << std::endl;

        unsigned int roundCount = pending.size();
        // Only the first round is split into blocks, and only for early stopping.
        unsigned int blockSize = (round == 0 && par.early_stopping_filter) ? 2000 : roundCount;

        for (unsigned int blockStart = 0; blockStart < roundCount; blockStart += blockSize) {
            unsigned int blockLen = imin(roundCount - (blockStart), blockSize);
            const unsigned int blockEnd = blockStart + blockLen;

            // Sort the block with HHDatabaseEntryCompare; the original note says
            // this is a descending sort by length for better thread utilisation.
            sort(pending.begin() + blockStart,
                 pending.begin() + blockEnd,
                 HHDatabaseEntryCompare());

            // Each iteration loads up to VEC_SIZE templates and aligns them.
#pragma omp parallel for schedule(dynamic, 1)
            for (unsigned int idb = blockStart; idb < blockEnd; idb += HMMSimd::VEC_SIZE) {
                int worker = 0;
                #ifdef OPENMP
                    worker = omp_get_thread_num();
                #endif
                // First scratch HMM that belongs to this worker.
                const int firstSlot = worker * HMMSimd::VEC_SIZE;

                // Number of templates in this batch (the last batch may be short).
                int batch = imin(blockEnd - (idb), HMMSimd::VEC_SIZE);

                std::vector<HMM *> batchTemplates;
                for (int lane = 0; lane < batch; ++lane) {
                    HHEntry* entry = pending.at(idb + lane);
                    // Alias of the scratch slot that receives this template.
                    HMM *& target = templateHmms[firstSlot + lane];

                    int format_tmp = 0;
                    char wg = 1; // performance reason
                    entry->getTemplateHMM(par, wg, qsc, format_tmp, pb, S, Sim, target);
                    target->entry = entry;

                    PrepareTemplateHMM(par, query, target, format_tmp, false, pb, R);
                    batchTemplates.push_back(target);
                }

                templateSimd[worker]->MapHMMVector(batchTemplates);

                // Mask alignments recorded in earlier rounds.
                exclude_alignments(batch, q_simd, templateSimd[worker],
                                   excludeAlignments, viterbiMatrix[worker]);

                // Mask excluded regions
                if (par.exclstr) {
                    exclude_regions(par.exclstr, batch, q_simd, templateSimd[worker], viterbiMatrix[worker]);
                }

                // Mask excluded template regions
                if (par.template_exclstr) {
                    exclude_template_regions(par.template_exclstr, batch, q_simd, templateSimd[worker], viterbiMatrix[worker]);
                }

                // Run the alignment for this batch on the worker's consumer.
                workers[worker]->align(batch, par.nseqdis, par.smin);
            } // idb loop

            HH_LOG(INFO) << blockEnd <<  " alignments done" << std::endl;

            // Merge the per-worker results and collect hits for the next round.
            merge_thread_results(hits, pending, excludeAlignments, workers, round, par.smin);
            for (size_t w = 0; w < workers.size(); ++w) {
                workers[w]->clear();
            }

            if (round == 0 && par.early_stopping_filter) {
                float early_stopping_sum = calculateEarlyStop(par, query, hits, blockStart);
                float filter_cutoff = blockLen * par.filter_thresh;

                if (early_stopping_sum < filter_cutoff) {
                    HH_LOG(INFO) << "Stop after DB-HHM: " << blockEnd << " because early stop  "
                    << early_stopping_sum << " < filter cutoff " << filter_cutoff << "\n";
                    break; // leave the block loop and only look for alternative alignments
                }
            }
        } // block loop

        // Erase the entries that belonged to this round; per the original note,
        // entries queued for the next round sit after the first roundCount ones.
        pending.erase(pending.begin(), pending.begin() + roundCount);
    } // alignment rounds

    // Release per-worker objects, then the pointer array, then the scratch HMMs.
    for (int w = 0; w < thread_count; ++w) {
        delete templateSimd[w];
        delete workers[w];
    }
    workers.clear();
    delete[] templateSimd;

    for (size_t slot = 0; slot < HMMSimd::VEC_SIZE * thread_count; ++slot) {
        delete templateHmms[slot];
    }
    templateHmms.clear();

    return hits;
}