SCORE ObjScoreIds(const MSA &msa, const unsigned Ids1[], unsigned uCount1, const unsigned Ids2[], unsigned uCount2) { #if TIMING TICKS t1 = GetClockTicks(); #endif unsigned *SeqIndexes1 = new unsigned[uCount1]; unsigned *SeqIndexes2 = new unsigned[uCount2]; for (unsigned n = 0; n < uCount1; ++n) SeqIndexes1[n] = msa.GetSeqIndex(Ids1[n]); for (unsigned n = 0; n < uCount2; ++n) SeqIndexes2[n] = msa.GetSeqIndex(Ids2[n]); #if DOUBLE_AFFINE extern SCORE ObjScoreDA(const MSA &msa, SCORE *ptrLetters, SCORE *ptrGaps); SCORE Letters, Gaps; SCORE dObjScore = ObjScoreDA(msa, &Letters, &Gaps); delete[] SeqIndexes1; delete[] SeqIndexes2; #else SCORE dObjScore = ObjScore(msa, SeqIndexes1, uCount1, SeqIndexes2, uCount2); #endif #if TIMING TICKS t2 = GetClockTicks(); g_ticksObjScore += (t2 - t1); #endif return dObjScore; }
void AssertMSAEq(const MSA &msa1, const MSA &msa2) { const unsigned uSeqCount1 = msa1.GetSeqCount(); const unsigned uSeqCount2 = msa2.GetSeqCount(); if (uSeqCount1 != uSeqCount2) Quit("Seq count differs"); for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount1; ++uSeqIndex) { Seq seq1; msa1.GetSeq(uSeqIndex, seq1); unsigned uId = msa1.GetSeqId(uSeqIndex); unsigned uSeqIndex2 = msa2.GetSeqIndex(uId); Seq seq2; msa2.GetSeq(uSeqIndex2, seq2); if (!seq1.Eq(seq2)) { Log("Input:\n"); seq1.LogMe(); Log("Output:\n"); seq2.LogMe(); Quit("Seq %s differ ", msa1.GetSeqName(uSeqIndex)); } } }
// Append msa2 at the end of msa1 void AppendMSA(MSA &msa1, const MSA &msa2) { const unsigned uSeqCount = msa1.GetSeqCount(); const unsigned uColCount1 = msa1.GetColCount(); const unsigned uColCount2 = msa2.GetColCount(); const unsigned uColCountCat = uColCount1 + uColCount2; for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) { unsigned uId = msa1.GetSeqId(uSeqIndex); unsigned uSeqIndex2; bool bFound = msa2.GetSeqIndex(uId, &uSeqIndex2); if (bFound) { for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex) { const char c = msa2.GetChar(uSeqIndex2, uColIndex); msa1.SetChar(uSeqIndex, uColCount1 + uColIndex, c); } } else { for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex) msa1.SetChar(uSeqIndex, uColCount1 + uColIndex, '-'); } } }
// Append msa2 at the end of msa1 void MSAAppend(MSA &msa1, const MSA &msa2) { const unsigned uSeqCount = msa1.GetSeqCount(); const unsigned uColCount1 = msa1.GetColCount(); const unsigned uColCount2 = msa2.GetColCount(); for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) { unsigned uId = msa1.GetSeqId(uSeqIndex); unsigned uSeqIndex2 = msa2.GetSeqIndex(uId); for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex) { const char c = msa2.GetChar(uSeqIndex2, uColIndex); msa1.SetChar(uSeqIndex, uColCount1 + uColIndex, c); } } }
void Stabilize(const MSA &msa, MSA &msaStable) { const unsigned uSeqCount = msa.GetSeqCount(); const unsigned uColCount = msa.GetColCount(); msaStable.SetSize(uSeqCount, uColCount); for (unsigned uId = 0; uId < uSeqCount; ++uId) { const unsigned uSeqIndex = msa.GetSeqIndex(uId); msaStable.SetSeqName(uId, msa.GetSeqName(uSeqIndex)); msaStable.SetSeqId(uSeqIndex, uId); for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex) { const char c = msa.GetChar(uSeqIndex, uColIndex); msaStable.SetChar(uId, uColIndex, c); } } }
// "Catenate" two MSAs (by bad analogy with UNIX cat command). // msa1 and msa2 must have same sequence names, but possibly // in a different order. // msaCat is the combined alignment produce by appending // sequences in msa2 to sequences in msa1. void MSACat(const MSA &msa1, const MSA &msa2, MSA &msaCat) { const unsigned uSeqCount = msa1.GetSeqCount(); const unsigned uColCount1 = msa1.GetColCount(); const unsigned uColCount2 = msa2.GetColCount(); const unsigned uColCountCat = uColCount1 + uColCount2; msaCat.SetSize(uSeqCount, uColCountCat); for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) { for (unsigned uColIndex = 0; uColIndex < uColCount1; ++uColIndex) { const char c = msa1.GetChar(uSeqIndex, uColIndex); msaCat.SetChar(uSeqIndex, uColIndex, c); } const char *ptrSeqName = msa1.GetSeqName(uSeqIndex); unsigned uSeqIndex2; msaCat.SetSeqName(uSeqIndex, ptrSeqName); bool bFound = msa2.GetSeqIndex(ptrSeqName, &uSeqIndex2); if (bFound) { for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex) { const char c = msa2.GetChar(uSeqIndex2, uColIndex); msaCat.SetChar(uSeqIndex, uColCount1 + uColIndex, c); } } else { for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex) msaCat.SetChar(uSeqIndex, uColCount1 + uColIndex, '-'); } } }
void RefineW(const MSA &msaIn, MSA &msaOut) { const unsigned uSeqCount = msaIn.GetSeqCount(); const unsigned uColCount = msaIn.GetColCount(); // Reserve same nr seqs, 20% more cols const unsigned uReserveColCount = (uColCount*120)/100; msaOut.SetSize(uSeqCount, uReserveColCount); for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) { msaOut.SetSeqName(uSeqIndex, msaIn.GetSeqName(uSeqIndex)); msaOut.SetSeqId(uSeqIndex, msaIn.GetSeqId(uSeqIndex)); } const unsigned uWindowCount = (uColCount + g_uRefineWindow.get() - 1)/g_uRefineWindow.get(); if (0 == g_uWindowTo.get()) g_uWindowTo.get() = uWindowCount - 1; #if MEMDEBUG _CrtSetBreakAlloc(1560); #endif if (g_uWindowOffset.get() > 0) { MSA msaTmp; MSAFromColRange(msaIn, 0, g_uWindowOffset.get(), msaOut); } if (!g_bQuiet.get()) fprintf(stderr, "\n"); for (unsigned uWindowIndex = g_uWindowFrom.get(); uWindowIndex <= g_uWindowTo.get(); ++uWindowIndex) { if (!g_bQuiet.get()) fprintf(stderr, "Window %d of %d \r", uWindowIndex, uWindowCount); const unsigned uColFrom = g_uWindowOffset.get() + uWindowIndex*g_uRefineWindow.get(); unsigned uColTo = uColFrom + g_uRefineWindow.get() - 1; if (uColTo >= uColCount) uColTo = uColCount - 1; assert(uColTo >= uColFrom); SeqVect v; SeqVectFromMSACols(msaIn, uColFrom, uColTo, v); #if MEMDEBUG _CrtMemState s1; _CrtMemCheckpoint(&s1); #endif // Begin AED 5/20/06 // remove any empty seqs in this window std::vector< size_t > empty_seqs; SeqVect vr; for( size_t seqI = 0; seqI < v.size(); ++seqI ) { if( v[seqI]->size() == 0 ) empty_seqs.push_back(seqI); else vr.push_back(v[seqI]); } std::vector< unsigned > seqid_map( vr.size() ); for( size_t seqI = 0; seqI < vr.size(); ++seqI ) { seqid_map[seqI] = vr[seqI]->GetId(); vr[seqI]->SetId(seqI); } MSA msaTmp; if( vr.size() > 1 ) MUSCLE(vr, msaTmp); // remap the seqids to their original state for( size_t seqI = 0; seqI < vr.size(); ++seqI ) vr[seqI]->SetId(seqid_map[seqI]); // merge empty seqs back in { const unsigned uSeqCount = msaOut.GetSeqCount(); const unsigned uColCount1 = msaOut.GetColCount(); const unsigned uColCount2 = vr.size() > 1 ? msaTmp.GetColCount() : vr[0]->size(); const unsigned uColCountCat = uColCount1 + uColCount2; for( unsigned seqI = 0; seqI < vr.size(); ++seqI ) { unsigned uSeqIndex = msaOut.GetSeqIndex(seqid_map[seqI]); if( vr.size() > 1 ) { unsigned uSeqIndex2 = msaTmp.GetSeqIndex(seqI); for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex) { const char c = msaTmp.GetChar(uSeqIndex2, uColIndex); msaOut.SetChar(uSeqIndex, uColCount1 + uColIndex, c); } }else{ for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex) { const char c = vr[0]->GetChar(uColIndex); msaOut.SetChar(uSeqIndex, uColCount1 + uColIndex, c); } } } for( unsigned seqI = 0; seqI < empty_seqs.size(); ++seqI ) { unsigned uSeqId2 = v[empty_seqs[seqI]]->GetId(); unsigned uSeqIndex = msaOut.GetSeqIndex(uSeqId2); for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex) { msaOut.SetChar(uSeqIndex, uColCount1 + uColIndex, '-'); } } vr.clear(); } // AppendMSA(msaOut, msaTmp); // end AED 5/20/06 if (uWindowIndex == g_uSaveWindow.get()) { MSA msaInTmp; unsigned uOutCols = msaOut.GetColCount(); unsigned un = uColTo - uColFrom + 1; MSAFromColRange(msaIn, uColFrom, un, msaInTmp); char fn[256]; sprintf(fn, "win%d_inaln.tmp", uWindowIndex); TextFile fIn(fn, true); msaInTmp.ToFile(fIn); sprintf(fn, "win%d_inseqs.tmp", uWindowIndex); TextFile fv(fn, true); v.ToFile(fv); sprintf(fn, "win%d_outaln.tmp", uWindowIndex); TextFile fOut(fn, true); msaTmp.ToFile(fOut); } #if MEMDEBUG void FreeDPMemSPN(); FreeDPMemSPN(); _CrtMemState s2; _CrtMemCheckpoint(&s2); _CrtMemState s; _CrtMemDifference(&s, &s1, &s2); _CrtMemDumpStatistics(&s); _CrtMemDumpAllObjectsSince(&s1); exit(1); #endif //#if DEBUG // AssertMSAEqIgnoreCaseAndGaps(msaInTmp, msaTmp); //#endif } if (!g_bQuiet.get()) fprintf(stderr, "\n"); // AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut);//@@uncomment! }