// Builds the pairwise alignment between 'masterSequence' and one dependent sequence
// from a two-row Seq-align stored either as a list of dendiags or as a single denseg.
//
//   parent          - passed through to the StructureBase constructor
//   masterSequence  - the master sequence; its length sizes both lookup vectors
//   seqAlign        - the alignment to unpack
//
// After a successful unpack:
//   masterToDependent[m] is the dependent residue aligned to master residue m, or -1
//   blockStructure[m]    is the number of the aligned block containing m, or -1
//
// Every failure posts an ERRORMSG and returns early. 'dependent' is still NULL only
// when the failure happened before the dependent sequence was identified; failures
// found later (while validating/unpacking blocks) leave 'dependent' set and the
// vectors partially filled.
MasterDependentAlignment::MasterDependentAlignment(StructureBase *parent, const Sequence *masterSequence,
    const ncbi::objects::CSeq_align& seqAlign) :
    StructureBase(parent), master(masterSequence), dependent(NULL)
{
    // resize alignment and block vector
    masterToDependent.resize(master->Length(), -1);
    blockStructure.resize(master->Length(), -1);

    const CSeq_align::C_Segs& segs = seqAlign.GetSegs();

    // front()/back() on an empty container is undefined, so reject alignments that
    // have no dendiag blocks or no Seq-ids before peeking at them
    if (segs.IsDendiag() ?
            (segs.GetDendiag().empty() || segs.GetDendiag().front()->GetIds().empty()) :
            segs.GetDenseg().GetIds().empty()) {
        ERRORMSG("MasterDependentAlignment::MasterDependentAlignment() - "
            "alignment contains no blocks or no Seq-ids");
        return;
    }

    // find dependent sequence for this alignment, and order (master or dependent first)
    const CSeq_id& frontSeqId = segs.IsDendiag() ?
        segs.GetDendiag().front()->GetIds().front().GetObject() :
        segs.GetDenseg().GetIds().front().GetObject();
    const CSeq_id& backSeqId = segs.IsDendiag() ?
        segs.GetDendiag().front()->GetIds().back().GetObject() :
        segs.GetDenseg().GetIds().back().GetObject();

    bool masterFirst = true;
    SequenceSet::SequenceList::const_iterator
        s, se = master->parentSet->sequenceSet->sequences.end();
    for (s=master->parentSet->sequenceSet->sequences.begin(); s!=se; ++s) {
        if (master->identifier->MatchesSeqId(frontSeqId) &&
            (*s)->identifier->MatchesSeqId(backSeqId)) {
            break;
        } else if ((*s)->identifier->MatchesSeqId(frontSeqId) &&
                   master->identifier->MatchesSeqId(backSeqId)) {
            masterFirst = false;
            break;
        }
    }
    if (s == se) {
        ERRORMSG("MasterDependentAlignment::MasterDependentAlignment() - couldn't find matching sequences; "
            << "both " << frontSeqId.AsFastaString() << " and "
            << backSeqId.AsFastaString() << " must be in the sequence list for this file!");
        return;
    }
    dependent = *s;

    unsigned int i, blockNum = 0;
    int masterRes, dependentRes;

    // unpack dendiag alignment
    if (segs.IsDendiag()) {

        CSeq_align::C_Segs::TDendiag::const_iterator d, de = segs.GetDendiag().end();
        for (d=segs.GetDendiag().begin(); d!=de; ++d, ++blockNum) {
            const CDense_diag& block = d->GetObject();

            if (block.GetDim() != 2 || block.GetIds().size() != 2 || block.GetStarts().size() != 2) {
                ERRORMSG("MasterDependentAlignment::MasterDependentAlignment() - \n"
                    "incorrect dendiag block dimensions");
                return;
            }

            // make sure identities of master and dependent sequences match in each block
            if ((masterFirst &&
                    (!master->identifier->MatchesSeqId(block.GetIds().front().GetObject()) ||
                     !dependent->identifier->MatchesSeqId(block.GetIds().back().GetObject()))) ||
                (!masterFirst &&
                    (!master->identifier->MatchesSeqId(block.GetIds().back().GetObject()) ||
                     !dependent->identifier->MatchesSeqId(block.GetIds().front().GetObject())))) {
                ERRORMSG("MasterDependentAlignment::MasterDependentAlignment() - "
                    "mismatched Seq-id in dendiag block");
                return;
            }

            // finally, actually unpack the data into the alignment vector;
            // the row order is fixed for the block, so pick the starts once
            const int masterStart = masterFirst ? block.GetStarts().front() : block.GetStarts().back();
            const int dependentStart = masterFirst ? block.GetStarts().back() : block.GetStarts().front();
            for (i=0; i<block.GetLen(); ++i) {
                masterRes = masterStart + i;
                dependentRes = dependentStart + i;
                if (masterRes < 0 || masterRes >= static_cast<int>(master->Length()) ||
                    dependentRes < 0 || dependentRes >= static_cast<int>(dependent->Length())) {
                    ERRORMSG("MasterDependentAlignment::MasterDependentAlignment() - seqloc in dendiag block > length of sequence!");
                    return;
                }
                masterToDependent[masterRes] = dependentRes;
                blockStructure[masterRes] = blockNum;
            }
        }
    }

    // unpack denseg alignment
    else if (segs.IsDenseg()) {

        const CDense_seg& block = segs.GetDenseg();

        if (!block.IsSetDim() || block.GetDim() != 2 ||
            block.GetIds().size() != 2 ||
            (int)block.GetStarts().size() != 2 * block.GetNumseg() ||
            (int)block.GetLens().size() != block.GetNumseg()) {
            ERRORMSG("MasterDependentAlignment::MasterDependentAlignment() - \n"
                "incorrect denseg block dimension");
            return;
        }

        // make sure identities of master and dependent sequences match in each block
        if ((masterFirst &&
                (!master->identifier->MatchesSeqId(block.GetIds().front().GetObject()) ||
                 !dependent->identifier->MatchesSeqId(block.GetIds().back().GetObject()))) ||
            (!masterFirst &&
                (!master->identifier->MatchesSeqId(block.GetIds().back().GetObject()) ||
                 !dependent->identifier->MatchesSeqId(block.GetIds().front().GetObject())))) {
            ERRORMSG("MasterDependentAlignment::MasterDependentAlignment() - \n"
                "mismatched Seq-id in denseg block");
            return;
        }

        // finally, actually unpack the data into the alignment vector
        // (starts holds one entry per row per segment; sizes were validated above)
        CDense_seg::TStarts::const_iterator starts = block.GetStarts().begin();
        CDense_seg::TLens::const_iterator lens, le = block.GetLens().end();
        for (lens=block.GetLens().begin(); lens!=le; ++lens) {
            if (masterFirst) {
                masterRes = *(starts++);
                dependentRes = *(starts++);
            } else {
                dependentRes = *(starts++);
                masterRes = *(starts++);
            }

            // skip gaps
            if (masterRes == -1 || dependentRes == -1)
                continue;

            // Any other negative start is invalid. The bound test is written as
            // "start > length - segLen" (after checking segLen <= length) so that
            // no signed/unsigned sum can wrap around and slip past the check.
            const unsigned int segLen = *lens;
            if (masterRes < 0 || dependentRes < 0 ||
                segLen > master->Length() ||
                static_cast<unsigned int>(masterRes) > master->Length() - segLen ||
                segLen > dependent->Length() ||
                static_cast<unsigned int>(dependentRes) > dependent->Length() - segLen) {
                ERRORMSG("MasterDependentAlignment::MasterDependentAlignment() - \n"
                    "seqloc in denseg block > length of sequence!");
                return;
            }

            for (i=0; i<segLen; ++i) {
                masterToDependent[masterRes + i] = dependentRes + i;
                blockStructure[masterRes + i] = blockNum;
            }
            ++blockNum; // a "block" of a denseg is an aligned (non-gap) segment
        }
    }

    //TESTMSG("got alignment for dependent gi " << dependent->identifier->gi);
}
// Builds the pairwise alignment between 'masterSequence' and one other ("slave")
// sequence from a two-row Seq-align stored either as dendiags or as a denseg.
//
//   sequenceSet     - the sequences searched for the slave
//   masterSequence  - the master sequence; its length sizes masterToSlave
//   seqAlign        - the alignment to unpack
//
// masterToSlave[m] ends up holding the slave residue aligned to master residue m,
// or -1 where nothing is aligned. 'status' starts as CAV_ERROR_PAIRWISE and becomes
// CAV_SUCCESS only when the end of the constructor is reached; every failure posts
// a diagnostic and returns early, leaving the error status in place.
MasterSlaveAlignment::MasterSlaveAlignment(
    const SequenceSet *sequenceSet,
    const Sequence *masterSequence,
    const CSeq_align& seqAlign) :
    status(CAV_ERROR_PAIRWISE), master(masterSequence), slave(NULL)
{
    // resize alignment and block vector
    masterToSlave.resize(master->Length(), -1);

    const CSeq_align::C_Segs& segs = seqAlign.GetSegs();

    // front() on an empty dendiag list is undefined, so reject it up front
    if (segs.IsDendiag() && segs.GetDendiag().empty()) {
        ERR_POST_X(10, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
            "dendiag alignment contains no blocks");
        return;
    }

    SequenceSet::SequenceList::const_iterator
        s = sequenceSet->sequences.begin(), se = sequenceSet->sequences.end();

    // find slave sequence for this alignment, and order (master or slave first)
    const SeqIdList& sids = segs.IsDendiag() ?
        segs.GetDendiag().front()->GetIds() :
        segs.GetDenseg().GetIds();

    // front()/back() below need at least one Seq-id
    if (sids.empty()) {
        ERR_POST_X(9, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
            "alignment contains no Seq-ids");
        return;
    }

    bool masterFirst = true;
    for (; s!=se; ++s) {
        // the master itself is never taken as the slave in this search
        if ((*s)->Matches(master->bioseqASN->GetId())) continue;
        if ((*s)->Matches(sids.back().GetObject())) {
            break;
        } else if ((*s)->Matches(sids.front().GetObject())) {
            masterFirst = false;
            break;
        }
    }
    if (s == se) {
        // special case of master seq. aligned to itself
        if (master->Matches(sids.back().GetObject()) && master->Matches(sids.front().GetObject())) {
            slave = master;
        } else {
            ERR_POST_X(9, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                "couldn't find matching unaligned slave sequence");
            return;
        }
    } else {
        slave = *s;
    }

    unsigned int i;
    int masterRes, slaveRes;

    // unpack dendiag alignment
    if (segs.IsDendiag()) {

        CSeq_align::C_Segs::TDendiag::const_iterator d, de = segs.GetDendiag().end();
        for (d=segs.GetDendiag().begin(); d!=de; ++d) {
            const CDense_diag& block = d->GetObject();

            if (!block.IsSetDim() || block.GetDim() != 2 ||
                block.GetIds().size() != 2 ||
                block.GetStarts().size() != 2) {
                ERR_POST_X(10, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                    "incorrect dendiag block dimensions");
                return;
            }

            // make sure identities of master and slave sequences match in each block
            if ((masterFirst &&
                    (!master->Matches(block.GetIds().front().GetObject()) ||
                     !slave->Matches(block.GetIds().back().GetObject()))) ||
                (!masterFirst &&
                    (!master->Matches(block.GetIds().back().GetObject()) ||
                     !slave->Matches(block.GetIds().front().GetObject())))) {
                ERR_POST_X(11, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                    "mismatched Seq-id in dendiag block");
                return;
            }

            // finally, actually unpack the data into the alignment vector;
            // the row order is fixed for the block, so pick the starts once
            const int masterStart = masterFirst ? block.GetStarts().front() : block.GetStarts().back();
            const int slaveStart = masterFirst ? block.GetStarts().back() : block.GetStarts().front();
            for (i=0; i<block.GetLen(); ++i) {
                masterRes = masterStart + i;
                slaveRes = slaveStart + i;
                // negative positions must be rejected as well, otherwise the
                // write below indexes masterToSlave out of bounds
                if (masterRes < 0 || masterRes >= static_cast<int>(master->Length()) ||
                    slaveRes < 0 || slaveRes >= static_cast<int>(slave->Length())) {
                    ERR_POST_X(12, Critical << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                        "seqloc in dendiag block > length of sequence!");
                    return;
                }
                masterToSlave[masterRes] = slaveRes;
            }
        }
    }

    // unpack denseg alignment
    else if (segs.IsDenseg()) {

        const CDense_seg& block = segs.GetDenseg();

        if (!block.IsSetDim() || block.GetDim() != 2 ||
            block.GetIds().size() != 2 ||
            block.GetStarts().size() != ((unsigned int ) 2 * block.GetNumseg()) ||
            block.GetLens().size() != ((unsigned int ) block.GetNumseg())) {
            ERR_POST_X(13, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                "incorrect denseg block dimension");
            return;
        }

        // make sure identities of master and slave sequences match in each block
        if ((masterFirst &&
                (!master->Matches(block.GetIds().front().GetObject()) ||
                 !slave->Matches(block.GetIds().back().GetObject()))) ||
            (!masterFirst &&
                (!master->Matches(block.GetIds().back().GetObject()) ||
                 !slave->Matches(block.GetIds().front().GetObject())))) {
            ERR_POST_X(14, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                "mismatched Seq-id in denseg block");
            return;
        }

        // finally, actually unpack the data into the alignment vector
        // (starts holds one entry per row per segment; sizes were validated above)
        CDense_seg::TStarts::const_iterator starts = block.GetStarts().begin();
        CDense_seg::TLens::const_iterator lens, le = block.GetLens().end();
        for (lens=block.GetLens().begin(); lens!=le; ++lens) {
            if (masterFirst) {
                masterRes = *(starts++);
                slaveRes = *(starts++);
            } else {
                slaveRes = *(starts++);
                masterRes = *(starts++);
            }

            // skip gaps
            if (masterRes == -1 || slaveRes == -1)
                continue;

            // Any other negative start is invalid. The bound test is written as
            // "start > length - segLen" (after checking segLen <= length) so that
            // no signed/unsigned sum can wrap around and slip past the check.
            const unsigned int segLen = *lens;
            if (masterRes < 0 || slaveRes < 0 ||
                segLen > master->Length() ||
                static_cast<unsigned int>(masterRes) > master->Length() - segLen ||
                segLen > slave->Length() ||
                static_cast<unsigned int>(slaveRes) > slave->Length() - segLen) {
                ERR_POST_X(15, Critical << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                    "seqloc in denseg block > length of sequence!");
                return;
            }

            for (i=0; i<segLen; ++i)
                masterToSlave[masterRes + i] = slaveRes + i;
        }
    }

    status = CAV_SUCCESS;
}