SCompartmentData(const CSeq_align_set &compartment) : num_alignments(compartment.Get().size()) { CSeq_align align; align.SetSegs().SetDisc(const_cast<CSeq_align_set &>(compartment)); query_range = align.GetSeqRange(0); subject_range = align.GetSeqRange(1); }
/// Application entry point: read Seq-aligns, group them into compartments
/// and write one Seq-annot per compartment.
///
/// Steps performed by the visible part of the body:
///  - obtain the object manager, register the GenBank data loader with it and
///    create a scope with the default loaders added;
///  - read Seq-align objects in ASN.1 text form from the "i" argument until
///    the object stream reports end of data;
///  - start from fCompart_Defaults and OR in one option flag for each of the
///    "allow-intersect", "allow-intersect-query", "allow-intersect-subject"
///    and "allow-intersect-both" arguments that is set;
///  - call FindCompartments() and log the input and output counts;
///  - for every compartment, wrap it in a temporary disc Seq-align to query
///    the row 0 / row 1 ids and ranges, log them (1 is added to each
///    coordinate before printing) and write a Seq-annot whose name and title
///    are "Compartment <n>" to the "o" argument in ASN.1 text form.
///
/// All progress messages are posted with Error severity.
///
/// NOTE(review): the line breaks of this definition appear to have been lost;
/// as laid out here the "// Get arguments" comment extends to the end of its
/// physical line, and the function's closing brace / return statement are not
/// part of the visible text.
int CTestCompartApplication::Run(void) { // Get arguments const CArgs& args = GetArgs(); CRef<CObjectManager> om(CObjectManager::GetInstance()); CGBDataLoader::RegisterInObjectManager(*om); CRef<CScope> scope(new CScope(*om)); scope->AddDefaults(); list< CRef<CSeq_align> > aligns; {{ CNcbiIstream& istr = args["i"].AsInputFile(); auto_ptr<CObjectIStream> is (CObjectIStream::Open(eSerial_AsnText, istr)); while ( !is->EndOfData() ) { CRef<CSeq_align> align(new CSeq_align); *is >> *align; aligns.push_back(align); } }} CNcbiOstream& ostr = args["o"].AsOutputFile(); TCompartOptions opts = fCompart_Defaults; if (args["allow-intersect"]) { opts |= fCompart_AllowIntersections; } if (args["allow-intersect-query"]) { opts |= fCompart_AllowIntersectionsQuery; } if (args["allow-intersect-subject"]) { opts |= fCompart_AllowIntersectionsSubject; } if (args["allow-intersect-both"]) { opts |= fCompart_AllowIntersectionsBoth; } list< CRef<CSeq_align_set> > compartments; FindCompartments(aligns, compartments, opts); LOG_POST(Error << "input alignments: " << aligns.size()); LOG_POST(Error << "output compartments: " << compartments.size()); size_t count = 0; NON_CONST_ITERATE (list< CRef<CSeq_align_set> >, i, compartments) { ++count; CSeq_align align; align.SetSegs().SetDisc(**i); TSeqRange r1 = align.GetSeqRange(0); TSeqRange r2 = align.GetSeqRange(1); CSeq_id_Handle id1 = CSeq_id_Handle::GetHandle(align.GetSeq_id(0)); CSeq_id_Handle id2 = CSeq_id_Handle::GetHandle(align.GetSeq_id(1)); LOG_POST(Error << "compartment " << count << ":"); LOG_POST(Error << " query = " << id1 << " (" << r1.GetFrom() + 1 << ".." << r1.GetTo() + 1 << ")"); LOG_POST(Error << " subject = " << id2 << " (" << r2.GetFrom() + 1 << ".." 
<< r2.GetTo() + 1 << ")"); LOG_POST(Error << " alignments = " << (*i)->Get().size()); string title("Compartment "); title += NStr::NumericToString(count); CSeq_annot annot; annot.SetNameDesc(title); annot.SetTitleDesc(title); annot.SetData().SetAlign() = (*i)->Get(); ostr << MSerial_AsnText << annot; }
/// Build the sort key for one alignment.
///
/// For every token in key_toks (compared case-insensitively) one item is
/// appended to key.items:
///  - "query" / "subject": item.first is the FASTA string of the row 0 /
///    row 1 Seq-id after resolving it with sequence::eGetId_Canonical;
///  - "query_start" / "subject_start", "query_end" / "subject_end",
///    "query_strand" / "subject_strand": item.second is GetSeqStart(),
///    GetSeqStop() or GetSeqStrand() of the corresponding row;
///  - "query_align_len" / "subject_align_len": item.second is the length of
///    the row's GetSeqRange();
///  - "query_traceback" / "subject_traceback": item.first is the string
///    returned by CScoreBuilder::GetTraceback() for the row;
///  - any other token is treated as a score name and item.second is the value
///    returned by CScoreLookup::GetScore().
///
/// @param align
///   Alignment from which the key fields are extracted.
///
/// NOTE(review): the line breaks of this definition appear to have been lost;
/// as laid out here the "/// assume it is a score" comment extends to the end
/// of the physical line, and the statement returning `key` plus the closing
/// brace of the function are not part of the visible text.
CAlignSort::SSortKey CAlignSort::SAlignExtractor::operator()(const CSeq_align& align) { SSortKey key; ITERATE (vector<string>, iter, key_toks) { SSortKey::TItem item; if (NStr::EqualNocase(*iter, "query")) { CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(align.GetSeq_id(0)); idh = sequence::GetId(idh, *scope, sequence::eGetId_Canonical); item.first = idh.GetSeqId()->AsFastaString(); } else if (NStr::EqualNocase(*iter, "subject")) { CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(align.GetSeq_id(1)); idh = sequence::GetId(idh, *scope, sequence::eGetId_Canonical); item.first = idh.GetSeqId()->AsFastaString(); } else if (NStr::EqualNocase(*iter, "query_start")) { item.second = align.GetSeqStart(0); } else if (NStr::EqualNocase(*iter, "subject_start")) { item.second = align.GetSeqStart(1); } else if (NStr::EqualNocase(*iter, "query_end")) { item.second = align.GetSeqStop(0); } else if (NStr::EqualNocase(*iter, "subject_end")) { item.second = align.GetSeqStop(1); } else if (NStr::EqualNocase(*iter, "query_strand")) { item.second = align.GetSeqStrand(0); } else if (NStr::EqualNocase(*iter, "subject_strand")) { item.second = align.GetSeqStrand(1); } else if (NStr::EqualNocase(*iter, "query_align_len")) { item.second = align.GetSeqRange(0).GetLength(); } else if (NStr::EqualNocase(*iter, "subject_align_len")) { item.second = align.GetSeqRange(1).GetLength(); } else if (NStr::EqualNocase(*iter, "query_traceback")) { CScoreBuilder builder; item.first = builder.GetTraceback(*scope, align, 0); } else if (NStr::EqualNocase(*iter, "subject_traceback")) { CScoreBuilder builder; item.first = builder.GetTraceback(*scope, align, 1); } else { /// assume it is a score CScoreLookup lookup; lookup.SetScope(*scope); item.second = lookup.GetScore(align, *iter); } key.items.push_back(item); }
/// Build a pairwise master/slave residue mapping from a two-row Seq-align.
///
/// masterToSlave is sized to the master sequence length and initialised to
/// -1; every aligned master residue index is then set to the index of the
/// slave residue it is aligned with.  Both dense-diag and dense-seg
/// representations are unpacked.
///
/// `status` starts as CAV_ERROR_PAIRWISE and is switched to CAV_SUCCESS only
/// after the whole alignment has been unpacked; every failure path posts a
/// diagnostic and returns early, leaving the error status in place (the
/// mapping may then be partially filled).
///
/// @param sequenceSet
///   Sequences among which the slave sequence is searched for.
/// @param masterSequence
///   The master sequence; must not be NULL (it is dereferenced immediately).
/// @param seqAlign
///   Pairwise alignment between the master and one other sequence (or the
///   master aligned to itself).
MasterSlaveAlignment::MasterSlaveAlignment(
    const SequenceSet *sequenceSet,
    const Sequence *masterSequence,
    const CSeq_align& seqAlign) :
    status(CAV_ERROR_PAIRWISE), master(masterSequence), slave(NULL)
{
    // resize alignment and block vector
    masterToSlave.resize(master->Length(), -1);

    const CSeq_align::C_Segs& segs = seqAlign.GetSegs();
    const bool isDendiag = segs.IsDendiag();

    // an empty dendiag list has no first block to take the Seq-ids from;
    // calling front() on it would be undefined behaviour
    if (isDendiag && segs.GetDendiag().empty()) {
        ERR_POST_X(10, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
            "incorrect dendiag block dimensions");
        return;
    }

    // find slave sequence for this alignment, and order (master or slave first)
    const SeqIdList& sids = isDendiag ?
        segs.GetDendiag().front()->GetIds() :
        segs.GetDenseg().GetIds();

    // without any Seq-id no slave can be identified, and front()/back() below
    // must not be called on an empty list
    if (sids.empty()) {
        ERR_POST_X(9, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
            "couldn't find matching unaligned slave sequence");
        return;
    }

    SequenceSet::SequenceList::const_iterator
        s = sequenceSet->sequences.begin(),
        se = sequenceSet->sequences.end();

    bool masterFirst = true;
    for (; s!=se; ++s) {
        // the master itself is never picked here; self-alignment is handled below
        if ((*s)->Matches(master->bioseqASN->GetId())) continue;
        if ((*s)->Matches(sids.back().GetObject())) {
            break;
        } else if ((*s)->Matches(sids.front().GetObject())) {
            masterFirst = false;
            break;
        }
    }
    if (s == se) {
        // special case of master seq. aligned to itself
        if (master->Matches(sids.back().GetObject()) &&
            master->Matches(sids.front().GetObject())) {
            slave = master;
        } else {
            ERR_POST_X(9, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                "couldn't find matching unaligned slave sequence");
            return;
        }
    } else {
        slave = *s;
    }

    unsigned int i;
    int masterRes, slaveRes;

    // unpack dendiag alignment
    if (isDendiag) {

        CSeq_align::C_Segs::TDendiag::const_iterator d, de = segs.GetDendiag().end();
        for (d=segs.GetDendiag().begin(); d!=de; ++d) {
            const CDense_diag& block = d->GetObject();

            if (!block.IsSetDim() || block.GetDim() != 2 ||
                block.GetIds().size() != 2 ||
                block.GetStarts().size() != 2) {
                ERR_POST_X(10, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                    "incorrect dendiag block dimensions");
                return;
            }

            // make sure identities of master and slave sequences match in each block
            if ((masterFirst &&
                    (!master->Matches(block.GetIds().front().GetObject()) ||
                     !slave->Matches(block.GetIds().back().GetObject()))) ||
                (!masterFirst &&
                    (!master->Matches(block.GetIds().back().GetObject()) ||
                     !slave->Matches(block.GetIds().front().GetObject())))) {
                ERR_POST_X(11, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                    "mismatched Seq-id in dendiag block");
                return;
            }

            // finally, actually unpack the data into the alignment vector
            for (i=0; i<block.GetLen(); ++i) {
                if (masterFirst) {
                    masterRes = block.GetStarts().front() + i;
                    slaveRes = block.GetStarts().back() + i;
                } else {
                    masterRes = block.GetStarts().back() + i;
                    slaveRes = block.GetStarts().front() + i;
                }
                // a negative value here means the position did not fit in an
                // int; reject it as well so it is never used as a vector index
                if (masterRes < 0 || slaveRes < 0 ||
                    masterRes >= static_cast<int>(master->Length()) ||
                    slaveRes >= static_cast<int>(slave->Length())) {
                    ERR_POST_X(12, Critical << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                        "seqloc in dendiag block > length of sequence!");
                    return;
                }
                masterToSlave[masterRes] = slaveRes;
            }
        }
    }

    // unpack denseg alignment
    else if (segs.IsDenseg()) {

        const CDense_seg& block = segs.GetDenseg();

        if (!block.IsSetDim() || block.GetDim() != 2 ||
            block.GetIds().size() != 2 ||
            block.GetStarts().size() != ((unsigned int ) 2 * block.GetNumseg()) ||
            block.GetLens().size() != ((unsigned int ) block.GetNumseg())) {
            ERR_POST_X(13, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                "incorrect denseg block dimension");
            return;
        }

        // make sure identities of master and slave sequences match in each block
        if ((masterFirst &&
                (!master->Matches(block.GetIds().front().GetObject()) ||
                 !slave->Matches(block.GetIds().back().GetObject()))) ||
            (!masterFirst &&
                (!master->Matches(block.GetIds().back().GetObject()) ||
                 !slave->Matches(block.GetIds().front().GetObject())))) {
            ERR_POST_X(14, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                "mismatched Seq-id in denseg block");
            return;
        }

        // finally, actually unpack the data into the alignment vector;
        // starts holds two entries (one per row) for every entry in lens
        CDense_seg::TStarts::const_iterator starts = block.GetStarts().begin();
        CDense_seg::TLens::const_iterator lens, le = block.GetLens().end();
        for (lens=block.GetLens().begin(); lens!=le; ++lens) {
            if (masterFirst) {
                masterRes = *(starts++);
                slaveRes = *(starts++);
            } else {
                slaveRes = *(starts++);
                masterRes = *(starts++);
            }

            // skip gaps
            if (masterRes == -1 || slaveRes == -1) continue;

            // any other negative start is invalid: it would slip through the
            // unsigned range comparison and index outside masterToSlave
            if (masterRes < 0 || slaveRes < 0 ||
                (masterRes + *lens - 1) >= master->Length() ||
                (slaveRes + *lens - 1) >= slave->Length()) {
                ERR_POST_X(15, Critical << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                    "seqloc in denseg block > length of sequence!");
                return;
            }
            for (i=0; i<*lens; ++i)
                masterToSlave[masterRes + i] = slaveRes + i;
        }
    }

    status = CAV_SUCCESS;
}