SCompartmentData(const CSeq_align_set &compartment)
 : num_alignments(compartment.Get().size())
 {
    CSeq_align align;
    align.SetSegs().SetDisc(const_cast<CSeq_align_set &>(compartment));
    query_range = align.GetSeqRange(0);
    subject_range = align.GetSeqRange(1);
 }
/// Application entry point: reads Seq-aligns from the "i" argument, groups
/// them with FindCompartments(), logs a summary of every compartment and
/// writes each compartment to the "o" argument as an ASN.1 text Seq-annot.
///
/// NOTE(review): the end of this function (return statement and closing
/// brace) is not visible in this excerpt.
int CTestCompartApplication::Run(void)
{
    // Get arguments
    const CArgs& args = GetArgs();

    // Set up the object manager and a scope with the default data loaders.
    // The scope is not referenced again in the visible part of this function.
    CRef<CObjectManager> om(CObjectManager::GetInstance());
    CGBDataLoader::RegisterInObjectManager(*om);
    CRef<CScope> scope(new CScope(*om));
    scope->AddDefaults();

    // Read every Seq-align from the input stream (ASN.1 text) until the
    // object stream reports end of data.
    list< CRef<CSeq_align> > aligns;
    {{
         CNcbiIstream& istr = args["i"].AsInputFile();
         auto_ptr<CObjectIStream> is
             (CObjectIStream::Open(eSerial_AsnText, istr));
         while ( !is->EndOfData() ) {
             CRef<CSeq_align> align(new CSeq_align);
             *is >> *align;
             aligns.push_back(align);
         }
     }}

    CNcbiOstream& ostr = args["o"].AsOutputFile();

    // Start from the default compartment options and OR in one flag for each
    // "allow-intersect*" argument that was supplied.
    TCompartOptions opts = fCompart_Defaults;
    if (args["allow-intersect"]) {
        opts |= fCompart_AllowIntersections;
    }
    if (args["allow-intersect-query"]) {
        opts |= fCompart_AllowIntersectionsQuery;
    }
    if (args["allow-intersect-subject"]) {
        opts |= fCompart_AllowIntersectionsSubject;
    }
    if (args["allow-intersect-both"]) {
        opts |= fCompart_AllowIntersectionsBoth;
    }

    // Group the input alignments into compartments.
    list< CRef<CSeq_align_set> > compartments;
    FindCompartments(aligns, compartments, opts);

    // Summary counts are posted through the diagnostic stream at Error
    // severity, as are the per-compartment lines below.
    LOG_POST(Error << "input alignments: " << aligns.size());
    LOG_POST(Error << "output compartments: " << compartments.size());

    // Report and write out each compartment; count is the compartment's
    // ordinal, starting at 1.
    size_t count = 0;
    NON_CONST_ITERATE (list< CRef<CSeq_align_set> >, i, compartments) {
        ++count;
        // Wrap the compartment as the disc segments of a temporary Seq-align
        // so that ranges and ids can be queried for the set as a whole.
        CSeq_align align;
        align.SetSegs().SetDisc(**i);

        // Row 0 is reported as the query and row 1 as the subject.
        TSeqRange r1 = align.GetSeqRange(0);
        TSeqRange r2 = align.GetSeqRange(1);
        CSeq_id_Handle id1 = CSeq_id_Handle::GetHandle(align.GetSeq_id(0));
        CSeq_id_Handle id2 = CSeq_id_Handle::GetHandle(align.GetSeq_id(1));

        // Range endpoints are printed with 1 added (presumably converting
        // 0-based coordinates to 1-based for display -- confirm).
        LOG_POST(Error << "compartment " << count << ":");
        LOG_POST(Error << "  query   = " << id1 << " (" << r1.GetFrom() + 1 << ".." << r1.GetTo() + 1 << ")");
        LOG_POST(Error << "  subject = " << id2 << " (" << r2.GetFrom() + 1 << ".." << r2.GetTo() + 1 << ")");
        LOG_POST(Error << "  alignments = " << (*i)->Get().size());

        // Emit the compartment's alignments as a Seq-annot whose name and
        // title are both "Compartment <count>".
        string title("Compartment ");
        title += NStr::NumericToString(count);
        CSeq_annot annot;
        annot.SetNameDesc(title);
        annot.SetTitleDesc(title);
        annot.SetData().SetAlign() = (*i)->Get();
        ostr << MSerial_AsnText << annot;
    }
Ejemplo n.º 3
0
/// Build the sort key for one alignment.
///
/// One SSortKey::TItem is appended to key.items for every token in key_toks,
/// in token order. Tokens are compared case-insensitively. Tokens that yield
/// text (ids, tracebacks) fill item.first; tokens that yield a number
/// (positions, strands, lengths, scores) fill item.second. The other half of
/// the item is left as default-constructed.
///
/// @param align  the alignment to extract key values from
///
/// NOTE(review): the end of this function (presumably returning key, plus the
/// closing brace) is not visible in this excerpt.
CAlignSort::SSortKey
CAlignSort::SAlignExtractor::operator()(const CSeq_align& align)
{
    SSortKey key;
    ITERATE (vector<string>, iter, key_toks) {
        SSortKey::TItem item;

        // Sequence ids: the row's id is resolved through the scope with
        // eGetId_Canonical and stored as a FASTA-style string.
        if (NStr::EqualNocase(*iter, "query")) {
            CSeq_id_Handle idh =
                CSeq_id_Handle::GetHandle(align.GetSeq_id(0));
            idh = sequence::GetId(idh, *scope,
                                  sequence::eGetId_Canonical);
            item.first = idh.GetSeqId()->AsFastaString();
        }
        else if (NStr::EqualNocase(*iter, "subject")) {
            CSeq_id_Handle idh =
                CSeq_id_Handle::GetHandle(align.GetSeq_id(1));
            idh = sequence::GetId(idh, *scope,
                                  sequence::eGetId_Canonical);
            item.first = idh.GetSeqId()->AsFastaString();
        }

        // Start positions on row 0 (query) / row 1 (subject).
        else if (NStr::EqualNocase(*iter, "query_start")) {
            item.second = align.GetSeqStart(0);
        }
        else if (NStr::EqualNocase(*iter, "subject_start")) {
            item.second = align.GetSeqStart(1);
        }

        // Stop positions.
        else if (NStr::EqualNocase(*iter, "query_end")) {
            item.second = align.GetSeqStop(0);
        }
        else if (NStr::EqualNocase(*iter, "subject_end")) {
            item.second = align.GetSeqStop(1);
        }

        // Strands, stored as the numeric value returned by GetSeqStrand().
        else if (NStr::EqualNocase(*iter, "query_strand")) {
            item.second = align.GetSeqStrand(0);
        }
        else if (NStr::EqualNocase(*iter, "subject_strand")) {
            item.second = align.GetSeqStrand(1);
        }

        // Length of the range covered on each row.
        else if (NStr::EqualNocase(*iter, "query_align_len")) {
            item.second = align.GetSeqRange(0).GetLength();
        }
        else if (NStr::EqualNocase(*iter, "subject_align_len")) {
            item.second = align.GetSeqRange(1).GetLength();
        }
        // Traceback strings produced by CScoreBuilder for the given row.
        else if (NStr::EqualNocase(*iter, "query_traceback")) {
            CScoreBuilder builder;
            item.first = builder.GetTraceback(*scope, align, 0);
        }
        else if (NStr::EqualNocase(*iter, "subject_traceback")) {
            CScoreBuilder builder;
            item.first = builder.GetTraceback(*scope, align, 1);
        }

        else {
            /// any other token is assumed to be the name of a score and is
            /// looked up through CScoreLookup
            CScoreLookup lookup;
            lookup.SetScope(*scope);
            item.second = lookup.GetScore(align, *iter);
        }
        key.items.push_back(item);
    }
Ejemplo n.º 4
0
// Builds a pairwise master/slave residue mapping from a single Seq-align.
//
// On success, masterToSlave[m] holds the slave residue aligned to master
// residue m (or -1 where the master residue is unaligned), slave points at the
// matching sequence and status is set to CAV_SUCCESS.  On any validation
// failure an error is posted, the constructor returns early and status keeps
// its initial value CAV_ERROR_PAIRWISE.
//
//   sequenceSet     sequences searched for the slave of this alignment
//   masterSequence  the master sequence; dereferenced unconditionally, so it
//                   must not be NULL
//   seqAlign        alignment whose segs are a Dendiag list or a Denseg; any
//                   other segs type reaches GetDenseg()
//                   (NOTE(review): presumably throws -- confirm against callers)
MasterSlaveAlignment::MasterSlaveAlignment(
    const SequenceSet *sequenceSet,
    const Sequence *masterSequence,
    const CSeq_align& seqAlign) :
    status(CAV_ERROR_PAIRWISE), master(masterSequence), slave(NULL)
{
    // resize alignment vector; -1 marks a master residue that has no aligned
    // slave residue
    masterToSlave.resize(master->Length(), -1);

    const CSeq_align::C_Segs& segs = seqAlign.GetSegs();
    const bool isDendiag = segs.IsDendiag();

    // front()/back() are applied to the dendiag list and to the id list below,
    // before the per-block dimension checks run; both are undefined on an empty
    // container, so reject such input up front
    if (isDendiag) {
        if (segs.GetDendiag().empty() ||
            segs.GetDendiag().front()->GetIds().empty()) {
            ERR_POST_X(10, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                                    "incorrect dendiag block dimensions");
            return;
        }
    } else if (segs.GetDenseg().GetIds().empty()) {
        ERR_POST_X(13, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                                "incorrect denseg block dimension");
        return;
    }

    // find slave sequence for this alignment, and order (master or slave first)
    const SeqIdList& sids = isDendiag ?
        segs.GetDendiag().front()->GetIds() :
        segs.GetDenseg().GetIds();

    SequenceSet::SequenceList::const_iterator
        s = sequenceSet->sequences.begin(),
        se = sequenceSet->sequences.end();

    bool masterFirst = true;
    for (; s!=se; ++s) {

        // the master itself is never taken as the slave in this pass
        if ((*s)->Matches(master->bioseqASN->GetId())) continue;

        if ((*s)->Matches(sids.back().GetObject())) {
            break;
        } else if ((*s)->Matches(sids.front().GetObject())) {
            masterFirst = false;
            break;
        }
    }
    if (s == se) {
        // special case of master seq. aligned to itself
        if (master->Matches(sids.back().GetObject()) && master->Matches(sids.front().GetObject())) {
            slave = master;
        } else {
            ERR_POST_X(9, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                                   "couldn't find matching unaligned slave sequence");
            return;
        }
    } else {
        slave = *s;
    }

    // sequence lengths do not change while unpacking; fetch them once
    const unsigned int masterLength = static_cast<unsigned int>(master->Length());
    const unsigned int slaveLength = static_cast<unsigned int>(slave->Length());

    // unpack dendiag alignment
    if (isDendiag) {

        CSeq_align::C_Segs::TDendiag::const_iterator d, de = segs.GetDendiag().end();
        for (d=segs.GetDendiag().begin(); d!=de; ++d) {
            const CDense_diag& block = d->GetObject();

            if (!block.IsSetDim() || block.GetDim() != 2 ||
                block.GetIds().size() != 2 ||
                block.GetStarts().size() != 2) {
                ERR_POST_X(10, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                                        "incorrect dendiag block dimensions");
                return;
            }

            // make sure identities of master and slave sequences match in each block
            if ((masterFirst &&
                    (!master->Matches(block.GetIds().front().GetObject()) ||
                     !slave->Matches(block.GetIds().back().GetObject()))) ||
                (!masterFirst &&
                    (!master->Matches(block.GetIds().back().GetObject()) ||
                     !slave->Matches(block.GetIds().front().GetObject())))) {
                ERR_POST_X(11, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                                        "mismatched Seq-id in dendiag block");
                return;
            }

            // finally, actually unpack the data into the alignment vector
            const unsigned int blockLen = block.GetLen();
            for (unsigned int i=0; i<blockLen; ++i) {
                int masterRes, slaveRes;
                if (masterFirst) {
                    masterRes = block.GetStarts().front() + i;
                    slaveRes = block.GetStarts().back() + i;
                } else {
                    masterRes = block.GetStarts().back() + i;
                    slaveRes = block.GetStarts().front() + i;
                }
                // a start that does not fit in int shows up as a negative value
                // here; it must be rejected as well, or it would index outside
                // masterToSlave
                if (masterRes < 0 || slaveRes < 0 ||
                    static_cast<unsigned int>(masterRes) >= masterLength ||
                    static_cast<unsigned int>(slaveRes) >= slaveLength) {
                    ERR_POST_X(12, Critical << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                                               "seqloc in dendiag block > length of sequence!");
                    return;
                }
                masterToSlave[masterRes] = slaveRes;
            }
        }
    }

    // unpack denseg alignment
    else if (segs.IsDenseg()) {

        const CDense_seg& block = segs.GetDenseg();

        if (!block.IsSetDim() || block.GetDim() != 2 ||
            block.GetIds().size() != 2 ||
            block.GetStarts().size() != ((unsigned int ) 2 * block.GetNumseg()) ||
            block.GetLens().size() != ((unsigned int ) block.GetNumseg()))
        {
            ERR_POST_X(13, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                                    "incorrect denseg block dimension");
            return;
        }

        // make sure identities of master and slave sequences match in each block
        if ((masterFirst &&
                (!master->Matches(block.GetIds().front().GetObject()) ||
                 !slave->Matches(block.GetIds().back().GetObject()))) ||
            (!masterFirst &&
                (!master->Matches(block.GetIds().back().GetObject()) ||
                 !slave->Matches(block.GetIds().front().GetObject())))) {
            ERR_POST_X(14, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                                    "mismatched Seq-id in denseg block");
            return;
        }

        // finally, actually unpack the data into the alignment vector;
        // starts holds one (first row, second row) pair per entry of lens
        CDense_seg::TStarts::const_iterator starts = block.GetStarts().begin();
        CDense_seg::TLens::const_iterator lens, le = block.GetLens().end();
        for (lens=block.GetLens().begin(); lens!=le; ++lens) {
            int masterRes, slaveRes;
            if (masterFirst) {
                masterRes = *(starts++);
                slaveRes = *(starts++);
            } else {
                slaveRes = *(starts++);
                masterRes = *(starts++);
            }
            if (masterRes == -1 || slaveRes == -1) continue; // skip gaps

            // Range check written with subtraction so that it cannot wrap
            // around for large starts or lengths; negative starts other than
            // the -1 gap marker are rejected too.  A zero-length segment
            // passes and simply writes nothing.
            const unsigned int segLen = *lens;
            if (masterRes < 0 || slaveRes < 0 ||
                static_cast<unsigned int>(masterRes) > masterLength ||
                segLen > masterLength - static_cast<unsigned int>(masterRes) ||
                static_cast<unsigned int>(slaveRes) > slaveLength ||
                segLen > slaveLength - static_cast<unsigned int>(slaveRes)) {
                ERR_POST_X(15, Critical << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                                           "seqloc in denseg block > length of sequence!");
                return;
            }
            for (unsigned int i=0; i<segLen; ++i)
                masterToSlave[masterRes + i] = slaveRes + i;
        }
    }

    status = CAV_SUCCESS;
}