Ejemplo n.º 1
0
// Builds the pairwise alignment between a master sequence and one dependent sequence from a
// Seq-align that holds either a list of Dense-diag blocks or a single Dense-seg.
//
//   parent         - passed through to the StructureBase constructor
//   masterSequence - the master sequence; master->parentSet->sequenceSet is searched for the
//                    dependent sequence
//   seqAlign       - the two-row alignment to unpack
//
// On success 'dependent' points at the matched sequence, masterToDependent[m] holds the
// dependent residue aligned to master residue m, and blockStructure[m] holds the index of the
// aligned block that contains m; entries for unaligned residues keep their initial value of -1.
// On any validation failure an error is logged and the constructor returns early; 'dependent'
// is still NULL if the failure happened before the sequence lookup succeeded, and the vectors
// may be partially filled.
MasterDependentAlignment::MasterDependentAlignment(StructureBase *parent, const Sequence *masterSequence,
    const ncbi::objects::CSeq_align& seqAlign) :
    StructureBase(parent), master(masterSequence), dependent(NULL)
{
    // resize alignment and block vector; every entry starts out as -1
    masterToDependent.resize(master->Length(), -1);
    blockStructure.resize(master->Length(), -1);

    // front()/back() on an empty container is undefined, so reject an alignment that carries
    // no dendiag blocks or no Seq-ids before any ids are read
    const CSeq_align::C_Segs& segs = seqAlign.GetSegs();
    if (segs.IsDendiag() ?
            (segs.GetDendiag().empty() || segs.GetDendiag().front()->GetIds().empty()) :
            segs.GetDenseg().GetIds().empty()) {
        ERRORMSG("MasterDependentAlignment::MasterDependentAlignment() - "
            "alignment has no blocks or no Seq-ids");
        return;
    }

    // find dependent sequence for this alignment, and order (master or dependent first)
    const CSeq_id& frontSeqId = segs.IsDendiag() ?
        segs.GetDendiag().front()->GetIds().front().GetObject() :
        segs.GetDenseg().GetIds().front().GetObject();
    const CSeq_id& backSeqId = segs.IsDendiag() ?
        segs.GetDendiag().front()->GetIds().back().GetObject() :
        segs.GetDenseg().GetIds().back().GetObject();

    bool masterFirst = true;
    SequenceSet::SequenceList::const_iterator
        s, se = master->parentSet->sequenceSet->sequences.end();
    for (s=master->parentSet->sequenceSet->sequences.begin(); s!=se; ++s) {
        if (master->identifier->MatchesSeqId(frontSeqId) &&
            (*s)->identifier->MatchesSeqId(backSeqId)) {
            break;
        } else if ((*s)->identifier->MatchesSeqId(frontSeqId) &&
                   master->identifier->MatchesSeqId(backSeqId)) {
            masterFirst = false;
            break;
        }
    }
    if (s == se) {
        ERRORMSG("MasterDependentAlignment::MasterDependentAlignment() - couldn't find matching sequences; "
            << "both " << frontSeqId.AsFastaString() << " and "
            << backSeqId.AsFastaString() << " must be in the sequence list for this file!");
        return;
    } else {
        dependent = *s;
    }

    unsigned int i, blockNum = 0;
    int masterRes, dependentRes;

    // unpack dendiag alignment: one block per Dense-diag
    if (segs.IsDendiag()) {

        CSeq_align::C_Segs::TDendiag::const_iterator d , de = segs.GetDendiag().end();
        for (d=segs.GetDendiag().begin(); d!=de; ++d, ++blockNum) {
            const CDense_diag& block = d->GetObject();

            if (block.GetDim() != 2 || block.GetIds().size() != 2 || block.GetStarts().size() != 2) {
                ERRORMSG("MasterDependentAlignment::MasterDependentAlignment() - \n"
                    "incorrect dendiag block dimensions");
                return;
            }

            // make sure identities of master and dependent sequences match in each block
            if ((masterFirst &&
                    (!master->identifier->MatchesSeqId(block.GetIds().front().GetObject()) ||
                     !dependent->identifier->MatchesSeqId(block.GetIds().back().GetObject()))) ||
                (!masterFirst &&
                    (!master->identifier->MatchesSeqId(block.GetIds().back().GetObject()) ||
                     !dependent->identifier->MatchesSeqId(block.GetIds().front().GetObject())))) {
                ERRORMSG("MasterDependentAlignment::MasterDependentAlignment() - "
                    "mismatched Seq-id in dendiag block");
                return;
            }

            // finally, actually unpack the data into the alignment vector
            for (i=0; i<block.GetLen(); ++i) {
                if (masterFirst) {
                    masterRes = block.GetStarts().front() + i;
                    dependentRes = block.GetStarts().back() + i;
                } else {
                    masterRes = block.GetStarts().back() + i;
                    dependentRes = block.GetStarts().front() + i;
                }
                // validate every residue before it is used as an index
                if (masterRes < 0 || masterRes >= static_cast<int>(master->Length()) ||
                    dependentRes < 0 || dependentRes >= static_cast<int>(dependent->Length())) {
                    ERRORMSG("MasterDependentAlignment::MasterDependentAlignment() - seqloc in dendiag block > length of sequence!");
                    return;
                }
                masterToDependent[masterRes] = dependentRes;
                blockStructure[masterRes] = blockNum;
            }
        }
    }

    // unpack denseg alignment
    else if (segs.IsDenseg()) {

        const CDense_seg& block = segs.GetDenseg();

        if (!block.IsSetDim() || block.GetDim() != 2 ||
            block.GetIds().size() != 2 ||
            static_cast<int>(block.GetStarts().size()) != 2 * block.GetNumseg() ||
            static_cast<int>(block.GetLens().size()) != block.GetNumseg()) {
            ERRORMSG("MasterDependentAlignment::MasterDependentAlignment() - \n"
                "incorrect denseg block dimension");
            return;
        }

        // make sure identities of master and dependent sequences match in each block
        if ((masterFirst &&
                (!master->identifier->MatchesSeqId(block.GetIds().front().GetObject()) ||
                 !dependent->identifier->MatchesSeqId(block.GetIds().back().GetObject()))) ||
            (!masterFirst &&
                (!master->identifier->MatchesSeqId(block.GetIds().back().GetObject()) ||
                 !dependent->identifier->MatchesSeqId(block.GetIds().front().GetObject())))) {
            ERRORMSG("MasterDependentAlignment::MasterDependentAlignment() - \n"
                "mismatched Seq-id in denseg block");
            return;
        }

        const unsigned int masterLength = master->Length();
        const unsigned int dependentLength = dependent->Length();

        // finally, actually unpack the data into the alignment vector; the starts vector holds
        // one (first row, second row) pair per segment, in the same order as the lens vector
        CDense_seg::TStarts::const_iterator starts = block.GetStarts().begin();
        CDense_seg::TLens::const_iterator lens, le = block.GetLens().end();
        for (lens=block.GetLens().begin(); lens!=le; ++lens) {
            if (masterFirst) {
                masterRes = *(starts++);
                dependentRes = *(starts++);
            } else {
                dependentRes = *(starts++);
                masterRes = *(starts++);
            }
            if (masterRes != -1 && dependentRes != -1) { // skip gaps
                const unsigned int segLen = *lens;
                // Reject negative starts explicitly and compare lengths by subtraction: the
                // naive "start + len - 1 >= length" is evaluated in unsigned arithmetic, so a
                // start below -1 or a huge length could wrap around and pass the test.
                if (masterRes < 0 || dependentRes < 0 ||
                    static_cast<unsigned int>(masterRes) > masterLength ||
                    segLen > masterLength - static_cast<unsigned int>(masterRes) ||
                    static_cast<unsigned int>(dependentRes) > dependentLength ||
                    segLen > dependentLength - static_cast<unsigned int>(dependentRes)) {
                    ERRORMSG("MasterDependentAlignment::MasterDependentAlignment() - \n"
                        "seqloc in denseg block > length of sequence!");
                    return;
                }
                for (i=0; i<segLen; ++i) {
                    masterToDependent[masterRes + i] = dependentRes + i;
                    blockStructure[masterRes + i] = blockNum;
                }
                ++blockNum; // a "block" of a denseg is an aligned (non-gap) segment
            }
        }
    }

    //TESTMSG("got alignment for dependent gi " << dependent->identifier->gi);
}
Ejemplo n.º 2
0
// Builds the pairwise alignment between a master sequence and one other ("slave") sequence
// from a Seq-align that holds either a list of Dense-diag blocks or a single Dense-seg.
//
//   sequenceSet    - the sequences searched for the slave sequence
//   masterSequence - the master sequence
//   seqAlign       - the two-row alignment to unpack
//
// 'status' starts as CAV_ERROR_PAIRWISE and is set to CAV_SUCCESS only after the whole
// alignment has been unpacked; every validation failure logs an error and returns early, so
// callers can detect failure through 'status'. On success masterToSlave[m] holds the slave
// residue aligned to master residue m, or the initial -1 where m is unaligned.
MasterSlaveAlignment::MasterSlaveAlignment(
    const SequenceSet *sequenceSet,
    const Sequence *masterSequence,
    const CSeq_align& seqAlign) :
    status(CAV_ERROR_PAIRWISE), master(masterSequence), slave(NULL)
{
    // resize alignment vector; every entry starts out as -1
    masterToSlave.resize(master->Length(), -1);

    // front()/back() on an empty container is undefined, so reject an alignment that carries
    // no dendiag blocks or no Seq-ids before any ids are read
    const CSeq_align::C_Segs& segs = seqAlign.GetSegs();
    if (segs.IsDendiag()) {
        if (segs.GetDendiag().empty() || segs.GetDendiag().front()->GetIds().empty()) {
            ERR_POST_X(10, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                                    "empty dendiag list or dendiag block without Seq-ids");
            return;
        }
    } else if (segs.GetDenseg().GetIds().empty()) {
        ERR_POST_X(13, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                                "denseg without Seq-ids");
        return;
    }

    SequenceSet::SequenceList::const_iterator
        s = sequenceSet->sequences.begin(),
        se = sequenceSet->sequences.end();

    // find slave sequence for this alignment, and order (master or slave first)
    const SeqIdList& sids = segs.IsDendiag() ?
        segs.GetDendiag().front()->GetIds() :
        segs.GetDenseg().GetIds();

    bool masterFirst = true;
    for (; s!=se; ++s) {

        // the master itself is never picked as the slave here; self-alignment is handled below
        if ((*s)->Matches(master->bioseqASN->GetId())) continue;

        if ((*s)->Matches(sids.back().GetObject())) {
            break;
        } else if ((*s)->Matches(sids.front().GetObject())) {
            masterFirst = false;
            break;
        }
    }
    if (s == se) {
        // special case of master seq. aligned to itself
        if (master->Matches(sids.back().GetObject()) && master->Matches(sids.front().GetObject())) {
            slave = master;
        } else {
            ERR_POST_X(9, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                                   "couldn't find matching unaligned slave sequence");
            return;
        }
    } else {
        slave = *s;
    }

    unsigned int i;
    int masterRes, slaveRes;

    // unpack dendiag alignment
    if (segs.IsDendiag()) {

        CSeq_align::C_Segs::TDendiag::const_iterator d , de = segs.GetDendiag().end();
        for (d=segs.GetDendiag().begin(); d!=de; ++d) {
            const CDense_diag& block = d->GetObject();

            if (!block.IsSetDim() || block.GetDim() != 2 ||
                block.GetIds().size() != 2 ||
                block.GetStarts().size() != 2) {
                ERR_POST_X(10, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                                        "incorrect dendiag block dimensions");
                return;
            }

            // make sure identities of master and slave sequences match in each block
            if ((masterFirst &&
                    (!master->Matches(block.GetIds().front().GetObject()) ||
                     !slave->Matches(block.GetIds().back().GetObject()))) ||
                (!masterFirst &&
                    (!master->Matches(block.GetIds().back().GetObject()) ||
                     !slave->Matches(block.GetIds().front().GetObject())))) {
                ERR_POST_X(11, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                                        "mismatched Seq-id in dendiag block");
                return;
            }

            // finally, actually unpack the data into the alignment vector
            for (i=0; i<block.GetLen(); ++i) {
                if (masterFirst) {
                    masterRes = block.GetStarts().front() + i;
                    slaveRes = block.GetStarts().back() + i;
                } else {
                    masterRes = block.GetStarts().back() + i;
                    slaveRes = block.GetStarts().front() + i;
                }
                // a start + offset that does not fit in an int shows up as a negative value,
                // so negative residues must be rejected as well as too-large ones
                if (masterRes < 0 || masterRes >= static_cast<int>(master->Length()) ||
                    slaveRes < 0 || slaveRes >= static_cast<int>(slave->Length())) {
                    ERR_POST_X(12, Critical << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                                               "seqloc in dendiag block > length of sequence!");
                    return;
                }
                masterToSlave[masterRes] = slaveRes;
            }
        }
    }

    // unpack denseg alignment
    else if (segs.IsDenseg()) {

        const CDense_seg& block = segs.GetDenseg();

        if (!block.IsSetDim() || block.GetDim() != 2 ||
            block.GetIds().size() != 2 ||
            block.GetStarts().size() != (2u * block.GetNumseg()) ||
            block.GetLens().size() != static_cast<unsigned int>(block.GetNumseg()))
        {
            ERR_POST_X(13, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                                    "incorrect denseg block dimension");
            return;
        }

        // make sure identities of master and slave sequences match in each block
        if ((masterFirst &&
                (!master->Matches(block.GetIds().front().GetObject()) ||
                 !slave->Matches(block.GetIds().back().GetObject()))) ||
            (!masterFirst &&
                (!master->Matches(block.GetIds().back().GetObject()) ||
                 !slave->Matches(block.GetIds().front().GetObject())))) {
            ERR_POST_X(14, Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                                    "mismatched Seq-id in denseg block");
            return;
        }

        const unsigned int masterLength = master->Length();
        const unsigned int slaveLength = slave->Length();

        // finally, actually unpack the data into the alignment vector; the starts vector holds
        // one (first row, second row) pair per segment, in the same order as the lens vector
        CDense_seg::TStarts::const_iterator starts = block.GetStarts().begin();
        CDense_seg::TLens::const_iterator lens, le = block.GetLens().end();
        for (lens=block.GetLens().begin(); lens!=le; ++lens) {
            if (masterFirst) {
                masterRes = *(starts++);
                slaveRes = *(starts++);
            } else {
                slaveRes = *(starts++);
                masterRes = *(starts++);
            }
            if (masterRes != -1 && slaveRes != -1) { // skip gaps
                const unsigned int segLen = *lens;
                // Reject negative starts explicitly and compare lengths by subtraction: the
                // naive "start + len - 1 >= length" is evaluated in unsigned arithmetic, so a
                // start below -1 or a huge length could wrap around and pass the test.
                if (masterRes < 0 || slaveRes < 0 ||
                    static_cast<unsigned int>(masterRes) > masterLength ||
                    segLen > masterLength - static_cast<unsigned int>(masterRes) ||
                    static_cast<unsigned int>(slaveRes) > slaveLength ||
                    segLen > slaveLength - static_cast<unsigned int>(slaveRes)) {
                    ERR_POST_X(15, Critical << "MasterSlaveAlignment::MasterSlaveAlignment() - \n"
                                               "seqloc in denseg block > length of sequence!");
                    return;
                }
                for (i=0; i<segLen; ++i)
                    masterToSlave[masterRes + i] = slaveRes + i;
            }
        }
    }

    status = CAV_SUCCESS;
}