Example #1
0
//  Rewrite every seq-align of 'ccd' so that all rows share the block model
//  obtained by intersecting the master-side block models of the individual
//  seq-aligns.
//
//  ccd          CD whose seq-aligns are modified in place; may be null.
//  rowFraction  forwarded unchanged to BlockIntersector::getIntersectedAlignment.
//
//  Returns:
//     0   if the CD already reports a block-aligned MultipleAlignment (nothing is changed),
//    -1   if 'ccd' is null, row 0 has an empty sequence string, any seq-align
//         yields an invalid BlockModelPair, or the intersection has no aligned residues,
//     otherwise the number of blocks in the intersected block model.
int IntersectByMaster(CCdCore* ccd, double rowFraction) {

    int result = -1;
    unsigned int masterLen = (ccd) ? ccd->GetSequenceStringByRow(0).length() : 0;
    if (masterLen == 0) return result;

    unsigned int i, j, nBlocks;
    unsigned int nRows = ccd->GetNumRows();

    //  If there is already a consistent block model, do nothing.
    {
        MultipleAlignment ma(ccd);
        if (ma.isBlockAligned()) {
            return 0;
        }
    }

    BlockIntersector blockIntersector(masterLen);
    vector<BlockModelPair*> blockModelPairs;   //  owned here; released before returning
    set<int> forcedCTerminiInIntersection;

    list< CRef< CSeq_align > >& cdSeqAligns = ccd->GetSeqAligns();
    list< CRef< CSeq_align > >::iterator cdSeqAlignIt = cdSeqAligns.begin(), cdSeqAlignEnd = cdSeqAligns.end();

    for (; cdSeqAlignIt != cdSeqAlignEnd; ++cdSeqAlignIt) {
        BlockModelPair* bmp = new BlockModelPair(*cdSeqAlignIt);

        //  One invalid pair makes the whole operation fail (see the size check
        //  below), so release it and stop collecting.
        if (!bmp->isValid()) {
            delete bmp;
            break;
        }

        //  We assume # of blocks and all block lengths are same on master and slave.
        blockModelPairs.push_back(bmp);
        blockIntersector.addOneAlignment(bmp->getMaster());

        //  Find places the intersection can't merge blocks (i.e., where there are
        //  gaps in the slave across a block boundary, but not in the master).
        BlockModel& slave = bmp->getSlave();
        nBlocks = slave.getBlocks().size();

        //  The C-terminal block is skipped.  Written as 'j + 1 < nBlocks' so that
        //  an empty block list does not wrap the unsigned bound.
        for (j = 0; j + 1 < nBlocks; ++j) {
            if (slave.getGapToCTerminal(j) > 0 && bmp->getMaster().getGapToCTerminal(j) == 0) {
                forcedCTerminiInIntersection.insert(bmp->getMaster().getBlock(j).getEnd());
            }
        }
    }

    //  Only intersect when every seq-align produced a valid BlockModelPair.
    BlockModel* intersectedBlockModel = NULL;
    if (blockModelPairs.size() == cdSeqAligns.size()) {
        intersectedBlockModel = blockIntersector.getIntersectedAlignment(forcedCTerminiInIntersection, rowFraction);
    }
    int nAlignedIBM = (intersectedBlockModel) ? intersectedBlockModel->getTotalBlockLength() : 0;

    if (nAlignedIBM != 0) {

        //  As we have case where every block model isn't identical,
        //  change each seq-align to reflect the common set of aligned columns.
        nBlocks = intersectedBlockModel->getBlocks().size();

        //  One seq-align per non-master row is expected; never step past the
        //  pairs actually collected, and do not wrap if nRows is zero.
        unsigned int nToUpdate = (nRows > 0) ? nRows - 1 : 0;
        if (nToUpdate > blockModelPairs.size()) {
            nToUpdate = static_cast<unsigned int>(blockModelPairs.size());
        }

        for (i = 0, cdSeqAlignIt = cdSeqAligns.begin(); i < nToUpdate; ++i, ++cdSeqAlignIt) {

            BlockModelPair* bmp = blockModelPairs[i];

            //  Automatic storage so the temporary slave model is released each
            //  iteration.  NOTE(review): assumes the seq-align returned by
            //  toSeqAlign() does not keep a reference to this object -- confirm.
            BlockModel intersectedSeqAlignSlave(bmp->getSlave().getSeqId(), false);

            //  After reverse(), mapToMaster() is used to obtain the slave-side
            //  start for a position taken from the intersected (master) model.
            bmp->reverse();
            for (j = 0; j < nBlocks; ++j) {
                const Block& jthMasterBlock = intersectedBlockModel->getBlock(j);
                int slaveStart = bmp->mapToMaster(jthMasterBlock.getStart());

                //  since we're dealing w/ an intersection, slaveStart should always be valid
                assert(slaveStart != -1);

                Block b(slaveStart, jthMasterBlock.getLen(), jthMasterBlock.getId());
                intersectedSeqAlignSlave.addBlock(b);
            }
            *cdSeqAlignIt = intersectedSeqAlignSlave.toSeqAlign(*intersectedBlockModel);
        }
        result = nBlocks;
    }

    //  Single cleanup point shared by the success and failure paths.
    for (i = 0; i < blockModelPairs.size(); ++i) {
        delete blockModelPairs[i];
    }
    blockModelPairs.clear();
    delete intersectedBlockModel;

    return result;
}
Example #2
0
//  Extract the aligned columns of a CD as strings, one character per row.
//
//  cd            CD to read; if null, 'columns' is simply left empty.
//  columns       output map; always cleared first.  An empty map on return is
//                how problems are flagged to the caller.
//  referenceRow  if less than the number of rows, each column is keyed by its
//                start-derived position taken from that row's seq-align;
//                otherwise columns are keyed by their 0-based column number.
//
//  The CD must report a block-aligned MultipleAlignment; otherwise an error is
//  posted and no columns are returned.
void GetAlignmentColumnsForCD(CCdCore* cd, map<unsigned int, string>& columns, unsigned int referenceRow)
{
    bool isOK = true, useRefRow = true;
    int j;
    unsigned int i, col, row, pos, nRows, nCols, nBlocks;
    char** alignedResidues = NULL;
    string colString;
    //  Map column number to position on the selected reference row.
    map<unsigned int, unsigned int> colToPos;
    vector<int> starts, lengths;
    CRef< CSeq_align > seqAlign;

    //  Empty the columns map first, as this is used as a way to flag problems.
    columns.clear();

    if (!cd) return;

    //  Check if the block structure is consistent.  The MultipleAlignment lives
    //  on the stack so it is released even if one of its methods throws.
    try {
        MultipleAlignment ma(cd);
        if (! ma.isBlockAligned()) {
            ERR_POST("CD " << cd->GetAccession() << " must have a consistent block structure for column extraction.");
            return;
        }
    } catch (...) {
        //  The block structure could not be verified, so do not go on to
        //  extract columns from it.
        ERR_POST("Could not extract columns for CD " << cd->GetAccession());
        return;
    }

    nCols = cd->GetAlignmentLength();
    nRows = cd->GetNumRows();

    //  Get a reference seq-align for mapping between alignment rows.
    //  If the columns map index will simply be the column count, use the master, row 0.
    if (referenceRow >= nRows) {
        useRefRow = false;
        referenceRow = 0;
    }
    if (! cd->GetSeqAlign(referenceRow, seqAlign)) {
        isOK = false;
    }

    //  Initialize the column # -> reference row position mapping.
    //  If useRefRow is true, use the indicated row's coordinates as the position.
    //  Otherwise, use the column number as the position.
    if (isOK && GetBlockStarts(seqAlign, starts, (referenceRow == 0)) > 0 && GetBlockLengths(seqAlign, lengths) > 0) {
        nBlocks = starts.size();
        if (nBlocks == lengths.size()) {
            for (i = 0, col = 0; i < nBlocks; ++i) {
                pos = (useRefRow) ? starts[i] : col;
                for (j = 0; j < lengths[i]; ++j, ++col, ++pos) {
                    //  Not explicitly checking if 'pos' is aligned since above
                    //  we confirmed the CD has a valid block model.
                    colToPos[col] = pos;
                }
            }
        } else {
            isOK = false;
        }
    } else {
        isOK = false;
    }

    SetAlignedResiduesForCD(cd, alignedResidues, true);

    //  Construct the columns as string objects.
    if (isOK && alignedResidues) {
        for (col = 0; col < nCols; ++col) {
            //  Look the column up without inserting: operator[] would silently
            //  map an unknown column to position 0 and clobber that entry.
            map<unsigned int, unsigned int>::const_iterator posIt = colToPos.find(col);
            if (posIt == colToPos.end()) {
                continue;
            }

            colString.erase();
            for (row = 0; row < nRows; ++row) {
                colString += alignedResidues[row][col];
            }
            columns[posIt->second] = colString;
        }
    }

    //  Clean up array of characters.
    if (alignedResidues) {
        for (row = 0; row < nRows; ++row) {
            delete [] alignedResidues[row];
        }
        delete [] alignedResidues;
    }

}