/// Rewrite every seq-align of a CD so that all rows share one block model:
/// the intersection, on the master sequence, of the aligned blocks found in
/// the individual master/slave alignments.
///
/// @param ccd          CD whose seq-aligns are modified in place.  A NULL
///                     pointer or an empty master sequence is an error.
/// @param rowFraction  Forwarded unchanged to
///                     BlockIntersector::getIntersectedAlignment.
///                     NOTE(review): presumably the fraction of rows that must
///                     align a column for it to be kept -- confirm against
///                     BlockIntersector.
/// @return  -1 on failure (no master sequence, an invalid seq-align, or an
///          empty intersection); 0 if the CD already has a consistent block
///          model and nothing was changed; otherwise the number of blocks in
///          the intersected model.
///
/// Every object allocated here is released on every exit path.
int IntersectByMaster(CCdCore* ccd, double rowFraction)
{
    int result = -1;
    unsigned int masterLen = (ccd) ? ccd->GetSequenceStringByRow(0).length() : 0;
    if (masterLen == 0) return result;

    unsigned int i, j;
    unsigned int nRows = ccd->GetNumRows();

    //  If there is already a consistent block model, do nothing.
    //  Scoped automatic object so it is destroyed even if the check throws.
    {
        MultipleAlignment ma(ccd);
        if (ma.isBlockAligned()) {
            return 0;
        }
    }

    BlockIntersector blockIntersector(masterLen);
    vector<BlockModelPair*> blockModelPairs;   //  owned here; freed before returning
    set<int> forcedCTerminiInIntersection;

    list< CRef< CSeq_align > >& cdSeqAligns = ccd->GetSeqAligns();
    list< CRef< CSeq_align > >::iterator cdSeqAlignIt = cdSeqAligns.begin(), cdSeqAlignEnd = cdSeqAligns.end();

    //  Becomes false as soon as one seq-align cannot be turned into a valid pair.
    bool allPairsValid = true;
    for (; cdSeqAlignIt != cdSeqAlignEnd; ++cdSeqAlignIt) {

        BlockModelPair* bmp = new BlockModelPair(*cdSeqAlignIt);

        //  We assume # of blocks and all block lengths are same on master and slave.
        if (!bmp->isValid()) {
            //  The result is discarded anyway, so there is no point in
            //  looking at the remaining seq-aligns.
            delete bmp;
            allPairsValid = false;
            break;
        }

        blockModelPairs.push_back(bmp);
        blockIntersector.addOneAlignment(bmp->getMaster());

        //  Find places the intersection can't merge blocks (i.e., where there are
        //  gaps in the slave across a block boundary, but not in the master).
        BlockModel& slave = bmp->getSlave();
        unsigned int nSlaveBlocks = slave.getBlocks().size();

        //  'j + 1 < n' skips the C-terminal block (its trailing gap is irrelevant)
        //  and, unlike 'j < n - 1', does not wrap around when n == 0.
        for (j = 0; j + 1 < nSlaveBlocks; ++j) {
            if (slave.getGapToCTerminal(j) > 0 && bmp->getMaster().getGapToCTerminal(j) == 0) {
                forcedCTerminiInIntersection.insert(bmp->getMaster().getBlock(j).getEnd());
            }
        }
    }

    //  Only compute the intersection when every seq-align produced a valid pair.
    BlockModel* intersectedBlockModel = NULL;
    if (allPairsValid) {
        intersectedBlockModel = blockIntersector.getIntersectedAlignment(forcedCTerminiInIntersection, rowFraction);
    }
    int nAlignedIBM = (intersectedBlockModel) ? intersectedBlockModel->getTotalBlockLength() : 0;

    if (nAlignedIBM > 0) {

        //  As we have case where every block model isn't identical,
        //  change each seq-align to reflect the common set of aligned columns.
        unsigned int nBlocks = intersectedBlockModel->getBlocks().size();

        //  Update 'nRows - 1' seq-aligns as before, but never index past the
        //  pairs that were actually collected.
        unsigned int nToUpdate = blockModelPairs.size();
        if (nRows - 1 < nToUpdate) {
            nToUpdate = nRows - 1;
        }

        cdSeqAlignIt = cdSeqAligns.begin();
        for (i = 0; i < nToUpdate; ++i, ++cdSeqAlignIt) {

            BlockModelPair* bmp = blockModelPairs[i];

            //  Automatic object: it is only needed to build the new seq-align.
            BlockModel intersectedSeqAlignSlave(bmp->getSlave().getSeqId(), false);

            //  After reversing, mapToMaster() takes a coordinate on the original
            //  master and yields the corresponding original-slave coordinate.
            bmp->reverse();
            for (j = 0; j < nBlocks; ++j) {
                const Block& jthMasterBlock = intersectedBlockModel->getBlock(j);
                int slaveStart = bmp->mapToMaster(jthMasterBlock.getStart());

                //  since we're dealing w/ an intersection, slaveStart should always be valid
                assert(slaveStart != -1);

                Block b(slaveStart, jthMasterBlock.getLen(), jthMasterBlock.getId());
                intersectedSeqAlignSlave.addBlock(b);
            }
            *cdSeqAlignIt = intersectedSeqAlignSlave.toSeqAlign(*intersectedBlockModel);
        }

        result = nBlocks;
    }

    //  Common clean-up for the success path and every failure path.
    for (i = 0; i < blockModelPairs.size(); ++i) {
        delete blockModelPairs[i];
    }
    blockModelPairs.clear();
    delete intersectedBlockModel;

    return result;
}
void GetAlignmentColumnsForCD(CCdCore* cd, map<unsigned int, string>& columns, unsigned int referenceRow) { bool isOK = true, useRefRow = true; int j; unsigned int i, col, row, pos, mapIndex, nRows, nCols, nBlocks; char** alignedResidues = NULL; string rowString, colString; // Map column number to position on the selected reference row. map<unsigned int, unsigned int> colToPos; map<unsigned int, string> rowStrings; vector<int> starts, lengths; CRef< CSeq_align > seqAlign; // Empty the columns map first, as this is used as a way to flag problems. columns.clear(); if (!cd) return; // Check if the block structure is consistent. try { MultipleAlignment* ma = new MultipleAlignment(cd); if (!ma) { ERR_POST("Creation of MultipleAlignment object failed for CD " << cd->GetAccession() << "."); return; } else if (! ma->isBlockAligned()) { delete ma; ERR_POST("CD " << cd->GetAccession() << " must have a consistent block structure for column extraction."); return; } delete ma; ma = NULL; } catch (...) { ERR_POST("Could not extract columns for CD " << cd->GetAccession()); } nCols = cd->GetAlignmentLength(); nRows = cd->GetNumRows(); // Get a reference seq-align for mapping between alignment rows. // If the columns map index will simply be the column count, use the master, row 0. if (referenceRow >= nRows) { useRefRow = false; referenceRow = 0; } if (! cd->GetSeqAlign(referenceRow, seqAlign)) { isOK = false; } // Initialize the column # -> reference row position mapping. // If useRefRow is true, use the indicated row's coordinates as the position. // Otherwise, use the column number as the position. if (isOK && GetBlockStarts(seqAlign, starts, (referenceRow == 0)) > 0 && GetBlockLengths(seqAlign, lengths) > 0) { nBlocks = starts.size(); if (nBlocks == lengths.size()) { for (i = 0, col = 0; i < nBlocks; ++i) { pos = (useRefRow) ? 
starts[i] : col; for (j = 0; j < lengths[i]; ++j, ++col, ++pos) { // Not explicitly checking if 'pos' is aligned since above // we confirmed the CD has a valid block model. colToPos[col] = pos; } } } else { isOK = false; } } else { isOK = false; } SetAlignedResiduesForCD(cd, alignedResidues, true); // Construct the columns as string objects. if (isOK && alignedResidues) { for (col = 0; col < nCols; ++col) { colString.erase(); for (row = 0; row < nRows; ++row) { colString += alignedResidues[row][col]; } mapIndex = colToPos[col]; columns[mapIndex] = colString; } } // Clean up array of characters. if (alignedResidues) { for (row = 0; row < nRows; ++row) { delete [] alignedResidues[row]; } delete [] alignedResidues; } }