Exemplo n.º 1
0
void Liftover::liftInterval()
{  
  PositionMap posCacheMap;
  _colIt = _srcSequence->getColumnIterator(&_tgtSet, 0, _inStart, _inEnd - 1);
  while (true) 
  {
    const ColumnMap* cMap = _colIt->getColumnMap();
    for (ColumnMap::const_iterator i = cMap->begin(); i != cMap->end(); ++i)
    {
      if (i->first->getGenome() == _tgtGenome)
      {
        const DNASet* dSet = i->second;
        const Sequence* seq = i->first;
        // if we're not adding the column, don't bother keeping track
        hal_size_t paralogyFactor = _addDupeColumn ? dSet->size() : 0;
        SeqIndex seqIdx(seq, paralogyFactor);
        for (DNASet::const_iterator j = dSet->begin(); j != dSet->end(); ++j)
        {
          pair<PositionMap::iterator, bool> res =
             posCacheMap.insert(pair<SeqIndex, PositionCache*>(seqIdx, NULL));
          if (res.second == true)
          {
            res.first->second = new PositionCache();
          }
          res.first->second->insert((*j)->getArrayIndex());
        }
      }
    }
    if (_colIt->lastColumn() == true)
    {
      break;
    }
    _colIt->toRight();
  } 

  PositionMap::iterator pcmIt;
  for (pcmIt = posCacheMap.begin(); pcmIt != posCacheMap.end(); ++pcmIt)
  {
    const Sequence* seq = pcmIt->first.first;
    _outParalogy = pcmIt->first.second;
    hal_size_t seqStart = seq->getStartPosition();
    PositionCache* posCache = pcmIt->second;
    const IntervalSet* iSet = posCache->getIntervalSet();
    _outName = seq->getName();
    for (IntervalSet::const_iterator k = iSet->begin(); k != iSet->end(); ++k)
    {
      _outStart = k->second - seqStart;
      _outEnd = k->first + 1 - seqStart;
      writeBedLine();
    }
    delete posCache;
  }
}
Exemplo n.º 2
0
static void printIndels(const Genome *refGenome, const set<const Genome *> targets, hal_size_t adjacentBases) {
    hal_size_t refLength = refGenome->getSequenceLength();
    hal_size_t numSites = 0;
    ColumnIteratorPtr colIt = refGenome->getColumnIterator(&targets);
    // good flanking site
    PositionCache knownGoodSites;
    for (hal_index_t refPos = adjacentBases; refPos < refLength - adjacentBases; refPos++) {
        pair<indelType, hal_size_t> indel;
        indel = getIndel(refPos, refGenome, &targets);
        hal_index_t start = refPos - adjacentBases;
        hal_index_t end = refPos + adjacentBases;
        if (indel.first == INSERTION) {
            end += indel.second;
        }
        colIt->toSite(start, end, true);
        map<const Genome *, hal_index_t> prevPos;
        bool failedFiltering = false;
        hal_size_t step = 1;
        while (1) {
            hal_index_t refColPos = colIt->getReferenceSequencePosition() + colIt->getReferenceSequence()->getStartPosition();
            if (refColPos == refPos && indel.first == DELETION) {
                // jump "step" bases -- i.e. past the deleted region
                step = indel.second + 1;
            } else if (refColPos == refPos && indel.first == INSERTION) {
                // don't enforce adjacency on insertion since we're skipping it
                prevPos.erase(refGenome);
                if (refGenome->getSequenceBySite(refPos) != refGenome->getSequenceBySite(refPos + indel.second)) {
                    // Insertion crosses sequence end
                    failedFiltering = true;
                    break;
                }
                colIt->toSite(refPos + indel.second, end);
                continue;
            } else {
                step = 1;
            }
            const ColumnIterator::ColumnMap *colMap = colIt->getColumnMap();
            if (!knownGoodSites.find(refColPos)) {
                if (!isStrictSingleCopy(colMap, &targets) || !isContiguous(colMap, &prevPos, step, refGenome) ||
                    !isNotAmbiguous(colMap) || (step != 1 && !deletionIsNotAmbiguous(colMap, &prevPos, refGenome))) {
                    failedFiltering = true;
                    if (indel.first == INSERTION) {
                        // failed indel means that we don't have to check anywhere
                        // inside the insertion, it will automatically fail
                        refPos += indel.second;
                    }
                    break;
                } else {
                    knownGoodSites.insert(refColPos);
                }
            }
            updatePrevPos(colMap, &prevPos);
            if (colIt->lastColumn()) {
                break;
            }
            colIt->toRight();
        }
        if (indel.first != NONE && !failedFiltering) {
            if (indel.first == DELETION) {
                const Sequence *seq = refGenome->getSequenceBySite(refPos);
                cout << seq->getName() << "\t" << refPos - seq->getStartPosition() << "\t" << refPos - seq->getStartPosition()
                     << "\tD\t" << indel.second << endl;
            } else {
                const Sequence *seq = refGenome->getSequenceBySite(refPos);
                assert(seq == refGenome->getSequenceBySite(refPos + indel.second));
                cout << seq->getName() << "\t" << refPos - seq->getStartPosition() << "\t"
                     << refPos + indel.second - seq->getStartPosition() << "\tI\t" << endl;
                refPos += indel.second;
            }
        }
        if (!failedFiltering) {
            numSites++;
        }
    }
    cout << "# num sites possible: " << numSites << endl;
}
Exemplo n.º 3
0
// Lift _bedLine over to the target genome column by column and append one
// BED line per mapped interval to mappedBedLines.
//
// The columns of _srcSequence from _bedLine._start through _bedLine._end - 1
// are walked with a column iterator restricted to _tgtSet.  Array indices
// found on sequences of _tgtGenome are collected per sequence, separately
// for forward and reversed DNA iterators.  Every interval reported by the
// collected caches then becomes a copy of _bedLine with its blocks cleared,
// its name/start/end rewritten in target coordinates, and its strand set to
// '+' (forward hits) or '-' (reversed hits); a '.' input strand stays '.'.
// All forward lines are appended before any reverse line.
//
// Side effects: overwrites _colIt and _outParalogy.
void ColumnLiftover::liftInterval(BedList& mappedBedLines)
{  
  // Owns every PositionCache allocated while lifting.  Freeing them from a
  // destructor (instead of a delete at the end of each output loop) means
  // nothing leaks if the column iterator or a container operation throws.
  struct CacheOwner
  {
    PositionMap _caches;

    ~CacheOwner()
    {
      for (PositionMap::iterator c = _caches.begin(); c != _caches.end(); ++c)
      {
        delete c->second;
      }
    }

    // Return the cache stored under idx, creating it on first use.
    PositionCache* fetch(const SeqIndex& idx)
    {
      pair<PositionMap::iterator, bool> res =
         _caches.insert(pair<SeqIndex, PositionCache*>(idx, NULL));
      if (res.second == true)
      {
        res.first->second = new PositionCache();
      }
      return res.first->second;
    }
  };
  // Index 0 collects forward-strand hits, index 1 reversed hits.
  CacheOwner strandCaches[2];
  const char strandChars[2] = {'+', '-'};

  // NOTE(review): the trailing arguments look like HAL's noDupes,
  // noAncestors and reverseStrand flags -- confirm against the
  // getColumnIterator declaration.
  _colIt = _srcSequence->getColumnIterator(&_tgtSet, 0, _bedLine._start, 
                                           _bedLine._end - 1,
                                           !_traverseDupes,
                                           false,
                                           _bedLine._strand == '-');
  while (true) 
  {
    const ColumnMap* cMap = _colIt->getColumnMap();
    for (ColumnMap::const_iterator i = cMap->begin(); i != cMap->end(); ++i)
    {
      // Only sequences of the target genome are lifted.
      if (!(i->first->getGenome() == _tgtGenome))
      {
        continue;
      }
      const DNASet* dSet = i->second;
      const Sequence* seq = i->first;
      // Paralogy is not tracked by this liftover: every key carries 0.
      SeqIndex seqIdx(seq, 0);
      // The key is the same for every element of dSet, so each strand's
      // cache is looked up lazily, at most once per column, instead of once
      // per element.  A strand with no hits never creates a map entry.
      PositionCache* columnCache[2] = {NULL, NULL};
      for (DNASet::const_iterator j = dSet->begin(); j != dSet->end(); ++j)
      {
        const int strand = ((*j)->getReversed() == false) ? 0 : 1;
        if (columnCache[strand] == NULL)
        {
          columnCache[strand] = strandCaches[strand].fetch(seqIdx);
        }
        columnCache[strand]->insert((*j)->getArrayIndex());
      }
    }
    if (_colIt->lastColumn() == true)
    {
      break;
    }
    _colIt->toRight();
  } 

  // Forward hits first, then reversed hits; the two passes differ only in
  // the strand character written to the output line.
  for (int strand = 0; strand < 2; ++strand)
  {
    PositionMap& cacheMap = strandCaches[strand]._caches;
    PositionMap::iterator pcmIt;
    for (pcmIt = cacheMap.begin(); pcmIt != cacheMap.end(); ++pcmIt)
    {
      const Sequence* seq = pcmIt->first.first;
      _outParalogy = pcmIt->first.second;
      // Cached indices are converted to sequence-relative coordinates by
      // subtracting the sequence's start position.
      hal_size_t seqStart = seq->getStartPosition();
      const IntervalSet* iSet = pcmIt->second->getIntervalSet();
      for (IntervalSet::const_iterator k = iSet->begin(); k != iSet->end(); ++k)
      {
        mappedBedLines.push_back(_bedLine);
        BedLine& outBedLine = mappedBedLines.back();
        outBedLine._blocks.clear();
        outBedLine._chrName = seq->getName();
        // As used here, an interval's `second` is its first position and
        // its `first` is its last position; the + 1 makes the end exclusive.
        outBedLine._start = k->second - seqStart;
        outBedLine._end = k->first + 1 - seqStart;
        outBedLine._strand = _bedLine._strand == '.' ? '.' : strandChars[strand];
        outBedLine._srcStart = NULL_INDEX; // not available from posMap
      }
    }
  }
  // The caches are released by the strandCaches destructors.
}