示例#1
0
void LodExtract::writeUnsampledSequence(const Sequence* outSequence,
                                        SegmentIteratorPtr outSegment)
{
  outSegment->setCoordinates(outSequence->getStartPosition(),
                             outSequence->getSequenceLength());
  if (outSegment->isTop())
  {
    assert(outSequence->getNumTopSegments() == 1);
    TopSegmentIteratorPtr top = outSegment.downCast<TopSegmentIteratorPtr>();
    top->setParentIndex(NULL_INDEX);
    top->setParentReversed(false);
    top->setNextParalogyIndex(NULL_INDEX);
    top->setBottomParseIndex(NULL_INDEX);
  }
  else
  {
    assert(outSequence->getNumBottomSegments() == 1);
    BottomSegmentIteratorPtr bottom = 
       outSegment.downCast<BottomSegmentIteratorPtr>();
    hal_size_t numChildren = bottom->getNumChildren();
    for (hal_size_t childNum = 0; childNum < numChildren; ++childNum)
    {
      bottom->setChildIndex(childNum, NULL_INDEX);
      bottom->setChildReversed(childNum, false);
    }
    bottom->setTopParseIndex(NULL_INDEX);
  }
}
示例#2
0
/**
 * Map a segment one level down, into child number childIndex of the genome
 * its target currently lies in.
 *
 * If the target is a bottom segment it is moved directly onto the child's top
 * segment and mappedSeg itself is appended to results. If the target is a top
 * segment it is first parsed down into the overlapping bottom segments of the
 * same genome; each piece becomes a new MappedSegment (its source trimmed by
 * the same amounts) that is mapped recursively.
 *
 * @param mappedSeg  segment to map; taken as a smart pointer because it may
 *                   be stored in results (its target is modified in place)
 * @param childIndex index of the child genome to map into
 * @param results    list receiving every successfully mapped segment
 * @param minLength  bottom segments shorter than this are not mapped
 * @return number of segments appended to results
 */
static hal_size_t mapDown(MappedSegmentPtr mappedSeg, hal_size_t childIndex, list<MappedSegmentPtr> &results,
                          hal_size_t minLength) {
    const Genome *childGenome = mappedSeg->getGenome()->getChild(childIndex);
    assert(childGenome != NULL);

    if (!mappedSeg->isTop()) {
        // Bottom target: a single hop along the child edge, if there is one.
        TopSegmentIteratorPtr childTop = childGenome->getTopSegmentIterator();
        BottomSegmentIteratorPtr parentBot =
            std::dynamic_pointer_cast<BottomSegmentIterator>(mappedSeg->getTargetIteratorPtr());

        if (!parentBot->bseg()->hasChild(childIndex) || parentBot->getLength() < minLength) {
            return 0;
        }
        childTop->toChild(parentBot, childIndex);
        mappedSeg->setTarget(std::dynamic_pointer_cast<SegmentIterator>(childTop));
        results.push_back(mappedSeg);
        return 1;
    }

    // Top target: walk the bottom segments produced by parsing it down, up to
    // the end position of the original target.
    hal_size_t total = 0;
    hal_index_t rightCutoff = mappedSeg->getEndPosition();
    TopSegmentIteratorPtr wholeTop = mappedSeg->targetAsTop();
    hal_index_t startOffset = (hal_index_t)wholeTop->getStartOffset();
    hal_index_t endOffset = (hal_index_t)wholeTop->getEndOffset();
    BottomSegmentIteratorPtr parseBot = mappedSeg->getGenome()->getBottomSegmentIterator();
    parseBot->toParseDown(wholeTop);

    for (;;) {
        BottomSegmentIteratorPtr pieceBot = parseBot->clone();

        // Parse the piece back up to learn how far its offsets moved relative
        // to the original target; the same deltas are applied to the source.
        TopSegmentIteratorPtr roundTripTop = wholeTop->clone();
        roundTripTop->toParseUp(pieceBot);
        hal_index_t startBack = (hal_index_t)roundTripTop->getStartOffset();
        hal_index_t endBack = (hal_index_t)roundTripTop->getEndOffset();
        assert(startBack >= startOffset);
        assert(endBack >= endOffset);

        SegmentIteratorPtr pieceSource = mappedSeg->sourceClone();
        hal_index_t startDelta = startBack - startOffset;
        hal_index_t endDelta = endBack - endOffset;
        assert((hal_index_t)pieceSource->getLength() > startDelta + endDelta);
        pieceSource->slice(pieceSource->getStartOffset() + startDelta, pieceSource->getEndOffset() + endDelta);

        MappedSegmentPtr piece(new MappedSegment(pieceSource, pieceBot));
        assert(piece->isTop() == false);
        assert(piece->getSource()->getGenome() == mappedSeg->getSource()->getGenome());

        total += mapDown(piece, childIndex, results, minLength);

        // The end position has to be tested explicitly before stepping right;
        // avoiding the check would need a fundamental API change.
        if (parseBot->getEndPosition() == rightCutoff) {
            break;
        }
        parseBot->toRight(rightCutoff);
    }
    return total;
}
示例#3
0
/**
 * Map a segment one level up, into the parent of the genome its target
 * currently lies in.
 *
 * If the target is a top segment it is moved directly onto its parent bottom
 * segment and mappedSeg itself is appended to results. If the target is a
 * bottom segment it is first parsed up into the overlapping top segments of
 * the same genome; each piece becomes a new MappedSegment (its source trimmed
 * by the same amounts) that is mapped recursively.
 *
 * @param mappedSeg segment to map; taken as a smart pointer because it may be
 *                  stored in results (its target is modified in place)
 * @param results   list receiving every successfully mapped segment
 * @param doDupes   when false, only top segments reporting
 *                  isCanonicalParalog() are mapped
 * @param minLength top segments shorter than this are not mapped
 * @return number of segments appended to results
 */
static hal_size_t mapUp(MappedSegmentPtr mappedSeg, list<MappedSegmentPtr> &results, bool doDupes, hal_size_t minLength) {
    const Genome *parentGenome = mappedSeg->getGenome()->getParent();
    assert(parentGenome != NULL);

    if (mappedSeg->isTop()) {
        // Top target: a single hop along the parent edge, if it qualifies.
        BottomSegmentIteratorPtr parentBot = parentGenome->getBottomSegmentIterator();
        TopSegmentIteratorPtr childTop = mappedSeg->targetAsTop();
        if (!childTop->tseg()->hasParent() || childTop->getLength() < minLength ||
            (!doDupes && !childTop->tseg()->isCanonicalParalog())) {
            return 0;
        }
        parentBot->toParent(childTop);
        mappedSeg->setTarget(std::dynamic_pointer_cast<SegmentIterator>(parentBot));
        results.push_back(mappedSeg);
        return 1;
    }

    // Bottom target: walk the top segments produced by parsing it up, up to
    // the end position of the original target.
    hal_size_t total = 0;
    hal_index_t rightCutoff = mappedSeg->getEndPosition();
    BottomSegmentIteratorPtr wholeBot = mappedSeg->targetAsBottom();
    hal_index_t startOffset = (hal_index_t)wholeBot->getStartOffset();
    hal_index_t endOffset = (hal_index_t)wholeBot->getEndOffset();
    TopSegmentIteratorPtr parseTop = mappedSeg->getGenome()->getTopSegmentIterator();
    parseTop->toParseUp(wholeBot);

    for (;;) {
        TopSegmentIteratorPtr pieceTop = parseTop->clone();

        // Parse the piece back down to learn how far its offsets moved
        // relative to the original target; the same deltas are applied to the
        // source.
        BottomSegmentIteratorPtr roundTripBot = wholeBot->clone();
        roundTripBot->toParseDown(pieceTop);
        hal_index_t startBack = (hal_index_t)roundTripBot->getStartOffset();
        hal_index_t endBack = (hal_index_t)roundTripBot->getEndOffset();
        assert(startBack >= startOffset);
        assert(endBack >= endOffset);

        SegmentIteratorPtr pieceSource = mappedSeg->sourceClone();
        hal_index_t startDelta = startBack - startOffset;
        hal_index_t endDelta = endBack - endOffset;
        assert((hal_index_t)pieceSource->getLength() > startDelta + endDelta);
        pieceSource->slice(pieceSource->getStartOffset() + startDelta, pieceSource->getEndOffset() + endDelta);

        MappedSegmentPtr piece(new MappedSegment(pieceSource, pieceTop));
        assert(piece->isTop() == true);
        assert(piece->getSource()->getGenome() == mappedSeg->getSource()->getGenome());

        total += mapUp(piece, results, doDupes, minLength);

        // The end position has to be tested explicitly before stepping right;
        // avoiding the check would need a fundamental API change.
        if (parseTop->getEndPosition() == rightCutoff) {
            break;
        }
        parseTop->toRight(rightCutoff);
    }
    return total;
}
示例#4
0
/**
 * Create an independent copy of this mapped segment.
 *
 * Both the source and the target iterator are cloned (through the concrete
 * top/bottom iterator type, which is what provides clone()), so the copy
 * shares no iterator object with the original.
 *
 * @return a newly allocated MappedSegment; it is created with `new` and
 *         returned as a raw pointer
 */
MappedSegment *MappedSegment::clone() const {
    // Source is cloned before target, each via its concrete iterator type.
    SegmentIteratorPtr sourceCopy =
        _source->isTop() ? SegmentIteratorPtr(std::dynamic_pointer_cast<TopSegmentIterator>(_source)->clone())
                         : SegmentIteratorPtr(std::dynamic_pointer_cast<BottomSegmentIterator>(_source)->clone());
    SegmentIteratorPtr targetCopy =
        _target->isTop() ? SegmentIteratorPtr(std::dynamic_pointer_cast<TopSegmentIterator>(_target)->clone())
                         : SegmentIteratorPtr(std::dynamic_pointer_cast<BottomSegmentIterator>(_target)->clone());

    // The copies must describe exactly the same intervals as the originals.
    assert(sourceCopy->getStartPosition() == _source->getStartPosition() &&
           sourceCopy->getEndPosition() == _source->getEndPosition());
    assert(targetCopy->getStartPosition() == _target->getStartPosition() &&
           targetCopy->getEndPosition() == _target->getEndPosition());
    assert(_source->getLength() == _target->getLength());
    assert(sourceCopy->getLength() == targetCopy->getLength());

    MappedSegment *copy = new MappedSegment(sourceCopy, targetCopy);

    assert(copy->getStartPosition() == getStartPosition() && copy->getEndPosition() == getEndPosition() &&
           copy->_source->getStartPosition() == _source->getStartPosition() &&
           copy->_source->getEndPosition() == _source->getEndPosition());
    // ...and must not alias the original iterator objects.
    assert(copy->_source.get() != _source.get() && copy->_target.get() != _target.get());
    return copy;
}
示例#5
0
int MappedSegment::slowComp(const SegmentIteratorPtr &s1, const SegmentIteratorPtr &s2) {
    assert(s1->getGenome() == s2->getGenome());
    int res = 0;
    hal_index_t sp1 = s1->getStartPosition();
    hal_index_t ep1 = s1->getEndPosition();
    hal_index_t sp2 = s2->getStartPosition();
    hal_index_t ep2 = s2->getEndPosition();
    if (s1->getReversed()) {
        swap(sp1, ep1);
    }
    if (s2->getReversed()) {
        swap(sp2, ep2);
    }
    if (sp1 < sp2) {
        res = -1;
    } else if (sp1 > sp2) {
        res = 1;
    } else if (ep1 < ep2) {
        res = -1;
    } else if (ep1 > ep2) {
        res = 1;
    }
    return res;
}
示例#6
0
/**
 * Write segment coordinates from the LOD graph into the output alignment
 * for one parent genome and its children.
 *
 * For every genome (the children first, then the parent) and every sequence
 * of the corresponding output genome, the segment set stored in _graph for
 * the same-named input sequence is copied into consecutive output segments.
 * Children are written through their top segments, the parent through its
 * bottom segments. Sequences whose segment set holds only the two telomere
 * entries are handed to writeUnsampledSequence() when they are non-empty.
 *
 * @param inParent   parent genome in the input alignment
 * @param inChildren child genomes of inParent in the input alignment
 */
void LodExtract::writeSegments(const Genome* inParent,
                               const vector<const Genome*>& inChildren)
{
  // process the children first and the parent last
  vector<const Genome*> inGenomes = inChildren;
  inGenomes.push_back(inParent);
  // output genomes are looked up by the name of their input counterpart
  const Genome* outParent = _outAlignment->openGenome(inParent->getName());
  assert(outParent != NULL && outParent->getNumBottomSegments() > 0);
  // declared outside the loops, so their values persist between sequences
  BottomSegmentIteratorPtr bottom;
  TopSegmentIteratorPtr top;
  SegmentIteratorPtr outSegment;

  // FOR EVERY GENOME
  for (hal_size_t i = 0; i < inGenomes.size(); ++i)
  {
    const Genome* inGenome = inGenomes[i];
    Genome* outGenome = _outAlignment->openGenome(inGenome->getName());

    SequenceIteratorPtr outSeqIt = outGenome->getSequenceIterator();
    SequenceIteratorConstPtr outSeqEnd = outGenome->getSequenceEndIterator();
    
    // FOR EVERY SEQUENCE IN GENOME
    for (; outSeqIt != outSeqEnd; outSeqIt->toNext())
    {
      const Sequence* outSequence = outSeqIt->getSequence();
      const Sequence* inSequence = 
         inGenome->getSequence(outSequence->getName());
      // any genome other than the parent is written via its top segments;
      // the parent (or a sequence without top segments) via bottom segments
      // NOTE(review): if neither branch is taken, outSegment keeps the value
      // from the previous sequence (or stays unset on the first one) --
      // confirm this cannot reach the uses below.
      if (outGenome != outParent && outSequence->getNumTopSegments() > 0)
      {
        top = outSequence->getTopSegmentIterator();
        outSegment = top;
      }
      else if (outSequence->getNumBottomSegments() > 0)
      {
        bottom = outSequence->getBottomSegmentIterator();
        outSegment = bottom;
      }
      const LodGraph::SegmentSet* segSet = _graph.getSegmentSet(inSequence);
      assert(segSet != NULL);
      LodGraph::SegmentSet::const_iterator segIt = segSet->begin();
      // more than the two telomere entries: there are real segments to write
      if (segSet->size() > 2)
      {
        //skip left telomere
        ++segIt;
        // use to skip right telomere:
        LodGraph::SegmentSet::const_iterator segLast = segSet->end();
        --segLast;
      
        // FOR EVERY SEGMENT IN SEQUENCE
        for (; segIt != segLast; ++segIt)
        {
          // write the HAL array index back to the segment to make
          // future passes quicker. 
          (*segIt)->setArrayIndex(outSegment->getArrayIndex());
          outSegment->setCoordinates((*segIt)->getLeftPos(), 
                                     (*segIt)->getLength());
          assert(outSegment->getSequence()->getName() == inSequence->getName());
          // advance to the next output segment
          outSegment->toRight();
        }
      }
      // only the telomeres are present: write one segment spanning the
      // whole sequence, unless the sequence is empty
      else if (outSequence->getSequenceLength() > 0)
      {
        assert(segSet->size() == 2);
        writeUnsampledSequence(outSequence, outSegment);
      }
    }
  } 
}
示例#7
0
int MappedSegment::boundComp(const SegmentIteratorPtr &s1, const SegmentIteratorPtr &s2) {
    int res = 0;
    bool flip = s2->getReversed();
    if (flip) {
        s2->toReverse();
    }

    if (s1->isTop() && !s2->isTop()) {
        BottomSegmentIteratorPtr bot = std::dynamic_pointer_cast<BottomSegmentIterator>(s2);
        hal_index_t lb = bot->bseg()->getTopParseIndex();
        hal_index_t ub = lb;
        if ((hal_size_t)bot->getArrayIndex() < bot->getGenome()->getNumBottomSegments() - 1) {
            bot = bot->clone();
            bot->slice(0, 0);
            bot->toRight();
            ub = bot->bseg()->getTopParseIndex();
        }
        if (s1->getArrayIndex() < lb) {
            res = -1;
        } else if (s1->getArrayIndex() > ub) {
            res = 1;
        }
    } else if (!s1->isTop() && s2->isTop()) {
        TopSegmentIteratorPtr top = std::dynamic_pointer_cast<TopSegmentIterator>(s2);
        hal_index_t lb = top->tseg()->getBottomParseIndex();
        hal_index_t ub = lb;
        if ((hal_size_t)top->getArrayIndex() < top->getGenome()->getNumTopSegments() - 1) {
            top = top->clone();
            top->slice(0, 0);
            top->toRight();
            ub = top->tseg()->getBottomParseIndex();
        }
        if (s1->getArrayIndex() < lb) {
            res = -1;
        } else if (s1->getArrayIndex() > ub) {
            res = 1;
        }
    }

    if (flip) {
        s2->toReverse();
    }

    return res;
}
示例#8
0
int MappedSegment::fastComp(const SegmentIteratorPtr &s1, const SegmentIteratorPtr &s2) {
    // compare without accessing anything from disk (ie using only index
    // and offset)
    int res = 0;
    assert(s1->getGenome() == s2->getGenome());
    if (s1->isTop() != s2->isTop()) {
        res = boundComp(s1, s2);
        if (res == 0) {
            res = slowComp(s1, s2);
        }
    } else {
        if (s1->getArrayIndex() < s2->getArrayIndex()) {
            res = -1;
        } else if (s1->getArrayIndex() > s2->getArrayIndex()) {
            res = 1;
        } else {
            hal_offset_t so1 = s1->getStartOffset();
            hal_offset_t eo1 = s1->getEndOffset();
            if (s1->getReversed()) {
                swap(so1, eo1);
            }
            hal_offset_t so2 = s2->getStartOffset();
            hal_offset_t eo2 = s2->getEndOffset();
            if (s2->getReversed()) {
                swap(so2, eo2);
            }
            if (so1 < so2) {
                res = -1;
            } else if (so1 > so2) {
                res = 1;
            } else if (eo1 > eo2) {
                res = -1;
            } else if (eo1 < eo2) {
                res = 1;
            }
        }
    }
    assert(res == slowComp(s1, s2));
    return res;
}
示例#9
0
/* Smart-pointer front end for halMapSegment(): unwraps `source` and forwards
 * every other argument unchanged, returning halMapSegment()'s result. */
hal_size_t hal::halMapSegmentSP(const SegmentIteratorPtr &source, MappedSegmentSet &outSegments, const Genome *tgtGenome,
                                const std::set<const Genome *> *genomesOnPath, bool doDupes, hal_size_t minLength,
                                const Genome *coalescenceLimit, const Genome *mrca) {
    // `source` keeps ownership; only the raw pointer is passed on.
    auto *rawSource = source.get();
    return halMapSegment(rawSource, outSegments, tgtGenome, genomesOnPath, doDupes, minLength, coalescenceLimit,
                         mrca);
}
示例#10
0
// Map a segment onto the paralogous top segments within its own genome.
//
// If the target is a top segment, a fresh MappedSegment (cloned source,
// cloned target) is appended to results for the target itself and for each
// top segment reached by following its next-paralogy links. If the target is
// a bottom segment and the genome has a parent, the target is first parsed up
// into the overlapping top segments, and each piece is handled recursively.
// A bottom target in a genome without a parent yields nothing.
//
// mappedSeg: segment to map (a smart pointer; it is only read here, the
//            results hold newly created segments)
// results:   list receiving the new mapped segments
// minLength: the paralogy walk stops at a segment shorter than this
// returns:   number of segments appended to results
static hal_size_t mapSelf(MappedSegmentPtr mappedSeg, list<MappedSegmentPtr> &results, hal_size_t minLength) {
    hal_size_t added = 0;
    if (mappedSeg->isTop() == true) {
        SegmentIteratorPtr target = mappedSeg->getTargetIteratorPtr();
        SegmentIteratorPtr source = mappedSeg->getSourceIteratorPtr();
        TopSegmentIteratorPtr top = std::dynamic_pointer_cast<TopSegmentIterator>(target);
        // topCopy walks the paralogy links; top stays put as the start marker
        TopSegmentIteratorPtr topCopy = top->clone();
        do {
            // FIXME: why isn't clone() polymorphic?
            SegmentIteratorPtr newSource;
            if (source->isTop()) {
                newSource = std::dynamic_pointer_cast<TopSegmentIterator>(source)->clone();
            } else {
                newSource = std::dynamic_pointer_cast<BottomSegmentIterator>(source)->clone();
            }
            TopSegmentIteratorPtr newTop = topCopy->clone();
            MappedSegmentPtr newMappedSeg(new MappedSegment(newSource, newTop));
            assert(newMappedSeg->getGenome() == mappedSeg->getGenome());
            assert(newMappedSeg->getSource()->getGenome() == mappedSeg->getSource()->getGenome());
            // the first pass always records the target itself, with no
            // minLength test
            results.push_back(newMappedSeg);
            ++added;
            if (topCopy->tseg()->hasNextParalogy()) {
                topCopy->toNextParalogy();
            }
            // continue while the segment just moved to has a next paralogy,
            // is long enough, and is not the segment the walk started from
        } while (topCopy->tseg()->hasNextParalogy() == true && topCopy->getLength() >= minLength &&
                 topCopy->getArrayIndex() != top->getArrayIndex());
    } else if (mappedSeg->getGenome()->getParent() != NULL) {
        // bottom target: walk the top segments produced by parsing it up,
        // up to the end position of the original target
        hal_index_t rightCutoff = mappedSeg->getEndPosition();
        BottomSegmentIteratorPtr bottom = mappedSeg->targetAsBottom();
        hal_index_t startOffset = (hal_index_t)bottom->getStartOffset();
        hal_index_t endOffset = (hal_index_t)bottom->getEndOffset();
        TopSegmentIteratorPtr top = mappedSeg->getGenome()->getTopSegmentIterator();
        top->toParseUp(bottom);
        do {
            TopSegmentIteratorPtr topNew = top->clone();

            // we map the new target back to see how the offsets have
            // changed.  these changes are then applied to the source segment
            // as deltas
            BottomSegmentIteratorPtr bottomBack = bottom->clone();
            bottomBack->toParseDown(topNew);
            hal_index_t startBack = (hal_index_t)bottomBack->getStartOffset();
            hal_index_t endBack = (hal_index_t)bottomBack->getEndOffset();
            assert(startBack >= startOffset);
            assert(endBack >= endOffset);
            SegmentIteratorPtr newSource = mappedSeg->sourceClone();
            hal_index_t startDelta = startBack - startOffset;
            hal_index_t endDelta = endBack - endOffset;
            assert((hal_index_t)newSource->getLength() > startDelta + endDelta);
            newSource->slice(newSource->getStartOffset() + startDelta, newSource->getEndOffset() + endDelta);

            MappedSegmentPtr newMappedSeg(new MappedSegment(newSource, topNew));

            assert(newMappedSeg->isTop() == true);
            assert(newMappedSeg->getSource()->getGenome() == mappedSeg->getSource()->getGenome());

            // the new piece has a top target, so this lands in the branch above
            added += mapSelf(newMappedSeg, results, minLength);
            // stupid that we have to make this check but don't want to
            // make fundamental api change now
            if (top->getEndPosition() != rightCutoff) {
                top->toRight(rightCutoff);
            } else {
                break;
            }
        } while (true);
    }
    return added;
}