Esempio n. 1
0
// Set top segments to be equal width and so that segment 1, 2, 3,
// etc. corresponds to parent segment 1, 2, 3, etc.
void setTopSegments(Genome *genome, hal_size_t width) {
    TopSegmentIteratorPtr topIt = genome->getTopSegmentIterator();
    hal_size_t n = genome->getNumTopSegments();
    hal_index_t startPos = 0;
    for (; topIt->getArrayIndex() < n; topIt->toRight(), startPos += width) {
        topIt->setCoordinates(startPos, width);
        topIt->tseg()->setParentIndex(topIt->getArrayIndex());
        topIt->tseg()->setParentReversed(false);
        topIt->tseg()->setBottomParseIndex(NULL_INDEX);
        topIt->tseg()->setNextParalogyIndex(NULL_INDEX);
    }
}
Esempio n. 2
0
void Genome::copyTopSegments(Genome *dest) const
{
  const Genome *inParent = getParent();
  const Genome *outParent = dest->getParent();

  TopSegmentIteratorConstPtr inTop = getTopSegmentIterator();
  TopSegmentIteratorPtr outTop = dest->getTopSegmentIterator();
  hal_size_t n = dest->getNumTopSegments();
  assert(n == 0 || n == getNumTopSegments());

  if (n == 0) {
    // Nothing to do if there are no top segments.
    return;
  }

  BottomSegmentIteratorConstPtr inParentBottomSegIt = inParent->getBottomSegmentIterator();
  BottomSegmentIteratorConstPtr outParentBottomSegIt = outParent->getBottomSegmentIterator();

  for (; (hal_size_t)inTop->getArrayIndex() < n; inTop->toRight(),
         outTop->toRight())
  {
    hal_index_t genomePos = inTop->getStartPosition();
    assert(genomePos != NULL_INDEX);
    string inSeqName = getSequenceBySite(genomePos)->getName();
    string outSeqName = dest->getSequenceBySite(genomePos)->getName();
    // if (inSeqName != outSeqName) {
    //   stringstream ss;
    //   ss << "When copying top segments from " << getName() << " to " << dest->getName() << ": sequence " << inSeqName << " != " << outSeqName << " at site " << genomePos;
    //   throw hal_exception(ss.str());
    // }

    outTop->setCoordinates(inTop->getStartPosition(), inTop->getLength());
    outTop->setParentIndex(inTop->getParentIndex());
    outTop->setParentReversed(inTop->getParentReversed());
    outTop->setBottomParseIndex(inTop->getBottomParseIndex());
    outTop->setNextParalogyIndex(inTop->getNextParalogyIndex());

    // Check that the sequences from the bottom segments we point to are the same. If not, correct the indices so that they are.
    if (inTop->getParentIndex() != NULL_INDEX) {
      inParentBottomSegIt->toParent(inTop);

      const Sequence *inParentSequence = inParentBottomSegIt->getSequence();

      const Sequence *outParentSequence = outParent->getSequence(inParentSequence->getName());

      hal_index_t inParentSegmentOffset = inTop->getParentIndex() - inParentSequence->getBottomSegmentArrayIndex();
      hal_index_t outParentSegmentIndex = inParentSegmentOffset + outParentSequence->getBottomSegmentArrayIndex();

      outTop->setParentIndex(outParentSegmentIndex);
    }
  }
}
Esempio n. 3
0
void GenomeCopyTest::createCallBack(Alignment *alignment) {
    hal_size_t alignmentSize = alignment->getNumGenomes();
    CuAssertTrue(_testCase, alignmentSize == 0);

    // Hacky: Need a different alignment to test copying the bottom
    // segments correctly.  (the names of a node's children are used
    // when copying bottom segments, and two genomes can't have the same
    // name in the same alignment)
    _path = getTempFile();
    _secondAlignment =
        AlignmentPtr(getTestAlignmentInstances(alignment->getStorageFormat(), _path, WRITE_ACCESS | CREATE_ACCESS));

    Genome *ancGenome = alignment->addRootGenome("AncGenome", 0);
    Genome *leafGenome = alignment->addLeafGenome("LeafGenome1", "AncGenome", 0);
    // This genome will test copyDimensions, copyTopSegments,
    // copyBottomSegments, copySequence, copyMetadata
    Genome *copyRootGenome = _secondAlignment->addRootGenome("copyRootGenome", 0);
    Genome *copyLeafGenome = _secondAlignment->addLeafGenome("LeafGenome1", "copyRootGenome", 0);

    MetaData *ancMeta = ancGenome->getMetaData();
    ancMeta->set("Young", "Jeezy");

    vector<Sequence::Info> seqVec(1);
    seqVec[0] = Sequence::Info("Sequence", 1000000, 0, 700000);
    ancGenome->setDimensions(seqVec);
    seqVec[0] = Sequence::Info("Sequence", 1000000, 5000, 0);
    leafGenome->setDimensions(seqVec);
    string ancSeq = "CAT";
    hal_index_t n = ancGenome->getSequenceLength();
    DnaIteratorPtr dnaIt = ancGenome->getDnaIterator();
    for (; dnaIt->getArrayIndex() < n; dnaIt->toRight()) {
        size_t i = dnaIt->getArrayIndex() % ancSeq.size();
        dnaIt->setBase(ancSeq[i]);
    }
    dnaIt->flush();

    n = leafGenome->getSequenceLength();
    dnaIt = leafGenome->getDnaIterator();
    for (; dnaIt->getArrayIndex() < n; dnaIt->toRight()) {
        size_t i = dnaIt->getArrayIndex() % ancSeq.size();
        dnaIt->setBase(ancSeq[i]);
    }
    dnaIt->flush();

    TopSegmentIteratorPtr topIt = leafGenome->getTopSegmentIterator();
    n = leafGenome->getNumTopSegments();
    for (; topIt->getArrayIndex() < n; topIt->toRight()) {
        topIt->setCoordinates(topIt->getArrayIndex(), 1);
        topIt->tseg()->setParentIndex(3);
        topIt->tseg()->setParentReversed(true);
        topIt->tseg()->setBottomParseIndex(5);
        if (topIt->getArrayIndex() != 6) {
            topIt->tseg()->setNextParalogyIndex(6);
        } else {
            topIt->tseg()->setNextParalogyIndex(7);
        }
    }
    BottomSegmentIteratorPtr botIt = ancGenome->getBottomSegmentIterator();
    n = ancGenome->getNumBottomSegments();
    for (; botIt->getArrayIndex() < n; botIt->toRight()) {
        botIt->setCoordinates(botIt->getArrayIndex(), 1);
        botIt->bseg()->setChildIndex(0, 3);
        botIt->bseg()->setChildReversed(0, true);
        botIt->bseg()->setTopParseIndex(5);
    }

    seqVec[0] = Sequence::Info("Sequence", 3300, 0, 1100);
    copyRootGenome->setDimensions(seqVec);
    seqVec[0] = Sequence::Info("Sequence", 3300, 2200, 0);
    copyLeafGenome->setDimensions(seqVec);
    string copySeq = "TAG";
    dnaIt = copyRootGenome->getDnaIterator();
    n = copyRootGenome->getSequenceLength();
    for (; dnaIt->getArrayIndex() < n; dnaIt->toRight()) {
        size_t i = dnaIt->getArrayIndex() % copySeq.size();
        dnaIt->setBase(copySeq[i]);
    }
    dnaIt->flush();

    dnaIt = copyLeafGenome->getDnaIterator();
    n = copyLeafGenome->getSequenceLength();
    for (; dnaIt->getArrayIndex() < n; dnaIt->toRight()) {
        size_t i = dnaIt->getArrayIndex() % copySeq.size();
        dnaIt->setBase(copySeq[i]);
    }
    dnaIt->flush();

    topIt = copyLeafGenome->getTopSegmentIterator();
    n = copyLeafGenome->getNumTopSegments();
    for (; topIt->getArrayIndex() < n; topIt->toRight()) {
        topIt->setCoordinates(7, 8);
        topIt->tseg()->setParentIndex(9);
        topIt->tseg()->setParentReversed(false);
        topIt->tseg()->setBottomParseIndex(11);
        if (topIt->getArrayIndex() != 12) {
            topIt->tseg()->setNextParalogyIndex(12);
        } else {
            topIt->tseg()->setNextParalogyIndex(7);
        }
    }
    botIt = copyRootGenome->getBottomSegmentIterator();
    n = copyRootGenome->getNumBottomSegments();
    for (; botIt->getArrayIndex() < n; botIt->toRight()) {
        botIt->setCoordinates(6, 7);
        botIt->bseg()->setChildIndex(0, 8);
        botIt->bseg()->setChildReversed(0, false);
        botIt->bseg()->setTopParseIndex(10);
    }

    ancGenome->copy(copyRootGenome);
    leafGenome->copy(copyLeafGenome);
    _secondAlignment->close();
}