/*
 * Tests whether two caps face each other across a stretch of the same meta-sequence.
 *
 * Caps that share a name or a coordinate, or that lie on different meta-sequences,
 * are never adjacent. Otherwise both caps are normalised to the orientation for
 * which cap_getStrand() is true; they are adjacent when the cap with the lower
 * coordinate has side false and the cap with the higher coordinate has side true.
 *
 * Returns true and writes the number of positions strictly between the two caps
 * to *separationDistance when they are adjacent. Returns false otherwise, in which
 * case *separationDistance is not written.
 */
bool capsAreAdjacent(Cap *cap1, Cap *cap2, int64_t *separationDistance) {
    //Identical names or coordinates: this can happen if end1 == end2.
    if (cap_getName(cap2) == cap_getName(cap1) || cap_getCoordinate(cap1) == cap_getCoordinate(cap2)) {
        return 0;
    }
    if (sequence_getMetaSequence(cap_getSequence(cap1)) != sequence_getMetaSequence(cap_getSequence(cap2))) {
        return 0;
    }

    //Sanity checks on two distinct caps sharing a meta-sequence.
    assert(strcmp(event_getHeader(cap_getEvent(cap1)), event_getHeader(
                            cap_getEvent(cap2))) == 0);
    assert(cap_getPositiveOrientation(cap1)
            != cap_getPositiveOrientation(cap2));
    assert(cap_getName(cap1) != cap_getName(cap2));
    assert(sequence_getMetaSequence(cap_getSequence(cap1))
                        == sequence_getMetaSequence(cap_getSequence(cap2)));

    //Work with the orientation of each cap that reports a true strand.
    Cap *strandCap1 = cap_getStrand(cap1) ? cap1 : cap_getReverse(cap1);
    Cap *strandCap2 = cap_getStrand(cap2) ? cap2 : cap_getReverse(cap2);
    assert(cap_getStrand(strandCap1));
    assert(cap_getStrand(strandCap2));

    if (cap_getCoordinate(strandCap1) < cap_getCoordinate(strandCap2)) {
        //cap1 comes first along the sequence.
        if (cap_getSide(strandCap1) || !cap_getSide(strandCap2)) {
            return 0;
        }
        //The minus 1, to give the length of the sequence between the two caps.
        *separationDistance = cap_getCoordinate(strandCap2) - cap_getCoordinate(strandCap1) - 1;
        return 1;
    }

    //cap2 comes first along the sequence.
    if (!cap_getSide(strandCap1) || cap_getSide(strandCap2)) {
        return 0;
    }
    *separationDistance = cap_getCoordinate(strandCap1) - cap_getCoordinate(strandCap2) - 1;
    return 1;
}
/*
 * Walks along the thread starting at 'cap', alternating adjacency and segment,
 * accumulating the traversed length into *pathLength and the number of Ns into *nCount.
 *
 * The walk stops when:
 *  - the adjacency from the current cap leads to no segment: *pathEndCap is set to the
 *    adjacency of the terminal cap and 0 is returned; or
 *  - the end on the far side of the adjacency has a cap in the contamination or haplotype
 *    events: *pathEndCap is set to that cap and 1 is returned. The segment behind that
 *    cap is not added to the totals.
 *
 * *pathLength and *nCount are incremented, not reset, so the caller chooses their initial values.
 */
bool getCapGetAtEndOfPath(Cap *cap, Cap **pathEndCap,
        int64_t *pathLength, int64_t *nCount, stList *haplotypeEventStrings, stList *contaminationEventStrings) {
    for (;;) {
        //Account for length of adjacency
        *pathLength += getTerminalAdjacencyLength(cap);
        *nCount += getNumberOfNsInAdjacency(cap);

        Segment *nextSegment = getAdjacentCapsSegment(cap);
        if (nextSegment == NULL) {
            //The thread ends here.
            *pathEndCap = cap_getAdjacency(getTerminalCap(cap));
            assert(*pathEndCap != NULL);
            return 0;
        }

        //Pick the cap of the segment that we arrive at across the adjacency.
        Cap *arrivalCap;
        if (cap_getSide(cap)) {
            arrivalCap = segment_get3Cap(nextSegment);
        } else {
            arrivalCap = segment_get5Cap(nextSegment);
        }
        assert(
                cap_getName(arrivalCap) == cap_getName(
                        cap_getAdjacency(getTerminalCap(cap))));

        End *arrivalEnd = cap_getEnd(arrivalCap);
        if (hasCapInEvents(arrivalEnd, contaminationEventStrings) || hasCapInEvents(arrivalEnd, haplotypeEventStrings)) {
            *pathEndCap = arrivalCap;
            return 1;
        }

        //Cross the segment and continue from its other cap.
        *pathLength += segment_getLength(nextSegment);
        *nCount += getNumberOfNsInSegment(nextSegment);
        cap = cap_getOtherSegmentCap(arrivalCap);
    }
}
示例#3
0
/*
 * Checks that end_getInstance() finds each fixture cap by name. For rootCap and
 * leaf1Cap the end is expected to hold the reverse of the fixture variable; for
 * leaf2Cap it is expected to hold the fixture variable itself.
 */
void testEnd_getInstance(CuTest* testCase) {
    cactusEndTestSetup();

    Cap *rootInstance = end_getInstance(end, cap_getName(rootCap));
    CuAssertTrue(testCase, rootInstance == cap_getReverse(rootCap));

    Cap *leaf1Instance = end_getInstance(end, cap_getName(leaf1Cap));
    CuAssertTrue(testCase, leaf1Instance == cap_getReverse(leaf1Cap));

    Cap *leaf2Instance = end_getInstance(end, cap_getName(leaf2Cap));
    CuAssertTrue(testCase, leaf2Instance == leaf2Cap);

    cactusEndTestTeardown();
}
static void setAdjacencyLengthsAndRecoverNewCapsAndBrokenAdjacencies(Cap *cap, stList *recoveredCaps) {
    /*
     * Walks the thread that starts at 'cap' and records, for each adjacency on it,
     * the length of the adjacency sequence (via setAdjacencyLength).
     * Used to build the reference sequence bottom up.
     *
     * For an adjacency in a leaf group the length is 0. Otherwise the length is obtained by
     * tracing the corresponding thread in the nested flower.
     *
     * One complexity is that a thread that is a single adjacency in this flower may be broken
     * into two in the nested flower, i.e. tracing from the nested copy of 'cap' does not arrive
     * at the nested copy of its adjacent cap. In that case the two caps at which the nested
     * traces stop are copied into this flower (copyCapToParent, which may add them to
     * 'recoveredCaps'), and the single adjacency is replaced by two adjacencies, each with its own length.
     */
    do {
        Cap *partnerCap = cap_getAdjacency(cap);
        assert(partnerCap != NULL);
        //Coordinates are expected to be unset (INT64_MAX) before this pass.
        assert(cap_getCoordinate(cap) == INT64_MAX);
        assert(cap_getCoordinate(partnerCap) == INT64_MAX);
        assert(cap_getStrand(cap) == cap_getStrand(partnerCap));
        assert(cap_getSide(cap) != cap_getSide(partnerCap));
        Group *group = end_getGroup(cap_getEnd(cap));
        assert(group != NULL);

        if (group_isLeaf(group)) {
            //Terminal adjacency: set the coordinates of the caps to the adjacency size
            setAdjacencyLength(cap, partnerCap, 0);
        } else {
            //Adjacency is not terminal, so establish its sequence from the nested flower.
            Flower *childFlower = group_getNestedFlower(group);
            Cap *childCap = flower_getCap(childFlower, cap_getName(cap));
            assert(childCap != NULL);
            Cap *childPartnerCap = flower_getCap(childFlower, cap_getName(partnerCap));
            assert(childPartnerCap != NULL);

            Cap *threadStopCap;
            int64_t threadLength = traceThreadLength(childCap, &threadStopCap);
            assert(cap_getOrientation(childPartnerCap));

            if (cap_getPositiveOrientation(threadStopCap) == childPartnerCap) {
                //The thread is not broken at the lower level
                setAdjacencyLength(cap, partnerCap, threadLength);
            } else {
                //The thread is broken at the lower level.
                //First half: cap -> copy of the cap at which the nested trace stopped.
                threadStopCap = copyCapToParent(threadStopCap, recoveredCaps);
                assert(cap_getSide(threadStopCap));
                cap_makeAdjacent(cap, threadStopCap);
                setAdjacencyLength(cap, threadStopCap, threadLength);

                //Second half: trace back from the nested partner and attach what it reaches.
                threadLength = traceThreadLength(childPartnerCap, &threadStopCap);
                assert(cap_getPositiveOrientation(threadStopCap) != cap);
                threadStopCap = copyCapToParent(threadStopCap, recoveredCaps);
                assert(!cap_getSide(threadStopCap));
                cap_makeAdjacent(threadStopCap, partnerCap);
                setAdjacencyLength(partnerCap, threadStopCap, threadLength);
            }
        }

        //Step over the segment behind the partner cap; NULL means the thread is finished.
        cap = cap_getOtherSegmentCap(partnerCap);
    } while (cap != NULL);
}
/*
 * Writes a "geneMap" element to fileHandle describing how the genes in the linked
 * list starting at 'gene' map onto the given flower.
 *
 * For every gene, and for every thread start cap returned by
 * flower_getThreadStarts(flower, species), a <gene> element is written, the first
 * exon element (id 0) is opened, and mapGene() is called to traverse from that cap.
 */
void mapGenes(Flower *flower, FILE *fileHandle, struct bed *gene, char *species){
   const int level = 0;//Flower level

   st_logInfo("Flower %s\n", cactusMisc_nameToString(flower_getName(flower)));
   printOpeningTag("geneMap", fileHandle);
   fprintf(fileHandle, "\n");

   for(; gene != NULL; gene = gene->next){
      st_logInfo("Gene %s:\n", gene->name);

      //Get the starts of the target sequence threads
      struct List *threadStarts = flower_getThreadStarts(flower, species);
      for(int i = 0; i < threadStarts->length; i++){
         Cap *startCap = threadStarts->list[i];
         st_logInfo("Cap %d, %s\n", i, cactusMisc_nameToString(cap_getName(startCap)));

         //Header of the gene, followed by the opening tag of its first exon
         fprintf(fileHandle, "\t<gene name=\"%s\" target=\"%s\" start=\"%" PRIi64 "\" end=\"%" PRIi64 "\" exonCount=\"%" PRIi64 "\" strand=\"%c\">\n",
                 gene->name, species, gene->chromStart, gene->chromEnd, gene->blockCount, gene->strand[0]);
         fprintf(fileHandle, "\t\t<exon id=\"0\" start=\"%" PRIi64 "\" end=\"%" PRIi64 "\">\n",
                 gene->chromStart, gene->chromStart + gene->blockSizes->list[0]);

         //Traverse cactus and get regions that overlap with exons of the gene, report the involved chains relations
         mapGene(startCap, level, 0, gene, fileHandle);

         fprintf(fileHandle, "\t</gene>\n");
      }
   }

   printClosingTag("geneMap", fileHandle);
}
/*
 * Descends through nested flowers to the lowest-level copy of 'cap'.
 *
 * While the group containing the cap's end has a nested flower, the cap with the
 * same name is looked up in that flower and given the same orientation as the
 * current cap. Returns the first cap whose group has no nested flower.
 */
Cap *getTerminalCap(Cap *cap) {
    for (;;) {
        Flower *childFlower = group_getNestedFlower(end_getGroup(cap_getEnd(cap)));
        if (childFlower == NULL) {
            return cap;
        }
        Cap *childCap = flower_getCap(childFlower, cap_getName(cap));
        assert(childCap != NULL);
        //Keep the orientation of the cap we are descending from.
        if (cap_getOrientation(cap)) {
            cap = childCap;
        } else {
            cap = cap_getReverse(childCap);
        }
    }
}
/*
 * Returns getBoundingNsP() for the segment associated with 'cap' (as found by
 * getCapsSegment), passing the segment in the orientation whose 5' terminal cap
 * has the same name as 'cap'. Returns 0 when the cap has no segment.
 */
static int64_t getBoundingNs(Cap *cap) {
    assert(cap != NULL);
    Segment *capSegment = getCapsSegment(cap);
    if (capSegment == NULL) {
        return 0;
    }

    Cap *terminal5Cap = getTerminalCap(segment_get5Cap(capSegment));
    Cap *terminal3Cap = getTerminalCap(segment_get3Cap(capSegment));
    (void) terminal3Cap; //Only read inside asserts.
    assert(terminal5Cap != NULL);
    assert(terminal3Cap != NULL);

    if (cap_getName(terminal5Cap) != cap_getName(cap)) {
        //The cap is on the 3' side, so look at the segment from the other direction.
        assert(cap_getName(terminal3Cap) == cap_getName(cap));
        return getBoundingNsP(segment_getReverse(capSegment));
    }
    return getBoundingNsP(capSegment);
}
/*
 * Builds an AdjacencySequence from the fixture cap1 (second argument INT64_MAX,
 * presumably a maximum length - confirm against adjacencySequence_construct) and
 * checks its identifier, start, strand, length and string.
 *
 * CuAssertIntEquals takes (testCase, expected, actual); the expected constant is
 * passed first so that a failure message reports the values the right way round.
 */
static void testAdjacencySequence_1(CuTest *testCase) {
   setup();
   AdjacencySequence *adjacencySequence = adjacencySequence_construct(cap1, INT64_MAX);
   CuAssertTrue(testCase, adjacencySequence->subsequenceIdentifier == cap_getName(cap1));
   CuAssertIntEquals(testCase, 1, adjacencySequence->start);
   CuAssertIntEquals(testCase, 1, adjacencySequence->strand);
   CuAssertIntEquals(testCase, 4, adjacencySequence->length);
   CuAssertStrEquals(testCase, "ACTG", adjacencySequence->string);
   adjacencySequence_destruct(adjacencySequence);
   teardown();
}
/*
 * Builds an AdjacencySequence from the fixture cap7 (second argument INT64_MAX,
 * presumably a maximum length - confirm against adjacencySequence_construct) and
 * checks its fields. The identifier is expected to be the name of cap8, and the
 * strand is expected to be 0.
 *
 * CuAssertIntEquals takes (testCase, expected, actual); the expected constant is
 * passed first so that a failure message reports the values the right way round.
 */
static void testAdjacencySequence_5(CuTest *testCase) {
   setup();
   AdjacencySequence *adjacencySequence = adjacencySequence_construct(cap7, INT64_MAX);
   CuAssertTrue(testCase, adjacencySequence->subsequenceIdentifier == cap_getName(cap8));
   CuAssertIntEquals(testCase, 6, adjacencySequence->start);
   CuAssertIntEquals(testCase, 0, adjacencySequence->strand);
   CuAssertIntEquals(testCase, 6, adjacencySequence->length);
   CuAssertStrEquals(testCase, "CCGGTT", adjacencySequence->string);
   adjacencySequence_destruct(adjacencySequence);
   teardown();
}
示例#10
0
/*
 * Round-trips the fixture end through its binary representation, re-binds the
 * fixture cap variables to the caps of the reloaded end, and then re-runs the
 * other end tests against the reloaded end with nestedTest set to 1.
 */
void testEnd_serialisation(CuTest* testCase) {
    cactusEndTestSetup();

    //Remember the cap names: they are used to look the caps up again after reloading.
    Name rootName = cap_getName(rootCap);
    Name leaf1Name = cap_getName(leaf1Cap);
    Name leaf2Name = cap_getName(leaf2Cap);
    Name leaf3Name = cap_getName(leaf3Cap);

    //Serialise the end; recordSize is filled in by the call and must be positive
    //(presumably the size of the serialised record - confirm against binaryRepresentation_makeBinaryRepresentation).
    int64_t recordSize;
    void *record = binaryRepresentation_makeBinaryRepresentation(end,
            (void(*)(void *, void(*)(const void *, size_t, size_t))) end_writeBinaryRepresentation, &recordSize);
    CuAssertTrue(testCase, recordSize > 0);

    //Replace the end by one loaded from the record. The loader is handed a separate
    //pointer to the record so that 'record' itself can still be freed afterwards.
    end_destruct(end);
    void *readPosition = record;
    end = end_loadFromBinaryRepresentation(&readPosition, flower);

    //Re-bind the fixture caps, using the same orientations as the fixture variables had.
    rootCap = cap_getReverse(end_getInstance(end, rootName));
    leaf1Cap = cap_getReverse(end_getInstance(end, leaf1Name));
    leaf2Cap = end_getInstance(end, leaf2Name);
    leaf3Cap = cap_getReverse(end_getInstance(end, leaf3Name));
    CuAssertTrue(testCase, leaf3Cap != NULL);
    free(record);

    //Run the remaining tests on the reloaded end.
    nestedTest = 1;
    testEnd_copyConstruct(testCase);
    testEnd_getName(testCase);
    testEnd_getOrientation(testCase);
    testEnd_getReverse(testCase);
    testEnd_getSide(testCase);
    testEnd_getFlower(testCase);
    testEnd_getBlock(testCase);
    testEnd_getOtherBlockEnd(testCase);
    testEnd_getGroup(testCase);
    testEnd_setGroup(testCase);
    testEnd_getInstanceNumber(testCase);
    testEnd_getInstance(testCase);
    testEnd_getFirst(testCase);
    testEnd_getSetRootInstance(testCase);
    testEnd_instanceIterator(testCase);
    testEnd_isBlockOrStubEnd(testCase);
    testEnd_isAttachedOrFree(testCase);
    testEnd_getCapForEvent(testCase);
    nestedTest = 0;

    cactusEndTestTeardown();
}
/*
 * Decides whether two ends are connected for the given events.
 *
 * If both ends have the same name they are the same end and are part of the same
 * chromosome by definition; the result is then true iff the end has a cap for which
 * capHasGivenEvents(cap, eventStrings) holds.
 *
 * Otherwise the result is true iff some cap of end1 that satisfies capHasGivenEvents
 * shares its meta-sequence with some cap of end2.
 *
 * Every instance iterator created here is destructed on every return path. (The
 * same-end branch previously returned 0 without destructing its iterator.)
 */
bool endsAreConnected(End *end1, End *end2, stList *eventStrings) {
    bool connected = 0;
    End_InstanceIterator *instanceIterator = end_getInstanceIterator(end1);
    Cap *cap1;

    if (end_getName(end1) == end_getName(end2)) { //Then the ends are the same and are part of the same chromosome by definition.
        while (!connected && (cap1 = end_getNext(instanceIterator)) != NULL) {
            if (capHasGivenEvents(cap1, eventStrings)) {
                connected = 1;
            }
        }
        end_destructInstanceIterator(instanceIterator);
        return connected;
    }

    while (!connected && (cap1 = end_getNext(instanceIterator)) != NULL) {
        if (!capHasGivenEvents(cap1, eventStrings)) {
            continue;
        }
        End_InstanceIterator *instanceIterator2 = end_getInstanceIterator(end2);
        Cap *cap2;
        while (!connected && (cap2 = end_getNext(instanceIterator2)) != NULL) {
            assert(cap_getName(cap2) != cap_getName(cap1)); //This could only happen if end1 == end2
            if (sequence_getMetaSequence(cap_getSequence(cap1)) == sequence_getMetaSequence(cap_getSequence(cap2))) {
                assert(strcmp(event_getHeader(cap_getEvent(cap1)),
                                event_getHeader(cap_getEvent(cap2))) == 0);
                assert(cap_getPositiveOrientation(cap1)
                        != cap_getPositiveOrientation(cap2));
                assert(cap_getName(cap1) != cap_getName(cap2));
                //they could have the same coordinate if they represent two ends of a block of length 1.
                connected = 1;
            }
        }
        end_destructInstanceIterator(instanceIterator2);
    }
    end_destructInstanceIterator(instanceIterator);
    return connected;
}
void topDown(Flower *flower, Name referenceEventName) {
    /*
     * Run on each flower, top down. For every end of the flower that has a cap for the
     * reference event, takes that cap in the orientation with a true strand; if the cap has
     * side false and its group is not a leaf, calls setCoordinates() on the nested flower,
     * starting from the nested copy of the cap and from the cap's own coordinate.
     *
     * The coordinate returned by setCoordinates() is only checked (in asserts) against the
     * coordinate of the cap's adjacency, both in this flower and in the nested flower.
     */
    Flower_EndIterator *endIt = flower_getEndIterator(flower);
    End *end;
    while ((end = flower_getNextEnd(endIt)) != NULL) {
        Cap *referenceCap = getCapForReferenceEvent(end, referenceEventName); //The cap in the reference
        if (referenceCap == NULL) {
            continue;
        }
        if (!cap_getStrand(referenceCap)) {
            referenceCap = cap_getReverse(referenceCap);
        }
        if (cap_getSide(referenceCap)) {
            continue; //Only caps with side false start a nested traversal.
        }

        assert(cap_getCoordinate(referenceCap) != INT64_MAX);
        Sequence *sequence = cap_getSequence(referenceCap);
        assert(sequence != NULL);

        Group *group = end_getGroup(end);
        if (group_isLeaf(group)) {
            continue; //Nothing nested below this end.
        }

        Flower *childFlower = group_getNestedFlower(group);
        Cap *childCap = flower_getCap(childFlower, cap_getName(referenceCap));
        assert(childCap != NULL);
        if (!cap_getStrand(childCap)) {
            childCap = cap_getReverse(childCap);
        }
        assert(cap_getStrand(childCap));
        assert(!cap_getSide(childCap));

        int64_t endCoordinate = setCoordinates(childFlower, sequence_getMetaSequence(sequence),
                                               childCap, cap_getCoordinate(referenceCap));
        (void) endCoordinate; //Only read inside asserts.
        assert(endCoordinate == cap_getCoordinate(cap_getAdjacency(referenceCap)));
        assert(endCoordinate
               == cap_getCoordinate(
                   flower_getCap(childFlower, cap_getName(cap_getAdjacency(referenceCap)))));
    }
    flower_destructEndIterator(endIt);
}
static Cap *copyCapToParent(Cap *cap, stList *recoveredCaps) {
    /*
     * Copies the end that 'cap' belongs to into the parent flower, i.e. the flower of the
     * parent group of the cap's flower, and assigns the copied end to that parent group.
     *
     * Returns the cap of the copied end that has the same name as 'cap', in the orientation
     * for which cap_getStrand() is true. If that cap has side false it is also appended to
     * 'recoveredCaps'.
     */
    End *sourceEnd = cap_getEnd(cap);
    assert(sourceEnd != NULL);
    Group *parentGroup = flower_getParentGroup(end_getFlower(sourceEnd));
    assert(parentGroup != NULL);

    End *parentEnd = end_copyConstruct(sourceEnd, group_getFlower(parentGroup));
    end_setGroup(parentEnd, parentGroup); //Set group

    Cap *parentCap = end_getInstance(parentEnd, cap_getName(cap));
    assert(parentCap != NULL);
    if (!cap_getStrand(parentCap)) {
        parentCap = cap_getReverse(parentCap);
    }
    if (cap_getSide(parentCap)) {
        return parentCap;
    }
    stList_append(recoveredCaps, parentCap);
    return parentCap;
}
示例#14
0
/*
 * Copies the fixture end into a freshly constructed flower and checks that the copy
 * keeps the end's name, is registered in the new flower under that name, and
 * contains caps with the names of rootCap, leaf1Cap and leaf2Cap.
 */
void testEnd_copyConstruct(CuTest* testCase) {
    cactusEndTestSetup();

    //The target flower gets a copy of the event tree and the sequence before the end is copied in.
    Flower *targetFlower = flower_construct(cactusDisk);
    eventTree_copyConstruct(eventTree, targetFlower, testEnd_copyConstructP);
    sequence_construct(metaSequence, targetFlower);

    End *copiedEnd = end_copyConstruct(end, targetFlower);

    //The copy has a real name, identical to the original's, and can be found through the flower.
    CuAssertTrue(testCase, end_getName(copiedEnd) != NULL_NAME);
    CuAssertTrue(testCase, end_getName(copiedEnd) == end_getName(end));
    CuAssertTrue(testCase, flower_getEnd(targetFlower, end_getName(copiedEnd)) == copiedEnd);

    //The caps were copied along with the end.
    CuAssertTrue(testCase, cap_getName(rootCap) == cap_getName(end_getInstance(copiedEnd, cap_getName(rootCap))));
    CuAssertTrue(testCase, cap_getName(leaf1Cap) == cap_getName(end_getInstance(copiedEnd, cap_getName(leaf1Cap))));
    CuAssertTrue(testCase, cap_getName(leaf2Cap) == cap_getName(end_getInstance(copiedEnd, cap_getName(leaf2Cap))));

    cactusEndTestTeardown();
}
/*
 * Returns the segment attached to 'cap'. If the cap has no segment (it must then
 * belong to a stub end), the cap with the same name is looked up in the parent
 * flower, in the same orientation as 'cap', and the search is repeated from there.
 *
 * Returns NULL when a flower without a parent group is reached. A cap that is
 * missing from the parent flower trips assert(0) ("not in the current alignments");
 * with asserts disabled NULL is returned in that case as well.
 */
Segment *getCapsSegment(Cap *cap) {
    for (;;) {
        Segment *segment = cap_getSegment(cap);
        if (segment != NULL) {
            return segment;
        }
        assert(!end_isBlockEnd(cap_getEnd(cap)));
        assert(end_isStubEnd(cap_getEnd(cap)));

        //Walk up to get the next adjacency.
        Group *parentGroup = flower_getParentGroup(end_getFlower(cap_getEnd(cap)));
        if (parentGroup == NULL) {
            return NULL;
        }
        Cap *parentCap = flower_getCap(group_getFlower(parentGroup), cap_getName(cap));
        if (parentCap == NULL) { //Cap must be a free stub end.
            assert(0); //Not in the current alignments.
            assert(end_isFree(cap_getEnd(cap)));
            return NULL;
        }
        assert(cap_getOrientation(parentCap));
        //Continue from the parent cap, keeping the orientation of the current cap.
        cap = cap_getOrientation(cap) ? parentCap : cap_getReverse(parentCap);
    }
}
示例#16
0
/*
 * Hash function for the lifted edge hashtable.
 * The key is a cap; its hash is the cap's name converted to uint64_t.
 */
static uint64_t buildFaces_hashfunction(const void *ptr) {
    return (uint64_t) cap_getName((Cap *) ptr);
}
示例#17
0
static int flower_constructCapsP(const void *o1, const void *o2) {
    return cactusMisc_nameCompare(cap_getName((Cap *) o1), cap_getName((Cap *) o2));
}
int mapGene(Cap *cap, int level, int exon, struct bed *gene, FILE *fileHandle){
   /*
    *Following cactus adjacencies, starting from 'cap', find regions that overlap with 
    *exons of input gene. Report chain relations of these regions with the exons.
    *cap: current cap. Level = chain level. exon = exon number. gene = bed record of gene
    */
   int64_t exonStart, exonEnd;
   if(isStubCap(cap)){
      Group *group = end_getGroup(cap_getEnd(cap));
      Flower *nestedFlower = group_getNestedFlower(group);
      if(nestedFlower != NULL){//recursive call
         Cap *childCap = flower_getCap(nestedFlower, cap_getName(cap));
         assert(childCap != NULL);
         exon = mapGene(childCap, level + 1, exon, gene, fileHandle);
         exonStart = gene->chromStarts->list[exon] + gene->chromStart;
         exonEnd = exonStart + gene->blockSizes->list[exon];
      }
   }

   cap = cap_getAdjacency(cap);
   Cap *nextcap;
   int64_t capCoor;
   exonStart = gene->chromStarts->list[exon] + gene->chromStart;
   exonEnd = exonStart + gene->blockSizes->list[exon];
   Block *block = end_getBlock(cap_getEnd(cap));  
 
   if(block == NULL){
      moveCapToNextBlock(&cap);
   }
   while(!isStubCap(cap) && exon < gene->blockCount){
      End *cend = cap_getEnd(cap);
      capCoor = cap_getCoordinate(cap);//Cap coordinate is always the coordinate on + strand
      nextcap = cap_getAdjacency(cap_getOtherSegmentCap(cap));
      st_logInfo("capCoor: %d, nextCap: %d, eStart: %d, eEnd: %d. Exon: %d\n", 
                  capCoor, cap_getCoordinate(nextcap), exonStart, exonEnd, exon);

      //keep moving if nextBlock Start is still upstream of current exon
      if(cap_getCoordinate(nextcap) <= exonStart){
         moveCapToNextBlock(&cap);
         st_logInfo("Still upstream, nextcap <= exonStart. Move to next chainBlock\n");
      }else if(capCoor >= exonEnd){//Done with current exon, move to next
         st_logInfo("Done with current exon, move to next one\n\n");
         fprintf(fileHandle, "\t\t</exon>\n");//end previous exon
         exon++;
         if(exon < gene->blockCount){
            exonStart = gene->chromStarts->list[exon] + gene->chromStart;
            exonEnd = exonStart + gene->blockSizes->list[exon];
            fprintf(fileHandle, "\t\t<exon id=\"%d\" start=\"%" PRIi64 "\" end=\"%" PRIi64 "\">\n", exon, exonStart, exonEnd);
         }
      }else{//current exon overlaps with current block Or with lower level flower
         Cap *oppcap = cap_getOtherSegmentCap(cap);
         st_logInfo("Current exon overlaps with current block or with lower flower\n");
         if(cap_getCoordinate(oppcap) >= exonStart && exonEnd > capCoor){
            mapBlockToExon(cap, level, fileHandle);
            if(exonEnd <= cap_getCoordinate(oppcap) + 1){
               st_logInfo("Done with current exon, move to next one\n\n");
               fprintf(fileHandle, "\t\t</exon>\n");//end previous exon
               exon++;
	       if(exon < gene->blockCount){
		  exonStart = gene->chromStarts->list[exon] + gene->chromStart;
		  exonEnd = exonStart + gene->blockSizes->list[exon];
		  fprintf(fileHandle, "\t\t<exon id=\"%d\" start=\"%" PRIi64 "\" end=\"%" PRIi64 "\">\n", exon, exonStart, exonEnd);
	       }
               continue;
            }
         }
         //Traverse lower level flowers if exists
         Group *group = end_getGroup(end_getOtherBlockEnd(cend));
         Flower *nestedFlower = group_getNestedFlower(group);
         if(nestedFlower != NULL){//recursive call
            Cap *childCap = flower_getCap(nestedFlower, cap_getName(cap_getOtherSegmentCap(cap)));
            assert(childCap != NULL);
            exon = mapGene(childCap, level + 1, exon, gene, fileHandle);
            exonStart = gene->chromStarts->list[exon] + gene->chromStart;
            exonEnd = exonStart + gene->blockSizes->list[exon];
         }
         moveCapToNextBlock(&cap);
      }
   }
   return exon;
}