/*
 * Tests whether two caps face each other across a stretch of the same meta-sequence.
 *
 * Returns 1 and writes the number of bases lying strictly between the two caps to
 * *separationDistance when, after flipping both caps onto the strand for which
 * cap_getStrand() is true, the cap with the lower coordinate has cap_getSide() false
 * and the cap with the higher coordinate has cap_getSide() true.
 * Returns 0 (leaving *separationDistance untouched) otherwise, including when the caps
 * share a name or a coordinate, or live on different meta-sequences.
 */
bool capsAreAdjacent(Cap *cap1, Cap *cap2, int64_t *separationDistance) {
    //Equal names or coordinates can happen if end1 == end2
    if (cap_getName(cap2) == cap_getName(cap1) || cap_getCoordinate(cap1) == cap_getCoordinate(cap2)) {
        return 0;
    }
    if (sequence_getMetaSequence(cap_getSequence(cap1)) != sequence_getMetaSequence(cap_getSequence(cap2))) {
        return 0;
    }

    assert(strcmp(event_getHeader(cap_getEvent(cap1)), event_getHeader(cap_getEvent(cap2))) == 0);
    assert(cap_getPositiveOrientation(cap1) != cap_getPositiveOrientation(cap2));
    assert(cap_getName(cap1) != cap_getName(cap2));
    assert(sequence_getMetaSequence(cap_getSequence(cap1)) == sequence_getMetaSequence(cap_getSequence(cap2)));

    //Work with the orientation of each cap for which cap_getStrand() holds.
    if (!cap_getStrand(cap1)) {
        cap1 = cap_getReverse(cap1);
    }
    if (!cap_getStrand(cap2)) {
        cap2 = cap_getReverse(cap2);
    }
    assert(cap_getStrand(cap1));
    assert(cap_getStrand(cap2));

    //Order the caps by coordinate; ties fall to cap2 being the lower one, as in a plain "<" test.
    Cap *lowerCap;
    Cap *upperCap;
    if (cap_getCoordinate(cap1) < cap_getCoordinate(cap2)) {
        lowerCap = cap1;
        upperCap = cap2;
    } else {
        lowerCap = cap2;
        upperCap = cap1;
    }

    if (cap_getSide(lowerCap) || !cap_getSide(upperCap)) {
        return 0;
    }
    //The minus 1, to give the length of the sequence between the two caps.
    *separationDistance = cap_getCoordinate(upperCap) - cap_getCoordinate(lowerCap) - 1;
    return 1;
}
/*
 * Walks along a path of adjacencies and segments starting at 'cap', accumulating the
 * traversed length into *pathLength and the count of Ns into *nCount.
 *
 * The walk stops in one of two ways:
 *  - the current cap has no adjacent segment (getAdjacentCapsSegment() returns NULL):
 *    *pathEndCap is set to the adjacency of the terminal cap and 0 is returned;
 *  - the end of the next cap has a cap in the contamination or haplotype event lists:
 *    *pathEndCap is set to that cap and 1 is returned. The segment beyond that cap is
 *    not added to the totals.
 */
bool getCapGetAtEndOfPath(Cap *cap, Cap **pathEndCap, int64_t *pathLength, int64_t *nCount,
        stList *haplotypeEventStrings, stList *contaminationEventStrings) {
    for (;;) {
        //Account for length of adjacency
        *pathLength += getTerminalAdjacencyLength(cap);
        *nCount += getNumberOfNsInAdjacency(cap);

        Segment *segment = getAdjacentCapsSegment(cap);
        if (segment == NULL) {
            *pathEndCap = cap_getAdjacency(getTerminalCap(cap));
            assert(*pathEndCap != NULL);
            return 0;
        }

        Cap *adjacentCap;
        if (cap_getSide(cap)) {
            adjacentCap = segment_get3Cap(segment);
        } else {
            adjacentCap = segment_get5Cap(segment);
        }
        assert(cap_getName(adjacentCap) == cap_getName(cap_getAdjacency(getTerminalCap(cap))));

        End *adjacentEnd = cap_getEnd(adjacentCap);
        if (hasCapInEvents(adjacentEnd, contaminationEventStrings)
                || hasCapInEvents(adjacentEnd, haplotypeEventStrings)) {
            *pathEndCap = adjacentCap;
            return 1;
        }

        //Step over the segment and carry on from its far side.
        *pathLength += segment_getLength(segment);
        *nCount += getNumberOfNsInSegment(segment);
        cap = cap_getOtherSegmentCap(adjacentCap);
    }
}
/*
 * Checks that end_getInstance() looks caps up by name: for the root and leaf1 caps the
 * fixture expects the reverse of the cap, for leaf2 the cap itself.
 */
void testEnd_getInstance(CuTest* testCase) {
    cactusEndTestSetup();

    Cap *rootInstance = end_getInstance(end, cap_getName(rootCap));
    CuAssertTrue(testCase, rootInstance == cap_getReverse(rootCap));

    Cap *leaf1Instance = end_getInstance(end, cap_getName(leaf1Cap));
    CuAssertTrue(testCase, leaf1Instance == cap_getReverse(leaf1Cap));

    Cap *leaf2Instance = end_getInstance(end, cap_getName(leaf2Cap));
    CuAssertTrue(testCase, leaf2Instance == leaf2Cap);

    cactusEndTestTeardown();
}
/*
 * Sets the coordinates of the caps to be equal to the length of the adjacency sequence
 * between them. Used to build the reference sequence bottom up.
 *
 * Starting at 'cap', the thread is followed adjacency by adjacency (hopping over each
 * segment with cap_getOtherSegmentCap()) until there is no further segment cap.
 *
 * One complexity is that a reference thread between the two caps in a flower f may be
 * broken into two in the children of f. When that is detected the two caps at the break
 * are copied from the nested flower into this one with copyCapToParent() (which may
 * append to 'recoveredCaps') and the single adjacency is replaced by two.
 */
static void setAdjacencyLengthsAndRecoverNewCapsAndBrokenAdjacencies(Cap *cap, stList *recoveredCaps) {
    do {
        Cap *adjacentCap = cap_getAdjacency(cap);
        assert(adjacentCap != NULL);
        assert(cap_getCoordinate(cap) == INT64_MAX);
        assert(cap_getCoordinate(adjacentCap) == INT64_MAX);
        assert(cap_getStrand(cap) == cap_getStrand(adjacentCap));
        assert(cap_getSide(cap) != cap_getSide(adjacentCap));

        Group *group = end_getGroup(cap_getEnd(cap));
        assert(group != NULL);

        if (group_isLeaf(group)) {
            //Terminal adjacency: set the coordinates of the caps to the adjacency size
            setAdjacencyLength(cap, adjacentCap, 0);
        } else {
            //Adjacency is not terminal, so establish its sequence.
            Flower *nestedFlower = group_getNestedFlower(group);
            Cap *nestedCap = flower_getCap(nestedFlower, cap_getName(cap));
            assert(nestedCap != NULL);
            Cap *nestedAdjacentCap = flower_getCap(nestedFlower, cap_getName(adjacentCap));
            assert(nestedAdjacentCap != NULL);

            Cap *stubCap;
            int64_t threadLength = traceThreadLength(nestedCap, &stubCap);
            assert(cap_getOrientation(nestedAdjacentCap));

            if (cap_getPositiveOrientation(stubCap) == nestedAdjacentCap) {
                //The thread is not broken at the lower level
                setAdjacencyLength(cap, adjacentCap, threadLength);
            } else {
                //The thread is broken at the lower level.
                //Copy the cap ending the first half into the higher level graph.
                stubCap = copyCapToParent(stubCap, recoveredCaps);
                assert(cap_getSide(stubCap));
                cap_makeAdjacent(cap, stubCap);
                setAdjacencyLength(cap, stubCap, threadLength);

                //Now do the same for the half traced back from the adjacent cap.
                threadLength = traceThreadLength(nestedAdjacentCap, &stubCap);
                assert(cap_getPositiveOrientation(stubCap) != cap);
                stubCap = copyCapToParent(stubCap, recoveredCaps);
                assert(!cap_getSide(stubCap));
                cap_makeAdjacent(stubCap, adjacentCap);
                setAdjacencyLength(adjacentCap, stubCap, threadLength);
            }
        }

        //Hop over the segment (if any) to the start of the next adjacency.
        cap = cap_getOtherSegmentCap(adjacentCap);
    } while (cap != NULL);
}
/*
 * Writes a <geneMap> element to 'fileHandle' describing, for every gene in the linked
 * list starting at 'gene' and every thread start of 'species' in 'flower', the result of
 * mapGene() for that (thread start, gene) pair.
 *
 * For each pair a <gene> element is opened, the tag for exon 0 is opened, mapGene() is
 * run from flower level 0 / exon 0, and the <gene> element is closed.
 *
 * NOTE(review): the list returned by flower_getThreadStarts() is fetched once per gene
 * and is not released here - confirm who owns it.
 */
void mapGenes(Flower *flower, FILE *fileHandle, struct bed *gene, char *species){
    const int topLevel = 0; //Flower level

    st_logInfo("Flower %s\n", cactusMisc_nameToString(flower_getName(flower)));
    printOpeningTag("geneMap", fileHandle);
    fprintf(fileHandle, "\n");

    for(; gene != NULL; gene = gene->next){
        //Get the start of the target sequence:
        st_logInfo("Gene %s:\n", gene->name);
        struct List *threadStarts = flower_getThreadStarts(flower, species);

        for(int i = 0; i < threadStarts->length; i++){
            Cap *startCap = threadStarts->list[i];
            st_logInfo("Cap %d, %s\n", i, cactusMisc_nameToString(cap_getName(startCap)));

            //Traverse cactus and get regions that overlap with exons of the gene,
            //report the involved chains relations
            fprintf(fileHandle, "\t<gene name=\"%s\" target=\"%s\" start=\"%" PRIi64 "\" end=\"%" PRIi64 "\" exonCount=\"%" PRIi64 "\" strand=\"%c\">\n",
                    gene->name, species, gene->chromStart, gene->chromEnd, gene->blockCount, gene->strand[0]);
            fprintf(fileHandle, "\t\t<exon id=\"0\" start=\"%" PRIi64 "\" end=\"%" PRIi64 "\">\n",
                    gene->chromStart, gene->chromStart + gene->blockSizes->list[0]);
            mapGene(startCap, topLevel, 0, gene, fileHandle);
            fprintf(fileHandle, "\t</gene>\n");
        }
    }

    printClosingTag("geneMap", fileHandle);
}
/*
 * Descends through nested flowers to find the lowest-level copy of 'cap'.
 *
 * While the group of the cap's end has a nested flower, the cap with the same name is
 * looked up in that flower (reversed when the current cap has cap_getOrientation()
 * false) and the descent continues from it. The cap reached when there is no nested
 * flower is returned; this is 'cap' itself if its group has none.
 */
Cap *getTerminalCap(Cap *cap) {
    Flower *nestedFlower;
    while ((nestedFlower = group_getNestedFlower(end_getGroup(cap_getEnd(cap)))) != NULL) {
        Cap *nestedCap = flower_getCap(nestedFlower, cap_getName(cap));
        assert(nestedCap != NULL);
        //Keep the orientation of the nested cap in step with the cap we came from.
        if (cap_getOrientation(cap)) {
            cap = nestedCap;
        } else {
            cap = cap_getReverse(nestedCap);
        }
    }
    return cap;
}
/*
 * Returns getBoundingNsP() for the segment belonging to 'cap', oriented according to
 * which side of the segment the cap is on.
 *
 * Returns 0 when getCapsSegment() finds no segment for the cap. Otherwise, if the
 * terminal cap of the segment's 5' cap carries the same name as 'cap' the segment is
 * passed as is; if not, the cap is asserted to match the 3' terminal cap and the
 * reversed segment is passed.
 */
static int64_t getBoundingNs(Cap *cap) {
    assert(cap != NULL);

    Segment *segment = getCapsSegment(cap);
    if (segment == NULL) {
        return 0;
    }

    Cap *terminal5Cap = getTerminalCap(segment_get5Cap(segment));
    Cap *terminal3Cap = getTerminalCap(segment_get3Cap(segment));
    (void) terminal3Cap; //Only read inside asserts.
    assert(terminal5Cap != NULL);
    assert(terminal3Cap != NULL);

    if (cap_getName(terminal5Cap) != cap_getName(cap)) {
        assert(cap_getName(terminal3Cap) == cap_getName(cap));
        return getBoundingNsP(segment_getReverse(segment));
    }
    return getBoundingNsP(segment);
}
/*
 * Builds the adjacency sequence starting at cap1 (with INT64_MAX as the second
 * argument to adjacencySequence_construct) and checks its identifier, start, strand,
 * length and string against the values the fixture is expected to produce.
 *
 * CuAssertIntEquals takes (testCase, expected, actual); the expected constant is passed
 * first so that a failure message reports the two values the right way round.
 */
static void testAdjacencySequence_1(CuTest *testCase) {
    setup();
    AdjacencySequence *adjacencySequence = adjacencySequence_construct(cap1, INT64_MAX);

    CuAssertTrue(testCase, adjacencySequence->subsequenceIdentifier == cap_getName(cap1));
    CuAssertIntEquals(testCase, 1, adjacencySequence->start);
    CuAssertIntEquals(testCase, 1, adjacencySequence->strand);
    CuAssertIntEquals(testCase, 4, adjacencySequence->length);
    CuAssertStrEquals(testCase, "ACTG", adjacencySequence->string);

    adjacencySequence_destruct(adjacencySequence);
    teardown();
}
/*
 * Builds the adjacency sequence starting at cap7 (with INT64_MAX as the second
 * argument to adjacencySequence_construct) and checks it against the fixture: the
 * identifier is expected to be the name of cap8, the strand 0, and the string "CCGGTT".
 *
 * CuAssertIntEquals takes (testCase, expected, actual); the expected constant is passed
 * first so that a failure message reports the two values the right way round.
 */
static void testAdjacencySequence_5(CuTest *testCase) {
    setup();
    AdjacencySequence *adjacencySequence = adjacencySequence_construct(cap7, INT64_MAX);

    CuAssertTrue(testCase, adjacencySequence->subsequenceIdentifier == cap_getName(cap8));
    CuAssertIntEquals(testCase, 6, adjacencySequence->start);
    CuAssertIntEquals(testCase, 0, adjacencySequence->strand);
    CuAssertIntEquals(testCase, 6, adjacencySequence->length);
    CuAssertStrEquals(testCase, "CCGGTT", adjacencySequence->string);

    adjacencySequence_destruct(adjacencySequence);
    teardown();
}
/*
 * Round-trips the fixture's end through its binary representation and then re-runs the
 * other end tests against the reloaded object.
 *
 * The cap names are recorded first because the fixture's cap pointers are re-fetched by
 * name from the reloaded end. While the other tests run, nestedTest is set to 1 and is
 * reset to 0 afterwards.
 */
void testEnd_serialisation(CuTest* testCase) {
    cactusEndTestSetup();

    Name rootName = cap_getName(rootCap);
    Name leaf1Name = cap_getName(leaf1Cap);
    Name leaf2Name = cap_getName(leaf2Cap);
    Name leaf3Name = cap_getName(leaf3Cap);

    //Serialise; the third argument receives a value that must be positive
    //(presumably the size of the representation - confirm).
    int64_t written;
    void *serialised = binaryRepresentation_makeBinaryRepresentation(end,
            (void(*)(void *, void(*)(const void *, size_t, size_t))) end_writeBinaryRepresentation,
            &written);
    CuAssertTrue(testCase, written > 0);

    //Replace the fixture's end with one loaded back from the buffer.
    end_destruct(end);
    void *readPosition = serialised; //Passed by address to the loader; the buffer itself is freed via 'serialised'.
    end = end_loadFromBinaryRepresentation(&readPosition, flower);

    //Re-bind the fixture caps, using the same orientations the other tests expect.
    rootCap = cap_getReverse(end_getInstance(end, rootName));
    leaf1Cap = cap_getReverse(end_getInstance(end, leaf1Name));
    leaf2Cap = end_getInstance(end, leaf2Name);
    leaf3Cap = cap_getReverse(end_getInstance(end, leaf3Name));
    CuAssertTrue(testCase, leaf3Cap != NULL);
    free(serialised);

    nestedTest = 1;
    testEnd_copyConstruct(testCase);
    testEnd_getName(testCase);
    testEnd_getOrientation(testCase);
    testEnd_getReverse(testCase);
    testEnd_getSide(testCase);
    testEnd_getFlower(testCase);
    testEnd_getBlock(testCase);
    testEnd_getOtherBlockEnd(testCase);
    testEnd_getGroup(testCase);
    testEnd_setGroup(testCase);
    testEnd_getInstanceNumber(testCase);
    testEnd_getInstance(testCase);
    testEnd_getFirst(testCase);
    testEnd_getSetRootInstance(testCase);
    testEnd_instanceIterator(testCase);
    testEnd_isBlockOrStubEnd(testCase);
    testEnd_isAttachedOrFree(testCase);
    testEnd_getCapForEvent(testCase);
    nestedTest = 0;

    cactusEndTestTeardown();
}
/*
 * Decides whether two ends are connected with respect to a set of events.
 *
 * If both ends carry the same name, the result is 1 as soon as any cap of end1
 * satisfies capHasGivenEvents(cap, eventStrings), and 0 if none does.
 *
 * Otherwise the result is 1 if some cap of end1 that satisfies
 * capHasGivenEvents() shares its meta-sequence with some cap of end2, and 0 if no such
 * pair exists.
 *
 * Every instance iterator created here is destructed on every return path.
 */
bool endsAreConnected(End *end1, End *end2, stList *eventStrings) {
    if (end_getName(end1) == end_getName(end2)) {
        //Then the ends are the same and are part of the same chromosome by definition.
        End_InstanceIterator *instanceIterator = end_getInstanceIterator(end1);
        Cap *cap1;
        while ((cap1 = end_getNext(instanceIterator)) != NULL) {
            if (capHasGivenEvents(cap1, eventStrings)) {
                end_destructInstanceIterator(instanceIterator);
                return 1;
            }
        }
        //Release the iterator on the "no match" path as well (it was previously leaked).
        end_destructInstanceIterator(instanceIterator);
        return 0;
    }

    End_InstanceIterator *instanceIterator = end_getInstanceIterator(end1);
    Cap *cap1;
    while ((cap1 = end_getNext(instanceIterator)) != NULL) {
        if (!capHasGivenEvents(cap1, eventStrings)) {
            continue;
        }
        End_InstanceIterator *instanceIterator2 = end_getInstanceIterator(end2);
        Cap *cap2;
        while ((cap2 = end_getNext(instanceIterator2)) != NULL) {
            assert(cap_getName(cap2) != cap_getName(cap1)); //This could only happen if end1 == end2
            if (sequence_getMetaSequence(cap_getSequence(cap1))
                    == sequence_getMetaSequence(cap_getSequence(cap2))) {
                assert(strcmp(event_getHeader(cap_getEvent(cap1)), event_getHeader(cap_getEvent(cap2))) == 0);
                assert(cap_getPositiveOrientation(cap1) != cap_getPositiveOrientation(cap2));
                assert(cap_getName(cap1) != cap_getName(cap2));
                //they could have the same coordinate if they represent two ends of a block of length 1.
                end_destructInstanceIterator(instanceIterator);
                end_destructInstanceIterator(instanceIterator2);
                return 1;
            }
        }
        end_destructInstanceIterator(instanceIterator2);
    }
    end_destructInstanceIterator(instanceIterator);
    return 0;
}
/*
 * Run on each flower, top down.
 *
 * For every end of 'flower' that has a cap for the reference event, the cap is put into
 * the orientation for which cap_getStrand() is true. If that cap has cap_getSide()
 * false and its group is not a leaf, setCoordinates() is run on the nested flower from
 * the matching nested cap, starting at this cap's coordinate, and the coordinate it
 * returns is asserted to match the coordinate of the cap's adjacency at both levels.
 *
 * The original notes for this function say the pass also sets the bases of the
 * reference sequence to consensus bases; that is not visible here and is presumably
 * done inside setCoordinates() - confirm.
 */
void topDown(Flower *flower, Name referenceEventName) {
    Flower_EndIterator *endIt = flower_getEndIterator(flower);
    End *end;
    while ((end = flower_getNextEnd(endIt)) != NULL) {
        Cap *cap = getCapForReferenceEvent(end, referenceEventName); //The cap in the reference
        if (cap == NULL) {
            continue;
        }
        if (!cap_getStrand(cap)) {
            cap = cap_getReverse(cap);
        }
        if (cap_getSide(cap)) {
            continue;
        }

        assert(cap_getCoordinate(cap) != INT64_MAX);
        Sequence *sequence = cap_getSequence(cap);
        assert(sequence != NULL);

        Group *group = end_getGroup(end);
        if (group_isLeaf(group)) {
            continue;
        }

        Flower *nestedFlower = group_getNestedFlower(group);
        Cap *nestedCap = flower_getCap(nestedFlower, cap_getName(cap));
        assert(nestedCap != NULL);
        if (!cap_getStrand(nestedCap)) {
            nestedCap = cap_getReverse(nestedCap);
        }
        assert(cap_getStrand(nestedCap));
        assert(!cap_getSide(nestedCap));

        int64_t endCoordinate = setCoordinates(nestedFlower, sequence_getMetaSequence(sequence),
                nestedCap, cap_getCoordinate(cap));
        (void) endCoordinate; //Only read inside asserts.
        assert(endCoordinate == cap_getCoordinate(cap_getAdjacency(cap)));
        assert(endCoordinate == cap_getCoordinate(
                flower_getCap(nestedFlower, cap_getName(cap_getAdjacency(cap)))));
    }
    flower_destructEndIterator(endIt);
}
/*
 * Copies the end of 'cap' into the flower of its parent group and returns the copy of
 * 'cap' found in that new end.
 *
 * The copied end is assigned to the parent group. The returned cap is the orientation
 * for which cap_getStrand() is true; when that cap has cap_getSide() false it is also
 * appended to 'recoveredCaps'.
 */
static Cap *copyCapToParent(Cap *cap, stList *recoveredCaps) {
    End *nestedEnd = cap_getEnd(cap);
    assert(nestedEnd != NULL);
    Group *parentGroup = flower_getParentGroup(end_getFlower(nestedEnd));
    assert(parentGroup != NULL);

    End *parentEnd = end_copyConstruct(nestedEnd, group_getFlower(parentGroup));
    end_setGroup(parentEnd, parentGroup); //Set group

    Cap *parentCap = end_getInstance(parentEnd, cap_getName(cap));
    assert(parentCap != NULL);
    if (!cap_getStrand(parentCap)) {
        parentCap = cap_getReverse(parentCap);
    }
    if (!cap_getSide(parentCap)) {
        stList_append(recoveredCaps, parentCap);
    }
    return parentCap;
}
/*
 * Copies the fixture's end into a freshly constructed flower (after copying the event
 * tree and constructing the sequence there) and checks that the copy keeps the end's
 * name, is registered in the new flower under that name, and contains instances named
 * after the root, leaf1 and leaf2 caps.
 */
void testEnd_copyConstruct(CuTest* testCase) {
    cactusEndTestSetup();

    Flower *flower2 = flower_construct(cactusDisk);
    eventTree_copyConstruct(eventTree, flower2, testEnd_copyConstructP);
    sequence_construct(metaSequence, flower2);
    End *end2 = end_copyConstruct(end, flower2);

    Name copiedName = end_getName(end2);
    CuAssertTrue(testCase, copiedName != NULL_NAME);
    CuAssertTrue(testCase, copiedName == end_getName(end));
    CuAssertTrue(testCase, flower_getEnd(flower2, copiedName) == end2);

    //Each of these caps must have a like-named instance in the copied end.
    Cap *sourceCaps[] = { rootCap, leaf1Cap, leaf2Cap };
    for (size_t i = 0; i < sizeof(sourceCaps) / sizeof(sourceCaps[0]); i++) {
        Name capName = cap_getName(sourceCaps[i]);
        CuAssertTrue(testCase, cap_getName(end_getInstance(end2, capName)) == capName);
    }

    cactusEndTestTeardown();
}
/*
 * Finds the segment attached to 'cap', climbing to parent flowers when the cap has no
 * segment at its own level.
 *
 * At each level: if the cap has a segment it is returned. Otherwise the cap's end is
 * asserted to be a stub end (not a block end) and the cap of the same name is looked up
 * in the flower of the parent group, reversed if the current cap has
 * cap_getOrientation() false, and the search continues from there.
 * Returns NULL when there is no parent group. A missing parent cap trips assert(0); in
 * builds without assertions NULL is returned in that case.
 */
Segment *getCapsSegment(Cap *cap) {
    for (;;) {
        Segment *segment = cap_getSegment(cap);
        if (segment != NULL) {
            return segment;
        }

        End *end = cap_getEnd(cap);
        assert(!end_isBlockEnd(end));
        assert(end_isStubEnd(end));

        //Walk up to get the next adjacency.
        Group *parentGroup = flower_getParentGroup(end_getFlower(end));
        if (parentGroup == NULL) {
            return NULL;
        }

        Cap *parentCap = flower_getCap(group_getFlower(parentGroup), cap_getName(cap));
        if (parentCap == NULL) {
            //Cap must be a free stub end.
            assert(0); //Not in the current alignments.
            assert(end_isFree(end));
            return NULL;
        }

        assert(cap_getOrientation(parentCap));
        cap = cap_getOrientation(cap) ? parentCap : cap_getReverse(parentCap);
    }
}
/*
 * Utility function for the lifted edge hashtable.
 *
 * Treats 'ptr' as a Cap and uses the cap's name, converted to uint64_t, as its hash.
 */
static uint64_t buildFaces_hashfunction(const void *ptr) {
    return (uint64_t) cap_getName((Cap *) ptr);
}
static int flower_constructCapsP(const void *o1, const void *o2) { return cactusMisc_nameCompare(cap_getName((Cap *) o1), cap_getName((Cap *) o2)); }
int mapGene(Cap *cap, int level, int exon, struct bed *gene, FILE *fileHandle){ /* *Following cactus adjacencies, starting from 'cap', find regions that overlap with *exons of input gene. Report chain relations of these regions with the exons. *cap: current cap. Level = chain level. exon = exon number. gene = bed record of gene */ int64_t exonStart, exonEnd; if(isStubCap(cap)){ Group *group = end_getGroup(cap_getEnd(cap)); Flower *nestedFlower = group_getNestedFlower(group); if(nestedFlower != NULL){//recursive call Cap *childCap = flower_getCap(nestedFlower, cap_getName(cap)); assert(childCap != NULL); exon = mapGene(childCap, level + 1, exon, gene, fileHandle); exonStart = gene->chromStarts->list[exon] + gene->chromStart; exonEnd = exonStart + gene->blockSizes->list[exon]; } } cap = cap_getAdjacency(cap); Cap *nextcap; int64_t capCoor; exonStart = gene->chromStarts->list[exon] + gene->chromStart; exonEnd = exonStart + gene->blockSizes->list[exon]; Block *block = end_getBlock(cap_getEnd(cap)); if(block == NULL){ moveCapToNextBlock(&cap); } while(!isStubCap(cap) && exon < gene->blockCount){ End *cend = cap_getEnd(cap); capCoor = cap_getCoordinate(cap);//Cap coordinate is always the coordinate on + strand nextcap = cap_getAdjacency(cap_getOtherSegmentCap(cap)); st_logInfo("capCoor: %d, nextCap: %d, eStart: %d, eEnd: %d. Exon: %d\n", capCoor, cap_getCoordinate(nextcap), exonStart, exonEnd, exon); //keep moving if nextBlock Start is still upstream of current exon if(cap_getCoordinate(nextcap) <= exonStart){ moveCapToNextBlock(&cap); st_logInfo("Still upstream, nextcap <= exonStart. 
Move to next chainBlock\n"); }else if(capCoor >= exonEnd){//Done with current exon, move to next st_logInfo("Done with current exon, move to next one\n\n"); fprintf(fileHandle, "\t\t</exon>\n");//end previous exon exon++; if(exon < gene->blockCount){ exonStart = gene->chromStarts->list[exon] + gene->chromStart; exonEnd = exonStart + gene->blockSizes->list[exon]; fprintf(fileHandle, "\t\t<exon id=\"%d\" start=\"%" PRIi64 "\" end=\"%" PRIi64 "\">\n", exon, exonStart, exonEnd); } }else{//current exon overlaps with current block Or with lower level flower Cap *oppcap = cap_getOtherSegmentCap(cap); st_logInfo("Current exon overlaps with current block or with lower flower\n"); if(cap_getCoordinate(oppcap) >= exonStart && exonEnd > capCoor){ mapBlockToExon(cap, level, fileHandle); if(exonEnd <= cap_getCoordinate(oppcap) + 1){ st_logInfo("Done with current exon, move to next one\n\n"); fprintf(fileHandle, "\t\t</exon>\n");//end previous exon exon++; if(exon < gene->blockCount){ exonStart = gene->chromStarts->list[exon] + gene->chromStart; exonEnd = exonStart + gene->blockSizes->list[exon]; fprintf(fileHandle, "\t\t<exon id=\"%d\" start=\"%" PRIi64 "\" end=\"%" PRIi64 "\">\n", exon, exonStart, exonEnd); } continue; } } //Traverse lower level flowers if exists Group *group = end_getGroup(end_getOtherBlockEnd(cend)); Flower *nestedFlower = group_getNestedFlower(group); if(nestedFlower != NULL){//recursive call Cap *childCap = flower_getCap(nestedFlower, cap_getName(cap_getOtherSegmentCap(cap))); assert(childCap != NULL); exon = mapGene(childCap, level + 1, exon, gene, fileHandle); exonStart = gene->chromStarts->list[exon] + gene->chromStart; exonEnd = exonStart + gene->blockSizes->list[exon]; } moveCapToNextBlock(&cap); } } return exon; }