/*
 * Returns the chain reached through the group/link of either end of the block.
 * The 5' end is consulted first, then the 3' end; NULL is returned when neither
 * end yields a chain.
 */
Chain *block_getChain(Block *block) {
    Chain *fivePrimeChain = NULL;
    Chain *threePrimeChain = NULL;
    Group *endGroup;
    Link *groupLink;

    /* Chain reachable from the 5' end, if its group has a link. */
    endGroup = end_getGroup(block_get5End(block));
    if (endGroup != NULL) {
        groupLink = group_getLink(endGroup);
        if (groupLink != NULL) {
            fivePrimeChain = link_getChain(groupLink);
        }
    }

    /* Chain reachable from the 3' end, if its group has a link. */
    endGroup = end_getGroup(block_get3End(block));
    if (endGroup != NULL) {
        groupLink = group_getLink(endGroup);
        if (groupLink != NULL) {
            threePrimeChain = link_getChain(groupLink);
        }
    }

    if (fivePrimeChain == NULL) {
        return threePrimeChain;
    }
    /* A block should not be in more than one chain. */
    assert(threePrimeChain == NULL || fivePrimeChain == threePrimeChain);
    return fivePrimeChain;
}
/*
 * Checks end_getGroup: the fixture end has no group until a group is constructed
 * between the fixture flower and a second flower, while a copy of the end placed
 * in the second flower still reports no group.
 */
void testEnd_getGroup(CuTest* testCase) {
    cactusEndTestSetup();

    /* Build a second flower holding copies of the event tree, sequence and end. */
    Flower *otherFlower = flower_construct(cactusDisk);
    eventTree_copyConstruct(eventTree, otherFlower, testEnd_copyConstructP);
    sequence_construct(metaSequence, otherFlower);
    End *copiedEnd = end_copyConstruct(end, otherFlower);

    /* Before any group exists the fixture end is ungrouped. */
    CuAssertTrue(testCase, end_getGroup(end) == NULL);

    /* Constructing the group makes it the group of the fixture end only. */
    Group *linkingGroup = group_construct(flower, otherFlower);
    CuAssertTrue(testCase, end_getGroup(end) == linkingGroup);
    CuAssertTrue(testCase, end_getGroup(copiedEnd) == NULL);

    cactusEndTestTeardown();
}
/*
 * Checks end_setGroup: adding ends to a group one at a time, then detaching one
 * (by setting its group to NULL) and re-setting the other to the same group.
 */
void testEnd_setGroup(CuTest* testCase) {
    cactusEndTestSetup();

    Flower *otherFlower = flower_construct(cactusDisk);
    Group *targetGroup = group_construct2(otherFlower);
    End *firstEnd = end_construct(1, otherFlower);
    End *secondEnd = end_construct(1, otherFlower);

    /* Initially the group is empty and neither end belongs to a group. */
    CuAssertTrue(testCase, group_getEndNumber(targetGroup) == 0);
    CuAssertTrue(testCase, end_getGroup(firstEnd) == NULL);
    CuAssertTrue(testCase, end_getGroup(secondEnd) == NULL);

    /* Attach the first end. */
    end_setGroup(firstEnd, targetGroup);
    CuAssertTrue(testCase, group_getEndNumber(targetGroup) == 1);
    CuAssertTrue(testCase, end_getGroup(firstEnd) == targetGroup);
    CuAssertTrue(testCase, group_getEnd(targetGroup, end_getName(firstEnd)) == firstEnd);
    CuAssertTrue(testCase, end_getGroup(secondEnd) == NULL);

    /* Attach the second end; the first must be unaffected. */
    end_setGroup(secondEnd, targetGroup);
    CuAssertTrue(testCase, group_getEndNumber(targetGroup) == 2);
    CuAssertTrue(testCase, end_getGroup(firstEnd) == targetGroup);
    CuAssertTrue(testCase, group_getEnd(targetGroup, end_getName(firstEnd)) == firstEnd);
    CuAssertTrue(testCase, end_getGroup(secondEnd) == targetGroup);
    CuAssertTrue(testCase, group_getEnd(targetGroup, end_getName(secondEnd)) == secondEnd);

    /* Detach the second end and re-set the first end to the group it is already in. */
    end_setGroup(secondEnd, NULL);
    end_setGroup(firstEnd, targetGroup);
    CuAssertTrue(testCase, group_getEndNumber(targetGroup) == 1);
    CuAssertTrue(testCase, end_getGroup(firstEnd) == targetGroup);
    CuAssertTrue(testCase, group_getEnd(targetGroup, end_getName(firstEnd)) == firstEnd);
    CuAssertTrue(testCase, end_getGroup(secondEnd) == NULL);

    cactusEndTestTeardown();
}
/*
 * Checks that assigning the fixture end `end4` to the fixture group `group2`
 * (via end_setGroup) is visible from both the group and the end.
 */
void testGroup_addEnd(CuTest *testCase) {
    cactusGroupTestSetup();

    /* The group starts out with no ends. */
    CuAssertTrue(testCase, 0 == group_getEndNumber(group2));

    end_setGroup(end4, group2);

    /* Afterwards the group holds exactly that end, and the end points back at the group. */
    CuAssertTrue(testCase, 1 == group_getEndNumber(group2));
    CuAssertTrue(testCase, group2 == end_getGroup(end4));
    CuAssertTrue(testCase, end4 == group_getEnd(group2, end_getName(end4)));

    cactusGroupTestTeardown();
}
/*
 * Follows the given cap down through nested flowers: while the group of the
 * cap's end has a nested flower, the cap with the same name in that flower
 * (reversed if the current cap is not in its positive orientation) replaces it.
 * Returns the cap reached once no nested flower remains.
 */
Cap *getTerminalCap(Cap *cap) {
    for (;;) {
        Flower *childFlower = group_getNestedFlower(end_getGroup(cap_getEnd(cap)));
        if (childFlower == NULL) {
            return cap;
        }
        Cap *childCap = flower_getCap(childFlower, cap_getName(cap));
        assert(childCap != NULL);
        /* Keep the orientation of the descendant consistent with the current cap. */
        cap = cap_getOrientation(cap) ? childCap : cap_getReverse(childCap);
    }
}
static void setAdjacencyLengthsAndRecoverNewCapsAndBrokenAdjacencies(Cap *cap, stList *recoveredCaps) {
    /*
     * Walks the thread starting at cap, setting the coordinates of each pair of
     * adjacent caps to the length of the adjacency sequence between them.
     * Used to build the reference sequence bottom up.
     *
     * A reference thread between two caps in a flower f may be broken into two
     * in the children of f. When that is detected, the caps at the break are
     * copied into f (collected via recoveredCaps) and the reference caps are
     * re-attached to them.
     */
    do {
        Cap *partnerCap = cap_getAdjacency(cap);
        assert(partnerCap != NULL);
        assert(cap_getCoordinate(cap) == INT64_MAX);
        assert(cap_getCoordinate(partnerCap) == INT64_MAX);
        assert(cap_getStrand(cap) == cap_getStrand(partnerCap));
        assert(cap_getSide(cap) != cap_getSide(partnerCap));

        Group *capGroup = end_getGroup(cap_getEnd(cap));
        assert(capGroup != NULL);

        if (group_isLeaf(capGroup)) {
            /* Terminal adjacency: set the coordinates of the caps to the adjacency size. */
            setAdjacencyLength(cap, partnerCap, 0);
        } else {
            /* Adjacency is not terminal, so establish its sequence from the nested flower. */
            Flower *childFlower = group_getNestedFlower(capGroup);
            Cap *childCap = flower_getCap(childFlower, cap_getName(cap));
            assert(childCap != NULL);
            Cap *childPartnerCap = flower_getCap(childFlower, cap_getName(partnerCap));
            assert(childPartnerCap != NULL);

            Cap *threadEndCap;
            int64_t threadLength = traceThreadLength(childCap, &threadEndCap);
            assert(cap_getOrientation(childPartnerCap));

            if (cap_getPositiveOrientation(threadEndCap) == childPartnerCap) {
                /* The thread is not broken at the lower level. */
                setAdjacencyLength(cap, partnerCap, threadLength);
            } else {
                /* The thread is broken at the lower level: copy the breaking cap
                 * into the higher level graph and attach it to cap. */
                threadEndCap = copyCapToParent(threadEndCap, recoveredCaps);
                assert(cap_getSide(threadEndCap));
                cap_makeAdjacent(cap, threadEndCap);
                setAdjacencyLength(cap, threadEndCap, threadLength);

                /* Do the same from the other side of the break for the partner cap. */
                threadLength = traceThreadLength(childPartnerCap, &threadEndCap);
                assert(cap_getPositiveOrientation(threadEndCap) != cap);
                threadEndCap = copyCapToParent(threadEndCap, recoveredCaps);
                assert(!cap_getSide(threadEndCap));
                cap_makeAdjacent(threadEndCap, partnerCap);
                setAdjacencyLength(partnerCap, threadEndCap, threadLength);
            }
        }

        /* Step across the segment to the next adjacency; stop when there is none. */
        cap = cap_getOtherSegmentCap(partnerCap);
    } while (cap != NULL);
}
/*
 * Checks group_makeNestedFlower: a leaf group holding one end becomes a
 * non-leaf group whose nested flower shares the group's name, points back at
 * the group as its parent, and contains a same-named copy of the end in a
 * single group.
 */
void testGroup_makeNonLeaf(CuTest *testCase) {
    cactusGroupTestSetup();

    CuAssertTrue(testCase, group_isLeaf(group2));
    end_setGroup(end4, group2);
    group_makeNestedFlower(group2);
    CuAssertTrue(testCase, !group_isLeaf(group2));

    Flower *childFlower = group_getNestedFlower(group2);
    CuAssertTrue(testCase, childFlower != NULL);

    /* NOTE(review): these three checks inspect the fixture `flower`, not the
     * newly created nested flower - confirm that is intended. */
    CuAssertTrue(testCase, !flower_builtBlocks(flower));
    CuAssertTrue(testCase, !flower_builtTrees(flower));
    CuAssertTrue(testCase, !flower_builtFaces(flower));

    /* The nested flower is tied to the group by name and by parent pointer. */
    CuAssertTrue(testCase, flower_getName(childFlower) == group_getName(group2));
    CuAssertTrue(testCase, flower_getParentGroup(childFlower) == group2);

    /* The single end of the group was carried into the nested flower and grouped there. */
    CuAssertTrue(testCase, flower_getEndNumber(childFlower) == 1);
    End *childEnd = flower_getFirstEnd(childFlower);
    CuAssertTrue(testCase, end_getName(end4) == end_getName(childEnd));
    CuAssertTrue(testCase, end_getGroup(childEnd) != NULL);
    CuAssertTrue(testCase, flower_getGroupNumber(childFlower) == 1);
    CuAssertTrue(testCase, flower_isTerminal(childFlower));

    cactusGroupTestTeardown();
}
void topDown(Flower *flower, Name referenceEventName) {
    /*
     * Run on each flower, top down. Sets the coordinates of each reference cap
     * to the correct sequence, and sets the bases of the reference sequence to
     * be consensus bases.
     *
     * For every end whose reference cap (taken on the positive strand) is not
     * on the "side" orientation and whose group is not a leaf, the matching cap
     * in the nested flower is handed to setCoordinates together with the
     * parent cap's coordinate.
     */
    Flower_EndIterator *endIterator = flower_getEndIterator(flower);
    End *end;
    while ((end = flower_getNextEnd(endIterator)) != NULL) {
        /* The cap in the reference, if this end has one. */
        Cap *refCap = getCapForReferenceEvent(end, referenceEventName);
        if (refCap == NULL) {
            continue;
        }
        if (!cap_getStrand(refCap)) {
            refCap = cap_getReverse(refCap);
        }
        if (cap_getSide(refCap)) {
            continue;
        }
        assert(cap_getCoordinate(refCap) != INT64_MAX);
        Sequence *refSequence = cap_getSequence(refCap);
        assert(refSequence != NULL);

        Group *endGroup = end_getGroup(end);
        if (group_isLeaf(endGroup)) {
            continue;
        }

        /* Find the same cap in the nested flower, on the positive strand. */
        Flower *childFlower = group_getNestedFlower(endGroup);
        Cap *childCap = flower_getCap(childFlower, cap_getName(refCap));
        assert(childCap != NULL);
        if (!cap_getStrand(childCap)) {
            childCap = cap_getReverse(childCap);
        }
        assert(cap_getStrand(childCap));
        assert(!cap_getSide(childCap));

        int64_t endCoordinate = setCoordinates(childFlower, sequence_getMetaSequence(refSequence), childCap,
                cap_getCoordinate(refCap));
        (void) endCoordinate; /* Only used by the assertions below. */
        assert(endCoordinate == cap_getCoordinate(cap_getAdjacency(refCap)));
        assert(endCoordinate == cap_getCoordinate(
                flower_getCap(childFlower, cap_getName(cap_getAdjacency(refCap)))));
    }
    flower_destructEndIterator(endIterator);
}
int mapGene(Cap *cap, int level, int exon, struct bed *gene, FILE *fileHandle){ /* *Following cactus adjacencies, starting from 'cap', find regions that overlap with *exons of input gene. Report chain relations of these regions with the exons. *cap: current cap. Level = chain level. exon = exon number. gene = bed record of gene */ int64_t exonStart, exonEnd; if(isStubCap(cap)){ Group *group = end_getGroup(cap_getEnd(cap)); Flower *nestedFlower = group_getNestedFlower(group); if(nestedFlower != NULL){//recursive call Cap *childCap = flower_getCap(nestedFlower, cap_getName(cap)); assert(childCap != NULL); exon = mapGene(childCap, level + 1, exon, gene, fileHandle); exonStart = gene->chromStarts->list[exon] + gene->chromStart; exonEnd = exonStart + gene->blockSizes->list[exon]; } } cap = cap_getAdjacency(cap); Cap *nextcap; int64_t capCoor; exonStart = gene->chromStarts->list[exon] + gene->chromStart; exonEnd = exonStart + gene->blockSizes->list[exon]; Block *block = end_getBlock(cap_getEnd(cap)); if(block == NULL){ moveCapToNextBlock(&cap); } while(!isStubCap(cap) && exon < gene->blockCount){ End *cend = cap_getEnd(cap); capCoor = cap_getCoordinate(cap);//Cap coordinate is always the coordinate on + strand nextcap = cap_getAdjacency(cap_getOtherSegmentCap(cap)); st_logInfo("capCoor: %d, nextCap: %d, eStart: %d, eEnd: %d. Exon: %d\n", capCoor, cap_getCoordinate(nextcap), exonStart, exonEnd, exon); //keep moving if nextBlock Start is still upstream of current exon if(cap_getCoordinate(nextcap) <= exonStart){ moveCapToNextBlock(&cap); st_logInfo("Still upstream, nextcap <= exonStart. 
Move to next chainBlock\n"); }else if(capCoor >= exonEnd){//Done with current exon, move to next st_logInfo("Done with current exon, move to next one\n\n"); fprintf(fileHandle, "\t\t</exon>\n");//end previous exon exon++; if(exon < gene->blockCount){ exonStart = gene->chromStarts->list[exon] + gene->chromStart; exonEnd = exonStart + gene->blockSizes->list[exon]; fprintf(fileHandle, "\t\t<exon id=\"%d\" start=\"%" PRIi64 "\" end=\"%" PRIi64 "\">\n", exon, exonStart, exonEnd); } }else{//current exon overlaps with current block Or with lower level flower Cap *oppcap = cap_getOtherSegmentCap(cap); st_logInfo("Current exon overlaps with current block or with lower flower\n"); if(cap_getCoordinate(oppcap) >= exonStart && exonEnd > capCoor){ mapBlockToExon(cap, level, fileHandle); if(exonEnd <= cap_getCoordinate(oppcap) + 1){ st_logInfo("Done with current exon, move to next one\n\n"); fprintf(fileHandle, "\t\t</exon>\n");//end previous exon exon++; if(exon < gene->blockCount){ exonStart = gene->chromStarts->list[exon] + gene->chromStart; exonEnd = exonStart + gene->blockSizes->list[exon]; fprintf(fileHandle, "\t\t<exon id=\"%d\" start=\"%" PRIi64 "\" end=\"%" PRIi64 "\">\n", exon, exonStart, exonEnd); } continue; } } //Traverse lower level flowers if exists Group *group = end_getGroup(end_getOtherBlockEnd(cend)); Flower *nestedFlower = group_getNestedFlower(group); if(nestedFlower != NULL){//recursive call Cap *childCap = flower_getCap(nestedFlower, cap_getName(cap_getOtherSegmentCap(cap))); assert(childCap != NULL); exon = mapGene(childCap, level + 1, exon, gene, fileHandle); exonStart = gene->chromStarts->list[exon] + gene->chromStart; exonEnd = exonStart + gene->blockSizes->list[exon]; } moveCapToNextBlock(&cap); } } return exon; }
void testFlower_removeIfRedundant(CuTest *testCase) {
    /*
     * Do a simple test to see if function can remove a redundant flower.
     *
     * Layout built below: the fixture flower has one group leading to midFlower;
     * midFlower has one group leading to lowerFlower plus a second group
     * (leafGroup) made with group_construct2; lowerFlower has one group
     * leading to bottomFlower.
     */
    cactusFlowerTestSetup();
    endsSetup();

    /* First construct a redundant flower from the root. */
    Flower *midFlower = flower_construct(cactusDisk);
    Group *rootGroup = group_construct(flower, midFlower);
    end_setGroup(end, rootGroup);
    end_setGroup(end2, rootGroup);

    /* Now hang another flower off that... */
    Flower *lowerFlower = flower_construct(cactusDisk);
    group_construct(midFlower, lowerFlower);

    /* ...and a second group in the same flower. */
    Group *leafGroup = group_construct2(midFlower);

    /* Finally hang one more flower on the end. */
    Flower *bottomFlower = flower_construct(cactusDisk);
    group_construct(lowerFlower, bottomFlower);

    /* Copy the ends into the flowers. */
    end_copyConstruct(end, midFlower);
    end_copyConstruct(end2, midFlower);
    end_copyConstruct(end, lowerFlower);
    end_setGroup(flower_getEnd(midFlower, end_getName(end2)), leafGroup);
    end_copyConstruct(end, bottomFlower);

    /* Write the mess to disk. */
    cactusDisk_write(cactusDisk);

    /* Now test the removal function (check we get a negative on this leaf). */
    CuAssertTrue(testCase, !flower_removeIfRedundant(bottomFlower));
    /* Check we can't remove the root. */
    CuAssertTrue(testCase, !flower_removeIfRedundant(flower));

    /* We will remove midFlower. State before: */
    CuAssertTrue(testCase, flower_getGroupNumber(flower) == 1);
    CuAssertTrue(testCase, group_getFlower(flower_getParentGroup(midFlower)) == flower);
    CuAssertTrue(testCase, flower_removeIfRedundant(midFlower));

    /* After, check the flower/group connections. */
    CuAssertTrue(testCase, flower_getGroupNumber(flower) == 2);
    CuAssertTrue(testCase, !flower_isLeaf(flower));
    CuAssertTrue(testCase, group_getFlower(flower_getParentGroup(lowerFlower)) == flower);
    leafGroup = end_getGroup(end2);
    CuAssertTrue(testCase, group_getFlower(leafGroup) == flower);
    CuAssertTrue(testCase, group_isLeaf(leafGroup));
    CuAssertTrue(testCase,
            flower_getGroup(flower, flower_getName(lowerFlower)) == flower_getParentGroup(lowerFlower));

    /* Check the ends. */
    CuAssertTrue(testCase, flower_getEndNumber(flower) == 2);
    CuAssertTrue(testCase, flower_getEndNumber(lowerFlower) == 1);
    CuAssertTrue(testCase, group_getEndNumber(leafGroup) == 1);
    CuAssertTrue(testCase, end_getGroup(end) == flower_getParentGroup(lowerFlower));
    CuAssertTrue(testCase, end_getGroup(end2) == leafGroup);
    CuAssertTrue(testCase, flower_getEnd(lowerFlower, end_getName(end)) != NULL);

    /* Check the child of lowerFlower is still okay. */
    CuAssertTrue(testCase, group_getFlower(flower_getParentGroup(bottomFlower)) == lowerFlower);

    /* Now do removal of lowerFlower. */
    CuAssertTrue(testCase, !flower_removeIfRedundant(flower));
    CuAssertTrue(testCase, !flower_removeIfRedundant(bottomFlower));
    CuAssertTrue(testCase, flower_removeIfRedundant(lowerFlower));

    /* Check groups again. */
    CuAssertTrue(testCase, flower_getGroupNumber(flower) == 2);
    CuAssertTrue(testCase, !flower_isLeaf(flower));
    CuAssertTrue(testCase, group_getFlower(flower_getParentGroup(bottomFlower)) == flower);
    CuAssertTrue(testCase, group_getFlower(leafGroup) == flower);
    CuAssertTrue(testCase,
            flower_getGroup(flower, flower_getName(bottomFlower)) == flower_getParentGroup(bottomFlower));

    /* Check the ends again. */
    CuAssertTrue(testCase, flower_getEndNumber(flower) == 2);
    CuAssertTrue(testCase, flower_getEndNumber(bottomFlower) == 1);
    CuAssertTrue(testCase, group_getEndNumber(leafGroup) == 1);
    CuAssertTrue(testCase, end_getGroup(end) == flower_getParentGroup(bottomFlower));
    CuAssertTrue(testCase, end_getGroup(end2) == leafGroup);
    CuAssertTrue(testCase, flower_getEnd(bottomFlower, end_getName(end)) != NULL);

    cactusFlowerTestTeardown();
}