static int64_t setCoordinates(Flower *flower, MetaSequence *metaSequence, Cap *cap, int64_t coordinate) { /* * Sets the coordinates of the reference thread and sets the bases of the actual sequence * that of the consensus. */ Sequence *sequence = flower_getSequence(flower, metaSequence_getName(metaSequence)); if (sequence == NULL) { sequence = sequence_construct(metaSequence, flower); } while (1) { assert(cap_getStrand(cap)); assert(!cap_getSide(cap)); Cap *adjacentCap = cap_getAdjacency(cap); assert(adjacentCap != NULL); assert(cap_getStrand(adjacentCap)); assert(cap_getSide(adjacentCap)); int64_t adjacencyLength = cap_getCoordinate(cap); assert(adjacencyLength == cap_getCoordinate(adjacentCap)); assert(adjacencyLength != INT64_MAX); assert(adjacencyLength >= 0); cap_setCoordinates(cap, coordinate, 1, sequence); coordinate += adjacencyLength + 1; cap_setCoordinates(adjacentCap, coordinate, 1, sequence); //Traverse any block.. if ((cap = cap_getOtherSegmentCap(adjacentCap)) == NULL) { break; } coordinate += segment_getLength(cap_getSegment(adjacentCap)) - 1; } return coordinate; }
static void setAdjacencyLength(Cap *cap, Cap *adjacentCap, int64_t adjacencyLength) { //Set the coordinates of the caps to the adjacency size cap_setCoordinates(cap, adjacencyLength, cap_getStrand(cap), NULL); cap_setCoordinates(adjacentCap, adjacencyLength, cap_getStrand(adjacentCap), NULL); assert(cap_getCoordinate(cap) == cap_getCoordinate(adjacentCap)); }
void testCap_segmentCoordinatesReverseStrand(CuTest* testCase) { /* * Tests the coordinates of an segment and its 5 and 3 prime caps. */ cactusCapTestSetup(); Block *block = block_construct(3, flower); Segment *segment = segment_construct2(block, 2, 0, sequence); Cap *_5Cap = segment_get5Cap(segment); Cap *_3Cap = segment_get3Cap(segment); CuAssertTrue(testCase, cap_getSide(_5Cap)); CuAssertTrue(testCase, !cap_getSide(_3Cap)); CuAssertTrue(testCase, !cap_getStrand(_5Cap)); CuAssertTrue(testCase, !cap_getStrand(_3Cap)); CuAssertIntEquals(testCase, 4, cap_getCoordinate(_5Cap)); CuAssertIntEquals(testCase, 2, cap_getCoordinate(_3Cap)); CuAssertTrue(testCase, !segment_getStrand(segment)); CuAssertIntEquals(testCase, 4, segment_getStart(segment)); CuAssertIntEquals(testCase, 3, segment_getLength(segment)); CuAssertTrue(testCase, !cap_getSide(cap_getReverse(_5Cap))); CuAssertTrue(testCase, cap_getSide(cap_getReverse(_3Cap))); CuAssertTrue(testCase, cap_getStrand(cap_getReverse(_5Cap))); CuAssertTrue(testCase, cap_getStrand(cap_getReverse(_3Cap))); CuAssertIntEquals(testCase, 4, cap_getCoordinate(cap_getReverse(_5Cap))); CuAssertIntEquals(testCase, 2, cap_getCoordinate(cap_getReverse(_3Cap))); CuAssertTrue(testCase, segment_getStrand(segment_getReverse(segment))); CuAssertIntEquals(testCase, 2, segment_getStart(segment_getReverse(segment))); CuAssertIntEquals(testCase, 3, segment_getLength(segment_getReverse(segment))); cactusCapTestTeardown(); }
bool capsAreAdjacent(Cap *cap1, Cap *cap2, int64_t *separationDistance) { if (cap_getName(cap2) != cap_getName(cap1) && cap_getCoordinate(cap1) != cap_getCoordinate(cap2)) { //This can happen if end1 == end2 if (sequence_getMetaSequence(cap_getSequence(cap1)) == sequence_getMetaSequence(cap_getSequence(cap2))) { assert(strcmp(event_getHeader(cap_getEvent(cap1)), event_getHeader( cap_getEvent(cap2))) == 0); assert(cap_getPositiveOrientation(cap1) != cap_getPositiveOrientation(cap2)); assert(cap_getName(cap1) != cap_getName(cap2)); assert(sequence_getMetaSequence(cap_getSequence(cap1)) == sequence_getMetaSequence(cap_getSequence(cap2))); if (!cap_getStrand(cap1)) { cap1 = cap_getReverse(cap1); } if (!cap_getStrand(cap2)) { cap2 = cap_getReverse(cap2); } assert(cap_getStrand(cap1)); assert(cap_getStrand(cap2)); if (cap_getCoordinate(cap1) < cap_getCoordinate(cap2)) { if (!cap_getSide(cap1) && cap_getSide(cap2)) { *separationDistance = cap_getCoordinate(cap2) - cap_getCoordinate(cap1) - 1; //The minus 1, to give the length of the sequence between the two caps. return 1; } } else { if (cap_getSide(cap1) && !cap_getSide(cap2)) { *separationDistance = cap_getCoordinate(cap1) - cap_getCoordinate(cap2) - 1; return 1; } } } } return 0; }
static void setAdjacencyLengthsAndRecoverNewCapsAndBrokenAdjacencies(Cap *cap, stList *recoveredCaps) { /* * Sets the coordinates of the caps to be equal to the length of the adjacency sequence between them. * Used to build the reference sequence bottom up. * * One complexity is that a reference thread between the two caps * in each flower f may be broken into two in the children of f. * Therefore, for each flower f first identify attached stub ends present in the children of f that are * not present in f and copy them into f, reattaching the reference caps as needed. */ while (1) { Cap *adjacentCap = cap_getAdjacency(cap); assert(adjacentCap != NULL); assert(cap_getCoordinate(cap) == INT64_MAX); assert(cap_getCoordinate(adjacentCap) == INT64_MAX); assert(cap_getStrand(cap) == cap_getStrand(adjacentCap)); assert(cap_getSide(cap) != cap_getSide(adjacentCap)); Group *group = end_getGroup(cap_getEnd(cap)); assert(group != NULL); if (!group_isLeaf(group)) { //Adjacency is not terminal, so establish its sequence. Flower *nestedFlower = group_getNestedFlower(group); Cap *nestedCap = flower_getCap(nestedFlower, cap_getName(cap)); assert(nestedCap != NULL); Cap *nestedAdjacentCap = flower_getCap(nestedFlower, cap_getName(adjacentCap)); assert(nestedAdjacentCap != NULL); Cap *breakerCap; int64_t adjacencyLength = traceThreadLength(nestedCap, &breakerCap); assert(cap_getOrientation(nestedAdjacentCap)); if (cap_getPositiveOrientation(breakerCap) != nestedAdjacentCap) { //The thread is broken at the lower level. //Copy cap into higher level graph. breakerCap = copyCapToParent(breakerCap, recoveredCaps); assert(cap_getSide(breakerCap)); cap_makeAdjacent(cap, breakerCap); setAdjacencyLength(cap, breakerCap, adjacencyLength); adjacencyLength = traceThreadLength(nestedAdjacentCap, &breakerCap); assert(cap_getPositiveOrientation(breakerCap) != cap); breakerCap = copyCapToParent(breakerCap, recoveredCaps); assert(!cap_getSide(breakerCap)); cap_makeAdjacent(breakerCap, adjacentCap); setAdjacencyLength(adjacentCap, breakerCap, adjacencyLength); } else { //The thread is not broken at the lower level setAdjacencyLength(cap, adjacentCap, adjacencyLength); } } else { //Set the coordinates of the caps to the adjacency size setAdjacencyLength(cap, adjacentCap, 0); } if ((cap = cap_getOtherSegmentCap(adjacentCap)) == NULL) { break; } } }
void testCap_setCoordinate(CuTest* testCase) { cactusCapTestSetup(); CuAssertTrue(testCase, cap_getCoordinate(rootCap) == INT64_MAX); CuAssertTrue(testCase, cap_getStrand(rootCap)); CuAssertTrue(testCase, cap_getSequence(rootCap) == NULL); cap_setCoordinates(rootCap, 5, 0, NULL); CuAssertTrue(testCase, cap_getCoordinate(rootCap) == 5); CuAssertTrue(testCase, !cap_getStrand(rootCap)); CuAssertTrue(testCase, cap_getSequence(rootCap) == NULL); cap_setCoordinates(rootCap, INT64_MAX, 1, NULL); CuAssertTrue(testCase, cap_getCoordinate(rootCap) == INT64_MAX); CuAssertTrue(testCase, cap_getStrand(rootCap)); CuAssertTrue(testCase, cap_getSequence(rootCap) == NULL); cactusCapTestTeardown(); }
static void recoverBrokenAdjacencies(Flower *flower, stList *recoveredCaps, Name referenceEventName) { /* * Find reference intervals that are book-ended by stubs created in a child flower. */ Flower_GroupIterator *groupIt = flower_getGroupIterator(flower); Group *group; while((group = flower_getNextGroup(groupIt)) != NULL) { Flower *nestedFlower; if((nestedFlower = group_getNestedFlower(group)) != NULL) { Flower_EndIterator *endIt = flower_getEndIterator(nestedFlower); End *childEnd; while((childEnd = flower_getNextEnd(endIt)) != NULL) { if(end_isStubEnd(childEnd) && flower_getEnd(flower, end_getName(childEnd)) == NULL) { //We have a thread we need to promote Cap *childCap = getCapForReferenceEvent(childEnd, referenceEventName); //The cap in the reference assert(childCap != NULL); assert(!end_isAttached(childEnd)); childCap = cap_getStrand(childCap) ? childCap : cap_getReverse(childCap); if (!cap_getSide(childCap)) { Cap *adjacentChildCap = NULL; int64_t adjacencyLength = traceThreadLength(childCap, &adjacentChildCap); Cap *cap = copyCapToParent(childCap, recoveredCaps); assert(adjacentChildCap != NULL); assert(!end_isAttached(cap_getEnd(adjacentChildCap))); assert(!cap_getSide(cap)); Cap *adjacentCap = copyCapToParent(adjacentChildCap, recoveredCaps); cap_makeAdjacent(cap, adjacentCap); setAdjacencyLength(cap, adjacentCap, adjacencyLength); } } } flower_destructEndIterator(endIt); } } flower_destructGroupIterator(groupIt); }
void stCaf_addAdjacencies(Flower *flower) { //Build a list of caps. stList *list = stList_construct(); Flower_EndIterator *endIterator = flower_getEndIterator(flower); End *end; while ((end = flower_getNextEnd(endIterator)) != NULL) { End_InstanceIterator *instanceIterator = end_getInstanceIterator(end); Cap *cap; while ((cap = end_getNext(instanceIterator)) != NULL) { if (!cap_getStrand(cap)) { cap = cap_getReverse(cap); } stList_append(list, cap); } end_destructInstanceIterator(instanceIterator); } flower_destructEndIterator(endIterator); assert(stList_length(list) % 2 == 0); //Sort the list of caps. stList_sort(list, (int(*)(const void *, const void *)) addAdjacenciesPP); //Now make the adjacencies. for (int64_t i = 1; i < stList_length(list); i += 2) { Cap *cap = stList_get(list, i - 1); Cap *cap2 = stList_get(list, i); cap_makeAdjacent(cap, cap2); } //Clean up. stList_destruct(list); }
static int addAdjacenciesPP(Cap *cap1, Cap *cap2) { assert(cap_getStrand(cap1) && cap_getStrand(cap2)); Sequence *sequence1 = cap_getSequence(cap1); Sequence *sequence2 = cap_getSequence(cap2); int64_t i = cactusMisc_nameCompare(sequence_getName(sequence1), sequence_getName(sequence2)); if (i == 0) { int64_t j = cap_getCoordinate(cap1); int64_t k = cap_getCoordinate(cap2); i = j > k ? 1 : (j < k ? -1 : 0); if (i == 0) { assert(cap_getSegment(cap1) == cap_getSegment(cap2)); j = cap_getSide(cap1); k = cap_getSide(cap2); assert((j && !k) || (!j && k)); i = j ? -1 : 1; } } return i; }
void bottomUp(stList *flowers, stKVDatabase *sequenceDatabase, Name referenceEventName, bool isTop, stMatrix *(*generateSubstitutionMatrix)(double)) { /* * A reference thread between the two caps * in each flower f may be broken into two in the children of f. * Therefore, for each flower f first identify attached stub ends present in the children of f that are * not present in f and copy them into f, reattaching the reference caps as needed. */ stList *caps = getCaps(flowers, referenceEventName); for (int64_t i = stList_length(caps) - 1; i >= 0; i--) { //Start from end, as we add to this list. setAdjacencyLengthsAndRecoverNewCapsAndBrokenAdjacencies(stList_get(caps, i), caps); } for(int64_t i=0; i<stList_length(flowers); i++) { recoverBrokenAdjacencies(stList_get(flowers, i), caps, referenceEventName); } //Build the phylogenetic event trees for base calling. segmentWriteFn_flowerToPhylogeneticTreeHash = stHash_construct2(NULL, (void (*)(void *))cleanupPhylogeneticTree); for(int64_t i=0; i<stList_length(flowers); i++) { Flower *flower = stList_get(flowers, i); Event *refEvent = eventTree_getEvent(flower_getEventTree(flower), referenceEventName); assert(refEvent != NULL); stHash_insert(segmentWriteFn_flowerToPhylogeneticTreeHash, flower, getPhylogeneticTreeRootedAtGivenEvent(refEvent, generateSubstitutionMatrix)); } if (isTop) { stList *threadStrings = buildRecursiveThreadsInList(sequenceDatabase, caps, segmentWriteFn, terminalAdjacencyWriteFn); assert(stList_length(threadStrings) == stList_length(caps)); int64_t nonTrivialSeqIndex = 0, trivialSeqIndex = stList_length(threadStrings); //These are used as indices for the names of trivial and non-trivial sequences. for (int64_t i = 0; i < stList_length(threadStrings); i++) { Cap *cap = stList_get(caps, i); assert(cap_getStrand(cap)); assert(!cap_getSide(cap)); Flower *flower = end_getFlower(cap_getEnd(cap)); char *threadString = stList_get(threadStrings, i); bool trivialString = isTrivialString(&threadString); //This alters the original string MetaSequence *metaSequence = addMetaSequence(flower, cap, trivialString ? trivialSeqIndex++ : nonTrivialSeqIndex++, threadString, trivialString); free(threadString); int64_t endCoordinate = setCoordinates(flower, metaSequence, cap, metaSequence_getStart(metaSequence) - 1); (void) endCoordinate; assert(endCoordinate == metaSequence_getLength(metaSequence) + metaSequence_getStart(metaSequence)); } stList_setDestructor(threadStrings, NULL); //The strings are already cleaned up by the above loop stList_destruct(threadStrings); } else { buildRecursiveThreads(sequenceDatabase, caps, segmentWriteFn, terminalAdjacencyWriteFn); } stHash_destruct(segmentWriteFn_flowerToPhylogeneticTreeHash); stList_destruct(caps); }
int64_t flower_getTotalBaseLength(Flower *flower) { /* * The implementation of this function is very like that in group_getTotalBaseLength, with a few differences. Consider merging them. */ Flower_EndIterator *endIterator = flower_getEndIterator(flower); End *end; int64_t totalLength = 0; while ((end = flower_getNextEnd(endIterator)) != NULL) { if (!end_isBlockEnd(end)) { End_InstanceIterator *instanceIterator = end_getInstanceIterator(end); Cap *cap; while ((cap = end_getNext(instanceIterator)) != NULL) { cap = cap_getStrand(cap) ? cap : cap_getReverse(cap); if (!cap_getSide(cap) && cap_getSequence(cap) != NULL) { Cap *cap2 = cap_getAdjacency(cap); assert(cap2 != NULL); while (end_isBlockEnd(cap_getEnd(cap2))) { Segment *segment = cap_getSegment(cap2); assert(segment != NULL); assert(segment_get5Cap(segment) == cap2); cap2 = cap_getAdjacency(segment_get3Cap(segment)); assert(cap2 != NULL); assert(cap_getStrand(cap2)); assert(cap_getSide(cap2)); } assert(cap_getStrand(cap2)); assert(cap_getSide(cap2)); int64_t length = cap_getCoordinate(cap2) - cap_getCoordinate(cap) - 1; assert(length >= 0); totalLength += length; } } end_destructInstanceIterator(instanceIterator); } } flower_destructEndIterator(endIterator); return totalLength; }
void topDown(Flower *flower, Name referenceEventName) { /* * Run on each flower, top down. Sets the coordinates of each reference cap to the correct * sequence, and sets the bases of the reference sequence to be consensus bases. */ Flower_EndIterator *endIt = flower_getEndIterator(flower); End *end; while ((end = flower_getNextEnd(endIt)) != NULL) { Cap *cap = getCapForReferenceEvent(end, referenceEventName); //The cap in the reference if (cap != NULL) { cap = cap_getStrand(cap) ? cap : cap_getReverse(cap); if (!cap_getSide(cap)) { assert(cap_getCoordinate(cap) != INT64_MAX); Sequence *sequence = cap_getSequence(cap); assert(sequence != NULL); Group *group = end_getGroup(end); if (!group_isLeaf(group)) { Flower *nestedFlower = group_getNestedFlower(group); Cap *nestedCap = flower_getCap(nestedFlower, cap_getName(cap)); assert(nestedCap != NULL); nestedCap = cap_getStrand(nestedCap) ? nestedCap : cap_getReverse(nestedCap); assert(cap_getStrand(nestedCap)); assert(!cap_getSide(nestedCap)); int64_t endCoordinate = setCoordinates(nestedFlower, sequence_getMetaSequence(sequence), nestedCap, cap_getCoordinate(cap)); (void) endCoordinate; assert(endCoordinate == cap_getCoordinate(cap_getAdjacency(cap))); assert(endCoordinate == cap_getCoordinate( flower_getCap(nestedFlower, cap_getName(cap_getAdjacency(cap))))); } } } } flower_destructEndIterator(endIt); }
void testCap_getStrand(CuTest* testCase) { cactusCapTestSetup(); CuAssertTrue(testCase, cap_getStrand(rootCap)); CuAssertTrue(testCase, !cap_getStrand(cap_getReverse(rootCap))); CuAssertTrue(testCase, cap_getStrand(leaf1Cap)); CuAssertTrue(testCase, !cap_getStrand(cap_getReverse(leaf1Cap))); CuAssertTrue(testCase, !cap_getStrand(leaf2Cap)); CuAssertTrue(testCase, cap_getStrand(cap_getReverse(leaf2Cap))); cactusCapTestTeardown(); }
static Cap *copyCapToParent(Cap *cap, stList *recoveredCaps) { /* * Get the adjacent stub end by looking at the reference adjacency in the parent. */ End *end = cap_getEnd(cap); assert(end != NULL); Group *parentGroup = flower_getParentGroup(end_getFlower(end)); assert(parentGroup != NULL); End *copiedEnd = end_copyConstruct(end, group_getFlower(parentGroup)); end_setGroup(copiedEnd, parentGroup); //Set group Cap *copiedCap = end_getInstance(copiedEnd, cap_getName(cap)); assert(copiedCap != NULL); copiedCap = cap_getStrand(copiedCap) ? copiedCap : cap_getReverse(copiedCap); if (!cap_getSide(copiedCap)) { stList_append(recoveredCaps, copiedCap); } return copiedCap; }
char *getTerminalAdjacencySubString(Cap *cap) { if(getTerminalAdjacencyLength_ignoreAdjacencies) { return stString_copy(""); } cap = getTerminalCap(cap); cap = cap_getStrand(cap) ? cap : cap_getReverse(cap); //This ensures the asserts are as expected. Cap *adjacentCap = cap_getAdjacency(cap); int64_t i = cap_getCoordinate(cap) - cap_getCoordinate(adjacentCap); assert(i != 0); if (i > 0) { assert(cap_getSide(cap)); assert(!cap_getSide(adjacentCap)); return sequence_getString(cap_getSequence(cap), cap_getCoordinate(adjacentCap) + 1, i - 1, 1); } else { assert(cap_getSide(adjacentCap)); assert(!cap_getSide(cap)); return sequence_getString(cap_getSequence(cap), cap_getCoordinate(cap) + 1, -i - 1, 1); } }
static stList *getCaps(stList *flowers, Name referenceEventName) { stList *caps = stList_construct(); for (int64_t i = 0; i < stList_length(flowers); i++) { Flower *flower = stList_get(flowers, i); //Get list of caps Flower_EndIterator *endIt = flower_getEndIterator(flower); End *end; while ((end = flower_getNextEnd(endIt)) != NULL) { if (end_isStubEnd(end)) { Cap *cap = getCapForReferenceEvent(end, referenceEventName); //The cap in the reference if(cap != NULL) { cap = cap_getStrand(cap) ? cap : cap_getReverse(cap); if (!cap_getSide(cap)) { stList_append(caps, cap); } } } } flower_destructEndIterator(endIt); } return caps; }
static stList *getSubstringsForFlowers(stList *flowers) { /* * Get the set of substrings for sequence intervals in the given set of flowers. */ stList *substrings = stList_construct3(0, (void (*)(void *)) substring_destruct); for (int64_t i = 0; i < stList_length(flowers); i++) { Flower *flower = stList_get(flowers, i); Flower_EndIterator *endIt = flower_getEndIterator(flower); End *end; while ((end = flower_getNextEnd(endIt)) != NULL) { if (end_isStubEnd(end)) { End_InstanceIterator *instanceIt = end_getInstanceIterator(end); Cap *cap; while ((cap = end_getNext(instanceIt)) != NULL) { Sequence *sequence; if ((sequence = cap_getSequence(cap)) != NULL) { cap = cap_getStrand(cap) ? cap : cap_getReverse(cap); if (!cap_getSide(cap)) { //We have a sequence interval of interest Cap *adjacentCap = cap_getAdjacency(cap); assert(adjacentCap != NULL); int64_t length = cap_getCoordinate(adjacentCap) - cap_getCoordinate(cap) - 1; assert(length >= 0); if (length > 0) { stList_append(substrings, substring_construct(sequence_getMetaSequence(sequence)->stringName, cap_getCoordinate(cap) + 1 - sequence_getStart(sequence), length)); } } } } end_destructInstanceIterator(instanceIt); } } flower_destructEndIterator(endIt); } return substrings; }