/* * Function does the actual depth first search to detect if the thing has an acyclic ordering. */ static int64_t dfs(stHash *adjacencyList, stIntTuple *seqPos, stSortedSet *started, stSortedSet *done) { if(stSortedSet_search(started, seqPos) != NULL) { if(stSortedSet_search(done, seqPos) == NULL) { //We have detected a cycle //st_logInfo("I have cycle %" PRIi64 " %" PRIi64 "\n", stIntTuple_getPosition(seqPos, 0), stIntTuple_getPosition(seqPos, 1)); return 1; } //We have already explored this area, but no cycle. return 0; } stSortedSet_insert(started, seqPos); int64_t cycle =0; stIntTuple *nextSeqPos = stIntTuple_construct2( stIntTuple_get(seqPos, 0), stIntTuple_get(seqPos, 1) + 1); stSortedSet *column = stHash_search(adjacencyList, nextSeqPos); if(column != NULL) { //It is in the adjacency list, so we can do the recursion assert(stSortedSet_search(column, nextSeqPos) != NULL); stSortedSetIterator *it = stSortedSet_getIterator(column); stIntTuple *seqPos2; while((seqPos2 = stSortedSet_getNext(it)) != NULL) { cycle = cycle || dfs(adjacencyList, seqPos2, started, done); } stSortedSet_destructIterator(it); } stIntTuple_destruct(nextSeqPos); stSortedSet_insert(done, seqPos); return cycle; }
static void getMaximalHaplotypePathsCheck(Flower *flower, stSortedSet *segmentSet, const char *eventString, stList *eventStrings) { /* * Do debug checks that the haplotypes paths are well formed. */ Flower_SegmentIterator *segmentIt = flower_getSegmentIterator(flower); Segment *segment; while ((segment = flower_getNextSegment(segmentIt)) != NULL) { if (strcmp(event_getHeader(segment_getEvent(segment)), eventString) == 0) { if (hasCapInEvents(cap_getEnd(segment_get5Cap(segment)), eventStrings)) { //isHaplotypeEnd(cap_getEnd(segment_get5Cap(segment)))) { assert(stSortedSet_search(segmentSet, segment) != NULL || stSortedSet_search(segmentSet, segment_getReverse( segment)) != NULL); } } } flower_destructSegmentIterator(segmentIt); Flower_GroupIterator *groupIt = flower_getGroupIterator(flower); Group *group; while ((group = flower_getNextGroup(groupIt)) != NULL) { if (group_getNestedFlower(group) != NULL) { getMaximalHaplotypePathsCheck(group_getNestedFlower(group), segmentSet, eventString, eventStrings); } } flower_destructGroupIterator(groupIt); }
static void test_stSortedSet_searchGreaterThan(CuTest* testCase) { sonLibSortedSetTestSetup(); for(int32_t i=0; i<size; i++) { stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[i])); } //static int32_t sortedInput[] = { -10, -1, 1, 3, 5, 10, 12 }; CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet, stIntTuple_construct(1, -11)) == stSortedSet_search(sortedSet, stIntTuple_construct(1, -10))); CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet, stIntTuple_construct(1, -10)) == stSortedSet_search(sortedSet, stIntTuple_construct(1, -1))); CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet, stIntTuple_construct(1, -5)) == stSortedSet_search(sortedSet, stIntTuple_construct(1, -1))); CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet, stIntTuple_construct(1, 1)) == stSortedSet_search(sortedSet, stIntTuple_construct(1, 3))); CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet, stIntTuple_construct(1, 13)) == NULL); CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet, stIntTuple_construct(1, 12)) == NULL); for(int32_t i=0; i<100; i++) { stSortedSet_insert(sortedSet, stIntTuple_construct(1, st_randomInt(-1000, 1000))); } stList *list = stSortedSet_getList(sortedSet); for(int32_t i=1; i<stList_length(list); i++) { stIntTuple *p = stList_get(list, i-1); stIntTuple *j = stList_get(list, i); stIntTuple *k = stIntTuple_construct(1, st_randomInt(stIntTuple_getPosition(p, 0), stIntTuple_getPosition(j, 0))); CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet, k) == j); stIntTuple_destruct(k); } stList_destruct(list); sonLibSortedSetTestTeardown(); }
static void getMaximalHaplotypePathsP2(Segment *segment, stList *maximalHaplotypePath, stSortedSet *segmentSet, stList *eventStrings) { /* * Iterate all the way to one end of the contig then start the traversal to define the maximal * haplotype path. */ Cap *_5Cap = segment_get5Cap(segment); assert(hasCapInEvents(cap_getEnd(segment_get3Cap(segment)), eventStrings)); //isHaplotypeEnd(cap_getEnd(segment_get3Cap(segment)))); if (trueAdjacency(_5Cap, eventStrings)) { //Check that the adjacency is supported by a haplotype path Segment *otherSegment = getAdjacentCapsSegment(_5Cap); assert(segment != otherSegment); assert(segment_getReverse(segment) != otherSegment); if (otherSegment != NULL) { assert(stSortedSet_search(segmentSet, otherSegment) == NULL); assert(stSortedSet_search(segmentSet, segment_getReverse( otherSegment)) == NULL); assert(hasCapInEvents(cap_getEnd(segment_get3Cap(otherSegment)), eventStrings)); //isHaplotypeEnd(cap_getEnd(segment_get3Cap(otherSegment)))); getMaximalHaplotypePathsP2(otherSegment, maximalHaplotypePath, segmentSet, eventStrings); } else { //We need to start the maximal haplotype recursion getMaximalHaplotypePathsP3(segment, maximalHaplotypePath, segmentSet, eventStrings); } } else { getMaximalHaplotypePathsP3(segment, maximalHaplotypePath, segmentSet, eventStrings); } }
static void getMaximalHaplotypePathsP3(Segment *segment, stList *maximalHaplotypePath, stSortedSet *segmentSet, stList *eventStrings) { stList_append(maximalHaplotypePath, segment); assert(stSortedSet_search(segmentSet, segment) == NULL); assert(stSortedSet_search(segmentSet, segment_getReverse(segment)) == NULL); stSortedSet_insert(segmentSet, segment); Cap *_3Cap = segment_get3Cap(segment); if (trueAdjacency(_3Cap, eventStrings)) { //Continue on.. Segment *otherSegment = getAdjacentCapsSegment(_3Cap); if (otherSegment != NULL) { getMaximalHaplotypePathsP3(otherSegment, maximalHaplotypePath, segmentSet, eventStrings); } } }
static void makeMatchingPerfect(stList *chosenEdges, stList *adjacencyEdges, stSortedSet *nodes) { /* * While the the number of edges is less than a perfect matching add random edges. */ stSortedSet *attachedNodes = getNodeSetOfEdges(chosenEdges); stHash *nodesToAdjacencyEdges = getNodesToEdgesHash(adjacencyEdges); stIntTuple *pNode = NULL; stSortedSetIterator *it = stSortedSet_getIterator(nodes); stIntTuple *node; while((node = stSortedSet_getNext(it)) != NULL) { if (stSortedSet_search(attachedNodes, node) == NULL) { if (pNode == NULL) { pNode = node; } else { stList_append(chosenEdges, getEdgeForNodes(stIntTuple_get(pNode, 0), stIntTuple_get(node, 0), nodesToAdjacencyEdges)); pNode = NULL; } } } stSortedSet_destructIterator(it); assert(pNode == NULL); stSortedSet_destruct(attachedNodes); assert(stList_length(chosenEdges) * 2 == stSortedSet_size(nodes)); stHash_destruct(nodesToAdjacencyEdges); }
Block *flower_getBlock(Flower *flower, Name name) { Block block; BlockContents blockContents; block.blockContents = &blockContents; blockContents.name = name; return stSortedSet_search(flower->blocks, &block); }
/* * Uses the functions above to build an adjacency list, then by DFS attempts to create * a valid topological sort, returning non-zero if the graph contains a cycle. */ static int64_t containsACycle(stList *pairs, int64_t sequenceNumber) { //Build an adjacency list structure.. stHash *adjacencyList = buildAdjacencyList(pairs, sequenceNumber); //Do a topological sort of the adjacency list stSortedSet *started = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL); stSortedSet *done = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL); int64_t cyclic = 0; for(int64_t seq=0; seq<sequenceNumber; seq++) { stIntTuple *seqPos = stIntTuple_construct2( seq, 0); //The following hacks avoid memory cleanup.. stSortedSet *column = stHash_search(adjacencyList, seqPos); assert(column != NULL); stIntTuple *seqPos2 = stSortedSet_search(column, seqPos); assert(seqPos2 != NULL); cyclic = cyclic || dfs(adjacencyList, seqPos2, started, done); stIntTuple_destruct(seqPos); } //cleanup stHashIterator *it = stHash_getIterator(adjacencyList); stIntTuple *seqPos; stSortedSet *columns = stSortedSet_construct2((void (*)(void *))stSortedSet_destruct); while((seqPos = stHash_getNext(it)) != NULL) { stSortedSet *column = stHash_search(adjacencyList, seqPos); assert(column != NULL); stSortedSet_insert(columns, column); } stHash_destructIterator(it); stHash_destruct(adjacencyList); stSortedSet_destruct(columns); stSortedSet_destruct(started); stSortedSet_destruct(done); return cyclic; }
static void checkComponents(CuTest *testCase, stList *filteredEdges) { stHash *nodesToComponents = getComponents(filteredEdges); //Check all components are smaller than threshold stList *components = stHash_getValues(nodesToComponents); for (int64_t i = 0; i < stList_length(components); i++) { stSortedSet *component = stList_get(components, i); CuAssertTrue(testCase, stSortedSet_size(component) <= maxComponentSize); CuAssertTrue(testCase, stSortedSet_size(component) >= 1); } //Check no edges can be added from those filtered. stSortedSet *filteredEdgesSet = stList_getSortedSet(filteredEdges, (int(*)(const void *, const void *)) stIntTuple_cmpFn); for (int64_t i = 0; i < stList_length(edges); i++) { stIntTuple *edge = stList_get(edges, i); if (stSortedSet_search(filteredEdgesSet, edge) == NULL) { stIntTuple *node1 = stIntTuple_construct1( stIntTuple_get(edge, 1)); stIntTuple *node2 = stIntTuple_construct1( stIntTuple_get(edge, 2)); stSortedSet *component1 = stHash_search(nodesToComponents, node1); stSortedSet *component2 = stHash_search(nodesToComponents, node2); CuAssertTrue(testCase, component1 != NULL && component2 != NULL); CuAssertTrue(testCase, component1 != component2); CuAssertTrue(testCase, stSortedSet_size(component1) + stSortedSet_size(component2) > maxComponentSize); stIntTuple_destruct(node1); stIntTuple_destruct(node2); } } stSortedSet_destruct(filteredEdgesSet); //Cleanup the components stSortedSet *componentsSet = stList_getSortedSet(components, NULL); stList_destruct(components); stSortedSet_setDestructor(componentsSet, (void(*)(void *)) stSortedSet_destruct); stSortedSet_destruct(componentsSet); stHash_destruct(nodesToComponents); }
static void getMetaSequencesForEventsP(stSortedSet *metaSequences, Flower *flower, stList *eventStrings) { //Iterate over the sequences in the flower. Flower_SequenceIterator *seqIt = flower_getSequenceIterator(flower); Sequence *sequence; while ((sequence = flower_getNextSequence(seqIt)) != NULL) { MetaSequence *metaSequence = sequence_getMetaSequence(sequence); if (stringIsInList(event_getHeader(sequence_getEvent(sequence)), eventStrings) == 0) { if (stSortedSet_search(metaSequences, metaSequence) == NULL) { stSortedSet_insert(metaSequences, metaSequence); } } } flower_destructSequenceIterator(seqIt); //Recurse over the flowers Flower_GroupIterator *groupIt = flower_getGroupIterator(flower); Group *group; while ((group = flower_getNextGroup(groupIt)) != NULL) { if (group_getNestedFlower(group) != NULL) { getMetaSequencesForEventsP(metaSequences, group_getNestedFlower(group), eventStrings); } } flower_destructGroupIterator(groupIt); }
Sequence *flower_getSequence(Flower *flower, Name name) { Sequence sequence; MetaSequence metaSequence; sequence.metaSequence = &metaSequence; metaSequence.name = name; return stSortedSet_search(flower->sequences, &sequence); }
Cap *flower_getCap(Flower *flower, Name name) { Cap cap; CapContents capContents; cap.capContents = &capContents; cap.capContents->instance = name; return stSortedSet_search(flower->caps, &cap); }
End *flower_getEnd(Flower *flower, Name name) { End end; EndContents endContents; end.endContents = &endContents; endContents.name = name; end.orientation = 1; return stSortedSet_search(flower->ends, &end); }
static void test_stSortedSetIntersection(CuTest* testCase) { sonLibSortedSetTestSetup(); //Check intersection of empty sets is okay.. stSortedSet *sortedSet3 = stSortedSet_getIntersection(sortedSet, sortedSet2); CuAssertTrue(testCase, stSortedSet_size(sortedSet3) == 0); stSortedSet_destruct(sortedSet3); int32_t i; for(i=0; i<size; i++) { stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[i])); } //Check intersection of empty and non-empty set is empty. sortedSet3 = stSortedSet_getIntersection(sortedSet, sortedSet2); CuAssertTrue(testCase, stSortedSet_size(sortedSet3) == 0); stSortedSet_destruct(sortedSet3); //Check intersection of two non-empty, overlapping sets in correct. stSortedSet_insert(sortedSet2, stIntTuple_construct(1, 0)); stSortedSet_insert(sortedSet2, stIntTuple_construct(1, 1)); stSortedSet_insert(sortedSet2, stIntTuple_construct(1, 5)); sortedSet3 = stSortedSet_getIntersection(sortedSet, sortedSet2); CuAssertTrue(testCase, stSortedSet_size(sortedSet3) == 2); stIntTuple *intTuple = stIntTuple_construct(1, 1); CuAssertTrue(testCase, stSortedSet_search(sortedSet3, intTuple) != NULL); stIntTuple_destruct(intTuple); intTuple = stIntTuple_construct(1, 5); CuAssertTrue(testCase, stSortedSet_search(sortedSet3, intTuple) != NULL); stIntTuple_destruct(intTuple); stSortedSet_destruct(sortedSet3); //Check we get an exception with sorted sets with different comparators. stSortedSet *sortedSet4 = stSortedSet_construct(); stTry { stSortedSet_getIntersection(sortedSet, sortedSet4); } stCatch(except) { CuAssertTrue(testCase, stExcept_getId(except) == SORTED_SET_EXCEPTION_ID); } stTryEnd stSortedSet_destruct(sortedSet4); sonLibSortedSetTestTeardown(); }
void stSortedSet_insert(stSortedSet *sortedSet, void *object) { checkModifiable(sortedSet); // FIXME: two passes, modify avl code. if(stSortedSet_search(sortedSet, object) != NULL) { avl_replace(sortedSet->sortedSet, object); } else { avl_insert(sortedSet->sortedSet, object); } }
static stList *getEdgesThatBridgeComponents(stList *components, stHash *nodesToNonZeroWeightedAdjacencyEdges) { /* * Get set of adjacency edges that bridge between (have a node in two) components. */ stList *bridgingAdjacencyEdges = stList_construct(); for (int64_t i = 0; i < stList_length(components); i++) { stSortedSet *componentNodes = getNodeSetOfEdges( stList_get(components, i)); stSortedSetIterator *it = stSortedSet_getIterator(componentNodes); stIntTuple *node; while ((node = stSortedSet_getNext(it)) != NULL) { stList *edges = stHash_search(nodesToNonZeroWeightedAdjacencyEdges, node); if (edges != NULL) { for (int64_t j = 0; j < stList_length(edges); j++) { stIntTuple *edge = stList_get(edges, j); stIntTuple *node1 = stIntTuple_construct1( stIntTuple_get(edge, 0)); stIntTuple *node2 = stIntTuple_construct1( stIntTuple_get(edge, 1)); assert( stSortedSet_search(componentNodes, node1) != NULL || stSortedSet_search(componentNodes, node2) != NULL); if (stSortedSet_search(componentNodes, node1) == NULL || stSortedSet_search(componentNodes, node2) == NULL) { stList_append(bridgingAdjacencyEdges, edge); } stIntTuple_destruct(node1); stIntTuple_destruct(node2); } } } stSortedSet_destructIterator(it); stSortedSet_destruct(componentNodes); } return bridgingAdjacencyEdges; }
static void test_stSortedSet(CuTest* testCase) { sonLibSortedSetTestSetup(); int32_t i; CuAssertIntEquals(testCase, 0, stSortedSet_size(sortedSet)); for(i=0; i<size; i++) { stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[i])); } CuAssertIntEquals(testCase, sortedSize, stSortedSet_size(sortedSet)); CuAssertIntEquals(testCase, sortedInput[0], stIntTuple_getPosition(stSortedSet_getFirst(sortedSet), 0)); CuAssertIntEquals(testCase, sortedInput[sortedSize-1], stIntTuple_getPosition(stSortedSet_getLast(sortedSet), 0)); for(i=0; i<sortedSize; i++) { CuAssertIntEquals(testCase, sortedSize-i, stSortedSet_size(sortedSet)); stIntTuple *tuple = stIntTuple_construct(1, sortedInput[i]); CuAssertTrue(testCase, stIntTuple_getPosition(stSortedSet_search(sortedSet, tuple), 0) == sortedInput[i]); stSortedSet_remove(sortedSet, tuple); CuAssertTrue(testCase, stSortedSet_search(sortedSet, tuple) == NULL); stIntTuple_destruct(tuple); } sonLibSortedSetTestTeardown(); }
static void getMaximalHaplotypePathsP(Flower *flower, stList *maximalHaplotypePaths, stSortedSet *segmentSet, const char *eventString, stList *eventStrings) { /* * Iterate through the segments in this flower. */ Flower_SegmentIterator *segmentIt = flower_getSegmentIterator(flower); Segment *segment; while ((segment = flower_getNextSegment(segmentIt)) != NULL) { if (stSortedSet_search(segmentSet, segment) == NULL && stSortedSet_search(segmentSet, segment_getReverse(segment)) == NULL) { //Check we haven't yet seen this segment if (strcmp(event_getHeader(segment_getEvent(segment)), eventString) == 0) { //Check if the segment is in the assembly if (hasCapInEvents(cap_getEnd(segment_get5Cap(segment)), eventStrings)) { //Is a block in a haplotype segment assert(hasCapInEvents(cap_getEnd(segment_get3Cap(segment)), eventStrings)); //isHaplotypeEnd(cap_getEnd(segment_get3Cap(segment)))); stList *maximalHaplotypePath = stList_construct(); stList_append(maximalHaplotypePaths, maximalHaplotypePath); getMaximalHaplotypePathsP2(segment, maximalHaplotypePath, segmentSet, eventStrings); } else { assert(!hasCapInEvents(cap_getEnd(segment_get3Cap(segment)), eventStrings));//assert(!isHaplotypeEnd(cap_getEnd(segment_get3Cap(segment)))); } } } } flower_destructSegmentIterator(segmentIt); /* * Now recurse on the contained flowers. */ Flower_GroupIterator *groupIt = flower_getGroupIterator(flower); Group *group; while ((group = flower_getNextGroup(groupIt)) != NULL) { if (group_getNestedFlower(group) != NULL) { getMaximalHaplotypePathsP(group_getNestedFlower(group), maximalHaplotypePaths, segmentSet, eventString, eventStrings); } } flower_destructGroupIterator(groupIt); }
static int64_t intersectionSize(stSortedSet *set, stList *list) { /* * Returns the intersection size of the list and the set. */ int64_t count = 0; for (int64_t j = 0; j < stList_length(list); j++) { if (stSortedSet_search(set, stList_get(list, j)) != NULL) { count++; } } return count; }
void testCactusDisk_getUniqueID_Unique(CuTest* testCase) { cactusDiskTestSetup(); stSortedSet *uniqueNames = stSortedSet_construct3(testCactusDisk_getUniqueID_UniqueP, free); for (int64_t i = 0; i < 100000; i++) { //Gets a billion ids, checks we are good. Name uniqueName = cactusDisk_getUniqueID(cactusDisk); CuAssertTrue(testCase, uniqueName > 0); CuAssertTrue(testCase, uniqueName < INT64_MAX); CuAssertTrue(testCase, uniqueName != NULL_NAME); char *cA = cactusMisc_nameToString(uniqueName); CuAssertTrue(testCase, stSortedSet_search(uniqueNames, cA) == NULL); CuAssertTrue(testCase, cactusMisc_stringToName(cA) == uniqueName); stSortedSet_insert(uniqueNames, cA); } stSortedSet_destruct(uniqueNames); cactusDiskTestTeardown(); }
stSortedSet *stSortedSet_getDifference(stSortedSet *sortedSet1, stSortedSet *sortedSet2) { if(!stSortedSet_comparatorsEqual(sortedSet1, sortedSet2)) { stThrowNew(SORTED_SET_EXCEPTION_ID, "Comparators are not equal for creating the sorted set difference"); } stSortedSet *sortedSet3 = stSortedSet_construct3(stSortedSet_getComparator(sortedSet1)->compareFn, NULL); //Add those from sortedSet1 only if they are not in sortedSet2 stSortedSetIterator *it= stSortedSet_getIterator(sortedSet1); void *o; while((o = stSortedSet_getNext(it)) != NULL) { if(stSortedSet_search(sortedSet2, o) == NULL) { stSortedSet_insert(sortedSet3, o); } } stSortedSet_destructIterator(it); return sortedSet3; }
void testCactusDisk_getUniqueID_UniqueIntervals(CuTest* testCase) { cactusDiskTestSetup(); stSortedSet *uniqueNames = stSortedSet_construct3(testCactusDisk_getUniqueID_UniqueP, free); for (int64_t i = 0; i < 10; i++) { //Gets a billion ids, checks we are good. int64_t intervalSize = st_randomInt(0, 100000); Name uniqueName = cactusDisk_getUniqueIDInterval(cactusDisk, intervalSize); for(int64_t j=0; j<intervalSize; j++) { CuAssertTrue(testCase, uniqueName > 0); CuAssertTrue(testCase, uniqueName < INT64_MAX); CuAssertTrue(testCase, uniqueName != NULL_NAME); char *cA = cactusMisc_nameToString(uniqueName); CuAssertTrue(testCase, stSortedSet_search(uniqueNames, cA) == NULL); CuAssertTrue(testCase, cactusMisc_stringToName(cA) == uniqueName); stSortedSet_insert(uniqueNames, cA); uniqueName++; } } stSortedSet_destruct(uniqueNames); cactusDiskTestTeardown(); }
/* * This builds an adjacency list structure for the the sequences. Every sequence-position * has a column in the hash with which it can be aligned with. */ static stHash *buildAdjacencyList(stList *pairs, int64_t sequenceNumber) { stHash *hash = stHash_construct3((uint64_t (*)(const void *))stIntTuple_hashKey, (int (*)(const void *, const void *))stIntTuple_equalsFn, (void (*)(void *))stIntTuple_destruct, NULL); for(int64_t seq=0; seq<sequenceNumber; seq++) { for(int64_t position=0; position<MAX_SEQUENCE_SIZE; position++) { stIntTuple *seqPos = stIntTuple_construct2( seq, position); stSortedSet *column = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL); stSortedSet_insert(column, seqPos); stHash_insert(hash, seqPos, column); } } stListIterator *it = stList_getIterator(pairs); stIntTuple *pair; while((pair = stList_getNext(it)) != NULL) { stIntTuple *seqPos1 = stIntTuple_construct2( stIntTuple_get(pair, 0), stIntTuple_get(pair, 1)); stIntTuple *seqPos2 = stIntTuple_construct2( stIntTuple_get(pair, 2), stIntTuple_get(pair, 3)); stSortedSet *column1 = stHash_search(hash, seqPos1); assert(column1 != NULL); stSortedSet *column2 = stHash_search(hash, seqPos2); assert(column2 != NULL); if(column1 != column2) { //Merge the columns stSortedSetIterator *it2 = stSortedSet_getIterator(column2); stIntTuple *seqPos3; while((seqPos3 = stSortedSet_getNext(it2)) != NULL) { assert(stSortedSet_search(column1, seqPos3) == NULL); stSortedSet_insert(column1, seqPos3); assert(stHash_search(hash, seqPos3) == column2); stHash_insert(hash, seqPos3, column1); assert(stHash_search(hash, seqPos3) == column1); } stSortedSet_destructIterator(it2); stSortedSet_destruct(column2); } //Cleanup loop. stIntTuple_destruct(seqPos1); stIntTuple_destruct(seqPos2); } stList_destructIterator(it); return hash; }
static void getOrderedSegmentsP(Flower *flower, stSortedSet *segments) { Flower_SegmentIterator *segmentIt = flower_getSegmentIterator(flower); Segment *segment; while ((segment = flower_getNextSegment(segmentIt)) != NULL) { if (!segment_getStrand(segment)) { segment = segment_getReverse(segment); } assert(stSortedSet_search(segments, segment) == NULL); stSortedSet_insert(segments, segment); } flower_destructSegmentIterator(segmentIt); //Recurse over the flowers Flower_GroupIterator *groupIt = flower_getGroupIterator(flower); Group *group; while ((group = flower_getNextGroup(groupIt)) != NULL) { if (group_getNestedFlower(group) != NULL) { getOrderedSegmentsP(group_getNestedFlower(group), segments); } } flower_destructGroupIterator(groupIt); }
static void getComponentsP(stHash *nodesToEdges, int64_t node, stSortedSet *component) { stIntTuple *key = stIntTuple_construct1( node); stList *edges = stHash_search(nodesToEdges, key); if (edges != NULL) { stHash_remove(nodesToEdges, key); for (int64_t i = 0; i < stList_length(edges); i++) { stIntTuple *edge = stList_get(edges, i); if (stSortedSet_search(component, edge) == NULL) { stSortedSet_insert(component, edge); } /* * Recursion on stack could equal the total number of nodes. */ getComponentsP(nodesToEdges, stIntTuple_get(edge, 0), component); getComponentsP(nodesToEdges, stIntTuple_get(edge, 1), component); } stList_destruct(edges); } stIntTuple_destruct(key); }
stSortedSet *makeEndAlignment(StateMachine *sM, End *end, int64_t spanningTrees, int64_t maxSequenceLength, bool useProgressiveMerging, float gapGamma, PairwiseAlignmentParameters *pairwiseAlignmentBandingParameters) { //Make an alignment of the sequences in the ends //Get the adjacency sequences to be aligned. Cap *cap; End_InstanceIterator *it = end_getInstanceIterator(end); stList *sequences = stList_construct3(0, (void (*)(void *))adjacencySequence_destruct); stList *seqFrags = stList_construct3(0, (void (*)(void *))seqFrag_destruct); stHash *endInstanceNumbers = stHash_construct2(NULL, free); while((cap = end_getNext(it)) != NULL) { if(cap_getSide(cap)) { cap = cap_getReverse(cap); } AdjacencySequence *adjacencySequence = adjacencySequence_construct(cap, maxSequenceLength); stList_append(sequences, adjacencySequence); assert(cap_getAdjacency(cap) != NULL); End *otherEnd = end_getPositiveOrientation(cap_getEnd(cap_getAdjacency(cap))); stList_append(seqFrags, seqFrag_construct(adjacencySequence->string, 0, end_getName(otherEnd))); //Increase count of seqfrags with a given end. int64_t *c = stHash_search(endInstanceNumbers, otherEnd); if(c == NULL) { c = st_calloc(1, sizeof(int64_t)); assert(*c == 0); stHash_insert(endInstanceNumbers, otherEnd, c); } (*c)++; } end_destructInstanceIterator(it); //Get the alignment. MultipleAlignment *mA = makeAlignment(sM, seqFrags, spanningTrees, 100000000, useProgressiveMerging, gapGamma, pairwiseAlignmentBandingParameters); //Build an array of weights to reweight pairs in the alignment. int64_t *pairwiseAlignmentsPerSequenceNonCommonEnds = st_calloc(stList_length(seqFrags), sizeof(int64_t)); int64_t *pairwiseAlignmentsPerSequenceCommonEnds = st_calloc(stList_length(seqFrags), sizeof(int64_t)); //First build array on number of pairwise alignments to each sequence, distinguishing alignments between sequences sharing //common ends. for(int64_t i=0; i<stList_length(mA->chosenPairwiseAlignments); i++) { stIntTuple *pairwiseAlignment = stList_get(mA->chosenPairwiseAlignments, i); int64_t seq1 = stIntTuple_get(pairwiseAlignment, 1); int64_t seq2 = stIntTuple_get(pairwiseAlignment, 2); assert(seq1 != seq2); SeqFrag *seqFrag1 = stList_get(seqFrags, seq1); SeqFrag *seqFrag2 = stList_get(seqFrags, seq2); int64_t *pairwiseAlignmentsPerSequence = seqFrag1->rightEndId == seqFrag2->rightEndId ? pairwiseAlignmentsPerSequenceCommonEnds : pairwiseAlignmentsPerSequenceNonCommonEnds; pairwiseAlignmentsPerSequence[seq1]++; pairwiseAlignmentsPerSequence[seq2]++; } //Now calculate score adjustments. double *scoreAdjustmentsNonCommonEnds = st_malloc(stList_length(seqFrags) * sizeof(double)); double *scoreAdjustmentsCommonEnds = st_malloc(stList_length(seqFrags) * sizeof(double)); for(int64_t i=0; i<stList_length(seqFrags); i++) { SeqFrag *seqFrag = stList_get(seqFrags, i); End *otherEnd = flower_getEnd(end_getFlower(end), seqFrag->rightEndId); assert(otherEnd != NULL); assert(stHash_search(endInstanceNumbers, otherEnd) != NULL); int64_t commonInstanceNumber = *(int64_t *)stHash_search(endInstanceNumbers, otherEnd); int64_t nonCommonInstanceNumber = stList_length(seqFrags) - commonInstanceNumber; assert(commonInstanceNumber > 0 && nonCommonInstanceNumber >= 0); assert(pairwiseAlignmentsPerSequenceNonCommonEnds[i] <= nonCommonInstanceNumber); assert(pairwiseAlignmentsPerSequenceNonCommonEnds[i] >= 0); assert(pairwiseAlignmentsPerSequenceCommonEnds[i] < commonInstanceNumber); assert(pairwiseAlignmentsPerSequenceCommonEnds[i] >= 0); //scoreAdjustmentsNonCommonEnds[i] = ((double)nonCommonInstanceNumber + commonInstanceNumber - 1)/(pairwiseAlignmentsPerSequenceNonCommonEnds[i] + pairwiseAlignmentsPerSequenceCommonEnds[i]); //scoreAdjustmentsCommonEnds[i] = scoreAdjustmentsNonCommonEnds[i]; if(pairwiseAlignmentsPerSequenceNonCommonEnds[i] > 0) { scoreAdjustmentsNonCommonEnds[i] = ((double)nonCommonInstanceNumber)/pairwiseAlignmentsPerSequenceNonCommonEnds[i]; assert(scoreAdjustmentsNonCommonEnds[i] >= 1.0); assert(scoreAdjustmentsNonCommonEnds[i] <= nonCommonInstanceNumber); } else { scoreAdjustmentsNonCommonEnds[i] = INT64_MIN; } if(pairwiseAlignmentsPerSequenceCommonEnds[i] > 0) { scoreAdjustmentsCommonEnds[i] = ((double)commonInstanceNumber-1)/pairwiseAlignmentsPerSequenceCommonEnds[i]; assert(scoreAdjustmentsCommonEnds[i] >= 1.0); assert(scoreAdjustmentsCommonEnds[i] <= commonInstanceNumber-1); } else { scoreAdjustmentsCommonEnds[i] = INT64_MIN; } } //Convert the alignment pairs to an alignment of the caps.. stSortedSet *sortedAlignment = stSortedSet_construct3((int (*)(const void *, const void *))alignedPair_cmpFn, (void (*)(void *))alignedPair_destruct); while(stList_length(mA->alignedPairs) > 0) { stIntTuple *alignedPair = stList_pop(mA->alignedPairs); assert(stIntTuple_length(alignedPair) == 5); int64_t seqIndex1 = stIntTuple_get(alignedPair, 1); int64_t seqIndex2 = stIntTuple_get(alignedPair, 3); AdjacencySequence *i = stList_get(sequences, seqIndex1); AdjacencySequence *j = stList_get(sequences, seqIndex2); assert(i != j); int64_t offset1 = stIntTuple_get(alignedPair, 2); int64_t offset2 = stIntTuple_get(alignedPair, 4); int64_t score = stIntTuple_get(alignedPair, 0); if(score <= 0) { //Happens when indel probs are included score = 1; //This is the minimum } assert(score > 0 && score <= PAIR_ALIGNMENT_PROB_1); SeqFrag *seqFrag1 = stList_get(seqFrags, seqIndex1); SeqFrag *seqFrag2 = stList_get(seqFrags, seqIndex2); assert(seqFrag1 != seqFrag2); double *scoreAdjustments = seqFrag1->rightEndId == seqFrag2->rightEndId ? scoreAdjustmentsCommonEnds : scoreAdjustmentsNonCommonEnds; assert(scoreAdjustments[seqIndex1] != INT64_MIN); assert(scoreAdjustments[seqIndex2] != INT64_MIN); AlignedPair *alignedPair2 = alignedPair_construct( i->subsequenceIdentifier, i->start + (i->strand ? offset1 : -offset1), i->strand, j->subsequenceIdentifier, j->start + (j->strand ? offset2 : -offset2), j->strand, score*scoreAdjustments[seqIndex1], score*scoreAdjustments[seqIndex2]); //Do the reweighting here. assert(stSortedSet_search(sortedAlignment, alignedPair2) == NULL); assert(stSortedSet_search(sortedAlignment, alignedPair2->reverse) == NULL); stSortedSet_insert(sortedAlignment, alignedPair2); stSortedSet_insert(sortedAlignment, alignedPair2->reverse); stIntTuple_destruct(alignedPair); } //Cleanup stList_destruct(seqFrags); stList_destruct(sequences); free(pairwiseAlignmentsPerSequenceNonCommonEnds); free(pairwiseAlignmentsPerSequenceCommonEnds); free(scoreAdjustmentsNonCommonEnds); free(scoreAdjustmentsCommonEnds); multipleAlignment_destruct(mA); stHash_destruct(endInstanceNumbers); return sortedAlignment; }
Segment *block_getInstance(Block *block, Name name) { Segment segment; segment.name = name; return block_getInstanceP(block, stSortedSet_search(block->blockContents->segments, &segment)); }
int main(int argc, char *argv[]) { st_setLogLevelFromString(argv[1]); st_logDebug("Set up logging\n"); stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString(argv[2]); CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, 0); stKVDatabaseConf_destruct(kvDatabaseConf); st_logDebug("Set up the flower disk\n"); Name flowerName = cactusMisc_stringToName(argv[3]); Flower *flower = cactusDisk_getFlower(cactusDisk, flowerName); int64_t totalBases = flower_getTotalBaseLength(flower); int64_t totalEnds = flower_getEndNumber(flower); int64_t totalFreeEnds = flower_getFreeStubEndNumber(flower); int64_t totalAttachedEnds = flower_getAttachedStubEndNumber(flower); int64_t totalCaps = flower_getCapNumber(flower); int64_t totalBlocks = flower_getBlockNumber(flower); int64_t totalGroups = flower_getGroupNumber(flower); int64_t totalChains = flower_getChainNumber(flower); int64_t totalLinkGroups = 0; int64_t maxEndDegree = 0; int64_t maxAdjacencyLength = 0; int64_t totalEdges = 0; Flower_EndIterator *endIt = flower_getEndIterator(flower); End *end; while((end = flower_getNextEnd(endIt)) != NULL) { assert(end_getOrientation(end)); if(end_getInstanceNumber(end) > maxEndDegree) { maxEndDegree = end_getInstanceNumber(end); } stSortedSet *ends = stSortedSet_construct(); End_InstanceIterator *capIt = end_getInstanceIterator(end); Cap *cap; while((cap = end_getNext(capIt)) != NULL) { if(cap_getSequence(cap) != NULL) { Cap *adjacentCap = cap_getAdjacency(cap); assert(adjacentCap != NULL); End *adjacentEnd = end_getPositiveOrientation(cap_getEnd(adjacentCap)); stSortedSet_insert(ends, adjacentEnd); int64_t adjacencyLength = cap_getCoordinate(cap) - cap_getCoordinate(adjacentCap); if(adjacencyLength < 0) { adjacencyLength *= -1; } assert(adjacencyLength >= 1); if(adjacencyLength >= maxAdjacencyLength) { maxAdjacencyLength = adjacencyLength; } } } end_destructInstanceIterator(capIt); totalEdges += stSortedSet_size(ends); if(stSortedSet_search(ends, end) != NULL) { //This ensures we count self edges twice, so that the division works. totalEdges += 1; } stSortedSet_destruct(ends); } assert(totalEdges % 2 == 0); flower_destructEndIterator(endIt); Flower_GroupIterator *groupIt = flower_getGroupIterator(flower); Group *group; while((group = flower_getNextGroup(groupIt)) != NULL) { if(group_getLink(group) != NULL) { totalLinkGroups++; } } flower_destructGroupIterator(groupIt); printf("flower name: %" PRIi64 " total bases: %" PRIi64 " total-ends: %" PRIi64 " total-caps: %" PRIi64 " max-end-degree: %" PRIi64 " max-adjacency-length: %" PRIi64 " total-blocks: %" PRIi64 " total-groups: %" PRIi64 " total-edges: %" PRIi64 " total-free-ends: %" PRIi64 " total-attached-ends: %" PRIi64 " total-chains: %" PRIi64 " total-link groups: %" PRIi64 "\n", flower_getName(flower), totalBases, totalEnds, totalCaps, maxEndDegree, maxAdjacencyLength, totalBlocks, totalGroups, totalEdges/2, totalFreeEnds, totalAttachedEnds, totalChains, totalLinkGroups); return 0; }
Event *eventTree_getEvent(EventTree *eventTree, Name eventName) { Event event; event.name = eventName; return stSortedSet_search(eventTree->events, &event); }
void flower_removeFace(Flower *flower, Face *face) { assert(stSortedSet_search(flower->faces, face) != NULL); stSortedSet_remove(flower->faces, face); }