/*
 * Recursive depth-first search step used to detect whether the graph has an acyclic ordering.
 * Returns 1 if seqPos is reached again while it is started but not yet done (a cycle), or if a
 * recursive call reports a cycle; returns 0 otherwise.
 */
static int64_t dfs(stHash *adjacencyList, stIntTuple *seqPos, stSortedSet *started, stSortedSet *done) {
    if (stSortedSet_search(started, seqPos) != NULL) {
        //Started but not done: we have detected a cycle.
        //Started and done: this area was already explored without finding one.
        return stSortedSet_search(done, seqPos) == NULL ? 1 : 0;
    }
    stSortedSet_insert(started, seqPos);

    int64_t foundCycle = 0;
    //The successor is the next position on the same sequence.
    stIntTuple *successor = stIntTuple_construct2(stIntTuple_get(seqPos, 0), stIntTuple_get(seqPos, 1) + 1);
    stSortedSet *successorColumn = stHash_search(adjacencyList, successor);
    if (successorColumn != NULL) {
        //The successor is in the adjacency list, so recurse over every member of its column.
        assert(stSortedSet_search(successorColumn, successor) != NULL);
        stSortedSetIterator *columnIt = stSortedSet_getIterator(successorColumn);
        for (stIntTuple *member = stSortedSet_getNext(columnIt); member != NULL;
                member = stSortedSet_getNext(columnIt)) {
            //Once a cycle is known the remaining members are stepped over without being explored.
            if (!foundCycle && dfs(adjacencyList, member, started, done)) {
                foundCycle = 1;
            }
        }
        stSortedSet_destructIterator(columnIt);
    }
    stIntTuple_destruct(successor);
    stSortedSet_insert(done, seqPos);
    return foundCycle;
}
static void test_stSortedSet_searchGreaterThan(CuTest* testCase) { sonLibSortedSetTestSetup(); for(int32_t i=0; i<size; i++) { stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[i])); } //static int32_t sortedInput[] = { -10, -1, 1, 3, 5, 10, 12 }; CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet, stIntTuple_construct(1, -11)) == stSortedSet_search(sortedSet, stIntTuple_construct(1, -10))); CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet, stIntTuple_construct(1, -10)) == stSortedSet_search(sortedSet, stIntTuple_construct(1, -1))); CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet, stIntTuple_construct(1, -5)) == stSortedSet_search(sortedSet, stIntTuple_construct(1, -1))); CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet, stIntTuple_construct(1, 1)) == stSortedSet_search(sortedSet, stIntTuple_construct(1, 3))); CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet, stIntTuple_construct(1, 13)) == NULL); CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet, stIntTuple_construct(1, 12)) == NULL); for(int32_t i=0; i<100; i++) { stSortedSet_insert(sortedSet, stIntTuple_construct(1, st_randomInt(-1000, 1000))); } stList *list = stSortedSet_getList(sortedSet); for(int32_t i=1; i<stList_length(list); i++) { stIntTuple *p = stList_get(list, i-1); stIntTuple *j = stList_get(list, i); stIntTuple *k = stIntTuple_construct(1, st_randomInt(stIntTuple_getPosition(p, 0), stIntTuple_getPosition(j, 0))); CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet, k) == j); stIntTuple_destruct(k); } stList_destruct(list); sonLibSortedSetTestTeardown(); }
/*
 * Tests stSortedSet_equals: a set equals itself, sets with different contents differ, sets with
 * the same contents are equal, and a set built with a different constructor (so, per the original
 * comment, a different comparator) is never equal.
 */
static void test_stSortedSetEquals(CuTest* testCase) {
    sonLibSortedSetTestSetup();
    CuAssertTrue(testCase, stSortedSet_equals(sortedSet, sortedSet));
    int32_t i;
    for(i=0; i<size; i++) {
        stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[i]));
    }
    CuAssertTrue(testCase, stSortedSet_equals(sortedSet, sortedSet));
    CuAssertTrue(testCase, !stSortedSet_equals(sortedSet, sortedSet2));
    for(i=1; i<size; i++) { //first argument is unique in input..
        stSortedSet_insert(sortedSet2, stIntTuple_construct(1, input[i]));
    }
    //sortedSet2 is still missing input[0].
    CuAssertTrue(testCase, !stSortedSet_equals(sortedSet, sortedSet2));
    stSortedSet_insert(sortedSet2, stIntTuple_construct(1, input[0]));
    CuAssertTrue(testCase, stSortedSet_equals(sortedSet, sortedSet2));

    //diff comparator.. Built with an element destructor so that the tuples inserted below are
    //freed with the set instead of being leaked.
    //NOTE(review): assumes stSortedSet_construct2 uses the same default comparator as
    //stSortedSet_construct - confirm against the sorted set header.
    stSortedSet *sortedSet3 = stSortedSet_construct2((void (*)(void *)) stIntTuple_destruct);
    CuAssertTrue(testCase, !stSortedSet_equals(sortedSet, sortedSet3));
    for(i=0; i<size; i++) {
        stSortedSet_insert(sortedSet3, stIntTuple_construct(1, input[i]));
    }
    CuAssertTrue(testCase, !stSortedSet_equals(sortedSet, sortedSet3));
    stSortedSet_destruct(sortedSet3);
    sonLibSortedSetTestTeardown();
}
/*
 * Tests iterating forwards over the whole set, then backwards with both the original iterator
 * and a copy taken once the forward pass is exhausted.
 */
static void test_stSortedSetIterator(CuTest* testCase) {
    sonLibSortedSetTestSetup();
    for (int32_t k = 0; k < size; k++) {
        stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[k]));
    }

    stSortedSetIterator *forward = stSortedSet_getIterator(sortedSet);
    CuAssertTrue(testCase, forward != NULL);

    //Forward pass: elements come out in sorted order, then the iterator is exhausted.
    for (int32_t k = 0; k < sortedSize; k++) {
        stIntTuple *element = stSortedSet_getNext(forward);
        CuAssertIntEquals(testCase, sortedInput[k], stIntTuple_getPosition(element, 0));
    }
    CuAssertTrue(testCase, stSortedSet_getNext(forward) == NULL);

    //The copy is taken at the end position, so both iterators walk back over the same elements.
    stSortedSetIterator *copied = stSortedSet_copyIterator(forward);
    CuAssertTrue(testCase, copied != NULL);
    for (int32_t k = sortedSize - 1; k >= 0; k--) {
        stIntTuple *fromOriginal = stSortedSet_getPrevious(forward);
        CuAssertIntEquals(testCase, sortedInput[k], stIntTuple_getPosition(fromOriginal, 0));
        stIntTuple *fromCopy = stSortedSet_getPrevious(copied);
        CuAssertIntEquals(testCase, sortedInput[k], stIntTuple_getPosition(fromCopy, 0));
    }
    CuAssertTrue(testCase, stSortedSet_getPrevious(forward) == NULL);
    CuAssertTrue(testCase, stSortedSet_getPrevious(copied) == NULL);

    stSortedSet_destructIterator(forward);
    stSortedSet_destructIterator(copied);
    sonLibSortedSetTestTeardown();
}
/*
 * Tests stSortedSet_getIteratorFrom: for every element of the set the returned iterator must
 * yield that element first, and asking for an element that is not in the set must raise an
 * exception with id SORTED_SET_EXCEPTION_ID.
 */
static void test_stSortedSetIterator_getIteratorFrom(CuTest* testCase) {
    sonLibSortedSetTestSetup();
    int32_t i;
    for(i=0; i<size; i++) {
        stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[i]));
    }
    stSortedSetIterator *iterator = stSortedSet_getIterator(sortedSet);
    CuAssertTrue(testCase, iterator != NULL);

    for(i=0; i<sortedSize; i++) {
        //Keep hold of the query tuple so it can be freed (it was previously leaked).
        stIntTuple *query = stIntTuple_construct(1, sortedInput[i]);
        stSortedSetIterator *it = stSortedSet_getIteratorFrom(sortedSet, query);
        stIntTuple *intTuple = stSortedSet_getNext(it);
        CuAssertTrue(testCase, intTuple != NULL);
        CuAssertIntEquals(testCase, sortedInput[i], stIntTuple_getPosition(intTuple, 0));
        stSortedSet_destructIterator(it);
        stIntTuple_destruct(query);
    }

    //Constructed outside the try block so it can be freed whichever path is taken.
    stIntTuple *missing = stIntTuple_construct(1, 7); //This number if not in the input.
    stTry {
        stSortedSet_getIteratorFrom(sortedSet, missing);
        CuAssertTrue(testCase, 0); //Reaching here means no exception was thrown.
    } stCatch(except) {
        CuAssertTrue(testCase, stExcept_getId(except) == SORTED_SET_EXCEPTION_ID);
    } stTryEnd
    stIntTuple_destruct(missing);

    //This iterator was previously never released.
    stSortedSet_destructIterator(iterator);
    sonLibSortedSetTestTeardown();
}
/*
 * Tests stList_filterToExclude and stList_filterToInclude against a set holding strings[0] and
 * strings[4]: the exclude list must hold the other three strings in order, the include list
 * must hold exactly those two.
 */
void test_stList_filter(CuTest *testCase) {
    setup();
    stSortedSet *set = stSortedSet_construct();
    stSortedSet_insert(set, strings[0]);
    stSortedSet_insert(set, strings[4]);
    stList *list2 = stList_filterToExclude(list, set);
    stList *list3 = stList_filterToInclude(list, set);
    CuAssertTrue(testCase,stList_length(list2) == 3);
    CuAssertTrue(testCase,stList_length(list3) == 2);
    CuAssertTrue(testCase,stList_get(list2, 0) == strings[1]);
    CuAssertTrue(testCase,stList_get(list2, 1) == strings[2]);
    CuAssertTrue(testCase,stList_get(list2, 2) == strings[3]);
    CuAssertTrue(testCase,stList_get(list3, 0) == strings[0]);
    CuAssertTrue(testCase,stList_get(list3, 1) == strings[4]);
    //Release the containers created by this test; they were previously leaked.
    //NOTE(review): assumes the filtered lists and the set do not own their elements, so the
    //strings are still released only by teardown() - confirm against the list implementation.
    stList_destruct(list3);
    stList_destruct(list2);
    stSortedSet_destruct(set);
    teardown();
}
/*
 * Visits the sequences of the flower and, recursively, of every nested flower. The meta-sequence
 * of each sequence for which stringIsInList(<event header>, eventStrings) returns 0 is added to
 * metaSequences, unless it is already present.
 */
static void getMetaSequencesForEventsP(stSortedSet *metaSequences, Flower *flower, stList *eventStrings) {
    //Sequences of this flower.
    Flower_SequenceIterator *sequenceIt = flower_getSequenceIterator(flower);
    for (Sequence *seq = flower_getNextSequence(sequenceIt); seq != NULL;
            seq = flower_getNextSequence(sequenceIt)) {
        MetaSequence *candidate = sequence_getMetaSequence(seq);
        if (stringIsInList(event_getHeader(sequence_getEvent(seq)), eventStrings) != 0) {
            continue;
        }
        if (stSortedSet_search(metaSequences, candidate) == NULL) {
            stSortedSet_insert(metaSequences, candidate);
        }
    }
    flower_destructSequenceIterator(sequenceIt);

    //Nested flowers.
    Flower_GroupIterator *childIt = flower_getGroupIterator(flower);
    for (Group *child = flower_getNextGroup(childIt); child != NULL; child = flower_getNextGroup(childIt)) {
        Flower *nested = group_getNestedFlower(child);
        if (nested != NULL) {
            getMetaSequencesForEventsP(metaSequences, nested, eventStrings);
        }
    }
    flower_destructGroupIterator(childIt);
}
/*
 * Builds the adjacency list for the given pairs and runs dfs from position 0 of every sequence,
 * returning non-zero if the graph contains a cycle (i.e. no valid topological sort exists).
 */
static int64_t containsACycle(stList *pairs, int64_t sequenceNumber) {
    stHash *adjacencyList = buildAdjacencyList(pairs, sequenceNumber);

    //Bookkeeping sets for the depth first search; they do not own the tuples placed in them.
    stSortedSet *started = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);
    stSortedSet *done = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);

    int64_t cyclic = 0;
    int64_t seq = 0;
    while (seq < sequenceNumber) {
        //The search must be started from the tuple stored in the adjacency list rather than from
        //this temporary key, so that the temporary can be freed straight away.
        stIntTuple *key = stIntTuple_construct2(seq, 0);
        stSortedSet *startColumn = stHash_search(adjacencyList, key);
        assert(startColumn != NULL);
        stIntTuple *storedStart = stSortedSet_search(startColumn, key);
        assert(storedStart != NULL);
        if (!cyclic && dfs(adjacencyList, storedStart, started, done)) {
            cyclic = 1;
        }
        stIntTuple_destruct(key);
        seq++;
    }

    //Cleanup. Several keys can share one column, so the columns are gathered into a set (which
    //holds each one once) whose destructor then frees them.
    stSortedSet *uniqueColumns = stSortedSet_construct2((void (*)(void *))stSortedSet_destruct);
    stHashIterator *keyIt = stHash_getIterator(adjacencyList);
    for (stIntTuple *position = stHash_getNext(keyIt); position != NULL; position = stHash_getNext(keyIt)) {
        stSortedSet *column = stHash_search(adjacencyList, position);
        assert(column != NULL);
        stSortedSet_insert(uniqueColumns, column);
    }
    stHash_destructIterator(keyIt);
    stHash_destruct(adjacencyList);
    stSortedSet_destruct(uniqueColumns);
    stSortedSet_destruct(started);
    stSortedSet_destruct(done);
    return cyclic;
}
/*
 * Builds a hash from each node (taken from the list named `nodes`, defined outside this function)
 * to the set of nodes in its connected component, joining components along each edge in
 * filteredEdges. Elements 1 and 2 of an edge tuple are used as its two end nodes.
 *
 * A deliberately simple reimplementation of the greedy function, kept separate to trap typos.
 */
static stHash *getComponents(stList *filteredEdges) {
    stHash *nodesToComponents = stHash_construct3((uint64_t(*)(const void *)) stIntTuple_hashKey,
            (int(*)(const void *, const void *)) stIntTuple_equalsFn, NULL, NULL);

    //Start with every node alone in its own component.
    const int64_t nodeCount = stList_length(nodes);
    for (int64_t n = 0; n < nodeCount; n++) {
        stIntTuple *node = stList_get(nodes, n);
        stSortedSet *singleton = stSortedSet_construct();
        stSortedSet_insert(singleton, node);
        stHash_insert(nodesToComponents, node, singleton);
    }

    //Merge the components at the two ends of every edge.
    const int64_t edgeCount = stList_length(filteredEdges);
    for (int64_t e = 0; e < edgeCount; e++) {
        stIntTuple *edge = stList_get(filteredEdges, e);
        stIntTuple *endA = stIntTuple_construct1(stIntTuple_get(edge, 1));
        stIntTuple *endB = stIntTuple_construct1(stIntTuple_get(edge, 2));
        stSortedSet *componentA = stHash_search(nodesToComponents, endA);
        stSortedSet *componentB = stHash_search(nodesToComponents, endB);
        assert(componentA != NULL && componentB != NULL);
        if (componentA != componentB) {
            stSortedSet *merged = stSortedSet_getUnion(componentA, componentB);
            //Repoint every member at the merged component before the old ones are freed.
            stSortedSetIterator *memberIt = stSortedSet_getIterator(merged);
            for (stIntTuple *member = stSortedSet_getNext(memberIt); member != NULL;
                    member = stSortedSet_getNext(memberIt)) {
                stHash_insert(nodesToComponents, member, merged);
            }
            stSortedSet_destructIterator(memberIt);
            stSortedSet_destruct(componentA);
            stSortedSet_destruct(componentB);
        }
        stIntTuple_destruct(endA);
        stIntTuple_destruct(endB);
    }
    return nodesToComponents;
}
/*
 * Checks a reference: its adjacency edges must cover all nodeNumber nodes (two nodes per edge),
 * its recomputed z-score must equal totalScore to within 0.000001, and the reference and the
 * component structure must each have as many elements as there are stubs.
 */
static void checkIsValidReference(CuTest *testCase, stList *reference, double totalScore) {
    stList *adjacencyEdges = convertReferenceToAdjacencyEdges(reference);

    //Every node must have a partner.
    CuAssertIntEquals(testCase, nodeNumber, stList_length(adjacencyEdges) * 2);
    stSortedSet *nodeSet = stSortedSet_construct3((int(*)(const void *, const void *)) stIntTuple_cmpFn,
            (void(*)(void *)) stIntTuple_destruct);
    int64_t node = 0;
    while (node < nodeNumber) {
        stSortedSet_insert(nodeSet, stIntTuple_construct1(node));
        node++;
    }
    checkEdges(adjacencyEdges, nodeSet, 1, 0);

    //The score passed in must match the score recomputed from the reference.
    double recomputedScore = calculateZScoreOfReference(reference, nodeNumber, zMatrix);
    CuAssertDblEquals(testCase, recomputedScore, totalScore, 0.000001);

    //The stubs must be properly connected once the stub and chain edges are added.
    stList *combinedEdges = stList_copy(adjacencyEdges, NULL);
    stList_appendAll(combinedEdges, stubs);
    stList_appendAll(combinedEdges, chains);
    stList *connected = getComponents(combinedEdges);
    CuAssertIntEquals(testCase, stList_length(stubs), stList_length(reference));
    CuAssertIntEquals(testCase, stList_length(stubs), stList_length(connected));

    //Cleanup
    stList_destruct(connected);
    stSortedSet_destruct(nodeSet);
    stList_destruct(combinedEdges);
    stList_destruct(adjacencyEdges);
}
/*
 * Tests stSortedSet_getIntersection on empty sets, on an empty and a non-empty set, on two
 * overlapping sets, and checks that sets with different comparators raise an exception with id
 * SORTED_SET_EXCEPTION_ID.
 */
static void test_stSortedSetIntersection(CuTest* testCase) {
    sonLibSortedSetTestSetup();
    //Check intersection of empty sets is okay..
    stSortedSet *sortedSet3 = stSortedSet_getIntersection(sortedSet, sortedSet2);
    CuAssertTrue(testCase, stSortedSet_size(sortedSet3) == 0);
    stSortedSet_destruct(sortedSet3);

    int32_t i;
    for(i=0; i<size; i++) {
        stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[i]));
    }

    //Check intersection of empty and non-empty set is empty.
    sortedSet3 = stSortedSet_getIntersection(sortedSet, sortedSet2);
    CuAssertTrue(testCase, stSortedSet_size(sortedSet3) == 0);
    stSortedSet_destruct(sortedSet3);

    //Check intersection of two non-empty, overlapping sets in correct.
    stSortedSet_insert(sortedSet2, stIntTuple_construct(1, 0));
    stSortedSet_insert(sortedSet2, stIntTuple_construct(1, 1));
    stSortedSet_insert(sortedSet2, stIntTuple_construct(1, 5));

    sortedSet3 = stSortedSet_getIntersection(sortedSet, sortedSet2);
    CuAssertTrue(testCase, stSortedSet_size(sortedSet3) == 2);
    stIntTuple *intTuple = stIntTuple_construct(1, 1);
    CuAssertTrue(testCase, stSortedSet_search(sortedSet3, intTuple) != NULL);
    stIntTuple_destruct(intTuple);
    intTuple = stIntTuple_construct(1, 5);
    CuAssertTrue(testCase, stSortedSet_search(sortedSet3, intTuple) != NULL);
    stIntTuple_destruct(intTuple);
    stSortedSet_destruct(sortedSet3);

    //Check we get an exception with sorted sets with different comparators.
    stSortedSet *sortedSet4 = stSortedSet_construct();
    stTry {
        stSortedSet_getIntersection(sortedSet, sortedSet4);
        //Reaching here means no exception was thrown; without this the test passed silently.
        CuAssertTrue(testCase, 0);
    } stCatch(except) {
        CuAssertTrue(testCase, stExcept_getId(except) == SORTED_SET_EXCEPTION_ID);
    } stTryEnd
    stSortedSet_destruct(sortedSet4);
    sonLibSortedSetTestTeardown();
}
/*
 * Creates a new sorted set with the same comparison function as sortedSet and inserts every
 * element pointer of sortedSet into it. The elements themselves are not duplicated; the new set
 * is constructed with destructElementFn as its element destructor.
 */
stSortedSet *stSortedSet_copyConstruct(stSortedSet *sortedSet, void (*destructElementFn)(void *)) {
    stSortedSet *copy = stSortedSet_construct3(stSortedSet_getComparator(sortedSet)->compareFn,
            destructElementFn);
    stSortedSetIterator *sourceIt = stSortedSet_getIterator(sortedSet);
    for (void *element = stSortedSet_getNext(sourceIt); element != NULL;
            element = stSortedSet_getNext(sourceIt)) {
        stSortedSet_insert(copy, element);
    }
    stSortedSet_destructIterator(sourceIt);
    return copy;
}
/*
 * Builds the adjacency list structure for the sequences: a hash from every (sequence, position)
 * tuple to the column (a sorted set of such tuples) it is aligned in. Every position starts in a
 * column of its own, and each pair in `pairs` merges the columns of its two positions. A pair is
 * read as (sequence1, position1, sequence2, position2).
 *
 * The hash owns its key tuples; the columns are not owned by the hash or by each other.
 */
static stHash *buildAdjacencyList(stList *pairs, int64_t sequenceNumber) {
    stHash *positionToColumn = stHash_construct3((uint64_t (*)(const void *))stIntTuple_hashKey,
            (int (*)(const void *, const void *))stIntTuple_equalsFn,
            (void (*)(void *))stIntTuple_destruct, NULL);

    //One singleton column per sequence position.
    for (int64_t seq = 0; seq < sequenceNumber; seq++) {
        int64_t position = 0;
        while (position < MAX_SEQUENCE_SIZE) {
            stIntTuple *key = stIntTuple_construct2(seq, position);
            stSortedSet *singleton = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);
            stSortedSet_insert(singleton, key);
            stHash_insert(positionToColumn, key, singleton);
            position++;
        }
    }

    //Merge the columns joined by each aligned pair.
    stListIterator *pairIt = stList_getIterator(pairs);
    for (stIntTuple *pair = stList_getNext(pairIt); pair != NULL; pair = stList_getNext(pairIt)) {
        stIntTuple *first = stIntTuple_construct2(stIntTuple_get(pair, 0), stIntTuple_get(pair, 1));
        stIntTuple *second = stIntTuple_construct2(stIntTuple_get(pair, 2), stIntTuple_get(pair, 3));
        stSortedSet *target = stHash_search(positionToColumn, first);
        assert(target != NULL);
        stSortedSet *absorbed = stHash_search(positionToColumn, second);
        assert(absorbed != NULL);
        if (target != absorbed) {
            //Move every member of the second column into the first and repoint its hash entry.
            stSortedSetIterator *memberIt = stSortedSet_getIterator(absorbed);
            for (stIntTuple *member = stSortedSet_getNext(memberIt); member != NULL;
                    member = stSortedSet_getNext(memberIt)) {
                assert(stSortedSet_search(target, member) == NULL);
                stSortedSet_insert(target, member);
                assert(stHash_search(positionToColumn, member) == absorbed);
                stHash_insert(positionToColumn, member, target);
                assert(stHash_search(positionToColumn, member) == target);
            }
            stSortedSet_destructIterator(memberIt);
            stSortedSet_destruct(absorbed);
        }
        //The two lookup keys were temporaries.
        stIntTuple_destruct(first);
        stIntTuple_destruct(second);
    }
    stList_destructIterator(pairIt);
    return positionToColumn;
}
/*
 * Tests that stSortedSet_copyConstruct yields a set of the same size that compares equal to the
 * set it was copied from.
 */
static void test_stSortedSet_copyConstruct(CuTest* testCase) {
    sonLibSortedSetTestSetup();
    CuAssertTrue(testCase, sortedSet != NULL);
    for (int32_t k = 0; k < size; k++) {
        stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[k]));
    }
    //The copy gets no element destructor, so the tuples stay owned by the original set.
    stSortedSet *copy = stSortedSet_copyConstruct(sortedSet, NULL);
    CuAssertTrue(testCase, stSortedSet_size(copy) == stSortedSet_size(sortedSet));
    CuAssertTrue(testCase, stSortedSet_equals(copy, sortedSet));
    stSortedSet_destruct(copy);
    sonLibSortedSetTestTeardown();
}
/*
 * Returns a new sorted set holding the elements of sortedSet1 followed by those of sortedSet2,
 * using sortedSet1's comparison function and no element destructor. Throws an exception with id
 * SORTED_SET_EXCEPTION_ID if the comparators of the two sets are not equal.
 */
stSortedSet *stSortedSet_getUnion(stSortedSet *sortedSet1, stSortedSet *sortedSet2) {
    if(!stSortedSet_comparatorsEqual(sortedSet1, sortedSet2)) {
        stThrowNew(SORTED_SET_EXCEPTION_ID, "Comparators are not equal for creating the union of two sorted sets");
    }
    stSortedSet *unionSet = stSortedSet_construct3(stSortedSet_getComparator(sortedSet1)->compareFn, NULL);

    //Insert the contents of each operand in turn, sortedSet1 first.
    stSortedSet *operands[2] = { sortedSet1, sortedSet2 };
    for (int k = 0; k < 2; k++) {
        stSortedSetIterator *operandIt = stSortedSet_getIterator(operands[k]);
        for (void *element = stSortedSet_getNext(operandIt); element != NULL;
                element = stSortedSet_getNext(operandIt)) {
            stSortedSet_insert(unionSet, element);
        }
        stSortedSet_destructIterator(operandIt);
    }
    return unionSet;
}
/*
 * Extends maximalHaplotypePath from `segment` in the 3' direction. Each visited segment is
 * appended to the path and recorded in segmentSet; the walk continues to the segment returned by
 * getAdjacentCapsSegment for the 3' cap for as long as trueAdjacency holds for that cap and a
 * following segment exists.
 */
static void getMaximalHaplotypePathsP3(Segment *segment, stList *maximalHaplotypePath, stSortedSet *segmentSet, stList *eventStrings) {
    Segment *current = segment;
    for (;;) {
        stList_append(maximalHaplotypePath, current);
        //Neither orientation of a segment may be visited twice.
        assert(stSortedSet_search(segmentSet, current) == NULL);
        assert(stSortedSet_search(segmentSet, segment_getReverse(current)) == NULL);
        stSortedSet_insert(segmentSet, current);

        Cap *threePrimeCap = segment_get3Cap(current);
        if (!trueAdjacency(threePrimeCap, eventStrings)) {
            return;
        }
        Segment *following = getAdjacentCapsSegment(threePrimeCap);
        if (following == NULL) {
            return;
        }
        //Continue on..
        current = following;
    }
}
/*
 * Requests 100000 ids from cactusDisk_getUniqueID and checks that each is positive, below
 * INT64_MAX, not NULL_NAME, survives a round trip through its string form, and has not been
 * handed out before.
 */
void testCactusDisk_getUniqueID_Unique(CuTest* testCase) {
    cactusDiskTestSetup();
    //Holds the string form of every id seen so far; the set frees the strings on destruction.
    stSortedSet *seenNames = stSortedSet_construct3(testCactusDisk_getUniqueID_UniqueP, free);
    int64_t requested = 0;
    while (requested < 100000) {
        Name id = cactusDisk_getUniqueID(cactusDisk);
        CuAssertTrue(testCase, id > 0);
        CuAssertTrue(testCase, id < INT64_MAX);
        CuAssertTrue(testCase, id != NULL_NAME);
        char *idString = cactusMisc_nameToString(id);
        CuAssertTrue(testCase, stSortedSet_search(seenNames, idString) == NULL);
        CuAssertTrue(testCase, cactusMisc_stringToName(idString) == id);
        stSortedSet_insert(seenNames, idString);
        requested++;
    }
    stSortedSet_destruct(seenNames);
    cactusDiskTestTeardown();
}
/*
 * Returns a new strcmp-ordered set of the event headers of the caps of `end` that also occur in
 * eventStrings. The set stores the header pointers themselves and has no element destructor.
 */
static stSortedSet *getEventStrings(End *end, stList *eventStrings) {
    stSortedSet *matchedHeaders = stSortedSet_construct3(
            (int(*)(const void *, const void *)) strcmp, NULL);
    End_InstanceIterator *capIt = end_getInstanceIterator(end);
    for (Cap *cap = end_getNext(capIt); cap != NULL; cap = end_getNext(capIt)) {
        const char *header = event_getHeader(cap_getEvent(cap));
        int64_t k = 0;
        while (k < stList_length(eventStrings)) {
            if (strcmp(stList_get(eventStrings, k), header) == 0) {
                stSortedSet_insert(matchedHeaders, (void *) header);
            }
            k++;
        }
    }
    end_destructInstanceIterator(capIt);
    return matchedHeaders;
}
/*
 * Accumulates coverage statistics for one block; fileHandle is not used. Blocks shorter than
 * minimumBlockLength are ignored, as are blocks containing the other-reference event when
 * ignoreOtherReferenceBlocks is set. Otherwise block length times the number of sample segments
 * is added to baseCoverages (indexed by the number of distinct other events in the block), and
 * to referenceBases / otherReferenceBases when the block contains the respective event.
 *
 * The thresholds, event strings and counters are names defined outside this function.
 */
static void getMAFBlock2(Block *block, FILE *fileHandle) {
    if (block_getLength(block) < minimumBlockLength) {
        return;
    }

    //First pass: does the block contain the reference and/or the other reference?
    bool includesReference = 0;
    bool includesOtherReference = 0;
    Block_InstanceIterator *segmentIt = block_getInstanceIterator(block);
    for (Segment *seg = block_getNext(segmentIt); seg != NULL; seg = block_getNext(segmentIt)) {
        const char *eventName = event_getHeader(segment_getEvent(seg));
        if (strcmp(eventName, referenceEventString) == 0) {
            includesReference = 1;
        } else if (strcmp(eventName, otherReferenceEventString) == 0) {
            includesOtherReference = 1;
        }
    }
    block_destructInstanceIterator(segmentIt);

    if (ignoreOtherReferenceBlocks && includesOtherReference) {
        return;
    }

    //Second pass: count the sample segments and collect the distinct events that are neither
    //the sample nor the reference.
    stSortedSet *otherSampleEvents = stSortedSet_construct3(
            (int(*)(const void *, const void *)) strcmp, NULL);
    int32_t sampleNumber = 0;
    segmentIt = block_getInstanceIterator(block);
    for (Segment *seg = block_getNext(segmentIt); seg != NULL; seg = block_getNext(segmentIt)) {
        const char *eventName = event_getHeader(segment_getEvent(seg));
        if (strcmp(eventName, sampleEventString) == 0) {
            sampleNumber++;
        } else if (strcmp(eventName, referenceEventString) != 0) {
            stSortedSet_insert(otherSampleEvents, (void *) eventName);
        }
    }
    block_destructInstanceIterator(segmentIt);

    baseCoverages[stSortedSet_size(otherSampleEvents)] += block_getLength(block) * sampleNumber;
    stSortedSet_destruct(otherSampleEvents);

    if (includesReference) {
        referenceBases += block_getLength(block) * sampleNumber;
    }
    if (includesOtherReference) {
        otherReferenceBases += block_getLength(block) * sampleNumber;
    }
}
/*
 * Returns a new sorted set holding the elements of sortedSet1 that are not found in sortedSet2,
 * using sortedSet1's comparison function and no element destructor. Throws an exception with id
 * SORTED_SET_EXCEPTION_ID if the comparators of the two sets are not equal.
 */
stSortedSet *stSortedSet_getDifference(stSortedSet *sortedSet1, stSortedSet *sortedSet2) {
    if(!stSortedSet_comparatorsEqual(sortedSet1, sortedSet2)) {
        stThrowNew(SORTED_SET_EXCEPTION_ID, "Comparators are not equal for creating the sorted set difference");
    }
    stSortedSet *difference = stSortedSet_construct3(stSortedSet_getComparator(sortedSet1)->compareFn, NULL);

    stSortedSetIterator *firstIt = stSortedSet_getIterator(sortedSet1);
    for (void *element = stSortedSet_getNext(firstIt); element != NULL;
            element = stSortedSet_getNext(firstIt)) {
        if (stSortedSet_search(sortedSet2, element) != NULL) {
            continue; //Present in sortedSet2, so excluded from the difference.
        }
        stSortedSet_insert(difference, element);
    }
    stSortedSet_destructIterator(firstIt);
    return difference;
}
/*
 * Tests the basic sorted set operations: size, insert, getFirst, getLast, search and remove.
 */
static void test_stSortedSet(CuTest* testCase) {
    sonLibSortedSetTestSetup();
    int32_t i;
    CuAssertIntEquals(testCase, 0, stSortedSet_size(sortedSet));
    for(i=0; i<size; i++) {
        stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[i]));
    }
    CuAssertIntEquals(testCase, sortedSize, stSortedSet_size(sortedSet));
    CuAssertIntEquals(testCase, sortedInput[0], stIntTuple_getPosition(stSortedSet_getFirst(sortedSet), 0));
    CuAssertIntEquals(testCase, sortedInput[sortedSize-1], stIntTuple_getPosition(stSortedSet_getLast(sortedSet), 0));
    for(i=0; i<sortedSize; i++) {
        CuAssertIntEquals(testCase, sortedSize-i, stSortedSet_size(sortedSet));
        stIntTuple *tuple = stIntTuple_construct(1, sortedInput[i]);
        //Keep the stored element: once removed it is no longer reachable through the set, so it
        //has to be freed here (it was previously leaked).
        stIntTuple *stored = stSortedSet_search(sortedSet, tuple);
        CuAssertTrue(testCase, stored != NULL);
        CuAssertTrue(testCase, stIntTuple_getPosition(stored, 0) == sortedInput[i]);
        stSortedSet_remove(sortedSet, tuple);
        CuAssertTrue(testCase, stSortedSet_search(sortedSet, tuple) == NULL);
        stIntTuple_destruct(stored);
        stIntTuple_destruct(tuple);
    }
    sonLibSortedSetTestTeardown();
}
/*
 * Requests 10 id intervals of random size (drawn with st_randomInt(0, 100000)) from
 * cactusDisk_getUniqueIDInterval and checks that every id in every interval is positive, below
 * INT64_MAX, not NULL_NAME, survives a round trip through its string form, and has not been
 * seen before.
 */
void testCactusDisk_getUniqueID_UniqueIntervals(CuTest* testCase) {
    cactusDiskTestSetup();
    //Holds the string form of every id seen so far; the set frees the strings on destruction.
    stSortedSet *seenNames = stSortedSet_construct3(testCactusDisk_getUniqueID_UniqueP, free);
    for (int64_t interval = 0; interval < 10; interval++) {
        int64_t intervalSize = st_randomInt(0, 100000);
        Name intervalStart = cactusDisk_getUniqueIDInterval(cactusDisk, intervalSize);
        //The ids of an interval are consecutive, starting at the returned name.
        for (int64_t offset = 0; offset < intervalSize; offset++) {
            Name id = intervalStart + offset;
            CuAssertTrue(testCase, id > 0);
            CuAssertTrue(testCase, id < INT64_MAX);
            CuAssertTrue(testCase, id != NULL_NAME);
            char *idString = cactusMisc_nameToString(id);
            CuAssertTrue(testCase, stSortedSet_search(seenNames, idString) == NULL);
            CuAssertTrue(testCase, cactusMisc_stringToName(idString) == id);
            stSortedSet_insert(seenNames, idString);
        }
    }
    stSortedSet_destruct(seenNames);
    cactusDiskTestTeardown();
}
/*
 * Adds a prime less than (or equals) constraint between position1 of sequence1 and position2 of
 * sequence2 to the list of prime constraints, removing any constraints it makes redundant.
 * The "or equals" form is selected by making the lessThanOrEquals argument non-zero.
 * Constraints are stored as (position1, position2, lessThanOrEquals) tuples.
 */
void addConstraint_lessThan(stPosetAlignment *posetAlignment, int64_t sequence1, int64_t position1, int64_t sequence2, int64_t position2, int64_t lessThanOrEquals) {
    stSortedSet *constraintList = getConstraintList(posetAlignment, sequence1, sequence2);
    assert(position1 != INT64_MAX);
    assert(position2 != INT64_MAX);
    stIntTuple *newConstraint = stIntTuple_construct3(position1, position2, lessThanOrEquals);

    //Repeatedly look at the closest existing constraint at or below the new one, discarding it
    //while the new constraint makes it redundant.
    for (;;) {
        stIntTuple *existing = stSortedSet_searchLessThanOrEqual(constraintList, newConstraint);
        if (existing == NULL) {
            break;
        }
        assert(stIntTuple_get(existing, 0) <= position1);
        if (stIntTuple_get(existing, 1) < position2) {
            //Check the constraint does not overshadow our proposed constraint.
            assert(stIntTuple_get(existing, 0) < position1);
            break;
        }
        //When the second positions coincide, check we are not removing an equivalent or more
        //severe constraint.
        assert(stIntTuple_get(existing, 1) != position2
                || (!lessThanOrEquals && stIntTuple_get(existing, 2))
                || stIntTuple_get(existing, 0) < position1);
        stSortedSet_remove(constraintList, existing);
        stIntTuple_destruct(existing);
    }
    stSortedSet_insert(constraintList, newConstraint);
}
/*
 * Inserts every segment of the flower, and recursively of every nested flower, into `segments`.
 * A segment whose segment_getStrand is zero is replaced by its reverse before insertion.
 */
static void getOrderedSegmentsP(Flower *flower, stSortedSet *segments) {
    Flower_SegmentIterator *segIt = flower_getSegmentIterator(flower);
    for (Segment *seg = flower_getNextSegment(segIt); seg != NULL; seg = flower_getNextSegment(segIt)) {
        Segment *oriented = segment_getStrand(seg) ? seg : segment_getReverse(seg);
        assert(stSortedSet_search(segments, oriented) == NULL);
        stSortedSet_insert(segments, oriented);
    }
    flower_destructSegmentIterator(segIt);

    //Descend into the nested flowers.
    Flower_GroupIterator *childIt = flower_getGroupIterator(flower);
    for (Group *child = flower_getNextGroup(childIt); child != NULL; child = flower_getNextGroup(childIt)) {
        Flower *nested = group_getNestedFlower(child);
        if (nested != NULL) {
            getOrderedSegmentsP(nested, segments);
        }
    }
    flower_destructGroupIterator(childIt);
}
/*
 * Adds to `component` every edge reachable from `node`. The node's edge list is removed from
 * nodesToEdges before its edges are followed, so each node is expanded at most once; the list is
 * destructed once all of its edges have been processed. Elements 0 and 1 of an edge tuple are
 * used as its two end nodes.
 */
static void getComponentsP(stHash *nodesToEdges, int64_t node, stSortedSet *component) {
    stIntTuple *lookupKey = stIntTuple_construct1(node);
    stList *incident = stHash_search(nodesToEdges, lookupKey);
    if (incident == NULL) {
        //Unknown node, or one that has already been expanded.
        stIntTuple_destruct(lookupKey);
        return;
    }
    stHash_remove(nodesToEdges, lookupKey);
    int64_t k = 0;
    while (k < stList_length(incident)) {
        stIntTuple *edge = stList_get(incident, k);
        if (stSortedSet_search(component, edge) == NULL) {
            stSortedSet_insert(component, edge);
        }
        /*
         * Recursion on stack could equal the total number of nodes.
         */
        getComponentsP(nodesToEdges, stIntTuple_get(edge, 0), component);
        getComponentsP(nodesToEdges, stIntTuple_get(edge, 1), component);
        k++;
    }
    stList_destruct(incident);
    stIntTuple_destruct(lookupKey);
}
/*
 * Reads one end alignment from fileHandle.
 *
 * The first line holds two integers: the name of the end and the number of aligned-pair lines
 * that follow. Each following line holds eight integers, read in the order
 * sI1 p1 st1 score1 sI2 p2 st2 score2, describing one aligned pair.
 *
 * On success *end is set to the end looked up in `flower` and a new sorted set of the aligned
 * pairs is returned (the set destructs its pairs with alignedPair_destruct). If no line can be
 * read, *end is set to NULL and NULL is returned. Malformed input, a missing line, or an end
 * name not present in the flower is reported through st_errAbort.
 */
stSortedSet *loadEndAlignmentFromDisk(Flower *flower, FILE *fileHandle, End **end) {
    char *line = stFile_getLineFromFile(fileHandle);
    if(line == NULL) {
        //Nothing has been allocated yet, so there is nothing to release on this path
        //(the alignment set used to be constructed before this check and was leaked here).
        *end = NULL;
        return NULL;
    }
    Name flowerName;
    int64_t lineNumber;
    int headerFields = sscanf(line, "%" PRIi64 " %" PRIi64 "", &flowerName, &lineNumber);
    if(headerFields != 2 || lineNumber < 0) {
        st_errAbort("We encountered a mis-specified name in loading the first line of an end alignment from the disk: '%s'\n", line);
    }
    *end = flower_getEnd(flower, flowerName);
    if(*end == NULL) {
        //The line is still needed for this message, so it is freed only after this check
        //(it used to be freed first, making this a use of freed memory).
        st_errAbort("We encountered an end name that is not in the database: '%s'\n", line);
    }
    free(line);

    stSortedSet *endAlignment = stSortedSet_construct3((int (*)(const void *, const void *))alignedPair_cmpFn, (void (*)(void *))alignedPair_destruct);
    for(int64_t i=0; i<lineNumber; i++) {
        line = stFile_getLineFromFile(fileHandle);
        if(line == NULL) {
            st_errAbort("Got a null line when parsing an end alignment\n");
        }
        int64_t sI1, sI2;
        int64_t p1, st1, p2, st2, score1, score2;
        int pairFields = sscanf(line, "%" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 "",
                &sI1, &p1, &st1, &score1, &sI2, &p2, &st2, &score2);
        if(pairFields != 8) {
            st_errAbort("We encountered a mis-specified name in loading an end alignment from the disk: '%s'\n", line);
        }
        stSortedSet_insert(endAlignment, alignedPair_construct(sI1, p1, st1, sI2, p2, st2, score1, score2));
        free(line);
    }
    return endAlignment;
}
/*
 * Tests stSortedSet_getDifference on empty sets, on a non-empty set minus an empty set, on two
 * overlapping sets, and checks that sets with different comparators raise an exception with id
 * SORTED_SET_EXCEPTION_ID.
 */
static void test_stSortedSetDifference(CuTest* testCase) {
    sonLibSortedSetTestSetup();

    //Check difference of empty sets is okay..
    stSortedSet *sortedSet3 = stSortedSet_getDifference(sortedSet, sortedSet2);
    CuAssertTrue(testCase, stSortedSet_size(sortedSet3) == 0);
    stSortedSet_destruct(sortedSet3);

    int32_t i;
    for(i=0; i<size; i++) {
        stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[i]));
    }

    //Check difference of non-empty set / empty set is the non-empty.
    sortedSet3 = stSortedSet_getDifference(sortedSet, sortedSet2);
    CuAssertTrue(testCase, stSortedSet_equals(sortedSet, sortedSet3));
    stSortedSet_destruct(sortedSet3);

    //Check difference of two non-empty, overlapping sets in correct.
    stSortedSet_insert(sortedSet2, stIntTuple_construct(1, 0));
    stSortedSet_insert(sortedSet2, stIntTuple_construct(1, 1));
    stSortedSet_insert(sortedSet2, stIntTuple_construct(1, 5));

    sortedSet3 = stSortedSet_getDifference(sortedSet, sortedSet2);
    CuAssertTrue(testCase, stSortedSet_size(sortedSet3) == stSortedSet_size(sortedSet) - 2);
    CuAssertTrue(testCase, !stSortedSet_equals(sortedSet, sortedSet3));
    //The difference set does not free its elements, so the two tuples added back here are kept
    //in locals and freed below (they were previously leaked).
    stIntTuple *restoredOne = stIntTuple_construct(1, 1);
    stIntTuple *restoredFive = stIntTuple_construct(1, 5);
    stSortedSet_insert(sortedSet3, restoredOne);
    stSortedSet_insert(sortedSet3, restoredFive);
    CuAssertTrue(testCase, stSortedSet_equals(sortedSet, sortedSet3));
    stSortedSet_destruct(sortedSet3);
    stIntTuple_destruct(restoredOne);
    stIntTuple_destruct(restoredFive);

    //Check we get an exception when merging sorted sets with different comparators.
    stSortedSet *sortedSet4 = stSortedSet_construct();
    stTry {
        stSortedSet_getDifference(sortedSet, sortedSet4);
        CuAssertTrue(testCase, 0); //Reaching here means no exception was thrown.
    } stCatch(except) {
        CuAssertTrue(testCase, stExcept_getId(except) == SORTED_SET_EXCEPTION_ID);
    } stTryEnd
    stSortedSet_destruct(sortedSet4);
    sonLibSortedSetTestTeardown();
}
/*
 * Makes an alignment of the adjacency sequences attached to the caps of `end` and returns it as
 * a sorted set of AlignedPair objects; every aligned pair is inserted together with its reverse.
 *
 * The score of each aligned pair is reweighted separately for its two sequences. The weight for
 * a sequence is the number of other sequences in the relevant class divided by the number of
 * chosen pairwise alignments that sequence has in that class, where the class is "shares the same
 * other end" or "does not", decided by comparing the rightEndId of the two sequence fragments.
 */
stSortedSet *makeEndAlignment(StateMachine *sM, End *end, int64_t spanningTrees, int64_t maxSequenceLength,
        bool useProgressiveMerging, float gapGamma,
        PairwiseAlignmentParameters *pairwiseAlignmentBandingParameters) {
    //Get the adjacency sequences to be aligned, one per cap of the end.
    End_InstanceIterator *capIt = end_getInstanceIterator(end);
    stList *sequences = stList_construct3(0, (void (*)(void *))adjacencySequence_destruct);
    stList *seqFrags = stList_construct3(0, (void (*)(void *))seqFrag_destruct);
    stHash *endInstanceNumbers = stHash_construct2(NULL, free); //Other end -> number of seqFrags with that end
    for (Cap *cap = end_getNext(capIt); cap != NULL; cap = end_getNext(capIt)) {
        if (cap_getSide(cap)) {
            cap = cap_getReverse(cap);
        }
        AdjacencySequence *adjacencySequence = adjacencySequence_construct(cap, maxSequenceLength);
        stList_append(sequences, adjacencySequence);

        assert(cap_getAdjacency(cap) != NULL);
        End *otherEnd = end_getPositiveOrientation(cap_getEnd(cap_getAdjacency(cap)));
        stList_append(seqFrags, seqFrag_construct(adjacencySequence->string, 0, end_getName(otherEnd)));

        //Increase count of seqfrags with a given end.
        int64_t *counter = stHash_search(endInstanceNumbers, otherEnd);
        if (counter == NULL) {
            counter = st_calloc(1, sizeof(int64_t));
            assert(*counter == 0);
            stHash_insert(endInstanceNumbers, otherEnd, counter);
        }
        (*counter)++;
    }
    end_destructInstanceIterator(capIt);

    //Get the alignment.
    MultipleAlignment *mA = makeAlignment(sM, seqFrags, spanningTrees, 100000000, useProgressiveMerging, gapGamma, pairwiseAlignmentBandingParameters);

    //Count, per sequence, the chosen pairwise alignments it takes part in, distinguishing
    //alignments between sequences sharing a common end from the rest.
    const int64_t seqFragNumber = stList_length(seqFrags);
    int64_t *pairwiseAlignmentsPerSequenceNonCommonEnds = st_calloc(seqFragNumber, sizeof(int64_t));
    int64_t *pairwiseAlignmentsPerSequenceCommonEnds = st_calloc(seqFragNumber, sizeof(int64_t));
    for (int64_t k = 0; k < stList_length(mA->chosenPairwiseAlignments); k++) {
        stIntTuple *pairwiseAlignment = stList_get(mA->chosenPairwiseAlignments, k);
        int64_t seq1 = stIntTuple_get(pairwiseAlignment, 1);
        int64_t seq2 = stIntTuple_get(pairwiseAlignment, 2);
        assert(seq1 != seq2);
        SeqFrag *fragA = stList_get(seqFrags, seq1);
        SeqFrag *fragB = stList_get(seqFrags, seq2);
        int64_t *counts;
        if (fragA->rightEndId == fragB->rightEndId) {
            counts = pairwiseAlignmentsPerSequenceCommonEnds;
        } else {
            counts = pairwiseAlignmentsPerSequenceNonCommonEnds;
        }
        counts[seq1]++;
        counts[seq2]++;
    }

    //Now calculate score adjustments. INT64_MIN marks "no alignments of this class".
    double *scoreAdjustmentsNonCommonEnds = st_malloc(seqFragNumber * sizeof(double));
    double *scoreAdjustmentsCommonEnds = st_malloc(seqFragNumber * sizeof(double));
    for (int64_t k = 0; k < seqFragNumber; k++) {
        SeqFrag *seqFrag = stList_get(seqFrags, k);
        End *otherEnd = flower_getEnd(end_getFlower(end), seqFrag->rightEndId);
        assert(otherEnd != NULL);
        assert(stHash_search(endInstanceNumbers, otherEnd) != NULL);
        int64_t commonInstanceNumber = *(int64_t *)stHash_search(endInstanceNumbers, otherEnd);
        int64_t nonCommonInstanceNumber = seqFragNumber - commonInstanceNumber;

        assert(commonInstanceNumber > 0 && nonCommonInstanceNumber >= 0);
        assert(pairwiseAlignmentsPerSequenceNonCommonEnds[k] <= nonCommonInstanceNumber);
        assert(pairwiseAlignmentsPerSequenceNonCommonEnds[k] >= 0);
        assert(pairwiseAlignmentsPerSequenceCommonEnds[k] < commonInstanceNumber);
        assert(pairwiseAlignmentsPerSequenceCommonEnds[k] >= 0);

        if (pairwiseAlignmentsPerSequenceNonCommonEnds[k] > 0) {
            scoreAdjustmentsNonCommonEnds[k] = ((double)nonCommonInstanceNumber)/pairwiseAlignmentsPerSequenceNonCommonEnds[k];
            assert(scoreAdjustmentsNonCommonEnds[k] >= 1.0);
            assert(scoreAdjustmentsNonCommonEnds[k] <= nonCommonInstanceNumber);
        } else {
            scoreAdjustmentsNonCommonEnds[k] = INT64_MIN;
        }

        if (pairwiseAlignmentsPerSequenceCommonEnds[k] > 0) {
            //The sequence itself is excluded from the count of sequences sharing its end.
            scoreAdjustmentsCommonEnds[k] = ((double)commonInstanceNumber-1)/pairwiseAlignmentsPerSequenceCommonEnds[k];
            assert(scoreAdjustmentsCommonEnds[k] >= 1.0);
            assert(scoreAdjustmentsCommonEnds[k] <= commonInstanceNumber-1);
        } else {
            scoreAdjustmentsCommonEnds[k] = INT64_MIN;
        }
    }

    //Convert the alignment pairs to an alignment of the caps..
    //Each aligned pair tuple is read as (score, seqIndex1, offset1, seqIndex2, offset2).
    stSortedSet *sortedAlignment = stSortedSet_construct3((int (*)(const void *, const void *))alignedPair_cmpFn,
            (void (*)(void *))alignedPair_destruct);
    while (stList_length(mA->alignedPairs) > 0) {
        stIntTuple *alignedPair = stList_pop(mA->alignedPairs);
        assert(stIntTuple_length(alignedPair) == 5);
        int64_t seqIndex1 = stIntTuple_get(alignedPair, 1);
        int64_t seqIndex2 = stIntTuple_get(alignedPair, 3);
        AdjacencySequence *sequenceA = stList_get(sequences, seqIndex1);
        AdjacencySequence *sequenceB = stList_get(sequences, seqIndex2);
        assert(sequenceA != sequenceB);
        int64_t offset1 = stIntTuple_get(alignedPair, 2);
        int64_t offset2 = stIntTuple_get(alignedPair, 4);
        int64_t score = stIntTuple_get(alignedPair, 0);
        if (score <= 0) { //Happens when indel probs are included
            score = 1; //This is the minimum
        }
        assert(score > 0 && score <= PAIR_ALIGNMENT_PROB_1);

        SeqFrag *fragA = stList_get(seqFrags, seqIndex1);
        SeqFrag *fragB = stList_get(seqFrags, seqIndex2);
        assert(fragA != fragB);
        double *scoreAdjustments;
        if (fragA->rightEndId == fragB->rightEndId) {
            scoreAdjustments = scoreAdjustmentsCommonEnds;
        } else {
            scoreAdjustments = scoreAdjustmentsNonCommonEnds;
        }
        assert(scoreAdjustments[seqIndex1] != INT64_MIN);
        assert(scoreAdjustments[seqIndex2] != INT64_MIN);

        //Offsets are added on one strand and subtracted on the other; the reweighting of the two
        //scores happens here.
        AlignedPair *cappedPair = alignedPair_construct(
                sequenceA->subsequenceIdentifier, sequenceA->start + (sequenceA->strand ? offset1 : -offset1), sequenceA->strand,
                sequenceB->subsequenceIdentifier, sequenceB->start + (sequenceB->strand ? offset2 : -offset2), sequenceB->strand,
                score*scoreAdjustments[seqIndex1], score*scoreAdjustments[seqIndex2]);
        assert(stSortedSet_search(sortedAlignment, cappedPair) == NULL);
        assert(stSortedSet_search(sortedAlignment, cappedPair->reverse) == NULL);
        stSortedSet_insert(sortedAlignment, cappedPair);
        stSortedSet_insert(sortedAlignment, cappedPair->reverse);
        stIntTuple_destruct(alignedPair);
    }

    //Cleanup
    stList_destruct(seqFrags);
    stList_destruct(sequences);
    free(pairwiseAlignmentsPerSequenceNonCommonEnds);
    free(pairwiseAlignmentsPerSequenceCommonEnds);
    free(scoreAdjustmentsNonCommonEnds);
    free(scoreAdjustmentsCommonEnds);
    multipleAlignment_destruct(mA);
    stHash_destruct(endInstanceNumbers);

    return sortedAlignment;
}
int main(int argc, char *argv[]) { st_setLogLevelFromString(argv[1]); st_logDebug("Set up logging\n"); stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString(argv[2]); CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, 0); stKVDatabaseConf_destruct(kvDatabaseConf); st_logDebug("Set up the flower disk\n"); Name flowerName = cactusMisc_stringToName(argv[3]); Flower *flower = cactusDisk_getFlower(cactusDisk, flowerName); int64_t totalBases = flower_getTotalBaseLength(flower); int64_t totalEnds = flower_getEndNumber(flower); int64_t totalFreeEnds = flower_getFreeStubEndNumber(flower); int64_t totalAttachedEnds = flower_getAttachedStubEndNumber(flower); int64_t totalCaps = flower_getCapNumber(flower); int64_t totalBlocks = flower_getBlockNumber(flower); int64_t totalGroups = flower_getGroupNumber(flower); int64_t totalChains = flower_getChainNumber(flower); int64_t totalLinkGroups = 0; int64_t maxEndDegree = 0; int64_t maxAdjacencyLength = 0; int64_t totalEdges = 0; Flower_EndIterator *endIt = flower_getEndIterator(flower); End *end; while((end = flower_getNextEnd(endIt)) != NULL) { assert(end_getOrientation(end)); if(end_getInstanceNumber(end) > maxEndDegree) { maxEndDegree = end_getInstanceNumber(end); } stSortedSet *ends = stSortedSet_construct(); End_InstanceIterator *capIt = end_getInstanceIterator(end); Cap *cap; while((cap = end_getNext(capIt)) != NULL) { if(cap_getSequence(cap) != NULL) { Cap *adjacentCap = cap_getAdjacency(cap); assert(adjacentCap != NULL); End *adjacentEnd = end_getPositiveOrientation(cap_getEnd(adjacentCap)); stSortedSet_insert(ends, adjacentEnd); int64_t adjacencyLength = cap_getCoordinate(cap) - cap_getCoordinate(adjacentCap); if(adjacencyLength < 0) { adjacencyLength *= -1; } assert(adjacencyLength >= 1); if(adjacencyLength >= maxAdjacencyLength) { maxAdjacencyLength = adjacencyLength; } } } end_destructInstanceIterator(capIt); totalEdges += stSortedSet_size(ends); if(stSortedSet_search(ends, end) != NULL) { 
//This ensures we count self edges twice, so that the division works. totalEdges += 1; } stSortedSet_destruct(ends); } assert(totalEdges % 2 == 0); flower_destructEndIterator(endIt); Flower_GroupIterator *groupIt = flower_getGroupIterator(flower); Group *group; while((group = flower_getNextGroup(groupIt)) != NULL) { if(group_getLink(group) != NULL) { totalLinkGroups++; } } flower_destructGroupIterator(groupIt); printf("flower name: %" PRIi64 " total bases: %" PRIi64 " total-ends: %" PRIi64 " total-caps: %" PRIi64 " max-end-degree: %" PRIi64 " max-adjacency-length: %" PRIi64 " total-blocks: %" PRIi64 " total-groups: %" PRIi64 " total-edges: %" PRIi64 " total-free-ends: %" PRIi64 " total-attached-ends: %" PRIi64 " total-chains: %" PRIi64 " total-link groups: %" PRIi64 "\n", flower_getName(flower), totalBases, totalEnds, totalCaps, maxEndDegree, maxAdjacencyLength, totalBlocks, totalGroups, totalEdges/2, totalFreeEnds, totalAttachedEnds, totalChains, totalLinkGroups); return 0; }
/*
 * Registers an event with an event tree by inserting it into the
 * tree's sorted set of events.
 */
void eventTree_addEvent(EventTree *eventTree, Event *event) {
    stSortedSet *eventSet = eventTree->events;

    stSortedSet_insert(eventSet, event);
}