/*
 * Deliberately naive re-derivation of the connected components implied by
 * filteredEdges, kept separate from the greedy implementation so that typos in
 * either one show up as a disagreement.
 *
 * Every tuple in the file-level `nodes` list starts in a singleton component;
 * each edge then joins the components of the nodes stored at tuple positions
 * 1 and 2.
 *
 * Returns a hash from node tuple to the stSortedSet holding its component.
 * Nodes of one component share the same set object. The hash is built without
 * key or value destructors, so the caller frees the component sets.
 */
static stHash *getComponents(stList *filteredEdges) {
    stHash *nodesToComponents = stHash_construct3(
            (uint64_t(*)(const void *)) stIntTuple_hashKey,
            (int(*)(const void *, const void *)) stIntTuple_equalsFn,
            NULL, NULL);

    //Seed: one singleton component per node.
    for (int64_t n = 0; n < stList_length(nodes); n++) {
        stIntTuple *node = stList_get(nodes, n);
        stSortedSet *singleton = stSortedSet_construct();
        stSortedSet_insert(singleton, node);
        stHash_insert(nodesToComponents, node, singleton);
    }

    //Merge: each edge unites the components of its two end nodes.
    for (int64_t e = 0; e < stList_length(filteredEdges); e++) {
        stIntTuple *edge = stList_get(filteredEdges, e);

        //Temporary lookup keys, only needed for the two searches.
        stIntTuple *leftKey = stIntTuple_construct1(stIntTuple_get(edge, 1));
        stIntTuple *rightKey = stIntTuple_construct1(stIntTuple_get(edge, 2));
        stSortedSet *left = stHash_search(nodesToComponents, leftKey);
        stSortedSet *right = stHash_search(nodesToComponents, rightKey);
        stIntTuple_destruct(leftKey);
        stIntTuple_destruct(rightKey);
        assert(left != NULL && right != NULL);

        if (left == right) {
            continue; //Already in the same component.
        }

        //Build the union, repoint every member at it, then drop the two halves.
        stSortedSet *merged = stSortedSet_getUnion(left, right);
        stSortedSetIterator *memberIt = stSortedSet_getIterator(merged);
        for (stIntTuple *member = stSortedSet_getNext(memberIt); member != NULL;
                member = stSortedSet_getNext(memberIt)) {
            stHash_insert(nodesToComponents, member, merged);
        }
        stSortedSet_destructIterator(memberIt);
        stSortedSet_destruct(left);
        stSortedSet_destruct(right);
    }
    return nodesToComponents;
}
/*
 * Checks stSet_search on the two fixture sets. For set0 a freshly built tuple
 * is never found; for set1 freshly built tuples `(0)` and `(5)` are expected to
 * return the stored elements `one` and `six`, i.e. set1 matches on contents
 * rather than on address.
 */
static void test_stSet_search(CuTest* testCase) {
    testSetup();
    void *stored[] = { one, two, three, four, five, six };
    const int64_t storedCount = (int64_t) (sizeof(stored) / sizeof(stored[0]));

    //Probes that are distinct objects from anything held in the sets.
    stIntTuple *probeZero = stIntTuple_construct1(0);
    stIntTuple *probePair = stIntTuple_construct2(10, 0);
    stIntTuple *probeFive = stIntTuple_construct1(5);

    //set0: every stored element is found through its own pointer...
    for (int64_t s = 0; s < storedCount; s++) {
        CuAssertTrue(testCase, stSet_search(set0, stored[s]) == stored[s]);
    }
    //...and none of the fresh probes is present.
    CuAssertTrue(testCase, stSet_search(set0, probeZero) == NULL);
    CuAssertTrue(testCase, stSet_search(set0, probePair) == NULL);
    CuAssertTrue(testCase, stSet_search(set0, probeFive) == NULL);

    //set1: stored elements are found through their own pointers as well.
    for (int64_t s = 0; s < storedCount; s++) {
        CuAssertTrue(testCase, stSet_search(set1, stored[s]) == stored[s]);
    }
    //The two-element probe matches nothing in set1.
    CuAssertTrue(testCase, stSet_search(set1, probePair) == NULL);
    //Fresh probes resolve to the stored elements one and six.
    CuAssertTrue(testCase, stSet_search(set1, probeZero) == one);
    CuAssertTrue(testCase, stSet_search(set1, probeFive) == six);

    stIntTuple_destruct(probeZero);
    stIntTuple_destruct(probePair);
    stIntTuple_destruct(probeFive);
    testTeardown();
}
/*
 * Asserts that filteredEdges is a valid, maximal selection from the file-level
 * `edges` list with respect to `maxComponentSize`:
 *  - every component induced by filteredEdges has between 1 and
 *    maxComponentSize nodes;
 *  - every edge that was left out joins two different components whose
 *    combined size would exceed maxComponentSize.
 */
static void checkComponents(CuTest *testCase, stList *filteredEdges) {
    stHash *nodesToComponents = getComponents(filteredEdges);
    stList *components = stHash_getValues(nodesToComponents);

    //Size bounds on every component.
    for (int64_t c = 0; c < stList_length(components); c++) {
        stSortedSet *component = stList_get(components, c);
        CuAssertTrue(testCase, stSortedSet_size(component) <= maxComponentSize);
        CuAssertTrue(testCase, stSortedSet_size(component) >= 1);
    }

    //No rejected edge could have been accepted.
    stSortedSet *keptEdges = stList_getSortedSet(filteredEdges,
            (int(*)(const void *, const void *)) stIntTuple_cmpFn);
    for (int64_t e = 0; e < stList_length(edges); e++) {
        stIntTuple *edge = stList_get(edges, e);
        if (stSortedSet_search(keptEdges, edge) != NULL) {
            continue; //Edge was kept, nothing to verify.
        }
        stIntTuple *leftKey = stIntTuple_construct1(stIntTuple_get(edge, 1));
        stIntTuple *rightKey = stIntTuple_construct1(stIntTuple_get(edge, 2));
        stSortedSet *left = stHash_search(nodesToComponents, leftKey);
        stSortedSet *right = stHash_search(nodesToComponents, rightKey);
        CuAssertTrue(testCase, left != NULL && right != NULL);
        CuAssertTrue(testCase, left != right);
        CuAssertTrue(testCase,
                stSortedSet_size(left) + stSortedSet_size(right) > maxComponentSize);
        stIntTuple_destruct(leftKey);
        stIntTuple_destruct(rightKey);
    }
    stSortedSet_destruct(keptEdges);

    //Cleanup: the values list repeats a component once per node, so funnel the
    //pointers through a set before destroying each component exactly once.
    stSortedSet *uniqueComponents = stList_getSortedSet(components, NULL);
    stList_destruct(components);
    stSortedSet_setDestructor(uniqueComponents, (void(*)(void *)) stSortedSet_destruct);
    stSortedSet_destruct(uniqueComponents);
    stHash_destruct(nodesToComponents);
}
/*
 * Tests stSortedSet_searchGreaterThan: first against fixed probes on the
 * fixture input, then against random probes placed between each pair of
 * neighbouring elements after 100 random insertions.
 *
 * Probe tuples are built, used and destroyed explicitly; passing a freshly
 * constructed tuple directly into a search call would leak it, because the
 * search functions only read their key.
 */
static void test_stSortedSet_searchGreaterThan(CuTest* testCase) {
    sonLibSortedSetTestSetup();
    for(int32_t i=0; i<size; i++) {
        stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[i]));
    }
    //Expected sorted contents at this point: { -10, -1, 1, 3, 5, 10, 12 }
    static const struct {
        int32_t query;      //value to search strictly above
        int32_t successor;  //value expected to be returned (if hasSuccessor)
        int hasSuccessor;   //zero when the search must return NULL
    } cases[] = {
        { -11, -10, 1 },
        { -10, -1, 1 },
        { -5, -1, 1 },
        { 1, 3, 1 },
        { 13, 0, 0 },
        { 12, 0, 0 },
    };
    const int32_t caseCount = (int32_t) (sizeof(cases) / sizeof(cases[0]));
    for(int32_t c=0; c<caseCount; c++) {
        void *expected = NULL;
        if(cases[c].hasSuccessor) {
            stIntTuple *successorKey = stIntTuple_construct(1, cases[c].successor);
            expected = stSortedSet_search(sortedSet, successorKey);
            stIntTuple_destruct(successorKey);
        }
        stIntTuple *queryKey = stIntTuple_construct(1, cases[c].query);
        void *found = stSortedSet_searchGreaterThan(sortedSet, queryKey);
        //Free the probe before asserting so a failing assertion cannot leak it.
        stIntTuple_destruct(queryKey);
        CuAssertTrue(testCase, found == expected);
    }

    for(int32_t i=0; i<100; i++) {
        stSortedSet_insert(sortedSet, stIntTuple_construct(1, st_randomInt(-1000, 1000)));
    }
    stList *list = stSortedSet_getList(sortedSet);
    for(int32_t i=1; i<stList_length(list); i++) {
        stIntTuple *p = stList_get(list, i-1);
        stIntTuple *j = stList_get(list, i);
        //Probe drawn from the range starting at p and bounded by j; the next
        //greater element must be j.
        stIntTuple *k = stIntTuple_construct(1, st_randomInt(stIntTuple_getPosition(p, 0), stIntTuple_getPosition(j, 0)));
        void *found = stSortedSet_searchGreaterThan(sortedSet, k);
        stIntTuple_destruct(k);
        CuAssertTrue(testCase, found == j);
    }
    stList_destruct(list);
    sonLibSortedSetTestTeardown();
}
/*
 * Builds the adjacency list for `pairs` and runs a depth first search from
 * position 0 of every sequence, looking for a valid topological order.
 * Returns non-zero if the graph contains a cycle, zero otherwise.
 */
static int64_t containsACycle(stList *pairs, int64_t sequenceNumber) {
    stHash *adjacencyList = buildAdjacencyList(pairs, sequenceNumber);

    //Bookkeeping sets for the search; neither owns its elements.
    stSortedSet *started = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);
    stSortedSet *done = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);

    int64_t cyclic = 0;
    for(int64_t seq=0; seq<sequenceNumber; seq++) {
        //Look up the tuple stored in the adjacency list for (seq, 0) so the
        //search works on memory owned by the list, not on the temporary key.
        stIntTuple *key = stIntTuple_construct2(seq, 0);
        stSortedSet *column = stHash_search(adjacencyList, key);
        assert(column != NULL);
        stIntTuple *root = stSortedSet_search(column, key);
        assert(root != NULL);
        if(!cyclic && dfs(adjacencyList, root, started, done)) {
            cyclic = 1;
        }
        stIntTuple_destruct(key);
    }

    //Cleanup. Several keys can share one column, so gather the distinct
    //columns in a set that destroys each of them once.
    stSortedSet *distinctColumns = stSortedSet_construct2((void (*)(void *))stSortedSet_destruct);
    stHashIterator *keyIt = stHash_getIterator(adjacencyList);
    for(stIntTuple *key = stHash_getNext(keyIt); key != NULL; key = stHash_getNext(keyIt)) {
        stSortedSet *column = stHash_search(adjacencyList, key);
        assert(column != NULL);
        stSortedSet_insert(distinctColumns, column);
    }
    stHash_destructIterator(keyIt);
    stHash_destruct(adjacencyList);
    stSortedSet_destruct(distinctColumns);
    stSortedSet_destruct(started);
    stSortedSet_destruct(done);
    return cyclic;
}
/*
 * Recursive depth first search step used to decide whether the alignment graph
 * has an acyclic ordering.
 *
 * `started` holds every position the search has entered, `done` every position
 * whose exploration has finished. Reaching a position that is started but not
 * done means a cycle. Returns 1 if a cycle was found from seqPos, else 0.
 */
static int64_t dfs(stHash *adjacencyList, stIntTuple *seqPos, stSortedSet *started, stSortedSet *done) {
    if(stSortedSet_search(started, seqPos) != NULL) {
        //Seen before: finished positions are cycle free, unfinished ones close a cycle.
        return stSortedSet_search(done, seqPos) == NULL ? 1 : 0;
    }
    stSortedSet_insert(started, seqPos);

    int64_t foundCycle = 0;
    //Successor: the next position on the same sequence, plus everything in its column.
    stIntTuple *successorKey = stIntTuple_construct2(stIntTuple_get(seqPos, 0), stIntTuple_get(seqPos, 1) + 1);
    stSortedSet *successorColumn = stHash_search(adjacencyList, successorKey);
    if(successorColumn != NULL) {
        assert(stSortedSet_search(successorColumn, successorKey) != NULL);
        stSortedSetIterator *memberIt = stSortedSet_getIterator(successorColumn);
        for(stIntTuple *member = stSortedSet_getNext(memberIt); member != NULL;
                member = stSortedSet_getNext(memberIt)) {
            //Once a cycle is known, the remaining members are not explored.
            if(!foundCycle && dfs(adjacencyList, member, started, done)) {
                foundCycle = 1;
            }
        }
        stSortedSet_destructIterator(memberIt);
    }
    stIntTuple_destruct(successorKey);

    stSortedSet_insert(done, seqPos);
    return foundCycle;
}
/*
 * Reads the matching written by the external matching program ("Blossum" in
 * the original note) from fileHandle.
 *
 * Expected layout: a header line "<nodeNumber> <edgeNumber>" followed by
 * edgeNumber lines "<node1> <node2>". Each pair is turned into an edge with
 * constructEdge and looked up among originalEdges; pairs with no matching
 * original edge are skipped.
 *
 * Returns a new list of the matched tuples taken from originalEdges (the
 * tuples themselves are not copied).
 */
static stList *readMatching(FILE *fileHandle, stList *originalEdges) {
    stHash *originalEdgesHash = putEdgesInHash(originalEdges);

    char *line = stFile_getLineFromFile(fileHandle);
    assert(line != NULL);
    //Zero-initialised so that a failed parse in an assert-free build yields an
    //empty matching instead of reading indeterminate values.
    int64_t nodeNumber = 0, edgeNumber = 0;
    int64_t i = sscanf(line, "%" PRIi64 " %" PRIi64 "\n", &nodeNumber, &edgeNumber);
    assert(i == 2);
    free(line);

    stList *chosenEdges = stList_construct();
    for(int64_t j=0; j<edgeNumber; j++) {
        line = stFile_getLineFromFile(fileHandle);
        //A truncated file must not reach sscanf as a NULL string.
        assert(line != NULL);
        int64_t node1 = -1, node2 = -1;
        i = sscanf(line, "%" PRIi64 " %" PRIi64 "", &node1, &node2);
        assert(i == 2);
        free(line);
        assert(node1 >= 0);
        assert(node1 < nodeNumber);
        assert(node2 >= 0);
        assert(node2 < nodeNumber);

        //Temporary key used only to find the caller's own edge tuple.
        stIntTuple *edge = constructEdge(node1, node2);
        stIntTuple *originalEdge = stHash_search(originalEdgesHash, edge);
        if(originalEdge != NULL) {
            stList_append(chosenEdges, originalEdge);
        }
        stIntTuple_destruct(edge);
    }
    stHash_destruct(originalEdgesHash);
    return chosenEdges;
}
/*
 * Looks up the constraint giving the position in sequence2 that position1 of
 * sequence1 must be greater than or equal to in the alignment.
 *
 * Returns the constraint tuple found in the (sequence2, sequence1) constraint
 * list, or NULL if there is none. The tuple is not copied.
 */
static stIntTuple *getConstraint_greaterThan(stPosetAlignment *posetAlignment, int64_t sequence1, int64_t position1, int64_t sequence2) {
    //Probe with INT64_MAX in slot 0 and position1 in slot 1, then take the
    //closest entry that is less than or equal to it.
    stIntTuple *probe = stIntTuple_construct2(INT64_MAX, position1);
    stSortedSet *constraints = getConstraintList(posetAlignment, sequence2, sequence1);
    stIntTuple *found = stSortedSet_searchLessThanOrEqual(constraints, probe);
    stIntTuple_destruct(probe);

    assert(found == NULL || position1 >= stIntTuple_get(found, 1));
    return found;
}
/*
 * Tests stSortedSet_getIntersection on empty sets, on an empty and a
 * non-empty set, on two overlapping sets, and on sets built with different
 * comparators (which is expected to raise SORTED_SET_EXCEPTION_ID).
 */
static void test_stSortedSetIntersection(CuTest* testCase) {
    sonLibSortedSetTestSetup();

    //Empty with empty gives empty.
    stSortedSet *intersection = stSortedSet_getIntersection(sortedSet, sortedSet2);
    CuAssertTrue(testCase, stSortedSet_size(intersection) == 0);
    stSortedSet_destruct(intersection);

    for(int32_t n=0; n<size; n++) {
        stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[n]));
    }

    //Non-empty with empty gives empty.
    intersection = stSortedSet_getIntersection(sortedSet, sortedSet2);
    CuAssertTrue(testCase, stSortedSet_size(intersection) == 0);
    stSortedSet_destruct(intersection);

    //Two overlapping non-empty sets: the test expects 1 and 5 to be shared.
    static const int32_t secondSetValues[] = { 0, 1, 5 };
    for(int32_t n=0; n<3; n++) {
        stSortedSet_insert(sortedSet2, stIntTuple_construct(1, secondSetValues[n]));
    }
    intersection = stSortedSet_getIntersection(sortedSet, sortedSet2);
    CuAssertTrue(testCase, stSortedSet_size(intersection) == 2);
    static const int32_t sharedValues[] = { 1, 5 };
    for(int32_t n=0; n<2; n++) {
        stIntTuple *probe = stIntTuple_construct(1, sharedValues[n]);
        CuAssertTrue(testCase, stSortedSet_search(intersection, probe) != NULL);
        stIntTuple_destruct(probe);
    }
    stSortedSet_destruct(intersection);

    //Sets built with different comparators must raise an exception.
    stSortedSet *otherComparatorSet = stSortedSet_construct();
    stTry {
        stSortedSet_getIntersection(sortedSet, otherComparatorSet);
    } stCatch(except) {
        CuAssertTrue(testCase, stExcept_getId(except) == SORTED_SET_EXCEPTION_ID);
    } stTryEnd
    stSortedSet_destruct(otherComparatorSet);

    sonLibSortedSetTestTeardown();
}
/*
 * Collects the adjacency edges that bridge components, i.e. edges with exactly
 * one end node inside a given component.
 *
 * For every component, each of its nodes is looked up in
 * nodesToNonZeroWeightedAdjacencyEdges; an edge from that list is appended when
 * one of its end nodes (tuple positions 0 and 1) is missing from the component.
 * Edges are appended by reference, once per component in which they qualify.
 */
static stList *getEdgesThatBridgeComponents(stList *components, stHash *nodesToNonZeroWeightedAdjacencyEdges) {
    stList *bridgingAdjacencyEdges = stList_construct();

    for (int64_t c = 0; c < stList_length(components); c++) {
        stSortedSet *memberNodes = getNodeSetOfEdges(stList_get(components, c));
        stSortedSetIterator *nodeIt = stSortedSet_getIterator(memberNodes);

        for (stIntTuple *node = stSortedSet_getNext(nodeIt); node != NULL;
                node = stSortedSet_getNext(nodeIt)) {
            stList *adjacencyEdges = stHash_search(nodesToNonZeroWeightedAdjacencyEdges, node);
            if (adjacencyEdges == NULL) {
                continue; //Node has no adjacency edges of interest.
            }
            for (int64_t a = 0; a < stList_length(adjacencyEdges); a++) {
                stIntTuple *edge = stList_get(adjacencyEdges, a);
                stIntTuple *endA = stIntTuple_construct1(stIntTuple_get(edge, 0));
                stIntTuple *endB = stIntTuple_construct1(stIntTuple_get(edge, 1));
                int endAInside = stSortedSet_search(memberNodes, endA) != NULL;
                int endBInside = stSortedSet_search(memberNodes, endB) != NULL;
                stIntTuple_destruct(endA);
                stIntTuple_destruct(endB);

                //The edge came from a member node, so at least one end is inside.
                assert(endAInside || endBInside);
                if (!(endAInside && endBInside)) {
                    stList_append(bridgingAdjacencyEdges, edge);
                }
            }
        }
        stSortedSet_destructIterator(nodeIt);
        stSortedSet_destruct(memberNodes);
    }
    return bridgingAdjacencyEdges;
}
/*
 * Randomised test of stPosetAlignment_isPossible / stPosetAlignment_add.
 *
 * Each of the 100 trials draws a random length for every sequence, then
 * proposes random aligned pairs between two different sequences. A pair that
 * the poset alignment accepts must not create a cycle in the independently
 * built graph (containsACycle); a pair it rejects must create one, and removing
 * that pair must make the graph acyclic again.
 */
static void test_stPosetAlignment_addAndIsPossible(CuTest *testCase) {
    for(int64_t trial=0; trial<100; trial++) {
        setup();

        //Make random number of sequences.
        stList *sequenceLengths = stList_construct3(0, (void (*)(void *))stIntTuple_destruct);
        for(int64_t i=0; i<sequenceNumber; i++) {
            stList_append(sequenceLengths, stIntTuple_construct1( st_randomInt(0, MAX_SEQUENCE_SIZE)));
        }

        //Propose random alignment pairs...
        stList *pairs = stList_construct3(0, (void(*)(void *))stIntTuple_destruct);
        int64_t maxAlignedPairs = st_randomInt(0, MAX_ALIGNMENTS);
        if(sequenceNumber > 0) {
            for(int64_t i=0; i<maxAlignedPairs; i++) {
                int64_t seq1 = st_randomInt(0, sequenceNumber);
                int64_t seqLength1 = stIntTuple_get(stList_get(sequenceLengths, seq1), 0);
                if(seqLength1 == 0) {
                    continue;
                }
                int64_t position1 = st_randomInt(0, seqLength1);

                int64_t seq2 = st_randomInt(0, sequenceNumber);
                //The length must be that of seq2 (not seq1), so that position2
                //is a valid position on the second sequence.
                int64_t seqLength2 = stIntTuple_get(stList_get(sequenceLengths, seq2), 0);
                if(seqLength2 == 0) {
                    continue;
                }
                int64_t position2 = st_randomInt(0, seqLength2);

                if(seq1 != seq2) {
                    //Tentatively record the pair so containsACycle sees it.
                    stList_append(pairs, stIntTuple_construct4( seq1, position1, seq2, position2));
                    if(stPosetAlignment_isPossible(posetAlignment, seq1, position1, seq2, position2)) {
                        st_logInfo("In %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " \n", seq1, position1, seq2, position2);
                        //For each accepted pair check it doesn't create a cycle.
                        CuAssertTrue(testCase, !containsACycle(pairs, sequenceNumber));
                        CuAssertTrue(testCase, stPosetAlignment_add(posetAlignment, seq1, position1, seq2, position2));
                    }
                    else {
                        st_logInfo("Out %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " \n", seq1, position1, seq2, position2);
                        //For each rejected pair check it creates a cycle..
                        CuAssertTrue(testCase, containsACycle(pairs, sequenceNumber));
                        CuAssertTrue(testCase, !stPosetAlignment_isPossible(posetAlignment, seq1, position1, seq2, position2));
                        stIntTuple_destruct(stList_pop(pairs)); //remove the pair which created the cycle.
                        CuAssertTrue(testCase, !containsACycle(pairs, sequenceNumber)); //Check we're back to being okay..
                    }
                }
            }
        }

        //Cleanup
        stList_destruct(sequenceLengths);
        stList_destruct(pairs);
        teardown();
        st_logInfo("Passed a random ordering test with %" PRIi64 " sequences and %" PRIi64 " aligned pairs\n", sequenceNumber, maxAlignedPairs);
    }
}
/*
 * Builds the adjacency list structure for the sequences: a hash from every
 * (sequence, position) tuple to the column (sorted set of tuples) it is
 * aligned in.
 *
 * Initially each position in [0, MAX_SEQUENCE_SIZE) of each sequence forms its
 * own column. Every pair (seq1, pos1, seq2, pos2) then merges the columns of
 * its two positions. The hash destroys its key tuples; columns have no element
 * destructor and are not destroyed by the hash, so the caller must free them.
 */
static stHash *buildAdjacencyList(stList *pairs, int64_t sequenceNumber) {
    stHash *positionToColumn = stHash_construct3((uint64_t (*)(const void *))stIntTuple_hashKey,
            (int (*)(const void *, const void *))stIntTuple_equalsFn,
            (void (*)(void *))stIntTuple_destruct, NULL);

    //One singleton column per sequence position.
    for(int64_t seq=0; seq<sequenceNumber; seq++) {
        for(int64_t position=0; position<MAX_SEQUENCE_SIZE; position++) {
            stIntTuple *seqPos = stIntTuple_construct2(seq, position);
            stSortedSet *singleton = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);
            stSortedSet_insert(singleton, seqPos);
            stHash_insert(positionToColumn, seqPos, singleton);
        }
    }

    //Merge the columns joined by each aligned pair.
    stListIterator *pairIt = stList_getIterator(pairs);
    for(stIntTuple *pair = stList_getNext(pairIt); pair != NULL; pair = stList_getNext(pairIt)) {
        stIntTuple *firstKey = stIntTuple_construct2(stIntTuple_get(pair, 0), stIntTuple_get(pair, 1));
        stIntTuple *secondKey = stIntTuple_construct2(stIntTuple_get(pair, 2), stIntTuple_get(pair, 3));
        stSortedSet *target = stHash_search(positionToColumn, firstKey);
        assert(target != NULL);
        stSortedSet *absorbed = stHash_search(positionToColumn, secondKey);
        assert(absorbed != NULL);

        if(target != absorbed) {
            //Move every member of the absorbed column into the target column
            //and repoint its hash entry.
            stSortedSetIterator *memberIt = stSortedSet_getIterator(absorbed);
            for(stIntTuple *member = stSortedSet_getNext(memberIt); member != NULL;
                    member = stSortedSet_getNext(memberIt)) {
                assert(stSortedSet_search(target, member) == NULL);
                stSortedSet_insert(target, member);
                assert(stHash_search(positionToColumn, member) == absorbed);
                stHash_insert(positionToColumn, member, target);
                assert(stHash_search(positionToColumn, member) == target);
            }
            stSortedSet_destructIterator(memberIt);
            stSortedSet_destruct(absorbed);
        }

        //The lookup keys were temporaries.
        stIntTuple_destruct(firstKey);
        stIntTuple_destruct(secondKey);
    }
    stList_destructIterator(pairIt);
    return positionToColumn;
}
/*
 * Tests stSet_insert: re-inserting keys that are already present leaves them
 * findable, and a key that was absent becomes findable after insertion.
 */
static void test_stSet_insert(CuTest* testCase) {
    testSetup();

    //Already present keys.
    CuAssertTrue(testCase, stSet_search(set0, one) == one);
    stSet_insert(set0, one);
    CuAssertTrue(testCase, stSet_search(set0, one) == one);

    stSet_insert(set0, three);
    CuAssertTrue(testCase, stSet_search(set0, three) == three);

    //A key that is absent before the insert.
    stIntTuple *newKey = stIntTuple_construct2(7, 7);
    CuAssertTrue(testCase, stSet_search(set0, newKey) == NULL);
    stSet_insert(set0, newKey);
    CuAssertTrue(testCase, stSet_search(set0, newKey) == newKey);
    stIntTuple_destruct(newKey);

    testTeardown();
}
// copied from cPecanRealign struct PairwiseAlignment *convertAlignedPairsToPairwiseAlignment(char *seqName1, char *seqName2, double score, int64_t length1, int64_t length2, stList *alignedPairs) { //Make pairwise alignment int64_t pX = -1, pY = -1, mL = 0; //Create an end matched pair, which is used to ensure the alignment has the correct end indels. struct List *opList = constructEmptyList(0, (void (*)(void *)) destructAlignmentOperation); stList_append(alignedPairs, stIntTuple_construct2(length1, length2)); for (int64_t i = 0; i < stList_length(alignedPairs); i++) { stIntTuple *alignedPair = stList_get(alignedPairs, i); int64_t x = stIntTuple_get(alignedPair, 0); int64_t y = stIntTuple_get(alignedPair, 1); assert(x - pX > 0); assert(y - pY > 0); if (x - pX > 0 && y - pY > 0) { //This is a hack for filtering if (x - pX > 1) { //There is an indel. if (mL > 0) { listAppend(opList, constructAlignmentOperation(PAIRWISE_MATCH, mL, 0)); mL = 0; } listAppend(opList, constructAlignmentOperation(PAIRWISE_INDEL_X, x - pX - 1, 0)); } if (y - pY > 1) { if (mL > 0) { listAppend(opList, constructAlignmentOperation(PAIRWISE_MATCH, mL, 0)); mL = 0; } listAppend(opList, constructAlignmentOperation(PAIRWISE_INDEL_Y, y - pY - 1, 0)); } mL++; pX = x; pY = y; } } //Deal with a trailing match, but exclude the final match if (mL > 1) { listAppend(opList, constructAlignmentOperation(PAIRWISE_MATCH, mL - 1, 0)); } stIntTuple_destruct(stList_pop(alignedPairs)); //Construct the alignment struct PairwiseAlignment *pA = constructPairwiseAlignment(seqName1, 0, length1, 1, seqName2, 0, length2, 1, score, opList); return pA; }
/*
 * Basic stSortedSet test: inserts the fixture input, checks size, first and
 * last element against sortedInput, then removes the elements in sorted order
 * while checking the size and membership at every step.
 */
static void test_stSortedSet(CuTest* testCase) {
    sonLibSortedSetTestSetup();

    CuAssertIntEquals(testCase, 0, stSortedSet_size(sortedSet));
    for(int32_t n=0; n<size; n++) {
        stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[n]));
    }

    CuAssertIntEquals(testCase, sortedSize, stSortedSet_size(sortedSet));
    CuAssertIntEquals(testCase, sortedInput[0],
            stIntTuple_getPosition(stSortedSet_getFirst(sortedSet), 0));
    CuAssertIntEquals(testCase, sortedInput[sortedSize-1],
            stIntTuple_getPosition(stSortedSet_getLast(sortedSet), 0));

    for(int32_t rank=0; rank<sortedSize; rank++) {
        //rank elements have been removed so far.
        CuAssertIntEquals(testCase, sortedSize-rank, stSortedSet_size(sortedSet));

        stIntTuple *probe = stIntTuple_construct(1, sortedInput[rank]);
        CuAssertTrue(testCase,
                stIntTuple_getPosition(stSortedSet_search(sortedSet, probe), 0) == sortedInput[rank]);
        stSortedSet_remove(sortedSet, probe);
        CuAssertTrue(testCase, stSortedSet_search(sortedSet, probe) == NULL);
        stIntTuple_destruct(probe);
    }

    sonLibSortedSetTestTeardown();
}
/*
 * Gets the connected components of the graph described by `edges`.
 *
 * Returns a list of components, each itself a list of edges, intended to place
 * every edge in exactly one component. Multi-graphs (several edges between the
 * same two nodes) are allowed. The returned outer list destroys its inner
 * lists; the edges themselves are shared with the input.
 */
stList *getComponents(stList *edges) {
    stHash *nodesToEdges = getNodesToEdgesHash(edges);
    stList *components = stList_construct3(0, (void(*)(void *)) stList_destruct);

    //Greedy traversal: take any node still in the hash and flood out from it.
    stList *pendingNodes = stHash_getKeys(nodesToEdges);
    while (stList_length(pendingNodes) > 0) {
        stIntTuple *seed = stList_pop(pendingNodes);
        stList *seedEdges = stHash_search(nodesToEdges, seed);
        if (seedEdges == NULL) {
            //Node was already consumed by an earlier component.
            stIntTuple_destruct(seed);
            continue;
        }

        //Removing a node from the hash marks it as visited.
        stSortedSet *componentEdges = stSortedSet_construct();
        stHash_remove(nodesToEdges, seed);
        for (int64_t e = 0; e < stList_length(seedEdges); e++) {
            stIntTuple *edge = stList_get(seedEdges, e);
            getComponentsP(nodesToEdges, stIntTuple_get(edge, 0), componentEdges);
            getComponentsP(nodesToEdges, stIntTuple_get(edge, 1), componentEdges);
        }
        stList_append(components, stSortedSet_getList(componentEdges));

        //Cleanup
        stSortedSet_destruct(componentEdges);
        stList_destruct(seedEdges);
        stIntTuple_destruct(seed);
    }

    assert(stHash_size(nodesToEdges) == 0);
    stHash_destruct(nodesToEdges);
    stList_destruct(pendingNodes);
    return components;
}
/*
 * Adds a prime "less than" constraint (position1 on sequence1 precedes
 * position2 on sequence2) to the prime constraints of the pair
 * (sequence1, sequence2), removing constraints made redundant by it.
 * A non-zero lessThanOrEquals makes it a "less than or equals" constraint.
 *
 * Constraints are stored as tuples (position1, position2, lessThanOrEquals);
 * the new tuple is owned by the constraint list after the call.
 */
void addConstraint_lessThan(stPosetAlignment *posetAlignment, int64_t sequence1, int64_t position1, int64_t sequence2, int64_t position2, int64_t lessThanOrEquals) {
    stSortedSet *constraintList = getConstraintList(posetAlignment, sequence1, sequence2);
    assert(position1 != INT64_MAX);
    assert(position2 != INT64_MAX);

    stIntTuple *added = stIntTuple_construct3(position1, position2, lessThanOrEquals);

    //Walk downwards from the new constraint, discarding each existing
    //constraint whose bound on sequence2 is no tighter than the new one.
    for (;;) {
        stIntTuple *existing = stSortedSet_searchLessThanOrEqual(constraintList, added);
        if (existing == NULL) {
            break;
        }
        assert(stIntTuple_get(existing, 0) <= position1);

        if (stIntTuple_get(existing, 1) < position2) {
            //Check the constraint does not overshadow our proposed constraint.
            assert(stIntTuple_get(existing, 0) < position1);
            break;
        }

        //Check we are not removing an equivalent or more severe constraint.
        assert(stIntTuple_get(existing, 1) != position2
                || (!lessThanOrEquals && stIntTuple_get(existing, 2))
                || stIntTuple_get(existing, 0) < position1);
        stSortedSet_remove(constraintList, existing);
        stIntTuple_destruct(existing);
    }

    stSortedSet_insert(constraintList, added);
}
/*
 * Recursive helper for getComponents: if `node` is still present in
 * nodesToEdges, removes it, adds all of its edges to `component` and recurses
 * on both end nodes (tuple positions 0 and 1) of each edge. The node's edge
 * list is destroyed once it has been processed.
 *
 * Recursion depth can equal the total number of nodes.
 */
static void getComponentsP(stHash *nodesToEdges, int64_t node, stSortedSet *component) {
    stIntTuple *nodeKey = stIntTuple_construct1(node);
    stList *nodeEdges = stHash_search(nodesToEdges, nodeKey);
    if (nodeEdges == NULL) {
        //Already visited (or unknown) node.
        stIntTuple_destruct(nodeKey);
        return;
    }

    //Taking the node out of the hash marks it visited before recursing.
    stHash_remove(nodesToEdges, nodeKey);
    for (int64_t e = 0; e < stList_length(nodeEdges); e++) {
        stIntTuple *edge = stList_get(nodeEdges, e);
        if (stSortedSet_search(component, edge) == NULL) {
            stSortedSet_insert(component, edge);
        }
        getComponentsP(nodesToEdges, stIntTuple_get(edge, 0), component);
        getComponentsP(nodesToEdges, stIntTuple_get(edge, 1), component);
    }
    stList_destruct(nodeEdges);
    stIntTuple_destruct(nodeKey);
}
/*
 * Makes an alignment of the adjacency sequences attached to the cap instances
 * of `end` and returns it as a sorted set of AlignedPair objects.
 *
 * Steps:
 *  1. For every cap of the end, build an AdjacencySequence and a SeqFrag, and
 *     count how many sequences lead to each "other" end.
 *  2. Align the SeqFrags with makeAlignment.
 *  3. Per sequence, count the chosen pairwise alignments, split by whether the
 *     two sequences share the same other end ("common") or not ("non-common").
 *  4. Derive a per-sequence score adjustment for each of the two classes.
 *  5. Convert every aligned pair into an AlignedPair (plus its reverse) with
 *     the adjusted scores.
 *
 * The returned set is constructed with alignedPair_cmpFn as comparator and
 * alignedPair_destruct as element destructor. All intermediate structures are
 * freed before returning.
 */
stSortedSet *makeEndAlignment(StateMachine *sM, End *end, int64_t spanningTrees, int64_t maxSequenceLength, bool useProgressiveMerging, float gapGamma, PairwiseAlignmentParameters *pairwiseAlignmentBandingParameters) {
    //Make an alignment of the sequences in the ends

    //Get the adjacency sequences to be aligned.
    Cap *cap;
    End_InstanceIterator *it = end_getInstanceIterator(end);
    stList *sequences = stList_construct3(0, (void (*)(void *))adjacencySequence_destruct);
    stList *seqFrags = stList_construct3(0, (void (*)(void *))seqFrag_destruct);
    //Maps an other-end to a heap allocated int64_t counter; values are released with free().
    stHash *endInstanceNumbers = stHash_construct2(NULL, free);
    while((cap = end_getNext(it)) != NULL) {
        //When cap_getSide(cap) is true the reverse of the cap is used instead.
        if(cap_getSide(cap)) {
            cap = cap_getReverse(cap);
        }
        AdjacencySequence *adjacencySequence = adjacencySequence_construct(cap, maxSequenceLength);
        stList_append(sequences, adjacencySequence);
        assert(cap_getAdjacency(cap) != NULL);
        //The end on the far side of this cap's adjacency, in positive orientation.
        End *otherEnd = end_getPositiveOrientation(cap_getEnd(cap_getAdjacency(cap)));
        //sequences and seqFrags are appended in lock step, so index k refers to the same cap in both.
        stList_append(seqFrags, seqFrag_construct(adjacencySequence->string, 0, end_getName(otherEnd)));
        //Increase count of seqfrags with a given end.
        int64_t *c = stHash_search(endInstanceNumbers, otherEnd);
        if(c == NULL) {
            c = st_calloc(1, sizeof(int64_t));
            assert(*c == 0);
            stHash_insert(endInstanceNumbers, otherEnd, c);
        }
        (*c)++;
    }
    end_destructInstanceIterator(it);

    //Get the alignment.
    //NOTE(review): the meaning of the literal 100000000 is defined by makeAlignment, which is not visible here - confirm.
    MultipleAlignment *mA = makeAlignment(sM, seqFrags, spanningTrees, 100000000, useProgressiveMerging, gapGamma, pairwiseAlignmentBandingParameters);

    //Build an array of weights to reweight pairs in the alignment.
    int64_t *pairwiseAlignmentsPerSequenceNonCommonEnds = st_calloc(stList_length(seqFrags), sizeof(int64_t));
    int64_t *pairwiseAlignmentsPerSequenceCommonEnds = st_calloc(stList_length(seqFrags), sizeof(int64_t));

    //First build array on number of pairwise alignments to each sequence, distinguishing alignments between sequences sharing
    //common ends.
    for(int64_t i=0; i<stList_length(mA->chosenPairwiseAlignments); i++) {
        stIntTuple *pairwiseAlignment = stList_get(mA->chosenPairwiseAlignments, i);
        //Tuple positions 1 and 2 hold the indices of the two aligned sequences.
        int64_t seq1 = stIntTuple_get(pairwiseAlignment, 1);
        int64_t seq2 = stIntTuple_get(pairwiseAlignment, 2);
        assert(seq1 != seq2);
        SeqFrag *seqFrag1 = stList_get(seqFrags, seq1);
        SeqFrag *seqFrag2 = stList_get(seqFrags, seq2);
        //Pick the counter array by whether both fragments have the same rightEndId.
        int64_t *pairwiseAlignmentsPerSequence = seqFrag1->rightEndId == seqFrag2->rightEndId ? pairwiseAlignmentsPerSequenceCommonEnds : pairwiseAlignmentsPerSequenceNonCommonEnds;
        pairwiseAlignmentsPerSequence[seq1]++;
        pairwiseAlignmentsPerSequence[seq2]++;
    }

    //Now calculate score adjustments.
    double *scoreAdjustmentsNonCommonEnds = st_malloc(stList_length(seqFrags) * sizeof(double));
    double *scoreAdjustmentsCommonEnds = st_malloc(stList_length(seqFrags) * sizeof(double));
    for(int64_t i=0; i<stList_length(seqFrags); i++) {
        SeqFrag *seqFrag = stList_get(seqFrags, i);
        End *otherEnd = flower_getEnd(end_getFlower(end), seqFrag->rightEndId);
        assert(otherEnd != NULL);
        assert(stHash_search(endInstanceNumbers, otherEnd) != NULL);
        //Number of sequences sharing this sequence's other end (itself included), and the rest.
        int64_t commonInstanceNumber = *(int64_t *)stHash_search(endInstanceNumbers, otherEnd);
        int64_t nonCommonInstanceNumber = stList_length(seqFrags) - commonInstanceNumber;
        assert(commonInstanceNumber > 0 && nonCommonInstanceNumber >= 0);
        assert(pairwiseAlignmentsPerSequenceNonCommonEnds[i] <= nonCommonInstanceNumber);
        assert(pairwiseAlignmentsPerSequenceNonCommonEnds[i] >= 0);
        assert(pairwiseAlignmentsPerSequenceCommonEnds[i] < commonInstanceNumber);
        assert(pairwiseAlignmentsPerSequenceCommonEnds[i] >= 0);
        //Earlier single-weight formula, kept for reference:
        //scoreAdjustmentsNonCommonEnds[i] = ((double)nonCommonInstanceNumber + commonInstanceNumber - 1)/(pairwiseAlignmentsPerSequenceNonCommonEnds[i] + pairwiseAlignmentsPerSequenceCommonEnds[i]);
        //scoreAdjustmentsCommonEnds[i] = scoreAdjustmentsNonCommonEnds[i];
        //Adjustment = (number of possible partners in the class) / (number of alignments actually chosen in the class).
        //INT64_MIN marks "no alignment of this class"; it is asserted against before use below.
        if(pairwiseAlignmentsPerSequenceNonCommonEnds[i] > 0) {
            scoreAdjustmentsNonCommonEnds[i] = ((double)nonCommonInstanceNumber)/pairwiseAlignmentsPerSequenceNonCommonEnds[i];
            assert(scoreAdjustmentsNonCommonEnds[i] >= 1.0);
            assert(scoreAdjustmentsNonCommonEnds[i] <= nonCommonInstanceNumber);
        }
        else {
            scoreAdjustmentsNonCommonEnds[i] = INT64_MIN;
        }
        if(pairwiseAlignmentsPerSequenceCommonEnds[i] > 0) {
            //The sequence itself is excluded from its own class, hence the -1.
            scoreAdjustmentsCommonEnds[i] = ((double)commonInstanceNumber-1)/pairwiseAlignmentsPerSequenceCommonEnds[i];
            assert(scoreAdjustmentsCommonEnds[i] >= 1.0);
            assert(scoreAdjustmentsCommonEnds[i] <= commonInstanceNumber-1);
        }
        else {
            scoreAdjustmentsCommonEnds[i] = INT64_MIN;
        }
    }

    //Convert the alignment pairs to an alignment of the caps..
    stSortedSet *sortedAlignment = stSortedSet_construct3((int (*)(const void *, const void *))alignedPair_cmpFn, (void (*)(void *))alignedPair_destruct);
    //The aligned pairs are consumed: each tuple is popped from mA->alignedPairs and destroyed here.
    while(stList_length(mA->alignedPairs) > 0) {
        stIntTuple *alignedPair = stList_pop(mA->alignedPairs);
        //Tuple layout used below: (score, seqIndex1, offset1, seqIndex2, offset2).
        assert(stIntTuple_length(alignedPair) == 5);
        int64_t seqIndex1 = stIntTuple_get(alignedPair, 1);
        int64_t seqIndex2 = stIntTuple_get(alignedPair, 3);
        AdjacencySequence *i = stList_get(sequences, seqIndex1);
        AdjacencySequence *j = stList_get(sequences, seqIndex2);
        assert(i != j);
        int64_t offset1 = stIntTuple_get(alignedPair, 2);
        int64_t offset2 = stIntTuple_get(alignedPair, 4);
        int64_t score = stIntTuple_get(alignedPair, 0);
        if(score <= 0) { //Happens when indel probs are included
            score = 1; //This is the minimum
        }
        assert(score > 0 && score <= PAIR_ALIGNMENT_PROB_1);
        SeqFrag *seqFrag1 = stList_get(seqFrags, seqIndex1);
        SeqFrag *seqFrag2 = stList_get(seqFrags, seqIndex2);
        assert(seqFrag1 != seqFrag2);
        //Same common / non-common split as used when counting the alignments.
        double *scoreAdjustments = seqFrag1->rightEndId == seqFrag2->rightEndId ? scoreAdjustmentsCommonEnds : scoreAdjustmentsNonCommonEnds;
        assert(scoreAdjustments[seqIndex1] != INT64_MIN);
        assert(scoreAdjustments[seqIndex2] != INT64_MIN);
        //The offset is added to the start when strand is true and subtracted otherwise.
        AlignedPair *alignedPair2 = alignedPair_construct( i->subsequenceIdentifier, i->start + (i->strand ? offset1 : -offset1), i->strand, j->subsequenceIdentifier, j->start + (j->strand ? offset2 : -offset2), j->strand, score*scoreAdjustments[seqIndex1], score*scoreAdjustments[seqIndex2]); //Do the reweighting here.
        assert(stSortedSet_search(sortedAlignment, alignedPair2) == NULL);
        assert(stSortedSet_search(sortedAlignment, alignedPair2->reverse) == NULL);
        //Both orientations of the pair are stored in the result.
        stSortedSet_insert(sortedAlignment, alignedPair2);
        stSortedSet_insert(sortedAlignment, alignedPair2->reverse);
        stIntTuple_destruct(alignedPair);
    }

    //Cleanup
    stList_destruct(seqFrags);
    stList_destruct(sequences);
    free(pairwiseAlignmentsPerSequenceNonCommonEnds);
    free(pairwiseAlignmentsPerSequenceCommonEnds);
    free(scoreAdjustmentsNonCommonEnds);
    free(scoreAdjustmentsCommonEnds);
    multipleAlignment_destruct(mA);
    stHash_destruct(endInstanceNumbers);
    return sortedAlignment;
}
static stHash *getScaffoldPathsP(stList *haplotypePaths, stHash *haplotypePathToScaffoldPathHash, stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters) { stHash *haplotypeToMaximalHaplotypeLengthHash = buildContigPathToContigPathLengthHash(haplotypePaths); stHash *segmentToMaximalHaplotypePathHash = buildSegmentToContigPathHash(haplotypePaths); for (int64_t i = 0; i < stList_length(haplotypePaths); i++) { stSortedSet *bucket = stSortedSet_construct(); stHash_insert(haplotypePathToScaffoldPathHash, stList_get(haplotypePaths, i), bucket); stSortedSet_insert(bucket, stList_get(haplotypePaths, i)); } for (int64_t i = 0; i < stList_length(haplotypePaths); i++) { stList *haplotypePath = stList_get(haplotypePaths, i); assert(stList_length(haplotypePath) > 0); Segment *_5Segment = stList_get(haplotypePath, 0); if (!segment_getStrand(_5Segment)) { _5Segment = segment_getReverse(stList_get(haplotypePath, stList_length(haplotypePath) - 1)); } assert(segment_getStrand(_5Segment)); if (getAdjacentCapsSegment(segment_get5Cap(_5Segment)) != NULL) { assert(!trueAdjacency(segment_get5Cap(_5Segment), haplotypeEventStrings)); } int64_t insertLength; int64_t deleteLength; Cap *otherCap; enum CapCode _5CapCode = getCapCode(segment_get5Cap(_5Segment), &otherCap, haplotypeEventStrings, contaminationEventStrings, &insertLength, &deleteLength, capCodeParameters); if (_5CapCode == SCAFFOLD_GAP || _5CapCode == AMBIGUITY_GAP) { assert(stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath) != NULL); int64_t j = stIntTuple_get(stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath), 0); Segment *adjacentSegment = getAdjacentCapsSegment(segment_get5Cap(_5Segment)); assert(adjacentSegment != NULL); while (!hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)) { //is not a haplotype end adjacentSegment = getAdjacentCapsSegment(segment_get5Cap(adjacentSegment)); assert(adjacentSegment != NULL); 
} assert(adjacentSegment != NULL); assert(hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)); //is a haplotype end stList *adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, adjacentSegment); if (adjacentHaplotypePath == NULL) { adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, segment_getReverse( adjacentSegment)); } assert(adjacentHaplotypePath != NULL); assert(adjacentHaplotypePath != haplotypePath); assert(stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath) != NULL); int64_t k = stIntTuple_get(stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath), 0); //Now merge the buckets and make new int tuples.. stSortedSet *bucket1 = stHash_search(haplotypePathToScaffoldPathHash, haplotypePath); stSortedSet *bucket2 = stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath); assert(bucket1 != NULL); assert(bucket2 != NULL); assert(bucket1 != bucket2); stSortedSet *bucket3 = stSortedSet_getUnion(bucket1, bucket2); stSortedSetIterator *bucketIt = stSortedSet_getIterator(bucket3); stList *l; while ((l = stSortedSet_getNext(bucketIt)) != NULL) { //Do the bucket first assert(stHash_search(haplotypePathToScaffoldPathHash, l) == bucket1 || stHash_search(haplotypePathToScaffoldPathHash, l) == bucket2); stHash_remove(haplotypePathToScaffoldPathHash, l); stHash_insert(haplotypePathToScaffoldPathHash, l, bucket3); //Now the length stIntTuple *m = stHash_remove(haplotypeToMaximalHaplotypeLengthHash, l); assert(m != NULL); assert(stIntTuple_get(m, 0) == j || stIntTuple_get(m, 0) == k); stHash_insert(haplotypeToMaximalHaplotypeLengthHash, l, stIntTuple_construct1( j + k)); stIntTuple_destruct(m); } assert(stHash_search(haplotypePathToScaffoldPathHash, haplotypePath) == bucket3); assert(stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath) == bucket3); stSortedSet_destructIterator(bucketIt); } } 
stHash_destruct(segmentToMaximalHaplotypePathHash); return haplotypeToMaximalHaplotypeLengthHash; }
/*
 * One pass of the bulk record test against the file-level `database`:
 *   1. insert numRecords distinct random keys, each stored with its own key as value;
 *   2. negate every stored value in place;
 *   3. optionally destroy and reconstruct the database from `conf`;
 *   4. remove every record, checking that a second removal raises
 *      ST_KV_DATABASE_EXCEPTION_ID, and that the database ends up empty.
 */
static void readWriteAndRemoveRecordsLotsIteration(CuTest *testCase, int numRecords, bool reopenDatabase) {
    //The set remembers which keys were inserted and owns the key tuples.
    stSortedSet *records = stSortedSet_construct3((int(*)(const void *, const void *)) stIntTuple_cmpFn,
            (void(*)(void *)) stIntTuple_destruct);

    while (stSortedSet_size(records) < numRecords) {
        int32_t key = st_randomInt(0, 100 * numRecords);
        stIntTuple *candidate = stIntTuple_construct(1, key);
        if (stSortedSet_search(records, candidate) != NULL) {
            //Drawn this key before, so it must already be in the db.
            CuAssertTrue(testCase, stKVDatabase_containsRecord(database, key));
            stIntTuple_destruct(candidate);
            continue;
        }
        CuAssertTrue(testCase, !stKVDatabase_containsRecord(database, key));
        stSortedSet_insert(records, candidate);
        stKVDatabase_insertRecord(database, key, &key, sizeof(int32_t));
        CuAssertTrue(testCase, stKVDatabase_containsRecord(database, key));
    }
    readWriteAndRemoveRecordsLotsCheck(testCase, records, 1);

    //Flip the sign of every stored value.
    stSortedSetIterator *iter = stSortedSet_getIterator(records);
    for (stIntTuple *record = stSortedSet_getNext(iter); record != NULL; record = stSortedSet_getNext(iter)) {
        int64_t recordKey = stIntTuple_getPosition(record, 0);
        int32_t *stored = (int32_t *) stKVDatabase_getRecord(database, recordKey);
        *stored = *stored * -1;
        stKVDatabase_updateRecord(database, recordKey, stored, sizeof(int32_t));
        CuAssertTrue(testCase, stKVDatabase_containsRecord(database, recordKey));
        free(stored);
    }
    stSortedSet_destructIterator(iter);
    readWriteAndRemoveRecordsLotsCheck(testCase, records, -1);

    //Optionally tear the database down and bring it back up. The commit/start
    //transaction calls that used to bracket this step are disabled.
    if (reopenDatabase) {
        stKVDatabase_destruct(database);
        database = stKVDatabase_construct(conf, false);
    }

    //Remove the records one at a time.
    iter = stSortedSet_getIterator(records);
    for (stIntTuple *record = stSortedSet_getNext(iter); record != NULL; record = stSortedSet_getNext(iter)) {
        int64_t recordKey = stIntTuple_getPosition(record, 0);
        CuAssertTrue(testCase, stKVDatabase_containsRecord(database, recordKey));
        stKVDatabase_removeRecord(database, recordKey);
        CuAssertTrue(testCase, !stKVDatabase_containsRecord(database, recordKey));
        //Removing a record that is already gone has to raise an exception.
        stTry {
            stKVDatabase_removeRecord(database, recordKey);
            CuAssertTrue(testCase, 0);
        } stCatch(except) {
            CuAssertTrue(testCase, stExcept_getId(except) == ST_KV_DATABASE_EXCEPTION_ID);
        } stTryEnd;
    }
    stSortedSet_destructIterator(iter);

    CuAssertIntEquals(testCase, 0, stKVDatabase_getNumberOfRecords(database));
    stSortedSet_destruct(records);
}
// copied from cPecanRealign, which is sloppy. void *convertToAnchorPair(void *aPair, void *extraArg) { stIntTuple *i = stIntTuple_construct2(stIntTuple_get(aPair, 1), stIntTuple_get(aPair, 2)); stIntTuple_destruct(aPair); return i; }