/*
 * Test helper: checks the component structure induced by filteredEdges.
 * Asserts that each component holds between 1 and maxComponentSize nodes, and
 * that every edge of the file-level edges list that is absent from filteredEdges
 * joins two distinct components whose combined size exceeds maxComponentSize.
 */
static void checkComponents(CuTest *testCase, stList *filteredEdges) {
    stHash *nodesToComponents = getComponents(filteredEdges);

    //Each component must be non-empty and no bigger than the threshold.
    stList *components = stHash_getValues(nodesToComponents);
    for (int64_t componentIndex = 0; componentIndex < stList_length(components); componentIndex++) {
        stSortedSet *component = stList_get(components, componentIndex);
        CuAssertTrue(testCase, stSortedSet_size(component) <= maxComponentSize);
        CuAssertTrue(testCase, stSortedSet_size(component) >= 1);
    }

    //Each rejected edge must have been rejected for a reason: adding it would
    //merge two different components into one that is too large.
    stSortedSet *keptEdges = stList_getSortedSet(filteredEdges,
            (int(*)(const void *, const void *)) stIntTuple_cmpFn);
    for (int64_t edgeIndex = 0; edgeIndex < stList_length(edges); edgeIndex++) {
        stIntTuple *edge = stList_get(edges, edgeIndex);
        if (stSortedSet_search(keptEdges, edge) != NULL) {
            continue; //The edge was kept, so there is nothing to verify.
        }
        //The end nodes are read from positions 1 and 2 of the edge tuple.
        stIntTuple *endA = stIntTuple_construct1(stIntTuple_get(edge, 1));
        stIntTuple *endB = stIntTuple_construct1(stIntTuple_get(edge, 2));
        stSortedSet *componentA = stHash_search(nodesToComponents, endA);
        stSortedSet *componentB = stHash_search(nodesToComponents, endB);
        CuAssertTrue(testCase, componentA != NULL && componentB != NULL);
        CuAssertTrue(testCase, componentA != componentB);
        CuAssertTrue(testCase,
                stSortedSet_size(componentA) + stSortedSet_size(componentB) > maxComponentSize);
        stIntTuple_destruct(endA);
        stIntTuple_destruct(endB);
    }
    stSortedSet_destruct(keptEdges);

    //Cleanup: the components are collected into a set before being destructed,
    //presumably because the hash values list repeats a component once per node.
    stSortedSet *uniqueComponents = stList_getSortedSet(components, NULL);
    stList_destruct(components);
    stSortedSet_setDestructor(uniqueComponents, (void(*)(void *)) stSortedSet_destruct);
    stSortedSet_destruct(uniqueComponents);
    stHash_destruct(nodesToComponents);
}
static void splitIntoAdjacenciesStubsAndChains(stList *subCycle, stList *adjacencyEdges, stList *stubEdges,
        stList *chainEdges, stList **subAdjacencyEdges, stList **subStubEdges, stList **subChainEdges) {
    /*
     * Partitions the edges of subCycle into three freshly constructed lists:
     *  - *subStubEdges: edges of subCycle that are found in stubEdges,
     *  - *subChainEdges: edges of subCycle not in stubEdges but found in chainEdges,
     *  - *subAdjacencyEdges: edges of adjacencyEdges whose two end nodes both
     *    belong to the node set of subCycle.
     * The output lists reference the existing edge objects.
     */
    *subStubEdges = stList_construct();
    *subChainEdges = stList_construct();
    for (int64_t i = 0; i < stList_length(subCycle); i++) {
        stIntTuple *edge = stList_get(subCycle, i);
        //Stub membership takes precedence over chain membership.
        stList *destination = NULL;
        if (stList_contains(stubEdges, edge)) {
            destination = *subStubEdges;
        } else if (stList_contains(chainEdges, edge)) {
            destination = *subChainEdges;
        }
        if (destination != NULL) {
            stList_append(destination, edge);
        }
    }

    *subAdjacencyEdges = stList_construct();
    stSortedSet *cycleNodes = getNodeSetOfEdges(subCycle);
    for (int64_t i = 0; i < stList_length(adjacencyEdges); i++) {
        stIntTuple *edge = stList_get(adjacencyEdges, i);
        if (!nodeInSet(cycleNodes, stIntTuple_get(edge, 0)) || !nodeInSet(cycleNodes, stIntTuple_get(edge, 1))) {
            continue; //At least one end lies outside the sub cycle.
        }
        stList_append(*subAdjacencyEdges, edge);
    }
    stSortedSet_destruct(cycleNodes);
}
static stHash *getComponents(stList *filteredEdges) {
    /*
     * Deliberately naive reimplementation of the greedy component building, used
     * only to cross-check the real implementation. Returns a hash from each node
     * of the file-level nodes list to the sorted set of nodes in its component,
     * where components are joined by the edges in filteredEdges.
     */
    stHash *nodesToComponents = stHash_construct3((uint64_t(*)(const void *)) stIntTuple_hashKey,
            (int(*)(const void *, const void *)) stIntTuple_equalsFn, NULL, NULL);

    //Initially every node sits alone in its own component.
    for (int64_t nodeIndex = 0; nodeIndex < stList_length(nodes); nodeIndex++) {
        stIntTuple *node = stList_get(nodes, nodeIndex);
        stSortedSet *singleton = stSortedSet_construct();
        stSortedSet_insert(singleton, node);
        stHash_insert(nodesToComponents, node, singleton);
    }

    //Each edge joins the components of its two end nodes (tuple positions 1 and 2).
    for (int64_t edgeIndex = 0; edgeIndex < stList_length(filteredEdges); edgeIndex++) {
        stIntTuple *edge = stList_get(filteredEdges, edgeIndex);
        stIntTuple *endA = stIntTuple_construct1(stIntTuple_get(edge, 1));
        stIntTuple *endB = stIntTuple_construct1(stIntTuple_get(edge, 2));
        stSortedSet *componentA = stHash_search(nodesToComponents, endA);
        stSortedSet *componentB = stHash_search(nodesToComponents, endB);
        assert(componentA != NULL && componentB != NULL);
        if (componentA != componentB) {
            //Build the union, repoint every member at it, then drop the old sets.
            stSortedSet *merged = stSortedSet_getUnion(componentA, componentB);
            stSortedSetIterator *memberIt = stSortedSet_getIterator(merged);
            stIntTuple *member;
            while ((member = stSortedSet_getNext(memberIt)) != NULL) {
                stHash_insert(nodesToComponents, member, merged);
            }
            stSortedSet_destructIterator(memberIt);
            stSortedSet_destruct(componentA);
            stSortedSet_destruct(componentB);
        }
        stIntTuple_destruct(endA);
        stIntTuple_destruct(endB);
    }
    return nodesToComponents;
}
static void makeMatchingPerfect(stList *chosenEdges, stList *adjacencyEdges, stSortedSet *nodes) {
    /*
     * Extends chosenEdges into a perfect matching over nodes: nodes not touched by
     * any chosen edge are paired up in iteration order, and the edge obtained from
     * getEdgeForNodes for each pair is appended to chosenEdges.
     */
    stSortedSet *attachedNodes = getNodeSetOfEdges(chosenEdges);
    stHash *nodesToAdjacencyEdges = getNodesToEdgesHash(adjacencyEdges);

    stIntTuple *waitingNode = NULL; //Unattached node still waiting for a partner.
    stSortedSetIterator *nodeIt = stSortedSet_getIterator(nodes);
    stIntTuple *node;
    while ((node = stSortedSet_getNext(nodeIt)) != NULL) {
        if (stSortedSet_search(attachedNodes, node) != NULL) {
            continue; //Already covered by the matching.
        }
        if (waitingNode == NULL) {
            waitingNode = node;
            continue;
        }
        stList_append(chosenEdges,
                getEdgeForNodes(stIntTuple_get(waitingNode, 0), stIntTuple_get(node, 0), nodesToAdjacencyEdges));
        waitingNode = NULL;
    }
    stSortedSet_destructIterator(nodeIt);
    assert(waitingNode == NULL); //The number of unattached nodes must be even.
    stSortedSet_destruct(attachedNodes);
    assert(stList_length(chosenEdges) * 2 == stSortedSet_size(nodes));
    stHash_destruct(nodesToAdjacencyEdges);
}
static AdjacencySwitch *getBest4EdgeAdjacencySwitchP(stIntTuple *oldEdge1, int64_t node1,
        stSortedSet *allAdjacencyEdges, stHash *nodesToAllCurrentEdges, stHash *nodesToBridgingAdjacencyEdges) {
    /*
     * Considers every bridging adjacency edge incident on node1 (an end of oldEdge1)
     * and, for each, the switch that swaps oldEdge1 and the current edge at the far
     * end of the bridge for the bridge plus the edge closing the remaining two nodes.
     * Each candidate is folded into the result with adjacencySwitch_update; NULL is
     * returned when node1 has no bridging edges.
     */
    int64_t node4 = getOtherPosition(oldEdge1, node1);
    stList *bridgingEdges = getItemForNode(node1, nodesToBridgingAdjacencyEdges);
    if (bridgingEdges == NULL) {
        return NULL;
    }

    AdjacencySwitch *bestSwitch = NULL;
    for (int64_t i = 0; i < stList_length(bridgingEdges); i++) {
        stIntTuple *newEdge1 = stList_get(bridgingEdges, i);
        int64_t node2 = getOtherPosition(newEdge1, node1);

        //node2 is expected to have exactly one current edge.
        stList *currentEdgesOfNode2 = getItemForNode(node2, nodesToAllCurrentEdges);
        assert(currentEdgesOfNode2 != NULL);
        assert(stList_length(currentEdgesOfNode2) == 1);
        stIntTuple *oldEdge2 = stList_peek(currentEdgesOfNode2);
        int64_t node3 = getOtherPosition(oldEdge2, node2);

        stIntTuple *newEdge2 = getWeightedEdgeFromSet(node3, node4, allAdjacencyEdges);
        assert(newEdge2 != NULL);

        //Weight removed minus weight added.
        int64_t cost = stIntTuple_get(oldEdge1, 2) + stIntTuple_get(oldEdge2, 2)
                - stIntTuple_get(newEdge1, 2) - stIntTuple_get(newEdge2, 2);
        bestSwitch = adjacencySwitch_update(bestSwitch, oldEdge1, oldEdge2, newEdge1, newEdge2, cost);
    }
    return bestSwitch;
}
stList *chooseMatching_greedy(stList *edges, int64_t nodeNumber) {
    /*
     * Greedily builds a matching: the edges are sorted with chooseMatching_greedyP,
     * then taken from the end of the sorted list one at a time, and an edge is kept
     * only if neither of its end nodes (tuple positions 0 and 1) has been used yet,
     * so each node ends up with at most one edge.
     *
     * The input list is not modified; a copy is sorted and consumed. The returned
     * list is newly constructed and holds the edge objects taken from that copy.
     * nodeNumber is not used by this implementation.
     */
    (void) nodeNumber;

    //Work on a copy so the caller's list keeps its order and contents.
    stList *sortedEdges = stList_copy(edges, NULL);
    stSortedSet *seen = getEmptyNodeOrEdgeSetWithCleanup();
    stList *matching = stList_construct();

    stList_sort(sortedEdges, chooseMatching_greedyP);

    //Weights are compared as int64_t: converting them to double would lose
    //precision for large magnitudes and could hide an out-of-order pair.
    int64_t previousWeight = INT64_MAX;
    while (stList_length(sortedEdges) > 0) {
        stIntTuple *edge = stList_pop(sortedEdges);
        int64_t weight = stIntTuple_get(edge, 2);
        //Sanity check on the sort: popped weights must never increase.
        assert(weight <= previousWeight);
        previousWeight = weight;
        if (!nodeInSet(seen, stIntTuple_get(edge, 0)) && !nodeInSet(seen, stIntTuple_get(edge, 1))) {
            addNodeToSet(seen, stIntTuple_get(edge, 0));
            addNodeToSet(seen, stIntTuple_get(edge, 1));
            stList_append(matching, edge);
        }
    }
    (void) previousWeight; //Only read by the assert above.
    assert(stList_length(sortedEdges) == 0);
    stList_destruct(sortedEdges);
    stSortedSet_destruct(seen);
    return matching;
}
/*
 * Depth first search used to decide whether the alignment admits an acyclic ordering.
 * Returns 1 if a cycle is reachable from seqPos, otherwise 0. The started set holds
 * every position whose exploration has begun and the done set every position whose
 * exploration has finished; reaching a started-but-not-done position means a cycle.
 */
static int64_t dfs(stHash *adjacencyList, stIntTuple *seqPos, stSortedSet *started, stSortedSet *done) {
    if (stSortedSet_search(started, seqPos) != NULL) {
        //Seen before: still in progress means a cycle, finished means already
        //explored without finding one.
        return stSortedSet_search(done, seqPos) == NULL ? 1 : 0;
    }
    stSortedSet_insert(started, seqPos);

    int64_t foundCycle = 0;
    //Move to the following position on the same sequence and visit its whole column.
    stIntTuple *successor = stIntTuple_construct2(stIntTuple_get(seqPos, 0), stIntTuple_get(seqPos, 1) + 1);
    stSortedSet *column = stHash_search(adjacencyList, successor);
    if (column != NULL) {
        assert(stSortedSet_search(column, successor) != NULL);
        stSortedSetIterator *columnIt = stSortedSet_getIterator(column);
        stIntTuple *member;
        while ((member = stSortedSet_getNext(columnIt)) != NULL) {
            //Once a cycle has been found the remaining members are not explored.
            if (!foundCycle && dfs(adjacencyList, member, started, done)) {
                foundCycle = 1;
            }
        }
        stSortedSet_destructIterator(columnIt);
    }
    stIntTuple_destruct(successor);
    stSortedSet_insert(done, seqPos);
    return foundCycle;
}
static void debugScaffoldPathsP(Cap *cap, stList *haplotypePath, stHash *haplotypePathToScaffoldPathHash, stHash *haplotypeToMaximalHaplotypeLengthHash, stHash *segmentToMaximalHaplotypePathHash, stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters, bool capDir) { int64_t insertLength; int64_t deleteLength; Cap *otherCap; enum CapCode capCode = getCapCode(cap, &otherCap, haplotypeEventStrings, contaminationEventStrings, &insertLength, &deleteLength, capCodeParameters); if (capCode == SCAFFOLD_GAP || capCode == AMBIGUITY_GAP) { Segment *adjacentSegment = getAdjacentCapsSegment(cap); assert(adjacentSegment != NULL); while (!hasCapInEvents(cap_getEnd(capDir ? segment_get5Cap(adjacentSegment) : segment_get3Cap(adjacentSegment)), haplotypeEventStrings)) { adjacentSegment = getAdjacentCapsSegment(capDir ? segment_get5Cap(adjacentSegment) : segment_get3Cap(adjacentSegment)); assert(adjacentSegment != NULL); } assert(adjacentSegment != NULL); assert(hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)); //isHaplotypeEnd(cap_getEnd(segment_get5Cap(adjacentSegment)))); stIntTuple *j = stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath); (void)j; assert(j != NULL); stList *adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, adjacentSegment); if (adjacentHaplotypePath == NULL) { adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, segment_getReverse(adjacentSegment)); } assert(adjacentHaplotypePath != NULL); assert(adjacentHaplotypePath != haplotypePath); stIntTuple *k = stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath); (void)k; assert(k != NULL); assert(stIntTuple_get(j, 0) == stIntTuple_get(k, 0)); assert(stHash_search(haplotypePathToScaffoldPathHash, haplotypePath) == stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath)); } }
/*
 * Builds a hash that maps a key built by constructEdge from an edge's two end
 * nodes (tuple positions 0 and 1) to the edge itself. The hash is constructed
 * with stIntTuple_destruct as key destructor and no value destructor.
 */
static stHash *putEdgesInHash(stList *edges) {
    stHash *intsToEdgesHash = stHash_construct3(
            (uint64_t (*)(const void *))stIntTuple_hashKey,
            (int (*)(const void *, const void *))stIntTuple_equalsFn,
            (void (*)(void *))stIntTuple_destruct,
            NULL);
    for (int64_t edgeIndex = 0; edgeIndex < stList_length(edges); edgeIndex++) {
        stIntTuple *edge = stList_get(edges, edgeIndex);
        int64_t from = stIntTuple_get(edge, 0);
        int64_t to = stIntTuple_get(edge, 1);
        stHash_insert(intsToEdgesHash, constructEdge(from, to), edge);
    }
    return intsToEdgesHash;
}
static AdjacencySwitch *getBest2EdgeAdjacencySwitch(stList *components, stSortedSet *allAdjacencyEdges) {
    /*
     * Takes the lowest scoring edge of each component, picks the first two after
     * sorting them with getBest2EdgeAdjacencySwitchP, and returns the adjacency
     * switch that replaces those two edges by the two adjacency edges connecting
     * their end nodes across the components. The cost passed to the switch is the
     * sum of the weights of the two removed edges.
     *
     * Requires at least two components; each component is a non-empty list of edges.
     */
    //Two edges from different components are needed; check before indexing.
    assert(stList_length(components) >= 2);

    /*
     * Get lowest scoring edge for each component.
     */
    stList *lowestScoringEdgeFromEachComponent = stList_construct();
    for (int64_t i = 0; i < stList_length(components); i++) {
        stList_append(lowestScoringEdgeFromEachComponent, getLowestScoringEdge(stList_get(components, i)));
    }

    /*
     * Get two lowest scoring edges.
     */
    stList_sort(lowestScoringEdgeFromEachComponent, getBest2EdgeAdjacencySwitchP);
    stIntTuple *lowestScoreEdge1 = stList_get(lowestScoringEdgeFromEachComponent, 0);
    stIntTuple *lowestScoreEdge2 = stList_get(lowestScoringEdgeFromEachComponent, 1);
    assert(lowestScoreEdge1 != lowestScoreEdge2);
    stList_destruct(lowestScoringEdgeFromEachComponent); //Cleanup

    /*
     * Find the replacement edges. First try pairing the ends 0-0 and 1-1; if that
     * pairing is not in the set of adjacency edges fall back to 0-1 and 1-0.
     */
    stIntTuple *newEdge1 = getWeightedEdgeFromSet(stIntTuple_get(lowestScoreEdge1, 0),
            stIntTuple_get(lowestScoreEdge2, 0), allAdjacencyEdges);
    stIntTuple *newEdge2 = getWeightedEdgeFromSet(stIntTuple_get(lowestScoreEdge1, 1),
            stIntTuple_get(lowestScoreEdge2, 1), allAdjacencyEdges);
    if (newEdge1 == NULL) {
        assert(newEdge2 == NULL);
        newEdge1 = getWeightedEdgeFromSet(stIntTuple_get(lowestScoreEdge1, 0),
                stIntTuple_get(lowestScoreEdge2, 1), allAdjacencyEdges);
        newEdge2 = getWeightedEdgeFromSet(stIntTuple_get(lowestScoreEdge1, 1),
                stIntTuple_get(lowestScoreEdge2, 0), allAdjacencyEdges);
    }
    assert(newEdge1 != NULL);
    assert(newEdge2 != NULL);
    return adjacencySwitch_construct(lowestScoreEdge1, lowestScoreEdge2, newEdge1, newEdge2,
            stIntTuple_get(lowestScoreEdge1, 2) + stIntTuple_get(lowestScoreEdge2, 2));
}
/*
 * Randomised test of stPosetAlignment_isPossible / stPosetAlignment_add. For each
 * trial it proposes random aligned pairs between distinct sequences and checks the
 * poset alignment's verdict against containsACycle: an accepted pair must leave
 * the set of pairs acyclic and must be addable, a rejected pair must create a cycle.
 */
static void test_stPosetAlignment_addAndIsPossible(CuTest *testCase) {
    for(int64_t trial=0; trial<100; trial++) {
        setup();

        //Make random number of sequences.
        stList *sequenceLengths = stList_construct3(0, (void (*)(void *))stIntTuple_destruct);
        for(int64_t i=0; i<sequenceNumber; i++) {
            stList_append(sequenceLengths, stIntTuple_construct1(st_randomInt(0, MAX_SEQUENCE_SIZE)));
        }

        //Propose random alignment pairs...
        stList *pairs = stList_construct3(0, (void(*)(void *))stIntTuple_destruct);
        int64_t maxAlignedPairs = st_randomInt(0, MAX_ALIGNMENTS);
        if(sequenceNumber > 0) {
            for(int64_t i=0; i<maxAlignedPairs; i++) {
                int64_t seq1 = st_randomInt(0, sequenceNumber);
                int64_t seqLength1 = stIntTuple_get(stList_get(sequenceLengths, seq1), 0);
                if(seqLength1 == 0) {
                    continue;
                }
                int64_t position1 = st_randomInt(0, seqLength1);
                int64_t seq2 = st_randomInt(0, sequenceNumber);
                //The length must be that of seq2 (not seq1) so that position2 is
                //a valid position of the second sequence.
                int64_t seqLength2 = stIntTuple_get(stList_get(sequenceLengths, seq2), 0);
                if(seqLength2 == 0) {
                    continue;
                }
                int64_t position2 = st_randomInt(0, seqLength2);
                if(seq1 != seq2) {
                    //Tentatively record the pair; it is removed again if rejected.
                    stList_append(pairs, stIntTuple_construct4(seq1, position1, seq2, position2));
                    if(stPosetAlignment_isPossible(posetAlignment, seq1, position1, seq2, position2)) {
                        st_logInfo("In %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " \n", seq1, position1, seq2, position2);
                        //For each accepted pair check it doesn't create a cycle.
                        CuAssertTrue(testCase, !containsACycle(pairs, sequenceNumber));
                        CuAssertTrue(testCase, stPosetAlignment_add(posetAlignment, seq1, position1, seq2, position2));
                    }
                    else {
                        st_logInfo("Out %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " \n", seq1, position1, seq2, position2);
                        //For each rejected pair check it creates a cycle..
                        CuAssertTrue(testCase, containsACycle(pairs, sequenceNumber));
                        CuAssertTrue(testCase, !stPosetAlignment_isPossible(posetAlignment, seq1, position1, seq2, position2));
                        stIntTuple_destruct(stList_pop(pairs)); //remove the pair which created the cycle.
                        CuAssertTrue(testCase, !containsACycle(pairs, sequenceNumber)); //Check we're back to being okay..
                    }
                }
            }
        }

        //Cleanup
        stList_destruct(sequenceLengths);
        stList_destruct(pairs);
        teardown();
        st_logInfo("Passed a random ordering test with %" PRIi64 " sequences and %" PRIi64 " aligned pairs\n", sequenceNumber, maxAlignedPairs);
    }
}
/*
 * Checks one direction of a proposed alignment of (sequence1, position1) with
 * (sequence2, position2) against the less-than constraint found for position1.
 * Returns true when no constraint exists. Otherwise position2 must be below the
 * constrained position in sequence2; equality is allowed only when the constraint
 * is flagged "or equal" and is recorded at exactly position1.
 */
bool stPosetAlignment_isPossibleP(stPosetAlignment *posetAlignment, int64_t sequence1, int64_t position1,
        int64_t sequence2, int64_t position2) {
    stIntTuple *constraint = getConstraint_lessThan(posetAlignment, sequence1, position1, sequence2);
    if (constraint == NULL) {
        return 1;
    }
    //Tuple layout as used here: 0 = position in sequence1, 1 = bound in sequence2,
    //2 = non-zero when the constraint is less than or equals.
    bool equalityAllowed = stIntTuple_get(constraint, 2) && stIntTuple_get(constraint, 0) == position1;
    int64_t bound = stIntTuple_get(constraint, 1);
    return equalityAllowed ? position2 <= bound : position2 < bound;
}
static AdjacencySwitch *getBest4EdgeAdjacencySwitch2(stIntTuple *oldEdge1, stSortedSet *allAdjacencyEdges,
        stHash *nodesToAllCurrentEdgesSet, stHash *nodesToBridgingAdjacencyEdges) {
    /*
     * Finds the best switch for the existing edge oldEdge1 by searching from each of
     * its two end nodes in turn and combining the two results with
     * getMinimumCostAdjacencySwitch. Either per-node result may be NULL.
     */
    AdjacencySwitch *bestFromFirstEnd = getBest4EdgeAdjacencySwitchP(oldEdge1, stIntTuple_get(oldEdge1, 0),
            allAdjacencyEdges, nodesToAllCurrentEdgesSet, nodesToBridgingAdjacencyEdges);
    AdjacencySwitch *bestFromSecondEnd = getBest4EdgeAdjacencySwitchP(oldEdge1, stIntTuple_get(oldEdge1, 1),
            allAdjacencyEdges, nodesToAllCurrentEdgesSet, nodesToBridgingAdjacencyEdges);
    return getMinimumCostAdjacencySwitch(bestFromFirstEnd, bestFromSecondEnd);
}
/*
 * Propagates a constraint forwards. Given sequence3/position3 constrained to be
 * less than sequence2/position2, looks at every other sequence4 (excluding
 * sequence1, sequence2 and sequence3) for which sequence2/position2 already has a
 * less-than constraint, and adds the implied sequence3 -> sequence4 constraint
 * whenever lessThanConstraintIsPrime says it improves on what is stored.
 */
static void stPosetAlignment_addP2(stPosetAlignment *posetAlignment, int64_t sequence1, int64_t sequence3,
        int64_t position3, int64_t sequence2, int64_t position2, int64_t lessThanOrEqual) {
    for (int64_t sequence4 = 0; sequence4 < posetAlignment->sequenceNumber; sequence4++) {
        if (sequence4 == sequence1 || sequence4 == sequence2 || sequence4 == sequence3) {
            continue;
        }
        stIntTuple *constraint = getConstraint_lessThan(posetAlignment, sequence2, position2, sequence4);
        if (constraint == NULL) {
            continue;
        }
        int64_t position4 = stIntTuple_get(constraint, 1);
        //The chained constraint stays "or equal" only if both links are "or equal"
        //and the second link is recorded at exactly position2.
        int64_t transLessThanOrEqual = lessThanOrEqual && stIntTuple_get(constraint, 2)
                && stIntTuple_get(constraint, 0) == position2;
        if (lessThanConstraintIsPrime(posetAlignment, sequence3, position3, sequence4, position4,
                transLessThanOrEqual)) {
            //We have a new transitive constraint..
            addConstraint_lessThan(posetAlignment, sequence3, position3, sequence4, position4,
                    transLessThanOrEqual);
        }
    }
}
static stList *getOddToEvenAdjacencyEdges(stSortedSet *oddNodes, stList *adjacencyEdges) {
    /*
     * Returns a new list of those adjacency edges that have exactly one of their two
     * end nodes in oddNodes. The list references the existing edge objects.
     */
    stList *crossingEdges = stList_construct();
    for (int64_t edgeIndex = 0; edgeIndex < stList_length(adjacencyEdges); edgeIndex++) {
        stIntTuple *edge = stList_get(adjacencyEdges, edgeIndex);
        //Exclusive or: one end inside the set, the other outside.
        if (!(nodeInSet(oddNodes, stIntTuple_get(edge, 0)) ^ nodeInSet(oddNodes, stIntTuple_get(edge, 1)))) {
            continue;
        }
        stList_append(crossingEdges, edge);
    }
    return crossingEdges;
}
/*
 * Comparator for position tuples. Normally the tuples are compared on element 0.
 * If either tuple has INT64_MAX in element 0 that value acts as a wildcard and the
 * tuples are compared on element 1 instead (which must then not be INT64_MAX).
 */
static int comparePositions(stIntTuple *position1, stIntTuple *position2) {
    int64_t comparedIndex = 0;
    if (stIntTuple_get(position1, 0) == INT64_MAX || stIntTuple_get(position2, 0) == INT64_MAX) {
        //Indicates we should ignore the first position and compare the second.
        assert(stIntTuple_get(position1, 1) != INT64_MAX);
        assert(stIntTuple_get(position2, 1) != INT64_MAX);
        comparedIndex = 1;
    }
    return cmpFn(stIntTuple_get(position1, comparedIndex), stIntTuple_get(position2, comparedIndex));
}
static void writeGraph(FILE *fileHandle, stList *edges, int64_t nodeNumber) {
    /*
     * Writes the graph in the blossom format: a header line with the node and edge
     * counts followed by one "from to weight" line per edge. Only the given edges
     * are written and each weight is written exactly as stored (no sign change).
     */
    int64_t edgeNumber = stList_length(edges);
    fprintf(fileHandle, "%" PRIi64 " %" PRIi64 "\n", nodeNumber, edgeNumber);
    for (int64_t edgeIndex = 0; edgeIndex < edgeNumber; edgeIndex++) {
        stIntTuple *edge = stList_get(edges, edgeIndex);
        fprintf(fileHandle, "%" PRIi64 " %" PRIi64 " %" PRIi64 "\n",
                stIntTuple_get(edge, 0), stIntTuple_get(edge, 1), stIntTuple_get(edge, 2));
    }
}
/*
 * Returns non-zero iff the proposed constraint "sequence1/position1 is less than
 * sequence2/position2" is prime, i.e. adds information to what is already stored.
 * A non-zero lessThanOrEquals marks the proposed constraint as less than or equals.
 */
static bool lessThanConstraintIsPrime(stPosetAlignment *posetAlignment, int64_t sequence1, int64_t position1,
        int64_t sequence2, int64_t position2, int64_t lessThanOrEquals) {
    stIntTuple *existing = getConstraint_lessThan(posetAlignment, sequence1, position1, sequence2);
    if (existing == NULL) {
        return 1; //Nothing stored yet, so anything is new.
    }
    int64_t existingBound = stIntTuple_get(existing, 1);
    if (position2 != existingBound) {
        //A smaller bound is tighter (prime), a larger one is looser (not prime).
        return position2 < existingBound;
    }
    //Same bound: only prime if it turns a stored less than or equals constraint,
    //recorded at exactly position1, into a strict less than constraint.
    return position1 == stIntTuple_get(existing, 0) && stIntTuple_get(existing, 2) && !lessThanOrEquals;
}
stIntTuple *getLowestScoringEdge(stList *edges) {
    /*
     * Returns the edge whose weight (tuple position 2) is smallest. The list must
     * not be empty. On ties the edge that appears first in the list is returned.
     */
    assert(stList_length(edges) > 0);
    stIntTuple *best = stList_get(edges, 0);
    for (int64_t index = 1; index < stList_length(edges); index++) {
        stIntTuple *candidate = stList_get(edges, index);
        //Strict comparison keeps the earliest edge among equals.
        if (stIntTuple_get(candidate, 2) < stIntTuple_get(best, 2)) {
            best = candidate;
        }
    }
    return best;
}
/*
 * Looks up the constraint giving the position in sequence2 that position1 of
 * sequence1 must be greater than or equal to in the alignment. The lookup is made
 * in the sequence2 -> sequence1 constraint list, so in the returned tuple element 1
 * is the position in sequence1. Returns NULL when there is no such constraint.
 */
static stIntTuple *getConstraint_greaterThan(stPosetAlignment *posetAlignment, int64_t sequence1,
        int64_t position1, int64_t sequence2) {
    //Probe tuple: INT64_MAX in element 0 so that the search is driven by element 1.
    stIntTuple *probe = stIntTuple_construct2(INT64_MAX, position1);
    //Get less than or equal
    stIntTuple *found = stSortedSet_searchLessThanOrEqual(
            getConstraintList(posetAlignment, sequence2, sequence1), probe);
    stIntTuple_destruct(probe);
    assert(found == NULL || position1 >= stIntTuple_get(found, 1));
    return found;
}
static stList *getEdgesThatBridgeComponents(stList *components, stHash *nodesToNonZeroWeightedAdjacencyEdges) {
    /*
     * Collects the adjacency edges that leave a component, i.e. that have one end
     * node inside the component and the other end outside it. Every component is
     * scanned node by node, so an edge is appended once for each scan in which it
     * is met and found to bridge.
     */
    stList *bridgingAdjacencyEdges = stList_construct();
    for (int64_t componentIndex = 0; componentIndex < stList_length(components); componentIndex++) {
        stSortedSet *componentNodes = getNodeSetOfEdges(stList_get(components, componentIndex));
        stSortedSetIterator *nodeIt = stSortedSet_getIterator(componentNodes);
        stIntTuple *node;
        while ((node = stSortedSet_getNext(nodeIt)) != NULL) {
            stList *incidentEdges = stHash_search(nodesToNonZeroWeightedAdjacencyEdges, node);
            if (incidentEdges == NULL) {
                continue; //This node has no candidate adjacency edges.
            }
            for (int64_t edgeIndex = 0; edgeIndex < stList_length(incidentEdges); edgeIndex++) {
                stIntTuple *edge = stList_get(incidentEdges, edgeIndex);
                stIntTuple *endA = stIntTuple_construct1(stIntTuple_get(edge, 0));
                stIntTuple *endB = stIntTuple_construct1(stIntTuple_get(edge, 1));
                void *endAInComponent = stSortedSet_search(componentNodes, endA);
                void *endBInComponent = stSortedSet_search(componentNodes, endB);
                //The edge was found via a node of the component, so one end is inside.
                assert(endAInComponent != NULL || endBInComponent != NULL);
                if (endAInComponent == NULL || endBInComponent == NULL) {
                    stList_append(bridgingAdjacencyEdges, edge);
                }
                stIntTuple_destruct(endA);
                stIntTuple_destruct(endB);
            }
        }
        stSortedSet_destructIterator(nodeIt);
        stSortedSet_destruct(componentNodes);
    }
    return bridgingAdjacencyEdges;
}
// copied from cPecanRealign struct PairwiseAlignment *convertAlignedPairsToPairwiseAlignment(char *seqName1, char *seqName2, double score, int64_t length1, int64_t length2, stList *alignedPairs) { //Make pairwise alignment int64_t pX = -1, pY = -1, mL = 0; //Create an end matched pair, which is used to ensure the alignment has the correct end indels. struct List *opList = constructEmptyList(0, (void (*)(void *)) destructAlignmentOperation); stList_append(alignedPairs, stIntTuple_construct2(length1, length2)); for (int64_t i = 0; i < stList_length(alignedPairs); i++) { stIntTuple *alignedPair = stList_get(alignedPairs, i); int64_t x = stIntTuple_get(alignedPair, 0); int64_t y = stIntTuple_get(alignedPair, 1); assert(x - pX > 0); assert(y - pY > 0); if (x - pX > 0 && y - pY > 0) { //This is a hack for filtering if (x - pX > 1) { //There is an indel. if (mL > 0) { listAppend(opList, constructAlignmentOperation(PAIRWISE_MATCH, mL, 0)); mL = 0; } listAppend(opList, constructAlignmentOperation(PAIRWISE_INDEL_X, x - pX - 1, 0)); } if (y - pY > 1) { if (mL > 0) { listAppend(opList, constructAlignmentOperation(PAIRWISE_MATCH, mL, 0)); mL = 0; } listAppend(opList, constructAlignmentOperation(PAIRWISE_INDEL_Y, y - pY - 1, 0)); } mL++; pX = x; pY = y; } } //Deal with a trailing match, but exclude the final match if (mL > 1) { listAppend(opList, constructAlignmentOperation(PAIRWISE_MATCH, mL - 1, 0)); } stIntTuple_destruct(stList_pop(alignedPairs)); //Construct the alignment struct PairwiseAlignment *pA = constructPairwiseAlignment(seqName1, 0, length1, 1, seqName2, 0, length2, 1, score, opList); return pA; }
int64_t matchingCardinality(stList *matching) {
    /*
     * Counts the edges of the matching whose weight (tuple position 2) is above zero.
     */
    int64_t positiveEdgeCount = 0;
    for (int64_t edgeIndex = 0; edgeIndex < stList_length(matching); edgeIndex++) {
        stIntTuple *edge = stList_get(matching, edgeIndex);
        if (stIntTuple_get(edge, 2) > 0) {
            positiveEdgeCount++;
        }
    }
    return positiveEdgeCount;
}
int64_t matchingWeight(stList *matching) {
    /*
     * Adds up the weights (tuple position 2) of all edges in the matching.
     */
    int64_t weightSum = 0;
    for (int64_t edgeIndex = 0; edgeIndex < stList_length(matching); edgeIndex++) {
        stIntTuple *edge = stList_get(matching, edgeIndex);
        weightSum += stIntTuple_get(edge, 2);
    }
    return weightSum;
}
stList *getComponents(stList *edges) {
    /*
     * Splits the edges into connected components. Returns a list (constructed with
     * stList_destruct as element destructor) holding one list of edges per
     * component; every edge ends up in exactly one component. Multi-graphs
     * (several edges between the same two nodes) are allowed.
     */
    stHash *nodesToEdges = getNodesToEdgesHash(edges);

    /*
     * Pick any node still in the hash and grow its component; nodes are taken out
     * of the hash as they are visited.
     */
    stList *components = stList_construct3(0, (void(*)(void *)) stList_destruct);
    stList *nodes = stHash_getKeys(nodesToEdges);
    while (stList_length(nodes) > 0) {
        stIntTuple *seedNode = stList_pop(nodes);
        stList *seedEdges = stHash_search(nodesToEdges, seedNode);
        if (seedEdges != NULL) {
            //Still in the hash, so not yet part of any component: build a new one.
            stSortedSet *componentEdges = stSortedSet_construct();
            stHash_remove(nodesToEdges, seedNode);
            for (int64_t i = 0; i < stList_length(seedEdges); i++) {
                stIntTuple *edge = stList_get(seedEdges, i);
                getComponentsP(nodesToEdges, stIntTuple_get(edge, 0), componentEdges);
                getComponentsP(nodesToEdges, stIntTuple_get(edge, 1), componentEdges);
            }
            stList_append(components, stSortedSet_getList(componentEdges));
            //Cleanup
            stSortedSet_destruct(componentEdges);
            stList_destruct(seedEdges);
        }
        //The key objects are released here, one per popped node.
        stIntTuple_destruct(seedNode);
    }
    assert(stHash_size(nodesToEdges) == 0);
    stHash_destruct(nodesToEdges);
    stList_destruct(nodes);
    return components;
}
/*
 * Builds the adjacency structure for the sequences: a hash from every
 * (sequence, position) tuple, for positions 0..MAX_SEQUENCE_SIZE-1 of each of the
 * sequenceNumber sequences, to the column (sorted set of tuples) it is aligned in.
 * Columns start as singletons and are merged for every aligned pair in pairs.
 */
static stHash *buildAdjacencyList(stList *pairs, int64_t sequenceNumber) {
    stHash *positionsToColumns = stHash_construct3((uint64_t (*)(const void *))stIntTuple_hashKey,
            (int (*)(const void *, const void *))stIntTuple_equalsFn,
            (void (*)(void *))stIntTuple_destruct, NULL);

    //One singleton column per sequence position.
    for (int64_t seq = 0; seq < sequenceNumber; seq++) {
        for (int64_t position = 0; position < MAX_SEQUENCE_SIZE; position++) {
            stIntTuple *seqPos = stIntTuple_construct2(seq, position);
            stSortedSet *column = stSortedSet_construct3(
                    (int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);
            stSortedSet_insert(column, seqPos);
            stHash_insert(positionsToColumns, seqPos, column);
        }
    }

    //Each pair is (sequence, position, sequence, position); merge the two columns.
    stListIterator *pairIt = stList_getIterator(pairs);
    stIntTuple *pair;
    while ((pair = stList_getNext(pairIt)) != NULL) {
        stIntTuple *firstPos = stIntTuple_construct2(stIntTuple_get(pair, 0), stIntTuple_get(pair, 1));
        stIntTuple *secondPos = stIntTuple_construct2(stIntTuple_get(pair, 2), stIntTuple_get(pair, 3));
        stSortedSet *targetColumn = stHash_search(positionsToColumns, firstPos);
        assert(targetColumn != NULL);
        stSortedSet *absorbedColumn = stHash_search(positionsToColumns, secondPos);
        assert(absorbedColumn != NULL);
        if (targetColumn != absorbedColumn) {
            //Move every member of the second column into the first and repoint it.
            stSortedSetIterator *memberIt = stSortedSet_getIterator(absorbedColumn);
            stIntTuple *member;
            while ((member = stSortedSet_getNext(memberIt)) != NULL) {
                assert(stSortedSet_search(targetColumn, member) == NULL);
                stSortedSet_insert(targetColumn, member);
                assert(stHash_search(positionsToColumns, member) == absorbedColumn);
                stHash_insert(positionsToColumns, member, targetColumn);
                assert(stHash_search(positionsToColumns, member) == targetColumn);
            }
            stSortedSet_destructIterator(memberIt);
            stSortedSet_destruct(absorbedColumn);
        }
        //Cleanup loop.
        stIntTuple_destruct(firstPos);
        stIntTuple_destruct(secondPos);
    }
    stList_destructIterator(pairIt);
    return positionsToColumns;
}
/*
 * Recursive step of the component search. If node is still present in
 * nodesToEdges it is removed from the hash, its edges are added to component, and
 * the search continues from both end nodes of each of those edges. The list of
 * edges taken from the hash is destructed before returning.
 */
static void getComponentsP(stHash *nodesToEdges, int64_t node, stSortedSet *component) {
    stIntTuple *lookupKey = stIntTuple_construct1(node);
    stList *incidentEdges = stHash_search(nodesToEdges, lookupKey);
    if (incidentEdges == NULL) {
        //Node already visited (or unknown): nothing more to do.
        stIntTuple_destruct(lookupKey);
        return;
    }
    //Take the node out first so the recursion below cannot revisit it.
    stHash_remove(nodesToEdges, lookupKey);
    for (int64_t i = 0; i < stList_length(incidentEdges); i++) {
        stIntTuple *edge = stList_get(incidentEdges, i);
        if (stSortedSet_search(component, edge) == NULL) {
            stSortedSet_insert(component, edge);
        }
        /*
         * Recursion on stack could equal the total number of nodes.
         */
        getComponentsP(nodesToEdges, stIntTuple_get(edge, 0), component);
        getComponentsP(nodesToEdges, stIntTuple_get(edge, 1), component);
    }
    stList_destruct(incidentEdges);
    stIntTuple_destruct(lookupKey);
}
/*
 * Adds the constraint "sequence1/position1 is less than or equal to
 * sequence2/position2" if it is prime, then propagates it: backwards through every
 * sequence3 that already constrains sequence1/position1, and forwards from each
 * new constraint via stPosetAlignment_addP2.
 */
static void stPosetAlignment_addP(stPosetAlignment *posetAlignment, int64_t sequence1, int64_t position1,
        int64_t sequence2, int64_t position2) {
    if (!lessThanConstraintIsPrime(posetAlignment, sequence1, position1, sequence2, position2, 1)) {
        return; //Already implied by the stored constraints.
    }
    addConstraint_lessThan(posetAlignment, sequence1, position1, sequence2, position2, 1);

    for (int64_t sequence3 = 0; sequence3 < posetAlignment->sequenceNumber; sequence3++) {
        if (sequence3 == sequence2) {
            continue;
        }
        if (sequence3 == sequence1) {
            //Propagate forwards from the constraint just added; INT64_MAX is passed
            //as the sequence to exclude so that no real sequence is skipped for it.
            stPosetAlignment_addP2(posetAlignment, INT64_MAX, sequence1, position1, sequence2, position2, 1);
            continue;
        }
        stIntTuple *constraint = getConstraint_greaterThan(posetAlignment, sequence1, position1, sequence3);
        if (constraint == NULL) {
            continue;
        }
        int64_t position3 = stIntTuple_get(constraint, 0); //its reversed
        int64_t lessThanOrEqual = stIntTuple_get(constraint, 2) && stIntTuple_get(constraint, 1) == position1;
        if (lessThanConstraintIsPrime(posetAlignment, sequence3, position3, sequence2, position2,
                lessThanOrEqual)) {
            //new constraint found, so add it to the set..
            addConstraint_lessThan(posetAlignment, sequence3, position3, sequence2, position2, lessThanOrEqual);
            stPosetAlignment_addP2(posetAlignment, sequence1, sequence3, position3, sequence2, position2,
                    lessThanOrEqual);
        }
    }
}
static void writeCliqueGraph(FILE *fileHandle, stList *edges, int64_t nodeNumber, bool negativeWeights) {
    /*
     * Writes a complete graph over nodeNumber nodes in a format readable by blossom:
     * a header line with the node and edge counts, the given edges with their
     * weights (negated when negativeWeights is set), and then a zero weight edge
     * for every pair of nodes not covered by a given edge.
     */
    int64_t edgeNumber = ((nodeNumber * nodeNumber) - nodeNumber) / 2;
    fprintf(fileHandle, "%" PRIi64 " %" PRIi64 "\n", nodeNumber, edgeNumber);

    stSortedSet *writtenPairs = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn,
            (void (*)(void *))stIntTuple_destruct);
    int64_t edgesWritten = 0;

    //First the real edges.
    for (int64_t edgeIndex = 0; edgeIndex < stList_length(edges); edgeIndex++) {
        stIntTuple *edge = stList_get(edges, edgeIndex);
        int64_t from = stIntTuple_get(edge, 0);
        int64_t to = stIntTuple_get(edge, 1);
        assert(from < nodeNumber);
        assert(to < nodeNumber);
        assert(from >= 0);
        assert(to >= 0);
        assert(from != to);
        int64_t weight = stIntTuple_get(edge, 2);
        //If is a minimisation algorithms we invert the sign..
        int64_t writtenWeight = negativeWeights ? -weight : weight;
        fprintf(fileHandle, "%" PRIi64 " %" PRIi64 " %" PRIi64 "\n", from, to, writtenWeight);
        edgesWritten++;
        addEdgeToSet(writtenPairs, from, to);
    }

    //Then pad with zero weight edges so that every pair of nodes is connected.
    for (int64_t i = 0; i < nodeNumber; i++) {
        for (int64_t j = i + 1; j < nodeNumber; j++) {
            if (edgeInSet(writtenPairs, i, j)) {
                continue;
            }
            fprintf(fileHandle, "%" PRIi64 " %" PRIi64 " 0\n", i, j);
            edgesWritten++;
        }
    }

    //Cleanup
    stSortedSet_destruct(writtenPairs);
    assert(edgeNumber == edgesWritten);
}
static stSortedSet *getOddNodes(stList *cycle) {
    /*
     * Walks around a simple cycle of even length, given as a list of edges, starting
     * at end 0 of its first edge, and returns the set holding every second node met
     * on the walk (the start node included).
     */
    stSortedSet *nodes = stSortedSet_construct3((int(*)(const void *, const void *)) stIntTuple_cmpFn,
            (void(*)(void *)) stIntTuple_destruct);
    stHash *nodesToEdges = getNodesToEdgesHash(cycle);

    int64_t currentNode = stIntTuple_get(stList_get(cycle, 0), 0);
    int64_t previousNode = -1;
    assert(stList_length(cycle) % 2 == 0);
    for (int64_t step = 0; step < stList_length(cycle); step++) {
        if (step % 2 == 0) { //Make alternating
            addNodeToSet(nodes, currentNode);
        }
        //In a simple cycle each node has two edges; leave by the one that does
        //not lead back to where we came from.
        stList *incidentEdges = getItemForNode(currentNode, nodesToEdges);
        assert(stList_length(incidentEdges) == 2);
        int64_t nextNode = getOtherPosition(stList_get(incidentEdges, 0), currentNode);
        if (nextNode == previousNode) {
            nextNode = getOtherPosition(stList_get(incidentEdges, 1), currentNode);
            assert(nextNode != previousNode);
        }
        previousNode = currentNode;
        currentNode = nextNode;
    }
    stHash_destruct(nodesToEdges);
    assert(stList_length(cycle) / 2 == stSortedSet_size(nodes));
    return nodes;
}