/*
 * Test helper that validates a computed reference against the shared test
 * fixture (nodeNumber, stubs, chains, zMatrix). It asserts that:
 *  - the reference's adjacency edges number exactly nodeNumber / 2,
 *  - the supplied totalScore matches a freshly recomputed z-score,
 *  - the number of reference entries and the number of components of the
 *    combined (adjacency + stub + chain) edge list both equal the stub count.
 */
static void checkIsValidReference(CuTest *testCase, stList *reference, double totalScore) {
    stList *adjacencyEdges = convertReferenceToAdjacencyEdges(reference);

    /* Twice the number of chosen edges must equal the number of nodes. */
    CuAssertIntEquals(testCase, nodeNumber, stList_length(adjacencyEdges) * 2);

    /* Build the node set {0, ..., nodeNumber - 1}; the set destroys its tuples. */
    stSortedSet *allNodes = stSortedSet_construct3(
            (int(*)(const void *, const void *)) stIntTuple_cmpFn,
            (void(*)(void *)) stIntTuple_destruct);
    int64_t nodeId = 0;
    while (nodeId < nodeNumber) {
        stSortedSet_insert(allNodes, stIntTuple_construct1(nodeId));
        nodeId++;
    }
    /* NOTE(review): meaning of the trailing (1, 0) flags is defined by checkEdges - confirm there. */
    checkEdges(adjacencyEdges, allNodes, 1, 0);

    /* The reported score must agree with an independent recomputation. */
    double recomputedScore = calculateZScoreOfReference(reference, nodeNumber, zMatrix);
    CuAssertDblEquals(testCase, recomputedScore, totalScore, 0.000001);

    /* Combine adjacency, stub and chain edges and count the components. */
    stList *combinedEdges = stList_copy(adjacencyEdges, NULL);
    stList_appendAll(combinedEdges, stubs);
    stList_appendAll(combinedEdges, chains);
    stList *connectedComponents = getComponents(combinedEdges);
    CuAssertIntEquals(testCase, stList_length(stubs), stList_length(reference));
    CuAssertIntEquals(testCase, stList_length(stubs), stList_length(connectedComponents));

    /* Release everything allocated above. */
    stList_destruct(connectedComponents);
    stSortedSet_destruct(allNodes);
    stList_destruct(combinedEdges);
    stList_destruct(adjacencyEdges);
}
/*
 * Makes adjacencies between the caps of a flower.
 *
 * Every cap instance of every end in the flower is collected (replaced by its
 * reverse when cap_getStrand() is false), the collection is sorted with
 * addAdjacenciesPP, and then caps at positions (0,1), (2,3), ... of the sorted
 * list are made adjacent to each other.
 */
void stCaf_addAdjacencies(Flower *flower) {
    stList *caps = stList_construct();

    /* Gather the caps of every end. */
    Flower_EndIterator *endIt = flower_getEndIterator(flower);
    for (End *end = flower_getNextEnd(endIt); end != NULL; end = flower_getNextEnd(endIt)) {
        End_InstanceIterator *capIt = end_getInstanceIterator(end);
        for (Cap *cap = end_getNext(capIt); cap != NULL; cap = end_getNext(capIt)) {
            Cap *orientedCap = cap_getStrand(cap) ? cap : cap_getReverse(cap);
            stList_append(caps, orientedCap);
        }
        end_destructInstanceIterator(capIt);
    }
    flower_destructEndIterator(endIt);

    /* Pairing below needs an even number of caps. */
    assert(stList_length(caps) % 2 == 0);

    stList_sort(caps, (int(*)(const void *, const void *)) addAdjacenciesPP);

    /* Join consecutive pairs in the sorted order. */
    for (int64_t left = 0; left + 1 < stList_length(caps); left += 2) {
        Cap *first = stList_get(caps, left);
        Cap *second = stList_get(caps, left + 1);
        cap_makeAdjacent(first, second);
    }

    stList_destruct(caps);
}
/*
 * Greedy matching: works on a sorted copy of the edge list, repeatedly pops
 * the last edge and keeps it only if neither endpoint has been used yet, so
 * that every node ends up with at most one edge.
 *
 * edges      - list of stIntTuple edges; positions 0 and 1 are read as the
 *              endpoints and position 2 as the weight. The list itself is not
 *              modified (a shallow copy is sorted and consumed).
 * nodeNumber - not used by this implementation.
 *
 * Returns a new list holding the selected edges (the same tuple pointers as
 * in the input list).
 */
stList *chooseMatching_greedy(stList *edges, int64_t nodeNumber) {
    stList *candidates = stList_copy(edges, NULL);
    stSortedSet *usedNodes = getEmptyNodeOrEdgeSetWithCleanup();
    stList *matching = stList_construct();

    /* Order is defined by chooseMatching_greedyP; the assert below checks
     * that popped weights never increase. */
    stList_sort(candidates, chooseMatching_greedyP);

    double previousWeight = INT64_MAX;
    while (stList_length(candidates) > 0) {
        stIntTuple *candidate = stList_pop(candidates);
        double weight = stIntTuple_get(candidate, 2);
        assert(weight <= previousWeight);
        previousWeight = weight;

        /* Skip the edge if either endpoint is already matched. */
        if (nodeInSet(usedNodes, stIntTuple_get(candidate, 0))
                || nodeInSet(usedNodes, stIntTuple_get(candidate, 1))) {
            continue;
        }
        addNodeToSet(usedNodes, stIntTuple_get(candidate, 0));
        addNodeToSet(usedNodes, stIntTuple_get(candidate, 1));
        stList_append(matching, candidate);
    }
    assert(stList_length(candidates) == 0);

    stList_destruct(candidates);
    stSortedSet_destruct(usedNodes);
    return matching;
}
static void debugScaffoldPaths(stList *haplotypePaths, stHash *haplotypePathToScaffoldPathHash, stHash *haplotypeToMaximalHaplotypeLengthHash, stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters) { stHash *segmentToMaximalHaplotypePathHash = buildSegmentToContigPathHash(haplotypePaths); for (int64_t i = 0; i < stList_length(haplotypePaths); i++) { stList *haplotypePath = stList_get(haplotypePaths, i); assert(stList_length(haplotypePath) > 0); //Traversing from 5' end.. Segment *_5Segment = stList_get(haplotypePath, 0); Segment *_3Segment = stList_get(haplotypePath, stList_length(haplotypePath) - 1); assert(segment_getStrand(_5Segment) == segment_getStrand(_3Segment)); if (!segment_getStrand(_5Segment)) { Segment *j = _5Segment; _5Segment = segment_getReverse(_3Segment); _3Segment = segment_getReverse(j); } assert(segment_getStrand(_5Segment)); assert(segment_getStrand(_3Segment)); Cap *_5Cap = segment_get5Cap(_5Segment); Cap *_3Cap = segment_get3Cap(_3Segment); if (getAdjacentCapsSegment(_5Cap) != NULL) { assert(!trueAdjacency(_5Cap, haplotypeEventStrings)); } if (getAdjacentCapsSegment(_3Cap) != NULL) { assert(!trueAdjacency(_3Cap, haplotypeEventStrings)); } debugScaffoldPathsP(_5Cap, haplotypePath, haplotypePathToScaffoldPathHash, haplotypeToMaximalHaplotypeLengthHash, segmentToMaximalHaplotypePathHash, haplotypeEventStrings, contaminationEventStrings, capCodeParameters, 1); debugScaffoldPathsP(_3Cap, haplotypePath, haplotypePathToScaffoldPathHash, haplotypeToMaximalHaplotypeLengthHash, segmentToMaximalHaplotypePathHash, haplotypeEventStrings, contaminationEventStrings, capCodeParameters, 0); } stHash_destruct(segmentToMaximalHaplotypePathHash); }
/*
 * Partitions the edges relevant to subCycle into three freshly constructed
 * lists, returned through the output pointers:
 *  - *subStubEdges:      edges of subCycle that are contained in stubEdges,
 *  - *subChainEdges:     edges of subCycle not in stubEdges but in chainEdges,
 *  - *subAdjacencyEdges: edges of adjacencyEdges whose two endpoints
 *                        (tuple positions 0 and 1) both occur in the node set
 *                        of subCycle.
 * The output lists hold the same edge pointers as the inputs.
 */
static void splitIntoAdjacenciesStubsAndChains(stList *subCycle, stList *adjacencyEdges,
        stList *stubEdges, stList *chainEdges, stList **subAdjacencyEdges,
        stList **subStubEdges, stList **subChainEdges) {
    *subStubEdges = stList_construct();
    *subChainEdges = stList_construct();

    /* Classify each edge of the sub-cycle; stub membership takes priority. */
    for (int64_t i = 0; i < stList_length(subCycle); i++) {
        stIntTuple *cycleEdge = stList_get(subCycle, i);
        if (stList_contains(stubEdges, cycleEdge)) {
            stList_append(*subStubEdges, cycleEdge);
            continue;
        }
        if (stList_contains(chainEdges, cycleEdge)) {
            stList_append(*subChainEdges, cycleEdge);
        }
    }

    /* Keep the adjacency edges that lie entirely within the sub-cycle's nodes. */
    *subAdjacencyEdges = stList_construct();
    stSortedSet *cycleNodes = getNodeSetOfEdges(subCycle);
    for (int64_t i = 0; i < stList_length(adjacencyEdges); i++) {
        stIntTuple *candidate = stList_get(adjacencyEdges, i);
        if (!nodeInSet(cycleNodes, stIntTuple_get(candidate, 0))) {
            continue;
        }
        if (!nodeInSet(cycleNodes, stIntTuple_get(candidate, 1))) {
            continue;
        }
        stList_append(*subAdjacencyEdges, candidate);
    }
    stSortedSet_destruct(cycleNodes);
}
/*
 * Deliberately simple recomputation of connected components, used to
 * cross-check the real implementation.
 *
 * Every node in the file-level `nodes` list starts in its own singleton
 * sorted set. For each edge, the components of the two endpoints (read from
 * tuple positions 1 and 2) are replaced by their union when they differ.
 *
 * Returns a hash from node tuple to the sorted set that is its component.
 * The hash is built with NULL destructors, so several keys share one
 * component and the caller is responsible for freeing each distinct set once.
 */
static stHash *getComponents(stList *filteredEdges) {
    stHash *nodesToComponents = stHash_construct3(
            (uint64_t(*)(const void *)) stIntTuple_hashKey,
            (int(*)(const void *, const void *)) stIntTuple_equalsFn, NULL, NULL);

    /* Seed: one singleton component per node. */
    for (int64_t i = 0; i < stList_length(nodes); i++) {
        stIntTuple *node = stList_get(nodes, i);
        stSortedSet *singleton = stSortedSet_construct();
        stSortedSet_insert(singleton, node);
        stHash_insert(nodesToComponents, node, singleton);
    }

    /* Merge the endpoint components of every edge. */
    for (int64_t i = 0; i < stList_length(filteredEdges); i++) {
        stIntTuple *edge = stList_get(filteredEdges, i);
        /* Temporary keys used only for the lookups below. */
        stIntTuple *leftKey = stIntTuple_construct1(stIntTuple_get(edge, 1));
        stIntTuple *rightKey = stIntTuple_construct1(stIntTuple_get(edge, 2));
        stSortedSet *leftComponent = stHash_search(nodesToComponents, leftKey);
        stSortedSet *rightComponent = stHash_search(nodesToComponents, rightKey);
        assert(leftComponent != NULL && rightComponent != NULL);

        if (leftComponent != rightComponent) {
            stSortedSet *merged = stSortedSet_getUnion(leftComponent, rightComponent);
            /* Re-point every member of the union at the merged set. */
            stSortedSetIterator *memberIt = stSortedSet_getIterator(merged);
            for (stIntTuple *member = stSortedSet_getNext(memberIt); member != NULL;
                    member = stSortedSet_getNext(memberIt)) {
                stHash_insert(nodesToComponents, member, merged);
            }
            stSortedSet_destructIterator(memberIt);
            stSortedSet_destruct(leftComponent);
            stSortedSet_destruct(rightComponent);
        }

        stIntTuple_destruct(leftKey);
        stIntTuple_destruct(rightKey);
    }
    return nodesToComponents;
}
/*
 * Test helper: verifies that filteredEdges is a maximal edge subset whose
 * components respect the file-level maxComponentSize.
 *
 *  1. Every component computed by getComponents() has between 1 and
 *     maxComponentSize members.
 *  2. Every edge of the file-level `edges` list that is absent from
 *     filteredEdges joins two distinct components whose combined size
 *     exceeds maxComponentSize (endpoints read from tuple positions 1 and 2).
 */
static void checkComponents(CuTest *testCase, stList *filteredEdges) {
    stHash *nodesToComponents = getComponents(filteredEdges);

    /* Size bounds for each component (shared sets may be visited repeatedly). */
    stList *componentList = stHash_getValues(nodesToComponents);
    for (int64_t i = 0; i < stList_length(componentList); i++) {
        stSortedSet *component = stList_get(componentList, i);
        CuAssertTrue(testCase, stSortedSet_size(component) <= maxComponentSize);
        CuAssertTrue(testCase, stSortedSet_size(component) >= 1);
    }

    /* Rejected edges must have been rejected for a reason. */
    stSortedSet *keptEdges = stList_getSortedSet(filteredEdges,
            (int(*)(const void *, const void *)) stIntTuple_cmpFn);
    for (int64_t i = 0; i < stList_length(edges); i++) {
        stIntTuple *edge = stList_get(edges, i);
        if (stSortedSet_search(keptEdges, edge) != NULL) {
            continue; /* edge was kept */
        }
        stIntTuple *leftKey = stIntTuple_construct1(stIntTuple_get(edge, 1));
        stIntTuple *rightKey = stIntTuple_construct1(stIntTuple_get(edge, 2));
        stSortedSet *leftComponent = stHash_search(nodesToComponents, leftKey);
        stSortedSet *rightComponent = stHash_search(nodesToComponents, rightKey);
        CuAssertTrue(testCase, leftComponent != NULL && rightComponent != NULL);
        CuAssertTrue(testCase, leftComponent != rightComponent);
        CuAssertTrue(testCase,
                stSortedSet_size(leftComponent) + stSortedSet_size(rightComponent) > maxComponentSize);
        stIntTuple_destruct(leftKey);
        stIntTuple_destruct(rightKey);
    }
    stSortedSet_destruct(keptEdges);

    /* De-duplicate the component pointers through a sorted set so each
     * distinct component is destroyed exactly once. */
    stSortedSet *distinctComponents = stList_getSortedSet(componentList, NULL);
    stList_destruct(componentList);
    stSortedSet_setDestructor(distinctComponents, (void(*)(void *)) stSortedSet_destruct);
    stSortedSet_destruct(distinctComponents);
    stHash_destruct(nodesToComponents);
}
/*
 * Randomised smoke test for stCaf_breakupComponentsGreedily.
 *
 * For each of 10000 random pinch graphs it logs the graph size and largest
 * adjacency component, runs the greedy break-up with a random ratio in
 * [0, 10), and logs the same statistics afterwards.
 *
 * NOTE(review): this block contains no CuAssert call, so it only checks that
 * the break-up runs to completion; confirm whether a bound on the largest
 * component after break-up was meant to be asserted.
 */
static void testBreakUpPinchGraphAdjacencyComponentsGreedily(CuTest *testCase) {
    for (int64_t test = 0; test < 10000; test++) {
        st_logInfo("Starting break up giant pinch graph components random test %" PRIi64 "\n", test);
        stPinchThreadSet *threadSet = stPinchThreadSet_getRandomGraph();
        int64_t totalNodes = 2 * stPinchThreadSet_getTotalBlockNumber(threadSet);
        float maximumAdjacencyComponentSizeRatio = st_random() * 10;

        /*
         * The threshold is only used for logging here. The product is -inf or
         * NaN when the ratio is 0 (log(0) == -inf), and converting such a
         * value to int64_t is undefined, so do the range check in floating
         * point and fall back to the minimum of 2 (NaN fails both tests).
         * NOTE(review): the formula itself (log(ratio) * totalNodes) is kept
         * as found; confirm it matches what stCaf_breakupComponentsGreedily
         * derives from the ratio.
         */
        double rawThreshold = log(maximumAdjacencyComponentSizeRatio) * totalNodes;
        int64_t maximumAdjacencyComponentSize = 2;
        if (rawThreshold >= 9223372036854775808.0) { /* 2^63: does not fit in int64_t */
            maximumAdjacencyComponentSize = INT64_MAX;
        } else if (rawThreshold >= 2.0) {
            maximumAdjacencyComponentSize = (int64_t) rawThreshold;
        }

        stList *adjacencyComponents = stPinchThreadSet_getAdjacencyComponents(threadSet);
        int64_t largestAdjacencyComponentSizeInGraph = getSizeOfLargestAdjacencyComponent(adjacencyComponents);
        /* The comparison yields an int; cast it so it matches the PRIi64 conversion. */
        st_logInfo(
                "We have a random pinch graph with %" PRIi64 " nodes and %" PRIi64 " adjacency components, the largest adjacency component has %" PRIi64 " nodes, with a ratio of %f we will break up adjacency components larger than %" PRIi64 " in size, this will result in a breakup: %" PRIi64 "\n",
                totalNodes, stList_length(adjacencyComponents), largestAdjacencyComponentSizeInGraph,
                maximumAdjacencyComponentSizeRatio, maximumAdjacencyComponentSize,
                (int64_t) (largestAdjacencyComponentSizeInGraph > maximumAdjacencyComponentSize));
        stList_destruct(adjacencyComponents);

        /* Now do the actual breaking up. */
        stCaf_breakupComponentsGreedily(threadSet, maximumAdjacencyComponentSizeRatio);

        /* Recompute the statistics on the modified graph. */
        adjacencyComponents = stPinchThreadSet_getAdjacencyComponents(threadSet);
        int64_t largestAdjacencyComponentSizeInGraphAfterBreakup =
                getSizeOfLargestAdjacencyComponent(adjacencyComponents);
        totalNodes = 2 * stPinchThreadSet_getTotalBlockNumber(threadSet);
        st_logInfo(
                "After splitting we have a pinch graph with %" PRIi64 " nodes and %" PRIi64 " adjacency components, the largest adjacency component has %" PRIi64 " nodes, with a ratio of %f that broke up adjacency components larger than %" PRIi64 " in size\n",
                totalNodes, stList_length(adjacencyComponents), largestAdjacencyComponentSizeInGraphAfterBreakup,
                maximumAdjacencyComponentSizeRatio, maximumAdjacencyComponentSize);

        /* Cleanup. */
        stList_destruct(adjacencyComponents);
        stPinchThreadSet_destruct(threadSet);
    }
}
/*
 * Collapses substrings that share a name and lie within proximityToMerge of
 * each other into single, longer substrings.
 *
 * The input list is sorted in place with substring_cmp (a side effect visible
 * to the caller); its elements are not modified. The returned list owns
 * freshly cloned Substring objects and destroys them with substring_destruct.
 * An empty input yields an empty list.
 */
static stList *mergeSubstrings(stList *substrings, int64_t proximityToMerge) {
    stList *merged = stList_construct3(0, (void (*)(void *)) substring_destruct);
    if (stList_length(substrings) == 0) {
        return merged;
    }

    stList_sort(substrings, (int (*)(const void *, const void *)) substring_cmp);

    /* `current` is the merged substring being extended. */
    Substring *current = substring_clone(stList_get(substrings, 0));
    stList_append(merged, current);

    for (int64_t i = 1; i < stList_length(substrings); i++) {
        Substring *next = stList_get(substrings, i);

        /* Different name, or too far past the end of `current`: start a new run. */
        if (current->name != next->name
                || !(current->start + current->length + proximityToMerge >= next->start)) {
            current = substring_clone(next);
            stList_append(merged, current);
            continue;
        }

        /* Extend `current` only if `next` reaches beyond its end. */
        if (current->start + current->length < next->start + next->length) {
            current->length = next->start + next->length - current->start;
        }
    }
    return merged;
}
/*
 * Finds the best adjacency switch involving four edges for the given existing
 * edge (oldEdge1) as seen from one of its nodes (node1).
 *
 * For every bridging edge newEdge1 = (node1, node2), the single current edge
 * oldEdge2 = (node2, node3) is looked up and the closing edge
 * newEdge2 = (node3, node4) is fetched, where node4 is the other end of
 * oldEdge1. The cost is the weight (tuple position 2) of the two old edges
 * minus the weight of the two new edges; candidates are folded together with
 * adjacencySwitch_update.
 *
 * Returns NULL when node1 has no bridging edges.
 */
static AdjacencySwitch *getBest4EdgeAdjacencySwitchP(stIntTuple *oldEdge1, int64_t node1,
        stSortedSet *allAdjacencyEdges, stHash *nodesToAllCurrentEdges,
        stHash *nodesToBridgingAdjacencyEdges) {
    int64_t node4 = getOtherPosition(oldEdge1, node1);

    stList *bridgingEdges = getItemForNode(node1, nodesToBridgingAdjacencyEdges);
    if (bridgingEdges == NULL) {
        return NULL;
    }

    AdjacencySwitch *best = NULL;
    for (int64_t i = 0; i < stList_length(bridgingEdges); i++) {
        stIntTuple *newEdge1 = stList_get(bridgingEdges, i);
        int64_t node2 = getOtherPosition(newEdge1, node1);

        /* node2 must currently have exactly one edge. */
        stList *currentEdgesOfNode2 = getItemForNode(node2, nodesToAllCurrentEdges);
        assert(currentEdgesOfNode2 != NULL);
        assert(stList_length(currentEdgesOfNode2) == 1);
        stIntTuple *oldEdge2 = stList_peek(currentEdgesOfNode2);
        int64_t node3 = getOtherPosition(oldEdge2, node2);

        stIntTuple *newEdge2 = getWeightedEdgeFromSet(node3, node4, allAdjacencyEdges);
        assert(newEdge2 != NULL);

        int64_t cost = stIntTuple_get(oldEdge1, 2) + stIntTuple_get(oldEdge2, 2)
                - stIntTuple_get(newEdge1, 2) - stIntTuple_get(newEdge2, 2);
        best = adjacencySwitch_update(best, oldEdge1, oldEdge2, newEdge1, newEdge2, cost);
    }
    return best;
}
/*
 * Merges a set of simple cycles (each a non-empty list of adjacency edges with
 * no NULL entries) into a single cycle by calling
 * doBestMergeOfTwoSimpleCycles until only one cycle remains.
 *
 * The outer list and every inner list are shallow-copied first, so the merge
 * loop operates on the copies. The returned list is the one remaining cycle;
 * the temporary outer list is destroyed before returning.
 * The input must contain at least one cycle (asserted).
 */
stList *mergeSimpleCycles(stList *cycles, stList *nonZeroWeightAdjacencyEdges,
        stSortedSet *allAdjacencyEdges) {
    /* Work on a private copy of the outer list and of each cycle. */
    stList *workingCycles = stList_copy(cycles, NULL);
    for (int64_t i = 0; i < stList_length(workingCycles); i++) {
        assert(stList_length(stList_get(workingCycles, i)) > 0);
        assert(!stList_contains(stList_get(workingCycles, i), NULL));
        stList *cycleCopy = stList_copy(stList_get(workingCycles, i), NULL);
        stList_set(workingCycles, i, cycleCopy);
    }

    /* Each call replaces two cycles by one merged cycle. */
    while (stList_length(workingCycles) > 1) {
        doBestMergeOfTwoSimpleCycles(workingCycles, nonZeroWeightAdjacencyEdges, allAdjacencyEdges);
    }
    assert(stList_length(workingCycles) == 1);

    /* The outer copy was made with a NULL destructor, so the survivor outlives it. */
    stList *mergedCycle = stList_get(workingCycles, 0);
    stList_destruct(workingCycles);
    return mergedCycle;
}
/*
 * Checks that stList_remove returns the element at the given index and
 * shrinks the list by one each time. After removing index 0, the element at
 * index 1 is the one originally stored at index 2.
 */
void test_stList_remove(CuTest *testCase) {
    setup();

    void *removedFirst = stList_remove(list, 0);
    CuAssertTrue(testCase, removedFirst == strings[0]);
    CuAssertTrue(testCase, stList_length(list) == stringNumber - 1);

    void *removedSecond = stList_remove(list, 1);
    CuAssertTrue(testCase, removedSecond == strings[2]);
    CuAssertTrue(testCase, stList_length(list) == stringNumber - 2);

    teardown();
}
/*
 * Merges two of the given simple cycles using the best available adjacency
 * switch, modifying `cycles` in place: the two cycles containing the switch's
 * old edges are removed and destroyed, and one new cycle (their edges joined,
 * with the two old edges replaced by the two new edges) is appended.
 *
 * Requires at least two cycles (asserted).
 */
static void doBestMergeOfTwoSimpleCycles(stList *cycles, stList *nonZeroWeightAdjacencyEdges,
        stSortedSet *allAdjacencyEdges) {
    assert(stList_length(cycles) > 1);

    /* Pick the switch to apply. */
    AdjacencySwitch *bestSwitch = getBestAdjacencySwitch(cycles, nonZeroWeightAdjacencyEdges,
            allAdjacencyEdges);
    assert(bestSwitch != NULL);

    /* Locate the cycles holding oldEdge1 and oldEdge2; this list destroys
     * the cycles it holds when it is itself destroyed. */
    stList *toMerge = stList_construct3(0, (void(*)(void *)) stList_destruct);
    for (int64_t i = 0; i < stList_length(cycles); i++) {
        stList *candidate = stList_get(cycles, i);
        if (stList_contains(candidate, bestSwitch->oldEdge1)) {
            /* The two old edges must live in different cycles. */
            assert(!stList_contains(candidate, bestSwitch->oldEdge2));
            stList_append(toMerge, candidate);
        } else if (stList_contains(candidate, bestSwitch->oldEdge2)) {
            stList_append(toMerge, candidate);
        }
    }
    assert(stList_length(toMerge) == 2);

    /* Join their edges into the new cycle. */
    stList *mergedCycle = stList_join(toMerge);
    assert(!stList_contains(mergedCycle, NULL));

    /* Drop the two old cycles from the caller's list, then free them. */
    for (int64_t k = 0; k < 2; k++) {
        assert(stList_contains(cycles, stList_get(toMerge, k)));
        stList_removeItem(cycles, stList_get(toMerge, k));
    }
    stList_destruct(toMerge);

    /* Apply the switch: remove the old edges, add the new ones. */
    assert(stList_contains(mergedCycle, bestSwitch->oldEdge1));
    stList_removeItem(mergedCycle, bestSwitch->oldEdge1);
    assert(stList_contains(mergedCycle, bestSwitch->oldEdge2));
    stList_removeItem(mergedCycle, bestSwitch->oldEdge2);
    assert(!stList_contains(mergedCycle, bestSwitch->newEdge1));
    stList_append(mergedCycle, bestSwitch->newEdge1);
    assert(!stList_contains(mergedCycle, bestSwitch->newEdge2));
    stList_append(mergedCycle, bestSwitch->newEdge2);
    adjacencySwitch_destruct(bestSwitch);

    /* Hand the merged cycle back to the caller's list. */
    stList_append(cycles, mergedCycle);
}
/*
 * Produces an updated set of chosen edges in which, after adjacency switches,
 * every simple cycle contains at least one stub edge.
 *
 * Steps: compute the components of the chosen/stub/chain edges; pool all
 * components containing a stub edge into one "global" component; strip the
 * stub and chain edges from the remaining components; then merge everything
 * into one cycle with mergeSimpleCycles.
 *
 * At least one component must contain a stub edge (asserted).
 */
static stList *mergeSimpleCycles2(stList *chosenEdges, stList *nonZeroWeightAdjacencyEdges,
        stSortedSet *allAdjacencyEdges, stList *stubEdges, stList *chainEdges) {
    stList *components = getComponents2(chosenEdges, stubEdges, chainEdges);

    /* Split the components by whether they contain any stub edge. */
    stSortedSet *stubEdgeSet = stList_getSortedSet(stubEdges,
            (int(*)(const void *, const void *)) stIntTuple_cmpFn);
    stList *withStubs = stList_construct();
    stList *withoutStubs = stList_construct();
    for (int64_t i = 0; i < stList_length(components); i++) {
        stList *component = stList_get(components, i);
        if (intersectionSize(stubEdgeSet, component) > 0) {
            stList_append(withStubs, component);
        } else {
            stList_append(withoutStubs, component);
        }
    }
    assert(stList_length(withStubs) > 0);
    stSortedSet_destruct(stubEdgeSet);

    /* Pool all stub-containing components into a single global component. */
    stList *globalComponent = stList_join(withStubs);
    stList_destruct(withStubs);

    /* Reduce every component (global one included) to its adjacency edges. */
    stList_append(withoutStubs, globalComponent);
    stList *adjacencyOnlyComponents = getStubAndChainEdgeFreeComponents(withoutStubs, stubEdges,
            chainEdges);
    stList_destruct(withoutStubs);
    stList_destruct(globalComponent);
    /* Destroyed last: the lists above held pointers to its member components. */
    stList_destruct(components);

    /* Merge the stub-free components into the global one. */
    stList *updatedChosenEdges = mergeSimpleCycles(adjacencyOnlyComponents,
            nonZeroWeightAdjacencyEdges, allAdjacencyEdges);
    stList_destruct(adjacencyOnlyComponents);
    return updatedChosenEdges;
}
/*
 * Constructs a face in the given flower, starting from startingCap.
 *
 * The top nodes are gathered with buildFaces_fillTopNodeList. Each becomes a
 * top node of the new face; for each one, the bottom nodes of its lifted
 * edges (looked up in liftedEdgesTable) are added, together with a derived
 * destination: the top cap of the positively oriented adjacency of the bottom
 * node, or NULL when that equals the top node's own adjacency.
 */
static void buildFaces_constructFromCap(Cap * startingCap, stHash *liftedEdgesTable,
        Flower * flower) {
    Face *face = face_construct(flower);
    stList *topNodes = stList_construct3(16, NULL);

    printf("Constructing new face");

    /* Collect the top nodes reachable from the starting cap. */
    buildFaces_fillTopNodeList(startingCap, topNodes, liftedEdgesTable);

#ifndef NDEBUG
    /* A face without top nodes should be impossible. */
    if (stList_length(topNodes) == 0) {
        abort();
    }
#endif

    face_allocateSpace(face, stList_length(topNodes));

    for (int64_t topIndex = 0; topIndex < stList_length(topNodes); topIndex++) {
        Cap *topNode = stList_get(topNodes, topIndex);
        face_setTopNode(face, topIndex, topNode);

        stList *liftedEdges = stHash_search(liftedEdgesTable, topNode);
        if (liftedEdges == NULL) {
            /* No lifted edges: this top node has no bottom nodes. */
            face_setBottomNodeNumber(face, topIndex, 0);
            continue;
        }
        face_setBottomNodeNumber(face, topIndex, stList_length(liftedEdges));

        for (int64_t bottomIndex = 0; bottomIndex < stList_length(liftedEdges); bottomIndex++) {
            Cap *bottomNode = ((LiftedEdge *) stList_get(liftedEdges, bottomIndex))->bottomNode;
            face_addBottomNode(face, topIndex, bottomNode);

            /* Record a derived destination only when it differs from the
             * top node's own adjacency. */
            Cap *ancestor = cap_getTopCap(cap_getPositiveOrientation(cap_getAdjacency(bottomNode)));
            face_setDerivedDestination(face, topIndex, bottomIndex,
                    cap_getAdjacency(topNode) != ancestor ? ancestor : NULL);

#ifndef NDEBUG
            /* A bottom node must not also be one of the top nodes. */
            assert(!stList_contains(topNodes, cap_getPositiveOrientation(
                    ((LiftedEdge*) stList_get(liftedEdges, bottomIndex))->bottomNode)));
#endif
        }
    }

    stList_destruct(topNodes);
}
/*
 * Returns the length of the longest list in adjacencyComponents (a list of
 * lists), or 0 when adjacencyComponents is empty.
 */
static int64_t getSizeOfLargestAdjacencyComponent(stList *adjacencyComponents) {
    int64_t largest = 0;
    int64_t componentCount = stList_length(adjacencyComponents);
    for (int64_t index = 0; index < componentCount; index++) {
        int64_t size = stList_length(stList_get(adjacencyComponents, index));
        if (size > largest) {
            largest = size;
        }
    }
    return largest;
}
/*
 * Checks that stList_copy produces a list of the same length holding the
 * same element pointers, in order, as the fixture list.
 */
void test_stList_copy(CuTest *testCase) {
    setup();

    stList *copy = stList_copy(list, NULL);
    CuAssertTrue(testCase, stList_length(list) == stList_length(copy));

    int64_t index = 0;
    while (index < stringNumber) {
        CuAssertTrue(testCase, stList_get(copy, index) == strings[index]);
        index++;
    }

    stList_destruct(copy);
    teardown();
}
int main(int argc, char *argv[]) { ////////////////////////////////////////////// //Parse the inputs ////////////////////////////////////////////// parseBasicArguments(argc, argv, "linkageStats"); /////////////////////////////////////////////////////////////////////////// // Get the intervals /////////////////////////////////////////////////////////////////////////// stList *haplotypeEventStrings = getEventStrings( treatHaplotype1AsContamination ? NULL : hap1EventString, treatHaplotype2AsContamination ? NULL : hap2EventString); stList *assemblyEventStringInList = stList_construct(); stList_append(assemblyEventStringInList, assemblyEventString); stList *intervals = stList_construct3(0, (void (*)(void *))sequenceInterval_destruct); for(int64_t i=0; i<stList_length(haplotypeEventStrings); i++) { const char *hapEventString = stList_get(haplotypeEventStrings, i); st_logInfo("Getting contig paths for haplotype: %s", hapEventString); stList *contigPaths = getContigPaths(flower, hapEventString, assemblyEventStringInList); stList *hapIntervals = getSplitContigPathIntervals(flower, contigPaths, hapEventString, assemblyEventStringInList); stList_destruct(contigPaths); st_logInfo("Getting contig paths\n"); stList_appendAll(intervals, hapIntervals); stList_setDestructor(hapIntervals, NULL); stList_destruct(hapIntervals); } st_logDebug("Got a total of %" PRIi64 " intervals\n", stList_length(intervals)); /////////////////////////////////////////////////////////////////////////// // Write it out. 
/////////////////////////////////////////////////////////////////////////// FILE *fileHandle = fopen(outputFile, "w"); for (int64_t i = 0; i < stList_length(intervals); i++) { SequenceInterval *sequenceInterval = stList_get(intervals, i); st_logDebug("We have a path interval %s %" PRIi64 " %" PRIi64 "\n", sequenceInterval->sequenceName, sequenceInterval->start, sequenceInterval->end); fprintf(fileHandle, "%s %" PRIi64 " %" PRIi64 "\n", sequenceInterval->sequenceName, sequenceInterval->start, sequenceInterval->end); } st_logInfo("Finished writing out the stats.\n"); fclose(fileHandle); return 0; }
/*
 * Returns the minimum-cost adjacency switch over the given cycles.
 *
 * The search starts from the best switch reported by
 * getBest2EdgeAdjacencySwitch and then, for every edge currently in any
 * cycle, considers the candidate from getBest4EdgeAdjacencySwitch2, keeping
 * the cheaper one via getMinimumCostAdjacencySwitch.
 *
 * `cycles` must be non-empty and a switch must exist (both asserted).
 */
static AdjacencySwitch *getBestAdjacencySwitch(stList *cycles,
        stList *nonZeroWeightAdjacencyEdges, stSortedSet *allAdjacencyEdges) {
    assert(stList_length(cycles) > 0);

    /* Node-indexed views of the non-zero-weight edges and the current edges. */
    stHash *nodesToNonZeroWeightedEdges = getNodesToEdgesHash(nonZeroWeightAdjacencyEdges);
    stList *currentEdges = stList_join(cycles);
    assert(stList_length(currentEdges) > 0);
    stHash *nodesToCurrentEdges = getNodesToEdgesHash(currentEdges);

    /* Edges with an endpoint in each of two different cycles. */
    stList *bridgingEdges = getEdgesThatBridgeComponents(cycles, nodesToNonZeroWeightedEdges);
    stHash *nodesToBridgingEdges = getNodesToEdgesHash(bridgingEdges);

    /* Baseline: the best two-edge switch. */
    AdjacencySwitch *best = getBest2EdgeAdjacencySwitch(cycles, allAdjacencyEdges);

    /* Try to improve on it using each current edge in turn. */
    int64_t edgeIndex = 0;
    while (edgeIndex < stList_length(currentEdges)) {
        AdjacencySwitch *candidate = getBest4EdgeAdjacencySwitch2(
                stList_get(currentEdges, edgeIndex), allAdjacencyEdges, nodesToCurrentEdges,
                nodesToBridgingEdges);
        best = getMinimumCostAdjacencySwitch(best, candidate);
        edgeIndex++;
    }
    assert(best != NULL);

    /* Cleanup. */
    stList_destruct(currentEdges);
    stList_destruct(bridgingEdges);
    stHash_destruct(nodesToCurrentEdges);
    stHash_destruct(nodesToBridgingEdges);
    stHash_destruct(nodesToNonZeroWeightedEdges);
    return best;
}
/*
 * Writes the graph to fileHandle in the blossom input format: a header line
 * "<nodeNumber> <edgeNumber>" followed by one "<from> <to> <weight>" line per
 * edge, taken from tuple positions 0, 1 and 2 respectively.
 *
 * NOTE(review): the comment previously attached to this code said the sign of
 * the weight is inverted because the algorithms minimise, but the weight is
 * written exactly as stored - confirm which of the two is intended.
 */
static void writeGraph(FILE *fileHandle, stList *edges, int64_t nodeNumber) {
    int64_t edgeNumber = stList_length(edges);
    fprintf(fileHandle, "%" PRIi64 " %" PRIi64 "\n", nodeNumber, edgeNumber);

    int64_t index = 0;
    while (index < stList_length(edges)) {
        stIntTuple *edge = stList_get(edges, index);
        int64_t source = stIntTuple_get(edge, 0);
        int64_t target = stIntTuple_get(edge, 1);
        int64_t weight = stIntTuple_get(edge, 2);
        fprintf(fileHandle, "%" PRIi64 " %" PRIi64 " %" PRIi64 "\n", source, target, weight);
        index++;
    }
}
static void setup() { teardown(); assert(nodeNumber == -1); while(nodeNumber % 2 != 0) { nodeNumber = st_randomInt(0, 100); } assert(nodeNumber >= 0); assert(nodeNumber % 2 == 0); stubs = stList_construct3(0, (void (*)(void *))stIntTuple_destruct); chains = stList_construct3(0, (void (*)(void *))stIntTuple_destruct); for(int64_t i=0; i<nodeNumber/2; i++) { assert(nodeNumber/2 > 0); stIntTuple *edge = stIntTuple_construct2(i, nodeNumber/2 + i); if(stList_length(stubs) == 0 || st_random() > 0.9) { stList_append(stubs, edge); } else { stList_append(chains, edge); } } zMatrix = st_calloc(nodeNumber*nodeNumber, sizeof(float)); for(int64_t i=0; i<nodeNumber; i++) { for(int64_t j=i+1; j<nodeNumber; j++) { double score = st_random(); zMatrix[i * nodeNumber + j] = score; zMatrix[j * nodeNumber + i] = score; } } st_logDebug("To test the adjacency problem we've created a problem with %" PRIi64 " nodes %" PRIi64 " stubs and %" PRIi64 " chains\n", nodeNumber, stList_length(stubs), stList_length(chains)); }
// TODO: see if we can make this one command static void bulkSetRecords(stKVDatabase *database, stList *records) { startTransaction(database); stTry { for(int32_t i=0; i<stList_length(records); i++) { stKVDatabaseBulkRequest *request = stList_get(records, i); switch(request->type) { case UPDATE: updateRecord(database, request->key, request->value, request->size); break; case INSERT: insertRecord(database, request->key, request->value, request->size); break; case SET: setRecord(database, request->key, request->value, request->size); break; } } commitTransaction(database); }stCatch(ex) { abortTransaction(database); stThrowNewCause( ex, ST_KV_DATABASE_EXCEPTION_ID, "MySQL bulk set records failed"); }stTryEnd; }
/*
 * Builds the list of substrings covered by the segments of the given flowers.
 *
 * For every segment instance of every block that has a sequence, a Substring
 * is constructed from the sequence's meta-sequence stringName, the start of
 * the positive-strand form of the segment relative to the sequence start, and
 * the segment length. Segments without a sequence are skipped.
 *
 * The returned list owns its substrings (destroyed with substring_destruct).
 */
static stList *getSubstringsForFlowerSegments(stList *flowers) {
    stList *substrings = stList_construct3(0, (void (*)(void *)) substring_destruct);

    for (int64_t flowerIndex = 0; flowerIndex < stList_length(flowers); flowerIndex++) {
        Flower *flower = stList_get(flowers, flowerIndex);
        /* NOTE(review): declared as Flower_EndIterator although it comes from
         * flower_getBlockIterator - kept as found; confirm the intended type. */
        Flower_EndIterator *blockIt = flower_getBlockIterator(flower);
        for (Block *block = flower_getNextBlock(blockIt); block != NULL;
                block = flower_getNextBlock(blockIt)) {
            Block_InstanceIterator *segmentIt = block_getInstanceIterator(block);
            for (Segment *segment = block_getNext(segmentIt); segment != NULL;
                    segment = block_getNext(segmentIt)) {
                Sequence *sequence = segment_getSequence(segment);
                if (sequence == NULL) {
                    continue;
                }
                /* Coordinates are taken from the positive-strand orientation. */
                Segment *forward = segment_getStrand(segment) ? segment : segment_getReverse(segment);
                assert(segment_getLength(forward) > 0);
                stList_append(substrings,
                        substring_construct(sequence_getMetaSequence(sequence)->stringName,
                                segment_getStart(forward) - sequence_getStart(sequence),
                                segment_getLength(forward)));
            }
            block_destructInstanceIterator(segmentIt);
        }
        flower_destructBlockIterator(blockIt);
    }
    return substrings;
}
/*
 * Checks that appending (here a NULL element) grows the fixture list by one
 * and that the new element is retrievable at the old length's index.
 */
void test_stList_append(CuTest *testCase) {
    setup();

    stList_append(list, NULL);
    CuAssertTrue(testCase, stList_length(list) == stringNumber + 1);

    void *lastElement = stList_get(list, stringNumber);
    CuAssertTrue(testCase, lastElement == NULL);

    teardown();
}
/*
 * Checks stList_filterToExclude / stList_filterToInclude against a set holding
 * the first and fifth fixture strings: the exclude list must hold strings
 * 1..3 in order, the include list strings 0 and 4 in order.
 */
void test_stList_filter(CuTest *testCase) {
    setup();
    stSortedSet *set = stSortedSet_construct();
    stSortedSet_insert(set, strings[0]);
    stSortedSet_insert(set, strings[4]);
    stList *list2 = stList_filterToExclude(list, set);
    stList *list3 = stList_filterToInclude(list, set);
    CuAssertTrue(testCase,stList_length(list2) == 3);
    CuAssertTrue(testCase,stList_length(list3) == 2);
    CuAssertTrue(testCase,stList_get(list2, 0) == strings[1]);
    CuAssertTrue(testCase,stList_get(list2, 1) == strings[2]);
    CuAssertTrue(testCase,stList_get(list2, 2) == strings[3]);
    CuAssertTrue(testCase,stList_get(list3, 0) == strings[0]);
    CuAssertTrue(testCase,stList_get(list3, 1) == strings[4]);

    /*
     * Release the containers created by this test (they were leaked before).
     * The strings belong to the fixture, so clear any element destructor on
     * the filtered lists first to be sure only the containers are freed.
     */
    stList_setDestructor(list2, NULL);
    stList_destruct(list2);
    stList_setDestructor(list3, NULL);
    stList_destruct(list3);
    stSortedSet_destruct(set);

    teardown();
}
/*
 * Clones the root of a tree for a subrange.
 *
 * subrangeCloneNode is asked to clone srcRoot and may report pending subtrees
 * instead of a node. If no node comes back (the root was deleted), then:
 *  - more than one pending subtree is a fatal error (errAbort),
 *  - exactly one pending subtree becomes the new root,
 *  - none leaves the result NULL.
 */
static stTree *subrangeCloneRoot(stTree *srcRoot, struct malnCompCompMap *srcDestCompMap) {
    stList *orphanSubtrees = stList_construct();
    stTree *clonedRoot = subrangeCloneNode(srcRoot, srcDestCompMap, orphanSubtrees);

    if (clonedRoot == NULL) {
        /* Root was dropped; at most one child may take its place. */
        if (stList_length(orphanSubtrees) > 1) {
            struct mafTreeNodeCompLink *rootLink = getNodeCompLink(srcRoot);
            errAbort("deleted tree root %s (component: %s:%d-%d/%c)) has more that one child",
                    stTree_getLabel(srcRoot), rootLink->comp->seq->orgSeqName,
                    rootLink->comp->start, rootLink->comp->end, rootLink->comp->strand);
        } else if (stList_length(orphanSubtrees) == 1) {
            clonedRoot = stList_pop(orphanSubtrees);
        }
    }

    stList_destruct(orphanSubtrees);
    return clonedRoot;
}
static void test_stSortedSet_searchGreaterThan(CuTest* testCase) { sonLibSortedSetTestSetup(); for(int32_t i=0; i<size; i++) { stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[i])); } //static int32_t sortedInput[] = { -10, -1, 1, 3, 5, 10, 12 }; CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet, stIntTuple_construct(1, -11)) == stSortedSet_search(sortedSet, stIntTuple_construct(1, -10))); CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet, stIntTuple_construct(1, -10)) == stSortedSet_search(sortedSet, stIntTuple_construct(1, -1))); CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet, stIntTuple_construct(1, -5)) == stSortedSet_search(sortedSet, stIntTuple_construct(1, -1))); CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet, stIntTuple_construct(1, 1)) == stSortedSet_search(sortedSet, stIntTuple_construct(1, 3))); CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet, stIntTuple_construct(1, 13)) == NULL); CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet, stIntTuple_construct(1, 12)) == NULL); for(int32_t i=0; i<100; i++) { stSortedSet_insert(sortedSet, stIntTuple_construct(1, st_randomInt(-1000, 1000))); } stList *list = stSortedSet_getList(sortedSet); for(int32_t i=1; i<stList_length(list); i++) { stIntTuple *p = stList_get(list, i-1); stIntTuple *j = stList_get(list, i); stIntTuple *k = stIntTuple_construct(1, st_randomInt(stIntTuple_getPosition(p, 0), stIntTuple_getPosition(j, 0))); CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet, k) == j); stIntTuple_destruct(k); } stList_destruct(list); sonLibSortedSetTestTeardown(); }
/*
 * Prints one report line per SegmentHolder in `positions`, followed by the
 * MAF block of the holder's segment (via getMAFBlock).
 *
 * Line format: "<substitutionType>: <eventHeader>_<sequenceLength>
 * <coordinate> <base1> <base2> <base3>", where the coordinate is the segment
 * start plus the holder's offset on the positive strand, or minus the offset
 * on the negative strand.
 */
void printPositions(stList *positions, const char *substitutionType, FILE *fileHandle) {
    for (int64_t index = 0; index < stList_length(positions); index++) {
        SegmentHolder *holder = stList_get(positions, index);
        int64_t coordinate = segment_getStart(holder->segment);

        if (segment_getStrand(holder->segment)) {
            coordinate += holder->offset;
            /* Sanity checks: caps sit at the two ends of the segment. */
            assert(cap_getCoordinate(segment_get5Cap(holder->segment))
                    == segment_getStart(holder->segment));
            assert(segment_getStart(holder->segment) + segment_getLength(holder->segment) - 1
                    == cap_getCoordinate(segment_get3Cap(holder->segment)));
        } else {
            coordinate -= holder->offset;
            /* On the negative strand the 3' cap lies below the start. */
            assert(segment_getStart(holder->segment) - segment_getLength(holder->segment) + 1
                    == cap_getCoordinate(segment_get3Cap(holder->segment)));
        }

        fprintf(fileHandle, "%s: %s_%" PRIi64 " %" PRIi64 " %c %c %c\n", substitutionType,
                event_getHeader(segment_getEvent(holder->segment)),
                sequence_getLength(segment_getSequence(holder->segment)), coordinate,
                holder->base1, holder->base2, holder->base3);
        getMAFBlock(segment_getBlock(holder->segment), fileHandle);
    }
}
/*
 * Recursively fills `list` with every cap reachable from `cap` through lifted
 * edges (looked up in liftedEdgesTable, following each edge's destination)
 * and through adjacencies. A cap already present in the list ends the
 * recursion, so each cap is added at most once.
 */
static void buildFaces_fillTopNodeList(Cap * cap, stList *list,
        stHash *liftedEdgesTable) {
    // Limit of recursion: this cap has already been visited
    if (stList_contains(list, cap)) {
        return;
    }

    // Actual filling. %p requires a void * argument, hence the cast.
    st_logInfo("Adding cap %p to face\n", (void *) cap);
    stList_append(list, cap);

    // Recursion through lifted edges
    stList *liftedEdges = stHash_search(liftedEdgesTable, cap);
    if (liftedEdges != NULL) {
        for (int64_t index = 0; index < stList_length(liftedEdges); index++) {
            buildFaces_fillTopNodeList(
                    ((LiftedEdge *) stList_get(liftedEdges, index))->destination,
                    list, liftedEdgesTable);
        }
    }

    // Recursion through adjacency
    if (cap_getAdjacency(cap)) {
        buildFaces_fillTopNodeList(cap_getAdjacency(cap), list, liftedEdgesTable);
    }
}
/*
 * Extends chosenEdges to a perfect matching over `nodes`.
 *
 * Nodes that are not an endpoint of any chosen edge are visited in the set's
 * iteration order and paired up two at a time; for each pair the edge
 * returned by getEdgeForNodes (looked up via a node-to-edges hash of
 * adjacencyEdges) is appended to chosenEdges.
 *
 * Asserts that no unattached node is left over and that the final matching
 * covers every node.
 */
static void makeMatchingPerfect(stList *chosenEdges, stList *adjacencyEdges,
        stSortedSet *nodes) {
    stSortedSet *matchedNodes = getNodeSetOfEdges(chosenEdges);
    stHash *nodesToAdjacencyEdges = getNodesToEdgesHash(adjacencyEdges);

    /* Unmatched node still waiting for a partner, if any. */
    stIntTuple *waiting = NULL;
    stSortedSetIterator *nodeIt = stSortedSet_getIterator(nodes);
    for (stIntTuple *node = stSortedSet_getNext(nodeIt); node != NULL;
            node = stSortedSet_getNext(nodeIt)) {
        if (stSortedSet_search(matchedNodes, node) != NULL) {
            continue; /* already has a partner */
        }
        if (waiting == NULL) {
            waiting = node;
            continue;
        }
        stList_append(chosenEdges, getEdgeForNodes(stIntTuple_get(waiting, 0),
                stIntTuple_get(node, 0), nodesToAdjacencyEdges));
        waiting = NULL;
    }
    stSortedSet_destructIterator(nodeIt);

    /* An odd number of unmatched nodes would leave one waiting. */
    assert(waiting == NULL);
    stSortedSet_destruct(matchedNodes);
    assert(stList_length(chosenEdges) * 2 == stSortedSet_size(nodes));
    stHash_destruct(nodesToAdjacencyEdges);
}