static void splitIntoAdjacenciesStubsAndChains(stList *subCycle, stList *adjacencyEdges, stList *stubEdges,
        stList *chainEdges, stList **subAdjacencyEdges, stList **subStubEdges, stList **subChainEdges) {
    /*
     * Builds three new lists describing the part of the graph touched by subCycle:
     *  - *subStubEdges: the edges of subCycle that also appear in stubEdges;
     *  - *subChainEdges: the edges of subCycle that are not stub edges but appear in chainEdges;
     *  - *subAdjacencyEdges: the members of adjacencyEdges whose two endpoints
     *    (tuple positions 0 and 1) are both nodes of subCycle.
     * The new lists are made with stList_construct() and hold the same edge pointers as the inputs.
     */
    *subStubEdges = stList_construct();
    *subChainEdges = stList_construct();
    for (int64_t cycleIndex = 0; cycleIndex < stList_length(subCycle); cycleIndex++) {
        stIntTuple *cycleEdge = stList_get(subCycle, cycleIndex);
        if (stList_contains(stubEdges, cycleEdge)) {
            stList_append(*subStubEdges, cycleEdge);
            continue;
        }
        // Only consulted when the edge is not a stub edge.
        if (stList_contains(chainEdges, cycleEdge)) {
            stList_append(*subChainEdges, cycleEdge);
        }
    }

    *subAdjacencyEdges = stList_construct();
    stSortedSet *cycleNodes = getNodeSetOfEdges(subCycle);
    for (int64_t adjacencyIndex = 0; adjacencyIndex < stList_length(adjacencyEdges); adjacencyIndex++) {
        stIntTuple *candidate = stList_get(adjacencyEdges, adjacencyIndex);
        // Nested tests keep the second lookup conditional on the first succeeding.
        if (nodeInSet(cycleNodes, stIntTuple_get(candidate, 0))) {
            if (nodeInSet(cycleNodes, stIntTuple_get(candidate, 1))) {
                stList_append(*subAdjacencyEdges, candidate);
            }
        }
    }
    stSortedSet_destruct(cycleNodes);
}
stList *mergeSimpleCycles(stList *cycles, stList *nonZeroWeightAdjacencyEdges, stSortedSet *allAdjacencyEdges) {
    /*
     * Takes a list of simple cycles (each a list of adjacency edges) and returns a single
     * merged cycle as a list of edges, obtained by calling doBestMergeOfTwoSimpleCycles
     * until only one cycle is left.
     *
     * The caller's outer list and inner lists are not modified: both levels are copied
     * first (with a NULL destructor), and the merging is done on the copies.
     */
    stList *workingCycles = stList_copy(cycles, NULL);
    for (int64_t cycleIndex = 0; cycleIndex < stList_length(workingCycles); cycleIndex++) {
        stList *cycle = stList_get(workingCycles, cycleIndex);
        assert(stList_length(cycle) > 0);
        assert(!stList_contains(cycle, NULL));
        // Swap the shared inner list for a private copy.
        stList_set(workingCycles, cycleIndex, stList_copy(cycle, NULL));
    }

    while (stList_length(workingCycles) > 1) {
        doBestMergeOfTwoSimpleCycles(workingCycles, nonZeroWeightAdjacencyEdges, allAdjacencyEdges);
    }
    assert(stList_length(workingCycles) == 1);

    // The container was copied with a NULL destructor, so destroying it leaves the merged cycle alive.
    stList *mergedCycle = stList_get(workingCycles, 0);
    stList_destruct(workingCycles);
    return mergedCycle;
}
static void checkComponents(CuTest *testCase, stList *filteredEdges) {
    /*
     * Test helper. Rebuilds the connected components implied by filteredEdges and checks that
     *  (1) every component has between 1 and maxComponentSize nodes, and
     *  (2) every edge of the file-level 'edges' list that is missing from filteredEdges joins
     *      two distinct components whose combined size exceeds maxComponentSize.
     */
    stHash *nodesToComponents = getComponents(filteredEdges);

    //Size bounds on every component.
    stList *components = stHash_getValues(nodesToComponents);
    for (int64_t componentIndex = 0; componentIndex < stList_length(components); componentIndex++) {
        stSortedSet *component = stList_get(components, componentIndex);
        CuAssertTrue(testCase, stSortedSet_size(component) <= maxComponentSize);
        CuAssertTrue(testCase, stSortedSet_size(component) >= 1);
    }

    //Every rejected edge must have been impossible to add.
    stSortedSet *keptEdges = stList_getSortedSet(filteredEdges,
            (int(*)(const void *, const void *)) stIntTuple_cmpFn);
    for (int64_t edgeIndex = 0; edgeIndex < stList_length(edges); edgeIndex++) {
        stIntTuple *edge = stList_get(edges, edgeIndex);
        if (stSortedSet_search(keptEdges, edge) != NULL) {
            continue; //Edge was kept, nothing to verify.
        }
        //Temporary single-value tuples used as hash keys for the two endpoints (positions 1 and 2).
        stIntTuple *node1 = stIntTuple_construct1(stIntTuple_get(edge, 1));
        stIntTuple *node2 = stIntTuple_construct1(stIntTuple_get(edge, 2));
        stSortedSet *component1 = stHash_search(nodesToComponents, node1);
        stSortedSet *component2 = stHash_search(nodesToComponents, node2);
        CuAssertTrue(testCase, component1 != NULL && component2 != NULL);
        CuAssertTrue(testCase, component1 != component2);
        CuAssertTrue(testCase,
                stSortedSet_size(component1) + stSortedSet_size(component2) > maxComponentSize);
        stIntTuple_destruct(node1);
        stIntTuple_destruct(node2);
    }
    stSortedSet_destruct(keptEdges);

    //Cleanup: the hash values are first collapsed into a set, then each component is destroyed
    //through the set's destructor.
    stSortedSet *uniqueComponents = stList_getSortedSet(components, NULL);
    stList_destruct(components);
    stSortedSet_setDestructor(uniqueComponents, (void(*)(void *)) stSortedSet_destruct);
    stSortedSet_destruct(uniqueComponents);
    stHash_destruct(nodesToComponents);
}
static void test_stSortedSet_searchGreaterThan(CuTest* testCase) {
    /*
     * Checks stSortedSet_searchGreaterThan against fixed probes and then against random
     * probes drawn between neighbouring elements of the set.
     *
     * Every probe/key tuple built only to run a query is destroyed again before the
     * assertion is evaluated, so the test no longer leaks one tuple per query.
     */
    sonLibSortedSetTestSetup();
    for(int32_t i=0; i<size; i++) {
        stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[i]));
    }
    //static int32_t sortedInput[] = { -10, -1, 1, 3, 5, 10, 12 };

    //Probes with a successor: { probe value, value of the expected successor }.
    static const int32_t probesWithSuccessor[][2] = { { -11, -10 }, { -10, -1 }, { -5, -1 }, { 1, 3 } };
    const int32_t probesWithSuccessorNumber =
            (int32_t) (sizeof(probesWithSuccessor) / sizeof(probesWithSuccessor[0]));
    for(int32_t i=0; i<probesWithSuccessorNumber; i++) {
        stIntTuple *probe = stIntTuple_construct(1, probesWithSuccessor[i][0]);
        stIntTuple *expectedKey = stIntTuple_construct(1, probesWithSuccessor[i][1]);
        void *found = stSortedSet_searchGreaterThan(sortedSet, probe);
        void *expected = stSortedSet_search(sortedSet, expectedKey);
        //Free the temporaries first so that a failing assertion cannot leak them.
        stIntTuple_destruct(probe);
        stIntTuple_destruct(expectedKey);
        CuAssertTrue(testCase, found == expected);
    }

    //Probes at or beyond the largest element: nothing is greater.
    static const int32_t probesWithoutSuccessor[] = { 13, 12 };
    const int32_t probesWithoutSuccessorNumber =
            (int32_t) (sizeof(probesWithoutSuccessor) / sizeof(probesWithoutSuccessor[0]));
    for(int32_t i=0; i<probesWithoutSuccessorNumber; i++) {
        stIntTuple *probe = stIntTuple_construct(1, probesWithoutSuccessor[i]);
        void *found = stSortedSet_searchGreaterThan(sortedSet, probe);
        stIntTuple_destruct(probe);
        CuAssertTrue(testCase, found == NULL);
    }

    //Randomised check: a probe drawn with st_randomInt between two neighbours must resolve to the later one.
    for(int32_t i=0; i<100; i++) {
        stSortedSet_insert(sortedSet, stIntTuple_construct(1, st_randomInt(-1000, 1000)));
    }
    stList *list = stSortedSet_getList(sortedSet);
    for(int32_t i=1; i<stList_length(list); i++) {
        stIntTuple *p = stList_get(list, i-1);
        stIntTuple *j = stList_get(list, i);
        stIntTuple *k = stIntTuple_construct(1, st_randomInt(stIntTuple_getPosition(p, 0), stIntTuple_getPosition(j, 0)));
        void *found = stSortedSet_searchGreaterThan(sortedSet, k);
        stIntTuple_destruct(k);
        CuAssertTrue(testCase, found == j);
    }
    stList_destruct(list);
    sonLibSortedSetTestTeardown();
}
static void debugScaffoldPaths(stList *haplotypePaths, stHash *haplotypePathToScaffoldPathHash, stHash *haplotypeToMaximalHaplotypeLengthHash, stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters) { stHash *segmentToMaximalHaplotypePathHash = buildSegmentToContigPathHash(haplotypePaths); for (int64_t i = 0; i < stList_length(haplotypePaths); i++) { stList *haplotypePath = stList_get(haplotypePaths, i); assert(stList_length(haplotypePath) > 0); //Traversing from 5' end.. Segment *_5Segment = stList_get(haplotypePath, 0); Segment *_3Segment = stList_get(haplotypePath, stList_length(haplotypePath) - 1); assert(segment_getStrand(_5Segment) == segment_getStrand(_3Segment)); if (!segment_getStrand(_5Segment)) { Segment *j = _5Segment; _5Segment = segment_getReverse(_3Segment); _3Segment = segment_getReverse(j); } assert(segment_getStrand(_5Segment)); assert(segment_getStrand(_3Segment)); Cap *_5Cap = segment_get5Cap(_5Segment); Cap *_3Cap = segment_get3Cap(_3Segment); if (getAdjacentCapsSegment(_5Cap) != NULL) { assert(!trueAdjacency(_5Cap, haplotypeEventStrings)); } if (getAdjacentCapsSegment(_3Cap) != NULL) { assert(!trueAdjacency(_3Cap, haplotypeEventStrings)); } debugScaffoldPathsP(_5Cap, haplotypePath, haplotypePathToScaffoldPathHash, haplotypeToMaximalHaplotypeLengthHash, segmentToMaximalHaplotypePathHash, haplotypeEventStrings, contaminationEventStrings, capCodeParameters, 1); debugScaffoldPathsP(_3Cap, haplotypePath, haplotypePathToScaffoldPathHash, haplotypeToMaximalHaplotypeLengthHash, segmentToMaximalHaplotypePathHash, haplotypeEventStrings, contaminationEventStrings, capCodeParameters, 0); } stHash_destruct(segmentToMaximalHaplotypePathHash); }
static stHash *getComponents(stList *filteredEdges) {
    /*
     * A deliberately naive reimplementation of the greedy function, kept only to trap typos.
     *
     * Returns a hash from node tuple to the sorted set of nodes in its component. Each node of
     * the file-level 'nodes' list starts in its own one-element set; each edge of
     * filteredEdges (endpoints at tuple positions 1 and 2) then merges the sets of its ends.
     * The hash is built with NULL key/value destructors, so the caller frees the sets.
     */
    stHash *nodesToComponents = stHash_construct3((uint64_t(*)(const void *)) stIntTuple_hashKey,
            (int(*)(const void *, const void *)) stIntTuple_equalsFn, NULL, NULL);

    //Start with one singleton component per node.
    for (int64_t nodeIndex = 0; nodeIndex < stList_length(nodes); nodeIndex++) {
        stIntTuple *node = stList_get(nodes, nodeIndex);
        stSortedSet *singleton = stSortedSet_construct();
        stSortedSet_insert(singleton, node);
        stHash_insert(nodesToComponents, node, singleton);
    }

    //Union the components joined by each edge.
    for (int64_t edgeIndex = 0; edgeIndex < stList_length(filteredEdges); edgeIndex++) {
        stIntTuple *edge = stList_get(filteredEdges, edgeIndex);
        //Temporary lookup keys for the two endpoints.
        stIntTuple *node1 = stIntTuple_construct1(stIntTuple_get(edge, 1));
        stIntTuple *node2 = stIntTuple_construct1(stIntTuple_get(edge, 2));
        stSortedSet *component1 = stHash_search(nodesToComponents, node1);
        stSortedSet *component2 = stHash_search(nodesToComponents, node2);
        assert(component1 != NULL && component2 != NULL);
        if (component1 != component2) {
            stSortedSet *merged = stSortedSet_getUnion(component1, component2);
            //Repoint every member at the merged set before the old sets go away.
            stSortedSetIterator *memberIt = stSortedSet_getIterator(merged);
            for (stIntTuple *member = stSortedSet_getNext(memberIt); member != NULL;
                    member = stSortedSet_getNext(memberIt)) {
                stHash_insert(nodesToComponents, member, merged);
            }
            stSortedSet_destructIterator(memberIt);
            stSortedSet_destruct(component1);
            stSortedSet_destruct(component2);
        }
        stIntTuple_destruct(node1);
        stIntTuple_destruct(node2);
    }
    return nodesToComponents;
}
static stList *mergeSubstrings(stList *substrings, int64_t proximityToMerge) {
    /*
     * Collapses substrings into fewer, longer ones. After sorting, a substring is folded into
     * the previous merged substring when both have the same name and it starts no more than
     * proximityToMerge past the end of the previous one; otherwise it starts a new entry.
     *
     * Side effect: 'substrings' is sorted in place with substring_cmp.
     * The returned list owns clones (its destructor is substring_destruct).
     */
    stList *merged = stList_construct3(0, (void (*)(void *)) substring_destruct);
    if (stList_length(substrings) == 0) {
        return merged;
    }
    stList_sort(substrings, (int (*)(const void *, const void *)) substring_cmp);

    Substring *current = substring_clone(stList_get(substrings, 0));
    stList_append(merged, current);
    for (int64_t index = 1; index < stList_length(substrings); index++) {
        Substring *next = stList_get(substrings, index);
        if (current->name != next->name
                || current->start + current->length + proximityToMerge < next->start) {
            //Different sequence, or too far away: begin a new merged substring.
            current = substring_clone(next);
            stList_append(merged, current);
        } else if (current->start + current->length < next->start + next->length) {
            //Close enough and reaching further: stretch the current substring to cover it.
            current->length = next->start + next->length - current->start;
        }
    }
    return merged;
}
void stCaf_addAdjacencies(Flower *flower) {
    /*
     * Collects every cap of every end in the flower (in positive-strand orientation), sorts
     * them with addAdjacenciesPP, and makes consecutive pairs in the sorted order adjacent.
     */
    stList *caps = stList_construct();
    Flower_EndIterator *endIt = flower_getEndIterator(flower);
    for (End *end = flower_getNextEnd(endIt); end != NULL; end = flower_getNextEnd(endIt)) {
        End_InstanceIterator *capIt = end_getInstanceIterator(end);
        for (Cap *cap = end_getNext(capIt); cap != NULL; cap = end_getNext(capIt)) {
            //Store the positive-strand orientation of each cap.
            stList_append(caps, cap_getStrand(cap) ? cap : cap_getReverse(cap));
        }
        end_destructInstanceIterator(capIt);
    }
    flower_destructEndIterator(endIt);
    assert(stList_length(caps) % 2 == 0);

    //Sort the caps so that partners end up next to each other.
    stList_sort(caps, (int(*)(const void *, const void *)) addAdjacenciesPP);

    //Pair them up: (0,1), (2,3), ...
    for (int64_t left = 0; left + 1 < stList_length(caps); left += 2) {
        Cap *leftCap = stList_get(caps, left);
        Cap *rightCap = stList_get(caps, left + 1);
        cap_makeAdjacent(leftCap, rightCap);
    }

    stList_destruct(caps);
}
void printEventNoisesAndParams(NanoporeRead *npRead, stList *templateKmers, stList *complementKmers) {
    /*
     * Writes one tab-separated line per event to stdout, template events first and then
     * complement events. Columns:
     *   kmer | stDev | scale_sd | shift_sd | var_sd | strand ("t" or "c")
     * The stDev column is the value at offset 1 within each event's NB_EVENT_PARAMS-wide record.
     */
    for (int64_t eventIndex = 0; eventIndex < npRead->nbTemplateEvents; eventIndex++) {
        int64_t recordStart = eventIndex * NB_EVENT_PARAMS;
        char *kmer = (char *)stList_get(templateKmers, eventIndex);
        fprintf(stdout, "%s\t%f\t%f\t%f\t%f\t%s\n",
                kmer,
                npRead->templateEvents[recordStart + 1], // st_dev
                npRead->templateParams.scale_sd,
                npRead->templateParams.shift_sd,
                npRead->templateParams.var_sd,
                "t");
    }

    for (int64_t eventIndex = 0; eventIndex < npRead->nbComplementEvents; eventIndex++) {
        int64_t recordStart = eventIndex * NB_EVENT_PARAMS;
        char *kmer = (char *)stList_get(complementKmers, eventIndex);
        fprintf(stdout, "%s\t%f\t%f\t%f\t%f\t%s\n",
                kmer,
                npRead->complementEvents[recordStart + 1], // st_dev
                npRead->complementParams.scale_sd,
                npRead->complementParams.shift_sd,
                npRead->complementParams.var_sd,
                "c");
    }
}
Hmm *hmm_loadFromFile(const char *fileName) { FILE *fH = fopen(fileName, "r"); char *string = stFile_getLineFromFile(fH); stList *tokens = stString_split(string); if (stList_length(tokens) < 2) { st_errAbort("Got an empty line in the input state machine file %s\n", fileName); } int type; int64_t j = sscanf(stList_get(tokens, 0), "%i", &type); if (j != 1) { st_errAbort("Failed to parse state number (int) from string: %s\n", string); } Hmm *hmm = hmm_constructEmpty(0.0, type); if (stList_length(tokens) != hmm->stateNumber * hmm->stateNumber + 2) { st_errAbort( "Got the wrong number of transitions in the input state machine file %s, got %" PRIi64 " instead of %" PRIi64 "\n", fileName, stList_length(tokens), hmm->stateNumber * hmm->stateNumber + 2); } for (int64_t i = 0; i < hmm->stateNumber * hmm->stateNumber; i++) { j = sscanf(stList_get(tokens, i + 1), "%lf", &(hmm->transitions[i])); if (j != 1) { st_errAbort("Failed to parse transition prob (float) from string: %s\n", string); } } j = sscanf(stList_get(tokens, stList_length(tokens) - 1), "%lf", &(hmm->likelihood)); if (j != 1) { st_errAbort("Failed to parse likelihood (float) from string: %s\n", string); } //Cleanup transitions line free(string); stList_destruct(tokens); //Now parse the emissions line string = stFile_getLineFromFile(fH); tokens = stString_split(string); if (stList_length(tokens) != hmm->stateNumber * SYMBOL_NUMBER_NO_N * SYMBOL_NUMBER_NO_N) { st_errAbort( "Got the wrong number of emissions in the input state machine file %s, got %" PRIi64 " instead of %" PRIi64 "\n", fileName, stList_length(tokens), hmm->stateNumber * SYMBOL_NUMBER_NO_N * SYMBOL_NUMBER_NO_N); } for (int64_t i = 0; i < hmm->stateNumber * SYMBOL_NUMBER_NO_N * SYMBOL_NUMBER_NO_N; i++) { j = sscanf(stList_get(tokens, i), "%lf", &(hmm->emissions[i])); if (j != 1) { st_errAbort("Failed to parse emission prob (float) from string: %s\n", string); } } //Final cleanup free(string); stList_destruct(tokens); fclose(fH); return hmm; }
/*
 * Constructs a face from a given Cap.
 *
 * The top nodes of the face are the caps collected by buildFaces_fillTopNodeList starting
 * from startingCap. For each top node, its lifted edges (looked up in liftedEdgesTable)
 * supply the bottom nodes, and each bottom node's derived destination is recorded.
 */
static void buildFaces_constructFromCap(Cap * startingCap,
        stHash *liftedEdgesTable, Flower * flower) {
    Face *face = face_construct(flower);
    stList *topNodes = stList_construct3(16, NULL);

    printf("Constructing new face");

    // Gather the top nodes reachable from the starting cap
    buildFaces_fillTopNodeList(startingCap, topNodes, liftedEdgesTable);

#ifndef NDEBUG
    // A face without top nodes should be impossible
    if (stList_length(topNodes) == 0) {
        abort();
    }
#endif

    // Size the face for its top nodes
    face_allocateSpace(face, stList_length(topNodes));

    for (int64_t topIndex = 0; topIndex < stList_length(topNodes); topIndex++) {
        Cap *topNode = stList_get(topNodes, topIndex);
        face_setTopNode(face, topIndex, topNode);

        stList *liftedEdges = stHash_search(liftedEdgesTable, topNode);
        if (liftedEdges == NULL) {
            // No lifted edges means no bottom nodes for this top node
            face_setBottomNodeNumber(face, topIndex, 0);
            continue;
        }
        face_setBottomNodeNumber(face, topIndex, stList_length(liftedEdges));

        // One bottom node per lifted edge
        for (int64_t bottomIndex = 0; bottomIndex < stList_length(liftedEdges); bottomIndex++) {
            LiftedEdge *liftedEdge = stList_get(liftedEdges, bottomIndex);
            Cap *bottomNode = liftedEdge->bottomNode;
            face_addBottomNode(face, topIndex, bottomNode);

            Cap *ancestor = cap_getTopCap(cap_getPositiveOrientation(
                    cap_getAdjacency(bottomNode)));
            // A derived destination is stored only when it differs from the top node's own adjacency
            if (cap_getAdjacency(topNode) != ancestor) {
                face_setDerivedDestination(face, topIndex, bottomIndex, ancestor);
            } else {
                face_setDerivedDestination(face, topIndex, bottomIndex, NULL);
            }

#ifndef NDEBUG
            // A bottom node must not also be one of the top nodes
            assert(!stList_contains(topNodes, cap_getPositiveOrientation(liftedEdge->bottomNode)));
#endif
        }
    }

    // Clean up
    stList_destruct(topNodes);
}
static void doBestMergeOfTwoSimpleCycles(stList *cycles, stList *nonZeroWeightAdjacencyEdges,
        stSortedSet *allAdjacencyEdges) {
    /*
     * Merges two of the simple cycles in 'cycles' using the best available adjacency switch.
     * The list is modified in place: the two cycles holding the switch's old edges are
     * removed and destroyed, and one new cycle is appended that contains all their edges
     * with the two old edges replaced by the switch's two new edges.
     */
    assert(stList_length(cycles) > 1);

    AdjacencySwitch *bestSwitch = getBestAdjacencySwitch(cycles, nonZeroWeightAdjacencyEdges,
            allAdjacencyEdges);
    assert(bestSwitch != NULL);

    //Locate the cycles that own the two edges being switched out. This holder list destroys
    //the cycles placed in it when it is destructed.
    stList *cyclesToMerge = stList_construct3(0, (void(*)(void *)) stList_destruct);
    for (int64_t cycleIndex = 0; cycleIndex < stList_length(cycles); cycleIndex++) {
        stList *candidate = stList_get(cycles, cycleIndex);
        if (stList_contains(candidate, bestSwitch->oldEdge1)) {
            //The two old edges must sit in different cycles.
            assert(!stList_contains(candidate, bestSwitch->oldEdge2));
            stList_append(cyclesToMerge, candidate);
        } else if (stList_contains(candidate, bestSwitch->oldEdge2)) {
            stList_append(cyclesToMerge, candidate);
        }
    }
    assert(stList_length(cyclesToMerge) == 2);

    //Concatenate their edges into the new cycle.
    stList *mergedCycle = stList_join(cyclesToMerge);
    assert(!stList_contains(mergedCycle, NULL));

    //Detach the two old cycles from the caller's list, then destroy them via the holder.
    assert(stList_contains(cycles, stList_get(cyclesToMerge, 0)));
    stList_removeItem(cycles, stList_get(cyclesToMerge, 0));
    assert(stList_contains(cycles, stList_get(cyclesToMerge, 1)));
    stList_removeItem(cycles, stList_get(cyclesToMerge, 1));
    stList_destruct(cyclesToMerge);

    //Apply the switch: drop the old edges, add the new ones.
    assert(stList_contains(mergedCycle, bestSwitch->oldEdge1));
    stList_removeItem(mergedCycle, bestSwitch->oldEdge1);
    assert(stList_contains(mergedCycle, bestSwitch->oldEdge2));
    stList_removeItem(mergedCycle, bestSwitch->oldEdge2);
    assert(!stList_contains(mergedCycle, bestSwitch->newEdge1));
    stList_append(mergedCycle, bestSwitch->newEdge1);
    assert(!stList_contains(mergedCycle, bestSwitch->newEdge2));
    stList_append(mergedCycle, bestSwitch->newEdge2);
    adjacencySwitch_destruct(bestSwitch);

    //Hand the merged cycle back to the caller's list.
    stList_append(cycles, mergedCycle);
}
static AdjacencySwitch *getBest2EdgeAdjacencySwitch(stList *components, stSortedSet *allAdjacencyEdges) {
    /*
     * Takes the lowest-scoring edge of every component, sorts those edges with
     * getBest2EdgeAdjacencySwitchP, and builds an adjacency switch from the first two
     * (presumably the two cheapest; the comparator is not visible here).
     * The replacement edges are looked up in allAdjacencyEdges, first pairing the edges'
     * position-0 nodes and position-1 nodes, and, if that lookup finds nothing, pairing
     * them crosswise. The switch's final argument is the sum of the two old edges' weights.
     */
    stList *cheapestEdges = stList_construct();
    for (int64_t componentIndex = 0; componentIndex < stList_length(components); componentIndex++) {
        stList_append(cheapestEdges, getLowestScoringEdge(stList_get(components, componentIndex)));
    }

    //Pick the first two edges after sorting.
    stList_sort(cheapestEdges, getBest2EdgeAdjacencySwitchP);
    stIntTuple *oldEdge1 = stList_get(cheapestEdges, 0);
    stIntTuple *oldEdge2 = stList_get(cheapestEdges, 1);
    assert(oldEdge1 != oldEdge2);
    stList_destruct(cheapestEdges); //Only the temporary list is freed, not the edges.

    //First attempt: join like positions.
    stIntTuple *newEdge1 = getWeightedEdgeFromSet(stIntTuple_get(oldEdge1, 0),
            stIntTuple_get(oldEdge2, 0), allAdjacencyEdges);
    stIntTuple *newEdge2 = getWeightedEdgeFromSet(stIntTuple_get(oldEdge1, 1),
            stIntTuple_get(oldEdge2, 1), allAdjacencyEdges);
    if (newEdge1 == NULL) {
        //Fallback: join the nodes crosswise.
        assert(newEdge2 == NULL);
        newEdge1 = getWeightedEdgeFromSet(stIntTuple_get(oldEdge1, 0),
                stIntTuple_get(oldEdge2, 1), allAdjacencyEdges);
        newEdge2 = getWeightedEdgeFromSet(stIntTuple_get(oldEdge1, 1),
                stIntTuple_get(oldEdge2, 0), allAdjacencyEdges);
    }
    assert(newEdge1 != NULL);
    assert(newEdge2 != NULL);

    return adjacencySwitch_construct(oldEdge1, oldEdge2, newEdge1, newEdge2,
            stIntTuple_get(oldEdge1, 2) + stIntTuple_get(oldEdge2, 2));
}
static void test_stPosetAlignment_addAndIsPossible(CuTest *testCase) {
    /*
     * Randomised test of stPosetAlignment_isPossible / stPosetAlignment_add.
     * Each trial draws random sequence lengths, proposes random aligned pairs between two
     * different sequences, and cross-checks the poset's accept/reject decision against
     * containsACycle applied to the pairs accepted so far plus the proposed pair.
     *
     * Fix: the length used to draw position2 is now the length of seq2. It was previously
     * read from seq1 by mistake, so position2 was drawn against the wrong sequence length.
     */
    for(int64_t trial=0; trial<100; trial++) {
        setup();

        //Make random number of sequences.
        stList *sequenceLengths = stList_construct3(0, (void (*)(void *))stIntTuple_destruct);
        for(int64_t i=0; i<sequenceNumber; i++) {
            stList_append(sequenceLengths, stIntTuple_construct1( st_randomInt(0, MAX_SEQUENCE_SIZE)));
        }

        //Propose random alignment pairs...
        stList *pairs = stList_construct3(0, (void(*)(void *))stIntTuple_destruct);
        int64_t maxAlignedPairs = st_randomInt(0, MAX_ALIGNMENTS);
        if(sequenceNumber > 0) {
            for(int64_t i=0; i<maxAlignedPairs; i++) {
                int64_t seq1 = st_randomInt(0, sequenceNumber);
                int64_t seqLength1 = stIntTuple_get(stList_get(sequenceLengths, seq1), 0);
                if(seqLength1 == 0) {
                    continue; //No position can be drawn from an empty sequence.
                }
                int64_t position1 = st_randomInt(0, seqLength1);

                int64_t seq2 = st_randomInt(0, sequenceNumber);
                int64_t seqLength2 = stIntTuple_get(stList_get(sequenceLengths, seq2), 0);
                if(seqLength2 == 0) {
                    continue;
                }
                int64_t position2 = st_randomInt(0, seqLength2);

                if(seq1 != seq2) {
                    //Tentatively record the pair; it is popped again below if the poset rejects it.
                    stList_append(pairs, stIntTuple_construct4( seq1, position1, seq2, position2));
                    if(stPosetAlignment_isPossible(posetAlignment, seq1, position1, seq2, position2)) {
                        st_logInfo("In %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " \n", seq1, position1, seq2, position2);
                        //For each accepted pair check it doesn't create a cycle.
                        CuAssertTrue(testCase, !containsACycle(pairs, sequenceNumber));
                        CuAssertTrue(testCase, stPosetAlignment_add(posetAlignment, seq1, position1, seq2, position2));
                    }
                    else {
                        st_logInfo("Out %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " \n", seq1, position1, seq2, position2);
                        //For each rejected pair check it creates a cycle..
                        CuAssertTrue(testCase, containsACycle(pairs, sequenceNumber));
                        CuAssertTrue(testCase, !stPosetAlignment_isPossible(posetAlignment, seq1, position1, seq2, position2));
                        stIntTuple_destruct(stList_pop(pairs)); //remove the pair which created the cycle.
                        CuAssertTrue(testCase, !containsACycle(pairs, sequenceNumber)); //Check we're back to being okay..
                    }
                }
            }
        }

        //Cleanup
        stList_destruct(sequenceLengths);
        stList_destruct(pairs);
        teardown();
        st_logInfo("Passed a random ordering test with %" PRIi64 " sequences and %" PRIi64 " aligned pairs\n", sequenceNumber, maxAlignedPairs);
    }
}
void test_stList_sort2(CuTest *testCase) {
    /*
     * Sorts the fixture list with stList_sort2 (comparator test_stList_sort2P, extra
     * argument "hello") and checks the length is unchanged and the strings come out in
     * alphabetical order.
     */
    static const char *expectedOrder[] = { "five", "four", "one", "three", "two" };

    setup();
    stList_sort2(list, test_stList_sort2P, "hello");
    CuAssertTrue(testCase, stList_length(list) == stringNumber);
    for (int64_t position = 0; position < 5; position++) {
        CuAssertStrEquals(testCase, expectedOrder[position], stList_get(list, position));
    }
    teardown();
}
void test_stList_sort(CuTest *testCase) {
    /*
     * Sorts the fixture list with stList_sort using strcmp as comparator and checks the
     * length is unchanged and the strings come out in alphabetical order.
     */
    static const char *expectedOrder[] = { "five", "four", "one", "three", "two" };

    setup();
    stList_sort(list, (int (*)(const void *, const void *))strcmp);
    CuAssertTrue(testCase, stList_length(list) == stringNumber);
    for (int64_t position = 0; position < 5; position++) {
        CuAssertStrEquals(testCase, expectedOrder[position], stList_get(list, position));
    }
    teardown();
}
void bottomUp(stList *flowers, stKVDatabase *sequenceDatabase, Name referenceEventName, bool isTop,
        stMatrix *(*generateSubstitutionMatrix)(double)) {
    /*
     * A reference thread between the two caps in each flower f may be broken into two in
     * the children of f. Therefore, for each flower f, first identify attached stub ends
     * present in the children of f that are not present in f and copy them into f,
     * reattaching the reference caps as needed.
     *
     * Afterwards a per-flower phylogenetic tree is stored in the file-level hash
     * segmentWriteFn_flowerToPhylogeneticTreeHash and the reference threads are built.
     * When isTop is set the thread strings are also turned into meta sequences and the
     * coordinates are assigned; otherwise only the recursive threads are built.
     */
    stList *caps = getCaps(flowers, referenceEventName);
    //Iterate from the back because the callee may append to 'caps'.
    for (int64_t capIndex = stList_length(caps) - 1; capIndex >= 0; capIndex--) {
        setAdjacencyLengthsAndRecoverNewCapsAndBrokenAdjacencies(stList_get(caps, capIndex), caps);
    }
    for (int64_t flowerIndex = 0; flowerIndex < stList_length(flowers); flowerIndex++) {
        recoverBrokenAdjacencies(stList_get(flowers, flowerIndex), caps, referenceEventName);
    }

    //Build the phylogenetic event trees for base calling.
    segmentWriteFn_flowerToPhylogeneticTreeHash = stHash_construct2(NULL,
            (void (*)(void *))cleanupPhylogeneticTree);
    for (int64_t flowerIndex = 0; flowerIndex < stList_length(flowers); flowerIndex++) {
        Flower *flower = stList_get(flowers, flowerIndex);
        Event *refEvent = eventTree_getEvent(flower_getEventTree(flower), referenceEventName);
        assert(refEvent != NULL);
        stHash_insert(segmentWriteFn_flowerToPhylogeneticTreeHash, flower,
                getPhylogeneticTreeRootedAtGivenEvent(refEvent, generateSubstitutionMatrix));
    }

    if (!isTop) {
        buildRecursiveThreads(sequenceDatabase, caps, segmentWriteFn, terminalAdjacencyWriteFn);
    } else {
        stList *threadStrings = buildRecursiveThreadsInList(sequenceDatabase, caps, segmentWriteFn,
                terminalAdjacencyWriteFn);
        assert(stList_length(threadStrings) == stList_length(caps));

        //Running indices used for naming: non-trivial sequences count up from 0,
        //trivial ones count up from the number of threads.
        int64_t nonTrivialSeqIndex = 0;
        int64_t trivialSeqIndex = stList_length(threadStrings);

        for (int64_t threadIndex = 0; threadIndex < stList_length(threadStrings); threadIndex++) {
            Cap *cap = stList_get(caps, threadIndex);
            assert(cap_getStrand(cap));
            assert(!cap_getSide(cap));
            Flower *flower = end_getFlower(cap_getEnd(cap));

            char *threadString = stList_get(threadStrings, threadIndex);
            bool trivialString = isTrivialString(&threadString); //This alters the original string

            int64_t sequenceIndex;
            if (trivialString) {
                sequenceIndex = trivialSeqIndex++;
            } else {
                sequenceIndex = nonTrivialSeqIndex++;
            }
            MetaSequence *metaSequence = addMetaSequence(flower, cap, sequenceIndex, threadString,
                    trivialString);
            free(threadString);

            int64_t endCoordinate = setCoordinates(flower, metaSequence, cap,
                    metaSequence_getStart(metaSequence) - 1);
            (void) endCoordinate; //Only read by the assert below.
            assert(endCoordinate == metaSequence_getLength(metaSequence) + metaSequence_getStart(metaSequence));
        }
        //The strings were freed one by one in the loop, so drop the destructor before destroying the list.
        stList_setDestructor(threadStrings, NULL);
        stList_destruct(threadStrings);
    }

    stHash_destruct(segmentWriteFn_flowerToPhylogeneticTreeHash);
    stList_destruct(caps);
}
int main(int argc, char *argv[]) { ////////////////////////////////////////////// //Parse the inputs ////////////////////////////////////////////// parseBasicArguments(argc, argv, "linkageStats"); /////////////////////////////////////////////////////////////////////////// // Get the intervals /////////////////////////////////////////////////////////////////////////// stList *haplotypeEventStrings = getEventStrings( treatHaplotype1AsContamination ? NULL : hap1EventString, treatHaplotype2AsContamination ? NULL : hap2EventString); stList *assemblyEventStringInList = stList_construct(); stList_append(assemblyEventStringInList, assemblyEventString); stList *intervals = stList_construct3(0, (void (*)(void *))sequenceInterval_destruct); for(int64_t i=0; i<stList_length(haplotypeEventStrings); i++) { const char *hapEventString = stList_get(haplotypeEventStrings, i); st_logInfo("Getting contig paths for haplotype: %s", hapEventString); stList *contigPaths = getContigPaths(flower, hapEventString, assemblyEventStringInList); stList *hapIntervals = getSplitContigPathIntervals(flower, contigPaths, hapEventString, assemblyEventStringInList); stList_destruct(contigPaths); st_logInfo("Getting contig paths\n"); stList_appendAll(intervals, hapIntervals); stList_setDestructor(hapIntervals, NULL); stList_destruct(hapIntervals); } st_logDebug("Got a total of %" PRIi64 " intervals\n", stList_length(intervals)); /////////////////////////////////////////////////////////////////////////// // Write it out. 
/////////////////////////////////////////////////////////////////////////// FILE *fileHandle = fopen(outputFile, "w"); for (int64_t i = 0; i < stList_length(intervals); i++) { SequenceInterval *sequenceInterval = stList_get(intervals, i); st_logDebug("We have a path interval %s %" PRIi64 " %" PRIi64 "\n", sequenceInterval->sequenceName, sequenceInterval->start, sequenceInterval->end); fprintf(fileHandle, "%s %" PRIi64 " %" PRIi64 "\n", sequenceInterval->sequenceName, sequenceInterval->start, sequenceInterval->end); } st_logInfo("Finished writing out the stats.\n"); fclose(fileHandle); return 0; }
void printPositions(stList *positions, const char *substitutionType, FILE *fileHandle) {
    /*
     * For every SegmentHolder in 'positions', prints one line of the form
     *   <substitutionType>: <eventHeader>_<sequenceLength> <coordinate> <base1> <base2> <base3>
     * followed by the MAF block of the holder's segment.
     * The coordinate is the segment start plus the holder's offset on the positive strand,
     * and the segment start minus the offset on the negative strand.
     */
    for (int64_t holderIndex = 0; holderIndex < stList_length(positions); holderIndex++) {
        SegmentHolder *segmentHolder = stList_get(positions, holderIndex);

        int64_t coordinate = segment_getStart(segmentHolder->segment);
        if (!segment_getStrand(segmentHolder->segment)) {
            coordinate -= segmentHolder->offset;
            //On the negative strand the 3' cap sits length - 1 below the start.
            assert(segment_getStart(segmentHolder->segment) - segment_getLength(segmentHolder->segment) + 1
                    == cap_getCoordinate(segment_get3Cap(segmentHolder->segment)));
        } else {
            coordinate += segmentHolder->offset;
            assert(cap_getCoordinate(segment_get5Cap(segmentHolder->segment))
                    == segment_getStart(segmentHolder->segment));
            assert(segment_getStart(segmentHolder->segment) + segment_getLength(segmentHolder->segment) - 1
                    == cap_getCoordinate(segment_get3Cap(segmentHolder->segment)));
        }

        fprintf(fileHandle, "%s: %s_%" PRIi64 " %" PRIi64 " %c %c %c\n",
                substitutionType,
                event_getHeader(segment_getEvent(segmentHolder->segment)),
                sequence_getLength(segment_getSequence(segmentHolder->segment)),
                coordinate,
                segmentHolder->base1, segmentHolder->base2, segmentHolder->base3);
        getMAFBlock(segment_getBlock(segmentHolder->segment), fileHandle);
    }
}
// Returns a hash mapping from sequence header to sequence data. static stHash *readFastaFile(char *filename) { FILE *fasta = fopen(filename, "r"); if (fasta == NULL) { st_errnoAbort("Could not open fasta file %s", filename); } stHash *headerToData = stHash_construct3(stHash_stringKey, stHash_stringEqualKey, free, free); struct List *seqs = constructEmptyList(0, NULL); struct List *seqLengths = constructEmptyList(0, free); struct List *headers = constructEmptyList(0, free); fastaRead(fasta, seqs, seqLengths, headers); for (int64_t i = 0; i < seqs->length; i++) { char *fullHeader = headers->list[i]; stList *headerTokens = stString_splitByString(fullHeader, " "); char *usableHeader = stString_copy(stList_get(headerTokens, 0)); stHash_insert(headerToData, usableHeader, seqs->list[i]); stList_destruct(headerTokens); } destructList(seqs); destructList(seqLengths); destructList(headers); return headerToData; }
// TODO: see if we can make this one command static void bulkSetRecords(stKVDatabase *database, stList *records) { startTransaction(database); stTry { for(int32_t i=0; i<stList_length(records); i++) { stKVDatabaseBulkRequest *request = stList_get(records, i); switch(request->type) { case UPDATE: updateRecord(database, request->key, request->value, request->size); break; case INSERT: insertRecord(database, request->key, request->value, request->size); break; case SET: setRecord(database, request->key, request->value, request->size); break; } } commitTransaction(database); }stCatch(ex) { abortTransaction(database); stThrowNewCause( ex, ST_KV_DATABASE_EXCEPTION_ID, "MySQL bulk set records failed"); }stTryEnd; }
static stList *getSubstringsForFlowerSegments(stList *flowers) {
    /*
     * Returns a list with one Substring for each segment, in any block of the given
     * flowers, that has a sequence. Each substring records the meta sequence's string name,
     * the positive-strand segment's start relative to the sequence start, and the segment
     * length. The returned list owns its substrings (destructor substring_destruct).
     */
    stList *substrings = stList_construct3(0, (void (*)(void *)) substring_destruct);
    for (int64_t flowerIndex = 0; flowerIndex < stList_length(flowers); flowerIndex++) {
        Flower *flower = stList_get(flowers, flowerIndex);
        // NOTE(review): the block iterator is held in a Flower_EndIterator pointer, as in
        // the original; presumably the iterator typedefs are interchangeable - confirm.
        Flower_EndIterator *blockIt = flower_getBlockIterator(flower);
        for (Block *block = flower_getNextBlock(blockIt); block != NULL; block = flower_getNextBlock(blockIt)) {
            Block_InstanceIterator *segmentIt = block_getInstanceIterator(block);
            for (Segment *segment = block_getNext(segmentIt); segment != NULL; segment = block_getNext(segmentIt)) {
                Sequence *sequence = segment_getSequence(segment);
                if (sequence == NULL) {
                    continue; // Segments without a sequence contribute nothing.
                }
                // Work with the positive-strand orientation; the loop variable itself is left alone.
                Segment *forwardSegment = segment_getStrand(segment) ? segment : segment_getReverse(segment);
                assert(segment_getLength(forwardSegment) > 0);
                stList_append(substrings,
                        substring_construct(sequence_getMetaSequence(sequence)->stringName,
                                segment_getStart(forwardSegment) - sequence_getStart(sequence),
                                segment_getLength(forwardSegment)));
            }
            block_destructInstanceIterator(segmentIt);
        }
        flower_destructBlockIterator(blockIt);
    }
    return substrings;
}
static AdjacencySwitch *getBest4EdgeAdjacencySwitchP(stIntTuple *oldEdge1, int64_t node1,
        stSortedSet *allAdjacencyEdges, stHash *nodesToAllCurrentEdges, stHash *nodesToBridgingAdjacencyEdges) {
    /*
     * Returns the best adjacency switch for the given node and edge that involves four
     * existing edges, or NULL if node1 has no bridging adjacency edges.
     *
     * With oldEdge1 = (node1, node4): for every bridging edge newEdge1 = (node1, node2),
     * take the single current edge oldEdge2 = (node2, node3) and the edge
     * newEdge2 = (node3, node4) from allAdjacencyEdges. Each candidate's cost is the weight
     * of the two old edges minus the weight of the two new edges (tuple position 2), and
     * candidates are combined through adjacencySwitch_update.
     */
    int64_t node4 = getOtherPosition(oldEdge1, node1);
    stList *bridgingEdges = getItemForNode(node1, nodesToBridgingAdjacencyEdges);
    if (bridgingEdges == NULL) {
        return NULL;
    }

    AdjacencySwitch *bestSwitch = NULL;
    for (int64_t edgeIndex = 0; edgeIndex < stList_length(bridgingEdges); edgeIndex++) {
        stIntTuple *newEdge1 = stList_get(bridgingEdges, edgeIndex);
        int64_t node2 = getOtherPosition(newEdge1, node1);

        //node2 must currently have exactly one edge: the one to be switched out.
        stList *currentEdgesAtNode2 = getItemForNode(node2, nodesToAllCurrentEdges);
        assert(currentEdgesAtNode2 != NULL);
        assert(stList_length(currentEdgesAtNode2) == 1);
        stIntTuple *oldEdge2 = stList_peek(currentEdgesAtNode2);
        int64_t node3 = getOtherPosition(oldEdge2, node2);

        stIntTuple *newEdge2 = getWeightedEdgeFromSet(node3, node4, allAdjacencyEdges);
        assert(newEdge2 != NULL);

        int64_t cost = stIntTuple_get(oldEdge1, 2) + stIntTuple_get(oldEdge2, 2)
                - stIntTuple_get(newEdge1, 2) - stIntTuple_get(newEdge2, 2);
        bestSwitch = adjacencySwitch_update(bestSwitch, oldEdge1, oldEdge2, newEdge1, newEdge2, cost);
    }
    return bestSwitch;
}
void test_stList_append(CuTest *testCase) {
    /*
     * Appends NULL to the fixture list and checks that the list grew by one and that the
     * new last element is the appended NULL.
     */
    setup();
    stList_append(list, NULL);

    //The appended item lands at index stringNumber, directly after the fixture strings.
    CuAssertTrue(testCase, stList_length(list) == stringNumber+1);
    CuAssertTrue(testCase, stList_get(list, stringNumber) == NULL);

    teardown();
}
/*
 * Recursive function which fills a given list with the
 * connected nodes within a module.
 *
 * A cap already present in the list ends the recursion. Otherwise the cap is appended and
 * the walk continues through the destination of each of its lifted edges (if it has any
 * in liftedEdgesTable) and then through its adjacency (if it has one).
 */
static void buildFaces_fillTopNodeList(Cap * cap, stList *list,
        stHash *liftedEdgesTable) {
    // Already visited: stop here
    if (stList_contains(list, cap)) {
        return;
    }

    // Record this cap
    st_logInfo("Adding cap %p to face\n", cap);
    stList_append(list, cap);

    // Follow every lifted edge leaving this cap
    stList *liftedEdges = stHash_search(liftedEdgesTable, cap);
    if (liftedEdges != NULL) {
        for (int64_t edgeIndex = 0; edgeIndex < stList_length(liftedEdges); edgeIndex++) {
            LiftedEdge *liftedEdge = stList_get(liftedEdges, edgeIndex);
            buildFaces_fillTopNodeList(liftedEdge->destination, list, liftedEdgesTable);
        }
    }

    // Follow the adjacency, if any
    if (cap_getAdjacency(cap)) {
        buildFaces_fillTopNodeList(cap_getAdjacency(cap), list, liftedEdgesTable);
    }
}
void test_stList_filter(CuTest *testCase) {
    /*
     * Filters the fixture list against a set holding strings[0] and strings[4] and checks
     * that stList_filterToExclude keeps the other three strings in order while
     * stList_filterToInclude keeps exactly those two.
     *
     * The filter set and both result lists are now destroyed at the end; they were
     * previously leaked.
     */
    setup();
    stSortedSet *set = stSortedSet_construct();
    stSortedSet_insert(set, strings[0]);
    stSortedSet_insert(set, strings[4]);
    stList *list2 = stList_filterToExclude(list, set);
    stList *list3 = stList_filterToInclude(list, set);
    CuAssertTrue(testCase,stList_length(list2) == 3);
    CuAssertTrue(testCase,stList_length(list3) == 2);
    CuAssertTrue(testCase,stList_get(list2, 0) == strings[1]);
    CuAssertTrue(testCase,stList_get(list2, 1) == strings[2]);
    CuAssertTrue(testCase,stList_get(list2, 2) == strings[3]);
    CuAssertTrue(testCase,stList_get(list3, 0) == strings[0]);
    CuAssertTrue(testCase,stList_get(list3, 1) == strings[4]);
    //Cleanup. The set was built without a destructor, so the fixture strings are untouched.
    //NOTE(review): assumes the filter result lists do not own their elements - confirm.
    stList_destruct(list2);
    stList_destruct(list3);
    stSortedSet_destruct(set);
    teardown();
}
void test_stList_get(CuTest *testCase) {
    /*
     * Checks that stList_get returns, at every index, the same pointer as the fixture's
     * 'strings' array.
     */
    setup();
    for (int64_t index = 0; index < stringNumber; index++) {
        CuAssertTrue(testCase, stList_get(list, index) == strings[index]);
    }
    teardown();
}
bool hasCapInEvents(End *end, stList *eventStrings) {
    /*
     * Returns true as soon as hasCapInEvent reports a match for 'end' with any entry of
     * eventStrings; returns false when none matches (including for an empty list).
     */
    int64_t eventIndex = 0;
    while (eventIndex < stList_length(eventStrings)) {
        if (hasCapInEvent(end, stList_get(eventStrings, eventIndex))) {
            return true;
        }
        eventIndex++;
    }
    return false;
}
static bool listsAreEqual(stList *observedList, stList *expectedList) {
    /*
     * Compares two lists of C strings element by element with strcmp.
     * Returns true when both lists have the same length and equal strings at every index.
     * On the first difference a diagnostic is written to stderr and false is returned; for
     * a length mismatch both lists are also printed with printList.
     *
     * The diagnostics use PRIi64 because the values printed are signed 64-bit
     * (the loop index is declared int64_t).
     */
    if (stList_length(observedList) != stList_length(expectedList)) {
        fprintf(stderr, "stList lengths are not equal: %"PRIi64" %"PRIi64"\n",
                (int64_t) stList_length(observedList), (int64_t) stList_length(expectedList));
        printList(observedList, "observed");
        printList(expectedList, "expected");
        return false;
    }
    for (int64_t i = 0; i < stList_length(observedList); ++i) {
        if (strcmp(stList_get(observedList, i), stList_get(expectedList, i)) != 0) {
            fprintf(stderr, "stList elements are not equal at index %"PRIi64": %s %s\n", i,
                    (char *)stList_get(observedList, i), (char *)stList_get(expectedList, i));
            return false;
        }
    }
    return true;
}
stIntTuple *getLowestScoringEdge(stList *edges) {
    /*
     * Returns the edge whose weight (tuple position 2) is smallest. When several edges
     * share the minimum, the one that comes first in the list is returned.
     * The list must not be empty.
     */
    assert(stList_length(edges) > 0);
    stIntTuple *bestEdge = stList_get(edges, 0);
    int64_t bestScore = stIntTuple_get(bestEdge, 2);
    for (int64_t index = 1; index < stList_length(edges); index++) {
        stIntTuple *candidate = stList_get(edges, index);
        int64_t candidateScore = stIntTuple_get(candidate, 2);
        if (candidateScore >= bestScore) {
            continue; //Not strictly better, keep the earlier edge.
        }
        bestScore = candidateScore;
        bestEdge = candidate;
    }
    return bestEdge;
}