static void splitIntoAdjacenciesStubsAndChains(stList *subCycle,
        stList *adjacencyEdges, stList *stubEdges, stList *chainEdges,
        stList **subAdjacencyEdges, stList **subStubEdges,
        stList **subChainEdges) {
    /*
     * Splits run into cycles and chains..
     */
    *subStubEdges = stList_construct();
    *subChainEdges = stList_construct();
    for (int64_t j = 0; j < stList_length(subCycle); j++) {
        stIntTuple *edge = stList_get(subCycle, j);
        if (stList_contains(stubEdges, edge)) {
            stList_append(*subStubEdges, edge);
        } else if (stList_contains(chainEdges, edge)) {
            stList_append(*subChainEdges, edge);
        }
    }
    *subAdjacencyEdges = stList_construct();
    stSortedSet *nodes = getNodeSetOfEdges(subCycle);
    for (int64_t j = 0; j < stList_length(adjacencyEdges); j++) {
        stIntTuple *edge = stList_get(adjacencyEdges, j);
        if (nodeInSet(nodes, stIntTuple_get(edge, 0)) && nodeInSet(
                nodes, stIntTuple_get(edge, 1))) {
            stList_append(*subAdjacencyEdges, edge);
        }
    }
    stSortedSet_destruct(nodes);
}
stList *mergeSimpleCycles(stList *cycles, stList *nonZeroWeightAdjacencyEdges,
        stSortedSet *allAdjacencyEdges) {
    /*
     * Takes a set of simple cycles (containing only the adjacency edges).
     * Returns a single simple cycle, as a list of edges, by doing length(components)-1
     * calls to doBestMergeOfTwoSimpleCycles.
     */

    /*
     * Build a hash of nodes to adjacency edges.
     */

    cycles = stList_copy(cycles, NULL);
    for (int64_t i = 0; i < stList_length(cycles); i++) { //Clone the complete list
        assert(stList_length(stList_get(cycles, i)) > 0);
        assert(!stList_contains(stList_get(cycles, i), NULL));
        stList_set(cycles, i, stList_copy(stList_get(cycles, i), NULL));
    }
    while (stList_length(cycles) > 1) {
        doBestMergeOfTwoSimpleCycles(cycles, nonZeroWeightAdjacencyEdges,
                allAdjacencyEdges);
    }
    assert(stList_length(cycles) == 1);
    stList *mergedComponent = stList_get(cycles, 0);
    stList_destruct(cycles);
    return mergedComponent;
}
Пример #3
0
static void checkComponents(CuTest *testCase, stList *filteredEdges) {
    stHash *nodesToComponents = getComponents(filteredEdges);
    //Check all components are smaller than threshold
    stList *components = stHash_getValues(nodesToComponents);
    for (int64_t i = 0; i < stList_length(components); i++) {
        stSortedSet *component = stList_get(components, i);
        CuAssertTrue(testCase, stSortedSet_size(component) <= maxComponentSize);
        CuAssertTrue(testCase, stSortedSet_size(component) >= 1);
    }
    //Check no edges can be added from those filtered.
    stSortedSet *filteredEdgesSet = stList_getSortedSet(filteredEdges, (int(*)(const void *, const void *)) stIntTuple_cmpFn);
    for (int64_t i = 0; i < stList_length(edges); i++) {
        stIntTuple *edge = stList_get(edges, i);
        if (stSortedSet_search(filteredEdgesSet, edge) == NULL) {
            stIntTuple *node1 = stIntTuple_construct1( stIntTuple_get(edge, 1));
            stIntTuple *node2 = stIntTuple_construct1( stIntTuple_get(edge, 2));
            stSortedSet *component1 = stHash_search(nodesToComponents, node1);
            stSortedSet *component2 = stHash_search(nodesToComponents, node2);
            CuAssertTrue(testCase, component1 != NULL && component2 != NULL);
            CuAssertTrue(testCase, component1 != component2);
            CuAssertTrue(testCase, stSortedSet_size(component1) + stSortedSet_size(component2) > maxComponentSize);
            stIntTuple_destruct(node1);
            stIntTuple_destruct(node2);
        }
    }
    stSortedSet_destruct(filteredEdgesSet);
    //Cleanup the components
    stSortedSet *componentsSet = stList_getSortedSet(components, NULL);
    stList_destruct(components);
    stSortedSet_setDestructor(componentsSet, (void(*)(void *)) stSortedSet_destruct);
    stSortedSet_destruct(componentsSet);
    stHash_destruct(nodesToComponents);
}
Пример #4
0
static void test_stSortedSet_searchGreaterThan(CuTest* testCase) {
    sonLibSortedSetTestSetup();

    for(int32_t i=0; i<size; i++) {
        stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[i]));
    }
    //static int32_t sortedInput[] = { -10, -1, 1, 3, 5, 10, 12 };
    CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet,
                 stIntTuple_construct(1, -11)) == stSortedSet_search(sortedSet, stIntTuple_construct(1, -10)));
    CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet,
                 stIntTuple_construct(1, -10)) == stSortedSet_search(sortedSet, stIntTuple_construct(1, -1)));
    CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet,
                 stIntTuple_construct(1, -5)) == stSortedSet_search(sortedSet, stIntTuple_construct(1, -1)));
    CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet,
                 stIntTuple_construct(1, 1)) == stSortedSet_search(sortedSet, stIntTuple_construct(1, 3)));
    CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet,
                 stIntTuple_construct(1, 13)) == NULL);
    CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet,
                 stIntTuple_construct(1, 12)) == NULL);

    for(int32_t i=0; i<100; i++) {
        stSortedSet_insert(sortedSet, stIntTuple_construct(1, st_randomInt(-1000, 1000)));
    }
    stList *list = stSortedSet_getList(sortedSet);
    for(int32_t i=1; i<stList_length(list); i++) {
        stIntTuple *p = stList_get(list, i-1);
        stIntTuple *j = stList_get(list, i);
        stIntTuple *k = stIntTuple_construct(1, st_randomInt(stIntTuple_getPosition(p, 0), stIntTuple_getPosition(j, 0)));
        CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet, k) == j);
        stIntTuple_destruct(k);
    }
    stList_destruct(list);

    sonLibSortedSetTestTeardown();
}
Пример #5
0
static void debugScaffoldPaths(stList *haplotypePaths, stHash *haplotypePathToScaffoldPathHash,
        stHash *haplotypeToMaximalHaplotypeLengthHash, stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters) {
    stHash *segmentToMaximalHaplotypePathHash = buildSegmentToContigPathHash(haplotypePaths);
    for (int64_t i = 0; i < stList_length(haplotypePaths); i++) {
        stList *haplotypePath = stList_get(haplotypePaths, i);
        assert(stList_length(haplotypePath) > 0);
        //Traversing from 5' end..
        Segment *_5Segment = stList_get(haplotypePath, 0);
        Segment *_3Segment = stList_get(haplotypePath, stList_length(haplotypePath) - 1);
        assert(segment_getStrand(_5Segment) == segment_getStrand(_3Segment));
        if (!segment_getStrand(_5Segment)) {
            Segment *j = _5Segment;
            _5Segment = segment_getReverse(_3Segment);
            _3Segment = segment_getReverse(j);
        }
        assert(segment_getStrand(_5Segment));
        assert(segment_getStrand(_3Segment));
        Cap *_5Cap = segment_get5Cap(_5Segment);
        Cap *_3Cap = segment_get3Cap(_3Segment);
        if (getAdjacentCapsSegment(_5Cap) != NULL) {
            assert(!trueAdjacency(_5Cap, haplotypeEventStrings));
        }
        if (getAdjacentCapsSegment(_3Cap) != NULL) {
            assert(!trueAdjacency(_3Cap, haplotypeEventStrings));
        }
        debugScaffoldPathsP(_5Cap, haplotypePath,
                haplotypePathToScaffoldPathHash, haplotypeToMaximalHaplotypeLengthHash,
                segmentToMaximalHaplotypePathHash, haplotypeEventStrings, contaminationEventStrings, capCodeParameters, 1);
        debugScaffoldPathsP(_3Cap, haplotypePath,
                haplotypePathToScaffoldPathHash, haplotypeToMaximalHaplotypeLengthHash,
                segmentToMaximalHaplotypePathHash, haplotypeEventStrings, contaminationEventStrings, capCodeParameters, 0);
    }
    stHash_destruct(segmentToMaximalHaplotypePathHash);
}
Пример #6
0
static stHash *getComponents(stList *filteredEdges) {
    /*
     * A kind of stupid reimplementation of the greedy function, done just to trap typos.
     */
    stHash *nodesToComponents = stHash_construct3((uint64_t(*)(const void *)) stIntTuple_hashKey,
            (int(*)(const void *, const void *)) stIntTuple_equalsFn, NULL, NULL);
    for (int64_t i = 0; i < stList_length(nodes); i++) {
        stIntTuple *node = stList_get(nodes, i);
        stSortedSet *component = stSortedSet_construct();
        stSortedSet_insert(component, node);
        stHash_insert(nodesToComponents, node, component);
    }
    for (int64_t i = 0; i < stList_length(filteredEdges); i++) {
        stIntTuple *edge = stList_get(filteredEdges, i);
        stIntTuple *node1 = stIntTuple_construct1( stIntTuple_get(edge, 1));
        stIntTuple *node2 = stIntTuple_construct1( stIntTuple_get(edge, 2));
        stSortedSet *component1 = stHash_search(nodesToComponents, node1);
        stSortedSet *component2 = stHash_search(nodesToComponents, node2);
        assert(component1 != NULL && component2 != NULL);
        if (component1 != component2) {
            stSortedSet *component3 = stSortedSet_getUnion(component1, component2);
            stSortedSetIterator *setIt = stSortedSet_getIterator(component3);
            stIntTuple *node3;
            while ((node3 = stSortedSet_getNext(setIt)) != NULL) {
                stHash_insert(nodesToComponents, node3, component3);
            }
            stSortedSet_destructIterator(setIt);
            stSortedSet_destruct(component1);
            stSortedSet_destruct(component2);
        }
        stIntTuple_destruct(node1);
        stIntTuple_destruct(node2);
    }
    return nodesToComponents;
}
Пример #7
0
static stList *mergeSubstrings(stList *substrings, int64_t proximityToMerge) {
    /*
     * Merge set of substrings into fewer substrings, if they overlap by less than proximityToMerge
     */
    stList *mergedSubstrings = stList_construct3(0, (void (*)(void *)) substring_destruct);
    if (stList_length(substrings) == 0) {
        return mergedSubstrings;
    }
    stList_sort(substrings, (int (*)(const void *, const void *)) substring_cmp);
    Substring *pSubsequence = substring_clone(stList_get(substrings, 0));
    stList_append(mergedSubstrings, pSubsequence);
    for (int64_t i = 1; i < stList_length(substrings); i++) {
        Substring *substring = stList_get(substrings, i);
        if (pSubsequence->name == substring->name
                && pSubsequence->start + pSubsequence->length + proximityToMerge >= substring->start) { //Merge
            if (pSubsequence->start + pSubsequence->length < substring->start + substring->length) {
                pSubsequence->length = substring->start + substring->length - pSubsequence->start;
            }
        } else {
            pSubsequence = substring_clone(substring);
            stList_append(mergedSubstrings, pSubsequence);
        }
    }
    return mergedSubstrings;
}
Пример #8
0
void stCaf_addAdjacencies(Flower *flower) {
    //Build a list of caps.
    stList *list = stList_construct();
    Flower_EndIterator *endIterator = flower_getEndIterator(flower);
    End *end;
    while ((end = flower_getNextEnd(endIterator)) != NULL) {
        End_InstanceIterator *instanceIterator = end_getInstanceIterator(end);
        Cap *cap;
        while ((cap = end_getNext(instanceIterator)) != NULL) {
            if (!cap_getStrand(cap)) {
                cap = cap_getReverse(cap);
            }
            stList_append(list, cap);
        }
        end_destructInstanceIterator(instanceIterator);
    }
    flower_destructEndIterator(endIterator);
    assert(stList_length(list) % 2 == 0);
    //Sort the list of caps.
    stList_sort(list, (int(*)(const void *, const void *)) addAdjacenciesPP);
    //Now make the adjacencies.
    for (int64_t i = 1; i < stList_length(list); i += 2) {
        Cap *cap = stList_get(list, i - 1);
        Cap *cap2 = stList_get(list, i);
        cap_makeAdjacent(cap, cap2);
    }
    //Clean up.
    stList_destruct(list);
}
Пример #9
0
void printEventNoisesAndParams(NanoporeRead *npRead, stList *templateKmers, stList *complementKmers) {
    char *t_modelState, *c_modelState;
    // kmer | stDev | scale_sd | shift_sd | var_sd | strand
    for (int64_t i = 0; i < npRead->nbTemplateEvents; i++) {
        int64_t index = i * NB_EVENT_PARAMS;
        t_modelState = (char *)stList_get(templateKmers, i);
        fprintf(stdout, "%s\t%f\t%f\t%f\t%f\t%s\n",
                t_modelState,
                npRead->templateEvents[index + 1], // st_dev
                npRead->templateParams.scale_sd,
                npRead->templateParams.shift_sd,
                npRead->templateParams.var_sd,
                "t");
    }
    for (int64_t i = 0; i < npRead->nbComplementEvents; i++) {
        int64_t index = i * NB_EVENT_PARAMS;
        c_modelState = (char *)stList_get(complementKmers, i);
        fprintf(stdout, "%s\t%f\t%f\t%f\t%f\t%s\n",
                c_modelState,
                npRead->complementEvents[index + 1], // st_dev
                npRead->complementParams.scale_sd,
                npRead->complementParams.shift_sd,
                npRead->complementParams.var_sd,
                "c");
    }
}
Пример #10
0
Hmm *hmm_loadFromFile(const char *fileName) {
    FILE *fH = fopen(fileName, "r");
    char *string = stFile_getLineFromFile(fH);
    stList *tokens = stString_split(string);
    if (stList_length(tokens) < 2) {
        st_errAbort("Got an empty line in the input state machine file %s\n", fileName);
    }
    int type;
    int64_t j = sscanf(stList_get(tokens, 0), "%i", &type);
    if (j != 1) {
        st_errAbort("Failed to parse state number (int) from string: %s\n", string);
    }
    Hmm *hmm = hmm_constructEmpty(0.0, type);
    if (stList_length(tokens) != hmm->stateNumber * hmm->stateNumber + 2) {
        st_errAbort(
                "Got the wrong number of transitions in the input state machine file %s, got %" PRIi64 " instead of %" PRIi64 "\n",
                fileName, stList_length(tokens), hmm->stateNumber * hmm->stateNumber + 2);
    }

    for (int64_t i = 0; i < hmm->stateNumber * hmm->stateNumber; i++) {
        j = sscanf(stList_get(tokens, i + 1), "%lf", &(hmm->transitions[i]));
        if (j != 1) {
            st_errAbort("Failed to parse transition prob (float) from string: %s\n", string);
        }
    }
    j = sscanf(stList_get(tokens, stList_length(tokens) - 1), "%lf", &(hmm->likelihood));
    if (j != 1) {
        st_errAbort("Failed to parse likelihood (float) from string: %s\n", string);
    }

    //Cleanup transitions line
    free(string);
    stList_destruct(tokens);

    //Now parse the emissions line
    string = stFile_getLineFromFile(fH);
    tokens = stString_split(string);

    if (stList_length(tokens) != hmm->stateNumber * SYMBOL_NUMBER_NO_N * SYMBOL_NUMBER_NO_N) {
        st_errAbort(
                "Got the wrong number of emissions in the input state machine file %s, got %" PRIi64 " instead of %" PRIi64 "\n",
                fileName, stList_length(tokens), hmm->stateNumber * SYMBOL_NUMBER_NO_N * SYMBOL_NUMBER_NO_N);
    }

    for (int64_t i = 0; i < hmm->stateNumber * SYMBOL_NUMBER_NO_N * SYMBOL_NUMBER_NO_N; i++) {
        j = sscanf(stList_get(tokens, i), "%lf", &(hmm->emissions[i]));
        if (j != 1) {
            st_errAbort("Failed to parse emission prob (float) from string: %s\n", string);
        }
    }

    //Final cleanup
    free(string);
    stList_destruct(tokens);
    fclose(fH);

    return hmm;
}
Пример #11
0
/*
 * Constructs a face from a given Cap
 */
static void buildFaces_constructFromCap(Cap * startingCap,
        stHash *liftedEdgesTable, Flower * flower) {
    Face *face = face_construct(flower);
    stList *topNodes = stList_construct3(16, NULL);
    stList *liftedEdges;
    Cap *cap, *bottomNode, *ancestor;
    int64_t index, index2;

    printf("Constructing new face");

    // Establishlist of top nodes
    buildFaces_fillTopNodeList(startingCap, topNodes, liftedEdgesTable);

#ifndef NDEBUG
    // What, no top nodes!?
    if (stList_length(topNodes) == 0)
        abort();
#endif

    // Initialize data structure
    face_allocateSpace(face, stList_length(topNodes));

    // For every top node
    for (index = 0; index < stList_length(topNodes); index++) {
        cap = stList_get(topNodes, index);
        face_setTopNode(face, index, cap);
        liftedEdges = stHash_search(liftedEdgesTable, cap);

        if (!liftedEdges) {
            face_setBottomNodeNumber(face, index, 0);
            continue;
        }

        face_setBottomNodeNumber(face, index, stList_length(liftedEdges));
        // For every bottom node of that top node
        for (index2 = 0; index2 < stList_length(liftedEdges); index2++) {
            bottomNode
                    = ((LiftedEdge *) stList_get(liftedEdges, index2))->bottomNode;
            face_addBottomNode(face, index, bottomNode);
            ancestor = cap_getTopCap(cap_getPositiveOrientation(
                    cap_getAdjacency(bottomNode)));
            if (cap_getAdjacency(cap) != ancestor)
                face_setDerivedDestination(face, index, index2, ancestor);
            else
                face_setDerivedDestination(face, index, index2, NULL);

#ifndef NDEBUG
            // If bottom nodes part of top nodes
            assert(!stList_contains(topNodes, cap_getPositiveOrientation(
                    ((LiftedEdge*) stList_get(liftedEdges, index2))->bottomNode)));
#endif
        }
    }

    // Clean up
    stList_destruct(topNodes);
}
static void doBestMergeOfTwoSimpleCycles(stList *cycles,
        stList *nonZeroWeightAdjacencyEdges, stSortedSet *allAdjacencyEdges) {
    /*
     * Merge two simple cycles, using the best possible adjacency switch. Modifies components list in place,
     * destroying two old components and adding a new one. If new adjacency edges are needed then they are
     * added to the adjacency edges list.
     */
    assert(stList_length(cycles) > 1);

    /*
     * Get the best adjacency switch.
     */
    AdjacencySwitch *adjacencySwitch = getBestAdjacencySwitch(cycles,
            nonZeroWeightAdjacencyEdges, allAdjacencyEdges);
    assert(adjacencySwitch != NULL);

    /*
     * Find the two components to merge.
     */
    stList *cyclesToMerge = stList_construct3(0,
            (void(*)(void *)) stList_destruct);
    for (int64_t i = 0; i < stList_length(cycles); i++) {
        stList *cycle = stList_get(cycles, i);
        if (stList_contains(cycle, adjacencySwitch->oldEdge1)) {
            assert(!stList_contains(cycle, adjacencySwitch->oldEdge2));
            stList_append(cyclesToMerge, cycle);
        } else if (stList_contains(cycle, adjacencySwitch->oldEdge2)) {
            stList_append(cyclesToMerge, cycle);
        }
    }

    /*
     * Now construct the new component and modify the list of components in place.
     */
    assert(stList_length(cyclesToMerge) == 2);
    stList *newComponent = stList_join(cyclesToMerge);
    assert(!stList_contains(newComponent, NULL));
    //Cleanup the old components
    assert(stList_contains(cycles, stList_get(cyclesToMerge, 0)));
    stList_removeItem(cycles, stList_get(cyclesToMerge, 0));
    assert(stList_contains(cycles, stList_get(cyclesToMerge, 1)));
    stList_removeItem(cycles, stList_get(cyclesToMerge, 1));
    stList_destruct(cyclesToMerge);
    //Now remove the old edges and add the new ones
    assert(stList_contains(newComponent, adjacencySwitch->oldEdge1));
    stList_removeItem(newComponent, adjacencySwitch->oldEdge1);
    assert(stList_contains(newComponent, adjacencySwitch->oldEdge2));
    stList_removeItem(newComponent, adjacencySwitch->oldEdge2);
    assert(!stList_contains(newComponent, adjacencySwitch->newEdge1));
    stList_append(newComponent, adjacencySwitch->newEdge1);
    assert(!stList_contains(newComponent, adjacencySwitch->newEdge2));
    stList_append(newComponent, adjacencySwitch->newEdge2);
    adjacencySwitch_destruct(adjacencySwitch); //Clean the adjacency switch.
    //Finally add the component to the list of components
    stList_append(cycles, newComponent);

}
static AdjacencySwitch *getBest2EdgeAdjacencySwitch(stList *components,
        stSortedSet *allAdjacencyEdges) {
    /*
     * Look for the two lowest value adjacency edges in all current edges that are in a separate component and returns them as an adjacency switch
     * with now new adjacency edges.
     */

    /*
     * Get lowest scoring edge for each component.
     */
    stList *lowestScoringEdgeFromEachComponent = stList_construct();
    for (int64_t i = 0; i < stList_length(components); i++) {
        stList_append(lowestScoringEdgeFromEachComponent,
                getLowestScoringEdge(stList_get(components, i)));
    }

    /*
     * Get two lowest scoring edges.
     */
    stList_sort(lowestScoringEdgeFromEachComponent,
            getBest2EdgeAdjacencySwitchP);
    stIntTuple *lowestScoreEdge1 = stList_get(
            lowestScoringEdgeFromEachComponent, 0);
    stIntTuple *lowestScoreEdge2 = stList_get(
            lowestScoringEdgeFromEachComponent, 1);
    assert(lowestScoreEdge1 != lowestScoreEdge2);

    stList_destruct(lowestScoringEdgeFromEachComponent); //Cleanup

    stIntTuple *newEdge1 = getWeightedEdgeFromSet(
            stIntTuple_get(lowestScoreEdge1, 0),
            stIntTuple_get(lowestScoreEdge2, 0), allAdjacencyEdges);
    stIntTuple *newEdge2 = getWeightedEdgeFromSet(
            stIntTuple_get(lowestScoreEdge1, 1),
            stIntTuple_get(lowestScoreEdge2, 1), allAdjacencyEdges);
    if (newEdge1 == NULL) {
        assert(newEdge2 == NULL);
        newEdge1 = getWeightedEdgeFromSet(
                stIntTuple_get(lowestScoreEdge1, 0),
                stIntTuple_get(lowestScoreEdge2, 1), allAdjacencyEdges);
        newEdge2 = getWeightedEdgeFromSet(
                stIntTuple_get(lowestScoreEdge1, 1),
                stIntTuple_get(lowestScoreEdge2, 0), allAdjacencyEdges);
    }
    assert(newEdge1 != NULL);
    assert(newEdge2 != NULL);

    return adjacencySwitch_construct(
            lowestScoreEdge1,
            lowestScoreEdge2,
            newEdge1,
            newEdge2,
            stIntTuple_get(lowestScoreEdge1, 2)
                    + stIntTuple_get(lowestScoreEdge2, 2));
}
Пример #14
0
static void test_stPosetAlignment_addAndIsPossible(CuTest *testCase) {
    for(int64_t trial=0; trial<100; trial++) {
        setup();

        //Make random number of sequences.
        stList *sequenceLengths = stList_construct3(0, (void (*)(void *))stIntTuple_destruct);
        for(int64_t i=0; i<sequenceNumber; i++) {
            stList_append(sequenceLengths, stIntTuple_construct1( st_randomInt(0, MAX_SEQUENCE_SIZE)));
        }

        //Propose random alignment pairs...
        stList *pairs = stList_construct3(0, (void(*)(void *))stIntTuple_destruct);
        int64_t maxAlignedPairs = st_randomInt(0, MAX_ALIGNMENTS);
        if(sequenceNumber > 0) {
            for(int64_t i=0; i<maxAlignedPairs; i++) {
                int64_t seq1 = st_randomInt(0, sequenceNumber);
                int64_t seqLength1 = stIntTuple_get(stList_get(sequenceLengths, seq1), 0);
                if(seqLength1 == 0) {
                    continue;
                }
                int64_t position1 = st_randomInt(0, seqLength1);
                int64_t seq2 = st_randomInt(0, sequenceNumber);
                int64_t seqLength2 = stIntTuple_get(stList_get(sequenceLengths, seq1), 0);
                if(seqLength2 == 0) {
                    continue;
                }
                int64_t position2 = st_randomInt(0, seqLength2);
                if(seq1 != seq2) {
                    stList_append(pairs, stIntTuple_construct4( seq1, position1, seq2, position2));
                    if(stPosetAlignment_isPossible(posetAlignment, seq1, position1, seq2, position2)) {
                        st_logInfo("In %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " \n", seq1, position1, seq2, position2);
                        //For each accepted pair check it doesn't create a cycle.
                        CuAssertTrue(testCase, !containsACycle(pairs, sequenceNumber));
                        CuAssertTrue(testCase, stPosetAlignment_add(posetAlignment, seq1, position1, seq2, position2));
                    }
                    else {
                        st_logInfo("Out %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " \n", seq1, position1, seq2, position2);
                        //For each rejected pair check it creates a cycle..
                        CuAssertTrue(testCase, containsACycle(pairs, sequenceNumber));
                        CuAssertTrue(testCase, !stPosetAlignment_isPossible(posetAlignment, seq1, position1, seq2, position2));
                        stIntTuple_destruct(stList_pop(pairs)); //remove the pair which created the cycle.
                        CuAssertTrue(testCase, !containsACycle(pairs, sequenceNumber)); //Check we're back to being okay..
                    }
                }
            }
        }

        //Cleanup
        stList_destruct(sequenceLengths);
        stList_destruct(pairs);
        teardown();
        st_logInfo("Passed a random ordering test with %" PRIi64 " sequences and %" PRIi64 " aligned pairs\n", sequenceNumber, maxAlignedPairs);
    }
}
Пример #15
0
void test_stList_sort2(CuTest *testCase) {
    setup();
    stList_sort2(list, test_stList_sort2P, "hello");
    CuAssertTrue(testCase, stList_length(list) == stringNumber);
    CuAssertStrEquals(testCase, "five", stList_get(list, 0));
    CuAssertStrEquals(testCase, "four", stList_get(list, 1));
    CuAssertStrEquals(testCase, "one", stList_get(list, 2));
    CuAssertStrEquals(testCase, "three", stList_get(list, 3));
    CuAssertStrEquals(testCase, "two", stList_get(list, 4));
    teardown();
}
Пример #16
0
void test_stList_sort(CuTest *testCase) {
    setup();
    stList_sort(list, (int (*)(const void *, const void *))strcmp);
    CuAssertTrue(testCase, stList_length(list) == stringNumber);
    CuAssertStrEquals(testCase, "five", stList_get(list, 0));
    CuAssertStrEquals(testCase, "four", stList_get(list, 1));
    CuAssertStrEquals(testCase, "one", stList_get(list, 2));
    CuAssertStrEquals(testCase, "three", stList_get(list, 3));
    CuAssertStrEquals(testCase, "two", stList_get(list, 4));
    teardown();
}
Пример #17
0
void bottomUp(stList *flowers, stKVDatabase *sequenceDatabase, Name referenceEventName,
              bool isTop, stMatrix *(*generateSubstitutionMatrix)(double)) {
    /*
     * A reference thread between the two caps
     * in each flower f may be broken into two in the children of f.
     * Therefore, for each flower f first identify attached stub ends present in the children of f that are
     * not present in f and copy them into f, reattaching the reference caps as needed.
     */
    stList *caps = getCaps(flowers, referenceEventName);
    for (int64_t i = stList_length(caps) - 1; i >= 0; i--) { //Start from end, as we add to this list.
        setAdjacencyLengthsAndRecoverNewCapsAndBrokenAdjacencies(stList_get(caps, i), caps);
    }
    for(int64_t i=0; i<stList_length(flowers); i++) {
        recoverBrokenAdjacencies(stList_get(flowers, i), caps, referenceEventName);
    }

    //Build the phylogenetic event trees for base calling.
    segmentWriteFn_flowerToPhylogeneticTreeHash = stHash_construct2(NULL, (void (*)(void *))cleanupPhylogeneticTree);
    for(int64_t i=0; i<stList_length(flowers); i++) {
        Flower *flower = stList_get(flowers, i);
        Event *refEvent = eventTree_getEvent(flower_getEventTree(flower), referenceEventName);
        assert(refEvent != NULL);
        stHash_insert(segmentWriteFn_flowerToPhylogeneticTreeHash, flower, getPhylogeneticTreeRootedAtGivenEvent(refEvent, generateSubstitutionMatrix));
    }

    if (isTop) {
        stList *threadStrings = buildRecursiveThreadsInList(sequenceDatabase, caps, segmentWriteFn,
                terminalAdjacencyWriteFn);
        assert(stList_length(threadStrings) == stList_length(caps));

        int64_t nonTrivialSeqIndex = 0, trivialSeqIndex = stList_length(threadStrings); //These are used as indices for the names of trivial and non-trivial sequences.
        for (int64_t i = 0; i < stList_length(threadStrings); i++) {
            Cap *cap = stList_get(caps, i);
            assert(cap_getStrand(cap));
            assert(!cap_getSide(cap));
            Flower *flower = end_getFlower(cap_getEnd(cap));
            char *threadString = stList_get(threadStrings, i);
            bool trivialString = isTrivialString(&threadString); //This alters the original string
            MetaSequence *metaSequence = addMetaSequence(flower, cap, trivialString ? trivialSeqIndex++ : nonTrivialSeqIndex++,
                    threadString, trivialString);
            free(threadString);
            int64_t endCoordinate = setCoordinates(flower, metaSequence, cap, metaSequence_getStart(metaSequence) - 1);
            (void) endCoordinate;
            assert(endCoordinate == metaSequence_getLength(metaSequence) + metaSequence_getStart(metaSequence));
        }
        stList_setDestructor(threadStrings, NULL); //The strings are already cleaned up by the above loop
        stList_destruct(threadStrings);
    } else {
        buildRecursiveThreads(sequenceDatabase, caps, segmentWriteFn, terminalAdjacencyWriteFn);
    }
    stHash_destruct(segmentWriteFn_flowerToPhylogeneticTreeHash);
    stList_destruct(caps);
}
Пример #18
0
int main(int argc, char *argv[]) {
    //////////////////////////////////////////////
    //Parse the inputs
    //////////////////////////////////////////////

    parseBasicArguments(argc, argv, "linkageStats");

    ///////////////////////////////////////////////////////////////////////////
    // Get the intervals
    ///////////////////////////////////////////////////////////////////////////

    stList *haplotypeEventStrings = getEventStrings(
            treatHaplotype1AsContamination ? NULL : hap1EventString,
            treatHaplotype2AsContamination ? NULL : hap2EventString);
    stList *assemblyEventStringInList = stList_construct();
    stList_append(assemblyEventStringInList, assemblyEventString);

    stList *intervals = stList_construct3(0, (void (*)(void *))sequenceInterval_destruct);
    for(int64_t i=0; i<stList_length(haplotypeEventStrings); i++) {
        const char *hapEventString = stList_get(haplotypeEventStrings, i);
        st_logInfo("Getting contig paths for haplotype: %s", hapEventString);
        stList *contigPaths = getContigPaths(flower, hapEventString, assemblyEventStringInList);
        stList *hapIntervals = getSplitContigPathIntervals(flower, contigPaths, hapEventString,
                assemblyEventStringInList);
        stList_destruct(contigPaths);
        st_logInfo("Getting contig paths\n");
        stList_appendAll(intervals, hapIntervals);
        stList_setDestructor(hapIntervals, NULL);
        stList_destruct(hapIntervals);
    }

    st_logDebug("Got a total of %" PRIi64 " intervals\n", stList_length(intervals));

    ///////////////////////////////////////////////////////////////////////////
    // Write it out.
    ///////////////////////////////////////////////////////////////////////////

    FILE *fileHandle = fopen(outputFile, "w");
    for (int64_t i = 0; i < stList_length(intervals); i++) {
        SequenceInterval *sequenceInterval = stList_get(intervals, i);
        st_logDebug("We have a path interval %s %" PRIi64 " %" PRIi64 "\n", sequenceInterval->sequenceName,
                sequenceInterval->start, sequenceInterval->end);
        fprintf(fileHandle, "%s %" PRIi64 " %" PRIi64 "\n", sequenceInterval->sequenceName,
                sequenceInterval->start, sequenceInterval->end);
    }

    st_logInfo("Finished writing out the stats.\n");
    fclose(fileHandle);

    return 0;
}
void printPositions(stList *positions, const char *substitutionType, FILE *fileHandle) {
    for (int64_t i = 0; i < stList_length(positions); i++) {
        SegmentHolder *segmentHolder = stList_get(positions, i);
        int64_t j = segment_getStart(segmentHolder->segment);
        if (segment_getStrand(segmentHolder->segment)) {
            j += segmentHolder->offset;
            assert(
                    cap_getCoordinate(segment_get5Cap(segmentHolder->segment)) == segment_getStart(
                            segmentHolder->segment));
            assert(
                    segment_getStart(segmentHolder->segment) + segment_getLength(segmentHolder->segment) - 1
                            == cap_getCoordinate(segment_get3Cap(segmentHolder->segment)));
        } else {
            j -= segmentHolder->offset;
            assert(
                    segment_getStart(segmentHolder->segment) - segment_getLength(segmentHolder->segment) + 1
                            == cap_getCoordinate(segment_get3Cap(segmentHolder->segment)));
        }

        fprintf(fileHandle, "%s: %s_%" PRIi64 " %" PRIi64 " %c %c %c\n", substitutionType,
                event_getHeader(segment_getEvent(segmentHolder->segment)),
                sequence_getLength(segment_getSequence(segmentHolder->segment)), j,
                segmentHolder->base1, segmentHolder->base2, segmentHolder->base3);
        getMAFBlock(segment_getBlock(segmentHolder->segment), fileHandle);
    }
}
Пример #20
0
// Returns a hash mapping from sequence header to sequence data.
static stHash *readFastaFile(char *filename) {
    FILE *fasta = fopen(filename, "r");
    if (fasta == NULL) {
        st_errnoAbort("Could not open fasta file %s", filename);
    }
    stHash *headerToData = stHash_construct3(stHash_stringKey,
                                             stHash_stringEqualKey,
                                             free,
                                             free);
    struct List *seqs = constructEmptyList(0, NULL);
    struct List *seqLengths = constructEmptyList(0, free);
    struct List *headers = constructEmptyList(0, free);
    fastaRead(fasta, seqs, seqLengths, headers);

    for (int64_t i = 0; i < seqs->length; i++) {
        char *fullHeader = headers->list[i];
        stList *headerTokens = stString_splitByString(fullHeader, " ");
        char *usableHeader = stString_copy(stList_get(headerTokens, 0));
        stHash_insert(headerToData, usableHeader, seqs->list[i]);
        stList_destruct(headerTokens);
    }
    destructList(seqs);
    destructList(seqLengths);
    destructList(headers);

    return headerToData;
}
Пример #21
0
// TODO: see if we can make this one command
static void bulkSetRecords(stKVDatabase *database, stList *records) {
    startTransaction(database);
    stTry {
        for(int32_t i=0; i<stList_length(records); i++) {
            stKVDatabaseBulkRequest *request = stList_get(records, i);
            switch(request->type) {
                case UPDATE:
                updateRecord(database, request->key, request->value, request->size);
                break;
                case INSERT:
                insertRecord(database, request->key, request->value, request->size);
                break;
                case SET:
                setRecord(database, request->key, request->value, request->size);
                break;
            }
        }
        commitTransaction(database);
    }stCatch(ex) {
        abortTransaction(database);
        stThrowNewCause(
                ex,
                ST_KV_DATABASE_EXCEPTION_ID,
                "MySQL bulk set records failed");
    }stTryEnd;
}
Пример #22
0
static stList *getSubstringsForFlowerSegments(stList *flowers) {
    /*
     * Get the set of substrings representing the strings in the segments of the given flowers.
     */
    stList *substrings = stList_construct3(0, (void (*)(void *)) substring_destruct);
    for (int64_t i = 0; i < stList_length(flowers); i++) {
        Flower *flower = stList_get(flowers, i);
        Flower_EndIterator *blockIt = flower_getBlockIterator(flower);
        Block *block;
        while ((block = flower_getNextBlock(blockIt)) != NULL) {
            Block_InstanceIterator *instanceIt = block_getInstanceIterator(block);
            Segment *segment;
            while ((segment = block_getNext(instanceIt)) != NULL) {
                Sequence *sequence;
                if ((sequence = segment_getSequence(segment)) != NULL) {
                    segment = segment_getStrand(segment) ? segment : segment_getReverse(segment);
                    assert(segment_getLength(segment) > 0);
                    stList_append(substrings,
                            substring_construct(sequence_getMetaSequence(sequence)->stringName,
                                    segment_getStart(segment) - sequence_getStart(sequence),
                                    segment_getLength(segment)));
                }
            }
            block_destructInstanceIterator(instanceIt);
        }
        flower_destructBlockIterator(blockIt);
    }
    return substrings;
}
static AdjacencySwitch *getBest4EdgeAdjacencySwitchP(stIntTuple *oldEdge1,
        int64_t node1, stSortedSet *allAdjacencyEdges,
        stHash *nodesToAllCurrentEdges, stHash *nodesToBridgingAdjacencyEdges) {
    /*
     * Returns the best adjacency switch for the given node and edge that
     * contains 4 existing edges.
     */
    int64_t node4 = getOtherPosition(oldEdge1, node1);
    AdjacencySwitch *minimumCostAdjacencySwitch = NULL;
    stList *validEdges = getItemForNode(node1, nodesToBridgingAdjacencyEdges);
    if (validEdges != NULL) {
        for (int64_t i = 0; i < stList_length(validEdges); i++) {
            stIntTuple *newEdge1 = stList_get(validEdges, i);
            int64_t node2 = getOtherPosition(newEdge1, node1);
            stList *validEdges2 =
                    getItemForNode(node2, nodesToAllCurrentEdges);
            assert(validEdges2 != NULL);
            assert(stList_length(validEdges2) == 1);
            stIntTuple *oldEdge2 = stList_peek(validEdges2);
            int64_t node3 = getOtherPosition(oldEdge2, node2);
            stIntTuple *newEdge2 = getWeightedEdgeFromSet(node3, node4,
                    allAdjacencyEdges);
            assert(newEdge2 != NULL);
            int64_t cost = stIntTuple_get(oldEdge1, 2)
                    + stIntTuple_get(oldEdge2, 2)
                    - stIntTuple_get(newEdge1, 2)
                    - stIntTuple_get(newEdge2, 2);
            minimumCostAdjacencySwitch = adjacencySwitch_update(
                    minimumCostAdjacencySwitch, oldEdge1, oldEdge2, newEdge1,
                    newEdge2, cost);
        }
    }
    return minimumCostAdjacencySwitch;
}
Пример #24
0
void test_stList_append(CuTest *testCase) {
    setup();
    stList_append(list, NULL);
    CuAssertTrue(testCase, stList_length(list) == stringNumber+1);
    CuAssertTrue(testCase, stList_get(list, stringNumber) == NULL);
    teardown();
}
Пример #25
0
/*
 * Recursive function which fills a givenlist with the
 * connected nodes within a module
 */
static void buildFaces_fillTopNodeList(Cap * cap, stList *list,
        stHash *liftedEdgesTable) {
    stList *liftedEdges;
    int64_t index;

    // Limit of recursion
    if (stList_contains(list, cap))
        return;

    // Actual filling
    st_logInfo("Adding cap %p to face\n", cap);
    stList_append(list, cap);

    // Recursion through lifted edges
    if ((liftedEdges = stHash_search(liftedEdgesTable, cap)))
        for (index = 0; index < stList_length(liftedEdges); index++)
            buildFaces_fillTopNodeList(
                    ((LiftedEdge *) stList_get(liftedEdges, index))->destination,
                   list, liftedEdgesTable);

    // Recursion through adjacency
    if (cap_getAdjacency(cap))
        buildFaces_fillTopNodeList(cap_getAdjacency(cap),list,
                liftedEdgesTable);
}
Пример #26
0
void test_stList_filter(CuTest *testCase) {
    setup();
    stSortedSet *set = stSortedSet_construct();
    stSortedSet_insert(set, strings[0]);
    stSortedSet_insert(set, strings[4]);
    stList *list2 = stList_filterToExclude(list, set);
    stList *list3 = stList_filterToInclude(list, set);
    CuAssertTrue(testCase,stList_length(list2) == 3);
    CuAssertTrue(testCase,stList_length(list3) == 2);
    CuAssertTrue(testCase,stList_get(list2, 0) == strings[1]);
    CuAssertTrue(testCase,stList_get(list2, 1) == strings[2]);
    CuAssertTrue(testCase,stList_get(list2, 2) == strings[3]);
    CuAssertTrue(testCase,stList_get(list3, 0) == strings[0]);
    CuAssertTrue(testCase,stList_get(list3, 1) == strings[4]);
    teardown();
}
Пример #27
0
void test_stList_get(CuTest *testCase) {
    setup();
    int64_t i;
    for(i=0; i<stringNumber; i++) {
        CuAssertTrue(testCase, stList_get(list, i) == strings[i]);
    }
    teardown();
}
Пример #28
0
bool hasCapInEvents(End *end, stList *eventStrings) {
    for(int64_t i=0; i<stList_length(eventStrings); i++) {
        if(hasCapInEvent(end, stList_get(eventStrings, i))) {
            return 1;
        }
    }
    return 0;
}
static bool listsAreEqual(stList *observedList, stList *expectedList) {
    if (stList_length(observedList) != stList_length(expectedList)) {
        fprintf(stderr, "stList lengths are not equal: %"PRIu64" %"PRIu64"\n", 
                stList_length(observedList), stList_length(expectedList));
        printList(observedList, "observed");
        printList(expectedList, "expected");
        return false;
    }
    for (int64_t i = 0; i < stList_length(observedList); ++i) {
        if (strcmp(stList_get(observedList, i), stList_get(expectedList, i)) != 0) {
            fprintf(stderr, "stList elements are not equal at index %"PRIu64": %s %s\n", 
                    i, (char *)stList_get(observedList, i), (char *)stList_get(expectedList, i));
            return false;
        }
    }
    return true;
}
stIntTuple *getLowestScoringEdge(stList *edges) {
    /*
     * Returns edge with lowest weight.
     */
    assert(stList_length(edges) > 0);
    stIntTuple *lowestScoringEdge = stList_get(edges, 0);
    int64_t lowestScore = stIntTuple_get(lowestScoringEdge, 2);
    for (int64_t j = 1; j < stList_length(edges); j++) {
        stIntTuple *edge = stList_get(edges, j);
        int64_t k = stIntTuple_get(edge, 2);
        if (k < lowestScore) {
            lowestScore = k;
            lowestScoringEdge = edge;
        }
    }
    return lowestScoringEdge;
}