static void checkIsValidReference(CuTest *testCase, stList *reference,
        double totalScore) {
    stList *chosenEdges = convertReferenceToAdjacencyEdges(reference);
    //Check that everyone has a partner.
    CuAssertIntEquals(testCase, nodeNumber, stList_length(chosenEdges) * 2);
    stSortedSet *nodes = stSortedSet_construct3((int(*)(const void *, const void *)) stIntTuple_cmpFn,
            (void(*)(void *)) stIntTuple_destruct);
    for (int64_t i = 0; i < nodeNumber; i++) {
        stSortedSet_insert(nodes, stIntTuple_construct1( i));
    }
    checkEdges(chosenEdges, nodes, 1, 0);
    //Check that the score is correct
    double totalScore2 = calculateZScoreOfReference(reference, nodeNumber, zMatrix);
    CuAssertDblEquals(testCase, totalScore2, totalScore, 0.000001);
    //Check that the stubs are properly connected.
    stList *allEdges = stList_copy(chosenEdges, NULL);
    stList_appendAll(allEdges, stubs);
    stList_appendAll(allEdges, chains);
    stList *components = getComponents(allEdges);
    CuAssertIntEquals(testCase, stList_length(stubs), stList_length(reference));
    CuAssertIntEquals(testCase, stList_length(stubs), stList_length(components));
    //Cleanup
    stList_destruct(components);
    stSortedSet_destruct(nodes);
    stList_destruct(allEdges);
    stList_destruct(chosenEdges);
}
static stSortedSet *getSetOfMergedLists(stList *list1, stList *list2) {
    /*
     * Returns a sorted set of two input lists.
     */
    stList *list3 = stList_copy(list1, NULL);
    stList_appendAll(list3, list2);
    stSortedSet *set = stList_getSortedSet(list3, NULL);
    stList_destruct(list3);
    return set;
}
stList *splitMultipleStubCycles(stList *chosenEdges,
        stList *nonZeroWeightAdjacencyEdges, stSortedSet *allAdjacencyEdges,
        stList *stubEdges, stList *chainEdges) {
    /*
     *  Returns an updated list of adjacency edges, such that each stub edge is a member of exactly one cycle.
     */

    /*
     * Calculate components.
     */
    stList *cycles = getComponents2(chosenEdges, stubEdges, chainEdges);

    /*
     * Find components with multiple stub edges.
     */
    stList *singleStubEdgeCycles = stList_construct3(0,
            (void(*)(void *)) stList_destruct);
    for (int64_t i = 0; i < stList_length(cycles); i++) {
        stList *subCycle = stList_get(cycles, i);
        stList *subAdjacencyEdges;
        stList *subStubEdges;
        stList *subChainEdges;
        splitIntoAdjacenciesStubsAndChains(subCycle,
                nonZeroWeightAdjacencyEdges, stubEdges, chainEdges,
                &subAdjacencyEdges, &subStubEdges, &subChainEdges);
        stList *splitCycles = splitMultipleStubCycle(subCycle,
                subAdjacencyEdges, allAdjacencyEdges, subStubEdges,
                subChainEdges);
        stList_appendAll(singleStubEdgeCycles, splitCycles);
        stList_setDestructor(splitCycles, NULL); //Do this to avoid destroying the underlying lists
        stList_destruct(splitCycles);
        stList_destruct(subAdjacencyEdges);
        stList_destruct(subStubEdges);
        stList_destruct(subChainEdges);
    }
    stList_destruct(cycles);

    /*
     * Remove the stub/chain edges from the components.
     */
    stSortedSet *stubAndChainEdgesSet = getSetOfMergedLists(stubEdges,
            chainEdges);
    stList *adjacencyOnlyComponents = filterListsToExclude(
            singleStubEdgeCycles, stubAndChainEdgesSet);
    stList_destruct(singleStubEdgeCycles);
    stSortedSet_destruct(stubAndChainEdgesSet);

    /*
     * Merge the adjacency edges in the components into a single list.
     */
    stList *updatedChosenEdges = stList_join(adjacencyOnlyComponents);
    stList_destruct(adjacencyOnlyComponents);

    return updatedChosenEdges;
}
stList *getComponents2(stList *adjacencyEdges, stList *stubEdges,
        stList *chainEdges) {
    /*
     * Gets a list of connected components for a set of adjacency, stub and chain edges.
     * If adjacencyEdges, stubEdges or chainEdges are NULL then they are ignored.
     */
    stList *allEdges = stList_construct(); //Build a concatenated list of all the chain, stub and adjacency edges.
    if (adjacencyEdges != NULL) {
        stList_appendAll(allEdges, adjacencyEdges);
    }
    if (stubEdges != NULL) {
        stList_appendAll(allEdges, stubEdges);
    }
    if (chainEdges != NULL) {
        stList_appendAll(allEdges, chainEdges);
    }
    stList *components = getComponents(allEdges); //Gets the graph components.
    stList_destruct(allEdges); //Cleanup the all edges.
    return components;
}
Beispiel #5
0
void test_stList_appendAll(CuTest *testCase) {
    setup();
    stList *list2 = stList_copy(list, NULL);
    stList_appendAll(list, list2);
    CuAssertTrue(testCase, stList_length(list) == stringNumber * 2);
    int64_t i;
    for(i=0; i<stringNumber*2; i++) {
        CuAssertTrue(testCase, stList_get(list, i) == strings[i % stringNumber]);
    }
    stList_destruct(list2);
    teardown();
}
int main(int argc, char *argv[]) {
    //////////////////////////////////////////////
    //Parse the inputs
    //////////////////////////////////////////////

    parseBasicArguments(argc, argv, "linkageStats");

    ///////////////////////////////////////////////////////////////////////////
    // Get the intervals
    ///////////////////////////////////////////////////////////////////////////

    stList *haplotypeEventStrings = getEventStrings(
            treatHaplotype1AsContamination ? NULL : hap1EventString,
            treatHaplotype2AsContamination ? NULL : hap2EventString);
    stList *assemblyEventStringInList = stList_construct();
    stList_append(assemblyEventStringInList, assemblyEventString);

    stList *intervals = stList_construct3(0, (void (*)(void *))sequenceInterval_destruct);
    for(int64_t i=0; i<stList_length(haplotypeEventStrings); i++) {
        const char *hapEventString = stList_get(haplotypeEventStrings, i);
        st_logInfo("Getting contig paths for haplotype: %s", hapEventString);
        stList *contigPaths = getContigPaths(flower, hapEventString, assemblyEventStringInList);
        stList *hapIntervals = getSplitContigPathIntervals(flower, contigPaths, hapEventString,
                assemblyEventStringInList);
        stList_destruct(contigPaths);
        st_logInfo("Getting contig paths\n");
        stList_appendAll(intervals, hapIntervals);
        stList_setDestructor(hapIntervals, NULL);
        stList_destruct(hapIntervals);
    }

    st_logDebug("Got a total of %" PRIi64 " intervals\n", stList_length(intervals));

    ///////////////////////////////////////////////////////////////////////////
    // Write it out.
    ///////////////////////////////////////////////////////////////////////////

    FILE *fileHandle = fopen(outputFile, "w");
    for (int64_t i = 0; i < stList_length(intervals); i++) {
        SequenceInterval *sequenceInterval = stList_get(intervals, i);
        st_logDebug("We have a path interval %s %" PRIi64 " %" PRIi64 "\n", sequenceInterval->sequenceName,
                sequenceInterval->start, sequenceInterval->end);
        fprintf(fileHandle, "%s %" PRIi64 " %" PRIi64 "\n", sequenceInterval->sequenceName,
                sequenceInterval->start, sequenceInterval->end);
    }

    st_logInfo("Finished writing out the stats.\n");
    fclose(fileHandle);

    return 0;
}
void checkInputs(stSortedSet *nodes, stList *adjacencyEdges,
        stList *stubEdges, stList *chainEdges) {
    /*
     * Checks the inputs to the algorithm are as expected.
     */
    int64_t nodeNumber = stSortedSet_size(nodes);
    assert(nodeNumber % 2 == 0);
    if (nodeNumber > 0) {
        assert(stList_length(stubEdges) > 0);
    }
    assert(
            stList_length(stubEdges) + stList_length(chainEdges) == (nodeNumber
                    / 2));
    checkEdges(stubEdges, nodes, 0, 0);
    checkEdges(chainEdges, nodes, 0, 0);
    stList *stubsAndChainEdges = stList_copy(stubEdges, NULL);
    stList_appendAll(stubsAndChainEdges, chainEdges);
    checkEdges(stubsAndChainEdges, nodes, 1, 0);
    stList_destruct(stubsAndChainEdges);
    checkEdges(adjacencyEdges, nodes, 1, 1);
}
static stList *splitMultipleStubCycle(stList *cycle,
        stList *nonZeroWeightAdjacencyEdges, stSortedSet *allAdjacencyEdges,
        stList *stubEdges, stList *chainEdges) {
    /*
     *  Takes a simple cycle containing k stub edges and splits into k cycles, each containing 1 stub edge.
     */

    /*
     * Get sub-components containing only adjacency and chain edges.
     */

    stSortedSet *stubAndChainEdgesSet = getSetOfMergedLists(stubEdges,
            chainEdges);
    stList *adjacencyEdgeMatching =
            stList_filterToExclude(cycle, stubAndChainEdgesSet); //Filter out the the non-adjacency edges
    //Make it only the chain edges present in the original component
    stList *stubFreePaths = getComponents2(adjacencyEdgeMatching, NULL,
            chainEdges);
    stList_destruct(adjacencyEdgeMatching);
    assert(stList_length(stubFreePaths) >= 1);

    stList *splitCycles = stList_construct3(0,
            (void(*)(void *)) stList_destruct); //The list to return.


    if (stList_length(stubFreePaths) > 1) {
        /*
         * Build the list of adjacency edges acceptable in the merge
         */
        stSortedSet *oddNodes = getOddNodes(cycle);
        stList *oddToEvenNonZeroWeightAdjacencyEdges =
                getOddToEvenAdjacencyEdges(oddNodes,
                        nonZeroWeightAdjacencyEdges);
        stSortedSet *oddToEvenAllAdjacencyEdges = getOddToEvenAdjacencyEdges2(oddNodes, allAdjacencyEdges);

        /*
         * Merge together the best two components.
         */
        stList *l = filterListsToExclude(stubFreePaths, stubAndChainEdgesSet);
        doBestMergeOfTwoSimpleCycles(l, oddToEvenNonZeroWeightAdjacencyEdges,
                oddToEvenAllAdjacencyEdges); //This is inplace.
        stList *l2 = stList_join(l);
        stList_destruct(l);
        l = getComponents2(l2, stubEdges, chainEdges);
        assert(stList_length(l) == 2);
        stList_destruct(l2);

        /*
         * Cleanup
         */
        stSortedSet_destruct(oddNodes);
        stList_destruct(oddToEvenNonZeroWeightAdjacencyEdges);
        stSortedSet_destruct(oddToEvenAllAdjacencyEdges);

        /*
         * Call procedure recursively.
         */
        for (int64_t i = 0; i < stList_length(l); i++) {
            /*
             * Split into adjacency edges, stub edges and chain edges.
             */
            stList *subCycle = stList_get(l, i);
            stList *subAdjacencyEdges;
            stList *subStubEdges;
            stList *subChainEdges;
            splitIntoAdjacenciesStubsAndChains(subCycle,
                    nonZeroWeightAdjacencyEdges, stubEdges, chainEdges,
                    &subAdjacencyEdges, &subStubEdges, &subChainEdges);

            /*
             * Call recursively.
             */
            l2 = splitMultipleStubCycle(subCycle, subAdjacencyEdges,
                    allAdjacencyEdges, subStubEdges, subChainEdges);
            stList_appendAll(splitCycles, l2);

            /*
             * Clean up
             */
            stList_setDestructor(l2, NULL);
            stList_destruct(l2);
            stList_destruct(subAdjacencyEdges);
            stList_destruct(subStubEdges);
            stList_destruct(subChainEdges);
        }
        stList_destruct(l);
    } else {
        stList_append(splitCycles, stList_copy(cycle, NULL));
    }

    stSortedSet_destruct(stubAndChainEdgesSet);
    stList_destruct(stubFreePaths);

    return splitCycles;
}