/*
 * Verifies the components produced from filteredEdges against the file-level
 * 'edges' list and 'maxComponentSize' limit:
 *  - every component contains at least one and at most maxComponentSize nodes;
 *  - every edge of 'edges' that is absent from filteredEdges connects two
 *    distinct components whose sizes sum to more than maxComponentSize
 *    (so it could not have been added without breaking the limit).
 */
static void checkComponents(CuTest *testCase, stList *filteredEdges) {
    stHash *nodesToComponents = getComponents(filteredEdges);

    //Size bounds on each component reachable through the hash.
    stList *componentList = stHash_getValues(nodesToComponents);
    for (int64_t c = 0; c < stList_length(componentList); c++) {
        stSortedSet *members = stList_get(componentList, c);
        CuAssertTrue(testCase, stSortedSet_size(members) <= maxComponentSize);
        CuAssertTrue(testCase, stSortedSet_size(members) >= 1);
    }

    //Every rejected edge must have been rejected for a reason.
    stSortedSet *keptEdges = stList_getSortedSet(filteredEdges, (int(*)(const void *, const void *)) stIntTuple_cmpFn);
    for (int64_t e = 0; e < stList_length(edges); e++) {
        stIntTuple *candidate = stList_get(edges, e);
        if (stSortedSet_search(keptEdges, candidate) != NULL) {
            continue; //Edge was retained, nothing to verify.
        }
        //Positions 1 and 2 of an edge tuple are used as its two node ids.
        stIntTuple *endA = stIntTuple_construct1( stIntTuple_get(candidate, 1));
        stIntTuple *endB = stIntTuple_construct1( stIntTuple_get(candidate, 2));
        stSortedSet *componentA = stHash_search(nodesToComponents, endA);
        stSortedSet *componentB = stHash_search(nodesToComponents, endB);
        CuAssertTrue(testCase, componentA != NULL && componentB != NULL);
        CuAssertTrue(testCase, componentA != componentB);
        CuAssertTrue(testCase, stSortedSet_size(componentA) + stSortedSet_size(componentB) > maxComponentSize);
        stIntTuple_destruct(endA);
        stIntTuple_destruct(endB);
    }
    stSortedSet_destruct(keptEdges);

    //The value list is collapsed into a set (NULL comparator) before the
    //components are destructed, so that each component is freed only once.
    stSortedSet *distinctComponents = stList_getSortedSet(componentList, NULL);
    stList_destruct(componentList);
    stSortedSet_setDestructor(distinctComponents, (void(*)(void *)) stSortedSet_destruct);
    stSortedSet_destruct(distinctComponents);
    stHash_destruct(nodesToComponents);
}
Exemple #2
0
/*
 * Exercises stSet_search on the two fixture sets created by testSetup().
 * set0 is expected to find only the exact objects that were inserted, while
 * set1 (built with stIntTuple hash/equality functions) is expected to find an
 * inserted member when probed with a different tuple of equal contents.
 */
static void test_stSet_search(CuTest* testCase) {
    testSetup();
    void *members[] = { one, two, three, four, five, six };
    const int64_t memberCount = sizeof(members) / sizeof(members[0]);

    //Probes that were never inserted into either set.
    stIntTuple *equalToOne = stIntTuple_construct1( 0);
    stIntTuple *unmatched = stIntTuple_construct2(10, 0);
    stIntTuple *equalToSix = stIntTuple_construct1( 5);

    //set0: each inserted object is found and returned as itself
    for (int64_t m = 0; m < memberCount; m++) {
        CuAssertTrue(testCase, stSet_search(set0, members[m]) == members[m]);
    }
    //set0: distinct objects are not found, even when their contents match a member
    CuAssertTrue(testCase, stSet_search(set0, equalToOne) == NULL);
    CuAssertTrue(testCase, stSet_search(set0, unmatched) == NULL);
    CuAssertTrue(testCase, stSet_search(set0, equalToSix) == NULL);

    //set1: each inserted object is found and returned as itself
    for (int64_t m = 0; m < memberCount; m++) {
        CuAssertTrue(testCase, stSet_search(set1, members[m]) == members[m]);
    }
    //set1: a tuple with no equal member is not found
    CuAssertTrue(testCase, stSet_search(set1, unmatched) == NULL);
    //set1: a tuple with equal contents resolves to the stored member
    CuAssertTrue(testCase, stSet_search(set1, equalToOne) == one);
    CuAssertTrue(testCase, stSet_search(set1, equalToSix) == six);

    stIntTuple_destruct(equalToOne);
    stIntTuple_destruct(unmatched);
    stIntTuple_destruct(equalToSix);
    testTeardown();
}
/*
 * Independent, deliberately simple recomputation of the components implied by
 * filteredEdges, used to cross-check the real implementation.
 *
 * Returns a hash from every node in the file-level 'nodes' list to the
 * stSortedSet of nodes forming its component. The hash is constructed without
 * key or value destructors, so the caller must free the component sets itself.
 */
static stHash *getComponents(stList *filteredEdges) {
    stHash *nodesToComponents = stHash_construct3((uint64_t(*)(const void *)) stIntTuple_hashKey,
            (int(*)(const void *, const void *)) stIntTuple_equalsFn, NULL, NULL);

    //Begin with each node alone in its own component.
    for (int64_t n = 0; n < stList_length(nodes); n++) {
        stIntTuple *singleNode = stList_get(nodes, n);
        stSortedSet *singleton = stSortedSet_construct();
        stSortedSet_insert(singleton, singleNode);
        stHash_insert(nodesToComponents, singleNode, singleton);
    }

    //Each edge joins the components of its two endpoints (tuple positions 1 and 2).
    for (int64_t e = 0; e < stList_length(filteredEdges); e++) {
        stIntTuple *edge = stList_get(filteredEdges, e);
        stIntTuple *endA = stIntTuple_construct1( stIntTuple_get(edge, 1));
        stIntTuple *endB = stIntTuple_construct1( stIntTuple_get(edge, 2));
        stSortedSet *componentA = stHash_search(nodesToComponents, endA);
        stSortedSet *componentB = stHash_search(nodesToComponents, endB);
        assert(componentA != NULL && componentB != NULL);
        //The temporary keys are only needed for the two lookups above.
        stIntTuple_destruct(endA);
        stIntTuple_destruct(endB);
        if (componentA == componentB) {
            continue; //Endpoints already share a component.
        }
        //Re-point every member of the union at the merged set, then drop the old sets.
        stSortedSet *merged = stSortedSet_getUnion(componentA, componentB);
        stSortedSetIterator *memberIt = stSortedSet_getIterator(merged);
        stIntTuple *member;
        while ((member = stSortedSet_getNext(memberIt)) != NULL) {
            stHash_insert(nodesToComponents, member, merged);
        }
        stSortedSet_destructIterator(memberIt);
        stSortedSet_destruct(componentA);
        stSortedSet_destruct(componentB);
    }
    return nodesToComponents;
}
/*
 * Sanity-checks a reference against the file-level fixture data (nodeNumber,
 * zMatrix, stubs, chains):
 *  - its adjacency edges number exactly nodeNumber / 2 and pass checkEdges
 *    over the node ids 0..nodeNumber-1;
 *  - totalScore matches the score recomputed by calculateZScoreOfReference;
 *  - the adjacency edges combined with the stubs and chains yield as many
 *    components as there are stubs, which must also equal the reference length.
 */
static void checkIsValidReference(CuTest *testCase, stList *reference,
        double totalScore) {
    stList *adjacencyEdges = convertReferenceToAdjacencyEdges(reference);

    //Two nodes per adjacency edge must account for every node.
    CuAssertIntEquals(testCase, nodeNumber, stList_length(adjacencyEdges) * 2);
    stSortedSet *nodeSet = stSortedSet_construct3((int(*)(const void *, const void *)) stIntTuple_cmpFn,
            (void(*)(void *)) stIntTuple_destruct);
    int64_t nodeId = 0;
    while (nodeId < nodeNumber) {
        stSortedSet_insert(nodeSet, stIntTuple_construct1( nodeId));
        nodeId++;
    }
    checkEdges(adjacencyEdges, nodeSet, 1, 0);

    //The reported score must agree with an independent recomputation.
    double recomputedScore = calculateZScoreOfReference(reference, nodeNumber, zMatrix);
    CuAssertDblEquals(testCase, recomputedScore, totalScore, 0.000001);

    //Component structure of adjacency edges + stubs + chains.
    //The combined list is a copy made with a NULL destructor argument.
    stList *combinedEdges = stList_copy(adjacencyEdges, NULL);
    stList_appendAll(combinedEdges, stubs);
    stList_appendAll(combinedEdges, chains);
    stList *componentList = getComponents(combinedEdges);
    CuAssertIntEquals(testCase, stList_length(stubs), stList_length(reference));
    CuAssertIntEquals(testCase, stList_length(stubs), stList_length(componentList));

    //Release everything allocated above.
    stList_destruct(componentList);
    stSortedSet_destruct(nodeSet);
    stList_destruct(combinedEdges);
    stList_destruct(adjacencyEdges);
}
/*
 * Collects the adjacency edges that cross a component boundary, i.e. edges
 * with exactly one endpoint (tuple positions 0 and 1) inside a given component.
 *
 * components: list whose elements are passed to getNodeSetOfEdges to obtain
 *             the node set of each component.
 * nodesToNonZeroWeightedAdjacencyEdges: hash from node to the list of
 *             adjacency edges touching it; nodes without an entry are skipped.
 *
 * Returns a new list of the bridging edges. The edges themselves are not
 * copied, and an edge is appended each time it is encountered.
 */
static stList *getEdgesThatBridgeComponents(stList *components,
        stHash *nodesToNonZeroWeightedAdjacencyEdges) {
    stList *bridges = stList_construct();

    for (int64_t c = 0; c < stList_length(components); c++) {
        stSortedSet *memberNodes = getNodeSetOfEdges(stList_get(components, c));
        stSortedSetIterator *memberIt = stSortedSet_getIterator(memberNodes);
        stIntTuple *member;
        while ((member = stSortedSet_getNext(memberIt)) != NULL) {
            stList *incident = stHash_search(nodesToNonZeroWeightedAdjacencyEdges, member);
            if (incident == NULL) {
                continue; //No weighted adjacency edges at this node.
            }
            for (int64_t e = 0; e < stList_length(incident); e++) {
                stIntTuple *adjacency = stList_get(incident, e);
                stIntTuple *endA = stIntTuple_construct1(stIntTuple_get(adjacency, 0));
                stIntTuple *endB = stIntTuple_construct1(stIntTuple_get(adjacency, 1));
                int endAInside = stSortedSet_search(memberNodes, endA) != NULL;
                int endBInside = stSortedSet_search(memberNodes, endB) != NULL;
                stIntTuple_destruct(endA);
                stIntTuple_destruct(endB);
                //The edge was found via a member node, so at least one end is inside.
                assert(endAInside || endBInside);
                if (!(endAInside && endBInside)) {
                    stList_append(bridges, adjacency);
                }
            }
        }
        stSortedSet_destructIterator(memberIt);
        stSortedSet_destruct(memberNodes);
    }

    return bridges;
}
/*
 * Randomised test of stPosetAlignment_isPossible / stPosetAlignment_add.
 *
 * For each of 100 trials a fresh fixture is created by setup(), random
 * sequence lengths are drawn, and random aligned pairs between two different
 * sequences are proposed. Every proposal is cross-checked against
 * containsACycle() on the list of pairs accepted so far plus the proposal:
 *  - a pair reported as possible must not create a cycle and must be addable;
 *  - a pair reported as impossible must create a cycle, and removing it must
 *    restore an acyclic set.
 */
static void test_stPosetAlignment_addAndIsPossible(CuTest *testCase) {
    for(int64_t trial=0; trial<100; trial++) {
        setup();

        //Make random number of sequences.
        stList *sequenceLengths = stList_construct3(0, (void (*)(void *))stIntTuple_destruct);
        for(int64_t i=0; i<sequenceNumber; i++) {
            stList_append(sequenceLengths, stIntTuple_construct1( st_randomInt(0, MAX_SEQUENCE_SIZE)));
        }

        //Propose random alignment pairs...
        stList *pairs = stList_construct3(0, (void(*)(void *))stIntTuple_destruct);
        int64_t maxAlignedPairs = st_randomInt(0, MAX_ALIGNMENTS);
        if(sequenceNumber > 0) {
            for(int64_t i=0; i<maxAlignedPairs; i++) {
                int64_t seq1 = st_randomInt(0, sequenceNumber);
                int64_t seqLength1 = stIntTuple_get(stList_get(sequenceLengths, seq1), 0);
                if(seqLength1 == 0) {
                    continue; //No position can be drawn from an empty sequence.
                }
                int64_t position1 = st_randomInt(0, seqLength1);
                int64_t seq2 = st_randomInt(0, sequenceNumber);
                //The second position must be bounded by the length of seq2 itself
                //(this previously read the length of seq1 by mistake).
                int64_t seqLength2 = stIntTuple_get(stList_get(sequenceLengths, seq2), 0);
                if(seqLength2 == 0) {
                    continue;
                }
                int64_t position2 = st_randomInt(0, seqLength2);
                if(seq1 != seq2) {
                    //The proposal is appended first so containsACycle sees it together with the accepted pairs.
                    stList_append(pairs, stIntTuple_construct4( seq1, position1, seq2, position2));
                    if(stPosetAlignment_isPossible(posetAlignment, seq1, position1, seq2, position2)) {
                        st_logInfo("In %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " \n", seq1, position1, seq2, position2);
                        //For each accepted pair check it doesn't create a cycle.
                        CuAssertTrue(testCase, !containsACycle(pairs, sequenceNumber));
                        CuAssertTrue(testCase, stPosetAlignment_add(posetAlignment, seq1, position1, seq2, position2));
                    }
                    else {
                        st_logInfo("Out %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " \n", seq1, position1, seq2, position2);
                        //For each rejected pair check it creates a cycle..
                        CuAssertTrue(testCase, containsACycle(pairs, sequenceNumber));
                        CuAssertTrue(testCase, !stPosetAlignment_isPossible(posetAlignment, seq1, position1, seq2, position2));
                        stIntTuple_destruct(stList_pop(pairs)); //remove the pair which created the cycle.
                        CuAssertTrue(testCase, !containsACycle(pairs, sequenceNumber)); //Check we're back to being okay..
                    }
                }
            }
        }

        //Cleanup
        stList_destruct(sequenceLengths);
        stList_destruct(pairs);
        teardown();
        st_logInfo("Passed a random ordering test with %" PRIi64 " sequences and %" PRIi64 " aligned pairs\n", sequenceNumber, maxAlignedPairs);
    }
}
/*
 * Builds a hash from each path in maximalHaplotypePaths to a newly allocated
 * one-element stIntTuple holding contigPathLength() of that path.
 *
 * The hash is created with stHash_construct(), i.e. no destructors are
 * supplied here; presumably the caller is responsible for freeing the tuples
 * (TODO confirm against callers).
 */
stHash *buildContigPathToContigPathLengthHash(
        stList *maximalHaplotypePaths) {
    stHash *pathToLength = stHash_construct();
    for (int64_t p = 0; p < stList_length(maximalHaplotypePaths); p++) {
        stList *path = stList_get(maximalHaplotypePaths, p);
        stIntTuple *lengthTuple = stIntTuple_construct1( contigPathLength(path));
        stHash_insert(pathToLength, path, lengthTuple);
    }
    return pathToLength;
}
Exemple #8
0
/*
 * Creates the shared fixture for the stSet tests: two sets (set0, set1) that
 * both contain the same six single-value tuples 'one'..'six', holding the
 * values 0..5.
 */
static void testSetup() {
    // set0: default construction, intended to match members by their address.
    set0 = stSet_construct();
    // set1: hashes and compares the tuples' contents, and destructs them with stIntTuple_destruct.
    set1 = stSet_construct3((uint64_t(*)(const void *)) stIntTuple_hashKey, 
                            (int(*)(const void *, const void *)) stIntTuple_equalsFn,
                            (void(*)(void *)) stIntTuple_destruct);

    one = stIntTuple_construct1( 0);
    two = stIntTuple_construct1( 1);
    three = stIntTuple_construct1( 2);
    four = stIntTuple_construct1( 3);
    five = stIntTuple_construct1( 4);
    six = stIntTuple_construct1( 5);

    // The very same objects go into both sets, set0 first.
    void *members[] = { one, two, three, four, five, six };
    const int64_t memberCount = sizeof(members) / sizeof(members[0]);
    for (int64_t m = 0; m < memberCount; m++) {
        stSet_insert(set0, members[m]);
    }
    for (int64_t m = 0; m < memberCount; m++) {
        stSet_insert(set1, members[m]);
    }
}
/*
 * Recursive helper that gathers into 'component' every edge reachable from
 * 'node' through nodesToEdges.
 *
 * nodesToEdges is consumed as the walk proceeds: the entry for a visited node
 * is removed from the hash and its edge list is destructed, so a node that has
 * already been handled (or has no entry) ends the recursion immediately.
 * The recursion depth can grow with the number of nodes.
 */
static void getComponentsP(stHash *nodesToEdges, int64_t node,
        stSortedSet *component) {
    stIntTuple *lookupKey = stIntTuple_construct1( node);
    stList *incident = stHash_search(nodesToEdges, lookupKey);
    if (incident == NULL) {
        //Nothing recorded for this node (any more).
        stIntTuple_destruct(lookupKey);
        return;
    }

    //Unhook the node before descending so the recursive calls do not revisit it.
    stHash_remove(nodesToEdges, lookupKey);
    int64_t e = 0;
    while (e < stList_length(incident)) {
        stIntTuple *edge = stList_get(incident, e);
        if (stSortedSet_search(component, edge) == NULL) {
            stSortedSet_insert(component, edge);
        }
        //Follow both endpoints of the edge (tuple positions 0 and 1).
        getComponentsP(nodesToEdges, stIntTuple_get(edge, 0), component);
        getComponentsP(nodesToEdges, stIntTuple_get(edge, 1), component);
        e++;
    }
    stList_destruct(incident);
    stIntTuple_destruct(lookupKey);
}
/*
 * Builds a fresh random fixture in the file-level variables 'nodes', 'edges'
 * and 'maxComponentSize', after discarding the previous one with teardown().
 *
 *  - nodes: one single-value tuple per node id 0..nodeNumber-1;
 *  - edges: for each pair i <= j (self pairs included), an edge tuple
 *    (random value from st_randomInt(1, 100), i, j) is added with a
 *    probability chosen once per fixture;
 *  - maxComponentSize: grows with the logarithm of the node count.
 */
static void setup() {
    teardown();

    //Make nodes
    nodes = stList_construct3(0, (void(*)(void *)) stIntTuple_destruct);
    int64_t nodeNumber = st_randomInt(0, 1000);
    for (int64_t i = 0; i < nodeNumber; i++) {
        stList_append(nodes, stIntTuple_construct1( i));
    }

    //Make edges
    edges = stList_construct3(0, (void(*)(void *)) stIntTuple_destruct);
    float edgeProb = st_random();
    for (int64_t i = 0; i < nodeNumber; i++) {
        for (int64_t j = i; j < nodeNumber; j++) {
            if (st_random() <= edgeProb) {
                stList_append(edges, stIntTuple_construct3( st_randomInt(1, 100), i, j));
            }
        }
    }

    //Max component size.
    //The random draw above is requested with a lower bound of 0, and log(0) is
    //-infinity; converting that to an integer is undefined behaviour. An empty
    //graph therefore gets the same limit as a single-node graph (log(1) == 0).
    maxComponentSize = nodeNumber > 0 ? 1 + log(nodeNumber) * 10 : 1;
}
Exemple #11
0
void cactusDisk_write(CactusDisk *cactusDisk) {
    Flower *flower;
    int64_t recordSize;

    stList *removeRequests = stList_construct3(0, (void (*)(void *)) stIntTuple_destruct);

    st_logDebug("Starting to write the cactus to disk\n");

    stSortedSetIterator *it = stSortedSet_getIterator(cactusDisk->flowers);
    //Sort flowers to update.
    while ((flower = stSortedSet_getNext(it)) != NULL) {
        cactusDisk_addUpdateRequest(cactusDisk, flower);
    }
    stSortedSet_destructIterator(it);

    st_logDebug("Got the flowers to update\n");

    //Remove nets that are marked for deletion..
    it = stSortedSet_getIterator(cactusDisk->flowerNamesMarkedForDeletion);
    char *nameString;
    while ((nameString = stSortedSet_getNext(it)) != NULL) {
        Name name = cactusMisc_stringToName(nameString);
        if (containsRecord(cactusDisk, name)) {
            stList_append(cactusDisk->updateRequests, stKVDatabaseBulkRequest_constructUpdateRequest(name, &name, 0)); //We set it to null in the first atomic operation.
            stList_append(removeRequests, stIntTuple_construct1(name));
        }
    }
    stSortedSet_destructIterator(it);

    st_logDebug("Avoided updating nets marked for deletion\n");

    // Insert and/or update meta-sequences.
    it = stSortedSet_getIterator(cactusDisk->metaSequences);
    MetaSequence *metaSequence;
    while ((metaSequence = stSortedSet_getNext(it)) != NULL) {
        void *vA =
                binaryRepresentation_makeBinaryRepresentation(metaSequence,
                        (void (*)(void *, void (*)(const void * ptr, size_t size, size_t count))) metaSequence_writeBinaryRepresentation,
                        &recordSize);
        //Compression
        vA = compress(vA, &recordSize);
        if (!containsRecord(cactusDisk, metaSequence_getName(metaSequence))) {
            stList_append(cactusDisk->updateRequests,
                    stKVDatabaseBulkRequest_constructInsertRequest(metaSequence_getName(metaSequence), vA, recordSize));
        } else {
            stList_append(cactusDisk->updateRequests,
                    stKVDatabaseBulkRequest_constructUpdateRequest(metaSequence_getName(metaSequence), vA, recordSize));
        }
        free(vA);
    }
    stSortedSet_destructIterator(it);

    st_logDebug("Got the sequences we are going to add to the database.\n");

    if (!containsRecord(cactusDisk, CACTUS_DISK_PARAMETER_KEY)) { //We only write the parameters once.
        //Finally the database info.
        void *cactusDiskParameters =
                binaryRepresentation_makeBinaryRepresentation(cactusDisk,
                        (void (*)(void *, void (*)(const void * ptr, size_t size, size_t count))) cactusDisk_writeBinaryRepresentation,
                        &recordSize);
        //Compression
        cactusDiskParameters = compress(cactusDiskParameters, &recordSize);
        stList_append(cactusDisk->updateRequests,
                stKVDatabaseBulkRequest_constructInsertRequest(CACTUS_DISK_PARAMETER_KEY, cactusDiskParameters,
                        recordSize));
        free(cactusDiskParameters);
    }

    st_logDebug("Checked if need to write the initial parameters\n");

    if (stList_length(cactusDisk->updateRequests) > 0) {
        st_logDebug("Going to write %" PRIi64 " updates\n", stList_length(cactusDisk->updateRequests));
        stTry
            {
                st_logDebug("Writing %" PRIi64 " updates\n", stList_length(cactusDisk->updateRequests));
                assert(stList_length(cactusDisk->updateRequests) > 0);
                stKVDatabase_bulkSetRecords(cactusDisk->database, cactusDisk->updateRequests);
            }
            stCatch(except)
                {
                    stThrowNewCause(except, ST_KV_DATABASE_EXCEPTION_ID,
                            "Failed when trying to set records in updating the cactus disk");
                }stTryEnd
        ;
    }
/*
 * Groups haplotype paths into scaffold paths and computes the combined length
 * of each group.
 *
 * haplotypePaths: list of haplotype paths (each a non-empty list of Segments).
 * haplotypePathToScaffoldPathHash: output hash, filled in here. On return each
 *     haplotype path maps to a "bucket" (stSortedSet of haplotype paths) that
 *     it shares with every path it has been joined to.
 * haplotypeEventStrings, contaminationEventStrings, capCodeParameters:
 *     forwarded to getCapCode / trueAdjacency / hasCapInEvents.
 *
 * Two paths are joined when the 5' cap of a path's 5' segment is classified by
 * getCapCode as SCAFFOLD_GAP or AMBIGUITY_GAP; the partner is the path owning
 * the first segment, found by walking adjacent caps, whose 5' end is in the
 * haplotype events.
 *
 * Returns a hash from haplotype path to a one-element stIntTuple holding the
 * summed length of the paths merged into its bucket. Superseded length tuples
 * and superseded buckets are destructed here.
 */
static stHash *getScaffoldPathsP(stList *haplotypePaths, stHash *haplotypePathToScaffoldPathHash,
        stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters) {
    stHash *haplotypeToMaximalHaplotypeLengthHash = buildContigPathToContigPathLengthHash(haplotypePaths);
    stHash *segmentToMaximalHaplotypePathHash = buildSegmentToContigPathHash(haplotypePaths);
    //Every path starts in a bucket of its own.
    for (int64_t i = 0; i < stList_length(haplotypePaths); i++) {
        stSortedSet *bucket = stSortedSet_construct();
        stHash_insert(haplotypePathToScaffoldPathHash, stList_get(haplotypePaths, i), bucket);
        stSortedSet_insert(bucket, stList_get(haplotypePaths, i));
    }
    for (int64_t i = 0; i < stList_length(haplotypePaths); i++) {
        stList *haplotypePath = stList_get(haplotypePaths, i);
        assert(stList_length(haplotypePath) > 0);
        //Pick the positive-strand segment at the 5' end of the path: either the
        //first segment, or the reverse of the last one if the first is on the negative strand.
        Segment *_5Segment = stList_get(haplotypePath, 0);
        if (!segment_getStrand(_5Segment)) {
            _5Segment = segment_getReverse(stList_get(haplotypePath, stList_length(haplotypePath) - 1));
        }
        assert(segment_getStrand(_5Segment));
        if (getAdjacentCapsSegment(segment_get5Cap(_5Segment)) != NULL) {
            assert(!trueAdjacency(segment_get5Cap(_5Segment), haplotypeEventStrings));
        }
        int64_t insertLength;
        int64_t deleteLength;
        Cap *otherCap;
        enum CapCode _5CapCode = getCapCode(segment_get5Cap(_5Segment), &otherCap, haplotypeEventStrings, contaminationEventStrings, &insertLength, &deleteLength, capCodeParameters);
        if (_5CapCode == SCAFFOLD_GAP || _5CapCode == AMBIGUITY_GAP) {
            assert(stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath) != NULL);
            int64_t j = stIntTuple_get(stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath), 0);
            //Walk along adjacent segments until one whose 5' end belongs to the haplotype events.
            Segment *adjacentSegment = getAdjacentCapsSegment(segment_get5Cap(_5Segment));
            assert(adjacentSegment != NULL);
            while (!hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)) { //is not a haplotype end
                adjacentSegment = getAdjacentCapsSegment(segment_get5Cap(adjacentSegment));
                assert(adjacentSegment != NULL);
            }
            assert(adjacentSegment != NULL);
            assert(hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)); //is a haplotype end
            //The segment may be recorded in either orientation.
            stList *adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, adjacentSegment);
            if (adjacentHaplotypePath == NULL) {
                adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, segment_getReverse(
                        adjacentSegment));
            }
            assert(adjacentHaplotypePath != NULL);
            assert(adjacentHaplotypePath != haplotypePath);
            assert(stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath) != NULL);
            int64_t k = stIntTuple_get(stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath), 0);

            //Now merge the buckets and make new int tuples..
            stSortedSet *bucket1 = stHash_search(haplotypePathToScaffoldPathHash, haplotypePath);
            stSortedSet *bucket2 = stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath);
            assert(bucket1 != NULL);
            assert(bucket2 != NULL);
            assert(bucket1 != bucket2);
            stSortedSet *bucket3 = stSortedSet_getUnion(bucket1, bucket2);
            stSortedSetIterator *bucketIt = stSortedSet_getIterator(bucket3);
            stList *l;
            while ((l = stSortedSet_getNext(bucketIt)) != NULL) {
                //Do the bucket first
                assert(stHash_search(haplotypePathToScaffoldPathHash, l) == bucket1 || stHash_search(haplotypePathToScaffoldPathHash, l) == bucket2);
                stHash_remove(haplotypePathToScaffoldPathHash, l);
                stHash_insert(haplotypePathToScaffoldPathHash, l, bucket3);
                //Now the length
                stIntTuple *m = stHash_remove(haplotypeToMaximalHaplotypeLengthHash, l);
                assert(m != NULL);
                assert(stIntTuple_get(m, 0) == j || stIntTuple_get(m, 0) == k);
                stHash_insert(haplotypeToMaximalHaplotypeLengthHash, l, stIntTuple_construct1( j + k));
                stIntTuple_destruct(m);
            }
            assert(stHash_search(haplotypePathToScaffoldPathHash, haplotypePath) == bucket3);
            assert(stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath) == bucket3);
            stSortedSet_destructIterator(bucketIt);
            //Every member of the two old buckets now maps to bucket3, so nothing
            //refers to them any more; free them here to avoid leaking one pair per merge.
            stSortedSet_destruct(bucket1);
            stSortedSet_destruct(bucket2);
        }
    }
    stHash_destruct(segmentToMaximalHaplotypePathHash);
    return haplotypeToMaximalHaplotypeLengthHash;
}