Example #1
0
/*
 * Returns 1 if the two sorted sets have the same size, equal comparators, and
 * every pair of elements met while walking both sets side by side compares
 * equal under the first set's comparison function. Returns 0 otherwise.
 */
int stSortedSet_equals(stSortedSet *sortedSet1, stSortedSet *sortedSet2) {
    //Cheap rejections first: differing sizes or differing comparators.
    if(stSortedSet_size(sortedSet1) != stSortedSet_size(sortedSet2) ||
            !stSortedSet_comparatorsEqual(sortedSet1, sortedSet2)) {
        return 0;
    }
    int (*compare)(const void *, const void *) = stSortedSet_getComparator(sortedSet1)->compareFn;

    stSortedSetIterator *leftIt = stSortedSet_getIterator(sortedSet1);
    stSortedSetIterator *rightIt = stSortedSet_getIterator(sortedSet2);
    int areEqual = 1;
    for(;;) {
        void *left = stSortedSet_getNext(leftIt);
        void *right = stSortedSet_getNext(rightIt);
        if(left == NULL || right == NULL) {
            break; //One of the iterators is exhausted.
        }
        if(compare(left, right) != 0) {
            areEqual = 0;
            break;
        }
    }
    //Single exit point so both iterators are always released.
    stSortedSet_destructIterator(leftIt);
    stSortedSet_destructIterator(rightIt);
    return areEqual;
}
Example #2
0
/*
 * Exercises forward and backward traversal of a sorted set iterator, and checks
 * that an iterator copied once the end has been reached walks backwards in
 * lock-step with the iterator it was copied from.
 */
static void test_stSortedSetIterator(CuTest* testCase) {
    sonLibSortedSetTestSetup();
    int32_t idx;
    for(idx = 0; idx < size; idx++) {
        stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[idx]));
    }
    stSortedSetIterator *primaryIt = stSortedSet_getIterator(sortedSet);
    CuAssertTrue(testCase, primaryIt != NULL);

    //Forward pass: values must come out in sortedInput order, then NULL.
    for(idx = 0; idx < sortedSize; idx++) {
        stIntTuple *tuple = stSortedSet_getNext(primaryIt);
        CuAssertIntEquals(testCase, sortedInput[idx], stIntTuple_getPosition(tuple, 0));
    }
    CuAssertTrue(testCase, stSortedSet_getNext(primaryIt) == NULL);

    //Copy the exhausted iterator; both must now step backwards identically.
    stSortedSetIterator *copiedIt = stSortedSet_copyIterator(primaryIt);
    CuAssertTrue(testCase, copiedIt != NULL);
    for(idx = 0; idx < sortedSize; idx++) {
        stIntTuple *fromPrimary = stSortedSet_getPrevious(primaryIt);
        CuAssertIntEquals(testCase, sortedInput[sortedSize - 1 - idx], stIntTuple_getPosition(fromPrimary, 0));
        stIntTuple *fromCopy = stSortedSet_getPrevious(copiedIt);
        CuAssertIntEquals(testCase, sortedInput[sortedSize - 1 - idx], stIntTuple_getPosition(fromCopy, 0));
    }
    CuAssertTrue(testCase, stSortedSet_getPrevious(primaryIt) == NULL);
    CuAssertTrue(testCase, stSortedSet_getPrevious(copiedIt) == NULL);

    stSortedSet_destructIterator(primaryIt);
    stSortedSet_destructIterator(copiedIt);
    sonLibSortedSetTestTeardown();
}
Example #3
0
/*
 * Tests stSortedSet_getIteratorFrom: for every value in sortedInput an iterator
 * started from that value must yield that value on the first call to
 * stSortedSet_getNext, and asking for a value that is not in the set must throw
 * an exception with id SORTED_SET_EXCEPTION_ID.
 */
static void test_stSortedSetIterator_getIteratorFrom(CuTest* testCase) {
    sonLibSortedSetTestSetup();
    int32_t i;
    for(i=0; i<size; i++) {
        stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[i]));
    }
    stSortedSetIterator *iterator = stSortedSet_getIterator(sortedSet);
    CuAssertTrue(testCase, iterator != NULL);

    for(i=0; i<sortedSize; i++) {
        //The query tuple is only a lookup key; it is never inserted into the set,
        //so this test owns it and frees it at the end of the iteration.
        stIntTuple *query = stIntTuple_construct(1, sortedInput[i]);
        stSortedSetIterator *it = stSortedSet_getIteratorFrom(sortedSet, query);
        stIntTuple *intTuple = stSortedSet_getNext(it);
        CuAssertTrue(testCase, intTuple != NULL);
        CuAssertIntEquals(testCase, sortedInput[i], stIntTuple_getPosition(intTuple, 0));
        stSortedSet_destructIterator(it);
        stIntTuple_destruct(query);
    }

    //Built outside the try block so it can be released whether or not the call throws.
    stIntTuple *missing = stIntTuple_construct(1, 7); //This number is not in the input.
    stTry {
        stSortedSet_getIteratorFrom(sortedSet, missing);
        CuAssertTrue(testCase, 0); //Reaching here means no exception was thrown.
    } stCatch(except) {
        CuAssertTrue(testCase, stExcept_getId(except) == SORTED_SET_EXCEPTION_ID);
    }
    stTryEnd
    stIntTuple_destruct(missing);

    //Previously leaked: release the iterator obtained at the top of the test.
    stSortedSet_destructIterator(iterator);
    sonLibSortedSetTestTeardown();
}
static void makeMatchingPerfect(stList *chosenEdges, stList *adjacencyEdges,
        stSortedSet *nodes) {
    /*
     * Walks the nodes in iteration order and pairs up consecutive nodes that are
     * not yet touched by a chosen edge, appending one edge per pair to chosenEdges.
     * On exit the asserts require that no node is left unpaired and that
     * chosenEdges holds exactly half as many edges as there are nodes.
     */
    stSortedSet *attachedNodes = getNodeSetOfEdges(chosenEdges);
    stHash *nodesToAdjacencyEdges = getNodesToEdgesHash(adjacencyEdges);
    stIntTuple *waitingNode = NULL; //Unattached node still waiting for a partner
    stSortedSetIterator *nodeIt = stSortedSet_getIterator(nodes);
    for (stIntTuple *node = stSortedSet_getNext(nodeIt); node != NULL; node = stSortedSet_getNext(nodeIt)) {
        if (stSortedSet_search(attachedNodes, node) != NULL) {
            continue; //Already covered by a chosen edge
        }
        if (waitingNode == NULL) {
            waitingNode = node;
            continue;
        }
        stList_append(chosenEdges,
                getEdgeForNodes(stIntTuple_get(waitingNode, 0), stIntTuple_get(node, 0), nodesToAdjacencyEdges));
        waitingNode = NULL;
    }
    stSortedSet_destructIterator(nodeIt);
    assert(waitingNode == NULL);
    stSortedSet_destruct(attachedNodes);
    assert(stList_length(chosenEdges) * 2 == stSortedSet_size(nodes));
    stHash_destruct(nodesToAdjacencyEdges);
}
static stHash *getComponents(stList *filteredEdges) {
    /*
     * Builds a hash from each node to the sorted set of nodes in its component.
     * Every node starts in a singleton component; each edge whose two nodes lie in
     * different components replaces those components with their union.
     * (A deliberately simple reimplementation of the greedy function, kept to trap typos.)
     */
    stHash *nodesToComponents = stHash_construct3((uint64_t(*)(const void *)) stIntTuple_hashKey,
            (int(*)(const void *, const void *)) stIntTuple_equalsFn, NULL, NULL);

    //Seed: one singleton component per node.
    for (int64_t nodeIdx = 0; nodeIdx < stList_length(nodes); nodeIdx++) {
        stIntTuple *node = stList_get(nodes, nodeIdx);
        stSortedSet *singleton = stSortedSet_construct();
        stSortedSet_insert(singleton, node);
        stHash_insert(nodesToComponents, node, singleton);
    }

    //Merge: join the components found at positions 1 and 2 of each edge tuple.
    for (int64_t edgeIdx = 0; edgeIdx < stList_length(filteredEdges); edgeIdx++) {
        stIntTuple *edge = stList_get(filteredEdges, edgeIdx);
        stIntTuple *endA = stIntTuple_construct1(stIntTuple_get(edge, 1));
        stIntTuple *endB = stIntTuple_construct1(stIntTuple_get(edge, 2));
        stSortedSet *componentA = stHash_search(nodesToComponents, endA);
        stSortedSet *componentB = stHash_search(nodesToComponents, endB);
        assert(componentA != NULL && componentB != NULL);
        if (componentA != componentB) {
            stSortedSet *merged = stSortedSet_getUnion(componentA, componentB);
            //Repoint every member of the union at the merged component.
            stSortedSetIterator *memberIt = stSortedSet_getIterator(merged);
            for (stIntTuple *member = stSortedSet_getNext(memberIt); member != NULL;
                    member = stSortedSet_getNext(memberIt)) {
                stHash_insert(nodesToComponents, member, merged);
            }
            stSortedSet_destructIterator(memberIt);
            stSortedSet_destruct(componentA);
            stSortedSet_destruct(componentB);
        }
        stIntTuple_destruct(endA);
        stIntTuple_destruct(endB);
    }
    return nodesToComponents;
}
Example #6
0
/*
 * Recursive step of the depth first search used to detect whether the thing has
 * an acyclic ordering. Returns 1 if a cycle was detected from seqPos, else 0.
 * started holds positions whose exploration has begun, done those fully explored.
 */
static int64_t dfs(stHash *adjacencyList, stIntTuple *seqPos,
                   stSortedSet *started, stSortedSet *done) {
    if(stSortedSet_search(started, seqPos) != NULL) {
        //Started but not done: we have detected a cycle.
        //Started and done: this area was already explored, but no cycle.
        return stSortedSet_search(done, seqPos) == NULL ? 1 : 0;
    }
    stSortedSet_insert(started, seqPos);

    int64_t foundCycle = 0;

    //Successor: same first entry, second entry incremented by one.
    stIntTuple *successor = stIntTuple_construct2(stIntTuple_get(seqPos, 0), stIntTuple_get(seqPos, 1) + 1);
    stSortedSet *successorColumn = stHash_search(adjacencyList, successor);
    if(successorColumn != NULL) { //It is in the adjacency list, so we can do the recursion
        assert(stSortedSet_search(successorColumn, successor) != NULL);
        stSortedSetIterator *columnIt = stSortedSet_getIterator(successorColumn);
        stIntTuple *member;
        while((member = stSortedSet_getNext(columnIt)) != NULL) {
            //Once a cycle has been found the remaining members are not explored.
            if(!foundCycle && dfs(adjacencyList, member, started, done)) {
                foundCycle = 1;
            }
        }
        stSortedSet_destructIterator(columnIt);
    }
    stIntTuple_destruct(successor);
    stSortedSet_insert(done, seqPos);
    return foundCycle;
}
Example #7
0
/*
 * Allocates an iterator for the given block, backed by a sorted set iterator
 * over block->blockContents->segments.
 */
Block_InstanceIterator *block_getInstanceIterator(Block *block) {
	Block_InstanceIterator *instanceIt = st_malloc(sizeof(struct _block_instanceIterator));
	instanceIt->block = block;
	instanceIt->iterator = stSortedSet_getIterator(block->blockContents->segments);
	return instanceIt;
}
Example #8
0
/*
 * Creates an iterator over items positioned at the element found for item.
 * The trailing stSortedSet_getPrevious call steps the iterator back one place
 * after the lookup, so the caller's first stSortedSet_getNext is expected to
 * yield the found element.
 *
 * Throws SORTED_SET_EXCEPTION_ID if item cannot be found in the set; the
 * iterator allocated here is released before the exception is raised.
 */
stSortedSetIterator *stSortedSet_getIteratorFrom(stSortedSet *items, void *item) {
    stSortedSetIterator *iterator = stSortedSet_getIterator(items);
    if(avl_t_find(&iterator->traverser, items->sortedSet, item) == NULL) {
        //Control leaves this function via the throw, so free the iterator first or it leaks.
        stSortedSet_destructIterator(iterator);
        stThrowNew(SORTED_SET_EXCEPTION_ID, "Tried to create an iterator with an item that is not in the list of items");
    }
    stSortedSet_getPrevious(iterator);
    return iterator;
}
Example #9
0
/*
 * Builds a new sorted set using sortedSet's comparison function and the supplied
 * element destructor, then inserts every element of sortedSet into it. The same
 * element pointers are inserted; the elements themselves are not copied.
 */
stSortedSet *stSortedSet_copyConstruct(stSortedSet *sortedSet, void (*destructElementFn)(void *)) {
    stSortedSet *copy = stSortedSet_construct3(stSortedSet_getComparator(sortedSet)->compareFn, destructElementFn);
    stSortedSetIterator *sourceIt = stSortedSet_getIterator(sortedSet);
    for(void *element = stSortedSet_getNext(sourceIt); element != NULL; element = stSortedSet_getNext(sourceIt)) {
        stSortedSet_insert(copy, element);
    }
    stSortedSet_destructIterator(sourceIt);
    return copy;
}
Example #10
0
/*
 * Writes an end alignment to fileHandle. The first line holds the end's name and
 * the number of aligned pairs; each following line holds the subsequence
 * identifier, position, strand and score of one aligned pair, then the same four
 * fields of its reverse pair.
 */
void writeEndAlignmentToDisk(End *end, stSortedSet *endAlignment, FILE *fileHandle) {
    fprintf(fileHandle, "%s %" PRIi64 "\n", cactusMisc_nameToStringStatic(end_getName(end)), stSortedSet_size(endAlignment));
    stSortedSetIterator *pairIt = stSortedSet_getIterator(endAlignment);
    for(AlignedPair *forward = stSortedSet_getNext(pairIt); forward != NULL; forward = stSortedSet_getNext(pairIt)) {
        //First half of the line: the pair itself.
        fprintf(fileHandle, "%" PRIi64 " %" PRIi64 " %i %" PRIi64 " ", forward->subsequenceIdentifier, forward->position, forward->strand, forward->score);
        //Second half of the line: its reverse.
        AlignedPair *backward = forward->reverse;
        fprintf(fileHandle, "%" PRIi64 " %" PRIi64 " %i %" PRIi64 "\n", backward->subsequenceIdentifier, backward->position, backward->strand, backward->score);
    }
    stSortedSet_destructIterator(pairIt);
}
Example #11
0
/*
 * Returns a new list, sized to the set, whose slots are filled with the set's
 * elements in iteration order. The element pointers are shared with the set.
 */
stList *stSortedSet_getList(stSortedSet *sortedSet) {
    stList *elements = stList_construct2(stSortedSet_size(sortedSet));
    stSortedSetIterator *setIt = stSortedSet_getIterator(sortedSet);
    int32_t filled = 0; //Number of list slots written so far
    for(void *element = stSortedSet_getNext(setIt); element != NULL; element = stSortedSet_getNext(setIt)) {
        stList_set(elements, filled, element);
        filled++;
    }
    assert(filled == stSortedSet_size(sortedSet));
    stSortedSet_destructIterator(setIt);
    return elements;
}
Example #12
0
/* Check that every tuple in the set has a record in the database and that the
 * record holds the expected value. The first position of each tuple is used as
 * the record key, and the expected record value is that same number multiplied
 * by valueMult. Also checks that the database holds exactly as many records as
 * the set has tuples. */
static void readWriteAndRemoveRecordsLotsCheck(CuTest *testCase, stSortedSet *set, int valueMult) {
    CuAssertIntEquals(testCase, stSortedSet_size(set), stKVDatabase_getNumberOfRecords(database));
    stSortedSetIterator *tupleIt = stSortedSet_getIterator(set);
    for (stIntTuple *tuple = stSortedSet_getNext(tupleIt); tuple != NULL; tuple = stSortedSet_getNext(tupleIt)) {
        int32_t *storedValue = (int32_t *) stKVDatabase_getRecord(database, stIntTuple_getPosition(tuple, 0));
        CuAssertTrue(testCase, stKVDatabase_containsRecord(database, stIntTuple_getPosition(tuple, 0)));
        CuAssertIntEquals(testCase, valueMult*stIntTuple_getPosition(tuple, 0), *storedValue);
        free(storedValue); //This function frees the buffer returned by stKVDatabase_getRecord
    }
    stSortedSet_destructIterator(tupleIt);
}
Example #13
0
/*
 * Converts the fixture list into a sorted set ordered by strcmp and checks that
 * the set has stringNumber elements and iterates them in lexicographic order.
 */
void test_stList_getSortedSet(CuTest *testCase) {
    //Expected iteration order of the fixture strings under strcmp.
    static const char *expectedOrder[] = { "five", "four", "one", "three", "two" };
    const int expectedCount = (int) (sizeof(expectedOrder) / sizeof(expectedOrder[0]));

    setup();
    stSortedSet *sortedSet = stList_getSortedSet(list, (int (*)(const void *, const void *))strcmp);
    CuAssertTrue(testCase, stSortedSet_size(sortedSet) == stringNumber);
    stSortedSetIterator *iterator = stSortedSet_getIterator(sortedSet);
    for (int idx = 0; idx < expectedCount; idx++) {
        CuAssertStrEquals(testCase, expectedOrder[idx], stSortedSet_getNext(iterator));
    }
    stSortedSet_destructIterator(iterator);
    stSortedSet_destruct(sortedSet);
    teardown();
}
Example #14
0
/*
 * Returns a new sorted set, built with sortedSet1's comparison function and no
 * element destructor, into which every element of sortedSet1 and then every
 * element of sortedSet2 has been inserted.
 * Throws SORTED_SET_EXCEPTION_ID if the two sets' comparators are not equal.
 */
stSortedSet *stSortedSet_getUnion(stSortedSet *sortedSet1, stSortedSet *sortedSet2) {
    if(!stSortedSet_comparatorsEqual(sortedSet1, sortedSet2)) {
        stThrowNew(SORTED_SET_EXCEPTION_ID, "Comparators are not equal for creating the union of two sorted sets");
    }
    stSortedSet *unionSet = stSortedSet_construct3(stSortedSet_getComparator(sortedSet1)->compareFn, NULL);

    //Insert the members of each source in turn: first sortedSet1, then sortedSet2.
    stSortedSet *sources[2] = { sortedSet1, sortedSet2 };
    for(int sourceIdx = 0; sourceIdx < 2; sourceIdx++) {
        stSortedSetIterator *sourceIt = stSortedSet_getIterator(sources[sourceIdx]);
        for(void *element = stSortedSet_getNext(sourceIt); element != NULL; element = stSortedSet_getNext(sourceIt)) {
            stSortedSet_insert(unionSet, element);
        }
        stSortedSet_destructIterator(sourceIt);
    }

    return unionSet;
}
Example #15
0
/*
 * Returns a new sorted set, built with sortedSet1's comparison function and no
 * element destructor, holding those elements of sortedSet1 for which a search
 * of sortedSet2 finds nothing.
 * Throws SORTED_SET_EXCEPTION_ID if the two sets' comparators are not equal.
 */
stSortedSet *stSortedSet_getDifference(stSortedSet *sortedSet1, stSortedSet *sortedSet2) {
    if(!stSortedSet_comparatorsEqual(sortedSet1, sortedSet2)) {
        stThrowNew(SORTED_SET_EXCEPTION_ID, "Comparators are not equal for creating the sorted set difference");
    }
    stSortedSet *difference = stSortedSet_construct3(stSortedSet_getComparator(sortedSet1)->compareFn, NULL);

    stSortedSetIterator *candidateIt = stSortedSet_getIterator(sortedSet1);
    for(void *candidate = stSortedSet_getNext(candidateIt); candidate != NULL; candidate = stSortedSet_getNext(candidateIt)) {
        if(stSortedSet_search(sortedSet2, candidate) != NULL) {
            continue; //Present in sortedSet2, so excluded from the difference
        }
        stSortedSet_insert(difference, candidate);
    }
    stSortedSet_destructIterator(candidateIt);

    return difference;
}
static stList *getEdgesThatBridgeComponents(stList *components,
        stHash *nodesToNonZeroWeightedAdjacencyEdges) {
    /*
     * Collects the adjacency edges that bridge components, i.e. edges listed for
     * a node of a component that have exactly one of their two nodes in that
     * component. An edge is appended each time it is encountered, so the
     * returned list may contain the same edge more than once.
     */
    stList *bridgingAdjacencyEdges = stList_construct();

    for (int64_t componentIdx = 0; componentIdx < stList_length(components); componentIdx++) {
        stSortedSet *componentNodes = getNodeSetOfEdges(stList_get(components, componentIdx));
        stSortedSetIterator *nodeIt = stSortedSet_getIterator(componentNodes);
        stIntTuple *node;
        while ((node = stSortedSet_getNext(nodeIt)) != NULL) {
            stList *edges = stHash_search(nodesToNonZeroWeightedAdjacencyEdges, node);
            if (edges == NULL) {
                continue; //No edges recorded for this node
            }
            for (int64_t edgeIdx = 0; edgeIdx < stList_length(edges); edgeIdx++) {
                stIntTuple *edge = stList_get(edges, edgeIdx);
                //Temporary single-value tuples used as search keys for the edge's two nodes.
                stIntTuple *endA = stIntTuple_construct1(stIntTuple_get(edge, 0));
                stIntTuple *endB = stIntTuple_construct1(stIntTuple_get(edge, 1));
                int endAInside = stSortedSet_search(componentNodes, endA) != NULL;
                int endBInside = stSortedSet_search(componentNodes, endB) != NULL;
                //The edge was found via a component node, so at least one end must be inside.
                assert(endAInside || endBInside);
                if (!(endAInside && endBInside)) {
                    stList_append(bridgingAdjacencyEdges, edge);
                }
                stIntTuple_destruct(endA);
                stIntTuple_destruct(endB);
            }
        }
        stSortedSet_destructIterator(nodeIt);
        stSortedSet_destruct(componentNodes);
    }

    return bridgingAdjacencyEdges;
}
Example #17
0
/*
 * This builds an adjacency list structure for the sequences. Every sequence-position
 * has a column in the hash with which it can be aligned.
 *
 * pairs: list of tuples; entries 0 and 1 of each are read as one (sequence, position)
 *        and entries 2 and 3 as the other (sequence, position) of an aligned pair.
 * sequenceNumber: number of sequences; positions 0..MAX_SEQUENCE_SIZE-1 are created for each.
 *
 * Returns a hash from (sequence, position) tuples to the sorted set ("column") containing
 * that position. Positions joined by a pair share the same column object. The hash is
 * constructed with stIntTuple_destruct as its key destructor and no value destructor.
 */
static stHash *buildAdjacencyList(stList *pairs, int64_t sequenceNumber) {
    stHash *hash = stHash_construct3((uint64_t (*)(const void *))stIntTuple_hashKey,
                                     (int (*)(const void *, const void *))stIntTuple_equalsFn,
                                     (void (*)(void *))stIntTuple_destruct, NULL);
    //Initially every sequence-position sits alone in its own column.
    for(int64_t seq=0; seq<sequenceNumber; seq++) {
        for(int64_t position=0; position<MAX_SEQUENCE_SIZE; position++) {
            stIntTuple *seqPos = stIntTuple_construct2( seq, position);
            stSortedSet *column = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);
            stSortedSet_insert(column, seqPos);
            stHash_insert(hash, seqPos, column);
        }
    }
    stListIterator *it = stList_getIterator(pairs);
    stIntTuple *pair;
    while((pair = stList_getNext(it)) != NULL) {
        //Temporary lookup keys for the two ends of the pair; destroyed at the end of the loop body.
        stIntTuple *seqPos1 = stIntTuple_construct2( stIntTuple_get(pair, 0), stIntTuple_get(pair, 1));
        stIntTuple *seqPos2 = stIntTuple_construct2( stIntTuple_get(pair, 2), stIntTuple_get(pair, 3));
        stSortedSet *column1 = stHash_search(hash, seqPos1);
        assert(column1 != NULL);
        stSortedSet *column2 = stHash_search(hash, seqPos2);
        assert(column2 != NULL);
        if(column1 != column2) { //Merge the columns
            //Move every member of column2 into column1 and repoint its hash entry,
            //then discard the now redundant column2.
            stSortedSetIterator *it2 = stSortedSet_getIterator(column2);
            stIntTuple *seqPos3;
            while((seqPos3 = stSortedSet_getNext(it2)) != NULL) {
                assert(stSortedSet_search(column1, seqPos3) == NULL);
                stSortedSet_insert(column1, seqPos3);
                assert(stHash_search(hash, seqPos3) == column2);
                stHash_insert(hash, seqPos3, column1);
                assert(stHash_search(hash, seqPos3) == column1);
            }
            stSortedSet_destructIterator(it2);
            stSortedSet_destruct(column2);
        }
        //Cleanup loop.
        stIntTuple_destruct(seqPos1);
        stIntTuple_destruct(seqPos2);
    }
    stList_destructIterator(it);
    return hash;
}
Example #18
0
/*
 * Entry point. Parses the command line options, opens the cactus disk and then
 * runs in one of three modes:
 *   1. calculateWhichEndsToComputeSeparately: reads exactly one flower from stdin
 *      and prints, for each end chosen by getEndsToAlignSeparately, a tab separated
 *      line of name, instance number and total adjacency length to stdout.
 *   2. endAlignmentsToPrecomputeOutputFile given: reads names from stdin (the first
 *      is the flower, the rest are ends), aligns each end and writes the alignments
 *      to the given file.
 *   3. otherwise: builds the alignment of each flower read from stdin, runs the caf
 *      functions on it and writes the cactus disk.
 *
 * Each mode returns 0 directly after its work to avoid cleanup costs, so the
 * cleanup statements that follow those returns are intentionally unreachable.
 * Returns 1 for an unrecognised option.
 *
 * Several names used here (minimumDegree, minimumIngroupDegree, minimumOutgroupDegree,
 * minimumNumberOfSpecies, flower, fiveState, blockFilterFn) are not declared in this
 * function and must be provided elsewhere in the program.
 */
int main(int argc, char *argv[]) {

    char * logLevelString = NULL;
    char * cactusDiskDatabaseString = NULL;
    int64_t i, j;
    int64_t spanningTrees = 10;
    int64_t maximumLength = 1500;
    bool useProgressiveMerging = 0;
    float matchGamma = 0.5;
    bool useBanding = 0;
    int64_t k;
    stList *listOfEndAlignmentFiles = NULL;
    char *endAlignmentsToPrecomputeOutputFile = NULL;
    bool calculateWhichEndsToComputeSeparately = 0;
    int64_t largeEndSize = 1000000;
    int64_t chainLengthForBigFlower = 1000000;
    int64_t longChain = 2;
    char *ingroupCoverageFilePath = NULL;
    int64_t minimumSizeToRescue = 1;
    double minimumCoverageToRescue = 0.0;

    PairwiseAlignmentParameters *pairwiseAlignmentBandingParameters = pairwiseAlignmentBandingParameters_construct();

    /*
     * Setup the input parameters for cactus core.
     */
    bool pruneOutStubAlignments = 0;

    /*
     * Parse the options.
     */
    while (1) {
        static struct option long_options[] = { { "logLevel", required_argument, 0, 'a' }, { "cactusDisk", required_argument, 0, 'b' }, {
                "help", no_argument, 0, 'h' }, { "spanningTrees", required_argument, 0, 'i' },
                { "maximumLength", required_argument, 0, 'j' }, { "useBanding", no_argument, 0, 'k' },
                { "gapGamma", required_argument, 0, 'l' }, { "matchGamma", required_argument, 0, 'L' },
                { "splitMatrixBiggerThanThis", required_argument, 0, 'o' }, { "anchorMatrixBiggerThanThis",
                        required_argument, 0, 'p' }, { "repeatMaskMatrixBiggerThanThis", required_argument, 0, 'q' }, {
                        "diagonalExpansion", required_argument, 0, 'r' }, { "constraintDiagonalTrim", required_argument, 0, 't' }, {
                        "minimumDegree", required_argument, 0, 'u' }, { "alignAmbiguityCharacters", no_argument, 0, 'w' }, {
                        "pruneOutStubAlignments", no_argument, 0, 'y' }, {
                        "minimumIngroupDegree", required_argument, 0, 'A' }, { "minimumOutgroupDegree", required_argument, 0, 'B' },
                { "precomputedAlignments", required_argument, 0, 'D' }, {
                        "endAlignmentsToPrecomputeOutputFile", required_argument, 0, 'E' }, { "useProgressiveMerging",
                        no_argument, 0, 'F' }, { "calculateWhichEndsToComputeSeparately", no_argument, 0, 'G' }, { "largeEndSize",
                        required_argument, 0, 'I' },
                        {"ingroupCoverageFile", required_argument, 0, 'J'},
                        {"minimumSizeToRescue", required_argument, 0, 'K'},
                        {"minimumCoverageToRescue", required_argument, 0, 'M'},
                        { "minimumNumberOfSpecies", required_argument, 0, 'N' },
                        { 0, 0, 0, 0 } };

        int option_index = 0;

        // NOTE(review): the short option string declares "y:" (takes an argument) while the long
        // option pruneOutStubAlignments is no_argument - confirm which is intended before changing.
        int key = getopt_long(argc, argv, "a:b:hi:j:kl:o:p:q:r:t:u:wy:A:B:D:E:FGI:J:K:L:M:N:", long_options, &option_index);

        if (key == -1) {
            break;
        }

        switch (key) {
            case 'a':
                logLevelString = stString_copy(optarg);
                st_setLogLevelFromString(logLevelString);
                break;
            case 'b':
                cactusDiskDatabaseString = stString_copy(optarg);
                break;
            case 'h':
                usage();
                return 0;
            case 'i':
                i = sscanf(optarg, "%" PRIi64 "", &spanningTrees);
                (void) i;
                assert(i == 1);
                assert(spanningTrees >= 0);
                break;
            case 'j':
                i = sscanf(optarg, "%" PRIi64 "", &maximumLength);
                assert(i == 1);
                assert(maximumLength >= 0);
                break;
            case 'k':
                useBanding = !useBanding;
                break;
            case 'l':
                i = sscanf(optarg, "%f", &pairwiseAlignmentBandingParameters->gapGamma);
                assert(i == 1);
                assert(pairwiseAlignmentBandingParameters->gapGamma >= 0.0);
                break;
            case 'L':
                i = sscanf(optarg, "%f", &matchGamma);
                assert(i == 1);
                assert(matchGamma >= 0.0);
                break;
            case 'o':
                // The option gives a side length k; the stored threshold is k squared.
                i = sscanf(optarg, "%" PRIi64 "", &k);
                assert(i == 1);
                assert(k >= 0);
                pairwiseAlignmentBandingParameters->splitMatrixBiggerThanThis = (int64_t) k * k;
                break;
            case 'p':
                i = sscanf(optarg, "%" PRIi64 "", &k);
                assert(i == 1);
                assert(k >= 0);
                pairwiseAlignmentBandingParameters->anchorMatrixBiggerThanThis = (int64_t) k * k;
                break;
            case 'q':
                i = sscanf(optarg, "%" PRIi64 "", &k);
                assert(i == 1);
                assert(k >= 0);
                pairwiseAlignmentBandingParameters->repeatMaskMatrixBiggerThanThis = (int64_t) k * k;
                break;
            case 'r':
                i = sscanf(optarg, "%" PRIi64 "", &pairwiseAlignmentBandingParameters->diagonalExpansion);
                assert(i == 1);
                assert(pairwiseAlignmentBandingParameters->diagonalExpansion >= 0);
                assert(pairwiseAlignmentBandingParameters->diagonalExpansion % 2 == 0);
                break;
            case 't':
                i = sscanf(optarg, "%" PRIi64 "", &pairwiseAlignmentBandingParameters->constraintDiagonalTrim);
                assert(i == 1);
                assert(pairwiseAlignmentBandingParameters->constraintDiagonalTrim >= 0);
                break;
            case 'u':
                i = sscanf(optarg, "%" PRIi64 "", &minimumDegree);
                assert(i == 1);
                break;
            case 'w':
                pairwiseAlignmentBandingParameters->alignAmbiguityCharacters = 1;
                break;
            case 'y':
                pruneOutStubAlignments = 1;
                break;
            case 'A':
                i = sscanf(optarg, "%" PRIi64 "", &minimumIngroupDegree);
                assert(i == 1);
                break;
            case 'B':
                i = sscanf(optarg, "%" PRIi64 "", &minimumOutgroupDegree);
                assert(i == 1);
                break;
            case 'D':
                listOfEndAlignmentFiles = stString_split(optarg);
                break;
            case 'E':
                endAlignmentsToPrecomputeOutputFile = stString_copy(optarg);
                break;
            case 'F':
                useProgressiveMerging = 1;
                break;
            case 'G':
                calculateWhichEndsToComputeSeparately = 1;
                break;
            case 'I':
                i = sscanf(optarg, "%" PRIi64 "", &largeEndSize);
                assert(i == 1);
                break;
            case 'J':
                ingroupCoverageFilePath = stString_copy(optarg);
                break;
            case 'K':
                i = sscanf(optarg, "%" PRIi64, &minimumSizeToRescue);
                assert(i == 1);
                break;
            case 'M':
                i = sscanf(optarg, "%lf", &minimumCoverageToRescue);
                assert(i == 1);
                break;
            case 'N':
                i = sscanf(optarg, "%" PRIi64, &minimumNumberOfSpecies);
                if (i != 1) {
                    st_errAbort("Error parsing minimumNumberOfSpecies parameter");
                }
                break;
            default:
                usage();
                return 1;
        }
    }

    st_setLogLevelFromString(logLevelString);

    /*
     * Load the flowerdisk
     */
    stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString(cactusDiskDatabaseString);
    CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, 0); //We precache the sequences
    st_logInfo("Set up the flower disk\n");

    /*
     * Load the hmm
     */
    StateMachine *sM = stateMachine5_construct(fiveState);

    /*
     * For each flower.
     */
    if (calculateWhichEndsToComputeSeparately) {
        stList *flowers = flowerWriter_parseFlowersFromStdin(cactusDisk);
        if (stList_length(flowers) != 1) {
            st_errAbort("We are breaking up a flower's end alignments for precomputation but we have %" PRIi64 " flowers.\n", stList_length(flowers));
        }
        stSortedSet *endsToAlignSeparately = getEndsToAlignSeparately(stList_get(flowers, 0), maximumLength, largeEndSize);
        assert(stSortedSet_size(endsToAlignSeparately) != 1);
        stSortedSetIterator *it = stSortedSet_getIterator(endsToAlignSeparately);
        End *end;
        while ((end = stSortedSet_getNext(it)) != NULL) {
            fprintf(stdout, "%s\t%" PRIi64 "\t%" PRIi64 "\n", cactusMisc_nameToStringStatic(end_getName(end)), end_getInstanceNumber(end), getTotalAdjacencyLength(end));
        }
        return 0; //avoid cleanup costs
        //Unreachable by design; kept for when cleanup is re-enabled.
        stSortedSet_destructIterator(it);
        stSortedSet_destruct(endsToAlignSeparately);
    } else if (endAlignmentsToPrecomputeOutputFile != NULL) {
        /*
         * In this case we will align a set of end and save the alignments in a file.
         */
        stList *names = flowerWriter_parseNames(stdin);
        Flower *flower = cactusDisk_getFlower(cactusDisk, *((Name *)stList_get(names, 0)));
        FILE *fileHandle = fopen(endAlignmentsToPrecomputeOutputFile, "w");
        if (fileHandle == NULL) {
            // Fail loudly rather than passing a NULL stream to the writer below.
            st_errnoAbort("Opening end alignments output file %s failed",
                          endAlignmentsToPrecomputeOutputFile);
        }
        for(int64_t i=1; i<stList_length(names); i++) {
            End *end = flower_getEnd(flower, *((Name *)stList_get(names, i)));
            if (end == NULL) {
                st_errAbort("The end %" PRIi64 " was not found in the flower\n", *((Name *)stList_get(names, i)));
            }
            stSortedSet *endAlignment = makeEndAlignment(sM, end, spanningTrees, maximumLength, useProgressiveMerging,
                            matchGamma, pairwiseAlignmentBandingParameters);
            writeEndAlignmentToDisk(end, endAlignment, fileHandle);
            stSortedSet_destruct(endAlignment);
        }
        fclose(fileHandle);
        return 0; //avoid cleanup costs
        //Unreachable by design; kept for when cleanup is re-enabled.
        stList_destruct(names);
        st_logInfo("Finished precomputing end alignments\n");
    } else {
        /*
         * Compute complete flower alignments, possibly loading some precomputed alignments.
         */
        bedRegion *bedRegions = NULL;
        size_t numBeds = 0;
        if (ingroupCoverageFilePath != NULL) {
            // Pre-load the mmap for the coverage file.
            FILE *coverageFile = fopen(ingroupCoverageFilePath, "rb");
            if (coverageFile == NULL) {
                st_errnoAbort("Opening coverage file %s failed",
                              ingroupCoverageFilePath);
            }
            // Seek to the end to learn the file length in bytes.
            fseek(coverageFile, 0, SEEK_END);
            int64_t coverageFileLen = ftell(coverageFile);
            assert(coverageFileLen >= 0);
            assert(coverageFileLen % sizeof(bedRegion) == 0);
            if (coverageFileLen == 0) {
                // mmap doesn't like length-0 mappings, for obvious
                // reasons. Pretend that the coverage file doesn't
                // exist in this case, since it contains no data.
                ingroupCoverageFilePath = NULL;
            } else {
                // Establish a memory mapping for the file.
                bedRegions = mmap(NULL, coverageFileLen, PROT_READ, MAP_SHARED,
                                  fileno(coverageFile), 0);
                if (bedRegions == MAP_FAILED) {
                    st_errnoAbort("Failure mapping coverage file");
                }

                numBeds = coverageFileLen / sizeof(bedRegion);
            }
            fclose(coverageFile);
        }

        stList *flowers = flowerWriter_parseFlowersFromStdin(cactusDisk);
        if (listOfEndAlignmentFiles != NULL && stList_length(flowers) != 1) {
            st_errAbort("We have precomputed alignments but %" PRIi64 " flowers to align.\n", stList_length(flowers));
        }
        cactusDisk_preCacheStrings(cactusDisk, flowers);
        for (j = 0; j < stList_length(flowers); j++) {
            flower = stList_get(flowers, j);
            st_logInfo("Processing a flower\n");

            stSortedSet *alignedPairs = makeFlowerAlignment3(sM, flower, listOfEndAlignmentFiles, spanningTrees, maximumLength,
                    useProgressiveMerging, matchGamma, pairwiseAlignmentBandingParameters, pruneOutStubAlignments);
            st_logInfo("Created the alignment: %" PRIi64 " pairs\n", stSortedSet_size(alignedPairs));
            stPinchIterator *pinchIterator = stPinchIterator_constructFromAlignedPairs(alignedPairs, getNextAlignedPairAlignment);

            /*
             * Run the cactus caf functions to build cactus.
             */
            stPinchThreadSet *threadSet = stCaf_setup(flower);
            stCaf_anneal(threadSet, pinchIterator, NULL);
            if (minimumDegree < 2) {
                stCaf_makeDegreeOneBlocks(threadSet);
            }
            if (minimumIngroupDegree > 0 || minimumOutgroupDegree > 0 || minimumDegree > 1) {
                stCaf_melt(flower, threadSet, blockFilterFn, 0, 0, 0, INT64_MAX);
            }

            if (ingroupCoverageFilePath != NULL) {
                // Rescue any sequence that is covered by outgroups
                // but currently unaligned into single-degree blocks.
                stPinchThreadSetIt pinchIt = stPinchThreadSet_getIt(threadSet);
                stPinchThread *thread;
                while ((thread = stPinchThreadSetIt_getNext(&pinchIt)) != NULL) {
                    Cap *cap = flower_getCap(flower,
                                             stPinchThread_getName(thread));
                    assert(cap != NULL);
                    Sequence *sequence = cap_getSequence(cap);
                    assert(sequence != NULL);
                    rescueCoveredRegions(thread, bedRegions, numBeds,
                                         sequence_getName(sequence),
                                         minimumSizeToRescue,
                                         minimumCoverageToRescue);
                }
                stCaf_joinTrivialBoundaries(threadSet);
            }

            stCaf_finish(flower, threadSet, chainLengthForBigFlower, longChain, INT64_MAX, INT64_MAX); //Flower now destroyed.
            stPinchThreadSet_destruct(threadSet);
            st_logInfo("Ran the cactus core script.\n");

            /*
             * Cleanup
             */
            //Clean up the sorted set after cleaning up the iterator
            stPinchIterator_destruct(pinchIterator);
            stSortedSet_destruct(alignedPairs);

            st_logInfo("Finished filling in the alignments for the flower\n");
        }
        stList_destruct(flowers);
        //st_errAbort("Done\n");
        /*
         * Write and close the cactusdisk.
         */
        cactusDisk_write(cactusDisk);
        return 0; //Exit without clean up is quicker, enable cleanup when doing memory leak detection.
        if (bedRegions != NULL) {
            // Clean up our mapping.
            munmap(bedRegions, numBeds * sizeof(bedRegion));
        }
    }


    ///////////////////////////////////////////////////////////////////////////
    // Cleanup (only reached if the early returns above are removed)
    ///////////////////////////////////////////////////////////////////////////

    stateMachine_destruct(sM);
    cactusDisk_destruct(cactusDisk);
    stKVDatabaseConf_destruct(kvDatabaseConf);
    //destructCactusCoreInputParameters(cCIP);
    free(cactusDiskDatabaseString);
    if (listOfEndAlignmentFiles != NULL) {
        stList_destruct(listOfEndAlignmentFiles);
    }
    if (logLevelString != NULL) {
        free(logLevelString);
    }
    st_logInfo("Finished with the flower disk for this flower.\n");

    //while(1);

    return 0;
}
Example #19
0
/*
 * Groups haplotype paths into scaffold paths and computes, for every haplotype
 * path, the combined length of the scaffold path it belongs to.
 *
 * haplotypePaths: list of haplotype paths (each a non-empty stList of Segment *).
 * haplotypePathToScaffoldPathHash: hash filled in by this function; on return it
 *      maps every haplotype path to the stSortedSet ("bucket") of haplotype paths
 *      that are in the same scaffold path. Paths in the same scaffold share one
 *      bucket object.
 * haplotypeEventStrings, contaminationEventStrings, capCodeParameters: passed
 *      through to getCapCode / trueAdjacency / hasCapInEvents.
 *
 * Returns the hash from haplotype path to an stIntTuple holding the length
 * value for its scaffold path. The hash is built by
 * buildContigPathToContigPathLengthHash and is handed to the caller.
 */
static stHash *getScaffoldPathsP(stList *haplotypePaths, stHash *haplotypePathToScaffoldPathHash,
        stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters) {
    stHash *haplotypeToMaximalHaplotypeLengthHash = buildContigPathToContigPathLengthHash(haplotypePaths);
    stHash *segmentToMaximalHaplotypePathHash = buildSegmentToContigPathHash(haplotypePaths);

    //Start with every haplotype path alone in its own bucket.
    for (int64_t i = 0; i < stList_length(haplotypePaths); i++) {
        stSortedSet *bucket = stSortedSet_construct();
        stHash_insert(haplotypePathToScaffoldPathHash, stList_get(haplotypePaths, i), bucket);
        stSortedSet_insert(bucket, stList_get(haplotypePaths, i));
    }

    for (int64_t i = 0; i < stList_length(haplotypePaths); i++) {
        stList *haplotypePath = stList_get(haplotypePaths, i);
        assert(stList_length(haplotypePath) > 0);

        //Pick the positive strand segment at the 5' end of the path.
        Segment *_5Segment = stList_get(haplotypePath, 0);
        if (!segment_getStrand(_5Segment)) {
            _5Segment = segment_getReverse(stList_get(haplotypePath, stList_length(haplotypePath) - 1));
        }
        assert(segment_getStrand(_5Segment));
        if (getAdjacentCapsSegment(segment_get5Cap(_5Segment)) != NULL) {
            assert(!trueAdjacency(segment_get5Cap(_5Segment), haplotypeEventStrings));
        }

        int64_t insertLength;
        int64_t deleteLength;
        Cap *otherCap;
        enum CapCode _5CapCode = getCapCode(segment_get5Cap(_5Segment), &otherCap, haplotypeEventStrings, contaminationEventStrings, &insertLength, &deleteLength, capCodeParameters);
        if (_5CapCode == SCAFFOLD_GAP || _5CapCode == AMBIGUITY_GAP) {
            assert(stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath) != NULL);
            int64_t j = stIntTuple_get(stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath), 0);

            //Walk along the adjacencies until we reach a segment that ends a haplotype path.
            Segment *adjacentSegment = getAdjacentCapsSegment(segment_get5Cap(_5Segment));
            assert(adjacentSegment != NULL);
            while (!hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)) { //is not a haplotype end
                adjacentSegment = getAdjacentCapsSegment(segment_get5Cap(adjacentSegment));
                assert(adjacentSegment != NULL);
            }
            assert(adjacentSegment != NULL);
            assert(hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)); //is a haplotype end

            //The segment may be stored in the hash in either orientation.
            stList *adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, adjacentSegment);
            if (adjacentHaplotypePath == NULL) {
                adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, segment_getReverse(
                        adjacentSegment));
            }
            assert(adjacentHaplotypePath != NULL);
            assert(adjacentHaplotypePath != haplotypePath);
            assert(stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath) != NULL);
            int64_t k = stIntTuple_get(stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath), 0);

            //Now merge the buckets and make new int tuples..
            stSortedSet *bucket1 = stHash_search(haplotypePathToScaffoldPathHash, haplotypePath);
            stSortedSet *bucket2 = stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath);
            assert(bucket1 != NULL);
            assert(bucket2 != NULL);
            assert(bucket1 != bucket2);
            stSortedSet *bucket3 = stSortedSet_getUnion(bucket1, bucket2);
            stSortedSetIterator *bucketIt = stSortedSet_getIterator(bucket3);
            stList *l;
            while ((l = stSortedSet_getNext(bucketIt)) != NULL) {
                //Do the bucket first
                assert(stHash_search(haplotypePathToScaffoldPathHash, l) == bucket1 || stHash_search(haplotypePathToScaffoldPathHash, l) == bucket2);
                stHash_remove(haplotypePathToScaffoldPathHash, l);
                stHash_insert(haplotypePathToScaffoldPathHash, l, bucket3);
                //Now the length
                stIntTuple *m = stHash_remove(haplotypeToMaximalHaplotypeLengthHash, l);
                assert(m != NULL);
                assert(stIntTuple_get(m, 0) == j || stIntTuple_get(m, 0) == k);
                stHash_insert(haplotypeToMaximalHaplotypeLengthHash, l, stIntTuple_construct1( j + k));
                stIntTuple_destruct(m);
            }
            assert(stHash_search(haplotypePathToScaffoldPathHash, haplotypePath) == bucket3);
            assert(stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath) == bucket3);
            stSortedSet_destructIterator(bucketIt);

            //Every member of the two old buckets now maps to bucket3, so nothing
            //references them any more; release them to avoid leaking a pair of
            //sets on every merge. The guard keeps this safe if the (asserted)
            //bucket1 != bucket2 invariant is violated in a build without asserts.
            stSortedSet_destruct(bucket1);
            if (bucket2 != bucket1) {
                stSortedSet_destruct(bucket2);
            }
        }
    }
    stHash_destruct(segmentToMaximalHaplotypePathHash);
    return haplotypeToMaximalHaplotypeLengthHash;
}
Example #20
0
/*
 * Returns an iterator over the sorted set of events stored in the event tree.
 */
EventTree_Iterator *eventTree_getIterator(EventTree *eventTree) {
	EventTree_Iterator *eventIterator = stSortedSet_getIterator(eventTree->events);
	return eventIterator;
}
Example #21
0
/*
 * Returns an iterator over the sorted set of faces stored in the flower.
 */
Flower_FaceIterator *flower_getFaceIterator(Flower *flower) {
    Flower_FaceIterator *faceIterator = stSortedSet_getIterator(flower->faces);
    return faceIterator;
}
Example #22
0
/*
 * Returns an iterator over the sorted set of chains stored in the flower.
 */
Flower_ChainIterator *flower_getChainIterator(Flower *flower) {
    Flower_ChainIterator *chainIterator = stSortedSet_getIterator(flower->chains);
    return chainIterator;
}
Example #23
0
/*
 * Returns an iterator over the sorted set of groups stored in the flower.
 */
Flower_GroupIterator *flower_getGroupIterator(Flower *flower) {
    Flower_GroupIterator *groupIterator = stSortedSet_getIterator(flower->groups);
    return groupIterator;
}
Example #24
0
/*
 * Returns an iterator over the sorted set of blocks stored in the flower.
 */
Flower_BlockIterator *flower_getBlockIterator(Flower *flower) {
    Flower_BlockIterator *blockIterator = stSortedSet_getIterator(flower->blocks);
    return blockIterator;
}
Example #25
0
/*
 * Returns an iterator over the sorted set of segments stored in the flower.
 */
Flower_SegmentIterator *flower_getSegmentIterator(Flower *flower) {
    Flower_SegmentIterator *segmentIterator = stSortedSet_getIterator(flower->segments);
    return segmentIterator;
}
Example #26
0
/*
 * Returns an iterator over the sorted set of ends stored in the flower.
 */
Flower_EndIterator *flower_getEndIterator(Flower *flower) {
    Flower_EndIterator *endIterator = stSortedSet_getIterator(flower->ends);
    return endIterator;
}
Example #27
0
/*
 * Returns an iterator over the sorted set of caps stored in the flower.
 */
Flower_CapIterator *flower_getCapIterator(Flower *flower) {
    Flower_CapIterator *capIterator = stSortedSet_getIterator(flower->caps);
    return capIterator;
}
Example #28
0
/*
 * Returns an iterator over the sorted set of sequences stored in the flower.
 */
Flower_SequenceIterator *flower_getSequenceIterator(Flower *flower) {
    Flower_SequenceIterator *sequenceIterator = stSortedSet_getIterator(flower->sequences);
    return sequenceIterator;
}
Example #29
0
/*
 * Collects the pending changes held by the cactus disk into
 * cactusDisk->updateRequests and submits them to the database in one bulk
 * call. The requests cover, in order: every loaded flower, every flower name
 * marked for deletion that has a record, every meta-sequence (insert or
 * update depending on whether a record exists) and, only if not yet stored,
 * the disk parameters.
 *
 * A failure of the bulk set is rethrown as ST_KV_DATABASE_EXCEPTION_ID.
 *
 * NOTE(review): only the first part of this function is visible here; the
 * removeRequests list built below is not consumed in the visible portion, so
 * the remainder presumably performs the removals and cleanup -- confirm.
 */
void cactusDisk_write(CactusDisk *cactusDisk) {
    Flower *flower;
    int64_t recordSize; //Size of the most recently serialised record, set via pointer by the helpers below.

    //Names of records to remove; elements are stIntTuples destroyed with the list.
    stList *removeRequests = stList_construct3(0, (void (*)(void *)) stIntTuple_destruct);

    st_logDebug("Starting to write the cactus to disk\n");

    stSortedSetIterator *it = stSortedSet_getIterator(cactusDisk->flowers);
    //Sort flowers to update.
    while ((flower = stSortedSet_getNext(it)) != NULL) {
        cactusDisk_addUpdateRequest(cactusDisk, flower);
    }
    stSortedSet_destructIterator(it);

    st_logDebug("Got the flowers to update\n");

    //Remove nets that are marked for deletion..
    it = stSortedSet_getIterator(cactusDisk->flowerNamesMarkedForDeletion);
    char *nameString;
    while ((nameString = stSortedSet_getNext(it)) != NULL) {
        Name name = cactusMisc_stringToName(nameString);
        //Only names that currently have a record need an update plus a removal request.
        if (containsRecord(cactusDisk, name)) {
            stList_append(cactusDisk->updateRequests, stKVDatabaseBulkRequest_constructUpdateRequest(name, &name, 0)); //We set it to null in the first atomic operation.
            stList_append(removeRequests, stIntTuple_construct1(name));
        }
    }
    stSortedSet_destructIterator(it);

    st_logDebug("Avoided updating nets marked for deletion\n");

    // Insert and/or update meta-sequences.
    it = stSortedSet_getIterator(cactusDisk->metaSequences);
    MetaSequence *metaSequence;
    while ((metaSequence = stSortedSet_getNext(it)) != NULL) {
        //Serialise the meta-sequence; recordSize receives the length of the buffer.
        void *vA =
                binaryRepresentation_makeBinaryRepresentation(metaSequence,
                        (void (*)(void *, void (*)(const void * ptr, size_t size, size_t count))) metaSequence_writeBinaryRepresentation,
                        &recordSize);
        //Compression
        //NOTE(review): compress() here takes (buffer, &size) and its result replaces vA;
        //it presumably releases the uncompressed buffer itself -- confirm.
        vA = compress(vA, &recordSize);
        //Insert when no record exists for this name, otherwise update the existing one.
        if (!containsRecord(cactusDisk, metaSequence_getName(metaSequence))) {
            stList_append(cactusDisk->updateRequests,
                    stKVDatabaseBulkRequest_constructInsertRequest(metaSequence_getName(metaSequence), vA, recordSize));
        } else {
            stList_append(cactusDisk->updateRequests,
                    stKVDatabaseBulkRequest_constructUpdateRequest(metaSequence_getName(metaSequence), vA, recordSize));
        }
        //vA is freed right after the request is built, so the request presumably keeps its own copy -- confirm.
        free(vA);
    }
    stSortedSet_destructIterator(it);

    st_logDebug("Got the sequences we are going to add to the database.\n");

    if (!containsRecord(cactusDisk, CACTUS_DISK_PARAMETER_KEY)) { //We only write the parameters once.
        //Finally the database info.
        void *cactusDiskParameters =
                binaryRepresentation_makeBinaryRepresentation(cactusDisk,
                        (void (*)(void *, void (*)(const void * ptr, size_t size, size_t count))) cactusDisk_writeBinaryRepresentation,
                        &recordSize);
        //Compression
        cactusDiskParameters = compress(cactusDiskParameters, &recordSize);
        stList_append(cactusDisk->updateRequests,
                stKVDatabaseBulkRequest_constructInsertRequest(CACTUS_DISK_PARAMETER_KEY, cactusDiskParameters,
                        recordSize));
        free(cactusDiskParameters);
    }

    st_logDebug("Checked if need to write the initial parameters\n");

    //Skip the database round trip entirely when nothing was queued.
    if (stList_length(cactusDisk->updateRequests) > 0) {
        st_logDebug("Going to write %" PRIi64 " updates\n", stList_length(cactusDisk->updateRequests));
        stTry
            {
                st_logDebug("Writing %" PRIi64 " updates\n", stList_length(cactusDisk->updateRequests));
                assert(stList_length(cactusDisk->updateRequests) > 0);
                stKVDatabase_bulkSetRecords(cactusDisk->database, cactusDisk->updateRequests);
            }
            stCatch(except)
                {
                    //Wrap the underlying failure so callers see a database exception with context.
                    stThrowNewCause(except, ST_KV_DATABASE_EXCEPTION_ID,
                            "Failed when trying to set records in updating the cactus disk");
                }stTryEnd
        ;
    }
Example #30
0
/*
 * Runs one round of the bulk insert / update / remove test against the
 * file-level database handle.
 *
 * numRecords distinct random keys are inserted (each record stores its own
 * key as an int32_t), every record is then negated in place, the database is
 * optionally reopened, and finally every record is removed, checking that a
 * second removal raises ST_KV_DATABASE_EXCEPTION_ID. The database must end
 * up empty.
 */
static void readWriteAndRemoveRecordsLotsIteration(CuTest *testCase, int numRecords, bool reopenDatabase) {
    //Draw random keys until we hold numRecords distinct ones, mirroring each into the database.
    stSortedSet *keys = stSortedSet_construct3((int(*)(const void *, const void *)) stIntTuple_cmpFn,
            (void(*)(void *)) stIntTuple_destruct);
    while (stSortedSet_size(keys) < numRecords) {
        int32_t candidate = st_randomInt(0, 100 * numRecords);
        stIntTuple *wrapped = stIntTuple_construct(1, candidate);
        if (stSortedSet_search(keys, wrapped) != NULL) {
            //Duplicate draw: the record must already be in the database.
            CuAssertTrue(testCase, stKVDatabase_containsRecord(database, candidate));
            stIntTuple_destruct(wrapped); // already in db
            continue;
        }
        CuAssertTrue(testCase, !stKVDatabase_containsRecord(database, candidate));
        stSortedSet_insert(keys, wrapped);
        stKVDatabase_insertRecord(database, candidate, &candidate, sizeof(int32_t));
        CuAssertTrue(testCase, stKVDatabase_containsRecord(database, candidate));
    }

    readWriteAndRemoveRecordsLotsCheck(testCase, keys, 1);

    //Flip the sign of every stored value.
    stIntTuple *entry;
    stSortedSetIterator *iter = stSortedSet_getIterator(keys);
    for (entry = stSortedSet_getNext(iter); entry != NULL; entry = stSortedSet_getNext(iter)) {
        int64_t recordKey = stIntTuple_getPosition(entry, 0);
        int32_t *stored = (int32_t *) stKVDatabase_getRecord(database, recordKey);
        *stored = -(*stored);
        stKVDatabase_updateRecord(database, recordKey, stored, sizeof(int32_t));
        CuAssertTrue(testCase, stKVDatabase_containsRecord(database, recordKey));
        free(stored);
    }
    stSortedSet_destructIterator(iter);

    readWriteAndRemoveRecordsLotsCheck(testCase, keys, -1);

    //Optionally close and reopen the database before the removal phase.
    if (reopenDatabase) {
        //stKVDatabase_commitTransaction(database);
        stKVDatabase_destruct(database);
        database = stKVDatabase_construct(conf, false);
        //stKVDatabase_startTransaction(database);
    }

    //Delete every record, checking presence before and absence after.
    iter = stSortedSet_getIterator(keys);
    for (entry = stSortedSet_getNext(iter); entry != NULL; entry = stSortedSet_getNext(iter)) {
        int64_t recordKey = stIntTuple_getPosition(entry, 0);
        CuAssertTrue(testCase, stKVDatabase_containsRecord(database, recordKey));
        stKVDatabase_removeRecord(database, recordKey);
        CuAssertTrue(testCase, !stKVDatabase_containsRecord(database, recordKey));
        //A second removal of the same key has to throw.
        stTry {
                stKVDatabase_removeRecord(database, recordKey);
                CuAssertTrue(testCase, 0);
            }
            stCatch(except)
                {
                    CuAssertTrue(testCase, stExcept_getId(except) == ST_KV_DATABASE_EXCEPTION_ID);
                }stTryEnd;
    }
    stSortedSet_destructIterator(iter);
    CuAssertIntEquals(testCase, 0, stKVDatabase_getNumberOfRecords(database));

    stSortedSet_destruct(keys);
}