示例#1
0
/*
 * Decides whether the alignment described by the pairs is cyclic. The adjacency list
 * is built with buildAdjacencyList and a depth-first search is launched from position 0
 * of every sequence; the result is non-zero if any of those searches reports a cycle.
 */
static int64_t containsACycle(stList *pairs, int64_t sequenceNumber) {
    stHash *columnsByPosition = buildAdjacencyList(pairs, sequenceNumber);

    //Bookkeeping sets handed to the depth-first search.
    stSortedSet *started = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);
    stSortedSet *done = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);

    int64_t cyclic = 0;
    int64_t seq = 0;
    while(seq < sequenceNumber) {
        //A throwaway key is used to look up the tuple held in the adjacency list,
        //so nothing allocated here outlives the iteration.
        stIntTuple *probe = stIntTuple_construct2(seq, 0);
        stSortedSet *column = stHash_search(columnsByPosition, probe);
        assert(column != NULL);
        stIntTuple *start = stSortedSet_search(column, probe);
        assert(start != NULL);
        if(!cyclic) { //Once a cycle has been seen no further searches are run.
            cyclic = dfs(columnsByPosition, start, started, done) != 0;
        }
        stIntTuple_destruct(probe);
        seq++;
    }

    //Cleanup. Each key is mapped to a column; the columns are gathered in a set that
    //owns them so that every distinct column is destructed once.
    stHashIterator *keyIt = stHash_getIterator(columnsByPosition);
    stSortedSet *distinctColumns = stSortedSet_construct2((void (*)(void *))stSortedSet_destruct);
    for(stIntTuple *key = stHash_getNext(keyIt); key != NULL; key = stHash_getNext(keyIt)) {
        stSortedSet *column = stHash_search(columnsByPosition, key);
        assert(column != NULL);
        stSortedSet_insert(distinctColumns, column);
    }
    stHash_destructIterator(keyIt);
    stHash_destruct(columnsByPosition);
    stSortedSet_destruct(distinctColumns);
    stSortedSet_destruct(started);
    stSortedSet_destruct(done);

    return cyclic;
}
示例#2
0
/*
 * Allocates a flower with the given name, gives it an empty sorted set for each kind
 * of contained object, zeroes its indices and "built" flags, and registers it with
 * the cactus disk. The event tree is left unset (NULL).
 */
static Flower *flower_construct3(Name name, CactusDisk *cactusDisk) {
    Flower *newFlower = st_malloc(sizeof(Flower));

    //Identity and links.
    newFlower->name = name;
    newFlower->parentFlowerName = NULL_NAME;
    newFlower->cactusDisk = cactusDisk;

    //One empty container per kind of child object.
    newFlower->sequences = stSortedSet_construct3(flower_constructSequencesP, NULL);
    newFlower->caps = stSortedSet_construct3(flower_constructCapsP, NULL);
    newFlower->ends = stSortedSet_construct3(flower_constructEndsP, NULL);
    newFlower->segments = stSortedSet_construct3(flower_constructSegmentsP, NULL);
    newFlower->blocks = stSortedSet_construct3(flower_constructBlocksP, NULL);
    newFlower->groups = stSortedSet_construct3(flower_constructGroupsP, NULL);
    newFlower->chains = stSortedSet_construct3(flower_constructChainsP, NULL);
    newFlower->faces = stSortedSet_construct3(flower_constructFacesP, NULL);

    //Counters and state flags all start at zero.
    newFlower->faceIndex = 0;
    newFlower->chainIndex = 0;
    newFlower->builtBlocks = 0;
    newFlower->builtFaces = 0;
    newFlower->builtTrees = 0;

    cactusDisk_addFlower(newFlower->cactusDisk, newFlower);

    newFlower->eventTree = NULL;

    return newFlower;
}
/*
 * Test helper that validates a reference against the file-level fixtures
 * (nodeNumber, zMatrix, stubs, chains): the edge count, the edges themselves,
 * the total score and the number of connected components are all checked.
 */
static void checkIsValidReference(CuTest *testCase, stList *reference,
        double totalScore) {
    stList *adjacencyEdges = convertReferenceToAdjacencyEdges(reference);

    //Check that every node has a partner.
    CuAssertIntEquals(testCase, nodeNumber, stList_length(adjacencyEdges) * 2);
    stSortedSet *allNodes = stSortedSet_construct3((int(*)(const void *, const void *)) stIntTuple_cmpFn,
            (void(*)(void *)) stIntTuple_destruct);
    int64_t node = 0;
    while (node < nodeNumber) {
        stSortedSet_insert(allNodes, stIntTuple_construct1(node));
        node++;
    }
    checkEdges(adjacencyEdges, allNodes, 1, 0);

    //Check that the recomputed score matches the one passed in.
    double recomputedScore = calculateZScoreOfReference(reference, nodeNumber, zMatrix);
    CuAssertDblEquals(testCase, recomputedScore, totalScore, 0.000001);

    //Check that the stubs are properly connected: the chosen edges plus the stubs
    //and chains must yield one component per stub.
    stList *combinedEdges = stList_copy(adjacencyEdges, NULL);
    stList_appendAll(combinedEdges, stubs);
    stList_appendAll(combinedEdges, chains);
    stList *components = getComponents(combinedEdges);
    CuAssertIntEquals(testCase, stList_length(stubs), stList_length(reference));
    CuAssertIntEquals(testCase, stList_length(stubs), stList_length(components));

    //Cleanup
    stList_destruct(components);
    stSortedSet_destruct(allNodes);
    stList_destruct(combinedEdges);
    stList_destruct(adjacencyEdges);
}
示例#4
0
/*
 * Creates a block of the given name and length between two ends and adds it to the
 * flower. Two Block structs are allocated, one per orientation; they point at each
 * other through rBlock and share a single BlockContents. The forward-orientation
 * struct (orientation == 1) is returned.
 */
Block *block_construct2(Name name, int64_t length,
		End *leftEnd, End *rightEnd,
		Flower *flower) {
	Block *forward = st_malloc(sizeof(Block));
	Block *reverse = st_malloc(sizeof(Block));
	BlockContents *contents = st_malloc(sizeof(BlockContents));

	//Link the two orientations together and to the shared contents.
	forward->rBlock = reverse;
	reverse->rBlock = forward;
	forward->blockContents = contents;
	reverse->blockContents = contents;
	forward->orientation = 1;
	reverse->orientation = 0;

	//Fill in the shared contents.
	contents->name = name;
	contents->segments = stSortedSet_construct3(blockConstruct_constructP, NULL);
	contents->length = length;
	contents->flower = flower;

	//Attach the ends; the reverse orientation's left end is the reverse of rightEnd.
	forward->leftEnd = leftEnd;
	end_setBlock(leftEnd, forward);
	reverse->leftEnd = end_getReverse(rightEnd);
	end_setBlock(rightEnd, forward);

	flower_addBlock(flower, forward);

	return forward;
}
示例#5
0
/*
 * Allocates an event tree, registers it with the cactus disk and gives it a root
 * event named rootEventName with header "ROOT".
 */
EventTree *eventTree_construct(CactusDisk *cactusDisk, Name rootEventName) {
	EventTree *tree = st_malloc(sizeof(EventTree));

	tree->cactusDisk = cactusDisk;
	cactusDisk_setEventTree(cactusDisk, tree);
	tree->events = stSortedSet_construct3(eventTree_constructP, NULL);

	//The root event is built last because its construction makes a reciprocal
	//call that adds the event to the events set.
	Event *root = event_construct(rootEventName, "ROOT", INT64_MAX, NULL, tree);
	tree->rootEvent = root;

	return tree;
}
示例#6
0
/*
 * Builds a new sorted set that uses the same comparison function as sortedSet and
 * contains the same element pointers (the elements themselves are not copied).
 * destructElementFn becomes the element destructor of the new set.
 */
stSortedSet *stSortedSet_copyConstruct(stSortedSet *sortedSet, void (*destructElementFn)(void *)) {
    stSortedSet *copy = stSortedSet_construct3(stSortedSet_getComparator(sortedSet)->compareFn, destructElementFn);
    stSortedSetIterator *sourceIt = stSortedSet_getIterator(sortedSet);
    for(void *element = stSortedSet_getNext(sourceIt); element != NULL; element = stSortedSet_getNext(sourceIt)) {
        stSortedSet_insert(copy, element);
    }
    stSortedSet_destructIterator(sourceIt);
    return copy;
}
示例#7
0
stPosetAlignment *stPosetAlignment_construct(int64_t sequenceNumber) {
    stPosetAlignment *posetAlignment = st_malloc(sizeof(stPosetAlignment));
    posetAlignment->sequenceNumber = sequenceNumber;
    posetAlignment->constraintLists = st_malloc(sizeof(stSortedSet *) * sequenceNumber * sequenceNumber);
    for(int64_t i=0; i<posetAlignment->sequenceNumber; i++) {
        for(int64_t j=0; j<posetAlignment->sequenceNumber; j++) {
            if(i != j) {
                posetAlignment->constraintLists[i*posetAlignment->sequenceNumber + j] =
                    stSortedSet_construct3((int (*)(const void *, const void *))comparePositions,
                            (void (*)(void *))stIntTuple_destruct);
            }
        }
    }
    return posetAlignment;
}
示例#8
0
/*
 * Draws 100000 ids from the cactus disk and checks that each one is positive, below
 * INT64_MAX, not NULL_NAME, survives a round trip through its string form, and has
 * not been handed out before.
 */
void testCactusDisk_getUniqueID_Unique(CuTest* testCase) {
    cactusDiskTestSetup();
    //String forms of the ids seen so far; the set frees them on destruction.
    stSortedSet *seenNames = stSortedSet_construct3(testCactusDisk_getUniqueID_UniqueP, free);
    int64_t remaining = 100000;
    while (remaining-- > 0) {
        Name id = cactusDisk_getUniqueID(cactusDisk);
        CuAssertTrue(testCase, id > 0);
        CuAssertTrue(testCase, id < INT64_MAX);
        CuAssertTrue(testCase, id != NULL_NAME);
        char *idString = cactusMisc_nameToString(id);
        CuAssertTrue(testCase, stSortedSet_search(seenNames, idString) == NULL);
        CuAssertTrue(testCase, cactusMisc_stringToName(idString) == id);
        stSortedSet_insert(seenNames, idString);
    }
    stSortedSet_destruct(seenNames);
    cactusDiskTestTeardown();
}
/*
 * Adds the statistics of one block to the file-level counters baseCoverages,
 * referenceBases and otherReferenceBases. Blocks shorter than minimumBlockLength
 * are ignored, as are blocks containing the other reference when
 * ignoreOtherReferenceBlocks is set. The fileHandle argument is not used here.
 */
static void getMAFBlock2(Block *block, FILE *fileHandle) {
    if (block_getLength(block) < minimumBlockLength) {
        return;
    }

    //First pass: does the block contain the reference and/or the other reference?
    bool hasReference = 0;
    bool hasOtherReference = 0;
    Segment *segment;
    Block_InstanceIterator *segmentIt = block_getInstanceIterator(block);
    for (segment = block_getNext(segmentIt); segment != NULL; segment = block_getNext(segmentIt)) {
        const char *eventHeader = event_getHeader(segment_getEvent(segment));
        if (strcmp(eventHeader, referenceEventString) == 0) {
            hasReference = 1;
            continue;
        }
        if (strcmp(eventHeader, otherReferenceEventString) == 0) {
            hasOtherReference = 1;
        }
    }
    block_destructInstanceIterator(segmentIt);
    if (ignoreOtherReferenceBlocks && hasOtherReference) {
        return;
    }

    //Second pass: count the sample segments and collect the distinct headers of
    //every event that is neither the sample nor the reference.
    stSortedSet *otherEventHeaders = stSortedSet_construct3(
            (int(*)(const void *, const void *)) strcmp, NULL);
    int32_t sampleCount = 0;
    segmentIt = block_getInstanceIterator(block);
    for (segment = block_getNext(segmentIt); segment != NULL; segment = block_getNext(segmentIt)) {
        const char *eventHeader = event_getHeader(segment_getEvent(segment));
        if (strcmp(eventHeader, sampleEventString) == 0) {
            sampleCount++;
            continue;
        }
        if (strcmp(eventHeader, referenceEventString) != 0) {
            stSortedSet_insert(otherEventHeaders, (void *) eventHeader);
        }
    }
    block_destructInstanceIterator(segmentIt);

    //Coverage is binned by the number of distinct other events in the block.
    baseCoverages[stSortedSet_size(otherEventHeaders)] += block_getLength(
            block) * sampleCount;
    stSortedSet_destruct(otherEventHeaders);

    if (hasReference) {
        referenceBases += block_getLength(block) * sampleCount;
    }
    if (hasOtherReference) {
        otherReferenceBases += block_getLength(block) * sampleCount;
    }
}
/*
 * Returns the set of event headers carried by the caps of the end that also occur
 * in the eventStrings list. The set is ordered with strcmp and stores the header
 * pointers returned by event_getHeader without copying or owning them.
 */
static stSortedSet *getEventStrings(End *end, stList *eventStrings) {
    stSortedSet *matchedHeaders = stSortedSet_construct3(
            (int(*)(const void *, const void *)) strcmp, NULL);
    End_InstanceIterator *capIt = end_getInstanceIterator(end);
    for (Cap *cap = end_getNext(capIt); cap != NULL; cap = end_getNext(capIt)) {
        const char *capHeader = event_getHeader(cap_getEvent(cap));
        int64_t index = 0;
        while (index < stList_length(eventStrings)) {
            const char *wanted = stList_get(eventStrings, index);
            if (strcmp(wanted, capHeader) == 0) {
                stSortedSet_insert(matchedHeaders, (void *) capHeader);
            }
            index++;
        }
    }
    end_destructInstanceIterator(capIt);
    return matchedHeaders;
}
示例#11
0
/*
 * Returns a new sorted set holding the elements of sortedSet1 that are not found in
 * sortedSet2. The result uses sortedSet1's comparison function and has no element
 * destructor. Raises SORTED_SET_EXCEPTION_ID if the two sets' comparators differ.
 */
stSortedSet *stSortedSet_getDifference(stSortedSet *sortedSet1, stSortedSet *sortedSet2) {
    if(!stSortedSet_comparatorsEqual(sortedSet1, sortedSet2)) {
        stThrowNew(SORTED_SET_EXCEPTION_ID, "Comparators are not equal for creating the sorted set difference");
    }
    stSortedSet *difference = stSortedSet_construct3(stSortedSet_getComparator(sortedSet1)->compareFn, NULL);

    stSortedSetIterator *firstIt = stSortedSet_getIterator(sortedSet1);
    for(void *element = stSortedSet_getNext(firstIt); element != NULL; element = stSortedSet_getNext(firstIt)) {
        if(stSortedSet_search(sortedSet2, element) != NULL) {
            continue; //Present in both sets, so not part of the difference.
        }
        stSortedSet_insert(difference, element);
    }
    stSortedSet_destructIterator(firstIt);

    return difference;
}
static stSortedSet *getOddNodes(stList *cycle) {
    /*
     * Walks once around a simple cycle of edges, starting from the first node of the
     * first edge, and returns every second node visited (the start node included).
     * The returned set destructs its stIntTuple elements.
     */
    stSortedSet *alternateNodes = stSortedSet_construct3(
            (int(*)(const void *, const void *)) stIntTuple_cmpFn,
            (void(*)(void *)) stIntTuple_destruct);

    stHash *edgesByNode = getNodesToEdgesHash(cycle);
    int64_t current = stIntTuple_get(stList_get(cycle, 0), 0);
    int64_t previous = -1;
    bool take = 1;
    assert(stList_length(cycle) % 2 == 0);
    for (int64_t step = 0; step < stList_length(cycle); step++) {
        if (take) {
            addNodeToSet(alternateNodes, current);
        }
        take = !take; //Keep only every other node.

        //Each node has two incident edges; follow the one that does not lead
        //back to where we came from.
        stList *incident = getItemForNode(current, edgesByNode);
        assert(stList_length(incident) == 2);
        int64_t next = getOtherPosition(stList_get(incident, 0), current);
        if (next == previous) {
            next = getOtherPosition(stList_get(incident, 1), current);
            assert(next != previous);
        }
        previous = current;
        current = next;
    }
    stHash_destruct(edgesByNode);

    assert(stList_length(cycle) / 2 == stSortedSet_size(alternateNodes));

    return alternateNodes;
}
示例#13
0
/*
 * Requests 10 id intervals of random size from the cactus disk and checks every id
 * in each interval: it must be positive, below INT64_MAX, not NULL_NAME, survive a
 * round trip through its string form, and not have been seen before.
 */
void testCactusDisk_getUniqueID_UniqueIntervals(CuTest* testCase) {
    cactusDiskTestSetup();
    //String forms of the ids seen so far; the set frees them on destruction.
    stSortedSet *seenNames = stSortedSet_construct3(testCactusDisk_getUniqueID_UniqueP, free);
    for (int64_t round = 0; round < 10; round++) {
        int64_t intervalSize = st_randomInt(0, 100000);
        Name firstId = cactusDisk_getUniqueIDInterval(cactusDisk, intervalSize);
        //The interval consists of consecutive ids starting at firstId.
        for(int64_t offset = 0; offset < intervalSize; offset++) {
            Name id = firstId + offset;
            CuAssertTrue(testCase, id > 0);
            CuAssertTrue(testCase, id < INT64_MAX);
            CuAssertTrue(testCase, id != NULL_NAME);
            char *idString = cactusMisc_nameToString(id);
            CuAssertTrue(testCase, stSortedSet_search(seenNames, idString) == NULL);
            CuAssertTrue(testCase, cactusMisc_stringToName(idString) == id);
            stSortedSet_insert(seenNames, idString);
        }
    }
    stSortedSet_destruct(seenNames);
    cactusDiskTestTeardown();
}
示例#14
0
/*
 * This builds an adjacency list structure for the sequences. Every
 * (sequence, position) tuple is a key of the returned hash and maps to the column
 * (a sorted set of tuples) that it is aligned in. Initially each position sits in
 * its own column; each pair (seq1, pos1, seq2, pos2) then merges the two columns
 * involved. The hash destructs its keys but not the columns.
 */
static stHash *buildAdjacencyList(stList *pairs, int64_t sequenceNumber) {
    stHash *columnsByPosition = stHash_construct3((uint64_t (*)(const void *))stIntTuple_hashKey,
                                     (int (*)(const void *, const void *))stIntTuple_equalsFn,
                                     (void (*)(void *))stIntTuple_destruct, NULL);

    //Start with one singleton column per position of every sequence.
    for(int64_t seq=0; seq<sequenceNumber; seq++) {
        int64_t position = 0;
        while(position < MAX_SEQUENCE_SIZE) {
            stIntTuple *key = stIntTuple_construct2(seq, position);
            stSortedSet *singleton = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);
            stSortedSet_insert(singleton, key);
            stHash_insert(columnsByPosition, key, singleton);
            position++;
        }
    }

    //Apply the aligned pairs, merging columns as required.
    stListIterator *pairIt = stList_getIterator(pairs);
    for(stIntTuple *pair = stList_getNext(pairIt); pair != NULL; pair = stList_getNext(pairIt)) {
        //Temporary lookup keys for the two ends of the pair.
        stIntTuple *leftKey = stIntTuple_construct2(stIntTuple_get(pair, 0), stIntTuple_get(pair, 1));
        stIntTuple *rightKey = stIntTuple_construct2(stIntTuple_get(pair, 2), stIntTuple_get(pair, 3));
        stSortedSet *kept = stHash_search(columnsByPosition, leftKey);
        assert(kept != NULL);
        stSortedSet *absorbed = stHash_search(columnsByPosition, rightKey);
        assert(absorbed != NULL);
        if(kept != absorbed) {
            //Move every member of the absorbed column into the kept one and
            //repoint its hash entry, then discard the emptied-out column.
            stSortedSetIterator *memberIt = stSortedSet_getIterator(absorbed);
            for(stIntTuple *member = stSortedSet_getNext(memberIt); member != NULL; member = stSortedSet_getNext(memberIt)) {
                assert(stSortedSet_search(kept, member) == NULL);
                stSortedSet_insert(kept, member);
                assert(stHash_search(columnsByPosition, member) == absorbed);
                stHash_insert(columnsByPosition, member, kept);
                assert(stHash_search(columnsByPosition, member) == kept);
            }
            stSortedSet_destructIterator(memberIt);
            stSortedSet_destruct(absorbed);
        }
        stIntTuple_destruct(leftKey);
        stIntTuple_destruct(rightKey);
    }
    stList_destructIterator(pairIt);
    return columnsByPosition;
}
示例#15
0
/*
 * Returns a new sorted set into which every element of sortedSet1 and then every
 * element of sortedSet2 has been inserted. The result uses sortedSet1's comparison
 * function and has no element destructor. Raises SORTED_SET_EXCEPTION_ID if the two
 * sets' comparators differ.
 */
stSortedSet *stSortedSet_getUnion(stSortedSet *sortedSet1, stSortedSet *sortedSet2) {
    if(!stSortedSet_comparatorsEqual(sortedSet1, sortedSet2)) {
        stThrowNew(SORTED_SET_EXCEPTION_ID, "Comparators are not equal for creating the union of two sorted sets");
    }
    stSortedSet *unionSet = stSortedSet_construct3(stSortedSet_getComparator(sortedSet1)->compareFn, NULL);

    //Drain the first set, then the second, into the result.
    stSortedSet *sources[2] = { sortedSet1, sortedSet2 };
    for(int64_t k = 0; k < 2; k++) {
        stSortedSetIterator *sourceIt = stSortedSet_getIterator(sources[k]);
        void *element;
        while((element = stSortedSet_getNext(sourceIt)) != NULL) {
            stSortedSet_insert(unionSet, element);
        }
        stSortedSet_destructIterator(sourceIt);
    }

    return unionSet;
}
示例#16
0
/*
 * Reads one end alignment from fileHandle.
 *
 * Expected layout: a header line "<end name> <number of pair lines>", followed by
 * that many lines of eight integers each, in the order
 *   sI1 p1 st1 score1 sI2 p2 st2 score2.
 *
 * flower:     flower in which the end name is looked up.
 * fileHandle: stream to read from.
 * end:        output; set to the end named in the header, or to NULL if there
 *             was no line left to read.
 *
 * Returns a sorted set of AlignedPair ordered by alignedPair_cmpFn that destructs
 * its elements with alignedPair_destruct, or NULL if there was no line left to read.
 * Aborts via st_errAbort on a malformed header, an unknown end name, a missing
 * pair line or a malformed pair line.
 */
stSortedSet *loadEndAlignmentFromDisk(Flower *flower, FILE *fileHandle, End **end) {
    char *line = stFile_getLineFromFile(fileHandle);
    if(line == NULL) {
        //Nothing left to read; no set has been allocated yet, so nothing leaks.
        *end = NULL;
        return NULL;
    }

    //Parse the header line.
    Name endName;
    int64_t lineNumber;
    int64_t headerFields = sscanf(line, "%" PRIi64 " %" PRIi64 "", &endName, &lineNumber);
    if(headerFields != 2 || lineNumber < 0) {
        st_errAbort("We encountered a mis-specified name in loading the first line of an end alignment from the disk: '%s'\n", line);
    }
    *end = flower_getEnd(flower, endName);
    if(*end == NULL) {
        st_errAbort("We encountered an end name that is not in the database: '%s'\n", line);
    }
    //The header is only released once no error message can still refer to it.
    free(line);

    stSortedSet *endAlignment =
                stSortedSet_construct3((int (*)(const void *, const void *))alignedPair_cmpFn,
                (void (*)(void *))alignedPair_destruct);

    //Parse the aligned pairs, one per line.
    for(int64_t pairIndex=0; pairIndex<lineNumber; pairIndex++) {
        line = stFile_getLineFromFile(fileHandle);
        if(line == NULL) {
            st_errAbort("Got a null line when parsing an end alignment\n");
        }
        int64_t sI1, sI2;
        int64_t p1, st1, p2, st2, score1, score2;
        int64_t pairFields = sscanf(line, "%" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 "", &sI1, &p1, &st1, &score1, &sI2, &p2, &st2, &score2);
        if(pairFields != 8) {
            st_errAbort("We encountered a mis-specified name in loading an end alignment from the disk: '%s'\n", line);
        }
        stSortedSet_insert(endAlignment, alignedPair_construct(sI1, p1, st1, sI2, p2, st2, score1, score2));
        free(line);
    }
    return endAlignment;
}
static void writeCliqueGraph(FILE *fileHandle, stList *edges, int64_t nodeNumber, bool negativeWeights) {
    /*
     * Writes the graph in the text format read by blossom: a header line with the
     * node and edge counts, then one "from to weight" line per edge. The given edges
     * are written first (with their weights negated if negativeWeights is set), and
     * every remaining pair of nodes is then written with weight 0 so that the
     * output graph is a clique.
     */
    int64_t edgeNumber = ((nodeNumber * nodeNumber)  - nodeNumber) / 2;
    fprintf(fileHandle, "%" PRIi64 " %" PRIi64 "\n", nodeNumber, edgeNumber);

    //Remembers which node pairs have already been written.
    stSortedSet *written = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, (void (*)(void *))stIntTuple_destruct);
    int64_t edgesWritten = 0;

    int64_t index = 0;
    while(index < stList_length(edges)) {
        stIntTuple *edge = stList_get(edges, index);
        int64_t from =  stIntTuple_get(edge, 0);
        int64_t to = stIntTuple_get(edge, 1);
        assert(from < nodeNumber);
        assert(to < nodeNumber);
        assert(from >= 0);
        assert(to >= 0);
        assert(from != to);
        int64_t weight = stIntTuple_get(edge, 2);
        int64_t outputWeight = weight;
        if(negativeWeights) { //For a minimisation algorithm the sign is inverted.
            outputWeight = -weight;
        }
        fprintf(fileHandle, "%" PRIi64 " %" PRIi64 " %" PRIi64 "\n", from, to, outputWeight);
        edgesWritten++;
        addEdgeToSet(written, from, to);
        index++;
    }

    //Fill in every pair that has no explicit edge.
    for(int64_t low=0; low<nodeNumber; low++) {
        for(int64_t high=low+1; high<nodeNumber; high++) {
            if(edgeInSet(written, low, high)) {
                continue;
            }
            fprintf(fileHandle, "%" PRIi64 " %" PRIi64 " 0\n", low, high);
            edgesWritten++;
        }
    }

    //Cleanup
    stSortedSet_destruct(written);
    assert(edgeNumber == edgesWritten);
}
示例#18
0
/*
 * Builds an alignment of the adjacency sequences attached to the caps of an end.
 *
 * One adjacency sequence (and one matching SeqFrag) is created per cap of the end,
 * the sequences are multiply aligned with makeAlignment, and each resulting aligned
 * pair is converted to an AlignedPair whose two scores are the pair's score scaled
 * by a per-sequence adjustment factor (see below).
 *
 * sM, spanningTrees, useProgressiveMerging, gapGamma and
 * pairwiseAlignmentBandingParameters are passed through to makeAlignment.
 * maxSequenceLength is passed to adjacencySequence_construct; note that makeAlignment
 * is given the constant 100000000 instead of maxSequenceLength.
 *
 * Returns a sorted set ordered by alignedPair_cmpFn whose element destructor is
 * alignedPair_destruct. For every aligned pair both the pair and its ->reverse are
 * inserted into the set.
 */
stSortedSet *makeEndAlignment(StateMachine *sM, End *end, int64_t spanningTrees, int64_t maxSequenceLength,
        bool useProgressiveMerging, float gapGamma,
        PairwiseAlignmentParameters *pairwiseAlignmentBandingParameters) {
    //Make an alignment of the sequences in the ends

    //Get the adjacency sequences to be aligned.
    //sequences and seqFrags are parallel lists (index k refers to the same cap in both).
    //endInstanceNumbers maps the end on the far side of each cap's adjacency to a
    //heap-allocated count of how many sequences share that far end.
    Cap *cap;
    End_InstanceIterator *it = end_getInstanceIterator(end);
    stList *sequences = stList_construct3(0, (void (*)(void *))adjacencySequence_destruct);
    stList *seqFrags = stList_construct3(0, (void (*)(void *))seqFrag_destruct);
    stHash *endInstanceNumbers = stHash_construct2(NULL, free);
    while((cap = end_getNext(it)) != NULL) {
        //Caps for which cap_getSide is true are replaced by their reverse.
        if(cap_getSide(cap)) {
            cap = cap_getReverse(cap);
        }
        AdjacencySequence *adjacencySequence = adjacencySequence_construct(cap, maxSequenceLength);
        stList_append(sequences, adjacencySequence);
        assert(cap_getAdjacency(cap) != NULL);
        End *otherEnd = end_getPositiveOrientation(cap_getEnd(cap_getAdjacency(cap)));
        stList_append(seqFrags, seqFrag_construct(adjacencySequence->string, 0, end_getName(otherEnd)));
        //Increase count of seqfrags with a given end.
        int64_t *c = stHash_search(endInstanceNumbers, otherEnd);
        if(c == NULL) {
            c = st_calloc(1, sizeof(int64_t));
            assert(*c == 0);
            stHash_insert(endInstanceNumbers, otherEnd, c);
        }
        (*c)++;
    }
    end_destructInstanceIterator(it);

    //Get the alignment.
    MultipleAlignment *mA = makeAlignment(sM, seqFrags, spanningTrees, 100000000, useProgressiveMerging, gapGamma, pairwiseAlignmentBandingParameters);

    //Build an array of weights to reweight pairs in the alignment.
    int64_t *pairwiseAlignmentsPerSequenceNonCommonEnds = st_calloc(stList_length(seqFrags), sizeof(int64_t));
    int64_t *pairwiseAlignmentsPerSequenceCommonEnds = st_calloc(stList_length(seqFrags), sizeof(int64_t));
    //First build array on number of pairwise alignments to each sequence, distinguishing alignments between sequences sharing
    //common ends.
    for(int64_t i=0; i<stList_length(mA->chosenPairwiseAlignments); i++) {
        //Positions 1 and 2 of a chosen pairwise alignment tuple are the two sequence indices.
        stIntTuple *pairwiseAlignment = stList_get(mA->chosenPairwiseAlignments, i);
        int64_t seq1 = stIntTuple_get(pairwiseAlignment, 1);
        int64_t seq2 = stIntTuple_get(pairwiseAlignment, 2);
        assert(seq1 != seq2);
        SeqFrag *seqFrag1 = stList_get(seqFrags, seq1);
        SeqFrag *seqFrag2 = stList_get(seqFrags, seq2);
        //Pick the counter array according to whether the two sequences share the same far end.
        int64_t *pairwiseAlignmentsPerSequence = seqFrag1->rightEndId == seqFrag2->rightEndId
                ? pairwiseAlignmentsPerSequenceCommonEnds : pairwiseAlignmentsPerSequenceNonCommonEnds;
        pairwiseAlignmentsPerSequence[seq1]++;
        pairwiseAlignmentsPerSequence[seq2]++;
    }
    //Now calculate score adjustments.
    //For each sequence the adjustment is (number of possible partners of that kind) /
    //(number of chosen pairwise alignments of that kind). INT64_MIN marks "no chosen
    //alignments of this kind"; it is asserted further down that such an entry is never used.
    double *scoreAdjustmentsNonCommonEnds = st_malloc(stList_length(seqFrags) * sizeof(double));
    double *scoreAdjustmentsCommonEnds = st_malloc(stList_length(seqFrags) * sizeof(double));
    for(int64_t i=0; i<stList_length(seqFrags); i++) {
        SeqFrag *seqFrag = stList_get(seqFrags, i);
        End *otherEnd = flower_getEnd(end_getFlower(end), seqFrag->rightEndId);
        assert(otherEnd != NULL);
        assert(stHash_search(endInstanceNumbers, otherEnd) != NULL);
        //commonInstanceNumber counts the sequences sharing this far end (this one included).
        int64_t commonInstanceNumber = *(int64_t *)stHash_search(endInstanceNumbers, otherEnd);
        int64_t nonCommonInstanceNumber = stList_length(seqFrags) - commonInstanceNumber;

        assert(commonInstanceNumber > 0 && nonCommonInstanceNumber >= 0);
        assert(pairwiseAlignmentsPerSequenceNonCommonEnds[i] <= nonCommonInstanceNumber);
        assert(pairwiseAlignmentsPerSequenceNonCommonEnds[i] >= 0);
        assert(pairwiseAlignmentsPerSequenceCommonEnds[i] < commonInstanceNumber);
        assert(pairwiseAlignmentsPerSequenceCommonEnds[i] >= 0);

        //scoreAdjustmentsNonCommonEnds[i] = ((double)nonCommonInstanceNumber + commonInstanceNumber - 1)/(pairwiseAlignmentsPerSequenceNonCommonEnds[i] + pairwiseAlignmentsPerSequenceCommonEnds[i]);
        //scoreAdjustmentsCommonEnds[i] = scoreAdjustmentsNonCommonEnds[i];
        if(pairwiseAlignmentsPerSequenceNonCommonEnds[i] > 0) {
            scoreAdjustmentsNonCommonEnds[i] = ((double)nonCommonInstanceNumber)/pairwiseAlignmentsPerSequenceNonCommonEnds[i];
            assert(scoreAdjustmentsNonCommonEnds[i] >= 1.0);
            assert(scoreAdjustmentsNonCommonEnds[i] <= nonCommonInstanceNumber);
        }
        else {
            scoreAdjustmentsNonCommonEnds[i] = INT64_MIN;
        }
        if(pairwiseAlignmentsPerSequenceCommonEnds[i] > 0) {
            //The sequence itself is excluded from its own set of common-end partners, hence the -1.
            scoreAdjustmentsCommonEnds[i] = ((double)commonInstanceNumber-1)/pairwiseAlignmentsPerSequenceCommonEnds[i];
            assert(scoreAdjustmentsCommonEnds[i] >= 1.0);
            assert(scoreAdjustmentsCommonEnds[i] <= commonInstanceNumber-1);
        }
        else {
            scoreAdjustmentsCommonEnds[i] = INT64_MIN;
        }
    }

    //Convert the alignment pairs to an alignment of the caps..
    stSortedSet *sortedAlignment =
                stSortedSet_construct3((int (*)(const void *, const void *))alignedPair_cmpFn,
                (void (*)(void *))alignedPair_destruct);
    //The aligned pairs are popped off (and destructed) one at a time, emptying mA->alignedPairs.
    while(stList_length(mA->alignedPairs) > 0) {
        //Tuple layout as read here: 0 = score, 1 = first sequence index, 2 = first offset,
        //3 = second sequence index, 4 = second offset.
        stIntTuple *alignedPair = stList_pop(mA->alignedPairs);
        assert(stIntTuple_length(alignedPair) == 5);
        int64_t seqIndex1 = stIntTuple_get(alignedPair, 1);
        int64_t seqIndex2 = stIntTuple_get(alignedPair, 3);
        AdjacencySequence *i = stList_get(sequences, seqIndex1);
        AdjacencySequence *j = stList_get(sequences, seqIndex2);
        assert(i != j);
        int64_t offset1 = stIntTuple_get(alignedPair, 2);
        int64_t offset2 = stIntTuple_get(alignedPair, 4);
        int64_t score = stIntTuple_get(alignedPair, 0);
        if(score <= 0) { //Happens when indel probs are included
            score = 1; //This is the minimum
        }
        assert(score > 0 && score <= PAIR_ALIGNMENT_PROB_1);
        SeqFrag *seqFrag1 = stList_get(seqFrags, seqIndex1);
        SeqFrag *seqFrag2 = stList_get(seqFrags, seqIndex2);
        assert(seqFrag1 != seqFrag2);
        double *scoreAdjustments = seqFrag1->rightEndId == seqFrag2->rightEndId ? scoreAdjustmentsCommonEnds : scoreAdjustmentsNonCommonEnds;
        assert(scoreAdjustments[seqIndex1] != INT64_MIN);
        assert(scoreAdjustments[seqIndex2] != INT64_MIN);
        //The offset is added to the sequence start when the strand flag is set and
        //subtracted from it otherwise.
        AlignedPair *alignedPair2 = alignedPair_construct(
                i->subsequenceIdentifier, i->start + (i->strand ? offset1 : -offset1), i->strand,
                j->subsequenceIdentifier, j->start + (j->strand ? offset2 : -offset2), j->strand,
                score*scoreAdjustments[seqIndex1], score*scoreAdjustments[seqIndex2]); //Do the reweighting here.
        assert(stSortedSet_search(sortedAlignment, alignedPair2) == NULL);
        assert(stSortedSet_search(sortedAlignment, alignedPair2->reverse) == NULL);
        stSortedSet_insert(sortedAlignment, alignedPair2);
        stSortedSet_insert(sortedAlignment, alignedPair2->reverse);
        stIntTuple_destruct(alignedPair);
    }

    //Cleanup
    stList_destruct(seqFrags);
    stList_destruct(sequences);
    free(pairwiseAlignmentsPerSequenceNonCommonEnds);
    free(pairwiseAlignmentsPerSequenceCommonEnds);
    free(scoreAdjustmentsNonCommonEnds);
    free(scoreAdjustmentsCommonEnds);
    multipleAlignment_destruct(mA);
    stHash_destruct(endInstanceNumbers);

    return sortedAlignment;
}
示例#19
0
/*
 * Constructs an empty sorted set that orders its elements with st_sortedSet_cmpFn
 * and uses destructElementFn as its element destructor.
 */
stSortedSet *stSortedSet_construct2(void (*destructElementFn)(void *)) {
    stSortedSet *sortedSet = stSortedSet_construct3(st_sortedSet_cmpFn, destructElementFn);
    return sortedSet;
}
示例#20
0
/*
 * One iteration of the bulk insert / update / remove test against the database
 * fixture (database and conf are declared outside this function).
 *
 * numRecords distinct random keys are inserted, each with the key itself stored as
 * its 32-bit value; every value is then negated in place; the database is
 * optionally destructed and re-constructed from conf (reopenDatabase); finally every
 * record is removed, checking that a second removal raises
 * ST_KV_DATABASE_EXCEPTION_ID and that the database ends up empty.
 */
static void readWriteAndRemoveRecordsLotsIteration(CuTest *testCase, int numRecords, bool reopenDatabase) {
    //Make a big old list of records..
    //The set holds one 1-tuple per key inserted so far and owns those tuples.
    stSortedSet *set = stSortedSet_construct3((int(*)(const void *, const void *)) stIntTuple_cmpFn,
            (void(*)(void *)) stIntTuple_destruct);
    while (stSortedSet_size(set) < numRecords) {
        int32_t key = st_randomInt(0, 100 * numRecords);
        stIntTuple *tuple = stIntTuple_construct(1, key);
        if (stSortedSet_search(set, tuple) == NULL) {
            //New key: it must be absent before the insert and present after it.
            CuAssertTrue(testCase, !stKVDatabase_containsRecord(database, key));
            stSortedSet_insert(set, tuple);
            stKVDatabase_insertRecord(database, key, &key, sizeof(int32_t));
            CuAssertTrue(testCase, stKVDatabase_containsRecord(database, key));
        } else {
            CuAssertTrue(testCase, stKVDatabase_containsRecord(database, key));
            stIntTuple_destruct(tuple); // already in db
        }
    }

    //NOTE(review): the last argument looks like the multiplier expected between key
    //and stored value (1 here, -1 after the negation below) - confirm against the helper.
    readWriteAndRemoveRecordsLotsCheck(testCase, set, 1);

    //Update all records to negate values
    stSortedSetIterator *it = stSortedSet_getIterator(set);
    stIntTuple *tuple;
    while ((tuple = stSortedSet_getNext(it)) != NULL) {
        //The buffer returned by stKVDatabase_getRecord is freed here after being written back.
        int32_t *value = (int32_t *) stKVDatabase_getRecord(database, stIntTuple_getPosition(tuple, 0));
        *value *= -1;
        stKVDatabase_updateRecord(database, stIntTuple_getPosition(tuple, 0), value, sizeof(int32_t));
        CuAssertTrue(testCase, stKVDatabase_containsRecord(database, stIntTuple_getPosition(tuple, 0)));
        free(value);
    }
    stSortedSet_destructIterator(it);

    readWriteAndRemoveRecordsLotsCheck(testCase, set, -1);

    //Try optionally committing the transaction and reloading the database..
    //(The transaction calls are commented out, so this only destructs and re-constructs the database.)
    if (reopenDatabase) {
        //stKVDatabase_commitTransaction(database);
        stKVDatabase_destruct(database);
        database = stKVDatabase_construct(conf, false);
        //stKVDatabase_startTransaction(database);
    }

    //Now remove each one..
    it = stSortedSet_getIterator(set);
    while ((tuple = stSortedSet_getNext(it)) != NULL) {
        CuAssertTrue(testCase, stKVDatabase_containsRecord(database, stIntTuple_getPosition(tuple, 0)));
        stKVDatabase_removeRecord(database, stIntTuple_getPosition(tuple, 0));
        CuAssertTrue(testCase, !stKVDatabase_containsRecord(database, stIntTuple_getPosition(tuple, 0)));
        //Test we get exception if we remove twice.
        //Reaching the CuAssertTrue(testCase, 0) means no exception was raised, which fails the test.
        stTry {
                stKVDatabase_removeRecord(database, stIntTuple_getPosition(tuple, 0));
                CuAssertTrue(testCase, 0);
            }
            stCatch(except)
                {
                    CuAssertTrue(testCase, stExcept_getId(except) == ST_KV_DATABASE_EXCEPTION_ID);
                }stTryEnd;
    }
    stSortedSet_destructIterator(it);
    CuAssertIntEquals(testCase, 0, stKVDatabase_getNumberOfRecords(database));

    stSortedSet_destruct(set);
}
示例#21
0
/*
 * Returns a new sorted set, ordered by segmentCompareFn and with no element
 * destructor, that has been filled for the flower by getOrderedSegmentsP.
 */
stSortedSet *getOrderedSegments(Flower *flower) {
    stSortedSet *orderedSegments = stSortedSet_construct3(segmentCompareFn, NULL);

    getOrderedSegmentsP(flower, orderedSegments);

    return orderedSegments;
}
示例#22
0
/*
 * Test fixture setup: (re)creates the two file-level sorted sets, both ordered by
 * stIntTuple_cmpFn and both destructing their elements with stIntTuple_destruct.
 */
static void sonLibSortedSetTestSetup() {
    int (*compareTuples)(const void *, const void *) = (int (*)(const void *, const void *))stIntTuple_cmpFn;
    void (*destructTuple)(void *) = (void (*)(void *))stIntTuple_destruct;

    sortedSet = stSortedSet_construct3(compareTuples, destructTuple);
    sortedSet2 = stSortedSet_construct3(compareTuples, destructTuple);
}
示例#23
0
/*
 * Constructs an empty sorted set that orders its elements with st_sortedSet_cmpFn
 * and has no element destructor.
 */
stSortedSet *stSortedSet_construct(void) {
    stSortedSet *sortedSet = stSortedSet_construct3(st_sortedSet_cmpFn, NULL);
    return sortedSet;
}
示例#24
0
/*
 * Shared implementation for constructing a CactusDisk.
 *
 * conf:              configuration used to open (or create) the key-value database.
 * create:            passed to stKVDatabase_construct; must be true when the database
 *                    does not yet contain the cactus disk parameters record.
 * sequencesFileName: if non-NULL, sequences are stored in a file of this name inside
 *                    the directory given by the database conf. Only allowed when the
 *                    cactus disk is being created.
 *
 * Raises CACTUS_DISK_EXCEPTION_ID if create is set but the disk already exists, if a
 * sequences file name is given for an existing disk, if a sequences file name is
 * given but the conf has no directory, or if the sequences file cannot be created.
 * Also seeds the random number generator via st_randomSeed.
 */
static CactusDisk *cactusDisk_constructPrivate(stKVDatabaseConf *conf, bool create, const char *sequencesFileName) {
    //sequencesFileName = NULL; //Disable the ability to store the sequences on disk.
    CactusDisk *cactusDisk = st_calloc(1, sizeof(CactusDisk));

    //construct lists of in memory objects
    cactusDisk->metaSequences = stSortedSet_construct3(cactusDisk_constructMetaSequencesP, NULL);
    cactusDisk->flowers = stSortedSet_construct3(cactusDisk_constructFlowersP, NULL);
    cactusDisk->flowerNamesMarkedForDeletion = stSortedSet_construct3((int (*)(const void *, const void *)) strcmp,
            free);
    cactusDisk->updateRequests = stList_construct3(0, (void (*)(void *)) stKVDatabaseBulkRequest_destruct);

    //Now open the database
    cactusDisk->database = stKVDatabase_construct(conf, create);
    cactusDisk->cache = stCache_construct();
    cactusDisk->stringCache = stCache_construct();

    //initialise the unique ids.
    int64_t seed = (clock() << 24) | (time(NULL) << 16) | (getpid() & 65535); //Likely to be unique
    st_logDebug("The cactus disk is seeding the random number generator with the value %" PRIi64 "\n", seed);
    st_randomSeed(seed);
    cactusDisk->uniqueNumber = 0;
    cactusDisk->maxUniqueNumber = 0;

    //Now load any stuff..
    if (containsRecord(cactusDisk, CACTUS_DISK_PARAMETER_KEY)) {
        //The disk already exists: load its parameters from the database.
        if (create) {
            stThrowNew(CACTUS_DISK_EXCEPTION_ID, "Tried to create a cactus disk, but the cactus disk already exists");
        }
        if (sequencesFileName != NULL) {
            stThrowNew(CACTUS_DISK_EXCEPTION_ID,
                    "A sequences file name is specified, but the cactus disk is not being created");
        }
        void *record = getRecord(cactusDisk, CACTUS_DISK_PARAMETER_KEY, "cactus_disk parameters");
        //The loader is handed the address of record, so the original pointer is kept for freeing.
        void *record2 = record;
        cactusDisk_loadFromBinaryRepresentation(&record, cactusDisk, conf);
        free(record2);
    } else {
        //The disk does not exist yet, so it must be being created.
        assert(create);
        if (sequencesFileName == NULL) {
            cactusDisk->storeSequencesInAFile = 0;
            cactusDisk->sequencesFileName = NULL;
            cactusDisk->sequencesReadFileHandle = NULL;
            cactusDisk->sequencesWriteFileHandle = NULL;
            cactusDisk->absSequencesFileName = NULL;
        } else {
            if (stKVDatabaseConf_getDir(conf) == NULL) {
                stThrowNew(CACTUS_DISK_EXCEPTION_ID,
                        "The database conf does not contain a directory in which the sequence file is to be found!\n");
            }
            cactusDisk->storeSequencesInAFile = 1;
            cactusDisk->sequencesFileName = stString_copy(sequencesFileName);
            cactusDisk->absSequencesFileName = stString_print("%s/%s", stKVDatabaseConf_getDir(conf),
                    cactusDisk->sequencesFileName);
            //Make sure the file exists by opening it for writing and closing it again.
            //The open is checked explicitly (not just asserted) so that a failure is
            //reported even when assertions are compiled out, instead of reaching
            //fclose with a NULL stream.
            FILE *newSequencesFile = fopen(cactusDisk->absSequencesFileName, "w");
            if (newSequencesFile == NULL) {
                stThrowNew(CACTUS_DISK_EXCEPTION_ID,
                        "Could not create the sequences file: %s", cactusDisk->absSequencesFileName);
            }
            fclose(newSequencesFile); //Flush it first time.
            cactusDisk->sequencesReadFileHandle = NULL;
            cactusDisk->sequencesWriteFileHandle = NULL;
        }
    }

    return cactusDisk;
}