static void test_addBlockToHash_3(CuTest *testCase) {
    // concatenation with 2 bases of interstitial and a sequence length breakpoint
    options_t *options = options_construct();
    options->breakpointPenalty = 10;
    options->interstitialSequence = 5;
    stList *observedList = stList_construct3(0, free);
    stList *expectedList = stList_construct3(0, free);
    stHash *observedHash = createBlockHashFromString("a score=0\n"
                                                     "s reference.chr0 0 13 + 158545518 gcagctgaaaaca\n"
                                                     "s name.chr1      0 10 +       100 ATGT---ATGCCG\n"
                                                     "s name2.chr1     0 10 +       100 ATGT---ATGCCG\n"
                                                     "s name3.chr1     0 13 +       100 GCAGCTGAAAACA\n",
                                                     observedList
                                                     );
    mafBlock_t *mb = maf_newMafBlockListFromString("a score=0 test\n"
                                                   "s reference.chr0 13  5 + 158545518 ACGTA\n"
                                                   "s name.chr1      12  5 +       100 gtcGG\n"
                                                   "s name2.chr1     10  5 +       100 ATGTg\n"
                                                   "s name3.chr1     50  5 +       100 CCCCC\n"
                                                   , 3);
    stHash *expectedHash = NULL;
    expectedHash = createBlockHashFromString("a score=0\n"
                                             "s reference.chr0 0 18 + 158545518 gcagctgaaaaca------------ACGTA\n"
                                             "s name.chr1      0 17 +       100 ATGT---ATGCCGac----------gtcGG\n"
                                             "s name2.chr1     0 15 +       100 ATGT---ATGCCG------------ATGTg\n"
                                             "s name3          0 28 +        28 GCAGCTGAAAACA--NNNNNNNNNNCCCCC\n",
                                             expectedList
                                             );
    row_t *r = stHash_search(expectedHash, "name3");
    r->prevRightPos = 54;
    free(r->prevName);
    r->prevName = stString_copy("name3.chr1");
    r->multipleNames = true;
    stHash *seqHash = createSeqHashFromString("name.chr1", "ATGTATGCCGacgtc"
                                              "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG"
                                              "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG");
    mtfseq_t *mtfs = newMtfseqFromString("gcagctgaaaacaACGTA"
                                         "tttttttttttttttttttttttttttttttt"
                                         "tttttttttttttttttttttttttttttttttttttttttttttttttt");
    stHash_insert(seqHash, stString_copy("reference.chr0"), mtfs);
    mtfs = newMtfseqFromString("ATGTATGCCGATGTg"
                               "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"
                               "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC");
    stHash_insert(seqHash, stString_copy("name2.chr1"), mtfs);
    mtfs = newMtfseqFromString("GCAGCTGAAAACAggggggggggggggggggggggggggggggggggggg"
                               "CCCCCaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                               );
    stHash_insert(seqHash, stString_copy("name3.chr1"), mtfs);
    addMafBlockToRowHash(observedHash, seqHash, observedList, mb, options);
    CuAssertTrue(testCase, hashesAreEqual(observedHash, expectedHash));
    CuAssertTrue(testCase, listsAreEqual(observedList, expectedList));
    // clean up
    stHash_destruct(observedHash);
    stHash_destruct(expectedHash);
    stHash_destruct(seqHash);
    stList_destruct(observedList);
    stList_destruct(expectedList);
    maf_destroyMafBlockList(mb);
    destroyOptions(options);
}
static void setup() {
    teardown();
    assert(nodeNumber == -1);
    while(nodeNumber % 2 != 0) {
        nodeNumber = st_randomInt(0, 100);
    }
    assert(nodeNumber >= 0);
    assert(nodeNumber % 2 == 0);
    stubs = stList_construct3(0, (void (*)(void *))stIntTuple_destruct);
    chains = stList_construct3(0, (void (*)(void *))stIntTuple_destruct);
    for(int64_t i=0; i<nodeNumber/2; i++) {
        assert(nodeNumber/2 > 0);
        stIntTuple *edge = stIntTuple_construct2(i, nodeNumber/2 + i);
        if(stList_length(stubs) == 0 || st_random() > 0.9) {
            stList_append(stubs, edge);
        }
        else {
            stList_append(chains, edge);
        }
    }
    zMatrix = st_calloc(nodeNumber*nodeNumber, sizeof(float));
    for(int64_t i=0; i<nodeNumber; i++) {
        for(int64_t j=i+1; j<nodeNumber; j++) {
            double score = st_random();
            zMatrix[i * nodeNumber + j] = score;
            zMatrix[j * nodeNumber + i] = score;
        }
    }
    st_logDebug("To test the adjacency problem we've created a problem with %" PRIi64 " nodes %" PRIi64 " stubs and %" PRIi64 " chains\n", nodeNumber, stList_length(stubs), stList_length(chains));
}
Esempio n. 3
0
static void test_stPosetAlignment_addAndIsPossible(CuTest *testCase) {
    for(int64_t trial=0; trial<100; trial++) {
        setup();

        //Make random number of sequences.
        stList *sequenceLengths = stList_construct3(0, (void (*)(void *))stIntTuple_destruct);
        for(int64_t i=0; i<sequenceNumber; i++) {
            stList_append(sequenceLengths, stIntTuple_construct1( st_randomInt(0, MAX_SEQUENCE_SIZE)));
        }

        //Propose random alignment pairs...
        stList *pairs = stList_construct3(0, (void(*)(void *))stIntTuple_destruct);
        int64_t maxAlignedPairs = st_randomInt(0, MAX_ALIGNMENTS);
        if(sequenceNumber > 0) {
            for(int64_t i=0; i<maxAlignedPairs; i++) {
                int64_t seq1 = st_randomInt(0, sequenceNumber);
                int64_t seqLength1 = stIntTuple_get(stList_get(sequenceLengths, seq1), 0);
                if(seqLength1 == 0) {
                    continue;
                }
                int64_t position1 = st_randomInt(0, seqLength1);
                int64_t seq2 = st_randomInt(0, sequenceNumber);
                int64_t seqLength2 = stIntTuple_get(stList_get(sequenceLengths, seq1), 0);
                if(seqLength2 == 0) {
                    continue;
                }
                int64_t position2 = st_randomInt(0, seqLength2);
                if(seq1 != seq2) {
                    stList_append(pairs, stIntTuple_construct4( seq1, position1, seq2, position2));
                    if(stPosetAlignment_isPossible(posetAlignment, seq1, position1, seq2, position2)) {
                        st_logInfo("In %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " \n", seq1, position1, seq2, position2);
                        //For each accepted pair check it doesn't create a cycle.
                        CuAssertTrue(testCase, !containsACycle(pairs, sequenceNumber));
                        CuAssertTrue(testCase, stPosetAlignment_add(posetAlignment, seq1, position1, seq2, position2));
                    }
                    else {
                        st_logInfo("Out %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " \n", seq1, position1, seq2, position2);
                        //For each rejected pair check it creates a cycle..
                        CuAssertTrue(testCase, containsACycle(pairs, sequenceNumber));
                        CuAssertTrue(testCase, !stPosetAlignment_isPossible(posetAlignment, seq1, position1, seq2, position2));
                        stIntTuple_destruct(stList_pop(pairs)); //remove the pair which created the cycle.
                        CuAssertTrue(testCase, !containsACycle(pairs, sequenceNumber)); //Check we're back to being okay..
                    }
                }
            }
        }

        //Cleanup
        stList_destruct(sequenceLengths);
        stList_destruct(pairs);
        teardown();
        st_logInfo("Passed a random ordering test with %" PRIi64 " sequences and %" PRIi64 " aligned pairs\n", sequenceNumber, maxAlignedPairs);
    }
}
static void test_addBlockToHash_2(CuTest *testCase) {
    // concatenation with 2 bases of interstitial AND a previously unobserved sequence
    options_t *options = options_construct();
    options->breakpointPenalty = 10;
    options->interstitialSequence = 5;
    stList *observedList = stList_construct3(0, free);
    stList *expectedList = stList_construct3(0, free);
    stHash *observedHash = createBlockHashFromString("a score=0\n"
                                                     "s reference.chr0 0 13 + 158545518 gcagctgaaaaca\n"
                                                     "s name.chr1      0 10 +       100 ATGT---ATGCCG\n"
                                                     "s name2.chr1     0 10 +       100 ATGT---ATGCCG\n",
                                                     observedList);
    mafBlock_t *mb = maf_newMafBlockListFromString("a score=0 test\n"
                                                   "s reference.chr0 13  5 + 158545518 ACGTA\n"
                                                   "s name.chr1      12  5 +       100 gTcGG\n"
                                                   "s name2.chr1     10  5 +       100 ATGTg\n"
                                                   "s name3.chr@      0  5 +        20 aaccg\n"
                                                   , 3);
    stHash *expectedHash = createBlockHashFromString("a score=0\n"
                                                     "s reference.chr0 0 18 + 158545518 gcagctgaaaaca--ACGTA\n"
                                                     "s name.chr1      0 17 +       100 ATGT---ATGCCGacgTcGG\n"
                                                     "s name2.chr1     0 15 +       100 ATGT---ATGCCG--ATGTg\n"
                                                     "s name3.chr@     0  5 +        20 ---------------aaccg\n",
                                                     expectedList
                                                     );
    stHash *seqHash = createSeqHashFromString("name.chr1", "ATGTATGCCGacgTc"
                                              "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG"
                                              "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG");
    mtfseq_t *mtfs = newMtfseqFromString("gcagctgaaaacaACGTA"
                                         "tttttttttttttttttttttttttttttttt"
                                         "tttttttttttttttttttttttttttttttttttttttttttttttttt");
    stHash_insert(seqHash, stString_copy("reference.chr0"), mtfs);
    mtfs = newMtfseqFromString("ATGTATGCCGATGTg"
                               "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"
                               "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC");
    stHash_insert(seqHash, stString_copy("name2.chr1"), mtfs);
    mtfs = newMtfseqFromString("aaccgTTTTTTTTTTTTTTT");
    stHash_insert(seqHash, stString_copy("name3.chr@"), mtfs);
    addMafBlockToRowHash(observedHash, seqHash, observedList, mb, options);
    CuAssertTrue(testCase, hashesAreEqual(observedHash, expectedHash));
    CuAssertTrue(testCase, listsAreEqual(observedList, expectedList));
    // clean up
    stHash_destruct(observedHash);
    stHash_destruct(expectedHash);
    stHash_destruct(seqHash);
    stList_destruct(observedList);
    stList_destruct(expectedList);
    maf_destroyMafBlockList(mb);
    destroyOptions(options);
}
static void test_interstitial_0(CuTest *testCase) {
    stList *observedList = stList_construct3(0, free);
    stList *expectedList = stList_construct3(0, free);
    stHash *observedHash = createBlockHashFromString("a score=0\n"
                                                     "s reference.chr0 0 13 + 158545518 gcagctgaaaaca\n"
                                                     "s name.chr1      0 10 +       100 ATGT---ATGCCG\n"
                                                     "s name2.chr1     0 10 +       100 ATGT---ATGCCG\n",
                                                     observedList);
    stHash *expectedHash = createBlockHashFromString("a score=0\n"
                                                     "s reference.chr0 0 13 + 158545518 gcagctgaaaaca-----\n"
                                                     "s name.chr1      0 15 +       100 ATGT---ATGCCGaaaTa\n"
                                                     "s name2.chr1     0 10 +       100 ATGT---ATGCCG-----\n",
                                                     expectedList);
    stHash *seqHash = createSeqHashFromString("name.chr1", "ATGTATGCCGaaaTaTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT"
                                              "TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT");
    interstitialInsert(observedHash, seqHash, "name.chr1", 10, '+', 5);
    CuAssertTrue(testCase, hashesAreEqual(observedHash, expectedHash));
    CuAssertTrue(testCase, listsAreEqual(observedList, expectedList));
    // clean up
    stHash_destruct(observedHash);
    stHash_destruct(expectedHash);
    stHash_destruct(seqHash);
    stList_destruct(observedList);
    stList_destruct(expectedList);
    observedList = stList_construct3(0, free);
    expectedList = stList_construct3(0, free);
    observedHash = createBlockHashFromString("a score=0\n"
                                             "s reference.chr0 0 13 + 158545518 gcagctgaaaaca\n"
                                             "s name.chr1      0 10 -       100 ATGT---ATGCCG\n"
                                             "s name2.chr1     0 10 +       100 ATGT---ATGCCG\n",
                                             observedList);
    expectedHash = createBlockHashFromString("a score=0\n"
                                             "s reference.chr0 0 13 + 158545518 gcagctgaaaaca-----\n"
                                             "s name.chr1      0 15 -       100 ATGT---ATGCCGaaaTa\n"
                                             "s name2.chr1     0 10 +       100 ATGT---ATGCCG-----\n",
                                             expectedList);
    seqHash = createSeqHashFromString("name.chr1", "ggggggggggggTTgggggggggggggggggggggggggggggggggggg" // 50
                                      "gggaagggGgggCgggTgggAgggcgggtgggagg" // 35
                                      "tAtttCGGCATACAT");
    interstitialInsert(observedHash, seqHash, "name.chr1", 10, '-', 5);
    CuAssertTrue(testCase, hashesAreEqual(observedHash, expectedHash));
    CuAssertTrue(testCase, listsAreEqual(observedList, expectedList));
    // clean up
    stHash_destruct(observedHash);
    stHash_destruct(expectedHash);
    stHash_destruct(seqHash);
    stList_destruct(observedList);
    stList_destruct(expectedList);
}
Esempio n. 6
0
static stList *getRandomPairwiseAlignments() {
    stList *pairwiseAlignments = stList_construct3(0, (void(*)(void *)) destructPairwiseAlignment);
    int64_t randomAlignmentNumber = st_randomInt(0, 10);
    for (int64_t i = 0; i < randomAlignmentNumber; i++) {
        char *contig1 = stString_print("%" PRIi64 "", i);
        char *contig2 = stString_print("%" PRIi64 "", i * 10);
        int64_t start1 = st_randomInt(100000, 1000000);
        int64_t start2 = st_randomInt(100000, 1000000);
        int64_t strand1 = st_random() > 0.5;
        int64_t strand2 = st_random() > 0.5;
        int64_t end1 = start1;
        int64_t end2 = start2;
        struct List *operationList = constructEmptyList(0, NULL);
        while (st_random() > 0.1) {
            int64_t length = st_randomInt(0, 10);
            int64_t type = st_randomInt(0, 3);
            assert(type < 3);
            listAppend(operationList, constructAlignmentOperation(type, length, 0));
            if (type != PAIRWISE_INDEL_Y) {
                end1 += strand1 ? length : -length;
            }
            if (type != PAIRWISE_INDEL_X) {
                end2 += strand2 ? length : -length;
            }
        }
        stList_append(pairwiseAlignments,
                      constructPairwiseAlignment(contig1, start1, end1, strand1, contig2, start2, end2, strand2, 0.0, operationList));
        free(contig1);
        free(contig2);
    }
    return pairwiseAlignments;
}
Esempio n. 7
0
static void testBulkSetRecords(CuTest *testCase) {
    /*
     * Tests doing a bulk update of a set of records.
     */
    setup();
    int64_t i = 100, j = 110, k = 120, l = 130;
    stKVDatabase_insertRecord(database, 1, &i, sizeof(int64_t));

    stList *requests = stList_construct3(0, (void(*)(void *)) stKVDatabaseBulkRequest_destruct);
    stList_append(requests, stKVDatabaseBulkRequest_constructInsertRequest(2, &j, sizeof(int64_t)));
    stList_append(requests, stKVDatabaseBulkRequest_constructSetRequest(3, &k, sizeof(int64_t)));
    stList_append(requests, stKVDatabaseBulkRequest_constructUpdateRequest(1, &l, sizeof(int64_t)));

    stKVDatabase_bulkSetRecords(database, requests);

    stList_destruct(requests);

    int64_t *m = stKVDatabase_getRecord(database, 1);
    CuAssertTrue(testCase, m != NULL);
    CuAssertTrue(testCase, l == *m);
    free(m);

    m = stKVDatabase_getRecord(database, 2);
    CuAssertTrue(testCase, m != NULL);
    CuAssertTrue(testCase, j == *m);
    free(m);

    m = stKVDatabase_getRecord(database, 3);
    CuAssertTrue(testCase, m != NULL);
    CuAssertTrue(testCase, k == *m);
    free(m);

    teardown();
}
Esempio n. 8
0
int main(int argc, char **argv) {
  options_t *options = options_construct();
  stHash *sequenceHash = NULL; // keyed on fasta headers, valued with mtfseq_t pointers
  stHash *alignmentHash = stHash_construct3(stHash_stringKey, stHash_stringEqualKey, free, destroyRow); // keyed on species names, valued with row_t pointers
  stList *rowOrder = stList_construct3(0, free); // when adding keys to alignmentHash, append to this list
  parseOptions(argc, argv, options);
  // read fastas, populate sequenceHash
  de_verbose("Creating sequence hash.\n");
  sequenceHash = createSequenceHash(options->seqs);
  mafFileApi_t *mfapi = maf_newMfa(options->maf, "r");
  de_verbose("Creating alignment hash.\n");
  buildAlignmentHash(mfapi, alignmentHash, sequenceHash, rowOrder, options);
  if (options->outMfa != NULL) {
    // fasta output
    de_verbose("Writing fasta output.\n");
    writeFastaOut(alignmentHash, rowOrder, options);
  }
  if (options->outMaf != NULL) {
    // maf output
    de_verbose("Writing maf output.\n");
    writeMafOut(alignmentHash, rowOrder, options);
  }
  // cleanup
  maf_destroyMfa(mfapi);
  stHash_destruct(alignmentHash);
  stHash_destruct(sequenceHash);
  stList_destruct(rowOrder);
  destroyOptions(options);
  return(EXIT_SUCCESS);
}
Esempio n. 9
0
static stList *getSubstringsForFlowerSegments(stList *flowers) {
    /*
     * Get the set of substrings representing the strings in the segments of the given flowers.
     */
    stList *substrings = stList_construct3(0, (void (*)(void *)) substring_destruct);
    for (int64_t i = 0; i < stList_length(flowers); i++) {
        Flower *flower = stList_get(flowers, i);
        Flower_EndIterator *blockIt = flower_getBlockIterator(flower);
        Block *block;
        while ((block = flower_getNextBlock(blockIt)) != NULL) {
            Block_InstanceIterator *instanceIt = block_getInstanceIterator(block);
            Segment *segment;
            while ((segment = block_getNext(instanceIt)) != NULL) {
                Sequence *sequence;
                if ((sequence = segment_getSequence(segment)) != NULL) {
                    segment = segment_getStrand(segment) ? segment : segment_getReverse(segment);
                    assert(segment_getLength(segment) > 0);
                    stList_append(substrings,
                            substring_construct(sequence_getMetaSequence(sequence)->stringName,
                                    segment_getStart(segment) - sequence_getStart(sequence),
                                    segment_getLength(segment)));
                }
            }
            block_destructInstanceIterator(instanceIt);
        }
        flower_destructBlockIterator(blockIt);
    }
    return substrings;
}
Esempio n. 10
0
static stList *mergeSubstrings(stList *substrings, int64_t proximityToMerge) {
    /*
     * Merge set of substrings into fewer substrings, if they overlap by less than proximityToMerge
     */
    stList *mergedSubstrings = stList_construct3(0, (void (*)(void *)) substring_destruct);
    if (stList_length(substrings) == 0) {
        return mergedSubstrings;
    }
    stList_sort(substrings, (int (*)(const void *, const void *)) substring_cmp);
    Substring *pSubsequence = substring_clone(stList_get(substrings, 0));
    stList_append(mergedSubstrings, pSubsequence);
    for (int64_t i = 1; i < stList_length(substrings); i++) {
        Substring *substring = stList_get(substrings, i);
        if (pSubsequence->name == substring->name
                && pSubsequence->start + pSubsequence->length + proximityToMerge >= substring->start) { //Merge
            if (pSubsequence->start + pSubsequence->length < substring->start + substring->length) {
                pSubsequence->length = substring->start + substring->length - pSubsequence->start;
            }
        } else {
            pSubsequence = substring_clone(substring);
            stList_append(mergedSubstrings, pSubsequence);
        }
    }
    return mergedSubstrings;
}
static void doBestMergeOfTwoSimpleCycles(stList *cycles,
        stList *nonZeroWeightAdjacencyEdges, stSortedSet *allAdjacencyEdges) {
    /*
     * Merge two simple cycles, using the best possible adjacency switch. Modifies components list in place,
     * destroying two old components and adding a new one. If new adjacency edges are needed then they are
     * added to the adjacency edges list.
     */
    assert(stList_length(cycles) > 1);

    /*
     * Get the best adjacency switch.
     */
    AdjacencySwitch *adjacencySwitch = getBestAdjacencySwitch(cycles,
            nonZeroWeightAdjacencyEdges, allAdjacencyEdges);
    assert(adjacencySwitch != NULL);

    /*
     * Find the two components to merge.
     */
    stList *cyclesToMerge = stList_construct3(0,
            (void(*)(void *)) stList_destruct);
    for (int64_t i = 0; i < stList_length(cycles); i++) {
        stList *cycle = stList_get(cycles, i);
        if (stList_contains(cycle, adjacencySwitch->oldEdge1)) {
            assert(!stList_contains(cycle, adjacencySwitch->oldEdge2));
            stList_append(cyclesToMerge, cycle);
        } else if (stList_contains(cycle, adjacencySwitch->oldEdge2)) {
            stList_append(cyclesToMerge, cycle);
        }
    }

    /*
     * Now construct the new component and modify the list of components in place.
     */
    assert(stList_length(cyclesToMerge) == 2);
    stList *newComponent = stList_join(cyclesToMerge);
    assert(!stList_contains(newComponent, NULL));
    //Cleanup the old components
    assert(stList_contains(cycles, stList_get(cyclesToMerge, 0)));
    stList_removeItem(cycles, stList_get(cyclesToMerge, 0));
    assert(stList_contains(cycles, stList_get(cyclesToMerge, 1)));
    stList_removeItem(cycles, stList_get(cyclesToMerge, 1));
    stList_destruct(cyclesToMerge);
    //Now remove the old edges and add the new ones
    assert(stList_contains(newComponent, adjacencySwitch->oldEdge1));
    stList_removeItem(newComponent, adjacencySwitch->oldEdge1);
    assert(stList_contains(newComponent, adjacencySwitch->oldEdge2));
    stList_removeItem(newComponent, adjacencySwitch->oldEdge2);
    assert(!stList_contains(newComponent, adjacencySwitch->newEdge1));
    stList_append(newComponent, adjacencySwitch->newEdge1);
    assert(!stList_contains(newComponent, adjacencySwitch->newEdge2));
    stList_append(newComponent, adjacencySwitch->newEdge2);
    adjacencySwitch_destruct(adjacencySwitch); //Clean the adjacency switch.
    //Finally add the component to the list of components
    stList_append(cycles, newComponent);

}
Esempio n. 12
0
stTree *stTree_construct(void) {
    stTree *tree = st_malloc(sizeof(stTree));
    tree->branchLength = INFINITY;
    tree->nodes = stList_construct3(0, (void (*)(void *))stTree_destruct);
    tree->label = NULL;
    tree->parent = NULL;
    tree->clientData = NULL;
    return tree;
}
Esempio n. 13
0
/*
 * Constructs a face from a given Cap
 */
static void buildFaces_constructFromCap(Cap * startingCap,
        stHash *liftedEdgesTable, Flower * flower) {
    Face *face = face_construct(flower);
    stList *topNodes = stList_construct3(16, NULL);
    stList *liftedEdges;
    Cap *cap, *bottomNode, *ancestor;
    int64_t index, index2;

    printf("Constructing new face");

    // Establishlist of top nodes
    buildFaces_fillTopNodeList(startingCap, topNodes, liftedEdgesTable);

#ifndef NDEBUG
    // What, no top nodes!?
    if (stList_length(topNodes) == 0)
        abort();
#endif

    // Initialize data structure
    face_allocateSpace(face, stList_length(topNodes));

    // For every top node
    for (index = 0; index < stList_length(topNodes); index++) {
        cap = stList_get(topNodes, index);
        face_setTopNode(face, index, cap);
        liftedEdges = stHash_search(liftedEdgesTable, cap);

        if (!liftedEdges) {
            face_setBottomNodeNumber(face, index, 0);
            continue;
        }

        face_setBottomNodeNumber(face, index, stList_length(liftedEdges));
        // For every bottom node of that top node
        for (index2 = 0; index2 < stList_length(liftedEdges); index2++) {
            bottomNode
                    = ((LiftedEdge *) stList_get(liftedEdges, index2))->bottomNode;
            face_addBottomNode(face, index, bottomNode);
            ancestor = cap_getTopCap(cap_getPositiveOrientation(
                    cap_getAdjacency(bottomNode)));
            if (cap_getAdjacency(cap) != ancestor)
                face_setDerivedDestination(face, index, index2, ancestor);
            else
                face_setDerivedDestination(face, index, index2, NULL);

#ifndef NDEBUG
            // If bottom nodes part of top nodes
            assert(!stList_contains(topNodes, cap_getPositiveOrientation(
                    ((LiftedEdge*) stList_get(liftedEdges, index2))->bottomNode)));
#endif
        }
    }

    // Clean up
    stList_destruct(topNodes);
}
stList *splitMultipleStubCycles(stList *chosenEdges,
        stList *nonZeroWeightAdjacencyEdges, stSortedSet *allAdjacencyEdges,
        stList *stubEdges, stList *chainEdges) {
    /*
     *  Returns an updated list of adjacency edges, such that each stub edge is a member of exactly one cycle.
     */

    /*
     * Calculate components.
     */
    stList *cycles = getComponents2(chosenEdges, stubEdges, chainEdges);

    /*
     * Find components with multiple stub edges.
     */
    stList *singleStubEdgeCycles = stList_construct3(0,
            (void(*)(void *)) stList_destruct);
    for (int64_t i = 0; i < stList_length(cycles); i++) {
        stList *subCycle = stList_get(cycles, i);
        stList *subAdjacencyEdges;
        stList *subStubEdges;
        stList *subChainEdges;
        splitIntoAdjacenciesStubsAndChains(subCycle,
                nonZeroWeightAdjacencyEdges, stubEdges, chainEdges,
                &subAdjacencyEdges, &subStubEdges, &subChainEdges);
        stList *splitCycles = splitMultipleStubCycle(subCycle,
                subAdjacencyEdges, allAdjacencyEdges, subStubEdges,
                subChainEdges);
        stList_appendAll(singleStubEdgeCycles, splitCycles);
        stList_setDestructor(splitCycles, NULL); //Do this to avoid destroying the underlying lists
        stList_destruct(splitCycles);
        stList_destruct(subAdjacencyEdges);
        stList_destruct(subStubEdges);
        stList_destruct(subChainEdges);
    }
    stList_destruct(cycles);

    /*
     * Remove the stub/chain edges from the components.
     */
    stSortedSet *stubAndChainEdgesSet = getSetOfMergedLists(stubEdges,
            chainEdges);
    stList *adjacencyOnlyComponents = filterListsToExclude(
            singleStubEdgeCycles, stubAndChainEdgesSet);
    stList_destruct(singleStubEdgeCycles);
    stSortedSet_destruct(stubAndChainEdgesSet);

    /*
     * Merge the adjacency edges in the components into a single list.
     */
    stList *updatedChosenEdges = stList_join(adjacencyOnlyComponents);
    stList_destruct(adjacencyOnlyComponents);

    return updatedChosenEdges;
}
Esempio n. 15
0
/*
 * Fill in a hashtable which to every node associates
 * alist of lifted edges
 */
static stHash *buildFaces_computeLiftedEdges(Flower * flower) {
    stHash *liftedEdgesTable = stHash_construct3(buildFaces_hashfunction,
            buildFaces_key_eq_fn, NULL, buildFaces_destructValue);
    Flower_CapIterator *iter = flower_getCapIterator(flower);
    Cap *cap, *attachedAncestor;
    Cap *adjacency, *adjacencyAncestor;
    stList *liftedEdges;
    LiftedEdge *liftedEdge;

    // Iterate through potential bottom nodes
    while ((cap = flower_getNextCap(iter))) {
        // ... check if connected
        if ((adjacency = cap_getAdjacency(cap))) {
            // ... lift
            attachedAncestor = cap_getTopCap(cap);
            adjacencyAncestor = cap_getTopCap(cap_getPositiveOrientation(
                    adjacency));

#ifndef NDEBUG
            assert((attachedAncestor && adjacencyAncestor) || (!attachedAncestor && !adjacencyAncestor));
#endif

            // If root node
            if (attachedAncestor == NULL)
                continue;

            // ... create lifted edge
            liftedEdge = st_malloc(sizeof(LiftedEdge));
            liftedEdge->destination = adjacencyAncestor;
            liftedEdge->bottomNode = cap;

#ifndef NDEBUG
            // Self loop
            if (adjacencyAncestor == attachedAncestor)
                abort();
#endif

            // ... add it to the hashtable
            if ((liftedEdges
                    = stHash_search(liftedEdgesTable, attachedAncestor))) {
                stList_append(liftedEdges, liftedEdge);
            } else {
                liftedEdges = stList_construct3(2,
                        buildFaces_stList_destructElem);
                stList_append(liftedEdges, liftedEdge);
                stHash_insert(liftedEdgesTable, attachedAncestor, liftedEdges);
            }
        }
    }

    flower_destructCapIterator(iter);
    return liftedEdgesTable;
}
static stList *filterListsToExclude(stList *listOfLists, stSortedSet *set) {
    /*
     * Takes a list of lists and returns a new list of lists whose elements are the product of applying
     * filterToExclude to each member of listOfLists in the same order.
     */
    stList *listOfLists2 = stList_construct3(0,
            (void(*)(void *)) stList_destruct);
    for (int64_t i = 0; i < stList_length(listOfLists); i++) {
        stList_append(listOfLists2,
                stList_filterToExclude(stList_get(listOfLists, i), set));
    }
    return listOfLists2;
}
static void test_penalize_0(CuTest *testCase) {
    stList *observedList = stList_construct3(0, free);
    stList *expectedList = stList_construct3(0, free);
    stHash *observedHash = createBlockHashFromString("a score=0\n"
                                                     "s reference.chr0 0 13 + 158545518 gcagctgaaaaca\n"
                                                     "s name.chr1      0 10 +       100 ATGT---ATGCCG\n"
                                                     "s name2.chr1     0 10 +       100 ATGT---ATGCCG\n",
                                                     observedList);
    stHash *expectedHash = createBlockHashFromString("a score=0\n"
                                                     "s reference.chr0 0 13 + 158545518 gcagctgaaaaca-----\n"
                                                     "s name.chr1      0 15 +       100 ATGT---ATGCCGNNNNN\n"
                                                     "s name2.chr1     0 10 +       100 ATGT---ATGCCG-----\n",
                                                     expectedList);
    penalize(observedHash, "name.chr1", 5);
    CuAssertTrue(testCase, hashesAreEqual(observedHash, expectedHash));
    CuAssertTrue(testCase, listsAreEqual(observedList, expectedList));
    // clean up
    stHash_destruct(observedHash);
    stHash_destruct(expectedHash);
    stList_destruct(observedList);
    stList_destruct(expectedList);
}
int main(int argc, char *argv[]) {
    //////////////////////////////////////////////
    //Parse the inputs
    //////////////////////////////////////////////

    parseBasicArguments(argc, argv, "linkageStats");

    ///////////////////////////////////////////////////////////////////////////
    // Get the intervals
    ///////////////////////////////////////////////////////////////////////////

    stList *haplotypeEventStrings = getEventStrings(
            treatHaplotype1AsContamination ? NULL : hap1EventString,
            treatHaplotype2AsContamination ? NULL : hap2EventString);
    stList *assemblyEventStringInList = stList_construct();
    stList_append(assemblyEventStringInList, assemblyEventString);

    stList *intervals = stList_construct3(0, (void (*)(void *))sequenceInterval_destruct);
    for(int64_t i=0; i<stList_length(haplotypeEventStrings); i++) {
        const char *hapEventString = stList_get(haplotypeEventStrings, i);
        st_logInfo("Getting contig paths for haplotype: %s", hapEventString);
        stList *contigPaths = getContigPaths(flower, hapEventString, assemblyEventStringInList);
        stList *hapIntervals = getSplitContigPathIntervals(flower, contigPaths, hapEventString,
                assemblyEventStringInList);
        stList_destruct(contigPaths);
        st_logInfo("Getting contig paths\n");
        stList_appendAll(intervals, hapIntervals);
        stList_setDestructor(hapIntervals, NULL);
        stList_destruct(hapIntervals);
    }

    st_logDebug("Got a total of %" PRIi64 " intervals\n", stList_length(intervals));

    ///////////////////////////////////////////////////////////////////////////
    // Write it out.
    ///////////////////////////////////////////////////////////////////////////

    FILE *fileHandle = fopen(outputFile, "w");
    for (int64_t i = 0; i < stList_length(intervals); i++) {
        SequenceInterval *sequenceInterval = stList_get(intervals, i);
        st_logDebug("We have a path interval %s %" PRIi64 " %" PRIi64 "\n", sequenceInterval->sequenceName,
                sequenceInterval->start, sequenceInterval->end);
        fprintf(fileHandle, "%s %" PRIi64 " %" PRIi64 "\n", sequenceInterval->sequenceName,
                sequenceInterval->start, sequenceInterval->end);
    }

    st_logInfo("Finished writing out the stats.\n");
    fclose(fileHandle);

    return 0;
}
Esempio n. 19
0
static void setup() {
    teardown();

    //Make nodes
    nodes = stList_construct3(0, (void(*)(void *)) stIntTuple_destruct);
    int64_t nodeNumber = st_randomInt(0, 1000);
    for (int64_t i = 0; i < nodeNumber; i++) {
        stList_append(nodes, stIntTuple_construct1( i));
    }

    //Make edges
    edges = stList_construct3(0, (void(*)(void *)) stIntTuple_destruct);
    float edgeProb = st_random();
    for (int64_t i = 0; i < nodeNumber; i++) {
        for (int64_t j = i; j < nodeNumber; j++) {
            if (st_random() <= edgeProb) {
                stList_append(edges, stIntTuple_construct3( st_randomInt(1, 100), i, j));
            }
        }
    }

    //Max component size
    maxComponentSize = 1 + log(nodeNumber) * 10; //(st_randomInt(0, nodeNumber+1);
}
Esempio n. 20
0
stList *lineTokensFromFile(const char *filePath, int64_t getLine) {
    FILE *fH = fopen(filePath, "r");
    int64_t lineCount = 1;
    stList *tokens;
    char *string = stFile_getLineFromFile(fH);
    while (string != NULL) {
        string = stFile_getLineFromFile(fH);
        if (lineCount == getLine) {
            tokens = stString_split(string);
            return tokens;
        } else {
            lineCount++;
            free(string);
        }
    }
    fclose(fH);
    return stList_construct3(0, &free);
}
Esempio n. 21
0
stList *getContigPaths(Flower *flower, const char *eventString, stList *eventStrings) {
    stList *maximalHaplotypePaths = stList_construct3(0,
            (void(*)(void *)) stList_destruct);
    stSortedSet *segmentSet = stSortedSet_construct();
    getMaximalHaplotypePathsP(flower, maximalHaplotypePaths, segmentSet, eventString, eventStrings);

    //Do some debug checks..
    st_logDebug("We have %" PRIi64 " maximal haplotype paths\n", stList_length(
            maximalHaplotypePaths));
    getMaximalHaplotypePathsCheck(flower, segmentSet, eventString, eventStrings);
    for (int64_t i = 0; i < stList_length(maximalHaplotypePaths); i++) {
        stList *maximalHaplotypePath = stList_get(maximalHaplotypePaths, i);
        st_logDebug("We have a maximal haplotype path with length %" PRIi64 "\n",
                stList_length(maximalHaplotypePath));
        assert(stList_length(maximalHaplotypePath) > 0);
        Segment *_5Segment = stList_get(maximalHaplotypePath, 0);
        Segment *_3Segment = stList_get(maximalHaplotypePath, stList_length(
                maximalHaplotypePath) - 1);
        if (getAdjacentCapsSegment(segment_get5Cap(_5Segment)) != NULL) {
            assert(!trueAdjacency(segment_get5Cap(_5Segment), eventStrings));
        }
        if (getAdjacentCapsSegment(segment_get3Cap(_3Segment)) != NULL) {
            assert(!trueAdjacency(segment_get3Cap(_3Segment), eventStrings));
        }
        for (int64_t j = 0; j < stList_length(maximalHaplotypePath) - 1; j++) {
            _5Segment = stList_get(maximalHaplotypePath, j);
            _3Segment = stList_get(maximalHaplotypePath, j + 1);
            assert(trueAdjacency(segment_get3Cap(_5Segment), eventStrings));
            assert(trueAdjacency(segment_get5Cap(_3Segment), eventStrings));
            assert(cap_getAdjacency(getTerminalCap(segment_get3Cap(_5Segment)))
                    == getTerminalCap(segment_get5Cap(_3Segment)));
            assert(strcmp(event_getHeader(segment_getEvent(_5Segment)),
                   eventString) == 0);
            assert(strcmp(event_getHeader(segment_getEvent(_3Segment)),
                    eventString) == 0);
            assert(hasCapInEvents(cap_getEnd(segment_get5Cap(_5Segment)), eventStrings)); //isHaplotypeEnd(cap_getEnd(segment_get5Cap(_5Segment))));
            assert(hasCapInEvents(cap_getEnd(segment_get5Cap(_3Segment)), eventStrings)); //isHaplotypeEnd(cap_getEnd(segment_get5Cap(_3Segment))));
        }
    }

    stSortedSet_destruct(segmentSet);

    return maximalHaplotypePaths;
}
Esempio n. 22
0
static stList *bulkGetRecordsRange(stKVDatabase *database, int64_t firstKey, int64_t numRecords) {
	stList* results = stList_construct3(numRecords, (void(*)(void *))stKVDatabaseBulkResult_destruct);
	startTransaction(database);
	stTry {
		for (int32_t i = 0; i < numRecords; ++i)
		{
			int64_t key = firstKey + i;
			int64_t recordSize;
			void* record = getRecord2(database, key, &recordSize);
			stKVDatabaseBulkResult* result = stKVDatabaseBulkResult_construct(record, recordSize);
			stList_set(results, i, result);
		}
		commitTransaction(database);
	}stCatch(ex) {
		abortTransaction(database);
		stThrowNewCause(ex, ST_KV_DATABASE_EXCEPTION_ID, "tokyo cabinet bulk get records failed");
	}stTryEnd;

	return results;
}
Esempio n. 23
0
static void testBulkRemoveRecords(CuTest *testCase) {
    /*
     * Tests doing a bulk update of a set of records.
     */
    setup();
    int64_t i = 100, j = 110, k = 120, l = 130;
    stKVDatabase_insertRecord(database, 1, &i, sizeof(int64_t));
    stKVDatabase_insertRecord(database, 2, &j, sizeof(int64_t));
    stKVDatabase_insertRecord(database, 3, &k, sizeof(int64_t));
    stKVDatabase_insertRecord(database, 4, &l, sizeof(int64_t));
    stKVDatabase_insertRecord(database, 5, &i, 0); //Test null record addition
    CuAssertTrue(testCase, stKVDatabase_containsRecord(database, 1));
    CuAssertTrue(testCase, stKVDatabase_containsRecord(database, 2));
    CuAssertTrue(testCase, stKVDatabase_containsRecord(database, 3));
    CuAssertTrue(testCase, stKVDatabase_containsRecord(database, 4));
    CuAssertTrue(testCase, stKVDatabase_containsRecord(database, 5));
    CuAssertTrue(testCase, stKVDatabase_getNumberOfRecords(database) == 5);

    stList *requests = stList_construct3(0, (void(*)(void *)) stInt64Tuple_destruct);

    // test empty request list
    stKVDatabase_bulkRemoveRecords(database, requests);

    stList_append(requests, stInt64Tuple_construct(1, (int64_t)1));
    stList_append(requests, stInt64Tuple_construct(1, (int64_t)2));
    stList_append(requests, stInt64Tuple_construct(1, (int64_t)3));
    stList_append(requests, stInt64Tuple_construct(1, (int64_t)4));
    stList_append(requests, stInt64Tuple_construct(1, (int64_t)5));
    stKVDatabase_bulkRemoveRecords(database, requests);

    CuAssertTrue(testCase, !stKVDatabase_containsRecord(database, 1));
    CuAssertTrue(testCase, !stKVDatabase_containsRecord(database, 2));
    CuAssertTrue(testCase, !stKVDatabase_containsRecord(database, 3));
    CuAssertTrue(testCase, !stKVDatabase_containsRecord(database, 4));
    CuAssertTrue(testCase, !stKVDatabase_containsRecord(database, 5));
    CuAssertTrue(testCase, stKVDatabase_getNumberOfRecords(database) == 0);

    stList_destruct(requests);

    teardown();
}
stList *getComponents(stList *edges) {
    /*
     * Gets a list of connected components, each connected component
     * being represented as a list of the edges, such that each edge is in exactly one
     * connected component. Allows for multi-graphs (multiple edges connecting two nodes).
     */

    stHash *nodesToEdges = getNodesToEdgesHash(edges);

    /*
     * Traverse the edges greedily
     */
    stList *components =
            stList_construct3(0, (void(*)(void *)) stList_destruct);
    stList *nodes = stHash_getKeys(nodesToEdges);
    while (stList_length(nodes) > 0) {
        stIntTuple *node = stList_pop(nodes);
        stList *edges = stHash_search(nodesToEdges, node);
        if (edges != NULL) { //We have a component to build
            stSortedSet *component = stSortedSet_construct();
            stHash_remove(nodesToEdges, node);
            for (int64_t i = 0; i < stList_length(edges); i++) {
                stIntTuple *edge = stList_get(edges, i);
                getComponentsP(nodesToEdges, stIntTuple_get(edge, 0),
                        component);
                getComponentsP(nodesToEdges, stIntTuple_get(edge, 1),
                        component);
            }
            stList_append(components, stSortedSet_getList(component));
            //Cleanup
            stSortedSet_destruct(component);
            stList_destruct(edges);
        }
        stIntTuple_destruct(node);
    }
    assert(stHash_size(nodesToEdges) == 0);
    stHash_destruct(nodesToEdges);
    stList_destruct(nodes);

    return components;
}
Esempio n. 25
0
static void test_st_randomChoice(CuTest *testCase) {
    /*
     * Excercies the random int function.
     */
    stList *list = stList_construct3(0, (void (*)(void *))stIntTuple_destruct);

    stTry {
        st_randomChoice(list);
    } stCatch(except) {
        CuAssertTrue(testCase, stExcept_getId(except) == RANDOM_EXCEPTION_ID);
    }
    stTryEnd

    for(int32_t i = 0; i < 10; i++) {
        stList_append(list, stIntTuple_construct(1, i));
    }
    for(int32_t i = 0; i < 100; i++) {
        CuAssertTrue(testCase, stList_contains(list, st_randomChoice(list)));
    }
    stList_destruct(list);
}
Esempio n. 26
0
static stList *getRecords(CactusDisk *cactusDisk, stList *objectNames, char *type) {
    if (stList_length(objectNames) == 0) {
        return stList_construct3(0, NULL);
    }
    stList *records = NULL;
    stTry
        {
            records = stKVDatabase_bulkGetRecords(cactusDisk->database, objectNames);
        }
        stCatch(except)
            {
                stThrowNewCause(except, ST_KV_DATABASE_EXCEPTION_ID,
                        "An unknown database error occurred when getting a bulk set of %s", type);
            }stTryEnd
    ;
    assert(records != NULL);
    assert(stList_length(objectNames) == stList_length(records));
    stList_setDestructor(records, free);
    for (int64_t i = 0; i < stList_length(objectNames); i++) {
        Name objectName = *((int64_t *) stList_get(objectNames, i));
        int64_t recordSize;
        void *record;
        stKVDatabaseBulkResult *result = stList_get(records, i);
        assert(result != NULL);
        if (!stCache_containsRecord(cactusDisk->cache, objectName, 0, INT64_MAX)) {
            record = stKVDatabaseBulkResult_getRecord(result, &recordSize);
            assert(recordSize >= 0);
            assert(record != NULL);
            record = decompress(record, &recordSize);
            stCache_setRecord(cactusDisk->cache, objectName, 0, recordSize, record);
        } else {
            record = stCache_getRecord(cactusDisk->cache, objectName, 0, INT64_MAX, &recordSize);
            assert(recordSize >= 0);
            assert(record != NULL);
        }
        stKVDatabaseBulkResult_destruct(result);
        stList_set(records, i, record);
    }
    return records;
}
Esempio n. 27
0
char *cactusDisk_getString(CactusDisk *cactusDisk, Name name, int64_t start, int64_t length, int64_t strand,
        int64_t totalSequenceLength) {
    /*
     * Gets a string from the database.
     *
     */
    assert(length >= 0);
    if (length == 0) {
        return stString_copy("");
    }
    //First try getting it from the cache
    char *string = cactusDisk_getStringFromCache(cactusDisk, name, start, length, strand);
    if (string == NULL) { //If not in the cache, add it to the cache and then get it from the cache.
        stList *list = stList_construct3(0, (void (*)(void *)) substring_destruct);
        stList_append(list, substring_construct(name, start, length));
        cacheSubstringsFromDB(cactusDisk, list);
        stList_destruct(list);
        string = cactusDisk_getStringFromCache(cactusDisk, name, start, length, strand);
    }
    assert(string != NULL);
    return string;
}
Esempio n. 28
0
/*
 * Recursive function which fills a givenlist with the
 * connected nodes within a module and fills their lifted
 * edges in the same pass
 */
static void buildFaces_fillTopNodeList2(Cap * cap, stList *list,
        stHash *liftedEdgesTable) {
    stList *liftedEdges = stList_construct3(2,
                        buildFaces_stList_destructElem);
    int64_t index;

    // Orientation check
    cap = cap_getPositiveOrientation(cap);

    // Limit of recursion
    if (stList_contains(list, cap))
        return;

    // Actual filling
    st_logInfo("Adding cap %p to face\n", cap);
    stList_append(list, cap);

    // Compute lifted edges
    for (index = 0; index < cap_getChildNumber(cap); index++) 
	buildFaces_computeLiftedEdgesAtTopNode(cap_getChild(cap, index), liftedEdges);

    // If emptylist...
    if (stList_length(liftedEdges) == 0) 
	stList_destruct(liftedEdges);
    // Recursion through lifted edges
    else {
	stHash_insert(liftedEdgesTable, cap, liftedEdges);
        for (index = 0; index < stList_length(liftedEdges); index++)
            buildFaces_fillTopNodeList2(
                    ((LiftedEdge *) stList_get(liftedEdges, index))->destination,
                   list, liftedEdgesTable);
    }

    // Recursion through adjacency
    if (cap_getAdjacency(cap))
        buildFaces_fillTopNodeList2(cap_getAdjacency(cap),list,
                liftedEdgesTable);
}
Esempio n. 29
0
static stList *getSubstringsForFlowers(stList *flowers) {
    /*
     * Get the set of substrings for sequence intervals in the given set of flowers.
     */
    stList *substrings = stList_construct3(0, (void (*)(void *)) substring_destruct);
    for (int64_t i = 0; i < stList_length(flowers); i++) {
        Flower *flower = stList_get(flowers, i);
        Flower_EndIterator *endIt = flower_getEndIterator(flower);
        End *end;
        while ((end = flower_getNextEnd(endIt)) != NULL) {
            if (end_isStubEnd(end)) {
                End_InstanceIterator *instanceIt = end_getInstanceIterator(end);
                Cap *cap;
                while ((cap = end_getNext(instanceIt)) != NULL) {
                    Sequence *sequence;
                    if ((sequence = cap_getSequence(cap)) != NULL) {
                        cap = cap_getStrand(cap) ? cap : cap_getReverse(cap);
                        if (!cap_getSide(cap)) { //We have a sequence interval of interest
                            Cap *adjacentCap = cap_getAdjacency(cap);
                            assert(adjacentCap != NULL);
                            int64_t length = cap_getCoordinate(adjacentCap) - cap_getCoordinate(cap) - 1;
                            assert(length >= 0);
                            if (length > 0) {
                                stList_append(substrings,
                                        substring_construct(sequence_getMetaSequence(sequence)->stringName,
                                                cap_getCoordinate(cap) + 1 - sequence_getStart(sequence), length));
                            }
                        }
                    }
                }
                end_destructInstanceIterator(instanceIt);
            }
        }
        flower_destructEndIterator(endIt);
    }
    return substrings;
}
Esempio n. 30
0
stSortedSet *makeEndAlignment(StateMachine *sM, End *end, int64_t spanningTrees, int64_t maxSequenceLength,
        bool useProgressiveMerging, float gapGamma,
        PairwiseAlignmentParameters *pairwiseAlignmentBandingParameters) {
    //Make an alignment of the sequences in the ends

    //Get the adjacency sequences to be aligned.
    Cap *cap;
    End_InstanceIterator *it = end_getInstanceIterator(end);
    stList *sequences = stList_construct3(0, (void (*)(void *))adjacencySequence_destruct);
    stList *seqFrags = stList_construct3(0, (void (*)(void *))seqFrag_destruct);
    stHash *endInstanceNumbers = stHash_construct2(NULL, free);
    while((cap = end_getNext(it)) != NULL) {
        if(cap_getSide(cap)) {
            cap = cap_getReverse(cap);
        }
        AdjacencySequence *adjacencySequence = adjacencySequence_construct(cap, maxSequenceLength);
        stList_append(sequences, adjacencySequence);
        assert(cap_getAdjacency(cap) != NULL);
        End *otherEnd = end_getPositiveOrientation(cap_getEnd(cap_getAdjacency(cap)));
        stList_append(seqFrags, seqFrag_construct(adjacencySequence->string, 0, end_getName(otherEnd)));
        //Increase count of seqfrags with a given end.
        int64_t *c = stHash_search(endInstanceNumbers, otherEnd);
        if(c == NULL) {
            c = st_calloc(1, sizeof(int64_t));
            assert(*c == 0);
            stHash_insert(endInstanceNumbers, otherEnd, c);
        }
        (*c)++;
    }
    end_destructInstanceIterator(it);

    //Get the alignment.
    MultipleAlignment *mA = makeAlignment(sM, seqFrags, spanningTrees, 100000000, useProgressiveMerging, gapGamma, pairwiseAlignmentBandingParameters);

    //Build an array of weights to reweight pairs in the alignment.
    int64_t *pairwiseAlignmentsPerSequenceNonCommonEnds = st_calloc(stList_length(seqFrags), sizeof(int64_t));
    int64_t *pairwiseAlignmentsPerSequenceCommonEnds = st_calloc(stList_length(seqFrags), sizeof(int64_t));
    //First build array on number of pairwise alignments to each sequence, distinguishing alignments between sequences sharing
    //common ends.
    for(int64_t i=0; i<stList_length(mA->chosenPairwiseAlignments); i++) {
        stIntTuple *pairwiseAlignment = stList_get(mA->chosenPairwiseAlignments, i);
        int64_t seq1 = stIntTuple_get(pairwiseAlignment, 1);
        int64_t seq2 = stIntTuple_get(pairwiseAlignment, 2);
        assert(seq1 != seq2);
        SeqFrag *seqFrag1 = stList_get(seqFrags, seq1);
        SeqFrag *seqFrag2 = stList_get(seqFrags, seq2);
        int64_t *pairwiseAlignmentsPerSequence = seqFrag1->rightEndId == seqFrag2->rightEndId
                ? pairwiseAlignmentsPerSequenceCommonEnds : pairwiseAlignmentsPerSequenceNonCommonEnds;
        pairwiseAlignmentsPerSequence[seq1]++;
        pairwiseAlignmentsPerSequence[seq2]++;
    }
    //Now calculate score adjustments.
    double *scoreAdjustmentsNonCommonEnds = st_malloc(stList_length(seqFrags) * sizeof(double));
    double *scoreAdjustmentsCommonEnds = st_malloc(stList_length(seqFrags) * sizeof(double));
    for(int64_t i=0; i<stList_length(seqFrags); i++) {
        SeqFrag *seqFrag = stList_get(seqFrags, i);
        End *otherEnd = flower_getEnd(end_getFlower(end), seqFrag->rightEndId);
        assert(otherEnd != NULL);
        assert(stHash_search(endInstanceNumbers, otherEnd) != NULL);
        int64_t commonInstanceNumber = *(int64_t *)stHash_search(endInstanceNumbers, otherEnd);
        int64_t nonCommonInstanceNumber = stList_length(seqFrags) - commonInstanceNumber;

        assert(commonInstanceNumber > 0 && nonCommonInstanceNumber >= 0);
        assert(pairwiseAlignmentsPerSequenceNonCommonEnds[i] <= nonCommonInstanceNumber);
        assert(pairwiseAlignmentsPerSequenceNonCommonEnds[i] >= 0);
        assert(pairwiseAlignmentsPerSequenceCommonEnds[i] < commonInstanceNumber);
        assert(pairwiseAlignmentsPerSequenceCommonEnds[i] >= 0);

        //scoreAdjustmentsNonCommonEnds[i] = ((double)nonCommonInstanceNumber + commonInstanceNumber - 1)/(pairwiseAlignmentsPerSequenceNonCommonEnds[i] + pairwiseAlignmentsPerSequenceCommonEnds[i]);
        //scoreAdjustmentsCommonEnds[i] = scoreAdjustmentsNonCommonEnds[i];
        if(pairwiseAlignmentsPerSequenceNonCommonEnds[i] > 0) {
            scoreAdjustmentsNonCommonEnds[i] = ((double)nonCommonInstanceNumber)/pairwiseAlignmentsPerSequenceNonCommonEnds[i];
            assert(scoreAdjustmentsNonCommonEnds[i] >= 1.0);
            assert(scoreAdjustmentsNonCommonEnds[i] <= nonCommonInstanceNumber);
        }
        else {
            scoreAdjustmentsNonCommonEnds[i] = INT64_MIN;
        }
        if(pairwiseAlignmentsPerSequenceCommonEnds[i] > 0) {
            scoreAdjustmentsCommonEnds[i] = ((double)commonInstanceNumber-1)/pairwiseAlignmentsPerSequenceCommonEnds[i];
            assert(scoreAdjustmentsCommonEnds[i] >= 1.0);
            assert(scoreAdjustmentsCommonEnds[i] <= commonInstanceNumber-1);
        }
        else {
            scoreAdjustmentsCommonEnds[i] = INT64_MIN;
        }
    }

	//Convert the alignment pairs to an alignment of the caps..
    stSortedSet *sortedAlignment =
                stSortedSet_construct3((int (*)(const void *, const void *))alignedPair_cmpFn,
                (void (*)(void *))alignedPair_destruct);
    while(stList_length(mA->alignedPairs) > 0) {
        stIntTuple *alignedPair = stList_pop(mA->alignedPairs);
        assert(stIntTuple_length(alignedPair) == 5);
        int64_t seqIndex1 = stIntTuple_get(alignedPair, 1);
        int64_t seqIndex2 = stIntTuple_get(alignedPair, 3);
        AdjacencySequence *i = stList_get(sequences, seqIndex1);
        AdjacencySequence *j = stList_get(sequences, seqIndex2);
        assert(i != j);
        int64_t offset1 = stIntTuple_get(alignedPair, 2);
        int64_t offset2 = stIntTuple_get(alignedPair, 4);
        int64_t score = stIntTuple_get(alignedPair, 0);
        if(score <= 0) { //Happens when indel probs are included
            score = 1; //This is the minimum
        }
        assert(score > 0 && score <= PAIR_ALIGNMENT_PROB_1);
        SeqFrag *seqFrag1 = stList_get(seqFrags, seqIndex1);
        SeqFrag *seqFrag2 = stList_get(seqFrags, seqIndex2);
        assert(seqFrag1 != seqFrag2);
        double *scoreAdjustments = seqFrag1->rightEndId == seqFrag2->rightEndId ? scoreAdjustmentsCommonEnds : scoreAdjustmentsNonCommonEnds;
        assert(scoreAdjustments[seqIndex1] != INT64_MIN);
        assert(scoreAdjustments[seqIndex2] != INT64_MIN);
        AlignedPair *alignedPair2 = alignedPair_construct(
                i->subsequenceIdentifier, i->start + (i->strand ? offset1 : -offset1), i->strand,
                j->subsequenceIdentifier, j->start + (j->strand ? offset2 : -offset2), j->strand,
                score*scoreAdjustments[seqIndex1], score*scoreAdjustments[seqIndex2]); //Do the reweighting here.
        assert(stSortedSet_search(sortedAlignment, alignedPair2) == NULL);
        assert(stSortedSet_search(sortedAlignment, alignedPair2->reverse) == NULL);
        stSortedSet_insert(sortedAlignment, alignedPair2);
        stSortedSet_insert(sortedAlignment, alignedPair2->reverse);
        stIntTuple_destruct(alignedPair);
    }

    //Cleanup
    stList_destruct(seqFrags);
    stList_destruct(sequences);
    free(pairwiseAlignmentsPerSequenceNonCommonEnds);
    free(pairwiseAlignmentsPerSequenceCommonEnds);
    free(scoreAdjustmentsNonCommonEnds);
    free(scoreAdjustmentsCommonEnds);
    multipleAlignment_destruct(mA);
    stHash_destruct(endInstanceNumbers);

    return sortedAlignment;
}