/*
 * addMafBlockToRowHash() test: the appended block continues name.chr1 after a
 * two base jump, continues name2.chr1 directly, and moves name3.chr1 from
 * position 13 to position 50. The expected fixture shows the name3 row
 * padded with ten 'N' characters, renamed to "name3" and flagged as having
 * multiple names.
 */
static void test_addBlockToHash_3(CuTest *testCase) {
    options_t *opts = options_construct();
    opts->breakpointPenalty = 10;
    opts->interstitialSequence = 5;

    stList *observedOrder = stList_construct3(0, free);
    stList *expectedOrder = stList_construct3(0, free);

    // Alignment as it stands before the new block is added.
    stHash *observed = createBlockHashFromString(
            "a score=0\n"
            "s reference.chr0 0 13 + 158545518 gcagctgaaaaca\n"
            "s name.chr1 0 10 + 100 ATGT---ATGCCG\n"
            "s name2.chr1 0 10 + 100 ATGT---ATGCCG\n"
            "s name3.chr1 0 13 + 100 GCAGCTGAAAACA\n",
            observedOrder);

    // The block to append.
    mafBlock_t *block = maf_newMafBlockListFromString(
            "a score=0 test\n"
            "s reference.chr0 13 5 + 158545518 ACGTA\n"
            "s name.chr1 12 5 + 100 gtcGG\n"
            "s name2.chr1 10 5 + 100 ATGTg\n"
            "s name3.chr1 50 5 + 100 CCCCC\n",
            3);

    // What the alignment should look like afterwards.
    stHash *expected = createBlockHashFromString(
            "a score=0\n"
            "s reference.chr0 0 18 + 158545518 gcagctgaaaaca------------ACGTA\n"
            "s name.chr1 0 17 + 100 ATGT---ATGCCGac----------gtcGG\n"
            "s name2.chr1 0 15 + 100 ATGT---ATGCCG------------ATGTg\n"
            "s name3 0 28 + 28 GCAGCTGAAAACA--NNNNNNNNNNCCCCC\n",
            expectedOrder);

    // Patch the bookkeeping fields of the expected name3 row by hand; the
    // string constructor above cannot express them.
    row_t *name3Row = stHash_search(expected, "name3");
    name3Row->prevRightPos = 54;
    free(name3Row->prevName);
    name3Row->prevName = stString_copy("name3.chr1");
    name3Row->multipleNames = true;

    // Backing sequences, keyed by sequence name.
    stHash *sequences = createSeqHashFromString("name.chr1",
            "ATGTATGCCGacgtc"
            "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG"
            "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG");
    mtfseq_t *referenceSeq = newMtfseqFromString(
            "gcagctgaaaacaACGTA"
            "tttttttttttttttttttttttttttttttt"
            "tttttttttttttttttttttttttttttttttttttttttttttttttt");
    stHash_insert(sequences, stString_copy("reference.chr0"), referenceSeq);
    mtfseq_t *name2Seq = newMtfseqFromString(
            "ATGTATGCCGATGTg"
            "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"
            "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC");
    stHash_insert(sequences, stString_copy("name2.chr1"), name2Seq);
    mtfseq_t *name3Seq = newMtfseqFromString(
            "GCAGCTGAAAACAggggggggggggggggggggggggggggggggggggg"
            "CCCCCaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
    stHash_insert(sequences, stString_copy("name3.chr1"), name3Seq);

    addMafBlockToRowHash(observed, sequences, observedOrder, block, opts);
    CuAssertTrue(testCase, hashesAreEqual(observed, expected));
    CuAssertTrue(testCase, listsAreEqual(observedOrder, expectedOrder));

    // clean up
    stHash_destruct(observed);
    stHash_destruct(expected);
    stHash_destruct(sequences);
    stList_destruct(observedOrder);
    stList_destruct(expectedOrder);
    maf_destroyMafBlockList(block);
    destroyOptions(opts);
}
static void setup() { teardown(); assert(nodeNumber == -1); while(nodeNumber % 2 != 0) { nodeNumber = st_randomInt(0, 100); } assert(nodeNumber >= 0); assert(nodeNumber % 2 == 0); stubs = stList_construct3(0, (void (*)(void *))stIntTuple_destruct); chains = stList_construct3(0, (void (*)(void *))stIntTuple_destruct); for(int64_t i=0; i<nodeNumber/2; i++) { assert(nodeNumber/2 > 0); stIntTuple *edge = stIntTuple_construct2(i, nodeNumber/2 + i); if(stList_length(stubs) == 0 || st_random() > 0.9) { stList_append(stubs, edge); } else { stList_append(chains, edge); } } zMatrix = st_calloc(nodeNumber*nodeNumber, sizeof(float)); for(int64_t i=0; i<nodeNumber; i++) { for(int64_t j=i+1; j<nodeNumber; j++) { double score = st_random(); zMatrix[i * nodeNumber + j] = score; zMatrix[j * nodeNumber + i] = score; } } st_logDebug("To test the adjacency problem we've created a problem with %" PRIi64 " nodes %" PRIi64 " stubs and %" PRIi64 " chains\n", nodeNumber, stList_length(stubs), stList_length(chains)); }
/*
 * Randomised test of stPosetAlignment_isPossible()/stPosetAlignment_add().
 *
 * For each of 100 trials a set of random sequence lengths is generated and
 * random aligned pairs between distinct sequences are proposed. Each proposal
 * is cross-checked against containsACycle(): an accepted pair must not
 * introduce a cycle and must be addable, and a rejected pair must introduce
 * one.
 */
static void test_stPosetAlignment_addAndIsPossible(CuTest *testCase) {
    for(int64_t trial=0; trial<100; trial++) {
        setup();

        //Make random number of sequences.
        stList *sequenceLengths = stList_construct3(0, (void (*)(void *))stIntTuple_destruct);
        for(int64_t i=0; i<sequenceNumber; i++) {
            stList_append(sequenceLengths, stIntTuple_construct1(
                    st_randomInt(0, MAX_SEQUENCE_SIZE)));
        }

        //Propose random alignment pairs...
        stList *pairs = stList_construct3(0, (void(*)(void *))stIntTuple_destruct);
        int64_t maxAlignedPairs = st_randomInt(0, MAX_ALIGNMENTS);
        if(sequenceNumber > 0) {
            for(int64_t i=0; i<maxAlignedPairs; i++) {
                int64_t seq1 = st_randomInt(0, sequenceNumber);
                int64_t seqLength1 = stIntTuple_get(stList_get(sequenceLengths, seq1), 0);
                if(seqLength1 == 0) {
                    continue;
                }
                int64_t position1 = st_randomInt(0, seqLength1);

                int64_t seq2 = st_randomInt(0, sequenceNumber);
                //The length must be that of seq2 (it was previously looked up
                //with seq1), otherwise position2 can fall outside seq2.
                int64_t seqLength2 = stIntTuple_get(stList_get(sequenceLengths, seq2), 0);
                if(seqLength2 == 0) {
                    continue;
                }
                int64_t position2 = st_randomInt(0, seqLength2);

                if(seq1 != seq2) {
                    stList_append(pairs, stIntTuple_construct4(
                            seq1, position1, seq2, position2));
                    if(stPosetAlignment_isPossible(posetAlignment, seq1, position1, seq2, position2)) {
                        st_logInfo("In %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " \n", seq1, position1, seq2, position2);
                        //For each accepted pair check it doesn't create a cycle.
                        CuAssertTrue(testCase, !containsACycle(pairs, sequenceNumber));
                        CuAssertTrue(testCase, stPosetAlignment_add(posetAlignment, seq1, position1, seq2, position2));
                    }
                    else {
                        st_logInfo("Out %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " \n", seq1, position1, seq2, position2);
                        //For each rejected pair check it creates a cycle..
                        CuAssertTrue(testCase, containsACycle(pairs, sequenceNumber));
                        CuAssertTrue(testCase, !stPosetAlignment_isPossible(posetAlignment, seq1, position1, seq2, position2));
                        stIntTuple_destruct(stList_pop(pairs)); //remove the pair which created the cycle.
                        CuAssertTrue(testCase, !containsACycle(pairs, sequenceNumber)); //Check we're back to being okay..
                    }
                }
            }
        }

        //Cleanup
        stList_destruct(sequenceLengths);
        stList_destruct(pairs);
        teardown();
        st_logInfo("Passed a random ordering test with %" PRIi64 " sequences and %" PRIi64 " aligned pairs\n", sequenceNumber, maxAlignedPairs);
    }
}
/*
 * addMafBlockToRowHash() test: the appended block continues name.chr1 after a
 * two base jump and also introduces a sequence (name3.chr@) that has no row
 * in the existing alignment. The expected fixture shows the new row padded
 * with gaps up to the start of the appended block.
 */
static void test_addBlockToHash_2(CuTest *testCase) {
    options_t *opts = options_construct();
    opts->breakpointPenalty = 10;
    opts->interstitialSequence = 5;

    stList *observedOrder = stList_construct3(0, free);
    stList *expectedOrder = stList_construct3(0, free);

    // Alignment as it stands before the new block is added.
    stHash *observed = createBlockHashFromString(
            "a score=0\n"
            "s reference.chr0 0 13 + 158545518 gcagctgaaaaca\n"
            "s name.chr1 0 10 + 100 ATGT---ATGCCG\n"
            "s name2.chr1 0 10 + 100 ATGT---ATGCCG\n",
            observedOrder);

    // The block to append.
    mafBlock_t *block = maf_newMafBlockListFromString(
            "a score=0 test\n"
            "s reference.chr0 13 5 + 158545518 ACGTA\n"
            "s name.chr1 12 5 + 100 gTcGG\n"
            "s name2.chr1 10 5 + 100 ATGTg\n"
            "s name3.chr@ 0 5 + 20 aaccg\n",
            3);

    // What the alignment should look like afterwards.
    stHash *expected = createBlockHashFromString(
            "a score=0\n"
            "s reference.chr0 0 18 + 158545518 gcagctgaaaaca--ACGTA\n"
            "s name.chr1 0 17 + 100 ATGT---ATGCCGacgTcGG\n"
            "s name2.chr1 0 15 + 100 ATGT---ATGCCG--ATGTg\n"
            "s name3.chr@ 0 5 + 20 ---------------aaccg\n",
            expectedOrder);

    // Backing sequences, keyed by sequence name.
    stHash *sequences = createSeqHashFromString("name.chr1",
            "ATGTATGCCGacgTc"
            "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG"
            "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG");
    mtfseq_t *referenceSeq = newMtfseqFromString(
            "gcagctgaaaacaACGTA"
            "tttttttttttttttttttttttttttttttt"
            "tttttttttttttttttttttttttttttttttttttttttttttttttt");
    stHash_insert(sequences, stString_copy("reference.chr0"), referenceSeq);
    mtfseq_t *name2Seq = newMtfseqFromString(
            "ATGTATGCCGATGTg"
            "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"
            "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC");
    stHash_insert(sequences, stString_copy("name2.chr1"), name2Seq);
    mtfseq_t *name3Seq = newMtfseqFromString("aaccgTTTTTTTTTTTTTTT");
    stHash_insert(sequences, stString_copy("name3.chr@"), name3Seq);

    addMafBlockToRowHash(observed, sequences, observedOrder, block, opts);
    CuAssertTrue(testCase, hashesAreEqual(observed, expected));
    CuAssertTrue(testCase, listsAreEqual(observedOrder, expectedOrder));

    // clean up
    stHash_destruct(observed);
    stHash_destruct(expected);
    stHash_destruct(sequences);
    stList_destruct(observedOrder);
    stList_destruct(expectedOrder);
    maf_destroyMafBlockList(block);
    destroyOptions(opts);
}
/*
 * interstitialInsert() test, run twice: once with name.chr1 on the '+'
 * strand and once on the '-' strand. In both cases five bases are requested
 * at position 10 and the expected fixture shows "aaaTa" appended to the
 * name.chr1 row while every other row is padded with gaps.
 */
static void test_interstitial_0(CuTest *testCase) {
    // Case 1: positive strand.
    {
        stList *observedOrder = stList_construct3(0, free);
        stList *expectedOrder = stList_construct3(0, free);
        stHash *observed = createBlockHashFromString(
                "a score=0\n"
                "s reference.chr0 0 13 + 158545518 gcagctgaaaaca\n"
                "s name.chr1 0 10 + 100 ATGT---ATGCCG\n"
                "s name2.chr1 0 10 + 100 ATGT---ATGCCG\n",
                observedOrder);
        stHash *expected = createBlockHashFromString(
                "a score=0\n"
                "s reference.chr0 0 13 + 158545518 gcagctgaaaaca-----\n"
                "s name.chr1 0 15 + 100 ATGT---ATGCCGaaaTa\n"
                "s name2.chr1 0 10 + 100 ATGT---ATGCCG-----\n",
                expectedOrder);
        stHash *sequences = createSeqHashFromString("name.chr1",
                "ATGTATGCCGaaaTaTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT"
                "TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT");

        interstitialInsert(observed, sequences, "name.chr1", 10, '+', 5);
        CuAssertTrue(testCase, hashesAreEqual(observed, expected));
        CuAssertTrue(testCase, listsAreEqual(observedOrder, expectedOrder));

        // clean up
        stHash_destruct(observed);
        stHash_destruct(expected);
        stHash_destruct(sequences);
        stList_destruct(observedOrder);
        stList_destruct(expectedOrder);
    }

    // Case 2: negative strand.
    {
        stList *observedOrder = stList_construct3(0, free);
        stList *expectedOrder = stList_construct3(0, free);
        stHash *observed = createBlockHashFromString(
                "a score=0\n"
                "s reference.chr0 0 13 + 158545518 gcagctgaaaaca\n"
                "s name.chr1 0 10 - 100 ATGT---ATGCCG\n"
                "s name2.chr1 0 10 + 100 ATGT---ATGCCG\n",
                observedOrder);
        stHash *expected = createBlockHashFromString(
                "a score=0\n"
                "s reference.chr0 0 13 + 158545518 gcagctgaaaaca-----\n"
                "s name.chr1 0 15 - 100 ATGT---ATGCCGaaaTa\n"
                "s name2.chr1 0 10 + 100 ATGT---ATGCCG-----\n",
                expectedOrder);
        stHash *sequences = createSeqHashFromString("name.chr1",
                "ggggggggggggTTgggggggggggggggggggggggggggggggggggg" // 50
                "gggaagggGgggCgggTgggAgggcgggtgggagg" // 35
                "tAtttCGGCATACAT");

        interstitialInsert(observed, sequences, "name.chr1", 10, '-', 5);
        CuAssertTrue(testCase, hashesAreEqual(observed, expected));
        CuAssertTrue(testCase, listsAreEqual(observedOrder, expectedOrder));

        // clean up
        stHash_destruct(observed);
        stHash_destruct(expected);
        stHash_destruct(sequences);
        stList_destruct(observedOrder);
        stList_destruct(expectedOrder);
    }
}
static stList *getRandomPairwiseAlignments() { stList *pairwiseAlignments = stList_construct3(0, (void(*)(void *)) destructPairwiseAlignment); int64_t randomAlignmentNumber = st_randomInt(0, 10); for (int64_t i = 0; i < randomAlignmentNumber; i++) { char *contig1 = stString_print("%" PRIi64 "", i); char *contig2 = stString_print("%" PRIi64 "", i * 10); int64_t start1 = st_randomInt(100000, 1000000); int64_t start2 = st_randomInt(100000, 1000000); int64_t strand1 = st_random() > 0.5; int64_t strand2 = st_random() > 0.5; int64_t end1 = start1; int64_t end2 = start2; struct List *operationList = constructEmptyList(0, NULL); while (st_random() > 0.1) { int64_t length = st_randomInt(0, 10); int64_t type = st_randomInt(0, 3); assert(type < 3); listAppend(operationList, constructAlignmentOperation(type, length, 0)); if (type != PAIRWISE_INDEL_Y) { end1 += strand1 ? length : -length; } if (type != PAIRWISE_INDEL_X) { end2 += strand2 ? length : -length; } } stList_append(pairwiseAlignments, constructPairwiseAlignment(contig1, start1, end1, strand1, contig2, start2, end2, strand2, 0.0, operationList)); free(contig1); free(contig2); } return pairwiseAlignments; }
/*
 * Exercises stKVDatabase_bulkSetRecords() with one request of each kind
 * (insert, set, update) and then reads the three keys back to check that each
 * holds the value carried by its request.
 */
static void testBulkSetRecords(CuTest *testCase) {
    setup();

    int64_t original = 100, inserted = 110, assigned = 120, updated = 130;

    // Key 1 must already exist so that it can be updated by the bulk call.
    stKVDatabase_insertRecord(database, 1, &original, sizeof(int64_t));

    stList *batch = stList_construct3(0, (void(*)(void *)) stKVDatabaseBulkRequest_destruct);
    stList_append(batch, stKVDatabaseBulkRequest_constructInsertRequest(2, &inserted, sizeof(int64_t)));
    stList_append(batch, stKVDatabaseBulkRequest_constructSetRequest(3, &assigned, sizeof(int64_t)));
    stList_append(batch, stKVDatabaseBulkRequest_constructUpdateRequest(1, &updated, sizeof(int64_t)));
    stKVDatabase_bulkSetRecords(database, batch);
    stList_destruct(batch);

    // Keys paired with the value each should now hold, checked in key order.
    const int64_t keys[] = { 1, 2, 3 };
    const int64_t wanted[] = { updated, inserted, assigned };
    for (int64_t n = 0; n < 3; n++) {
        int64_t *stored = stKVDatabase_getRecord(database, keys[n]);
        CuAssertTrue(testCase, stored != NULL);
        CuAssertTrue(testCase, wanted[n] == *stored);
        free(stored);
    }

    teardown();
}
int main(int argc, char **argv) { options_t *options = options_construct(); stHash *sequenceHash = NULL; // keyed on fasta headers, valued with mtfseq_t pointers stHash *alignmentHash = stHash_construct3(stHash_stringKey, stHash_stringEqualKey, free, destroyRow); // keyed on species names, valued with row_t pointers stList *rowOrder = stList_construct3(0, free); // when adding keys to alignmentHash, append to this list parseOptions(argc, argv, options); // read fastas, populate sequenceHash de_verbose("Creating sequence hash.\n"); sequenceHash = createSequenceHash(options->seqs); mafFileApi_t *mfapi = maf_newMfa(options->maf, "r"); de_verbose("Creating alignment hash.\n"); buildAlignmentHash(mfapi, alignmentHash, sequenceHash, rowOrder, options); if (options->outMfa != NULL) { // fasta output de_verbose("Writing fasta output.\n"); writeFastaOut(alignmentHash, rowOrder, options); } if (options->outMaf != NULL) { // maf output de_verbose("Writing maf output.\n"); writeMafOut(alignmentHash, rowOrder, options); } // cleanup maf_destroyMfa(mfapi); stHash_destruct(alignmentHash); stHash_destruct(sequenceHash); stList_destruct(rowOrder); destroyOptions(options); return(EXIT_SUCCESS); }
/*
 * Collects one Substring per segment instance found in the blocks of the
 * given flowers. Segments without a sequence are skipped. Coordinates are
 * taken from the orientation for which segment_getStrand() is true and are
 * expressed relative to sequence_getStart(). The returned list owns its
 * elements.
 */
static stList *getSubstringsForFlowerSegments(stList *flowers) {
    stList *collected = stList_construct3(0, (void (*)(void *)) substring_destruct);

    int64_t flowerIdx = 0;
    while (flowerIdx < stList_length(flowers)) {
        Flower *flower = stList_get(flowers, flowerIdx);
        Flower_EndIterator *blocks = flower_getBlockIterator(flower);

        for (Block *block = flower_getNextBlock(blocks); block != NULL;
                block = flower_getNextBlock(blocks)) {
            Block_InstanceIterator *instances = block_getInstanceIterator(block);

            for (Segment *instance = block_getNext(instances); instance != NULL;
                    instance = block_getNext(instances)) {
                Sequence *sequence = segment_getSequence(instance);
                if (sequence == NULL) {
                    continue;
                }
                // Work with the orientation whose strand flag is set.
                Segment *oriented = instance;
                if (!segment_getStrand(instance)) {
                    oriented = segment_getReverse(instance);
                }
                assert(segment_getLength(oriented) > 0);
                stList_append(collected,
                        substring_construct(sequence_getMetaSequence(sequence)->stringName,
                                            segment_getStart(oriented) - sequence_getStart(sequence),
                                            segment_getLength(oriented)));
            }
            block_destructInstanceIterator(instances);
        }
        flower_destructBlockIterator(blocks);
        flowerIdx++;
    }
    return collected;
}
/*
 * Collapses a set of substrings into fewer substrings.
 *
 * The input list is sorted in place with substring_cmp. Walking the sorted
 * list, a substring is folded into the current merged one when both have the
 * same name and it starts no later than the current end plus
 * proximityToMerge; otherwise a new merged substring is started. The result
 * holds clones, so the input elements are left untouched (though reordered).
 */
static stList *mergeSubstrings(stList *substrings, int64_t proximityToMerge) {
    stList *merged = stList_construct3(0, (void (*)(void *)) substring_destruct);
    if (stList_length(substrings) == 0) {
        return merged;
    }

    stList_sort(substrings, (int (*)(const void *, const void *)) substring_cmp);

    Substring *current = substring_clone(stList_get(substrings, 0));
    stList_append(merged, current);

    int64_t idx = 1;
    while (idx < stList_length(substrings)) {
        Substring *candidate = stList_get(substrings, idx);
        idx++;

        int closeEnough = current->name == candidate->name
                && current->start + current->length + proximityToMerge >= candidate->start;
        if (!closeEnough) {
            // Start a new merged run.
            current = substring_clone(candidate);
            stList_append(merged, current);
            continue;
        }
        // Extend the current run only if the candidate reaches further right.
        if (current->start + current->length < candidate->start + candidate->length) {
            current->length = candidate->start + candidate->length - current->start;
        }
    }
    return merged;
}
/*
 * Merges two of the given simple cycles into one using the adjacency switch
 * returned by getBestAdjacencySwitch(). The cycles list is modified in place:
 * the two cycles containing the switch's old edges are removed and destroyed,
 * and a single joined cycle, with the old edges replaced by the switch's new
 * edges, is appended.
 */
static void doBestMergeOfTwoSimpleCycles(stList *cycles, stList *nonZeroWeightAdjacencyEdges, stSortedSet *allAdjacencyEdges) {
    assert(stList_length(cycles) > 1);

    // Pick the switch to apply.
    AdjacencySwitch *bestSwitch = getBestAdjacencySwitch(cycles, nonZeroWeightAdjacencyEdges, allAdjacencyEdges);
    assert(bestSwitch != NULL);

    // Locate the cycles holding each of the two old edges. This list takes
    // over destruction of the cycles it collects.
    stList *toMerge = stList_construct3(0, (void(*)(void *)) stList_destruct);
    for (int64_t idx = 0; idx < stList_length(cycles); idx++) {
        stList *candidate = stList_get(cycles, idx);
        int holdsFirst = stList_contains(candidate, bestSwitch->oldEdge1);
        if (holdsFirst) {
            assert(!stList_contains(candidate, bestSwitch->oldEdge2));
        }
        if (holdsFirst || stList_contains(candidate, bestSwitch->oldEdge2)) {
            stList_append(toMerge, candidate);
        }
    }
    assert(stList_length(toMerge) == 2);

    // Build the merged cycle before the originals are destroyed.
    stList *mergedCycle = stList_join(toMerge);
    assert(!stList_contains(mergedCycle, NULL));

    // Detach the originals from the cycles list, then destroy them.
    for (int64_t idx = 0; idx < 2; idx++) {
        assert(stList_contains(cycles, stList_get(toMerge, idx)));
        stList_removeItem(cycles, stList_get(toMerge, idx));
    }
    stList_destruct(toMerge);

    // Swap the old edges for the new ones.
    assert(stList_contains(mergedCycle, bestSwitch->oldEdge1));
    stList_removeItem(mergedCycle, bestSwitch->oldEdge1);
    assert(stList_contains(mergedCycle, bestSwitch->oldEdge2));
    stList_removeItem(mergedCycle, bestSwitch->oldEdge2);
    assert(!stList_contains(mergedCycle, bestSwitch->newEdge1));
    stList_append(mergedCycle, bestSwitch->newEdge1);
    assert(!stList_contains(mergedCycle, bestSwitch->newEdge2));
    stList_append(mergedCycle, bestSwitch->newEdge2);
    adjacencySwitch_destruct(bestSwitch);

    // Publish the merged cycle.
    stList_append(cycles, mergedCycle);
}
stTree *stTree_construct(void) { stTree *tree = st_malloc(sizeof(stTree)); tree->branchLength = INFINITY; tree->nodes = stList_construct3(0, (void (*)(void *))stTree_destruct); tree->label = NULL; tree->parent = NULL; tree->clientData = NULL; return tree; }
/*
 * Constructs a face in the given flower, starting from startingCap.
 *
 * The top nodes are gathered with buildFaces_fillTopNodeList(). For each top
 * node, its lifted edges (looked up in liftedEdgesTable) supply the bottom
 * nodes. A bottom node's derived destination is the top cap of its
 * adjacency's positive orientation, or NULL when that cap is the top node's
 * own adjacency.
 */
static void buildFaces_constructFromCap(Cap * startingCap,
        stHash *liftedEdgesTable, Flower * flower) {
    Face *face = face_construct(flower);
    stList *topNodes = stList_construct3(16, NULL);

    printf("Constructing new face");

    // Establish the list of top nodes
    buildFaces_fillTopNodeList(startingCap, topNodes, liftedEdgesTable);

#ifndef NDEBUG
    // A face without top nodes should never happen
    if (stList_length(topNodes) == 0)
        abort();
#endif

    // Initialize data structure
    face_allocateSpace(face, stList_length(topNodes));

    for (int64_t topIdx = 0; topIdx < stList_length(topNodes); topIdx++) {
        Cap *topCap = stList_get(topNodes, topIdx);
        face_setTopNode(face, topIdx, topCap);

        stList *edgesBelow = stHash_search(liftedEdgesTable, topCap);
        if (edgesBelow == NULL) {
            // No lifted edges: this top node has no bottom nodes
            face_setBottomNodeNumber(face, topIdx, 0);
        } else {
            face_setBottomNodeNumber(face, topIdx, stList_length(edgesBelow));

            for (int64_t bottomIdx = 0; bottomIdx < stList_length(edgesBelow); bottomIdx++) {
                LiftedEdge *edge = stList_get(edgesBelow, bottomIdx);
                Cap *bottomCap = edge->bottomNode;
                face_addBottomNode(face, topIdx, bottomCap);

                Cap *derived = cap_getTopCap(cap_getPositiveOrientation(
                        cap_getAdjacency(bottomCap)));
                face_setDerivedDestination(face, topIdx, bottomIdx,
                        cap_getAdjacency(topCap) != derived ? derived : NULL);

#ifndef NDEBUG
                // A bottom node must not also be one of the top nodes
                assert(!stList_contains(topNodes,
                        cap_getPositiveOrientation(bottomCap)));
#endif
            }
        }
    }

    // Clean up
    stList_destruct(topNodes);
}
/*
 * Returns an updated list of adjacency edges such that each stub edge is a
 * member of exactly one cycle.
 *
 * Steps: compute the components of the chosen/stub/chain edges, split each
 * component with splitMultipleStubCycle(), strip the stub and chain edges
 * from the resulting cycles, and join what is left into a single list.
 */
stList *splitMultipleStubCycles(stList *chosenEdges,
        stList *nonZeroWeightAdjacencyEdges,
        stSortedSet *allAdjacencyEdges, stList *stubEdges,
        stList *chainEdges) {
    // Components of the current solution.
    stList *components = getComponents2(chosenEdges, stubEdges, chainEdges);

    // Split every component and gather the pieces in one list, which owns them.
    stList *splitResults = stList_construct3(0, (void(*)(void *)) stList_destruct);
    int64_t idx = 0;
    while (idx < stList_length(components)) {
        stList *component = stList_get(components, idx);
        stList *componentAdjacencies;
        stList *componentStubs;
        stList *componentChains;
        splitIntoAdjacenciesStubsAndChains(component, nonZeroWeightAdjacencyEdges,
                stubEdges, chainEdges,
                &componentAdjacencies, &componentStubs, &componentChains);

        stList *pieces = splitMultipleStubCycle(component, componentAdjacencies,
                allAdjacencyEdges, componentStubs, componentChains);
        stList_appendAll(splitResults, pieces);
        // The pieces now belong to splitResults; drop only the container.
        stList_setDestructor(pieces, NULL);
        stList_destruct(pieces);

        stList_destruct(componentAdjacencies);
        stList_destruct(componentStubs);
        stList_destruct(componentChains);
        idx++;
    }
    stList_destruct(components);

    // Keep only the adjacency edges of each cycle.
    stSortedSet *nonAdjacencyEdges = getSetOfMergedLists(stubEdges, chainEdges);
    stList *adjacencyOnly = filterListsToExclude(splitResults, nonAdjacencyEdges);
    stList_destruct(splitResults);
    stSortedSet_destruct(nonAdjacencyEdges);

    // Flatten into the final edge list.
    stList *result = stList_join(adjacencyOnly);
    stList_destruct(adjacencyOnly);
    return result;
}
/*
 * Builds a hashtable that maps a top cap to the list of lifted edges whose
 * bottom nodes have that cap as their top cap.
 *
 * Only caps that have an adjacency and a non-NULL top cap contribute an edge.
 * Each edge records the cap itself as bottomNode and the top cap of its
 * adjacency's positive orientation as destination.
 */
static stHash *buildFaces_computeLiftedEdges(Flower * flower) {
    stHash *table = stHash_construct3(buildFaces_hashfunction,
            buildFaces_key_eq_fn, NULL, buildFaces_destructValue);
    Flower_CapIterator *caps = flower_getCapIterator(flower);
    Cap *bottom;

    // Iterate through potential bottom nodes
    while ((bottom = flower_getNextCap(caps)) != NULL) {
        // Skip caps that are not connected
        Cap *neighbour = cap_getAdjacency(bottom);
        if (neighbour == NULL) {
            continue;
        }

        // Lift both ends of the adjacency
        Cap *liftedSelf = cap_getTopCap(bottom);
        Cap *liftedNeighbour = cap_getTopCap(cap_getPositiveOrientation(neighbour));

#ifndef NDEBUG
        // Either both ends lift or neither does
        assert((liftedSelf == NULL) == (liftedNeighbour == NULL));
#endif

        // Root node: nothing to record
        if (liftedSelf == NULL) {
            continue;
        }

        // Create the lifted edge
        LiftedEdge *edge = st_malloc(sizeof(LiftedEdge));
        edge->destination = liftedNeighbour;
        edge->bottomNode = bottom;

#ifndef NDEBUG
        // Self loop
        if (liftedNeighbour == liftedSelf)
            abort();
#endif

        // File the edge under its top cap, creating the bucket on first use
        stList *bucket = stHash_search(table, liftedSelf);
        if (bucket == NULL) {
            bucket = stList_construct3(2, buildFaces_stList_destructElem);
            stHash_insert(table, liftedSelf, bucket);
        }
        stList_append(bucket, edge);
    }

    flower_destructCapIterator(caps);
    return table;
}
/*
 * Applies stList_filterToExclude(list, set) to every member of listOfLists
 * and returns the results, in the same order, as a new list of lists. The
 * returned outer list destroys its member lists when it is destroyed.
 */
static stList *filterListsToExclude(stList *listOfLists, stSortedSet *set) {
    stList *filtered = stList_construct3(0, (void(*)(void *)) stList_destruct);
    int64_t idx = 0;
    while (idx < stList_length(listOfLists)) {
        stList *member = stList_get(listOfLists, idx);
        stList_append(filtered, stList_filterToExclude(member, set));
        idx++;
    }
    return filtered;
}
/*
 * penalize() test: penalizing name.chr1 by 5 should, per the expected
 * fixture, append five 'N' characters to that row and five gap characters to
 * every other row.
 */
static void test_penalize_0(CuTest *testCase) {
    stList *observedOrder = stList_construct3(0, free);
    stList *expectedOrder = stList_construct3(0, free);

    stHash *observed = createBlockHashFromString(
            "a score=0\n"
            "s reference.chr0 0 13 + 158545518 gcagctgaaaaca\n"
            "s name.chr1 0 10 + 100 ATGT---ATGCCG\n"
            "s name2.chr1 0 10 + 100 ATGT---ATGCCG\n",
            observedOrder);
    stHash *expected = createBlockHashFromString(
            "a score=0\n"
            "s reference.chr0 0 13 + 158545518 gcagctgaaaaca-----\n"
            "s name.chr1 0 15 + 100 ATGT---ATGCCGNNNNN\n"
            "s name2.chr1 0 10 + 100 ATGT---ATGCCG-----\n",
            expectedOrder);

    penalize(observed, "name.chr1", 5);

    CuAssertTrue(testCase, hashesAreEqual(observed, expected));
    CuAssertTrue(testCase, listsAreEqual(observedOrder, expectedOrder));

    // clean up
    stHash_destruct(observed);
    stHash_destruct(expected);
    stList_destruct(observedOrder);
    stList_destruct(expectedOrder);
}
int main(int argc, char *argv[]) { ////////////////////////////////////////////// //Parse the inputs ////////////////////////////////////////////// parseBasicArguments(argc, argv, "linkageStats"); /////////////////////////////////////////////////////////////////////////// // Get the intervals /////////////////////////////////////////////////////////////////////////// stList *haplotypeEventStrings = getEventStrings( treatHaplotype1AsContamination ? NULL : hap1EventString, treatHaplotype2AsContamination ? NULL : hap2EventString); stList *assemblyEventStringInList = stList_construct(); stList_append(assemblyEventStringInList, assemblyEventString); stList *intervals = stList_construct3(0, (void (*)(void *))sequenceInterval_destruct); for(int64_t i=0; i<stList_length(haplotypeEventStrings); i++) { const char *hapEventString = stList_get(haplotypeEventStrings, i); st_logInfo("Getting contig paths for haplotype: %s", hapEventString); stList *contigPaths = getContigPaths(flower, hapEventString, assemblyEventStringInList); stList *hapIntervals = getSplitContigPathIntervals(flower, contigPaths, hapEventString, assemblyEventStringInList); stList_destruct(contigPaths); st_logInfo("Getting contig paths\n"); stList_appendAll(intervals, hapIntervals); stList_setDestructor(hapIntervals, NULL); stList_destruct(hapIntervals); } st_logDebug("Got a total of %" PRIi64 " intervals\n", stList_length(intervals)); /////////////////////////////////////////////////////////////////////////// // Write it out. 
/////////////////////////////////////////////////////////////////////////// FILE *fileHandle = fopen(outputFile, "w"); for (int64_t i = 0; i < stList_length(intervals); i++) { SequenceInterval *sequenceInterval = stList_get(intervals, i); st_logDebug("We have a path interval %s %" PRIi64 " %" PRIi64 "\n", sequenceInterval->sequenceName, sequenceInterval->start, sequenceInterval->end); fprintf(fileHandle, "%s %" PRIi64 " %" PRIi64 "\n", sequenceInterval->sequenceName, sequenceInterval->start, sequenceInterval->end); } st_logInfo("Finished writing out the stats.\n"); fclose(fileHandle); return 0; }
static void setup() { teardown(); //Make nodes nodes = stList_construct3(0, (void(*)(void *)) stIntTuple_destruct); int64_t nodeNumber = st_randomInt(0, 1000); for (int64_t i = 0; i < nodeNumber; i++) { stList_append(nodes, stIntTuple_construct1( i)); } //Make edges edges = stList_construct3(0, (void(*)(void *)) stIntTuple_destruct); float edgeProb = st_random(); for (int64_t i = 0; i < nodeNumber; i++) { for (int64_t j = i; j < nodeNumber; j++) { if (st_random() <= edgeProb) { stList_append(edges, stIntTuple_construct3( st_randomInt(1, 100), i, j)); } } } //Max component size maxComponentSize = 1 + log(nodeNumber) * 10; //(st_randomInt(0, nodeNumber+1); }
/*
 * Returns the whitespace-separated tokens of one line of the file at
 * filePath, as produced by stString_split().
 *
 * Line selection is kept exactly as before: the first physical line is read
 * and discarded, and getLine then counts from the line after it, so
 * getLine == 1 yields the second physical line.
 * NOTE(review): this looks like a header skip, but it could equally be an
 * off-by-one; confirm against the callers before changing it.
 *
 * An empty list (with free as its destructor) is returned when the file
 * cannot be opened or does not contain the requested line. The caller owns
 * the returned list.
 */
stList *lineTokensFromFile(const char *filePath, int64_t getLine) {
    stList *tokens = NULL;
    FILE *fH = fopen(filePath, "r");

    if (fH != NULL) {
        int64_t lineCount = 1;
        char *line = stFile_getLineFromFile(fH);
        while (line != NULL) {
            // Release the previous line before reading the next one; the
            // first line used to be leaked here.
            free(line);
            line = stFile_getLineFromFile(fH);
            if (lineCount == getLine) {
                // The file may end exactly here; never split a NULL line.
                if (line != NULL) {
                    tokens = stString_split(line);
                }
                break;
            }
            lineCount++;
        }
        // free(NULL) is a no-op, so this covers both the hit and EOF cases.
        free(line);
        fclose(fH);
    }

    if (tokens == NULL) {
        tokens = stList_construct3(0, &free);
    }
    return tokens;
}
/*
 * Returns the maximal haplotype paths of the flower for the given event, as
 * a list of lists of segments filled in by getMaximalHaplotypePathsP(). The
 * outer list destroys the inner lists when it is destroyed.
 *
 * Everything after the construction is debug checking: the path ends must
 * not continue through a true adjacency, and consecutive segments within a
 * path must be joined by one and belong to eventString.
 */
stList *getContigPaths(Flower *flower, const char *eventString, stList *eventStrings) {
    stList *paths = stList_construct3(0, (void(*)(void *)) stList_destruct);
    stSortedSet *seenSegments = stSortedSet_construct();
    getMaximalHaplotypePathsP(flower, paths, seenSegments, eventString, eventStrings);

    //Do some debug checks..
    st_logDebug("We have %" PRIi64 " maximal haplotype paths\n", stList_length(paths));
    getMaximalHaplotypePathsCheck(flower, seenSegments, eventString, eventStrings);

    for (int64_t pathIdx = 0; pathIdx < stList_length(paths); pathIdx++) {
        stList *path = stList_get(paths, pathIdx);
        st_logDebug("We have a maximal haplotype path with length %" PRIi64 "\n", stList_length(path));
        assert(stList_length(path) > 0);

        // Neither end of the path may have a true adjacency leading onwards.
        Segment *first = stList_get(path, 0);
        Segment *last = stList_get(path, stList_length(path) - 1);
        if (getAdjacentCapsSegment(segment_get5Cap(first)) != NULL) {
            assert(!trueAdjacency(segment_get5Cap(first), eventStrings));
        }
        if (getAdjacentCapsSegment(segment_get3Cap(last)) != NULL) {
            assert(!trueAdjacency(segment_get3Cap(last), eventStrings));
        }

        // Every consecutive pair must be joined by a true adjacency.
        for (int64_t pos = 0; pos < stList_length(path) - 1; pos++) {
            first = stList_get(path, pos);
            last = stList_get(path, pos + 1);
            assert(trueAdjacency(segment_get3Cap(first), eventStrings));
            assert(trueAdjacency(segment_get5Cap(last), eventStrings));
            assert(cap_getAdjacency(getTerminalCap(segment_get3Cap(first)))
                    == getTerminalCap(segment_get5Cap(last)));
            assert(strcmp(event_getHeader(segment_getEvent(first)), eventString) == 0);
            assert(strcmp(event_getHeader(segment_getEvent(last)), eventString) == 0);
            assert(hasCapInEvents(cap_getEnd(segment_get5Cap(first)), eventStrings));
            assert(hasCapInEvents(cap_getEnd(segment_get5Cap(last)), eventStrings));
        }
    }

    stSortedSet_destruct(seenSegments);
    return paths;
}
/*
 * Fetches numRecords consecutive records, starting at firstKey, inside a
 * single transaction.
 *
 * Returns a list of numRecords stKVDatabaseBulkResult objects, one per key
 * in ascending key order; the list destroys its elements when it is
 * destroyed. On failure the transaction is aborted and a
 * ST_KV_DATABASE_EXCEPTION_ID exception is thrown with the original
 * exception as its cause.
 */
static stList *bulkGetRecordsRange(stKVDatabase *database, int64_t firstKey, int64_t numRecords) {
    stList* results = stList_construct3(numRecords, (void(*)(void *))stKVDatabaseBulkResult_destruct);
    startTransaction(database);
    stTry {
        // The index is 64-bit to match numRecords; a 32-bit counter would
        // overflow before reaching a bound above INT32_MAX.
        for (int64_t i = 0; i < numRecords; ++i) {
            int64_t key = firstKey + i;
            int64_t recordSize;
            void* record = getRecord2(database, key, &recordSize);
            stKVDatabaseBulkResult* result = stKVDatabaseBulkResult_construct(record, recordSize);
            stList_set(results, i, result);
        }
        commitTransaction(database);
    }stCatch(ex) {
        abortTransaction(database);
        stThrowNewCause(ex, ST_KV_DATABASE_EXCEPTION_ID, "tokyo cabinet bulk get records failed");
    }stTryEnd;

    return results;
}
static void testBulkRemoveRecords(CuTest *testCase) {
    /*
     * Tests the bulk removal of a set of records: inserts five records
     * (the last one with zero length), removes them all with one bulk
     * request and checks that the database ends up empty.
     */
    enum { RECORD_NUMBER = 5 };
    setup();

    // Records 1-4 carry an int64_t payload; record 5 is a zero-length record.
    int64_t values[] = { 100, 110, 120, 130 };
    for (int64_t key = 1; key < RECORD_NUMBER; key++) {
        stKVDatabase_insertRecord(database, key, &values[key - 1], sizeof(int64_t));
    }
    stKVDatabase_insertRecord(database, RECORD_NUMBER, &values[0], 0); //Test null record addition

    for (int64_t key = 1; key <= RECORD_NUMBER; key++) {
        CuAssertTrue(testCase, stKVDatabase_containsRecord(database, key));
    }
    CuAssertTrue(testCase, stKVDatabase_getNumberOfRecords(database) == RECORD_NUMBER);

    stList *requests = stList_construct3(0, (void(*)(void *)) stInt64Tuple_destruct);

    // An empty request list must be accepted.
    stKVDatabase_bulkRemoveRecords(database, requests);

    // Now request the removal of every record in one go.
    for (int64_t key = 1; key <= RECORD_NUMBER; key++) {
        stList_append(requests, stInt64Tuple_construct(1, (int64_t) key));
    }
    stKVDatabase_bulkRemoveRecords(database, requests);

    for (int64_t key = 1; key <= RECORD_NUMBER; key++) {
        CuAssertTrue(testCase, !stKVDatabase_containsRecord(database, key));
    }
    CuAssertTrue(testCase, stKVDatabase_getNumberOfRecords(database) == 0);

    stList_destruct(requests);
    teardown();
}
stList *getComponents(stList *edges) {
    /*
     * Partitions the edges into connected components. The result is a list of
     * components, each being a list of edges, such that every edge belongs to
     * exactly one component. Multi-graphs (several edges between the same pair
     * of nodes) are allowed.
     */
    stHash *nodesToEdges = getNodesToEdgesHash(edges);
    stList *components = stList_construct3(0, (void(*)(void *)) stList_destruct);

    /*
     * Greedy traversal: take each node in turn; if it still has an entry in
     * the hash it seeds a new component.
     */
    stList *nodes = stHash_getKeys(nodesToEdges);
    while (stList_length(nodes) > 0) {
        stIntTuple *node = stList_pop(nodes);
        stList *incidentEdges = stHash_search(nodesToEdges, node);
        if (incidentEdges != NULL) {
            stSortedSet *component = stSortedSet_construct();
            stHash_remove(nodesToEdges, node);
            int64_t edgeNumber = stList_length(incidentEdges);
            for (int64_t edgeIndex = 0; edgeIndex < edgeNumber; edgeIndex++) {
                stIntTuple *edge = stList_get(incidentEdges, edgeIndex);
                // Expand the component from both endpoints of the edge.
                getComponentsP(nodesToEdges, stIntTuple_get(edge, 0), component);
                getComponentsP(nodesToEdges, stIntTuple_get(edge, 1), component);
            }
            stList_append(components, stSortedSet_getList(component));
            // The set and the node's edge list are no longer needed.
            stSortedSet_destruct(component);
            stList_destruct(incidentEdges);
        }
        stIntTuple_destruct(node);
    }

    assert(stHash_size(nodesToEdges) == 0);
    stHash_destruct(nodesToEdges);
    stList_destruct(nodes);
    return components;
}
static void test_st_randomChoice(CuTest *testCase) {
    /*
     * Exercises the random choice function: choosing from an empty list must
     * raise RANDOM_EXCEPTION_ID, and choosing from a non-empty list must
     * always return one of the list's elements.
     */
    stList *list = stList_construct3(0, (void (*)(void *))stIntTuple_destruct);

    // Record the outcome and assert after the try block, so the test also
    // fails when no exception is thrown at all (previously it passed silently).
    // NOTE(review): the caught exception is not released here - confirm
    // whether the exception API requires the handler to free it.
    volatile int threwRandomException = 0;
    stTry {
        st_randomChoice(list);
    } stCatch(except) {
        threwRandomException = stExcept_getId(except) == RANDOM_EXCEPTION_ID;
    } stTryEnd
    CuAssertTrue(testCase, threwRandomException);

    for(int32_t i = 0; i < 10; i++) {
        stList_append(list, stIntTuple_construct(1, i));
    }
    for(int32_t i = 0; i < 100; i++) {
        CuAssertTrue(testCase, stList_contains(list, st_randomChoice(list)));
    }
    stList_destruct(list);
}
/*
 * Bulk-loads the records for the given object names.
 *
 * Returns a list, parallel to objectNames, of record buffers; the list frees
 * its elements with free(). For each name not already present in the disk's
 * cache, the record from the database is decompressed and stored in the
 * cache; otherwise the record is taken from the cache. 'type' is only used
 * in the error message raised when the database lookup fails.
 */
static stList *getRecords(CactusDisk *cactusDisk, stList *objectNames, char *type) {
    int64_t objectNumber = stList_length(objectNames);
    if (objectNumber == 0) {
        return stList_construct3(0, NULL);
    }

    stList *records = NULL;
    stTry {
        records = stKVDatabase_bulkGetRecords(cactusDisk->database, objectNames);
    } stCatch(except) {
        stThrowNewCause(except, ST_KV_DATABASE_EXCEPTION_ID,
                "An unknown database error occurred when getting a bulk set of %s", type);
    } stTryEnd;
    assert(records != NULL);
    assert(objectNumber == stList_length(records));

    // From here on the list holds plain buffers rather than bulk results,
    // so each bulk result is destructed by hand as it is replaced.
    stList_setDestructor(records, free);
    for (int64_t index = 0; index < objectNumber; index++) {
        Name objectName = *((int64_t *) stList_get(objectNames, index));
        stKVDatabaseBulkResult *bulkResult = stList_get(records, index);
        assert(bulkResult != NULL);

        int64_t size;
        void *buffer;
        if (stCache_containsRecord(cactusDisk->cache, objectName, 0, INT64_MAX)) {
            buffer = stCache_getRecord(cactusDisk->cache, objectName, 0, INT64_MAX, &size);
            assert(size >= 0);
            assert(buffer != NULL);
        } else {
            void *compressed = stKVDatabaseBulkResult_getRecord(bulkResult, &size);
            assert(size >= 0);
            assert(compressed != NULL);
            buffer = decompress(compressed, &size);
            stCache_setRecord(cactusDisk->cache, objectName, 0, size, buffer);
        }

        stKVDatabaseBulkResult_destruct(bulkResult);
        stList_set(records, index, buffer);
    }
    return records;
}
char *cactusDisk_getString(CactusDisk *cactusDisk, Name name, int64_t start, int64_t length, int64_t strand,
        int64_t totalSequenceLength) {
    /*
     * Returns the requested substring of the named sequence. A zero length
     * request yields a copy of the empty string. Otherwise the cache is
     * consulted first; on a miss the substring is loaded into the cache
     * from the database and then read back from the cache.
     */
    assert(length >= 0);
    if (length == 0) {
        return stString_copy("");
    }

    // Fast path: already cached.
    char *cached = cactusDisk_getStringFromCache(cactusDisk, name, start, length, strand);
    if (cached != NULL) {
        return cached;
    }

    // Slow path: cache the single substring we need, then read it back.
    stList *request = stList_construct3(0, (void (*)(void *)) substring_destruct);
    stList_append(request, substring_construct(name, start, length));
    cacheSubstringsFromDB(cactusDisk, request);
    stList_destruct(request);

    char *loaded = cactusDisk_getStringFromCache(cactusDisk, name, start, length, strand);
    assert(loaded != NULL);
    return loaded;
}
/*
 * Recursive function which fills a given list with the
 * connected nodes within a module and fills their lifted
 * edges in the same pass.
 *
 * cap:              node to visit (normalised to positive orientation).
 * list:             accumulates the visited caps; also acts as the visited
 *                   set that stops the recursion.
 * liftedEdgesTable: receives, for each visited cap that has lifted edges,
 *                   the list of those edges (the table entry takes over the
 *                   list).
 */
static void buildFaces_fillTopNodeList2(Cap *cap, stList *list, stHash *liftedEdgesTable) {
    // Orientation check
    cap = cap_getPositiveOrientation(cap);

    // Limit of recursion
    if (stList_contains(list, cap)) {
        return;
    }

    // Actual filling
    st_logInfo("Adding cap %p to face\n", (void *) cap);
    stList_append(list, cap);

    // Compute lifted edges. The list is allocated only after the recursion
    // check above, so nothing is leaked when the cap was already visited.
    // It is created with length 0: elsewhere in this file the first argument
    // of stList_construct3 is used as an initial length (slots are then
    // filled with stList_set), so a non-zero value would leave placeholder
    // entries and defeat the emptiness test below.
    // NOTE(review): confirm against the stList implementation.
    stList *liftedEdges = stList_construct3(0, buildFaces_stList_destructElem);
    for (int64_t index = 0; index < cap_getChildNumber(cap); index++) {
        buildFaces_computeLiftedEdgesAtTopNode(cap_getChild(cap, index), liftedEdges);
    }

    if (stList_length(liftedEdges) == 0) {
        // If empty list...
        stList_destruct(liftedEdges);
    } else {
        // Recursion through lifted edges
        stHash_insert(liftedEdgesTable, cap, liftedEdges);
        for (int64_t index = 0; index < stList_length(liftedEdges); index++) {
            LiftedEdge *liftedEdge = stList_get(liftedEdges, index);
            buildFaces_fillTopNodeList2(liftedEdge->destination, list, liftedEdgesTable);
        }
    }

    // Recursion through adjacency
    if (cap_getAdjacency(cap)) {
        buildFaces_fillTopNodeList2(cap_getAdjacency(cap), list, liftedEdgesTable);
    }
}
static stList *getSubstringsForFlowers(stList *flowers) {
    /*
     * Builds the list of substrings covering the sequence intervals of the
     * given flowers. Only caps of stub ends that have a sequence are
     * considered; a substring is emitted for each such cap that, once put on
     * the positive strand, is not a "side" cap and is followed by a non-empty
     * gap before its adjacent cap. The returned list owns the substrings.
     */
    stList *substrings = stList_construct3(0, (void (*)(void *)) substring_destruct);
    for (int64_t flowerIndex = 0; flowerIndex < stList_length(flowers); flowerIndex++) {
        Flower *flower = stList_get(flowers, flowerIndex);
        Flower_EndIterator *endIterator = flower_getEndIterator(flower);
        End *end;
        while ((end = flower_getNextEnd(endIterator)) != NULL) {
            if (!end_isStubEnd(end)) {
                continue;
            }
            End_InstanceIterator *capIterator = end_getInstanceIterator(end);
            Cap *cap;
            while ((cap = end_getNext(capIterator)) != NULL) {
                Sequence *sequence = cap_getSequence(cap);
                if (sequence == NULL) {
                    continue;
                }
                // Work with the positive strand version of the cap.
                if (!cap_getStrand(cap)) {
                    cap = cap_getReverse(cap);
                }
                if (cap_getSide(cap)) {
                    continue;
                }
                // We have a sequence interval of interest: the bases strictly
                // between this cap and its adjacent cap.
                Cap *adjacentCap = cap_getAdjacency(cap);
                assert(adjacentCap != NULL);
                int64_t length = cap_getCoordinate(adjacentCap) - cap_getCoordinate(cap) - 1;
                assert(length >= 0);
                if (length > 0) {
                    stList_append(substrings,
                            substring_construct(sequence_getMetaSequence(sequence)->stringName,
                                    cap_getCoordinate(cap) + 1 - sequence_getStart(sequence), length));
                }
            }
            end_destructInstanceIterator(capIterator);
        }
        flower_destructEndIterator(endIterator);
    }
    return substrings;
}
/*
 * Makes an alignment of the adjacency sequences attached to the caps of the
 * given end.
 *
 * Returns a sorted set of AlignedPair objects (ordered by alignedPair_cmpFn,
 * destructed with alignedPair_destruct). Each pair from the multiple
 * alignment is inserted together with its reverse. Pair scores are
 * reweighted per sequence: the score is multiplied by the ratio of the
 * number of other sequences in the same class (sharing / not sharing the
 * same opposite end) to the number of chosen pairwise alignments of that
 * class involving the sequence.
 */
stSortedSet *makeEndAlignment(StateMachine *sM, End *end, int64_t spanningTrees, int64_t maxSequenceLength,
        bool useProgressiveMerging, float gapGamma,
        PairwiseAlignmentParameters *pairwiseAlignmentBandingParameters) {
    // Gather the adjacency sequences to be aligned, one per cap of the end,
    // and count how many sequences lead to each opposite end.
    stList *sequences = stList_construct3(0, (void (*)(void *))adjacencySequence_destruct);
    stList *seqFrags = stList_construct3(0, (void (*)(void *))seqFrag_destruct);
    stHash *endInstanceNumbers = stHash_construct2(NULL, free);
    End_InstanceIterator *capIt = end_getInstanceIterator(end);
    Cap *cap;
    while ((cap = end_getNext(capIt)) != NULL) {
        if (cap_getSide(cap)) {
            cap = cap_getReverse(cap);
        }
        AdjacencySequence *adjacencySequence = adjacencySequence_construct(cap, maxSequenceLength);
        stList_append(sequences, adjacencySequence);
        assert(cap_getAdjacency(cap) != NULL);
        End *otherEnd = end_getPositiveOrientation(cap_getEnd(cap_getAdjacency(cap)));
        stList_append(seqFrags, seqFrag_construct(adjacencySequence->string, 0, end_getName(otherEnd)));
        // Increase the count of seqFrags with this opposite end.
        int64_t *instanceCount = stHash_search(endInstanceNumbers, otherEnd);
        if (instanceCount == NULL) {
            instanceCount = st_calloc(1, sizeof(int64_t));
            assert(*instanceCount == 0);
            stHash_insert(endInstanceNumbers, otherEnd, instanceCount);
        }
        (*instanceCount)++;
    }
    end_destructInstanceIterator(capIt);

    // Get the alignment.
    MultipleAlignment *mA = makeAlignment(sM, seqFrags, spanningTrees, 100000000, useProgressiveMerging,
            gapGamma, pairwiseAlignmentBandingParameters);
    int64_t seqNumber = stList_length(seqFrags);

    // Count, per sequence, the chosen pairwise alignments, keeping alignments
    // between sequences that share a common opposite end separate.
    int64_t *pairwiseAlignmentsPerSequenceNonCommonEnds = st_calloc(seqNumber, sizeof(int64_t));
    int64_t *pairwiseAlignmentsPerSequenceCommonEnds = st_calloc(seqNumber, sizeof(int64_t));
    for (int64_t k = 0; k < stList_length(mA->chosenPairwiseAlignments); k++) {
        stIntTuple *pairwiseAlignment = stList_get(mA->chosenPairwiseAlignments, k);
        int64_t seq1 = stIntTuple_get(pairwiseAlignment, 1);
        int64_t seq2 = stIntTuple_get(pairwiseAlignment, 2);
        assert(seq1 != seq2);
        SeqFrag *seqFrag1 = stList_get(seqFrags, seq1);
        SeqFrag *seqFrag2 = stList_get(seqFrags, seq2);
        int64_t *counts;
        if (seqFrag1->rightEndId == seqFrag2->rightEndId) {
            counts = pairwiseAlignmentsPerSequenceCommonEnds;
        } else {
            counts = pairwiseAlignmentsPerSequenceNonCommonEnds;
        }
        counts[seq1]++;
        counts[seq2]++;
    }

    // Now calculate the score adjustments. INT64_MIN marks an adjustment
    // that must never be used (no pairwise alignment of that class).
    double *scoreAdjustmentsNonCommonEnds = st_malloc(seqNumber * sizeof(double));
    double *scoreAdjustmentsCommonEnds = st_malloc(seqNumber * sizeof(double));
    for (int64_t k = 0; k < seqNumber; k++) {
        SeqFrag *seqFrag = stList_get(seqFrags, k);
        End *otherEnd = flower_getEnd(end_getFlower(end), seqFrag->rightEndId);
        assert(otherEnd != NULL);
        int64_t *instanceCount = stHash_search(endInstanceNumbers, otherEnd);
        assert(instanceCount != NULL);
        int64_t commonInstanceNumber = *instanceCount;
        int64_t nonCommonInstanceNumber = seqNumber - commonInstanceNumber;
        int64_t nonCommonAlignments = pairwiseAlignmentsPerSequenceNonCommonEnds[k];
        int64_t commonAlignments = pairwiseAlignmentsPerSequenceCommonEnds[k];
        assert(commonInstanceNumber > 0 && nonCommonInstanceNumber >= 0);
        assert(nonCommonAlignments <= nonCommonInstanceNumber);
        assert(nonCommonAlignments >= 0);
        assert(commonAlignments < commonInstanceNumber);
        assert(commonAlignments >= 0);

        if (nonCommonAlignments > 0) {
            scoreAdjustmentsNonCommonEnds[k] = ((double)nonCommonInstanceNumber) / nonCommonAlignments;
            assert(scoreAdjustmentsNonCommonEnds[k] >= 1.0);
            assert(scoreAdjustmentsNonCommonEnds[k] <= nonCommonInstanceNumber);
        } else {
            scoreAdjustmentsNonCommonEnds[k] = INT64_MIN;
        }

        if (commonAlignments > 0) {
            scoreAdjustmentsCommonEnds[k] = ((double)commonInstanceNumber - 1) / commonAlignments;
            assert(scoreAdjustmentsCommonEnds[k] >= 1.0);
            assert(scoreAdjustmentsCommonEnds[k] <= commonInstanceNumber - 1);
        } else {
            scoreAdjustmentsCommonEnds[k] = INT64_MIN;
        }
    }

    // Convert the alignment pairs to an alignment of the caps.
    stSortedSet *sortedAlignment = stSortedSet_construct3(
            (int (*)(const void *, const void *))alignedPair_cmpFn, (void (*)(void *))alignedPair_destruct);
    while (stList_length(mA->alignedPairs) > 0) {
        stIntTuple *alignedPair = stList_pop(mA->alignedPairs);
        assert(stIntTuple_length(alignedPair) == 5);
        int64_t score = stIntTuple_get(alignedPair, 0);
        int64_t seqIndex1 = stIntTuple_get(alignedPair, 1);
        int64_t offset1 = stIntTuple_get(alignedPair, 2);
        int64_t seqIndex2 = stIntTuple_get(alignedPair, 3);
        int64_t offset2 = stIntTuple_get(alignedPair, 4);
        AdjacencySequence *adjSeq1 = stList_get(sequences, seqIndex1);
        AdjacencySequence *adjSeq2 = stList_get(sequences, seqIndex2);
        assert(adjSeq1 != adjSeq2);
        if (score <= 0) { // Happens when indel probs are included
            score = 1; // This is the minimum
        }
        assert(score > 0 && score <= PAIR_ALIGNMENT_PROB_1);
        SeqFrag *seqFrag1 = stList_get(seqFrags, seqIndex1);
        SeqFrag *seqFrag2 = stList_get(seqFrags, seqIndex2);
        assert(seqFrag1 != seqFrag2);
        double *scoreAdjustments;
        if (seqFrag1->rightEndId == seqFrag2->rightEndId) {
            scoreAdjustments = scoreAdjustmentsCommonEnds;
        } else {
            scoreAdjustments = scoreAdjustmentsNonCommonEnds;
        }
        assert(scoreAdjustments[seqIndex1] != INT64_MIN);
        assert(scoreAdjustments[seqIndex2] != INT64_MIN);
        // The reweighting of the scores happens in the last two arguments.
        AlignedPair *reweightedPair = alignedPair_construct(
                adjSeq1->subsequenceIdentifier, adjSeq1->start + (adjSeq1->strand ? offset1 : -offset1),
                adjSeq1->strand,
                adjSeq2->subsequenceIdentifier, adjSeq2->start + (adjSeq2->strand ? offset2 : -offset2),
                adjSeq2->strand,
                score*scoreAdjustments[seqIndex1], score*scoreAdjustments[seqIndex2]);
        assert(stSortedSet_search(sortedAlignment, reweightedPair) == NULL);
        assert(stSortedSet_search(sortedAlignment, reweightedPair->reverse) == NULL);
        stSortedSet_insert(sortedAlignment, reweightedPair);
        stSortedSet_insert(sortedAlignment, reweightedPair->reverse);
        stIntTuple_destruct(alignedPair);
    }

    // Cleanup
    stList_destruct(seqFrags);
    stList_destruct(sequences);
    free(pairwiseAlignmentsPerSequenceNonCommonEnds);
    free(pairwiseAlignmentsPerSequenceCommonEnds);
    free(scoreAdjustmentsNonCommonEnds);
    free(scoreAdjustmentsCommonEnds);
    multipleAlignment_destruct(mA);
    stHash_destruct(endInstanceNumbers);
    return sortedAlignment;
}