/*
 * Builds the adjacency list for the given pairs and then runs a depth first search
 * from the first position of every sequence. Returns non-zero if any search reports
 * a cycle, i.e. the columns can not be topologically sorted.
 */
static int64_t containsACycle(stList *pairs, int64_t sequenceNumber) {
    stHash *adjacencyList = buildAdjacencyList(pairs, sequenceNumber);

    //Bookkeeping sets handed to the depth first search.
    stSortedSet *started = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);
    stSortedSet *done = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);

    int64_t cyclic = 0;
    int64_t seq = 0;
    while(seq < sequenceNumber) {
        //Temporary key used only to find the equivalent tuple held inside the adjacency list.
        stIntTuple *seqPos = stIntTuple_construct2(seq, 0);
        stSortedSet *column = stHash_search(adjacencyList, seqPos);
        assert(column != NULL);
        stIntTuple *seqPos2 = stSortedSet_search(column, seqPos);
        assert(seqPos2 != NULL);
        //Once a cycle has been seen no further searches are started.
        if(!cyclic && dfs(adjacencyList, seqPos2, started, done)) {
            cyclic = 1;
        }
        stIntTuple_destruct(seqPos);
        seq++;
    }

    //Gather the columns into a set that destructs them; a column reachable from several keys is stored once.
    stSortedSet *columns = stSortedSet_construct2((void (*)(void *))stSortedSet_destruct);
    stHashIterator *it = stHash_getIterator(adjacencyList);
    for(stIntTuple *key = stHash_getNext(it); key != NULL; key = stHash_getNext(it)) {
        stSortedSet *column = stHash_search(adjacencyList, key);
        assert(column != NULL);
        stSortedSet_insert(columns, column);
    }
    stHash_destructIterator(it);

    stHash_destruct(adjacencyList);
    stSortedSet_destruct(columns);
    stSortedSet_destruct(started);
    stSortedSet_destruct(done);
    return cyclic;
}
/*
 * Allocates a flower with the given name, creates its (initially empty) sorted
 * sets of contained objects, zeroes its counters/flags and registers it with the
 * cactus disk. The event tree is left unset.
 */
static Flower *flower_construct3(Name name, CactusDisk *cactusDisk) {
    Flower *flower = st_malloc(sizeof(Flower));

    //Identity
    flower->name = name;

    //Containers for the objects held by the flower; none of them own their elements.
    flower->sequences = stSortedSet_construct3(flower_constructSequencesP, NULL);
    flower->caps = stSortedSet_construct3(flower_constructCapsP, NULL);
    flower->ends = stSortedSet_construct3(flower_constructEndsP, NULL);
    flower->segments = stSortedSet_construct3(flower_constructSegmentsP, NULL);
    flower->blocks = stSortedSet_construct3(flower_constructBlocksP, NULL);
    flower->groups = stSortedSet_construct3(flower_constructGroupsP, NULL);
    flower->chains = stSortedSet_construct3(flower_constructChainsP, NULL);
    flower->faces = stSortedSet_construct3(flower_constructFacesP, NULL);

    //Links and counters
    flower->parentFlowerName = NULL_NAME;
    flower->cactusDisk = cactusDisk;
    flower->faceIndex = 0;
    flower->chainIndex = 0;

    //Build flags all start off cleared
    flower->builtBlocks = 0;
    flower->builtFaces = 0;
    flower->builtTrees = 0;

    //Register with the disk, then mark the event tree as not yet set.
    cactusDisk_addFlower(cactusDisk, flower);
    flower->eventTree = NULL;

    return flower;
}
/*
 * Test helper: checks that a reference is a valid solution, i.e. that its
 * adjacency edges pair up every node, that its score matches totalScore and
 * that combining it with the stub and chain edges gives one component per stub.
 */
static void checkIsValidReference(CuTest *testCase, stList *reference, double totalScore) {
    stList *adjacencyEdges = convertReferenceToAdjacencyEdges(reference);

    //Each edge covers two nodes, so twice the edge count must equal the node count.
    CuAssertIntEquals(testCase, nodeNumber, stList_length(adjacencyEdges) * 2);

    //Build the set of all node ids and validate the edges against it.
    stSortedSet *allNodes = stSortedSet_construct3((int(*)(const void *, const void *)) stIntTuple_cmpFn,
            (void(*)(void *)) stIntTuple_destruct);
    int64_t nodeId = 0;
    while (nodeId < nodeNumber) {
        stSortedSet_insert(allNodes, stIntTuple_construct1(nodeId));
        nodeId++;
    }
    checkEdges(adjacencyEdges, allNodes, 1, 0);

    //Recompute the score independently and compare.
    double recomputedScore = calculateZScoreOfReference(reference, nodeNumber, zMatrix);
    CuAssertDblEquals(testCase, recomputedScore, totalScore, 0.000001);

    //Combine adjacency, stub and chain edges and look at the resulting components.
    stList *combinedEdges = stList_copy(adjacencyEdges, NULL);
    stList_appendAll(combinedEdges, stubs);
    stList_appendAll(combinedEdges, chains);
    stList *connectedComponents = getComponents(combinedEdges);
    CuAssertIntEquals(testCase, stList_length(stubs), stList_length(reference));
    CuAssertIntEquals(testCase, stList_length(stubs), stList_length(connectedComponents));

    //Release everything built above.
    stList_destruct(connectedComponents);
    stSortedSet_destruct(allNodes);
    stList_destruct(combinedEdges);
    stList_destruct(adjacencyEdges);
}
/*
 * Constructs a block (together with its reverse-orientation twin, which shares
 * the same contents) with the given name and length, attaches it to the two
 * ends and adds it to the flower. Returns the positively oriented block.
 */
Block *block_construct2(Name name, int64_t length, End *leftEnd, End *rightEnd, Flower *flower) {
    //The forward and reverse views of the block point at each other.
    Block *block = st_malloc(sizeof(Block));
    Block *reverseBlock = st_malloc(sizeof(Block));
    block->rBlock = reverseBlock;
    reverseBlock->rBlock = block;

    //A single contents record is shared between the two orientations.
    BlockContents *contents = st_malloc(sizeof(BlockContents));
    block->blockContents = contents;
    reverseBlock->blockContents = contents;

    block->orientation = 1;
    reverseBlock->orientation = 0;

    contents->name = name;
    contents->segments = stSortedSet_construct3(blockConstruct_constructP, NULL);
    contents->length = length;
    contents->flower = flower;

    //Hook up the ends; the reverse block's left end is the reverse of the right end.
    block->leftEnd = leftEnd;
    end_setBlock(leftEnd, block);
    reverseBlock->leftEnd = end_getReverse(rightEnd);
    end_setBlock(rightEnd, block);

    flower_addBlock(flower, block);
    return block;
}
/*
 * Creates an event tree, registers it with the cactus disk and gives it a root
 * event with the header "ROOT" and the given name.
 */
EventTree *eventTree_construct(CactusDisk *cactusDisk, Name rootEventName) {
    EventTree *tree = st_malloc(sizeof(EventTree));

    tree->cactusDisk = cactusDisk;
    cactusDisk_setEventTree(cactusDisk, tree);
    tree->events = stSortedSet_construct3(eventTree_constructP, NULL);

    //The root event must be built last: its constructor calls back to add itself to the events set.
    tree->rootEvent = event_construct(rootEventName, "ROOT", INT64_MAX, NULL, tree);

    return tree;
}
/*
 * Returns a new sorted set using the same comparison function as sortedSet and
 * containing the same element pointers (the elements themselves are not copied).
 * destructElementFn becomes the element destructor of the new set.
 */
stSortedSet *stSortedSet_copyConstruct(stSortedSet *sortedSet, void (*destructElementFn)(void *)) {
    stSortedSet *copy = stSortedSet_construct3(stSortedSet_getComparator(sortedSet)->compareFn, destructElementFn);

    stSortedSetIterator *iterator = stSortedSet_getIterator(sortedSet);
    for(void *element = stSortedSet_getNext(iterator); element != NULL; element = stSortedSet_getNext(iterator)) {
        stSortedSet_insert(copy, element);
    }
    stSortedSet_destructIterator(iterator);

    return copy;
}
stPosetAlignment *stPosetAlignment_construct(int64_t sequenceNumber) { stPosetAlignment *posetAlignment = st_malloc(sizeof(stPosetAlignment)); posetAlignment->sequenceNumber = sequenceNumber; posetAlignment->constraintLists = st_malloc(sizeof(stSortedSet *) * sequenceNumber * sequenceNumber); for(int64_t i=0; i<posetAlignment->sequenceNumber; i++) { for(int64_t j=0; j<posetAlignment->sequenceNumber; j++) { if(i != j) { posetAlignment->constraintLists[i*posetAlignment->sequenceNumber + j] = stSortedSet_construct3((int (*)(const void *, const void *))comparePositions, (void (*)(void *))stIntTuple_destruct); } } } return posetAlignment; }
/*
 * Requests 100000 unique ids one at a time and checks that each is in range,
 * is not NULL_NAME, round-trips through its string form and has not been seen before.
 */
void testCactusDisk_getUniqueID_Unique(CuTest* testCase) {
    cactusDiskTestSetup();

    //String forms of all ids handed out so far; the set frees them on destruction.
    stSortedSet *uniqueNames = stSortedSet_construct3(testCactusDisk_getUniqueID_UniqueP, free);

    int64_t i = 0;
    while (i < 100000) {
        Name uniqueName = cactusDisk_getUniqueID(cactusDisk);

        //Range checks on the id itself.
        CuAssertTrue(testCase, uniqueName > 0);
        CuAssertTrue(testCase, uniqueName < INT64_MAX);
        CuAssertTrue(testCase, uniqueName != NULL_NAME);

        //Must be new, and must survive conversion to a string and back.
        char *nameString = cactusMisc_nameToString(uniqueName);
        CuAssertTrue(testCase, stSortedSet_search(uniqueNames, nameString) == NULL);
        CuAssertTrue(testCase, cactusMisc_stringToName(nameString) == uniqueName);
        stSortedSet_insert(uniqueNames, nameString);

        i++;
    }

    stSortedSet_destruct(uniqueNames);
    cactusDiskTestTeardown();
}
/*
 * Accumulates coverage statistics for one block. Blocks shorter than
 * minimumBlockLength are ignored, as are blocks containing the other reference
 * when ignoreOtherReferenceBlocks is set. For the remaining blocks,
 * (block length * number of sample segments) is added to the baseCoverages bin
 * indexed by the number of distinct other (non sample, non reference) events,
 * and to referenceBases / otherReferenceBases if the block contains a segment
 * of the respective event. fileHandle is not used.
 */
static void getMAFBlock2(Block *block, FILE *fileHandle) {
    if (block_getLength(block) < minimumBlockLength) {
        return;
    }

    //First pass: does the block contain the reference and/or the other reference?
    Block_InstanceIterator *instanceIt = block_getInstanceIterator(block);
    bool includesReference = 0;
    bool includesOtherReference = 0;
    for (Segment *segment = block_getNext(instanceIt); segment != NULL; segment = block_getNext(instanceIt)) {
        const char *header = event_getHeader(segment_getEvent(segment));
        if (strcmp(header, referenceEventString) == 0) {
            includesReference = 1;
        } else if (strcmp(header, otherReferenceEventString) == 0) {
            includesOtherReference = 1;
        }
    }
    block_destructInstanceIterator(instanceIt);

    if (ignoreOtherReferenceBlocks && includesOtherReference) {
        return;
    }

    //Second pass: count the sample segments and collect the distinct other event headers.
    stSortedSet *otherSampleEvents = stSortedSet_construct3((int(*)(const void *, const void *)) strcmp, NULL);
    instanceIt = block_getInstanceIterator(block);
    int32_t sampleNumber = 0;
    for (Segment *segment = block_getNext(instanceIt); segment != NULL; segment = block_getNext(instanceIt)) {
        const char *header = event_getHeader(segment_getEvent(segment));
        if (strcmp(header, sampleEventString) == 0) {
            sampleNumber++;
        } else if (strcmp(header, referenceEventString) != 0) {
            stSortedSet_insert(otherSampleEvents, (void *) header);
        }
    }
    block_destructInstanceIterator(instanceIt);

    baseCoverages[stSortedSet_size(otherSampleEvents)] += block_getLength(block) * sampleNumber;
    stSortedSet_destruct(otherSampleEvents);

    //Update the reference totals.
    if (includesReference) {
        referenceBases += block_getLength(block) * sampleNumber;
    }
    if (includesOtherReference) {
        otherReferenceBases += block_getLength(block) * sampleNumber;
    }
}
/*
 * Returns the set of event headers of the caps in the end that also appear in
 * the eventStrings list. The set compares with strcmp and does not own the
 * header strings it holds.
 */
static stSortedSet *getEventStrings(End *end, stList *eventStrings) {
    stSortedSet *eventStringsSet = stSortedSet_construct3((int(*)(const void *, const void *)) strcmp, NULL);

    End_InstanceIterator *instanceIt = end_getInstanceIterator(end);
    for (Cap *cap = end_getNext(instanceIt); cap != NULL; cap = end_getNext(instanceIt)) {
        const char *header = event_getHeader(cap_getEvent(cap));
        //Compare the header against every requested event string.
        int64_t i = 0;
        while (i < stList_length(eventStrings)) {
            const char *candidate = stList_get(eventStrings, i);
            i++;
            if (strcmp(candidate, header) != 0) {
                continue;
            }
            stSortedSet_insert(eventStringsSet, (void *) header);
        }
    }
    end_destructInstanceIterator(instanceIt);

    return eventStringsSet;
}
/*
 * Returns a new sorted set holding the elements of sortedSet1 that are not found
 * in sortedSet2. The new set uses the comparison function of sortedSet1 and has
 * no element destructor. Throws SORTED_SET_EXCEPTION_ID if the two sets do not
 * have equal comparators.
 */
stSortedSet *stSortedSet_getDifference(stSortedSet *sortedSet1, stSortedSet *sortedSet2) {
    if(!stSortedSet_comparatorsEqual(sortedSet1, sortedSet2)) {
        stThrowNew(SORTED_SET_EXCEPTION_ID, "Comparators are not equal for creating the sorted set difference");
    }

    stSortedSet *difference = stSortedSet_construct3(stSortedSet_getComparator(sortedSet1)->compareFn, NULL);

    stSortedSetIterator *iterator = stSortedSet_getIterator(sortedSet1);
    for(void *element = stSortedSet_getNext(iterator); element != NULL; element = stSortedSet_getNext(iterator)) {
        if(stSortedSet_search(sortedSet2, element) != NULL) {
            continue; //Also present in the second set, so excluded.
        }
        stSortedSet_insert(difference, element);
    }
    stSortedSet_destructIterator(iterator);

    return difference;
}
static stSortedSet *getOddNodes(stList *cycle) {
    /*
     * Walks once around a simple cycle, starting from the first node of its first
     * edge, and returns every second node visited (the start node included).
     */
    stSortedSet *nodes = stSortedSet_construct3((int(*)(const void *, const void *)) stIntTuple_cmpFn,
            (void(*)(void *)) stIntTuple_destruct);
    stHash *nodesToEdges = getNodesToEdgesHash(cycle);

    int64_t node = stIntTuple_get(stList_get(cycle, 0), 0);
    int64_t pNode = -1; //The node we arrived from; there is none at the start.
    assert(stList_length(cycle) % 2 == 0);

    for (int64_t step = 0; step < stList_length(cycle); step++) {
        if (step % 2 == 0) {
            //Keep the nodes reached on even numbered steps only.
            addNodeToSet(nodes, node);
        }

        stList *edges = getItemForNode(node, nodesToEdges);
        assert(stList_length(edges) == 2);

        //Leave along whichever of the two incident edges does not lead back to where we came from.
        int64_t node2 = getOtherPosition(stList_get(edges, 0), node);
        if (node2 == pNode) {
            node2 = getOtherPosition(stList_get(edges, 1), node);
            assert(node2 != pNode);
        }
        pNode = node;
        node = node2;
    }

    stHash_destruct(nodesToEdges);
    assert(stList_length(cycle) / 2 == stSortedSet_size(nodes));
    return nodes;
}
/*
 * Requests 10 intervals of unique ids, each of a random size, and checks that
 * every id in every interval is in range, is not NULL_NAME, round-trips through
 * its string form and has not been seen before.
 */
void testCactusDisk_getUniqueID_UniqueIntervals(CuTest* testCase) {
    cactusDiskTestSetup();

    //String forms of all ids seen so far; the set frees them on destruction.
    stSortedSet *uniqueNames = stSortedSet_construct3(testCactusDisk_getUniqueID_UniqueP, free);

    int64_t i = 0;
    while (i < 10) {
        int64_t intervalSize = st_randomInt(0, 100000);
        Name uniqueName = cactusDisk_getUniqueIDInterval(cactusDisk, intervalSize);

        //The interval covers intervalSize consecutive names starting at the one returned.
        for(int64_t j = 0; j < intervalSize; j++, uniqueName++) {
            CuAssertTrue(testCase, uniqueName > 0);
            CuAssertTrue(testCase, uniqueName < INT64_MAX);
            CuAssertTrue(testCase, uniqueName != NULL_NAME);

            char *nameString = cactusMisc_nameToString(uniqueName);
            CuAssertTrue(testCase, stSortedSet_search(uniqueNames, nameString) == NULL);
            CuAssertTrue(testCase, cactusMisc_stringToName(nameString) == uniqueName);
            stSortedSet_insert(uniqueNames, nameString);
        }

        i++;
    }

    stSortedSet_destruct(uniqueNames);
    cactusDiskTestTeardown();
}
/*
 * Builds a hash from every (sequence, position) tuple to its alignment column.
 * Each position starts in a column of its own; for every aligned pair in pairs
 * (read as sequence1, position1, sequence2, position2) the columns of the two
 * positions are merged. The hash destructs its keys but not the columns.
 */
static stHash *buildAdjacencyList(stList *pairs, int64_t sequenceNumber) {
    stHash *hash = stHash_construct3((uint64_t (*)(const void *))stIntTuple_hashKey,
            (int (*)(const void *, const void *))stIntTuple_equalsFn,
            (void (*)(void *))stIntTuple_destruct, NULL);

    //Every sequence position begins as the only member of its own column.
    for(int64_t seq = 0; seq < sequenceNumber; seq++) {
        int64_t position = 0;
        while(position < MAX_SEQUENCE_SIZE) {
            stIntTuple *seqPos = stIntTuple_construct2(seq, position);
            stSortedSet *column = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);
            stSortedSet_insert(column, seqPos);
            stHash_insert(hash, seqPos, column);
            position++;
        }
    }

    //Now join the columns of the two positions of each pair.
    stListIterator *it = stList_getIterator(pairs);
    for(stIntTuple *pair = stList_getNext(it); pair != NULL; pair = stList_getNext(it)) {
        //Temporary lookup keys for the two ends of the pair.
        stIntTuple *seqPos1 = stIntTuple_construct2(stIntTuple_get(pair, 0), stIntTuple_get(pair, 1));
        stIntTuple *seqPos2 = stIntTuple_construct2(stIntTuple_get(pair, 2), stIntTuple_get(pair, 3));

        stSortedSet *column1 = stHash_search(hash, seqPos1);
        assert(column1 != NULL);
        stSortedSet *column2 = stHash_search(hash, seqPos2);
        assert(column2 != NULL);

        if(column1 != column2) {
            //Move every member of column2 into column1, repointing its hash entry, then drop column2.
            stSortedSetIterator *it2 = stSortedSet_getIterator(column2);
            for(stIntTuple *seqPos3 = stSortedSet_getNext(it2); seqPos3 != NULL; seqPos3 = stSortedSet_getNext(it2)) {
                assert(stSortedSet_search(column1, seqPos3) == NULL);
                stSortedSet_insert(column1, seqPos3);
                assert(stHash_search(hash, seqPos3) == column2);
                stHash_insert(hash, seqPos3, column1);
                assert(stHash_search(hash, seqPos3) == column1);
            }
            stSortedSet_destructIterator(it2);
            stSortedSet_destruct(column2);
        }

        stIntTuple_destruct(seqPos1);
        stIntTuple_destruct(seqPos2);
    }
    stList_destructIterator(it);

    return hash;
}
/*
 * Returns a new sorted set into which every element of sortedSet1 and then every
 * element of sortedSet2 has been inserted. The new set uses the comparison
 * function of sortedSet1 and has no element destructor. Throws
 * SORTED_SET_EXCEPTION_ID if the two sets do not have equal comparators.
 */
stSortedSet *stSortedSet_getUnion(stSortedSet *sortedSet1, stSortedSet *sortedSet2) {
    if(!stSortedSet_comparatorsEqual(sortedSet1, sortedSet2)) {
        stThrowNew(SORTED_SET_EXCEPTION_ID, "Comparators are not equal for creating the union of two sorted sets");
    }

    stSortedSet *unionSet = stSortedSet_construct3(stSortedSet_getComparator(sortedSet1)->compareFn, NULL);

    //Insert the contents of the first set, then of the second.
    stSortedSet *sources[2] = { sortedSet1, sortedSet2 };
    for(int sourceIndex = 0; sourceIndex < 2; sourceIndex++) {
        stSortedSetIterator *iterator = stSortedSet_getIterator(sources[sourceIndex]);
        for(void *element = stSortedSet_getNext(iterator); element != NULL; element = stSortedSet_getNext(iterator)) {
            stSortedSet_insert(unionSet, element);
        }
        stSortedSet_destructIterator(iterator);
    }

    return unionSet;
}
/*
 * Reads one end alignment from fileHandle.
 *
 * The expected layout is a header line "<end name> <number of pairs>" followed
 * by that many lines of eight integers, each describing one aligned pair
 * (sequence1, position1, strand1, score1, sequence2, position2, strand2, score2).
 *
 * flower:     flower in which the named end is looked up.
 * fileHandle: stream to read from.
 * end:        output; set to the end named in the header, or to NULL if no
 *             further line could be read.
 *
 * Returns a sorted set of AlignedPair objects (ordered by alignedPair_cmpFn and
 * destructed with the set), or NULL if no header line could be read. Any
 * malformed input or unknown end name aborts via st_errAbort.
 */
stSortedSet *loadEndAlignmentFromDisk(Flower *flower, FILE *fileHandle, End **end) {
    char *line = stFile_getLineFromFile(fileHandle);
    if(line == NULL) {
        //Nothing left to read. The result set is only built further down, so nothing is leaked here.
        *end = NULL;
        return NULL;
    }

    //Parse the header line.
    Name endName;
    int64_t lineNumber;
    int64_t fieldsRead = sscanf(line, "%" PRIi64 " %" PRIi64 "", &endName, &lineNumber);
    if(fieldsRead != 2 || lineNumber < 0) {
        st_errAbort("We encountered a mis-specified name in loading the first line of an end alignment from the disk: '%s'\n", line);
    }
    *end = flower_getEnd(flower, endName);
    if(*end == NULL) {
        //The line is still needed for the message, so it is freed only after this check.
        st_errAbort("We encountered an end name that is not in the database: '%s'\n", line);
    }
    free(line);

    //Parse the aligned pairs, one per line.
    stSortedSet *endAlignment = stSortedSet_construct3((int (*)(const void *, const void *))alignedPair_cmpFn,
            (void (*)(void *))alignedPair_destruct);
    for(int64_t i = 0; i < lineNumber; i++) {
        line = stFile_getLineFromFile(fileHandle);
        if(line == NULL) {
            st_errAbort("Got a null line when parsing an end alignment\n");
        }
        int64_t sI1, sI2;
        int64_t p1, st1, p2, st2, score1, score2;
        fieldsRead = sscanf(line, "%" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 "",
                &sI1, &p1, &st1, &score1, &sI2, &p2, &st2, &score2);
        if(fieldsRead != 8) {
            st_errAbort("We encountered a mis-specified name in loading an end alignment from the disk: '%s'\n", line);
        }
        stSortedSet_insert(endAlignment, alignedPair_construct(sI1, p1, st1, sI2, p2, st2, score1, score2));
        free(line);
    }

    return endAlignment;
}
static void writeCliqueGraph(FILE *fileHandle, stList *edges, int64_t nodeNumber, bool negativeWeights) {
    /*
     * Writes the graph as a complete graph on nodeNumber nodes: a header line with
     * the node and edge counts, then one "from to weight" line per given edge
     * (weights negated if negativeWeights is set), then a zero weight line for
     * every pair of nodes that had no edge given.
     */
    int64_t edgeNumber = ((nodeNumber * nodeNumber) - nodeNumber) / 2;
    fprintf(fileHandle, "%" PRIi64 " %" PRIi64 "\n", nodeNumber, edgeNumber);

    //Remembers which node pairs have been written already.
    stSortedSet *seen = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn,
            (void (*)(void *))stIntTuple_destruct);
    int64_t edgesWritten = 0;

    //The supplied edges.
    int64_t edgeIndex = 0;
    while(edgeIndex < stList_length(edges)) {
        stIntTuple *edge = stList_get(edges, edgeIndex);
        int64_t from = stIntTuple_get(edge, 0);
        int64_t to = stIntTuple_get(edge, 1);
        assert(from < nodeNumber);
        assert(to < nodeNumber);
        assert(from >= 0);
        assert(to >= 0);
        assert(from != to);

        //Flip the sign of the weight when negative weights are requested.
        int64_t weight = stIntTuple_get(edge, 2);
        int64_t writtenWeight = weight;
        if(negativeWeights) {
            writtenWeight = -weight;
        }
        fprintf(fileHandle, "%" PRIi64 " %" PRIi64 " %" PRIi64 "\n", from, to, writtenWeight);

        edgesWritten++;
        addEdgeToSet(seen, from, to);
        edgeIndex++;
    }

    //Fill in the remaining node pairs with zero weight edges.
    for(int64_t i = 0; i < nodeNumber; i++) {
        for(int64_t j = i + 1; j < nodeNumber; j++) {
            if(edgeInSet(seen, i, j)) {
                continue;
            }
            fprintf(fileHandle, "%" PRIi64 " %" PRIi64 " 0\n", i, j);
            edgesWritten++;
        }
    }

    stSortedSet_destruct(seen);
    assert(edgeNumber == edgesWritten);
}
/*
 * Aligns the adjacency sequences attached to the caps of an end and returns the
 * result as a sorted set of AlignedPair objects.
 *
 * For every cap in the end (reversed first if cap_getSide() is true) an
 * AdjacencySequence and a SeqFrag are built; the SeqFrag records the name of the
 * positively oriented end at the other side of the cap's adjacency. The SeqFrags
 * are aligned with makeAlignment(), and every aligned pair is then converted to
 * sequence coordinates and has its score rescaled per sequence (see below).
 *
 * The returned set is ordered by alignedPair_cmpFn, destructs its elements with
 * alignedPair_destruct, and contains both each constructed pair and its
 * ->reverse. All intermediate structures are freed before returning.
 *
 * NOTE(review): maxSequenceLength is only passed to adjacencySequence_construct();
 * the call to makeAlignment() is given the literal 100000000 in the argument
 * after spanningTrees - confirm that this is intended.
 */
stSortedSet *makeEndAlignment(StateMachine *sM, End *end, int64_t spanningTrees, int64_t maxSequenceLength,
        bool useProgressiveMerging, float gapGamma,
        PairwiseAlignmentParameters *pairwiseAlignmentBandingParameters) {
    //Make an alignment of the sequences in the ends

    //Get the adjacency sequences to be aligned.
    Cap *cap;
    End_InstanceIterator *it = end_getInstanceIterator(end);
    stList *sequences = stList_construct3(0, (void (*)(void *))adjacencySequence_destruct);
    stList *seqFrags = stList_construct3(0, (void (*)(void *))seqFrag_destruct);
    //Maps each "other" end to a heap allocated count of the seqFrags that finish at it; the counts are freed with the hash.
    stHash *endInstanceNumbers = stHash_construct2(NULL, free);
    while((cap = end_getNext(it)) != NULL) {
        if(cap_getSide(cap)) {
            cap = cap_getReverse(cap);
        }
        AdjacencySequence *adjacencySequence = adjacencySequence_construct(cap, maxSequenceLength);
        stList_append(sequences, adjacencySequence);

        assert(cap_getAdjacency(cap) != NULL);
        End *otherEnd = end_getPositiveOrientation(cap_getEnd(cap_getAdjacency(cap)));
        //sequences and seqFrags are appended to in step, so index k in one corresponds to index k in the other.
        stList_append(seqFrags, seqFrag_construct(adjacencySequence->string, 0, end_getName(otherEnd)));

        //Increase count of seqfrags with a given end.
        int64_t *c = stHash_search(endInstanceNumbers, otherEnd);
        if(c == NULL) {
            c = st_calloc(1, sizeof(int64_t));
            assert(*c == 0);
            stHash_insert(endInstanceNumbers, otherEnd, c);
        }
        (*c)++;
    }
    end_destructInstanceIterator(it);

    //Get the alignment.
    MultipleAlignment *mA = makeAlignment(sM, seqFrags, spanningTrees, 100000000, useProgressiveMerging, gapGamma, pairwiseAlignmentBandingParameters);

    //Build an array of weights to reweight pairs in the alignment.
    int64_t *pairwiseAlignmentsPerSequenceNonCommonEnds = st_calloc(stList_length(seqFrags), sizeof(int64_t));
    int64_t *pairwiseAlignmentsPerSequenceCommonEnds = st_calloc(stList_length(seqFrags), sizeof(int64_t));
    //First build array on number of pairwise alignments to each sequence, distinguishing alignments between sequences sharing
    //common ends.
    for(int64_t i=0; i<stList_length(mA->chosenPairwiseAlignments); i++) {
        //Positions 1 and 2 of each chosen pairwise alignment tuple are read as the two sequence indices.
        stIntTuple *pairwiseAlignment = stList_get(mA->chosenPairwiseAlignments, i);
        int64_t seq1 = stIntTuple_get(pairwiseAlignment, 1);
        int64_t seq2 = stIntTuple_get(pairwiseAlignment, 2);
        assert(seq1 != seq2);
        SeqFrag *seqFrag1 = stList_get(seqFrags, seq1);
        SeqFrag *seqFrag2 = stList_get(seqFrags, seq2);
        //Count the alignment in the "common" array if both fragments have the same rightEndId, otherwise in the "non common" one.
        int64_t *pairwiseAlignmentsPerSequence = seqFrag1->rightEndId == seqFrag2->rightEndId ? pairwiseAlignmentsPerSequenceCommonEnds : pairwiseAlignmentsPerSequenceNonCommonEnds;
        pairwiseAlignmentsPerSequence[seq1]++;
        pairwiseAlignmentsPerSequence[seq2]++;
    }
    //Now calculate score adjustments.
    double *scoreAdjustmentsNonCommonEnds = st_malloc(stList_length(seqFrags) * sizeof(double));
    double *scoreAdjustmentsCommonEnds = st_malloc(stList_length(seqFrags) * sizeof(double));
    for(int64_t i=0; i<stList_length(seqFrags); i++) {
        SeqFrag *seqFrag = stList_get(seqFrags, i);
        End *otherEnd = flower_getEnd(end_getFlower(end), seqFrag->rightEndId);
        assert(otherEnd != NULL);
        assert(stHash_search(endInstanceNumbers, otherEnd) != NULL);
        //Number of fragments finishing at the same other end as this one (itself included), and the number finishing elsewhere.
        int64_t commonInstanceNumber = *(int64_t *)stHash_search(endInstanceNumbers, otherEnd);
        int64_t nonCommonInstanceNumber = stList_length(seqFrags) - commonInstanceNumber;

        assert(commonInstanceNumber > 0 && nonCommonInstanceNumber >= 0);
        assert(pairwiseAlignmentsPerSequenceNonCommonEnds[i] <= nonCommonInstanceNumber);
        assert(pairwiseAlignmentsPerSequenceNonCommonEnds[i] >= 0);
        assert(pairwiseAlignmentsPerSequenceCommonEnds[i] < commonInstanceNumber);
        assert(pairwiseAlignmentsPerSequenceCommonEnds[i] >= 0);

        //Disabled alternative that used a single adjustment for both kinds of pair:
        //scoreAdjustmentsNonCommonEnds[i] = ((double)nonCommonInstanceNumber + commonInstanceNumber - 1)/(pairwiseAlignmentsPerSequenceNonCommonEnds[i] + pairwiseAlignmentsPerSequenceCommonEnds[i]);
        //scoreAdjustmentsCommonEnds[i] = scoreAdjustmentsNonCommonEnds[i];
        //Adjustment = (possible partner sequences) / (partner sequences actually aligned).
        //INT64_MIN marks "no alignments of this kind"; such an entry must never be used (asserted below).
        if(pairwiseAlignmentsPerSequenceNonCommonEnds[i] > 0) {
            scoreAdjustmentsNonCommonEnds[i] = ((double)nonCommonInstanceNumber)/pairwiseAlignmentsPerSequenceNonCommonEnds[i];
            assert(scoreAdjustmentsNonCommonEnds[i] >= 1.0);
            assert(scoreAdjustmentsNonCommonEnds[i] <= nonCommonInstanceNumber);
        }
        else {
            scoreAdjustmentsNonCommonEnds[i] = INT64_MIN;
        }
        if(pairwiseAlignmentsPerSequenceCommonEnds[i] > 0) {
            //The sequence itself is excluded from its own possible partners, hence the -1.
            scoreAdjustmentsCommonEnds[i] = ((double)commonInstanceNumber-1)/pairwiseAlignmentsPerSequenceCommonEnds[i];
            assert(scoreAdjustmentsCommonEnds[i] >= 1.0);
            assert(scoreAdjustmentsCommonEnds[i] <= commonInstanceNumber-1);
        }
        else {
            scoreAdjustmentsCommonEnds[i] = INT64_MIN;
        }
    }

    //Convert the alignment pairs to an alignment of the caps..
    stSortedSet *sortedAlignment =
            stSortedSet_construct3((int (*)(const void *, const void *))alignedPair_cmpFn,
            (void (*)(void *))alignedPair_destruct);
    //The aligned pair tuples are popped off the list one at a time and destructed once converted.
    while(stList_length(mA->alignedPairs) > 0) {
        //Tuple layout as read here: 0 = score, 1 = first sequence index, 2 = first offset, 3 = second sequence index, 4 = second offset.
        stIntTuple *alignedPair = stList_pop(mA->alignedPairs);
        assert(stIntTuple_length(alignedPair) == 5);
        int64_t seqIndex1 = stIntTuple_get(alignedPair, 1);
        int64_t seqIndex2 = stIntTuple_get(alignedPair, 3);
        AdjacencySequence *i = stList_get(sequences, seqIndex1);
        AdjacencySequence *j = stList_get(sequences, seqIndex2);
        assert(i != j);
        int64_t offset1 = stIntTuple_get(alignedPair, 2);
        int64_t offset2 = stIntTuple_get(alignedPair, 4);
        int64_t score = stIntTuple_get(alignedPair, 0);
        if(score <= 0) { //Happens when indel probs are included
            score = 1; //This is the minimum
        }
        assert(score > 0 && score <= PAIR_ALIGNMENT_PROB_1);
        SeqFrag *seqFrag1 = stList_get(seqFrags, seqIndex1);
        SeqFrag *seqFrag2 = stList_get(seqFrags, seqIndex2);
        assert(seqFrag1 != seqFrag2);
        //Pick the adjustment array matching the kind of pair (same or different rightEndId).
        double *scoreAdjustments = seqFrag1->rightEndId == seqFrag2->rightEndId ? scoreAdjustmentsCommonEnds : scoreAdjustmentsNonCommonEnds;
        assert(scoreAdjustments[seqIndex1] != INT64_MIN);
        assert(scoreAdjustments[seqIndex2] != INT64_MIN);
        //The offset is added to the start for a sequence whose strand flag is set, and subtracted otherwise.
        AlignedPair *alignedPair2 = alignedPair_construct(
                i->subsequenceIdentifier, i->start + (i->strand ? offset1 : -offset1), i->strand,
                j->subsequenceIdentifier, j->start + (j->strand ? offset2 : -offset2), j->strand,
                score*scoreAdjustments[seqIndex1], score*scoreAdjustments[seqIndex2]); //Do the reweighting here.
        assert(stSortedSet_search(sortedAlignment, alignedPair2) == NULL);
        assert(stSortedSet_search(sortedAlignment, alignedPair2->reverse) == NULL);
        //Both the pair and its reverse go into the set.
        stSortedSet_insert(sortedAlignment, alignedPair2);
        stSortedSet_insert(sortedAlignment, alignedPair2->reverse);
        stIntTuple_destruct(alignedPair);
    }

    //Cleanup
    stList_destruct(seqFrags);
    stList_destruct(sequences);
    free(pairwiseAlignmentsPerSequenceNonCommonEnds);
    free(pairwiseAlignmentsPerSequenceCommonEnds);
    free(scoreAdjustmentsNonCommonEnds);
    free(scoreAdjustmentsCommonEnds);
    multipleAlignment_destruct(mA);
    stHash_destruct(endInstanceNumbers);

    return sortedAlignment;
}
/*
 * Constructs a sorted set that orders its elements with st_sortedSet_cmpFn and
 * uses destructElementFn as its element destructor.
 */
stSortedSet *stSortedSet_construct2(void (*destructElementFn)(void *)) {
    stSortedSet *sortedSet = stSortedSet_construct3(st_sortedSet_cmpFn, destructElementFn);
    return sortedSet;
}
/*
 * Test helper exercising insert, update and remove on the database under test
 * with numRecords randomly keyed records.
 *
 * Steps: insert numRecords distinct random keys (each record's value is its own
 * key), check them, negate every value through an update, check again,
 * optionally destruct and reconstruct the database (reopenDatabase), then remove
 * every record, checking that a second removal throws
 * ST_KV_DATABASE_EXCEPTION_ID and that the database ends up empty.
 *
 * Uses the file-level `database` and `conf` variables.
 */
static void readWriteAndRemoveRecordsLotsIteration(CuTest *testCase,
        int numRecords, bool reopenDatabase) {
    //Make a big old list of records..
    //The set holds one tuple per key inserted so far and frees the tuples on destruction.
    stSortedSet *set = stSortedSet_construct3((int(*)(const void *, const void *)) stIntTuple_cmpFn,
            (void(*)(void *)) stIntTuple_destruct);
    while (stSortedSet_size(set) < numRecords) {
        int32_t key = st_randomInt(0, 100 * numRecords);
        stIntTuple *tuple = stIntTuple_construct(1, key);
        if (stSortedSet_search(set, tuple) == NULL) {
            //New key: it must not be in the database yet, and must be afterwards.
            CuAssertTrue(testCase, !stKVDatabase_containsRecord(database, key));
            stSortedSet_insert(set, tuple);
            stKVDatabase_insertRecord(database, key, &key, sizeof(int32_t));
            CuAssertTrue(testCase, stKVDatabase_containsRecord(database, key));
        } else {
            CuAssertTrue(testCase, stKVDatabase_containsRecord(database, key));
            stIntTuple_destruct(tuple); // already in db
        }
    }

    //Check the stored records against the set (the third argument is 1 here and -1 after the negation below).
    readWriteAndRemoveRecordsLotsCheck(testCase, set, 1);

    //Update all records to negate values
    stSortedSetIterator *it = stSortedSet_getIterator(set);
    stIntTuple *tuple;
    while ((tuple = stSortedSet_getNext(it)) != NULL) {
        //The fetched record is freed below once it has been written back.
        int32_t *value = (int32_t *) stKVDatabase_getRecord(database, stIntTuple_getPosition(tuple, 0));
        *value *= -1;
        stKVDatabase_updateRecord(database, stIntTuple_getPosition(tuple, 0), value, sizeof(int32_t));
        CuAssertTrue(testCase, stKVDatabase_containsRecord(database, stIntTuple_getPosition(tuple, 0)));
        free(value);
    }
    stSortedSet_destructIterator(it);

    readWriteAndRemoveRecordsLotsCheck(testCase, set, -1);

    //Try optionally committing the transaction and reloading the database..
    if (reopenDatabase) {
        //stKVDatabase_commitTransaction(database);
        stKVDatabase_destruct(database);
        database = stKVDatabase_construct(conf, false);
        //stKVDatabase_startTransaction(database);
    }

    //Now remove each one..
    it = stSortedSet_getIterator(set);
    while ((tuple = stSortedSet_getNext(it)) != NULL) {
        CuAssertTrue(testCase, stKVDatabase_containsRecord(database, stIntTuple_getPosition(tuple, 0)));
        stKVDatabase_removeRecord(database, stIntTuple_getPosition(tuple, 0));
        CuAssertTrue(testCase, !stKVDatabase_containsRecord(database, stIntTuple_getPosition(tuple, 0)));
        //Test we get exception if we remove twice.
        stTry {
            stKVDatabase_removeRecord(database, stIntTuple_getPosition(tuple, 0));
            //Reaching this line means no exception was thrown, which is a test failure.
            CuAssertTrue(testCase, 0);
        } stCatch(except) {
            CuAssertTrue(testCase, stExcept_getId(except) == ST_KV_DATABASE_EXCEPTION_ID);
        }stTryEnd;
    }
    stSortedSet_destructIterator(it);
    //Every record inserted by this function has now been removed.
    CuAssertIntEquals(testCase, 0, stKVDatabase_getNumberOfRecords(database));
    stSortedSet_destruct(set);
}
/*
 * Returns a new sorted set, ordered by segmentCompareFn and not owning its
 * elements, filled in for the given flower by getOrderedSegmentsP().
 */
stSortedSet *getOrderedSegments(Flower *flower) {
    stSortedSet *orderedSegments = stSortedSet_construct3(segmentCompareFn, NULL);

    getOrderedSegmentsP(flower, orderedSegments);

    return orderedSegments;
}
/*
 * Test fixture setup: creates the two sorted sets used by the tests. Both are
 * ordered by stIntTuple_cmpFn and destruct their elements with stIntTuple_destruct.
 */
static void sonLibSortedSetTestSetup() {
    int (*compareTuples)(const void *, const void *) = (int (*)(const void *, const void *))stIntTuple_cmpFn;
    void (*destructTuple)(void *) = (void (*)(void *))stIntTuple_destruct;

    sortedSet = stSortedSet_construct3(compareTuples, destructTuple);
    sortedSet2 = stSortedSet_construct3(compareTuples, destructTuple);
}
/*
 * Constructs a sorted set that orders its elements with st_sortedSet_cmpFn and
 * has no element destructor.
 */
stSortedSet *stSortedSet_construct(void) {
    stSortedSet *sortedSet = stSortedSet_construct3(st_sortedSet_cmpFn, NULL);
    return sortedSet;
}
/*
 * Builds a CactusDisk on top of the key-value database described by conf.
 *
 * conf:              configuration of the underlying database.
 * create:            passed on to stKVDatabase_construct(); must be true when the
 *                    database holds no cactus disk parameters record yet, and
 *                    false when it does.
 * sequencesFileName: if not NULL (only allowed when creating), sequences are kept
 *                    in a file of this name inside the database directory; the
 *                    file is created/truncated here.
 *
 * Also seeds the random number generator from the clock, the time and the
 * process id.
 *
 * Throws CACTUS_DISK_EXCEPTION_ID if create is set but the disk already exists,
 * if a sequences file name is given for an existing disk, if conf has no
 * directory for the sequences file, or if the sequences file can not be created.
 */
static CactusDisk *cactusDisk_constructPrivate(stKVDatabaseConf *conf,
        bool create, const char *sequencesFileName) {
    //sequencesFileName = NULL; //Disable the ability to store the sequences on disk.
    CactusDisk *cactusDisk = st_calloc(1, sizeof(CactusDisk));

    //construct lists of in memory objects
    cactusDisk->metaSequences = stSortedSet_construct3(cactusDisk_constructMetaSequencesP, NULL);
    cactusDisk->flowers = stSortedSet_construct3(cactusDisk_constructFlowersP, NULL);
    cactusDisk->flowerNamesMarkedForDeletion = stSortedSet_construct3((int (*)(const void *, const void *)) strcmp, free);
    cactusDisk->updateRequests = stList_construct3(0, (void (*)(void *)) stKVDatabaseBulkRequest_destruct);

    //Now open the database
    cactusDisk->database = stKVDatabase_construct(conf, create);
    cactusDisk->cache = stCache_construct();
    cactusDisk->stringCache = stCache_construct();

    //initialise the unique ids.
    int64_t seed = (clock() << 24) | (time(NULL) << 16) | (getpid() & 65535); //Likely to be unique
    st_logDebug("The cactus disk is seeding the random number generator with the value %" PRIi64 "\n", seed);
    st_randomSeed(seed);
    cactusDisk->uniqueNumber = 0;
    cactusDisk->maxUniqueNumber = 0;

    //Now load any stuff..
    if (containsRecord(cactusDisk, CACTUS_DISK_PARAMETER_KEY)) {
        //The disk already exists: load its parameters from the database.
        if (create) {
            stThrowNew(CACTUS_DISK_EXCEPTION_ID, "Tried to create a cactus disk, but the cactus disk already exists");
        }
        if (sequencesFileName != NULL) {
            stThrowNew(CACTUS_DISK_EXCEPTION_ID, "A sequences file name is specified, but the cactus disk is not being created");
        }
        void *record = getRecord(cactusDisk, CACTUS_DISK_PARAMETER_KEY, "cactus_disk parameters");
        //The loader is handed the address of record, so keep the original pointer for freeing.
        void *record2 = record;
        cactusDisk_loadFromBinaryRepresentation(&record, cactusDisk, conf);
        free(record2);
    } else {
        //A brand new disk.
        assert(create);
        if (sequencesFileName == NULL) {
            cactusDisk->storeSequencesInAFile = 0;
            cactusDisk->sequencesFileName = NULL;
            cactusDisk->sequencesReadFileHandle = NULL;
            cactusDisk->sequencesWriteFileHandle = NULL;
            cactusDisk->absSequencesFileName = NULL;
        } else {
            if (stKVDatabaseConf_getDir(conf) == NULL) {
                stThrowNew(CACTUS_DISK_EXCEPTION_ID, "The database conf does not contain a directory in which the sequence file is to be found!\n");
            }
            cactusDisk->storeSequencesInAFile = 1;
            cactusDisk->sequencesFileName = stString_copy(sequencesFileName);
            cactusDisk->absSequencesFileName = stString_print("%s/%s", stKVDatabaseConf_getDir(conf), cactusDisk->sequencesFileName);
            //Make sure the file exists. The result of fopen is checked explicitly rather than with
            //assert, so that a failure is still reported (instead of calling fclose on NULL) when
            //asserts are compiled out.
            FILE *sequencesFileHandle = fopen(cactusDisk->absSequencesFileName, "w");
            if (sequencesFileHandle == NULL) {
                stThrowNew(CACTUS_DISK_EXCEPTION_ID, "Could not create the sequences file: %s", cactusDisk->absSequencesFileName);
            }
            fclose(sequencesFileHandle); //Flush it first time.
            cactusDisk->sequencesReadFileHandle = NULL;
            cactusDisk->sequencesWriteFileHandle = NULL;
        }
    }

    return cactusDisk;
}