/*
 * Verifies a filtered edge set against the file-level test graph.
 *
 * The components induced by filteredEdges are rebuilt with getComponents() and
 * two properties are asserted:
 *  1. every component holds at least one node and no more than maxComponentSize;
 *  2. every edge of the global edge list that was left out of filteredEdges joins
 *     two distinct components whose combined size exceeds maxComponentSize,
 *     i.e. no rejected edge could have been accepted.
 */
static void checkComponents(CuTest *testCase, stList *filteredEdges) {
    stHash *nodesToComponents = getComponents(filteredEdges);

    // Size bounds for every component (a component appears once per member node).
    stList *components = stHash_getValues(nodesToComponents);
    int64_t componentCount = stList_length(components);
    for (int64_t c = 0; c < componentCount; c++) {
        stSortedSet *members = stList_get(components, c);
        CuAssertTrue(testCase, stSortedSet_size(members) <= maxComponentSize);
        CuAssertTrue(testCase, stSortedSet_size(members) >= 1);
    }

    // Each rejected edge must bridge two components that are too big to merge.
    stSortedSet *acceptedEdges = stList_getSortedSet(filteredEdges,
            (int(*)(const void *, const void *)) stIntTuple_cmpFn);
    int64_t edgeCount = stList_length(edges);
    for (int64_t e = 0; e < edgeCount; e++) {
        stIntTuple *candidate = stList_get(edges, e);
        if (stSortedSet_search(acceptedEdges, candidate) != NULL) {
            continue; // Edge was kept by the filter, nothing to check.
        }
        // Edge tuples carry the two node ids at positions 1 and 2.
        stIntTuple *leftKey = stIntTuple_construct1(stIntTuple_get(candidate, 1));
        stIntTuple *rightKey = stIntTuple_construct1(stIntTuple_get(candidate, 2));
        stSortedSet *leftComponent = stHash_search(nodesToComponents, leftKey);
        stSortedSet *rightComponent = stHash_search(nodesToComponents, rightKey);
        CuAssertTrue(testCase, leftComponent != NULL && rightComponent != NULL);
        CuAssertTrue(testCase, leftComponent != rightComponent);
        CuAssertTrue(testCase,
                stSortedSet_size(leftComponent) + stSortedSet_size(rightComponent) > maxComponentSize);
        stIntTuple_destruct(leftKey);
        stIntTuple_destruct(rightKey);
    }
    stSortedSet_destruct(acceptedEdges);

    // The values list repeats shared components, so de-duplicate them through a
    // set before freeing each component exactly once.
    stSortedSet *uniqueComponents = stList_getSortedSet(components, NULL);
    stList_destruct(components);
    stSortedSet_setDestructor(uniqueComponents, (void(*)(void *)) stSortedSet_destruct);
    stSortedSet_destruct(uniqueComponents);
    stHash_destruct(nodesToComponents);
}
/*
 * Exercises stSet_search on both fixture sets.
 *
 * set0 is built with the default constructor, so only the inserted pointers are
 * found. set1 is built with stIntTuple hash/equality functions, so a distinct
 * tuple holding an equal value resolves to the stored member.
 */
static void test_stSet_search(CuTest* testCase) {
    testSetup();
    stIntTuple *valueZero = stIntTuple_construct1(0);    // equal in value to `one`
    stIntTuple *pairTuple = stIntTuple_construct2(10, 0); // equal to no member
    stIntTuple *valueFive = stIntTuple_construct1(5);    // equal in value to `six`
    stIntTuple *members[] = { one, two, three, four, five, six };
    const int64_t memberCount = sizeof(members) / sizeof(members[0]);

    // set0: every inserted pointer is found...
    for (int64_t m = 0; m < memberCount; m++) {
        CuAssertTrue(testCase, stSet_search(set0, members[m]) == members[m]);
    }
    // ...but freshly constructed tuples are not, whatever their value.
    CuAssertTrue(testCase, stSet_search(set0, valueZero) == NULL);
    CuAssertTrue(testCase, stSet_search(set0, pairTuple) == NULL);
    CuAssertTrue(testCase, stSet_search(set0, valueFive) == NULL);

    // set1: every inserted pointer is found.
    for (int64_t m = 0; m < memberCount; m++) {
        CuAssertTrue(testCase, stSet_search(set1, members[m]) == members[m]);
    }
    // A tuple with no equal member is absent.
    CuAssertTrue(testCase, stSet_search(set1, pairTuple) == NULL);
    // Tuples equal by value resolve to the stored member.
    CuAssertTrue(testCase, stSet_search(set1, valueZero) == one);
    CuAssertTrue(testCase, stSet_search(set1, valueFive) == six);

    stIntTuple_destruct(valueZero);
    stIntTuple_destruct(pairTuple);
    stIntTuple_destruct(valueFive);
    testTeardown();
}
/*
 * Deliberately naive re-implementation of the component merging, kept separate
 * from the code under test so that typos in either one are caught.
 *
 * Every node of the file-level node list starts in its own singleton component;
 * each edge of filteredEdges then merges the components of its two endpoints
 * (tuple positions 1 and 2). Returns a hash from node to the stSortedSet of
 * nodes in its component. The hash is built without key/value destructors, so
 * the caller frees the component sets.
 */
static stHash *getComponents(stList *filteredEdges) {
    stHash *nodesToComponents = stHash_construct3(
            (uint64_t(*)(const void *)) stIntTuple_hashKey,
            (int(*)(const void *, const void *)) stIntTuple_equalsFn, NULL, NULL);

    // Seed: one singleton component per node.
    int64_t nodeCount = stList_length(nodes);
    for (int64_t n = 0; n < nodeCount; n++) {
        stIntTuple *node = stList_get(nodes, n);
        stSortedSet *singleton = stSortedSet_construct();
        stSortedSet_insert(singleton, node);
        stHash_insert(nodesToComponents, node, singleton);
    }

    // Merge the endpoint components of every edge.
    int64_t edgeCount = stList_length(filteredEdges);
    for (int64_t e = 0; e < edgeCount; e++) {
        stIntTuple *edge = stList_get(filteredEdges, e);

        // Temporary keys are only needed for the two lookups.
        stIntTuple *leftKey = stIntTuple_construct1(stIntTuple_get(edge, 1));
        stIntTuple *rightKey = stIntTuple_construct1(stIntTuple_get(edge, 2));
        stSortedSet *leftComponent = stHash_search(nodesToComponents, leftKey);
        stSortedSet *rightComponent = stHash_search(nodesToComponents, rightKey);
        assert(leftComponent != NULL && rightComponent != NULL);
        stIntTuple_destruct(leftKey);
        stIntTuple_destruct(rightKey);

        if (leftComponent == rightComponent) {
            continue; // Endpoints already share a component.
        }

        // Re-point every member at the merged set, then drop the two old sets.
        stSortedSet *merged = stSortedSet_getUnion(leftComponent, rightComponent);
        stSortedSetIterator *memberIt = stSortedSet_getIterator(merged);
        stIntTuple *member;
        while ((member = stSortedSet_getNext(memberIt)) != NULL) {
            stHash_insert(nodesToComponents, member, merged);
        }
        stSortedSet_destructIterator(memberIt);
        stSortedSet_destruct(leftComponent);
        stSortedSet_destruct(rightComponent);
    }
    return nodesToComponents;
}
/*
 * Asserts that `reference` is a well-formed reference for the test fixture:
 *  - its adjacency edges pair up all nodeNumber nodes (checked by count and by
 *    checkEdges against the full node set);
 *  - totalScore matches the score recomputed by calculateZScoreOfReference to
 *    within 1e-6;
 *  - the number of reference entries, and the number of components formed by the
 *    adjacency edges together with the stub and chain edges, both equal the
 *    number of stubs.
 */
static void checkIsValidReference(CuTest *testCase, stList *reference, double totalScore) {
    stList *adjacencyEdges = convertReferenceToAdjacencyEdges(reference);

    // Two nodes per edge: every node must have a partner.
    CuAssertIntEquals(testCase, nodeNumber, stList_length(adjacencyEdges) * 2);
    stSortedSet *allNodes = stSortedSet_construct3(
            (int(*)(const void *, const void *)) stIntTuple_cmpFn,
            (void(*)(void *)) stIntTuple_destruct);
    for (int64_t n = 0; n < nodeNumber; n++) {
        stSortedSet_insert(allNodes, stIntTuple_construct1(n));
    }
    checkEdges(adjacencyEdges, allNodes, 1, 0);

    // The reported score must agree with an independent recomputation.
    double recomputedScore = calculateZScoreOfReference(reference, nodeNumber, zMatrix);
    CuAssertDblEquals(testCase, recomputedScore, totalScore, 0.000001);

    // Adjacencies + stubs + chains: expect one component per stub.
    stList *combinedEdges = stList_copy(adjacencyEdges, NULL);
    stList_appendAll(combinedEdges, stubs);
    stList_appendAll(combinedEdges, chains);
    stList *components = getComponents(combinedEdges);
    CuAssertIntEquals(testCase, stList_length(stubs), stList_length(reference));
    CuAssertIntEquals(testCase, stList_length(stubs), stList_length(components));

    // Cleanup
    stList_destruct(components);
    stSortedSet_destruct(allNodes);
    stList_destruct(combinedEdges);
    stList_destruct(adjacencyEdges);
}
/*
 * Collects the adjacency edges that leave a component, i.e. edges with exactly
 * one endpoint among the component's nodes.
 *
 * For each component, every node's edge list is looked up in
 * nodesToNonZeroWeightedAdjacencyEdges; an edge (endpoints at tuple positions 0
 * and 1) is appended to the result when at least one endpoint lies outside the
 * component. The returned list borrows the edge tuples; it does not copy them.
 */
static stList *getEdgesThatBridgeComponents(stList *components,
        stHash *nodesToNonZeroWeightedAdjacencyEdges) {
    stList *bridgingAdjacencyEdges = stList_construct();
    for (int64_t c = 0; c < stList_length(components); c++) {
        stSortedSet *componentNodes = getNodeSetOfEdges(stList_get(components, c));
        stSortedSetIterator *nodeIt = stSortedSet_getIterator(componentNodes);
        stIntTuple *node;
        while ((node = stSortedSet_getNext(nodeIt)) != NULL) {
            stList *incidentEdges = stHash_search(nodesToNonZeroWeightedAdjacencyEdges, node);
            if (incidentEdges == NULL) {
                continue; // Node has no non-zero weighted adjacency edges.
            }
            for (int64_t e = 0; e < stList_length(incidentEdges); e++) {
                stIntTuple *edge = stList_get(incidentEdges, e);
                stIntTuple *endpointA = stIntTuple_construct1(stIntTuple_get(edge, 0));
                stIntTuple *endpointB = stIntTuple_construct1(stIntTuple_get(edge, 1));
                int insideA = stSortedSet_search(componentNodes, endpointA) != NULL;
                int insideB = stSortedSet_search(componentNodes, endpointB) != NULL;
                stIntTuple_destruct(endpointA);
                stIntTuple_destruct(endpointB);

                // The edge came from a node of this component, so one end is inside.
                assert(insideA || insideB);
                (void) insideA; // Silences unused warnings when asserts are compiled out.
                if (!(insideA && insideB)) {
                    stList_append(bridgingAdjacencyEdges, edge);
                }
            }
        }
        stSortedSet_destructIterator(nodeIt);
        stSortedSet_destruct(componentNodes);
    }
    return bridgingAdjacencyEdges;
}
/*
 * Randomised test of stPosetAlignment_isPossible / stPosetAlignment_add.
 *
 * For each of 100 trials a fresh fixture is built with setup(), random sequence
 * lengths are drawn, and random aligned pairs (seq1, position1, seq2, position2)
 * are proposed. Each proposed pair is appended to `pairs` and cross-checked
 * against the independent containsACycle() oracle:
 *  - if the poset alignment accepts the pair, the pair list must stay acyclic
 *    and stPosetAlignment_add must succeed;
 *  - if it rejects the pair, the pair list must contain a cycle; the pair is
 *    then removed and the list must be acyclic again.
 */
static void test_stPosetAlignment_addAndIsPossible(CuTest *testCase) {
    for (int64_t trial = 0; trial < 100; trial++) {
        setup();

        //Make random number of sequences.
        stList *sequenceLengths = stList_construct3(0, (void (*)(void *)) stIntTuple_destruct);
        for (int64_t i = 0; i < sequenceNumber; i++) {
            stList_append(sequenceLengths, stIntTuple_construct1(st_randomInt(0, MAX_SEQUENCE_SIZE)));
        }

        //Propose random alignment pairs...
        stList *pairs = stList_construct3(0, (void (*)(void *)) stIntTuple_destruct);
        int64_t maxAlignedPairs = st_randomInt(0, MAX_ALIGNMENTS);
        if (sequenceNumber > 0) {
            for (int64_t i = 0; i < maxAlignedPairs; i++) {
                int64_t seq1 = st_randomInt(0, sequenceNumber);
                int64_t seqLength1 = stIntTuple_get(stList_get(sequenceLengths, seq1), 0);
                if (seqLength1 == 0) {
                    continue; // No position can be drawn from an empty sequence.
                }
                int64_t position1 = st_randomInt(0, seqLength1);

                int64_t seq2 = st_randomInt(0, sequenceNumber);
                // Fixed: the length must be that of seq2 (it was read from seq1), so
                // that position2 is a valid position within the second sequence.
                int64_t seqLength2 = stIntTuple_get(stList_get(sequenceLengths, seq2), 0);
                if (seqLength2 == 0) {
                    continue;
                }
                int64_t position2 = st_randomInt(0, seqLength2);

                if (seq1 == seq2) {
                    continue; // Only pairs between distinct sequences are tested.
                }

                stList_append(pairs, stIntTuple_construct4(seq1, position1, seq2, position2));
                if (stPosetAlignment_isPossible(posetAlignment, seq1, position1, seq2, position2)) {
                    st_logInfo("In %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " \n", seq1, position1, seq2, position2);
                    //For each accepted pair check it doesn't create a cycle.
                    CuAssertTrue(testCase, !containsACycle(pairs, sequenceNumber));
                    CuAssertTrue(testCase, stPosetAlignment_add(posetAlignment, seq1, position1, seq2, position2));
                } else {
                    st_logInfo("Out %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " \n", seq1, position1, seq2, position2);
                    //For each rejected pair check it creates a cycle..
                    CuAssertTrue(testCase, containsACycle(pairs, sequenceNumber));
                    CuAssertTrue(testCase, !stPosetAlignment_isPossible(posetAlignment, seq1, position1, seq2, position2));
                    stIntTuple_destruct(stList_pop(pairs)); //remove the pair which created the cycle.
                    CuAssertTrue(testCase, !containsACycle(pairs, sequenceNumber)); //Check we're back to being okay..
                }
            }
        }

        //Cleanup
        stList_destruct(sequenceLengths);
        stList_destruct(pairs);
        teardown();
        st_logInfo("Passed a random ordering test with %" PRIi64 " sequences and %" PRIi64 " aligned pairs\n", sequenceNumber, maxAlignedPairs);
    }
}
/*
 * Builds a hash from each path in maximalHaplotypePaths to a newly constructed
 * one-element stIntTuple holding the value of contigPathLength() for that path.
 * The hash is created with the default constructor (no destructors supplied
 * here), so the tuples are left for the caller to release.
 */
stHash *buildContigPathToContigPathLengthHash(stList *maximalHaplotypePaths) {
    stHash *pathToLength = stHash_construct();
    int64_t pathCount = stList_length(maximalHaplotypePaths);
    for (int64_t p = 0; p < pathCount; p++) {
        stList *path = stList_get(maximalHaplotypePaths, p);
        stIntTuple *length = stIntTuple_construct1(contigPathLength(path));
        stHash_insert(pathToLength, path, length);
    }
    return pathToLength;
}
static void testSetup() { // compare by value of memory address set0 = stSet_construct(); // compare by value of ints. set1 = stSet_construct3((uint64_t(*)(const void *)) stIntTuple_hashKey, (int(*)(const void *, const void *)) stIntTuple_equalsFn, (void(*)(void *)) stIntTuple_destruct); one = stIntTuple_construct1( 0); two = stIntTuple_construct1( 1); three = stIntTuple_construct1( 2); four = stIntTuple_construct1( 3); five = stIntTuple_construct1( 4); six = stIntTuple_construct1( 5); stSet_insert(set0, one); stSet_insert(set0, two); stSet_insert(set0, three); stSet_insert(set0, four); stSet_insert(set0, five); stSet_insert(set0, six); stSet_insert(set1, one); stSet_insert(set1, two); stSet_insert(set1, three); stSet_insert(set1, four); stSet_insert(set1, five); stSet_insert(set1, six); }
/*
 * Recursive step of the component search.
 *
 * Looks up the edge list of `node` in nodesToEdges. If one is present it is
 * removed from the hash (marking the node as visited), each of its edges is
 * added to `component` if not already there, and the search recurses into both
 * endpoints of every edge (tuple positions 0 and 1). The removed edge list is
 * destructed once it has been processed. Nodes with no entry are a no-op.
 */
static void getComponentsP(stHash *nodesToEdges, int64_t node, stSortedSet *component) {
    stIntTuple *key = stIntTuple_construct1(node);
    stList *incidentEdges = stHash_search(nodesToEdges, key);
    if (incidentEdges == NULL) {
        // Unknown or already visited node.
        stIntTuple_destruct(key);
        return;
    }

    stHash_remove(nodesToEdges, key);
    for (int64_t e = 0; e < stList_length(incidentEdges); e++) {
        stIntTuple *edge = stList_get(incidentEdges, e);
        if (stSortedSet_search(component, edge) == NULL) {
            stSortedSet_insert(component, edge);
        }
        /*
         * Recursion depth on the stack could equal the total number of nodes.
         */
        getComponentsP(nodesToEdges, stIntTuple_get(edge, 0), component);
        getComponentsP(nodesToEdges, stIntTuple_get(edge, 1), component);
    }
    stList_destruct(incidentEdges);
    stIntTuple_destruct(key);
}
static void setup() { teardown(); //Make nodes nodes = stList_construct3(0, (void(*)(void *)) stIntTuple_destruct); int64_t nodeNumber = st_randomInt(0, 1000); for (int64_t i = 0; i < nodeNumber; i++) { stList_append(nodes, stIntTuple_construct1( i)); } //Make edges edges = stList_construct3(0, (void(*)(void *)) stIntTuple_destruct); float edgeProb = st_random(); for (int64_t i = 0; i < nodeNumber; i++) { for (int64_t j = i; j < nodeNumber; j++) { if (st_random() <= edgeProb) { stList_append(edges, stIntTuple_construct3( st_randomInt(1, 100), i, j)); } } } //Max component size maxComponentSize = 1 + log(nodeNumber) * 10; //(st_randomInt(0, nodeNumber+1); }
/*
 * Queues the pending state of cactusDisk as bulk key-value requests on
 * cactusDisk->updateRequests and writes them to cactusDisk->database.
 *
 * Requests are queued in this order:
 *  - one update per flower in cactusDisk->flowers, via cactusDisk_addUpdateRequest;
 *  - for each name in cactusDisk->flowerNamesMarkedForDeletion that already has a
 *    record, an update request of size 0; the name is also appended to the local
 *    removeRequests list;
 *  - for each meta-sequence, its compressed binary representation, as an insert
 *    when no record exists for its name and as an update otherwise;
 *  - the compressed disk parameters under CACTUS_DISK_PARAMETER_KEY, inserted
 *    only when that record does not exist yet.
 * If at least one request was queued, the list is handed to
 * stKVDatabase_bulkSetRecords; an exception raised there is passed to
 * stThrowNewCause with ST_KV_DATABASE_EXCEPTION_ID.
 *
 * NOTE(review): the definition appears truncated in this view - the function's
 * closing brace and any later use or cleanup of removeRequests are not visible.
 * Confirm against the complete file before editing the code.
 */
void cactusDisk_write(CactusDisk *cactusDisk) { Flower *flower; int64_t recordSize; stList *removeRequests = stList_construct3(0, (void (*)(void *)) stIntTuple_destruct); st_logDebug("Starting to write the cactus to disk\n"); stSortedSetIterator *it = stSortedSet_getIterator(cactusDisk->flowers); //Sort flowers to update. while ((flower = stSortedSet_getNext(it)) != NULL) { cactusDisk_addUpdateRequest(cactusDisk, flower); } stSortedSet_destructIterator(it); st_logDebug("Got the flowers to update\n"); //Remove nets that are marked for deletion.. it = stSortedSet_getIterator(cactusDisk->flowerNamesMarkedForDeletion); char *nameString; while ((nameString = stSortedSet_getNext(it)) != NULL) { Name name = cactusMisc_stringToName(nameString); if (containsRecord(cactusDisk, name)) { stList_append(cactusDisk->updateRequests, stKVDatabaseBulkRequest_constructUpdateRequest(name, &name, 0)); //We set it to null in the first atomic operation. stList_append(removeRequests, stIntTuple_construct1(name)); } } stSortedSet_destructIterator(it); st_logDebug("Avoided updating nets marked for deletion\n"); // Insert and/or update meta-sequences. 
it = stSortedSet_getIterator(cactusDisk->metaSequences); MetaSequence *metaSequence; while ((metaSequence = stSortedSet_getNext(it)) != NULL) { void *vA = binaryRepresentation_makeBinaryRepresentation(metaSequence, (void (*)(void *, void (*)(const void * ptr, size_t size, size_t count))) metaSequence_writeBinaryRepresentation, &recordSize); //Compression vA = compress(vA, &recordSize); if (!containsRecord(cactusDisk, metaSequence_getName(metaSequence))) { stList_append(cactusDisk->updateRequests, stKVDatabaseBulkRequest_constructInsertRequest(metaSequence_getName(metaSequence), vA, recordSize)); } else { stList_append(cactusDisk->updateRequests, stKVDatabaseBulkRequest_constructUpdateRequest(metaSequence_getName(metaSequence), vA, recordSize)); } free(vA); } stSortedSet_destructIterator(it); st_logDebug("Got the sequences we are going to add to the database.\n"); if (!containsRecord(cactusDisk, CACTUS_DISK_PARAMETER_KEY)) { //We only write the parameters once. //Finally the database info. 
void *cactusDiskParameters = binaryRepresentation_makeBinaryRepresentation(cactusDisk, (void (*)(void *, void (*)(const void * ptr, size_t size, size_t count))) cactusDisk_writeBinaryRepresentation, &recordSize); //Compression cactusDiskParameters = compress(cactusDiskParameters, &recordSize); stList_append(cactusDisk->updateRequests, stKVDatabaseBulkRequest_constructInsertRequest(CACTUS_DISK_PARAMETER_KEY, cactusDiskParameters, recordSize)); free(cactusDiskParameters); } st_logDebug("Checked if need to write the initial parameters\n"); if (stList_length(cactusDisk->updateRequests) > 0) { st_logDebug("Going to write %" PRIi64 " updates\n", stList_length(cactusDisk->updateRequests)); stTry { st_logDebug("Writing %" PRIi64 " updates\n", stList_length(cactusDisk->updateRequests)); assert(stList_length(cactusDisk->updateRequests) > 0); stKVDatabase_bulkSetRecords(cactusDisk->database, cactusDisk->updateRequests); } stCatch(except) { stThrowNewCause(except, ST_KV_DATABASE_EXCEPTION_ID, "Failed when trying to set records in updating the cactus disk"); }stTryEnd ; }
static stHash *getScaffoldPathsP(stList *haplotypePaths, stHash *haplotypePathToScaffoldPathHash, stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters) { stHash *haplotypeToMaximalHaplotypeLengthHash = buildContigPathToContigPathLengthHash(haplotypePaths); stHash *segmentToMaximalHaplotypePathHash = buildSegmentToContigPathHash(haplotypePaths); for (int64_t i = 0; i < stList_length(haplotypePaths); i++) { stSortedSet *bucket = stSortedSet_construct(); stHash_insert(haplotypePathToScaffoldPathHash, stList_get(haplotypePaths, i), bucket); stSortedSet_insert(bucket, stList_get(haplotypePaths, i)); } for (int64_t i = 0; i < stList_length(haplotypePaths); i++) { stList *haplotypePath = stList_get(haplotypePaths, i); assert(stList_length(haplotypePath) > 0); Segment *_5Segment = stList_get(haplotypePath, 0); if (!segment_getStrand(_5Segment)) { _5Segment = segment_getReverse(stList_get(haplotypePath, stList_length(haplotypePath) - 1)); } assert(segment_getStrand(_5Segment)); if (getAdjacentCapsSegment(segment_get5Cap(_5Segment)) != NULL) { assert(!trueAdjacency(segment_get5Cap(_5Segment), haplotypeEventStrings)); } int64_t insertLength; int64_t deleteLength; Cap *otherCap; enum CapCode _5CapCode = getCapCode(segment_get5Cap(_5Segment), &otherCap, haplotypeEventStrings, contaminationEventStrings, &insertLength, &deleteLength, capCodeParameters); if (_5CapCode == SCAFFOLD_GAP || _5CapCode == AMBIGUITY_GAP) { assert(stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath) != NULL); int64_t j = stIntTuple_get(stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath), 0); Segment *adjacentSegment = getAdjacentCapsSegment(segment_get5Cap(_5Segment)); assert(adjacentSegment != NULL); while (!hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)) { //is not a haplotype end adjacentSegment = getAdjacentCapsSegment(segment_get5Cap(adjacentSegment)); assert(adjacentSegment != NULL); 
} assert(adjacentSegment != NULL); assert(hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)); //is a haplotype end stList *adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, adjacentSegment); if (adjacentHaplotypePath == NULL) { adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, segment_getReverse( adjacentSegment)); } assert(adjacentHaplotypePath != NULL); assert(adjacentHaplotypePath != haplotypePath); assert(stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath) != NULL); int64_t k = stIntTuple_get(stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath), 0); //Now merge the buckets and make new int tuples.. stSortedSet *bucket1 = stHash_search(haplotypePathToScaffoldPathHash, haplotypePath); stSortedSet *bucket2 = stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath); assert(bucket1 != NULL); assert(bucket2 != NULL); assert(bucket1 != bucket2); stSortedSet *bucket3 = stSortedSet_getUnion(bucket1, bucket2); stSortedSetIterator *bucketIt = stSortedSet_getIterator(bucket3); stList *l; while ((l = stSortedSet_getNext(bucketIt)) != NULL) { //Do the bucket first assert(stHash_search(haplotypePathToScaffoldPathHash, l) == bucket1 || stHash_search(haplotypePathToScaffoldPathHash, l) == bucket2); stHash_remove(haplotypePathToScaffoldPathHash, l); stHash_insert(haplotypePathToScaffoldPathHash, l, bucket3); //Now the length stIntTuple *m = stHash_remove(haplotypeToMaximalHaplotypeLengthHash, l); assert(m != NULL); assert(stIntTuple_get(m, 0) == j || stIntTuple_get(m, 0) == k); stHash_insert(haplotypeToMaximalHaplotypeLengthHash, l, stIntTuple_construct1( j + k)); stIntTuple_destruct(m); } assert(stHash_search(haplotypePathToScaffoldPathHash, haplotypePath) == bucket3); assert(stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath) == bucket3); stSortedSet_destructIterator(bucketIt); } } 
stHash_destruct(segmentToMaximalHaplotypePathHash); return haplotypeToMaximalHaplotypeLengthHash; }