/*
 * Exercises stSet_insert on set0: re-inserting a key that is already a
 * member, inserting another existing member, and inserting a brand new key.
 */
static void test_stSet_insert(CuTest* testCase) {
    testSetup();

    // 'one' was added by testSetup(); inserting it again must leave it findable.
    void *found = stSet_search(set0, one);
    CuAssertTrue(testCase, found == one);
    stSet_insert(set0, one);
    found = stSet_search(set0, one);
    CuAssertTrue(testCase, found == one);

    // Same again for another key that testSetup() already inserted.
    stSet_insert(set0, three);
    found = stSet_search(set0, three);
    CuAssertTrue(testCase, found == three);

    // A freshly constructed tuple is absent until it is inserted.
    stIntTuple *freshKey = stIntTuple_construct2(7, 7);
    found = stSet_search(set0, freshKey);
    CuAssertTrue(testCase, found == NULL);
    stSet_insert(set0, freshKey);
    found = stSet_search(set0, freshKey);
    CuAssertTrue(testCase, found == freshKey);

    // The test releases the extra key itself before tearing the fixtures down.
    stIntTuple_destruct(freshKey);
    testTeardown();
}
/*
 * Exercises stSet_remove on both fixture sets: the removed key is returned,
 * is no longer found afterwards, and can be inserted again.
 */
static void test_stSet_remove(CuTest* testCase) {
    testSetup();

    // Removing from set0 hands back the key and makes it unsearchable.
    void *removed = stSet_remove(set0, one);
    CuAssertTrue(testCase, removed == one);
    void *lookup = stSet_search(set0, one);
    CuAssertTrue(testCase, lookup == NULL);

    // The same must hold for set1.
    removed = stSet_remove(set1, one);
    CuAssertTrue(testCase, removed == one);
    lookup = stSet_search(set1, one);
    CuAssertTrue(testCase, lookup == NULL);

    // Put the key back into set1 and confirm it is a member again.
    stSet_insert(set1, one);
    lookup = stSet_search(set1, one);
    CuAssertTrue(testCase, lookup == one);

    testTeardown();
}
/*
 * Builds a new set holding every element of set1 followed by every element
 * of set2.
 *
 * The inputs are first passed to stSet_verifySetsHaveSameFunctions(). The
 * result is constructed with set1's hash and equality functions and a NULL
 * third argument, and the element pointers are inserted as-is (they are not
 * copied).
 *
 * Returns the newly constructed set; the caller is responsible for it.
 */
stSet *stSet_getUnion(stSet *set1, stSet *set2) {
    stSet_verifySetsHaveSameFunctions(set1, set2);
    stSet *unionSet = stSet_construct3(stSet_getHashFunction(set1), stSet_getEqualityFunction(set1), NULL);

    // Copy the members of each input in turn, set1 first.
    stSet *inputs[2] = { set1, set2 };
    for (int which = 0; which < 2; which++) {
        stSetIterator *memberIt = stSet_getIterator(inputs[which]);
        for (void *member = stSet_getNext(memberIt); member != NULL; member = stSet_getNext(memberIt)) {
            stSet_insert(unionSet, member);
        }
        stSet_destructIterator(memberIt);
    }
    return unionSet;
}
/*
 * Exercises stSet_getUnion: two empty sets, a populated set with an empty
 * set, two overlapping sets, and finally two sets with different functions,
 * which must raise an exception.
 */
static void test_stSet_getUnion(CuTest* testCase) {
    testSetup();

    // Check union of empty sets is empty
    stSet *set2 = stSet_construct();
    stSet *set3 = stSet_construct();
    stSet *set4 = stSet_getUnion(set2, set3);
    CuAssertTrue(testCase, stSet_size(set4) == 0);
    stSet_destruct(set2);
    stSet_destruct(set3);
    stSet_destruct(set4);

    // Check union of non empty set and empty set is non-empty
    set2 = stSet_construct();
    set3 = stSet_getUnion(set0, set2);
    CuAssertTrue(testCase, stSet_size(set3) == 6);
    stSet_destruct(set2);
    stSet_destruct(set3);

    // Check union of two non-empty overlapping sets is correct
    set2 = stSet_construct();
    set3 = stSet_construct();
    // Fixed-size stack arrays: nothing to allocate, nothing to leak.
    stIntTuple *uniqs[4];
    stIntTuple *common[5];
    for (int i = 0; i < 4; ++i) {
        uniqs[i] = stIntTuple_construct2(9, i);
    }
    for (int i = 0; i < 5; ++i) {
        common[i] = stIntTuple_construct2(5, i);
    }
    // The common keys go into both sets; each set also gets two keys of its own.
    for (int i = 0; i < 5; ++i) {
        stSet_insert(set2, common[i]);
        stSet_insert(set3, common[i]);
    }
    stSet_insert(set2, uniqs[0]);
    stSet_insert(set2, uniqs[1]);
    stSet_insert(set3, uniqs[2]);
    stSet_insert(set3, uniqs[3]);
    set4 = stSet_getUnion(set2, set3);
    CuAssertTrue(testCase, stSet_size(set4) == 9);
    for (int i = 0; i < 4; ++i) {
        CuAssertTrue(testCase, stSet_search(set4, uniqs[i]) != NULL);
    }
    for (int i = 0; i < 5; ++i) {
        CuAssertTrue(testCase, stSet_search(set4, common[i]) != NULL);
    }
    stSet_destruct(set2);
    stSet_destruct(set3);
    stSet_destruct(set4);
    // Release the keys only once no set refers to them any more. The sets
    // above were built without a key destructor, so they did not free them.
    for (int i = 0; i < 4; ++i) {
        stIntTuple_destruct(uniqs[i]);
    }
    for (int i = 0; i < 5; ++i) {
        stIntTuple_destruct(common[i]);
    }

    // Check we get an exception with sets with different functions.
    // The flag makes the test fail if no exception is raised at all, instead
    // of silently skipping the check in the handler.
    volatile int caughtException = 0;
    stTry {
        stSet_getUnion(set0, set1);
    } stCatch(except) {
        caughtException = 1;
        // NOTE(review): the caught exception object is not released here;
        // confirm whether the exception API expects the handler to free it.
        CuAssertTrue(testCase, stExcept_getId(except) == SET_EXCEPTION_ID);
    } stTryEnd
    CuAssertTrue(testCase, caughtException);

    testTeardown();
}
/*
 * Builds a new set holding the elements of set1 that are not found in set2.
 *
 * The inputs are first passed to stSet_verifySetsHaveSameFunctions(). The
 * result is constructed with set1's hash and equality functions and a NULL
 * third argument, and the element pointers are inserted as-is (they are not
 * copied).
 *
 * Returns the newly constructed set; the caller is responsible for it.
 */
stSet *stSet_getDifference(stSet *set1, stSet *set2) {
    stSet_verifySetsHaveSameFunctions(set1, set2);
    stSet *difference = stSet_construct3(stSet_getHashFunction(set1), stSet_getEqualityFunction(set1), NULL);

    stSetIterator *memberIt = stSet_getIterator(set1);
    for (void *member = stSet_getNext(memberIt); member != NULL; member = stSet_getNext(memberIt)) {
        // Members that set2 also contains are left out of the result.
        if (stSet_search(set2, member) != NULL) {
            continue;
        }
        stSet_insert(difference, member);
    }
    stSet_destructIterator(memberIt);

    return difference;
}
/*
 * Fills a set (constructed with free as its key destructor) with 1000 heap
 * allocated keys, removes each one through stSet_removeAndFreeKey, and checks
 * that the call returns the key pointer and that the set ends up empty.
 */
static void test_stSet_removeAndFreeKey(CuTest* testCase) {
    stSet *keySet = stSet_construct2(free);
    stList *allocatedKeys = stList_construct();
    int64_t keyCount = 1000;

    // Remember every key in a list so it can be looked up again by index.
    int64_t made = 0;
    while (made < keyCount) {
        int64_t *newKey = st_malloc(sizeof(*newKey));
        stList_append(allocatedKeys, newKey);
        stSet_insert(keySet, newKey);
        made++;
    }

    for (int64_t idx = 0; idx < keyCount; idx++) {
        int64_t *expected = stList_get(allocatedKeys, idx);
        void *returned = stSet_removeAndFreeKey(keySet, expected);
        // Only the pointer values are compared; the key is never dereferenced.
        CuAssertPtrEquals(testCase, expected, returned);
    }

    CuAssertIntEquals(testCase, 0, stSet_size(keySet));
    stSet_destruct(keySet);
    stList_destruct(allocatedKeys);
}
/*
 * Finds the most recent common ancestor of node1 and node2, where a node
 * counts as an ancestor of itself.
 *
 * Returns the first node on the path from node2 towards its root that also
 * lies on the path from node1 towards its root, or NULL if the two paths
 * share no node.
 */
stTree *stTree_getMRCA(stTree *node1, stTree *node2) {
    // Collect node1 and every node reached by following parent links from it.
    stSet *node1Lineage = stSet_construct();
    stTree *walker = node1;
    for (;;) {
        stSet_insert(node1Lineage, walker);
        walker = stTree_getParent(walker);
        if (walker == NULL) {
            break;
        }
    }

    // Climb from node2 until a node from node1's lineage is reached.
    stTree *mrca = NULL;
    walker = node2;
    for (;;) {
        if (stSet_search(node1Lineage, walker) != NULL) {
            mrca = walker;
            break;
        }
        walker = stTree_getParent(walker);
        if (walker == NULL) {
            break;
        }
    }

    stSet_destruct(node1Lineage);
    return mrca;
}
/*
 * Walks set0 with an iterator and with a copy of that iterator taken before
 * any element was read. Checks that six distinct members are returned, that
 * the copy yields the same sequence, and that both iterators return NULL
 * once exhausted.
 */
static void test_stSet_testIterator(CuTest *testCase) {
    testSetup();
    stSetIterator *iterator = stSet_getIterator(set0);
    stSetIterator *iteratorCopy = stSet_copyIterator(iterator);
    stSet *seen = stSet_construct();

    int64_t visited = 0;
    while (visited < 6) {
        void *member = stSet_getNext(iterator);
        CuAssertTrue(testCase, member != NULL);
        // Every returned value must belong to set0 and must not repeat.
        CuAssertTrue(testCase, stSet_search(set0, member) != NULL);
        CuAssertTrue(testCase, stSet_search(seen, member) == NULL);
        // The copy advances in lock step with the original.
        CuAssertTrue(testCase, stSet_getNext(iteratorCopy) == member);
        stSet_insert(seen, member);
        visited++;
    }

    // An exhausted iterator keeps returning NULL, and so does the copy.
    CuAssertTrue(testCase, stSet_getNext(iterator) == NULL);
    CuAssertTrue(testCase, stSet_getNext(iterator) == NULL);
    CuAssertTrue(testCase, stSet_getNext(iteratorCopy) == NULL);

    stSet_destruct(seen);
    stSet_destructIterator(iterator);
    stSet_destructIterator(iteratorCopy);
    testTeardown();
}
static void testSetup() { // compare by value of memory address set0 = stSet_construct(); // compare by value of ints. set1 = stSet_construct3((uint64_t(*)(const void *)) stIntTuple_hashKey, (int(*)(const void *, const void *)) stIntTuple_equalsFn, (void(*)(void *)) stIntTuple_destruct); one = stIntTuple_construct1( 0); two = stIntTuple_construct1( 1); three = stIntTuple_construct1( 2); four = stIntTuple_construct1( 3); five = stIntTuple_construct1( 4); six = stIntTuple_construct1( 5); stSet_insert(set0, one); stSet_insert(set0, two); stSet_insert(set0, three); stSet_insert(set0, four); stSet_insert(set0, five); stSet_insert(set0, six); stSet_insert(set1, one); stSet_insert(set1, two); stSet_insert(set1, three); stSet_insert(set1, four); stSet_insert(set1, five); stSet_insert(set1, six); }
// Compute the connected components, if they haven't been computed // already since the last modification. static void computeConnectedComponents(stNaiveConnectivity *connectivity) { if (connectivity->connectedComponentCache != NULL) { // Already computed the connected components. return; } stHashIterator *nodeIt = stHash_getIterator(connectivity->nodesToAdjList); void *node; stNaiveConnectedComponent *componentsHead = NULL; while ((node = stHash_getNext(nodeIt)) != NULL) { stSet *myNodeSet = stSet_construct(); stSet_insert(myNodeSet, node); struct adjacency *adjList = stHash_search(connectivity->nodesToAdjList, node); if (adjList != NULL) { while (adjList != NULL) { stSet_insert(myNodeSet, adjList->toNode); adjList = adjList->next; } } // Now go through the existing connected components and see if // this overlaps any of them. If it's not a full overlap, then // this set becomes the union, and we continue looking for // additional overlaps, then this becomes a new connected // component. If we find that this is a subset of an existing // component, we can quit early, since we can't possibly add // to it or any others. stNaiveConnectedComponent *curComponent = componentsHead; while (curComponent != NULL) { stNaiveConnectedComponent *next = curComponent->next; // Find out whether our node set is a subset of this // connected component, or if it shares any overlap. bool isSubset = true; bool overlap = false; stSetIterator *myNodeIt = stSet_getIterator(myNodeSet); void *node; while ((node = stSet_getNext(myNodeIt)) != NULL) { if (stSet_search(curComponent->nodes, node)) { overlap = true; } else { isSubset = false; } } stSet_destructIterator(myNodeIt); if (isSubset) { assert(overlap == true); // Quit early. 
stSet_destruct(myNodeSet); myNodeSet = NULL; break; } else if (overlap) { stSet *newNodeSet = stSet_getUnion(myNodeSet, curComponent->nodes); stSet_destruct(myNodeSet); removeComponent(&componentsHead, curComponent); myNodeSet = newNodeSet; } curComponent = next; } if (myNodeSet != NULL) { // We have a new (or possibly merged) connected component to // add to the list. stNaiveConnectedComponent *newComponent = malloc(sizeof(stNaiveConnectedComponent)); newComponent->nodes = myNodeSet; newComponent->next = componentsHead; componentsHead = newComponent; } } stHash_destructIterator(nodeIt); connectivity->connectedComponentCache = componentsHead; }
int main(int argc, char *argv[]) { /* * Open the database. * Construct a flower. * Construct an event tree representing the species tree. * For each sequence contruct two ends each containing an cap. * Make a file for the sequence. * Link the two caps. * Finish! */ int64_t key, j; Group *group; Flower_EndIterator *endIterator; End *end; bool makeEventHeadersAlphaNumeric = 0; /* * Arguments/options */ char * logLevelString = NULL; char * speciesTree = NULL; char * outgroupEvents = NULL; /////////////////////////////////////////////////////////////////////////// // (0) Parse the inputs handed by genomeCactus.py / setup stuff. /////////////////////////////////////////////////////////////////////////// while (1) { static struct option long_options[] = { { "logLevel", required_argument, 0, 'a' }, { "cactusDisk", required_argument, 0, 'b' }, { "speciesTree", required_argument, 0, 'f' }, { "outgroupEvents", required_argument, 0, 'g' }, { "help", no_argument, 0, 'h' }, { "makeEventHeadersAlphaNumeric", no_argument, 0, 'i' }, { 0, 0, 0, 0 } }; int option_index = 0; key = getopt_long(argc, argv, "a:b:f:hg:i", long_options, &option_index); if (key == -1) { break; } switch (key) { case 'a': logLevelString = optarg; break; case 'b': cactusDiskDatabaseString = optarg; break; case 'f': speciesTree = optarg; break; case 'g': outgroupEvents = optarg; break; case 'h': usage(); return 0; case 'i': makeEventHeadersAlphaNumeric = 1; break; default: usage(); return 1; } } /////////////////////////////////////////////////////////////////////////// // (0) Check the inputs. 
/////////////////////////////////////////////////////////////////////////// //assert(logLevelString == NULL || strcmp(logLevelString, "CRITICAL") == 0 || strcmp(logLevelString, "INFO") == 0 || strcmp(logLevelString, "DEBUG") == 0); assert(cactusDiskDatabaseString != NULL); assert(speciesTree != NULL); ////////////////////////////////////////////// //Set up logging ////////////////////////////////////////////// st_setLogLevelFromString(logLevelString); ////////////////////////////////////////////// //Log (some of) the inputs ////////////////////////////////////////////// st_logInfo("Flower disk name : %s\n", cactusDiskDatabaseString); for (j = optind; j < argc; j++) { st_logInfo("Sequence file/directory %s\n", argv[j]); } ////////////////////////////////////////////// //Load the database ////////////////////////////////////////////// stKVDatabaseConf *kvDatabaseConf = kvDatabaseConf = stKVDatabaseConf_constructFromString(cactusDiskDatabaseString); if (stKVDatabaseConf_getType(kvDatabaseConf) == stKVDatabaseTypeTokyoCabinet || stKVDatabaseConf_getType(kvDatabaseConf) == stKVDatabaseTypeKyotoTycoon) { assert(stKVDatabaseConf_getDir(kvDatabaseConf) != NULL); cactusDisk = cactusDisk_construct2(kvDatabaseConf, "cactusSequences"); } else { cactusDisk = cactusDisk_construct(kvDatabaseConf, 1); } st_logInfo("Set up the flower disk\n"); ////////////////////////////////////////////// //Construct the flower ////////////////////////////////////////////// if (cactusDisk_getFlower(cactusDisk, 0) != NULL) { cactusDisk_destruct(cactusDisk); st_logInfo("The first flower already exists\n"); return 0; } flower = flower_construct2(0, cactusDisk); assert(flower_getName(flower) == 0); st_logInfo("Constructed the flower\n"); ////////////////////////////////////////////// //Construct the event tree ////////////////////////////////////////////// st_logInfo("Going to build the event tree with newick string: %s\n", speciesTree); stTree *tree = stTree_parseNewickString(speciesTree); 
st_logInfo("Parsed the tree\n"); if (makeEventHeadersAlphaNumeric) { makeEventHeadersAlphaNumericFn(tree); } stTree_setBranchLength(tree, INT64_MAX); checkBranchLengthsAreDefined(tree); eventTree = eventTree_construct2(flower); //creates the event tree and the root even totalEventNumber = 1; st_logInfo("Constructed the basic event tree\n"); // Construct a set of outgroup names so that ancestral outgroups // get recognized. stSet *outgroupNameSet = stSet_construct3(stHash_stringKey, stHash_stringEqualKey, free); if(outgroupEvents != NULL) { stList *outgroupNames = stString_split(outgroupEvents); for(int64_t i = 0; i < stList_length(outgroupNames); i++) { char *outgroupName = stList_get(outgroupNames, i); stSet_insert(outgroupNameSet, stString_copy(outgroupName)); } stList_destruct(outgroupNames); } //now traverse the tree j = optind; assignEventsAndSequences(eventTree_getRootEvent(eventTree), tree, outgroupNameSet, argv, &j); char *eventTreeString = eventTree_makeNewickString(eventTree); st_logInfo( "Constructed the initial flower with %" PRIi64 " sequences and %" PRIi64 " events with string: %s\n", totalSequenceNumber, totalEventNumber, eventTreeString); assert(event_getSubTreeBranchLength(eventTree_getRootEvent(eventTree)) >= 0.0); free(eventTreeString); //assert(0); ////////////////////////////////////////////// //Label any outgroup events. ////////////////////////////////////////////// if (outgroupEvents != NULL) { stList *outgroupEventsList = stString_split(outgroupEvents); for (int64_t i = 0; i < stList_length(outgroupEventsList); i++) { char *outgroupEvent = makeEventHeadersAlphaNumeric ? 
makeAlphaNumeric(stList_get(outgroupEventsList, i)) : stString_copy(stList_get(outgroupEventsList, i)); Event *event = eventTree_getEventByHeader(eventTree, outgroupEvent); if (event == NULL) { st_errAbort("Got an outgroup string that does not match an event, outgroup string %s", outgroupEvent); } assert(!event_isOutgroup(event)); event_setOutgroupStatus(event, 1); assert(event_isOutgroup(event)); free(outgroupEvent); } stList_destruct(outgroupEventsList); } ////////////////////////////////////////////// //Construct the terminal group. ////////////////////////////////////////////// if (flower_getEndNumber(flower) > 0) { group = group_construct2(flower); endIterator = flower_getEndIterator(flower); while ((end = flower_getNextEnd(endIterator)) != NULL) { end_setGroup(end, group); } flower_destructEndIterator(endIterator); assert(group_isLeaf(group)); // Create a one link chain if there is only one pair of attached ends.. group_constructChainForLink(group); assert(!flower_builtBlocks(flower)); } else { flower_setBuiltBlocks(flower, 1); } /////////////////////////////////////////////////////////////////////////// // Write the flower to disk. /////////////////////////////////////////////////////////////////////////// //flower_check(flower); cactusDisk_write(cactusDisk); st_logInfo("Updated the flower on disk\n"); /////////////////////////////////////////////////////////////////////////// // Cleanup. /////////////////////////////////////////////////////////////////////////// cactusDisk_destruct(cactusDisk); return 0; //Exit without clean up is quicker, enable cleanup when doing memory leak detection. stSet_destruct(outgroupNameSet); stTree_destruct(tree); stKVDatabaseConf_destruct(kvDatabaseConf); return 0; }