/*
 * Uses the functions above to build an adjacency list, then by DFS attempts to create
 * a valid topological sort, returning non-zero if the graph contains a cycle.
 */
static int64_t containsACycle(stList *pairs, int64_t sequenceNumber) {
    //Build an adjacency list structure..
    stHash *adjacencyList = buildAdjacencyList(pairs, sequenceNumber);

    //Do a topological sort of the adjacency list
    stSortedSet *started = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);
    stSortedSet *done = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);
    int64_t cyclic = 0;
    for(int64_t seq=0; seq<sequenceNumber; seq++) {
        stIntTuple *seqPos = stIntTuple_construct2(seq, 0);
        //The following hacks avoid memory cleanup..
        stSortedSet *column = stHash_search(adjacencyList, seqPos);
        assert(column != NULL);
        stIntTuple *seqPos2 = stSortedSet_search(column, seqPos);
        assert(seqPos2 != NULL);
        cyclic = cyclic || dfs(adjacencyList, seqPos2, started, done);
        stIntTuple_destruct(seqPos);
    }
    //cleanup
    stHashIterator *it = stHash_getIterator(adjacencyList);
    stIntTuple *seqPos;
    stSortedSet *columns = stSortedSet_construct2((void (*)(void *))stSortedSet_destruct);
    while((seqPos = stHash_getNext(it)) != NULL) {
        stSortedSet *column = stHash_search(adjacencyList, seqPos);
        assert(column != NULL);
        stSortedSet_insert(columns, column);
    }
    stHash_destructIterator(it);
    stHash_destruct(adjacencyList);
    stSortedSet_destruct(columns);
    stSortedSet_destruct(started);
    stSortedSet_destruct(done);
    return cyclic;
}
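/*
 * Minimal usage sketch (not part of the original file): builds a small pair list and checks it
 * for a cyclic ordering. The pair layout (seq1, pos1, seq2, pos2) follows what buildAdjacencyList
 * (later in this section) reads out of each pair; the concrete values, and the availability of
 * stIntTuple_construct4, are illustrative assumptions.
 */
static void exampleContainsACycle(void) {
    stList *pairs = stList_construct3(0, (void (*)(void *)) stIntTuple_destruct);
    //Aligning (seq 0, pos 0) with (seq 1, pos 1) and (seq 1, pos 0) with (seq 0, pos 1)
    //forces the two merged columns to each precede the other, i.e. a cycle.
    stList_append(pairs, stIntTuple_construct4(0, 0, 1, 1));
    stList_append(pairs, stIntTuple_construct4(1, 0, 0, 1));
    int64_t cyclic = containsACycle(pairs, 2);
    assert(cyclic);
    stList_destruct(pairs);
}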
static void checkComponents(CuTest *testCase, stList *filteredEdges) {
    stHash *nodesToComponents = getComponents(filteredEdges);
    //Check all components are smaller than threshold
    stList *components = stHash_getValues(nodesToComponents);
    for (int64_t i = 0; i < stList_length(components); i++) {
        stSortedSet *component = stList_get(components, i);
        CuAssertTrue(testCase, stSortedSet_size(component) <= maxComponentSize);
        CuAssertTrue(testCase, stSortedSet_size(component) >= 1);
    }
    //Check no edges can be added from those filtered.
    stSortedSet *filteredEdgesSet = stList_getSortedSet(filteredEdges,
            (int(*)(const void *, const void *)) stIntTuple_cmpFn);
    for (int64_t i = 0; i < stList_length(edges); i++) {
        stIntTuple *edge = stList_get(edges, i);
        if (stSortedSet_search(filteredEdgesSet, edge) == NULL) {
            stIntTuple *node1 = stIntTuple_construct1(stIntTuple_get(edge, 1));
            stIntTuple *node2 = stIntTuple_construct1(stIntTuple_get(edge, 2));
            stSortedSet *component1 = stHash_search(nodesToComponents, node1);
            stSortedSet *component2 = stHash_search(nodesToComponents, node2);
            CuAssertTrue(testCase, component1 != NULL && component2 != NULL);
            CuAssertTrue(testCase, component1 != component2);
            CuAssertTrue(testCase, stSortedSet_size(component1) + stSortedSet_size(component2) > maxComponentSize);
            stIntTuple_destruct(node1);
            stIntTuple_destruct(node2);
        }
    }
    stSortedSet_destruct(filteredEdgesSet);
    //Cleanup the components
    stSortedSet *componentsSet = stList_getSortedSet(components, NULL);
    stList_destruct(components);
    stSortedSet_setDestructor(componentsSet, (void(*)(void *)) stSortedSet_destruct);
    stSortedSet_destruct(componentsSet);
    stHash_destruct(nodesToComponents);
}
void stNaiveConnectivity_addEdge(stNaiveConnectivity *connectivity, void *node1, void *node2) {
    invalidateCache(connectivity);

    struct adjacency *newEdge1 = malloc(sizeof(struct adjacency));
    struct adjacency *newEdge2 = malloc(sizeof(struct adjacency));
    newEdge1->toNode = node2;
    newEdge2->toNode = node1;
    newEdge1->inverse = newEdge2;
    newEdge2->inverse = newEdge1;
    newEdge1->prev = NULL;
    newEdge2->prev = NULL;

    //Push the new edge onto the front of node1's adjacency list.
    struct adjacency *adjList1 = stHash_search(connectivity->nodesToAdjList, node1);
    if (adjList1 == NULL) {
        newEdge1->next = NULL;
    } else {
        newEdge1->next = adjList1;
        adjList1->prev = newEdge1;
    }
    stHash_remove(connectivity->nodesToAdjList, node1);
    stHash_insert(connectivity->nodesToAdjList, node1, newEdge1);

    //And likewise onto the front of node2's adjacency list.
    struct adjacency *adjList2 = stHash_search(connectivity->nodesToAdjList, node2);
    if (adjList2 == NULL) {
        newEdge2->next = NULL;
    } else {
        newEdge2->next = adjList2;
        adjList2->prev = newEdge2;
    }
    stHash_remove(connectivity->nodesToAdjList, node2);
    stHash_insert(connectivity->nodesToAdjList, node2, newEdge2);
}
static stHash *getComponents(stList *filteredEdges) {
    /*
     * A kind of stupid reimplementation of the greedy function, done just to trap typos.
     */
    stHash *nodesToComponents = stHash_construct3((uint64_t(*)(const void *)) stIntTuple_hashKey,
            (int(*)(const void *, const void *)) stIntTuple_equalsFn, NULL, NULL);
    for (int64_t i = 0; i < stList_length(nodes); i++) {
        stIntTuple *node = stList_get(nodes, i);
        stSortedSet *component = stSortedSet_construct();
        stSortedSet_insert(component, node);
        stHash_insert(nodesToComponents, node, component);
    }
    for (int64_t i = 0; i < stList_length(filteredEdges); i++) {
        stIntTuple *edge = stList_get(filteredEdges, i);
        stIntTuple *node1 = stIntTuple_construct1(stIntTuple_get(edge, 1));
        stIntTuple *node2 = stIntTuple_construct1(stIntTuple_get(edge, 2));
        stSortedSet *component1 = stHash_search(nodesToComponents, node1);
        stSortedSet *component2 = stHash_search(nodesToComponents, node2);
        assert(component1 != NULL && component2 != NULL);
        if (component1 != component2) {
            stSortedSet *component3 = stSortedSet_getUnion(component1, component2);
            stSortedSetIterator *setIt = stSortedSet_getIterator(component3);
            stIntTuple *node3;
            while ((node3 = stSortedSet_getNext(setIt)) != NULL) {
                stHash_insert(nodesToComponents, node3, component3);
            }
            stSortedSet_destructIterator(setIt);
            stSortedSet_destruct(component1);
            stSortedSet_destruct(component2);
        }
        stIntTuple_destruct(node1);
        stIntTuple_destruct(node2);
    }
    return nodesToComponents;
}
static void test_readingFasta_0(CuTest *testCase) {
    char inputName[] = ">simChimp.chrA";
    char inputSequence[] = "ATAATACTTGCACACTTCTGCTATTACTTGATGTGTTTTCTATGGGGTGT"
                           "CTTTCAGTGCTATGGGCAAGGCCATGGATTAATGGTGCCATAATTGCTCT"
                           "AGGCAGTGACTAGAAACAGTTCACAAGTTTTTACTGTATCAAACTATGTT"
                           "TTATAGTACGATTCACCCTCCAGGGGACCATCCCAAACTACTGGCCTAAA"
                           "AGGACCTGCCATGTTGTAACTCCCCAGCTTAGAAATATAGACGGGAGGAA"
                           "TGACaaaaagaagaaaaaaaaaaaaagaaaaaataaaaaaaaaacaaaaa"
                           "agatagagaaaaaaaaaagtaaaaacaaaaaaaaataaaaaagggaaaaa"
                           "aaataacaaaggaacaaaaaaaaaaaaaaaaaaataaaaagaaaaaCAAG"
                           "ATAACCTTCATGCCATTGGAGCTATCTATTATTGTCTTGACCTATGCTTT"
                           "ATCAATTTCTTCCTTCCTAGGAAGACATTTTTCTAGAAAGCTAAACGTTT"
                           "TTGTAGGCTTGCATGTTCTGTCTGGGCTTGAATGGTTGTGCGTCTACAAG"
                           "CCTCATTTACCATAGCACCATGCTTGGGTGGTATCTATCATCATTATCAA"
                           "TAGTCAAGTCATTATAATGTTTTGGTGATCAGGCCAGATCCCTTGCACCA"
                           "GTGACTTTCTAAATAGCACCTCCTCCATCATTTAAGGATCTCTAGCAACT"
                           "TTAATCTGACTCACCTTGCCATGCAGAGTGCATGTTCCTTTTTAACACCC"
                           "TGTGATTATGGGTTGGGTCTATTTGTATTTGTTTGATTACATCAGACGAC"
                           "CAGGCCAGAGACAGATAAACACAACAGCCACTGGAACCTAAAGCTGTGTT"
                           "CAGAATGTCACGGAATGTCTCATTGCACCCAGAGCTAGGGTGGGTATGAG"
                           "TATGATCTTCTACATAAGGTACCCCAGGAAAATTAACTTAACAACCAATC"
                           "AATTACAGAAGATGAATTCTGCTGTTGTCTCTTATTAGTTGGACTATTCA"
                           "GCCTAATGGTTGGCCACTTAGCTTGTCATGAGCATTACTGTACTACTATG"
                           "TCTAGTGTTTCCAGTTATTAGTTAGCCCACTGGATAGACAGTTTTGGCTT"
                           "GTTTTCTTTCATTTGTATTGCCCACTCACCTAGCAAATCAGACAAAGGGG"
                           "CATGTGAAAACTACCTTAGACTCTGCAGTTAGACAAACCATACTTTCCAC"
                           "ATAGACCTCAGACATTTGGACATGAATAATTTCCTTCCTCCGGAGTGGTG"
                           "GTTCCTCAACACTTATCACTTTCTTCTTCTTTTACCCGTATCACTGTCAA";
    FILE *ofp = de_fopen("testFasta.fa", "w");
    fprintf(ofp, "%s\n", inputName);
    for (size_t i = 0; i < strlen(inputSequence); ++i) {
        fprintf(ofp, "%c", inputSequence[i]);
        if (((i + 1) % 50) == 0) {
            fprintf(ofp, "\n");
        }
    }
    fprintf(ofp, "\n");
    fclose(ofp);
    stHash *sequenceHash = stHash_construct3(stHash_stringKey, stHash_stringEqualKey, free, destroyMtfseq);
    addSequencesToHash(sequenceHash, "testFasta.fa");
    mtfseq_t *value = NULL;
    CuAssertTrue(testCase, (value = stHash_search(sequenceHash, "not in there")) == NULL);
    CuAssertTrue(testCase, (value = stHash_search(sequenceHash, "simChimp.chrA")) != NULL);
    if (value != NULL) {
        CuAssertTrue(testCase, strlen(value->seq) == strlen(inputSequence));
        CuAssertTrue(testCase, strcmp(value->seq, inputSequence) == 0);
    }
    if (remove("testFasta.fa")) {
        fprintf(stderr, "Error, unable to remove temporary file testFasta.fa\n");
        exit(EXIT_FAILURE);
    }
    stHash_destruct(sequenceHash);
}
/*
 * Does the actual depth-first search to detect whether the graph has an acyclic ordering.
 */
static int64_t dfs(stHash *adjacencyList, stIntTuple *seqPos,
        stSortedSet *started, stSortedSet *done) {
    if(stSortedSet_search(started, seqPos) != NULL) {
        if(stSortedSet_search(done, seqPos) == NULL) {
            //We have detected a cycle
            //st_logInfo("I have cycle %" PRIi64 " %" PRIi64 "\n", stIntTuple_getPosition(seqPos, 0), stIntTuple_getPosition(seqPos, 1));
            return 1;
        }
        //We have already explored this area, but no cycle.
        return 0;
    }
    stSortedSet_insert(started, seqPos);
    int64_t cycle = 0;
    stIntTuple *nextSeqPos = stIntTuple_construct2(stIntTuple_get(seqPos, 0), stIntTuple_get(seqPos, 1) + 1);
    stSortedSet *column = stHash_search(adjacencyList, nextSeqPos);
    if(column != NULL) { //It is in the adjacency list, so we can do the recursion
        assert(stSortedSet_search(column, nextSeqPos) != NULL);
        stSortedSetIterator *it = stSortedSet_getIterator(column);
        stIntTuple *seqPos2;
        while((seqPos2 = stSortedSet_getNext(it)) != NULL) {
            cycle = cycle || dfs(adjacencyList, seqPos2, started, done);
        }
        stSortedSet_destructIterator(it);
    }
    stIntTuple_destruct(nextSeqPos);
    stSortedSet_insert(done, seqPos);
    return cycle;
}
/*
 * Recursive function which fills a given list with the
 * connected nodes within a module
 */
static void buildFaces_fillTopNodeList(Cap * cap, stList *list,
        stHash *liftedEdgesTable) {
    stList *liftedEdges;
    int64_t index;

    // Limit of recursion
    if (stList_contains(list, cap))
        return;

    // Actual filling
    st_logInfo("Adding cap %p to face\n", cap);
    stList_append(list, cap);

    // Recursion through lifted edges
    if ((liftedEdges = stHash_search(liftedEdgesTable, cap)))
        for (index = 0; index < stList_length(liftedEdges); index++)
            buildFaces_fillTopNodeList(
                    ((LiftedEdge *) stList_get(liftedEdges, index))->destination,
                    list, liftedEdgesTable);

    // Recursion through adjacency
    if (cap_getAdjacency(cap))
        buildFaces_fillTopNodeList(cap_getAdjacency(cap), list, liftedEdgesTable);
}
static stList *readMatching(FILE *fileHandle, stList *originalEdges) {
    /*
     * Reads the matching created by Blossom: a header line of
     * "nodeNumber edgeNumber", followed by one "node1 node2" pair per line.
     */
    stHash *originalEdgesHash = putEdgesInHash(originalEdges);
    char *line = stFile_getLineFromFile(fileHandle);
    assert(line != NULL);
    int64_t nodeNumber, edgeNumber;
    int64_t i = sscanf(line, "%" PRIi64 " %" PRIi64 "\n", &nodeNumber, &edgeNumber);
    assert(i == 2);
    free(line);
    stList *chosenEdges = stList_construct();
    for(int64_t j=0; j<edgeNumber; j++) {
        line = stFile_getLineFromFile(fileHandle);
        int64_t node1, node2;
        i = sscanf(line, "%" PRIi64 " %" PRIi64 "", &node1, &node2);
        assert(i == 2);
        free(line);
        assert(node1 >= 0);
        assert(node1 < nodeNumber);
        assert(node2 >= 0);
        assert(node2 < nodeNumber);
        stIntTuple *edge = constructEdge(node1, node2);
        stIntTuple *originalEdge = stHash_search(originalEdgesHash, edge);
        if(originalEdge != NULL) {
            stList_append(chosenEdges, originalEdge);
        }
        stIntTuple_destruct(edge);
    }
    stHash_destruct(originalEdgesHash);
    return chosenEdges;
}
/* Parse XML string into a hash. This parses all attributes of all tags
 * into values. The st_kv_database_conf type is stored as conf_type, the
 * database tag is stored as db_tag. This does minimal error checking
 * and is really lame. */
static stHash *hackParseXmlString(const char *xmlString) {
    stHash *hash = stHash_construct3(stHash_stringKey, stHash_stringEqualKey, free, free);
    char *toReplace[5] = { "</", "<", "/>", ">", "=" };
    char *cA = stString_replace(xmlString, toReplace[0], " "), *cA2;
    for (int64_t i = 1; i < 5; i++) {
        cA2 = stString_replace(cA, toReplace[i], " ");
        free(cA);
        cA = cA2;
    }

    getExpectedToken(&cA2, "st_kv_database_conf");
    stHash_insert(hash, stString_copy("conf_type"), getKeyValue(&cA2, "type"));
    stHash_insert(hash, stString_copy("db_tag"), getNextToken(&cA2));

    char *key;
    while (((key = getNextToken(&cA2)) != NULL) && !stString_eq(key, "st_kv_database_conf")) {
        char *value = getNextToken(&cA2);
        if (value == NULL) {
            stThrowNew(ST_KV_DATABASE_EXCEPTION_ID, "failed to get value for key \"%s\"", key);
        }
        if (stHash_search(hash, key) != NULL) {
            stThrowNew(ST_KV_DATABASE_EXCEPTION_ID, "got a duplicate entry in the database conf string \"%s\"", key);
        }
        stHash_insert(hash, key, value);
    }
    if (key == NULL || !stString_eq(key, "st_kv_database_conf")) {
        //Guard against running off the end of the string before the closing tag is seen.
        stThrowNew(ST_KV_DATABASE_EXCEPTION_ID, "got an unexpected final entry \"%s\"", key == NULL ? "" : key);
    }
    free(key);
    free(cA);
    return hash;
}
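/*
 * Illustrative usage sketch (not part of the original file). The XML layout below is an
 * assumption based on the keys this parser and constructFromString look up (conf_type,
 * db_tag, database_dir); it is not a verbatim example taken from the library.
 */
static void exampleHackParseXmlString(void) {
    const char *xmlString =
            "<st_kv_database_conf type=\"tokyo_cabinet\">"
            "<tokyo_cabinet database_dir=\"/tmp/testDb\"/>"
            "</st_kv_database_conf>";
    stHash *hash = hackParseXmlString(xmlString);
    //The parser should have recorded the type attribute, the inner database tag
    //and the database_dir attribute as separate entries.
    assert(stHash_search(hash, "conf_type") != NULL);
    assert(stHash_search(hash, "db_tag") != NULL);
    assert(stHash_search(hash, "database_dir") != NULL);
    stHash_destruct(hash);
}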
static const char *getXmlValueRequired(stHash *hash, const char *key) {
    const char *value = stHash_search(hash, (char*)key);
    if (value == NULL) {
        stThrowNew(ST_KV_DATABASE_EXCEPTION_ID, "did not find a \"%s\" value in the database XML string", key);
    }
    return value;
}
static stKVDatabaseConf *constructFromString(const char *xmlString) {
    stHash *hash = hackParseXmlString(xmlString);
    stKVDatabaseConf *databaseConf = NULL;
    const char *type = getXmlValueRequired(hash, "conf_type");
    const char *dbTag = getXmlValueRequired(hash, "db_tag");
    if (!stString_eq(type, dbTag)) {
        stThrowNew(ST_KV_DATABASE_EXCEPTION_ID,
                "Database XML tag \"%s\" did not match st_kv_database_conf type attribute \"%s\"", dbTag, type);
    }
    if (stString_eq(type, "tokyo_cabinet")) {
        databaseConf = stKVDatabaseConf_constructTokyoCabinet(getXmlValueRequired(hash, "database_dir"));
    } else if (stString_eq(type, "kyoto_tycoon")) {
        databaseConf = stKVDatabaseConf_constructKyotoTycoon(getXmlValueRequired(hash, "host"),
                                                             getXmlPort(hash),
                                                             getXmlTimeout(hash),
                                                             getXMLMaxKTRecordSize(hash),
                                                             getXMLMaxKTBulkSetSize(hash),
                                                             getXMLMaxKTBulkSetNumRecords(hash),
                                                             getXmlValueRequired(hash, "database_dir"),
                                                             stHash_search(hash, "database_name"));
    } else if (stString_eq(type, "mysql")) {
        databaseConf = stKVDatabaseConf_constructMySql(getXmlValueRequired(hash, "host"),
                                                       getXmlPort(hash),
                                                       getXmlValueRequired(hash, "user"),
                                                       getXmlValueRequired(hash, "password"),
                                                       getXmlValueRequired(hash, "database_name"),
                                                       getXmlValueRequired(hash, "table_name"));
    } else {
        stThrowNew(ST_KV_DATABASE_EXCEPTION_ID, "invalid database type \"%s\"", type);
    }
    stHash_destruct(hash);
    return databaseConf;
}
static void test_addBlockToHash_3(CuTest *testCase) {
    // concatenation with 2 bases of interstitial and a sequence length breakpoint
    options_t *options = options_construct();
    options->breakpointPenalty = 10;
    options->interstitialSequence = 5;
    stList *observedList = stList_construct3(0, free);
    stList *expectedList = stList_construct3(0, free);
    stHash *observedHash = createBlockHashFromString("a score=0\n"
            "s reference.chr0 0 13 + 158545518 gcagctgaaaaca\n"
            "s name.chr1 0 10 + 100 ATGT---ATGCCG\n"
            "s name2.chr1 0 10 + 100 ATGT---ATGCCG\n"
            "s name3.chr1 0 13 + 100 GCAGCTGAAAACA\n",
            observedList);
    mafBlock_t *mb = maf_newMafBlockListFromString("a score=0 test\n"
            "s reference.chr0 13 5 + 158545518 ACGTA\n"
            "s name.chr1 12 5 + 100 gtcGG\n"
            "s name2.chr1 10 5 + 100 ATGTg\n"
            "s name3.chr1 50 5 + 100 CCCCC\n",
            3);
    stHash *expectedHash = NULL;
    expectedHash = createBlockHashFromString("a score=0\n"
            "s reference.chr0 0 18 + 158545518 gcagctgaaaaca------------ACGTA\n"
            "s name.chr1 0 17 + 100 ATGT---ATGCCGac----------gtcGG\n"
            "s name2.chr1 0 15 + 100 ATGT---ATGCCG------------ATGTg\n"
            "s name3 0 28 + 28 GCAGCTGAAAACA--NNNNNNNNNNCCCCC\n",
            expectedList);
    row_t *r = stHash_search(expectedHash, "name3");
    r->prevRightPos = 54;
    free(r->prevName);
    r->prevName = stString_copy("name3.chr1");
    r->multipleNames = true;
    stHash *seqHash = createSeqHashFromString("name.chr1", "ATGTATGCCGacgtc"
            "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG"
            "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG");
    mtfseq_t *mtfs = newMtfseqFromString("gcagctgaaaacaACGTA"
            "tttttttttttttttttttttttttttttttt"
            "tttttttttttttttttttttttttttttttttttttttttttttttttt");
    stHash_insert(seqHash, stString_copy("reference.chr0"), mtfs);
    mtfs = newMtfseqFromString("ATGTATGCCGATGTg"
            "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"
            "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC");
    stHash_insert(seqHash, stString_copy("name2.chr1"), mtfs);
    mtfs = newMtfseqFromString("GCAGCTGAAAACAggggggggggggggggggggggggggggggggggggg"
            "CCCCCaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
    stHash_insert(seqHash, stString_copy("name3.chr1"), mtfs);
    addMafBlockToRowHash(observedHash, seqHash, observedList, mb, options);
    CuAssertTrue(testCase, hashesAreEqual(observedHash, expectedHash));
    CuAssertTrue(testCase, listsAreEqual(observedList, expectedList));
    // clean up
    stHash_destruct(observedHash);
    stHash_destruct(expectedHash);
    stHash_destruct(seqHash);
    stList_destruct(observedList);
    stList_destruct(expectedList);
    maf_destroyMafBlockList(mb);
    destroyOptions(options);
}
stHash *buildSegmentToContigPathHash(stList *maximalHaplotypePaths) {
    stHash *segmentToMaximalHaplotypePathHash = stHash_construct();
    for (int64_t i = 0; i < stList_length(maximalHaplotypePaths); i++) {
        stList *maximalHaplotypePath = stList_get(maximalHaplotypePaths, i);
        assert(stList_length(maximalHaplotypePath) > 0);
        for (int64_t j = 0; j < stList_length(maximalHaplotypePath); j++) {
            Segment *segment = stList_get(maximalHaplotypePath, j);
            assert(stHash_search(segmentToMaximalHaplotypePathHash, segment) == NULL);
            assert(stHash_search(segmentToMaximalHaplotypePathHash, segment_getReverse(segment)) == NULL);
            stHash_insert(segmentToMaximalHaplotypePathHash, segment, maximalHaplotypePath);
        }
    }
    return segmentToMaximalHaplotypePathHash;
}
/* Default to 10M. It used to be 175M but since noticing
 * problems in bulk *get* within cactus secondary dbs we
 * crank it way down. Unlike bulk set, in the get we don't
 * know the total size of the requested records. To prevent
 * big data transfers (which are problematic), we reduce the max
 * size of individual records and hope for the best */
static int64_t getXMLMaxKTRecordSize(stHash *hash) {
    const char *value = stHash_search(hash, "max_record_size");
    if (value == NULL) {
        return (int64_t) 10000000;
    } else {
        return stSafeStrToInt64(value);
    }
}
static int getXmlPort(stHash *hash) {
    const char *value = stHash_search(hash, "port");
    if (value == NULL) {
        return 0;
    } else {
        return stSafeStrToUInt32(value);
    }
}
/* Default to tried-and-true value of 10000 */
static int64_t getXMLMaxKTBulkSetNumRecords(stHash *hash) {
    const char *value = stHash_search(hash, "max_bulkset_num_records");
    if (value == NULL) {
        return (int64_t) 10000;
    } else {
        return stSafeStrToInt64(value);
    }
}
/* Default to 175M which seems to be about where the
 * kyoto tycoon network error danger zone starts */
static int64_t getXMLMaxKTBulkSetSize(stHash *hash) {
    const char *value = stHash_search(hash, "max_bulkset_size");
    if (value == NULL) {
        return (int64_t) 183500800;
    } else {
        return stSafeStrToInt64(value);
    }
}
static int getXmlTimeout(stHash *hash) {
    const char *value = stHash_search(hash, "timeout");
    if (value == NULL) {
        // default to -1 -- meaning no timeout
        return -1;
    } else {
        return stSafeStrToUInt32(value);
    }
}
/*
 * Constructs a face from a given Cap
 */
static void buildFaces_constructFromCap(Cap * startingCap,
        stHash *liftedEdgesTable, Flower * flower) {
    Face *face = face_construct(flower);
    stList *topNodes = stList_construct3(16, NULL);
    stList *liftedEdges;
    Cap *cap, *bottomNode, *ancestor;
    int64_t index, index2;

    printf("Constructing new face\n");

    // Establish the list of top nodes
    buildFaces_fillTopNodeList(startingCap, topNodes, liftedEdgesTable);

#ifndef NDEBUG
    // What, no top nodes!?
    if (stList_length(topNodes) == 0)
        abort();
#endif

    // Initialize data structure
    face_allocateSpace(face, stList_length(topNodes));

    // For every top node
    for (index = 0; index < stList_length(topNodes); index++) {
        cap = stList_get(topNodes, index);
        face_setTopNode(face, index, cap);
        liftedEdges = stHash_search(liftedEdgesTable, cap);

        if (!liftedEdges) {
            face_setBottomNodeNumber(face, index, 0);
            continue;
        }

        face_setBottomNodeNumber(face, index, stList_length(liftedEdges));
        // For every bottom node of that top node
        for (index2 = 0; index2 < stList_length(liftedEdges); index2++) {
            bottomNode = ((LiftedEdge *) stList_get(liftedEdges, index2))->bottomNode;
            face_addBottomNode(face, index, bottomNode);
            ancestor = cap_getTopCap(cap_getPositiveOrientation(cap_getAdjacency(bottomNode)));
            if (cap_getAdjacency(cap) != ancestor)
                face_setDerivedDestination(face, index, index2, ancestor);
            else
                face_setDerivedDestination(face, index, index2, NULL);

#ifndef NDEBUG
            // Check that the bottom node is not also one of the top nodes
            assert(!stList_contains(topNodes, cap_getPositiveOrientation(
                    ((LiftedEdge*) stList_get(liftedEdges, index2))->bottomNode)));
#endif
        }
    }

    // Clean up
    stList_destruct(topNodes);
}
static char *segmentWriteFn(Segment *segment) {
    stTree *phylogeneticTree = stHash_search(segmentWriteFn_flowerToPhylogeneticTreeHash,
            block_getFlower(segment_getBlock(segment)));
    assert(phylogeneticTree != NULL);
    char *segmentString = getMaximumLikelihoodString(phylogeneticTree, segment_getBlock(segment));
    //We append a zero to a segment string if it is part of a block containing only a reference segment, else we append a 1.
    //We use these boolean values to determine if a sequence contains only these trivial strings, and is therefore trivial.
    char *appendedSegmentString = stString_print("%s%c ", segmentString,
            block_getInstanceNumber(segment_getBlock(segment)) == 1 ? '0' : '1');
    free(segmentString);
    return appendedSegmentString;
}
bool stNaiveConnectivity_hasEdge(stNaiveConnectivity *connectivity, void *node1, void *node2) {
    struct adjacency *adjList1 = stHash_search(connectivity->nodesToAdjList, node1);
    if (!adjList1)
        return false;
    while (adjList1 != NULL) {
        if (adjList1->toNode == node2) {
            return true;
        }
        adjList1 = adjList1->next;
    }
    return false;
}
/*
 * This builds an adjacency list structure for the sequences. Every sequence position
 * maps in the hash to the column of positions it is aligned with.
 */
static stHash *buildAdjacencyList(stList *pairs, int64_t sequenceNumber) {
    stHash *hash = stHash_construct3((uint64_t (*)(const void *))stIntTuple_hashKey,
            (int (*)(const void *, const void *))stIntTuple_equalsFn,
            (void (*)(void *))stIntTuple_destruct, NULL);
    for(int64_t seq=0; seq<sequenceNumber; seq++) {
        for(int64_t position=0; position<MAX_SEQUENCE_SIZE; position++) {
            stIntTuple *seqPos = stIntTuple_construct2(seq, position);
            stSortedSet *column = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);
            stSortedSet_insert(column, seqPos);
            stHash_insert(hash, seqPos, column);
        }
    }
    stListIterator *it = stList_getIterator(pairs);
    stIntTuple *pair;
    while((pair = stList_getNext(it)) != NULL) {
        stIntTuple *seqPos1 = stIntTuple_construct2(stIntTuple_get(pair, 0), stIntTuple_get(pair, 1));
        stIntTuple *seqPos2 = stIntTuple_construct2(stIntTuple_get(pair, 2), stIntTuple_get(pair, 3));
        stSortedSet *column1 = stHash_search(hash, seqPos1);
        assert(column1 != NULL);
        stSortedSet *column2 = stHash_search(hash, seqPos2);
        assert(column2 != NULL);
        if(column1 != column2) { //Merge the columns
            stSortedSetIterator *it2 = stSortedSet_getIterator(column2);
            stIntTuple *seqPos3;
            while((seqPos3 = stSortedSet_getNext(it2)) != NULL) {
                assert(stSortedSet_search(column1, seqPos3) == NULL);
                stSortedSet_insert(column1, seqPos3);
                assert(stHash_search(hash, seqPos3) == column2);
                stHash_insert(hash, seqPos3, column1);
                assert(stHash_search(hash, seqPos3) == column1);
            }
            stSortedSet_destructIterator(it2);
            stSortedSet_destruct(column2);
        }
        //Cleanup loop.
        stIntTuple_destruct(seqPos1);
        stIntTuple_destruct(seqPos2);
    }
    stList_destructIterator(it);
    return hash;
}
static void printBlockHash(stHash *hash, const char *title) {
    stHashIterator *hit = stHash_getIterator(hash);
    char *key = NULL;
    row_t *r = NULL;
    printf("%s:\n", title);
    while ((key = stHash_getNext(hit)) != NULL) {
        r = stHash_search(hash, key);
        printf("%20s %6"PRIu64" %6"PRIu64" %c %9"PRIu64" %s\n",
               r->name, r->start, r->length, r->strand, r->sourceLength, r->sequence);
    }
    stHash_destructIterator(hit);
}
/*
 * Fill in a hashtable which associates to every node
 * a list of lifted edges
 */
static stHash *buildFaces_computeLiftedEdges(Flower * flower) {
    stHash *liftedEdgesTable = stHash_construct3(buildFaces_hashfunction,
            buildFaces_key_eq_fn, NULL, buildFaces_destructValue);
    Flower_CapIterator *iter = flower_getCapIterator(flower);
    Cap *cap, *attachedAncestor;
    Cap *adjacency, *adjacencyAncestor;
    stList *liftedEdges;
    LiftedEdge *liftedEdge;

    // Iterate through potential bottom nodes
    while ((cap = flower_getNextCap(iter))) {
        // ... check if connected
        if ((adjacency = cap_getAdjacency(cap))) {
            // ... lift
            attachedAncestor = cap_getTopCap(cap);
            adjacencyAncestor = cap_getTopCap(cap_getPositiveOrientation(adjacency));

#ifndef NDEBUG
            assert((attachedAncestor && adjacencyAncestor) || (!attachedAncestor && !adjacencyAncestor));
#endif

            // If root node
            if (attachedAncestor == NULL)
                continue;

            // ... create lifted edge
            liftedEdge = st_malloc(sizeof(LiftedEdge));
            liftedEdge->destination = adjacencyAncestor;
            liftedEdge->bottomNode = cap;

#ifndef NDEBUG
            // Self loop
            if (adjacencyAncestor == attachedAncestor)
                abort();
#endif

            // ... add it to the hashtable
            if ((liftedEdges = stHash_search(liftedEdgesTable, attachedAncestor))) {
                stList_append(liftedEdges, liftedEdge);
            } else {
                liftedEdges = stList_construct3(2, buildFaces_stList_destructElem);
                stList_append(liftedEdges, liftedEdge);
                stHash_insert(liftedEdgesTable, attachedAncestor, liftedEdges);
            }
        }
    }
    flower_destructCapIterator(iter);
    return liftedEdgesTable;
}
static void debugScaffoldPathsP(Cap *cap, stList *haplotypePath,
        stHash *haplotypePathToScaffoldPathHash, stHash *haplotypeToMaximalHaplotypeLengthHash,
        stHash *segmentToMaximalHaplotypePathHash, stList *haplotypeEventStrings,
        stList *contaminationEventStrings, CapCodeParameters *capCodeParameters, bool capDir) {
    int64_t insertLength;
    int64_t deleteLength;
    Cap *otherCap;
    enum CapCode capCode = getCapCode(cap, &otherCap, haplotypeEventStrings, contaminationEventStrings,
            &insertLength, &deleteLength, capCodeParameters);
    if (capCode == SCAFFOLD_GAP || capCode == AMBIGUITY_GAP) {
        Segment *adjacentSegment = getAdjacentCapsSegment(cap);
        assert(adjacentSegment != NULL);
        while (!hasCapInEvents(cap_getEnd(capDir ? segment_get5Cap(adjacentSegment) : segment_get3Cap(adjacentSegment)),
                haplotypeEventStrings)) {
            adjacentSegment = getAdjacentCapsSegment(capDir ? segment_get5Cap(adjacentSegment)
                    : segment_get3Cap(adjacentSegment));
            assert(adjacentSegment != NULL);
        }
        assert(adjacentSegment != NULL);
        assert(hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)); //isHaplotypeEnd(cap_getEnd(segment_get5Cap(adjacentSegment)))
        stIntTuple *j = stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath);
        (void)j;
        assert(j != NULL);
        stList *adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, adjacentSegment);
        if (adjacentHaplotypePath == NULL) {
            adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, segment_getReverse(adjacentSegment));
        }
        assert(adjacentHaplotypePath != NULL);
        assert(adjacentHaplotypePath != haplotypePath);
        stIntTuple *k = stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath);
        (void)k;
        assert(k != NULL);
        assert(stIntTuple_get(j, 0) == stIntTuple_get(k, 0));
        assert(stHash_search(haplotypePathToScaffoldPathHash, haplotypePath)
                == stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath));
    }
}
static bool hashesAreEqual(stHash *observedHash, stHash *expectedHash) {
    stHashIterator *hit = stHash_getIterator(observedHash);
    char *key;
    while ((key = stHash_getNext(hit)) != NULL) {
        if (stHash_search(expectedHash, key) == NULL) {
            printBlockHash(observedHash, "observed");
            printBlockHash(expectedHash, "expected");
            return false;
        }
        if (!rowsAreEqual(stHash_search(observedHash, key), stHash_search(expectedHash, key))) {
            printBlockHash(observedHash, "observed");
            printBlockHash(expectedHash, "expected");
            return false;
        }
    }
    stHash_destructIterator(hit);
    hit = stHash_getIterator(expectedHash);
    while ((key = stHash_getNext(hit)) != NULL) {
        if (stHash_search(observedHash, key) == NULL) {
            printBlockHash(observedHash, "observed");
            printBlockHash(expectedHash, "expected");
            return false;
        }
        if (!rowsAreEqual(stHash_search(observedHash, key), stHash_search(expectedHash, key))) {
            printBlockHash(observedHash, "observed");
            printBlockHash(expectedHash, "expected");
            return false;
        }
    }
    stHash_destructIterator(hit);
    return true;
}
void stNaiveConnectivity_removeNode(stNaiveConnectivity *connectivity, void *node) {
    invalidateCache(connectivity);

    struct adjacency *adjList = stHash_search(connectivity->nodesToAdjList, node);
    while (adjList != NULL) {
        struct adjacency *next = adjList->next; // Have to do this beforehand -- adjList will be freed by next line!
        stNaiveConnectivity_removeEdge(connectivity, node, adjList->toNode);
        adjList = next;
    }
    stHash_remove(connectivity->nodesToAdjList, node);
}
void stNaiveConnectivity_removeEdge(stNaiveConnectivity *connectivity, void *node1, void *node2) {
    invalidateCache(connectivity);

    struct adjacency *adjList1 = stHash_search(connectivity->nodesToAdjList, node1);
    assert(adjList1 != NULL);
    assert(stHash_search(connectivity->nodesToAdjList, node2) != NULL);
    while (adjList1 != NULL) {
        if (adjList1->toNode == node2) {
            break;
        }
        adjList1 = adjList1->next;
    }
    //The edge being removed must actually be present in node1's adjacency list.
    assert(adjList1 != NULL);

    // We can find the link in the other node's adjacency list easily
    struct adjacency *adjList2 = adjList1->inverse;
    assert(adjList2->inverse == adjList1);

    // Now remove the links from the lists, and free them.
    removeEdgeFromAdjList(connectivity, node1, adjList1);
    removeEdgeFromAdjList(connectivity, node2, adjList2);
}
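/*
 * Hedged usage sketch (not part of the original file): exercises the add/has/remove edge calls
 * above. The constructor name stNaiveConnectivity_construct and the use of arbitrary pointers
 * as node identifiers are assumptions made for illustration; only addEdge, hasEdge, removeEdge
 * and removeNode appear in this section.
 */
static void exampleNaiveConnectivity(void) {
    int a, b; //Any distinct pointers serve as node identifiers in this sketch.
    stNaiveConnectivity *connectivity = stNaiveConnectivity_construct(); //Assumed constructor name.
    stNaiveConnectivity_addEdge(connectivity, &a, &b);
    assert(stNaiveConnectivity_hasEdge(connectivity, &a, &b));
    assert(stNaiveConnectivity_hasEdge(connectivity, &b, &a)); //Edges are stored in both directions.
    stNaiveConnectivity_removeEdge(connectivity, &a, &b);
    assert(!stNaiveConnectivity_hasEdge(connectivity, &a, &b));
}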
void flower_reconstructFaces(Flower * flower) {
    flower_destructFaces(flower);
    stHash *liftedEdgesTable = buildFaces_computeLiftedEdges(flower);
    Flower_CapIterator *iter = flower_getCapIterator(flower);
    stList *liftedEdges;
    Cap *current;

    while ((current = flower_getNextCap(iter))) {
        if ((liftedEdges = stHash_search(liftedEdgesTable, current))
                && (stList_length(liftedEdges) >= 1)) {
            buildFaces_constructFromCap(current, liftedEdgesTable, flower);
        }
    }

    stHash_destruct(liftedEdgesTable);
    flower_destructCapIterator(iter);
}
static stList *getEdgesThatBridgeComponents(stList *components,
        stHash *nodesToNonZeroWeightedAdjacencyEdges) {
    /*
     * Get set of adjacency edges that bridge between (have a node in two) components.
     */
    stList *bridgingAdjacencyEdges = stList_construct();
    for (int64_t i = 0; i < stList_length(components); i++) {
        stSortedSet *componentNodes = getNodeSetOfEdges(stList_get(components, i));
        stSortedSetIterator *it = stSortedSet_getIterator(componentNodes);
        stIntTuple *node;
        while ((node = stSortedSet_getNext(it)) != NULL) {
            stList *edges = stHash_search(nodesToNonZeroWeightedAdjacencyEdges, node);
            if (edges != NULL) {
                for (int64_t j = 0; j < stList_length(edges); j++) {
                    stIntTuple *edge = stList_get(edges, j);
                    stIntTuple *node1 = stIntTuple_construct1(stIntTuple_get(edge, 0));
                    stIntTuple *node2 = stIntTuple_construct1(stIntTuple_get(edge, 1));
                    assert(stSortedSet_search(componentNodes, node1) != NULL
                            || stSortedSet_search(componentNodes, node2) != NULL);
                    if (stSortedSet_search(componentNodes, node1) == NULL
                            || stSortedSet_search(componentNodes, node2) == NULL) {
                        stList_append(bridgingAdjacencyEdges, edge);
                    }
                    stIntTuple_destruct(node1);
                    stIntTuple_destruct(node2);
                }
            }
        }
        stSortedSet_destructIterator(it);
        stSortedSet_destruct(componentNodes);
    }
    return bridgingAdjacencyEdges;
}