static void test_stSortedSetIterator(CuTest* testCase) {
    sonLibSortedSetTestSetup();
    int32_t i;
    for(i=0; i<size; i++) {
        stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[i]));
    }
    stSortedSetIterator *iterator = stSortedSet_getIterator(sortedSet);
    CuAssertTrue(testCase, iterator != NULL);
    for(i=0; i<sortedSize; i++) {
        CuAssertIntEquals(testCase, sortedInput[i], stIntTuple_getPosition(stSortedSet_getNext(iterator), 0));
    }
    CuAssertTrue(testCase, stSortedSet_getNext(iterator) == NULL);
    stSortedSetIterator *iterator2 = stSortedSet_copyIterator(iterator);
    CuAssertTrue(testCase, iterator2 != NULL);
    for(i=0; i<sortedSize; i++) {
        CuAssertIntEquals(testCase, sortedInput[sortedSize - 1 - i], stIntTuple_getPosition(stSortedSet_getPrevious(iterator), 0));
        CuAssertIntEquals(testCase, sortedInput[sortedSize - 1 - i], stIntTuple_getPosition(stSortedSet_getPrevious(iterator2), 0));
    }
    CuAssertTrue(testCase, stSortedSet_getPrevious(iterator) == NULL);
    CuAssertTrue(testCase, stSortedSet_getPrevious(iterator2) == NULL);
    stSortedSet_destructIterator(iterator);
    stSortedSet_destructIterator(iterator2);
    sonLibSortedSetTestTeardown();
}
int stSortedSet_equals(stSortedSet *sortedSet1, stSortedSet *sortedSet2) {
    if(stSortedSet_size(sortedSet1) != stSortedSet_size(sortedSet2)) {
        return 0;
    }
    if(!stSortedSet_comparatorsEqual(sortedSet1, sortedSet2)) {
        return 0;
    }
    int (*cmpFn)(const void *, const void *) = stSortedSet_getComparator(sortedSet1)->compareFn;
    stSortedSetIterator *it1 = stSortedSet_getIterator(sortedSet1);
    stSortedSetIterator *it2 = stSortedSet_getIterator(sortedSet2);
    void *o1 = stSortedSet_getNext(it1), *o2 = stSortedSet_getNext(it2);
    while(o1 != NULL && o2 != NULL) {
        if(cmpFn(o1, o2) != 0) {
            stSortedSet_destructIterator(it1);
            stSortedSet_destructIterator(it2);
            return 0;
        }
        o1 = stSortedSet_getNext(it1);
        o2 = stSortedSet_getNext(it2);
    }
    stSortedSet_destructIterator(it1);
    stSortedSet_destructIterator(it2);
    return 1;
}
void test_stList_getSortedSet(CuTest *testCase) {
    setup();
    stSortedSet *sortedSet = stList_getSortedSet(list, (int (*)(const void *, const void *))strcmp);
    CuAssertTrue(testCase, stSortedSet_size(sortedSet) == stringNumber);
    stSortedSetIterator *iterator = stSortedSet_getIterator(sortedSet);
    CuAssertStrEquals(testCase, "five", stSortedSet_getNext(iterator));
    CuAssertStrEquals(testCase, "four", stSortedSet_getNext(iterator));
    CuAssertStrEquals(testCase, "one", stSortedSet_getNext(iterator));
    CuAssertStrEquals(testCase, "three", stSortedSet_getNext(iterator));
    CuAssertStrEquals(testCase, "two", stSortedSet_getNext(iterator));
    stSortedSet_destructIterator(iterator);
    stSortedSet_destruct(sortedSet);
    teardown();
}
static void makeMatchingPerfect(stList *chosenEdges, stList *adjacencyEdges, stSortedSet *nodes) {
    /*
     * While the number of edges is less than a perfect matching, add random edges.
     */
    stSortedSet *attachedNodes = getNodeSetOfEdges(chosenEdges);
    stHash *nodesToAdjacencyEdges = getNodesToEdgesHash(adjacencyEdges);
    stIntTuple *pNode = NULL;
    stSortedSetIterator *it = stSortedSet_getIterator(nodes);
    stIntTuple *node;
    while((node = stSortedSet_getNext(it)) != NULL) {
        if (stSortedSet_search(attachedNodes, node) == NULL) {
            if (pNode == NULL) {
                pNode = node;
            } else {
                stList_append(chosenEdges, getEdgeForNodes(stIntTuple_get(pNode, 0), stIntTuple_get(node, 0), nodesToAdjacencyEdges));
                pNode = NULL;
            }
        }
    }
    stSortedSet_destructIterator(it);
    assert(pNode == NULL);
    stSortedSet_destruct(attachedNodes);
    assert(stList_length(chosenEdges) * 2 == stSortedSet_size(nodes));
    stHash_destruct(nodesToAdjacencyEdges);
}
static void test_stSortedSetIterator_getIteratorFrom(CuTest* testCase) {
    sonLibSortedSetTestSetup();
    int32_t i;
    for(i=0; i<size; i++) {
        stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[i]));
    }
    stSortedSetIterator *iterator = stSortedSet_getIterator(sortedSet);
    CuAssertTrue(testCase, iterator != NULL);
    for(i=0; i<sortedSize; i++) {
        stSortedSetIterator *it = stSortedSet_getIteratorFrom(sortedSet, stIntTuple_construct(1, sortedInput[i]));
        stIntTuple *intTuple = stSortedSet_getNext(it);
        CuAssertTrue(testCase, intTuple != NULL);
        CuAssertIntEquals(testCase, sortedInput[i], stIntTuple_getPosition(intTuple, 0));
        stSortedSet_destructIterator(it);
    }
    stTry {
        stSortedSet_getIteratorFrom(sortedSet, stIntTuple_construct(1, 7)); //This number is not in the input.
        CuAssertTrue(testCase, 0);
    } stCatch(except) {
        CuAssertTrue(testCase, stExcept_getId(except) == SORTED_SET_EXCEPTION_ID);
    } stTryEnd
    sonLibSortedSetTestTeardown();
}
/*
 * Does the actual depth-first search to detect whether the adjacency structure has an
 * acyclic ordering (returns 1 if a cycle is found).
 */
static int64_t dfs(stHash *adjacencyList, stIntTuple *seqPos, stSortedSet *started, stSortedSet *done) {
    if(stSortedSet_search(started, seqPos) != NULL) {
        if(stSortedSet_search(done, seqPos) == NULL) {
            //We have detected a cycle
            //st_logInfo("I have cycle %" PRIi64 " %" PRIi64 "\n", stIntTuple_getPosition(seqPos, 0), stIntTuple_getPosition(seqPos, 1));
            return 1;
        }
        //We have already explored this area, but no cycle.
        return 0;
    }
    stSortedSet_insert(started, seqPos);
    int64_t cycle = 0;
    stIntTuple *nextSeqPos = stIntTuple_construct2(stIntTuple_get(seqPos, 0), stIntTuple_get(seqPos, 1) + 1);
    stSortedSet *column = stHash_search(adjacencyList, nextSeqPos);
    if(column != NULL) { //It is in the adjacency list, so we can do the recursion
        assert(stSortedSet_search(column, nextSeqPos) != NULL);
        stSortedSetIterator *it = stSortedSet_getIterator(column);
        stIntTuple *seqPos2;
        while((seqPos2 = stSortedSet_getNext(it)) != NULL) {
            cycle = cycle || dfs(adjacencyList, seqPos2, started, done);
        }
        stSortedSet_destructIterator(it);
    }
    stIntTuple_destruct(nextSeqPos);
    stSortedSet_insert(done, seqPos);
    return cycle;
}
static stHash *getComponents(stList *filteredEdges) {
    /*
     * A kind of stupid reimplementation of the greedy function, done just to trap typos.
     */
    stHash *nodesToComponents = stHash_construct3((uint64_t(*)(const void *)) stIntTuple_hashKey,
            (int(*)(const void *, const void *)) stIntTuple_equalsFn, NULL, NULL);
    for (int64_t i = 0; i < stList_length(nodes); i++) {
        stIntTuple *node = stList_get(nodes, i);
        stSortedSet *component = stSortedSet_construct();
        stSortedSet_insert(component, node);
        stHash_insert(nodesToComponents, node, component);
    }
    for (int64_t i = 0; i < stList_length(filteredEdges); i++) {
        stIntTuple *edge = stList_get(filteredEdges, i);
        stIntTuple *node1 = stIntTuple_construct1(stIntTuple_get(edge, 1));
        stIntTuple *node2 = stIntTuple_construct1(stIntTuple_get(edge, 2));
        stSortedSet *component1 = stHash_search(nodesToComponents, node1);
        stSortedSet *component2 = stHash_search(nodesToComponents, node2);
        assert(component1 != NULL && component2 != NULL);
        if (component1 != component2) {
            stSortedSet *component3 = stSortedSet_getUnion(component1, component2);
            stSortedSetIterator *setIt = stSortedSet_getIterator(component3);
            stIntTuple *node3;
            while ((node3 = stSortedSet_getNext(setIt)) != NULL) {
                stHash_insert(nodesToComponents, node3, component3);
            }
            stSortedSet_destructIterator(setIt);
            stSortedSet_destruct(component1);
            stSortedSet_destruct(component2);
        }
        stIntTuple_destruct(node1);
        stIntTuple_destruct(node2);
    }
    return nodesToComponents;
}
stSortedSet *stSortedSet_copyConstruct(stSortedSet *sortedSet, void (*destructElementFn)(void *)) {
    stSortedSet *sortedSet2 = stSortedSet_construct3(stSortedSet_getComparator(sortedSet)->compareFn, destructElementFn);
    stSortedSetIterator *it = stSortedSet_getIterator(sortedSet);
    void *o;
    while((o = stSortedSet_getNext(it)) != NULL) {
        stSortedSet_insert(sortedSet2, o);
    }
    stSortedSet_destructIterator(it);
    return sortedSet2;
}
stPinch *getNextAlignedPairAlignment(stSortedSetIterator *it) {
    AlignedPair *alignedPair = stSortedSet_getNext(it);
    if (alignedPair == NULL) {
        return NULL;
    }
    static stPinch pinch; //Function-static storage: the returned pinch is overwritten by the next call.
    stPinch_fillOut(&pinch, alignedPair->subsequenceIdentifier, alignedPair->reverse->subsequenceIdentifier,
            alignedPair->position, alignedPair->reverse->position, 1,
            alignedPair->strand == alignedPair->reverse->strand);
    return &pinch;
}
void writeEndAlignmentToDisk(End *end, stSortedSet *endAlignment, FILE *fileHandle) {
    fprintf(fileHandle, "%s %" PRIi64 "\n", cactusMisc_nameToStringStatic(end_getName(end)), stSortedSet_size(endAlignment));
    stSortedSetIterator *it = stSortedSet_getIterator(endAlignment);
    AlignedPair *aP;
    while((aP = stSortedSet_getNext(it)) != NULL) {
        fprintf(fileHandle, "%" PRIi64 " %" PRIi64 " %i %" PRIi64 " ", aP->subsequenceIdentifier, aP->position, aP->strand, aP->score);
        aP = aP->reverse;
        fprintf(fileHandle, "%" PRIi64 " %" PRIi64 " %i %" PRIi64 "\n", aP->subsequenceIdentifier, aP->position, aP->strand, aP->score);
    }
    stSortedSet_destructIterator(it);
}
stList *stSortedSet_getList(stSortedSet *sortedSet) {
    stList *list = stList_construct2(stSortedSet_size(sortedSet));
    stSortedSetIterator *it = stSortedSet_getIterator(sortedSet);
    void *o;
    int32_t i = 0;
    while((o = stSortedSet_getNext(it)) != NULL) {
        stList_set(list, i++, o);
    }
    assert(i == stSortedSet_size(sortedSet));
    stSortedSet_destructIterator(it);
    return list;
}
/* Check that all tuple records in a set are present and have the expected
 * value. The expected value in the set is multiplied by valueMult to get
 * the actual expected value. */
static void readWriteAndRemoveRecordsLotsCheck(CuTest *testCase, stSortedSet *set, int valueMult) {
    CuAssertIntEquals(testCase, stSortedSet_size(set), stKVDatabase_getNumberOfRecords(database));
    stSortedSetIterator *it = stSortedSet_getIterator(set);
    stIntTuple *tuple;
    while ((tuple = stSortedSet_getNext(it)) != NULL) {
        int32_t *value = (int32_t *) stKVDatabase_getRecord(database, stIntTuple_getPosition(tuple, 0));
        CuAssertTrue(testCase, stKVDatabase_containsRecord(database, stIntTuple_getPosition(tuple, 0)));
        CuAssertIntEquals(testCase, valueMult*stIntTuple_getPosition(tuple, 0), *value);
        free(value);
    }
    stSortedSet_destructIterator(it);
}
stSortedSet *stSortedSet_getUnion(stSortedSet *sortedSet1, stSortedSet *sortedSet2) {
    if(!stSortedSet_comparatorsEqual(sortedSet1, sortedSet2)) {
        stThrowNew(SORTED_SET_EXCEPTION_ID, "Comparators are not equal for creating the union of two sorted sets");
    }
    stSortedSet *sortedSet3 = stSortedSet_construct3(stSortedSet_getComparator(sortedSet1)->compareFn, NULL);
    //Add those from sortedSet1
    stSortedSetIterator *it = stSortedSet_getIterator(sortedSet1);
    void *o;
    while((o = stSortedSet_getNext(it)) != NULL) {
        stSortedSet_insert(sortedSet3, o);
    }
    stSortedSet_destructIterator(it);
    //Add those from sortedSet2
    it = stSortedSet_getIterator(sortedSet2);
    while((o = stSortedSet_getNext(it)) != NULL) {
        stSortedSet_insert(sortedSet3, o);
    }
    stSortedSet_destructIterator(it);
    return sortedSet3;
}
stSortedSet *stSortedSet_getDifference(stSortedSet *sortedSet1, stSortedSet *sortedSet2) {
    if(!stSortedSet_comparatorsEqual(sortedSet1, sortedSet2)) {
        stThrowNew(SORTED_SET_EXCEPTION_ID, "Comparators are not equal for creating the sorted set difference");
    }
    stSortedSet *sortedSet3 = stSortedSet_construct3(stSortedSet_getComparator(sortedSet1)->compareFn, NULL);
    //Add those from sortedSet1 only if they are not in sortedSet2
    stSortedSetIterator *it = stSortedSet_getIterator(sortedSet1);
    void *o;
    while((o = stSortedSet_getNext(it)) != NULL) {
        if(stSortedSet_search(sortedSet2, o) == NULL) {
            stSortedSet_insert(sortedSet3, o);
        }
    }
    stSortedSet_destructIterator(it);
    return sortedSet3;
}
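/*
 * Illustrative usage sketch (not part of the original sources): shows how the set-algebra
 * helpers above (stSortedSet_getUnion, stSortedSet_getDifference, stSortedSet_equals)
 * compose, assuming the sonLib sorted-set and stIntTuple APIs used throughout this file.
 * The function name exampleSetAlgebra and the element counts are made up for illustration.
 */
static void exampleSetAlgebra(void) {
    stSortedSet *a = stSortedSet_construct3((int (*)(const void *, const void *)) stIntTuple_cmpFn,
            (void (*)(void *)) stIntTuple_destruct);
    stSortedSet *b = stSortedSet_construct3((int (*)(const void *, const void *)) stIntTuple_cmpFn,
            (void (*)(void *)) stIntTuple_destruct);
    for (int64_t i = 0; i < 10; i++) {
        stSortedSet_insert(a, stIntTuple_construct1(i));     //a holds tuples 0..9
        stSortedSet_insert(b, stIntTuple_construct1(i + 5)); //b holds tuples 5..14
    }
    stSortedSet *u = stSortedSet_getUnion(a, b);      //0..14, 15 elements
    stSortedSet *d = stSortedSet_getDifference(a, b); //0..4, 5 elements
    assert(stSortedSet_size(u) == 15);
    assert(stSortedSet_size(d) == 5);
    assert(!stSortedSet_equals(a, b));
    //u and d were built with a NULL element destructor (see stSortedSet_getUnion above),
    //so only a and b free the tuples they own.
    stSortedSet_destruct(u);
    stSortedSet_destruct(d);
    stSortedSet_destruct(a);
    stSortedSet_destruct(b);
}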
static stList *getEdgesThatBridgeComponents(stList *components, stHash *nodesToNonZeroWeightedAdjacencyEdges) {
    /*
     * Get the set of adjacency edges that bridge between components, i.e. that have a node in two different components.
     */
    stList *bridgingAdjacencyEdges = stList_construct();
    for (int64_t i = 0; i < stList_length(components); i++) {
        stSortedSet *componentNodes = getNodeSetOfEdges(stList_get(components, i));
        stSortedSetIterator *it = stSortedSet_getIterator(componentNodes);
        stIntTuple *node;
        while ((node = stSortedSet_getNext(it)) != NULL) {
            stList *edges = stHash_search(nodesToNonZeroWeightedAdjacencyEdges, node);
            if (edges != NULL) {
                for (int64_t j = 0; j < stList_length(edges); j++) {
                    stIntTuple *edge = stList_get(edges, j);
                    stIntTuple *node1 = stIntTuple_construct1(stIntTuple_get(edge, 0));
                    stIntTuple *node2 = stIntTuple_construct1(stIntTuple_get(edge, 1));
                    assert(stSortedSet_search(componentNodes, node1) != NULL || stSortedSet_search(componentNodes, node2) != NULL);
                    if (stSortedSet_search(componentNodes, node1) == NULL || stSortedSet_search(componentNodes, node2) == NULL) {
                        stList_append(bridgingAdjacencyEdges, edge);
                    }
                    stIntTuple_destruct(node1);
                    stIntTuple_destruct(node2);
                }
            }
        }
        stSortedSet_destructIterator(it);
        stSortedSet_destruct(componentNodes);
    }
    return bridgingAdjacencyEdges;
}
/*
 * Builds an adjacency-list structure for the sequences. Every sequence position
 * has a column in the hash recording the positions it can be aligned with.
 */
static stHash *buildAdjacencyList(stList *pairs, int64_t sequenceNumber) {
    stHash *hash = stHash_construct3((uint64_t (*)(const void *))stIntTuple_hashKey,
            (int (*)(const void *, const void *))stIntTuple_equalsFn, (void (*)(void *))stIntTuple_destruct, NULL);
    for(int64_t seq=0; seq<sequenceNumber; seq++) {
        for(int64_t position=0; position<MAX_SEQUENCE_SIZE; position++) {
            stIntTuple *seqPos = stIntTuple_construct2(seq, position);
            stSortedSet *column = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);
            stSortedSet_insert(column, seqPos);
            stHash_insert(hash, seqPos, column);
        }
    }
    stListIterator *it = stList_getIterator(pairs);
    stIntTuple *pair;
    while((pair = stList_getNext(it)) != NULL) {
        stIntTuple *seqPos1 = stIntTuple_construct2(stIntTuple_get(pair, 0), stIntTuple_get(pair, 1));
        stIntTuple *seqPos2 = stIntTuple_construct2(stIntTuple_get(pair, 2), stIntTuple_get(pair, 3));
        stSortedSet *column1 = stHash_search(hash, seqPos1);
        assert(column1 != NULL);
        stSortedSet *column2 = stHash_search(hash, seqPos2);
        assert(column2 != NULL);
        if(column1 != column2) { //Merge the columns
            stSortedSetIterator *it2 = stSortedSet_getIterator(column2);
            stIntTuple *seqPos3;
            while((seqPos3 = stSortedSet_getNext(it2)) != NULL) {
                assert(stSortedSet_search(column1, seqPos3) == NULL);
                stSortedSet_insert(column1, seqPos3);
                assert(stHash_search(hash, seqPos3) == column2);
                stHash_insert(hash, seqPos3, column1);
                assert(stHash_search(hash, seqPos3) == column1);
            }
            stSortedSet_destructIterator(it2);
            stSortedSet_destruct(column2);
        }
        //Cleanup loop.
        stIntTuple_destruct(seqPos1);
        stIntTuple_destruct(seqPos2);
    }
    stList_destructIterator(it);
    return hash;
}
Cap *flower_getNextCap(Flower_CapIterator *capIterator) {
    return stSortedSet_getNext(capIterator);
}

Segment *block_getNext(Block_InstanceIterator *iterator) {
    return block_getInstanceP(iterator->block, stSortedSet_getNext(iterator->iterator));
}

Event *eventTree_getNext(EventTree_Iterator *iterator) {
    return stSortedSet_getNext(iterator);
}

Face *flower_getNextFace(Flower_FaceIterator *faceIterator) {
    return stSortedSet_getNext(faceIterator);
}

Chain *flower_getNextChain(Flower_ChainIterator *chainIterator) {
    return stSortedSet_getNext(chainIterator);
}

Group *flower_getNextGroup(Flower_GroupIterator *groupIterator) {
    return stSortedSet_getNext(groupIterator);
}

Block *flower_getNextBlock(Flower_BlockIterator *blockIterator) {
    return stSortedSet_getNext(blockIterator);
}

Segment *flower_getNextSegment(Flower_SegmentIterator *segmentIterator) {
    return stSortedSet_getNext(segmentIterator);
}

End *flower_getNextEnd(Flower_EndIterator *endIterator) {
    return stSortedSet_getNext(endIterator);
}
static void readWriteAndRemoveRecordsLotsIteration(CuTest *testCase, int numRecords, bool reopenDatabase) {
    //Make a big old list of records..
    stSortedSet *set = stSortedSet_construct3((int(*)(const void *, const void *)) stIntTuple_cmpFn,
            (void(*)(void *)) stIntTuple_destruct);
    while (stSortedSet_size(set) < numRecords) {
        int32_t key = st_randomInt(0, 100 * numRecords);
        stIntTuple *tuple = stIntTuple_construct(1, key);
        if (stSortedSet_search(set, tuple) == NULL) {
            CuAssertTrue(testCase, !stKVDatabase_containsRecord(database, key));
            stSortedSet_insert(set, tuple);
            stKVDatabase_insertRecord(database, key, &key, sizeof(int32_t));
            CuAssertTrue(testCase, stKVDatabase_containsRecord(database, key));
        } else {
            CuAssertTrue(testCase, stKVDatabase_containsRecord(database, key));
            stIntTuple_destruct(tuple); // already in db
        }
    }
    readWriteAndRemoveRecordsLotsCheck(testCase, set, 1);
    //Update all records to negate values
    stSortedSetIterator *it = stSortedSet_getIterator(set);
    stIntTuple *tuple;
    while ((tuple = stSortedSet_getNext(it)) != NULL) {
        int32_t *value = (int32_t *) stKVDatabase_getRecord(database, stIntTuple_getPosition(tuple, 0));
        *value *= -1;
        stKVDatabase_updateRecord(database, stIntTuple_getPosition(tuple, 0), value, sizeof(int32_t));
        CuAssertTrue(testCase, stKVDatabase_containsRecord(database, stIntTuple_getPosition(tuple, 0)));
        free(value);
    }
    stSortedSet_destructIterator(it);
    readWriteAndRemoveRecordsLotsCheck(testCase, set, -1);
    //Try optionally committing the transaction and reloading the database..
    if (reopenDatabase) {
        //stKVDatabase_commitTransaction(database);
        stKVDatabase_destruct(database);
        database = stKVDatabase_construct(conf, false);
        //stKVDatabase_startTransaction(database);
    }
    //Now remove each one..
    it = stSortedSet_getIterator(set);
    while ((tuple = stSortedSet_getNext(it)) != NULL) {
        CuAssertTrue(testCase, stKVDatabase_containsRecord(database, stIntTuple_getPosition(tuple, 0)));
        stKVDatabase_removeRecord(database, stIntTuple_getPosition(tuple, 0));
        CuAssertTrue(testCase, !stKVDatabase_containsRecord(database, stIntTuple_getPosition(tuple, 0)));
        //Test we get exception if we remove twice.
        stTry {
            stKVDatabase_removeRecord(database, stIntTuple_getPosition(tuple, 0));
            CuAssertTrue(testCase, 0);
        } stCatch(except) {
            CuAssertTrue(testCase, stExcept_getId(except) == ST_KV_DATABASE_EXCEPTION_ID);
        } stTryEnd;
    }
    stSortedSet_destructIterator(it);
    CuAssertIntEquals(testCase, 0, stKVDatabase_getNumberOfRecords(database));
    stSortedSet_destruct(set);
}
Sequence *flower_getNextSequence(Flower_SequenceIterator *sequenceIterator) {
    return stSortedSet_getNext(sequenceIterator);
}
void cactusDisk_write(CactusDisk *cactusDisk) {
    Flower *flower;
    int64_t recordSize;
    stList *removeRequests = stList_construct3(0, (void (*)(void *)) stIntTuple_destruct);
    st_logDebug("Starting to write the cactus to disk\n");
    stSortedSetIterator *it = stSortedSet_getIterator(cactusDisk->flowers);
    //Sort flowers to update.
    while ((flower = stSortedSet_getNext(it)) != NULL) {
        cactusDisk_addUpdateRequest(cactusDisk, flower);
    }
    stSortedSet_destructIterator(it);
    st_logDebug("Got the flowers to update\n");
    //Remove nets that are marked for deletion..
    it = stSortedSet_getIterator(cactusDisk->flowerNamesMarkedForDeletion);
    char *nameString;
    while ((nameString = stSortedSet_getNext(it)) != NULL) {
        Name name = cactusMisc_stringToName(nameString);
        if (containsRecord(cactusDisk, name)) {
            stList_append(cactusDisk->updateRequests, stKVDatabaseBulkRequest_constructUpdateRequest(name, &name, 0)); //We set it to null in the first atomic operation.
            stList_append(removeRequests, stIntTuple_construct1(name));
        }
    }
    stSortedSet_destructIterator(it);
    st_logDebug("Avoided updating nets marked for deletion\n");
    // Insert and/or update meta-sequences.
    it = stSortedSet_getIterator(cactusDisk->metaSequences);
    MetaSequence *metaSequence;
    while ((metaSequence = stSortedSet_getNext(it)) != NULL) {
        void *vA = binaryRepresentation_makeBinaryRepresentation(metaSequence,
                (void (*)(void *, void (*)(const void * ptr, size_t size, size_t count))) metaSequence_writeBinaryRepresentation,
                &recordSize);
        //Compression
        vA = compress(vA, &recordSize);
        if (!containsRecord(cactusDisk, metaSequence_getName(metaSequence))) {
            stList_append(cactusDisk->updateRequests,
                    stKVDatabaseBulkRequest_constructInsertRequest(metaSequence_getName(metaSequence), vA, recordSize));
        } else {
            stList_append(cactusDisk->updateRequests,
                    stKVDatabaseBulkRequest_constructUpdateRequest(metaSequence_getName(metaSequence), vA, recordSize));
        }
        free(vA);
    }
    stSortedSet_destructIterator(it);
    st_logDebug("Got the sequences we are going to add to the database.\n");
    if (!containsRecord(cactusDisk, CACTUS_DISK_PARAMETER_KEY)) { //We only write the parameters once.
        //Finally the database info.
        void *cactusDiskParameters = binaryRepresentation_makeBinaryRepresentation(cactusDisk,
                (void (*)(void *, void (*)(const void * ptr, size_t size, size_t count))) cactusDisk_writeBinaryRepresentation,
                &recordSize);
        //Compression
        cactusDiskParameters = compress(cactusDiskParameters, &recordSize);
        stList_append(cactusDisk->updateRequests,
                stKVDatabaseBulkRequest_constructInsertRequest(CACTUS_DISK_PARAMETER_KEY, cactusDiskParameters, recordSize));
        free(cactusDiskParameters);
    }
    st_logDebug("Checked if need to write the initial parameters\n");
    if (stList_length(cactusDisk->updateRequests) > 0) {
        st_logDebug("Going to write %" PRIi64 " updates\n", stList_length(cactusDisk->updateRequests));
        stTry {
            st_logDebug("Writing %" PRIi64 " updates\n", stList_length(cactusDisk->updateRequests));
            assert(stList_length(cactusDisk->updateRequests) > 0);
            stKVDatabase_bulkSetRecords(cactusDisk->database, cactusDisk->updateRequests);
        } stCatch(except) {
            stThrowNewCause(except, ST_KV_DATABASE_EXCEPTION_ID,
                    "Failed when trying to set records in updating the cactus disk");
        } stTryEnd;
    }
static stHash *getScaffoldPathsP(stList *haplotypePaths, stHash *haplotypePathToScaffoldPathHash,
        stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters) {
    stHash *haplotypeToMaximalHaplotypeLengthHash = buildContigPathToContigPathLengthHash(haplotypePaths);
    stHash *segmentToMaximalHaplotypePathHash = buildSegmentToContigPathHash(haplotypePaths);
    for (int64_t i = 0; i < stList_length(haplotypePaths); i++) {
        stSortedSet *bucket = stSortedSet_construct();
        stHash_insert(haplotypePathToScaffoldPathHash, stList_get(haplotypePaths, i), bucket);
        stSortedSet_insert(bucket, stList_get(haplotypePaths, i));
    }
    for (int64_t i = 0; i < stList_length(haplotypePaths); i++) {
        stList *haplotypePath = stList_get(haplotypePaths, i);
        assert(stList_length(haplotypePath) > 0);
        Segment *_5Segment = stList_get(haplotypePath, 0);
        if (!segment_getStrand(_5Segment)) {
            _5Segment = segment_getReverse(stList_get(haplotypePath, stList_length(haplotypePath) - 1));
        }
        assert(segment_getStrand(_5Segment));
        if (getAdjacentCapsSegment(segment_get5Cap(_5Segment)) != NULL) {
            assert(!trueAdjacency(segment_get5Cap(_5Segment), haplotypeEventStrings));
        }
        int64_t insertLength;
        int64_t deleteLength;
        Cap *otherCap;
        enum CapCode _5CapCode = getCapCode(segment_get5Cap(_5Segment), &otherCap, haplotypeEventStrings,
                contaminationEventStrings, &insertLength, &deleteLength, capCodeParameters);
        if (_5CapCode == SCAFFOLD_GAP || _5CapCode == AMBIGUITY_GAP) {
            assert(stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath) != NULL);
            int64_t j = stIntTuple_get(stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath), 0);
            Segment *adjacentSegment = getAdjacentCapsSegment(segment_get5Cap(_5Segment));
            assert(adjacentSegment != NULL);
            while (!hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)) { //is not a haplotype end
                adjacentSegment = getAdjacentCapsSegment(segment_get5Cap(adjacentSegment));
                assert(adjacentSegment != NULL);
            }
            assert(adjacentSegment != NULL);
            assert(hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)); //is a haplotype end
            stList *adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, adjacentSegment);
            if (adjacentHaplotypePath == NULL) {
                adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, segment_getReverse(adjacentSegment));
            }
            assert(adjacentHaplotypePath != NULL);
            assert(adjacentHaplotypePath != haplotypePath);
            assert(stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath) != NULL);
            int64_t k = stIntTuple_get(stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath), 0);
            //Now merge the buckets and make new int tuples..
            stSortedSet *bucket1 = stHash_search(haplotypePathToScaffoldPathHash, haplotypePath);
            stSortedSet *bucket2 = stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath);
            assert(bucket1 != NULL);
            assert(bucket2 != NULL);
            assert(bucket1 != bucket2);
            stSortedSet *bucket3 = stSortedSet_getUnion(bucket1, bucket2);
            stSortedSetIterator *bucketIt = stSortedSet_getIterator(bucket3);
            stList *l;
            while ((l = stSortedSet_getNext(bucketIt)) != NULL) {
                //Do the bucket first
                assert(stHash_search(haplotypePathToScaffoldPathHash, l) == bucket1 || stHash_search(haplotypePathToScaffoldPathHash, l) == bucket2);
                stHash_remove(haplotypePathToScaffoldPathHash, l);
                stHash_insert(haplotypePathToScaffoldPathHash, l, bucket3);
                //Now the length
                stIntTuple *m = stHash_remove(haplotypeToMaximalHaplotypeLengthHash, l);
                assert(m != NULL);
                assert(stIntTuple_get(m, 0) == j || stIntTuple_get(m, 0) == k);
                stHash_insert(haplotypeToMaximalHaplotypeLengthHash, l, stIntTuple_construct1(j + k));
                stIntTuple_destruct(m);
            }
            assert(stHash_search(haplotypePathToScaffoldPathHash, haplotypePath) == bucket3);
            assert(stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath) == bucket3);
            stSortedSet_destructIterator(bucketIt);
        }
    }
    stHash_destruct(segmentToMaximalHaplotypePathHash);
    return haplotypeToMaximalHaplotypeLengthHash;
}
int main(int argc, char *argv[]) {
    char * logLevelString = NULL;
    char * cactusDiskDatabaseString = NULL;
    int64_t i, j;
    int64_t spanningTrees = 10;
    int64_t maximumLength = 1500;
    bool useProgressiveMerging = 0;
    float matchGamma = 0.5;
    bool useBanding = 0;
    int64_t k;
    stList *listOfEndAlignmentFiles = NULL;
    char *endAlignmentsToPrecomputeOutputFile = NULL;
    bool calculateWhichEndsToComputeSeparately = 0;
    int64_t largeEndSize = 1000000;
    int64_t chainLengthForBigFlower = 1000000;
    int64_t longChain = 2;
    char *ingroupCoverageFilePath = NULL;
    int64_t minimumSizeToRescue = 1;
    double minimumCoverageToRescue = 0.0;
    PairwiseAlignmentParameters *pairwiseAlignmentBandingParameters = pairwiseAlignmentBandingParameters_construct();

    /*
     * Setup the input parameters for cactus core.
     */
    bool pruneOutStubAlignments = 0;

    /*
     * Parse the options.
     */
    while (1) {
        static struct option long_options[] = {
            { "logLevel", required_argument, 0, 'a' },
            { "cactusDisk", required_argument, 0, 'b' },
            { "help", no_argument, 0, 'h' },
            { "spanningTrees", required_argument, 0, 'i' },
            { "maximumLength", required_argument, 0, 'j' },
            { "useBanding", no_argument, 0, 'k' },
            { "gapGamma", required_argument, 0, 'l' },
            { "matchGamma", required_argument, 0, 'L' },
            { "splitMatrixBiggerThanThis", required_argument, 0, 'o' },
            { "anchorMatrixBiggerThanThis", required_argument, 0, 'p' },
            { "repeatMaskMatrixBiggerThanThis", required_argument, 0, 'q' },
            { "diagonalExpansion", required_argument, 0, 'r' },
            { "constraintDiagonalTrim", required_argument, 0, 't' },
            { "minimumDegree", required_argument, 0, 'u' },
            { "alignAmbiguityCharacters", no_argument, 0, 'w' },
            { "pruneOutStubAlignments", no_argument, 0, 'y' },
            { "minimumIngroupDegree", required_argument, 0, 'A' },
            { "minimumOutgroupDegree", required_argument, 0, 'B' },
            { "precomputedAlignments", required_argument, 0, 'D' },
            { "endAlignmentsToPrecomputeOutputFile", required_argument, 0, 'E' },
            { "useProgressiveMerging", no_argument, 0, 'F' },
            { "calculateWhichEndsToComputeSeparately", no_argument, 0, 'G' },
            { "largeEndSize", required_argument, 0, 'I' },
            { "ingroupCoverageFile", required_argument, 0, 'J' },
            { "minimumSizeToRescue", required_argument, 0, 'K' },
            { "minimumCoverageToRescue", required_argument, 0, 'M' },
            { "minimumNumberOfSpecies", required_argument, 0, 'N' },
            { 0, 0, 0, 0 } };
        int option_index = 0;
        int key = getopt_long(argc, argv, "a:b:hi:j:kl:o:p:q:r:t:u:wy:A:B:D:E:FGI:J:K:L:M:N:", long_options, &option_index);
        if (key == -1) {
            break;
        }
        switch (key) {
            case 'a':
                logLevelString = stString_copy(optarg);
                st_setLogLevelFromString(logLevelString);
                break;
            case 'b':
                cactusDiskDatabaseString = stString_copy(optarg);
                break;
            case 'h':
                usage();
                return 0;
            case 'i':
                i = sscanf(optarg, "%" PRIi64 "", &spanningTrees);
                (void) i;
                assert(i == 1);
                assert(spanningTrees >= 0);
                break;
            case 'j':
                i = sscanf(optarg, "%" PRIi64 "", &maximumLength);
                assert(i == 1);
                assert(maximumLength >= 0);
                break;
            case 'k':
                useBanding = !useBanding;
                break;
            case 'l':
                i = sscanf(optarg, "%f", &pairwiseAlignmentBandingParameters->gapGamma);
                assert(i == 1);
                assert(pairwiseAlignmentBandingParameters->gapGamma >= 0.0);
                break;
            case 'L':
                i = sscanf(optarg, "%f", &matchGamma);
                assert(i == 1);
                assert(matchGamma >= 0.0);
                break;
            case 'o':
                i = sscanf(optarg, "%" PRIi64 "", &k);
                assert(i == 1);
                assert(k >= 0);
                pairwiseAlignmentBandingParameters->splitMatrixBiggerThanThis = (int64_t) k * k;
                break;
            case 'p':
                i = sscanf(optarg, "%" PRIi64 "", &k);
                assert(i == 1);
                assert(k >= 0);
                pairwiseAlignmentBandingParameters->anchorMatrixBiggerThanThis = (int64_t) k * k;
                break;
            case 'q':
                i = sscanf(optarg, "%" PRIi64 "", &k);
                assert(i == 1);
                assert(k >= 0);
                pairwiseAlignmentBandingParameters->repeatMaskMatrixBiggerThanThis = (int64_t) k * k;
                break;
            case 'r':
                i = sscanf(optarg, "%" PRIi64 "", &pairwiseAlignmentBandingParameters->diagonalExpansion);
                assert(i == 1);
                assert(pairwiseAlignmentBandingParameters->diagonalExpansion >= 0);
                assert(pairwiseAlignmentBandingParameters->diagonalExpansion % 2 == 0);
                break;
            case 't':
                i = sscanf(optarg, "%" PRIi64 "", &pairwiseAlignmentBandingParameters->constraintDiagonalTrim);
                assert(i == 1);
                assert(pairwiseAlignmentBandingParameters->constraintDiagonalTrim >= 0);
                break;
            case 'u':
                i = sscanf(optarg, "%" PRIi64 "", &minimumDegree);
                assert(i == 1);
                break;
            case 'w':
                pairwiseAlignmentBandingParameters->alignAmbiguityCharacters = 1;
                break;
            case 'y':
                pruneOutStubAlignments = 1;
                break;
            case 'A':
                i = sscanf(optarg, "%" PRIi64 "", &minimumIngroupDegree);
                assert(i == 1);
                break;
            case 'B':
                i = sscanf(optarg, "%" PRIi64 "", &minimumOutgroupDegree);
                assert(i == 1);
                break;
            case 'D':
                listOfEndAlignmentFiles = stString_split(optarg);
                break;
            case 'E':
                endAlignmentsToPrecomputeOutputFile = stString_copy(optarg);
                break;
            case 'F':
                useProgressiveMerging = 1;
                break;
            case 'G':
                calculateWhichEndsToComputeSeparately = 1;
                break;
            case 'I':
                i = sscanf(optarg, "%" PRIi64 "", &largeEndSize);
                assert(i == 1);
                break;
            case 'J':
                ingroupCoverageFilePath = stString_copy(optarg);
                break;
            case 'K':
                i = sscanf(optarg, "%" PRIi64, &minimumSizeToRescue);
                assert(i == 1);
                break;
            case 'M':
                i = sscanf(optarg, "%lf", &minimumCoverageToRescue);
                assert(i == 1);
                break;
            case 'N':
                i = sscanf(optarg, "%" PRIi64, &minimumNumberOfSpecies);
                if (i != 1) {
                    st_errAbort("Error parsing minimumNumberOfSpecies parameter");
                }
                break;
            default:
                usage();
                return 1;
        }
    }
    st_setLogLevelFromString(logLevelString);

    /*
     * Load the flower disk.
     */
    stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString(cactusDiskDatabaseString);
    CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, 0); //We precache the sequences
    st_logInfo("Set up the flower disk\n");

    /*
     * Load the hmm.
     */
    StateMachine *sM = stateMachine5_construct(fiveState);

    /*
     * For each flower.
     */
    if (calculateWhichEndsToComputeSeparately) {
        stList *flowers = flowerWriter_parseFlowersFromStdin(cactusDisk);
        if (stList_length(flowers) != 1) {
            st_errAbort("We are breaking up a flower's end alignments for precomputation but we have %" PRIi64 " flowers.\n", stList_length(flowers));
        }
        stSortedSet *endsToAlignSeparately = getEndsToAlignSeparately(stList_get(flowers, 0), maximumLength, largeEndSize);
        assert(stSortedSet_size(endsToAlignSeparately) != 1);
        stSortedSetIterator *it = stSortedSet_getIterator(endsToAlignSeparately);
        End *end;
        while ((end = stSortedSet_getNext(it)) != NULL) {
            fprintf(stdout, "%s\t%" PRIi64 "\t%" PRIi64 "\n", cactusMisc_nameToStringStatic(end_getName(end)),
                    end_getInstanceNumber(end), getTotalAdjacencyLength(end));
        }
        return 0; //avoid cleanup costs
        stSortedSet_destructIterator(it);
        stSortedSet_destruct(endsToAlignSeparately);
    } else if (endAlignmentsToPrecomputeOutputFile != NULL) {
        /*
         * In this case we will align a set of ends and save the alignments in a file.
         */
        stList *names = flowerWriter_parseNames(stdin);
        Flower *flower = cactusDisk_getFlower(cactusDisk, *((Name *)stList_get(names, 0)));
        FILE *fileHandle = fopen(endAlignmentsToPrecomputeOutputFile, "w");
        for(int64_t i=1; i<stList_length(names); i++) {
            End *end = flower_getEnd(flower, *((Name *)stList_get(names, i)));
            if (end == NULL) {
                st_errAbort("The end %" PRIi64 " was not found in the flower\n", *((Name *)stList_get(names, i)));
            }
            stSortedSet *endAlignment = makeEndAlignment(sM, end, spanningTrees, maximumLength, useProgressiveMerging,
                    matchGamma, pairwiseAlignmentBandingParameters);
            writeEndAlignmentToDisk(end, endAlignment, fileHandle);
            stSortedSet_destruct(endAlignment);
        }
        fclose(fileHandle);
        return 0; //avoid cleanup costs
        stList_destruct(names);
        st_logInfo("Finished precomputing end alignments\n");
    } else {
        /*
         * Compute complete flower alignments, possibly loading some precomputed alignments.
         */
        bedRegion *bedRegions = NULL;
        size_t numBeds = 0;
        if (ingroupCoverageFilePath != NULL) {
            // Pre-load the mmap for the coverage file.
            FILE *coverageFile = fopen(ingroupCoverageFilePath, "rb");
            if (coverageFile == NULL) {
                st_errnoAbort("Opening coverage file %s failed", ingroupCoverageFilePath);
            }
            fseek(coverageFile, 0, SEEK_END);
            int64_t coverageFileLen = ftell(coverageFile);
            assert(coverageFileLen >= 0);
            assert(coverageFileLen % sizeof(bedRegion) == 0);
            if (coverageFileLen == 0) {
                // mmap doesn't like length-0 mappings, for obvious
                // reasons. Pretend that the coverage file doesn't
                // exist in this case, since it contains no data.
                ingroupCoverageFilePath = NULL;
            } else {
                // Establish a memory mapping for the file.
                bedRegions = mmap(NULL, coverageFileLen, PROT_READ, MAP_SHARED, fileno(coverageFile), 0);
                if (bedRegions == MAP_FAILED) {
                    st_errnoAbort("Failure mapping coverage file");
                }
                numBeds = coverageFileLen / sizeof(bedRegion);
            }
            fclose(coverageFile);
        }
        stList *flowers = flowerWriter_parseFlowersFromStdin(cactusDisk);
        if (listOfEndAlignmentFiles != NULL && stList_length(flowers) != 1) {
            st_errAbort("We have precomputed alignments but %" PRIi64 " flowers to align.\n", stList_length(flowers));
        }
        cactusDisk_preCacheStrings(cactusDisk, flowers);
        for (j = 0; j < stList_length(flowers); j++) {
            flower = stList_get(flowers, j);
            st_logInfo("Processing a flower\n");
            stSortedSet *alignedPairs = makeFlowerAlignment3(sM, flower, listOfEndAlignmentFiles, spanningTrees,
                    maximumLength, useProgressiveMerging, matchGamma, pairwiseAlignmentBandingParameters,
                    pruneOutStubAlignments);
            st_logInfo("Created the alignment: %" PRIi64 " pairs\n", stSortedSet_size(alignedPairs));
            stPinchIterator *pinchIterator = stPinchIterator_constructFromAlignedPairs(alignedPairs, getNextAlignedPairAlignment);

            /*
             * Run the cactus caf functions to build cactus.
             */
            stPinchThreadSet *threadSet = stCaf_setup(flower);
            stCaf_anneal(threadSet, pinchIterator, NULL);
            if (minimumDegree < 2) {
                stCaf_makeDegreeOneBlocks(threadSet);
            }
            if (minimumIngroupDegree > 0 || minimumOutgroupDegree > 0 || minimumDegree > 1) {
                stCaf_melt(flower, threadSet, blockFilterFn, 0, 0, 0, INT64_MAX);
            }
            if (ingroupCoverageFilePath != NULL) {
                // Rescue any sequence that is covered by outgroups
                // but currently unaligned into single-degree blocks.
                stPinchThreadSetIt pinchIt = stPinchThreadSet_getIt(threadSet);
                stPinchThread *thread;
                while ((thread = stPinchThreadSetIt_getNext(&pinchIt)) != NULL) {
                    Cap *cap = flower_getCap(flower, stPinchThread_getName(thread));
                    assert(cap != NULL);
                    Sequence *sequence = cap_getSequence(cap);
                    assert(sequence != NULL);
                    rescueCoveredRegions(thread, bedRegions, numBeds, sequence_getName(sequence),
                            minimumSizeToRescue, minimumCoverageToRescue);
                }
                stCaf_joinTrivialBoundaries(threadSet);
            }
            stCaf_finish(flower, threadSet, chainLengthForBigFlower, longChain, INT64_MAX, INT64_MAX); //Flower now destroyed.
            stPinchThreadSet_destruct(threadSet);
            st_logInfo("Ran the cactus core script.\n");

            /*
             * Cleanup
             */
            //Clean up the sorted set after cleaning up the iterator
            stPinchIterator_destruct(pinchIterator);
            stSortedSet_destruct(alignedPairs);
            st_logInfo("Finished filling in the alignments for the flower\n");
        }
        stList_destruct(flowers);
        //st_errAbort("Done\n");

        /*
         * Write and close the cactus disk.
         */
        cactusDisk_write(cactusDisk);
        return 0; //Exiting without cleanup is quicker; enable cleanup when doing memory-leak detection.
        if (bedRegions != NULL) {
            // Clean up our mapping.
            munmap(bedRegions, numBeds * sizeof(bedRegion));
        }
    }

    ///////////////////////////////////////////////////////////////////////////
    // Cleanup
    ///////////////////////////////////////////////////////////////////////////

    stateMachine_destruct(sM);
    cactusDisk_destruct(cactusDisk);
    stKVDatabaseConf_destruct(kvDatabaseConf);
    //destructCactusCoreInputParameters(cCIP);
    free(cactusDiskDatabaseString);
    if (listOfEndAlignmentFiles != NULL) {
        stList_destruct(listOfEndAlignmentFiles);
    }
    if (logLevelString != NULL) {
        free(logLevelString);
    }
    st_logInfo("Finished with the flower disk for this flower.\n");
    //while(1);
    return 0;
}