static void setup() { teardown(); assert(nodeNumber == -1); while(nodeNumber % 2 != 0) { nodeNumber = st_randomInt(0, 100); } assert(nodeNumber >= 0); assert(nodeNumber % 2 == 0); stubs = stList_construct3(0, (void (*)(void *))stIntTuple_destruct); chains = stList_construct3(0, (void (*)(void *))stIntTuple_destruct); for(int64_t i=0; i<nodeNumber/2; i++) { assert(nodeNumber/2 > 0); stIntTuple *edge = stIntTuple_construct2(i, nodeNumber/2 + i); if(stList_length(stubs) == 0 || st_random() > 0.9) { stList_append(stubs, edge); } else { stList_append(chains, edge); } } zMatrix = st_calloc(nodeNumber*nodeNumber, sizeof(float)); for(int64_t i=0; i<nodeNumber; i++) { for(int64_t j=i+1; j<nodeNumber; j++) { double score = st_random(); zMatrix[i * nodeNumber + j] = score; zMatrix[j * nodeNumber + i] = score; } } st_logDebug("To test the adjacency problem we've created a problem with %" PRIi64 " nodes %" PRIi64 " stubs and %" PRIi64 " chains\n", nodeNumber, stList_length(stubs), stList_length(chains)); }
/*
 * Returns a newly allocated string made by concatenating node->value for every node
 * visited when starting at stTreap_findMax() of the treap's root and following
 * stTreap_prev() until it yields NULL.
 *
 * The buffer is st_calloc'd with stTreap_size() + 1 bytes and filled with strcat(), so
 * st_calloc is relied on to hand back zeroed memory.
 * NOTE(review): that size presumably assumes each value is a one-character string - confirm.
 * The caller receives ownership of the returned buffer.
 */
char *stTreap_printBackwards(stTreap *node) {
    stTreap *cursor = stTreap_findMax(stTreap_findRoot(node));
    char *path = st_calloc(stTreap_size(cursor) + 1, 1);

    for (; cursor; cursor = stTreap_prev(cursor)) {
        strcat(path, cursor->value);
    }
    return path;
}
/*
 * Returns a newly allocated string made by concatenating node->value for every node
 * visited when starting at stTreap_findMin() of the treap's root and following
 * stTreap_next() until it yields NULL.
 *
 * The buffer is st_calloc'd with stTreap_size() + 1 bytes and filled with strcat(), so
 * st_calloc is relied on to hand back zeroed memory.
 * NOTE(review): that size presumably assumes each value is a one-character string - confirm.
 * The caller receives ownership of the returned buffer.
 */
char *stTreap_print(stTreap *node) {
    stTreap *cursor = stTreap_findMin(stTreap_findRoot(node));
    char *path = st_calloc(stTreap_size(cursor) + 1, 1);

    for (; cursor; cursor = stTreap_next(cursor)) {
        strcat(path, cursor->value);
    }
    return path;
}
/*
 * Aligns the sequences attached to the cap instances of `end` and returns the result as a
 * sorted set of AlignedPair objects.
 *
 * Outline of what the body does:
 *  1. For each cap of the end (reversed first when cap_getSide() is true) an
 *     AdjacencySequence limited by maxSequenceLength is built, together with a SeqFrag
 *     tagged with the name of the positively oriented end on the far side of the cap's
 *     adjacency. The number of caps leading to each such end is counted.
 *  2. makeAlignment() is run on the SeqFrags.
 *  3. For every sequence the chosen pairwise alignments are counted, separately for
 *     partners with the same rightEndId ("common ends") and with a different one.
 *  4. Per-sequence score multipliers are derived from those counts.
 *  5. Each aligned-pair tuple is converted to an AlignedPair with both scores multiplied,
 *     and the pair plus its `reverse` are inserted into the returned set.
 *
 * sM, spanningTrees, useProgressiveMerging, gapGamma and
 * pairwiseAlignmentBandingParameters are forwarded unchanged to makeAlignment().
 *
 * The returned set is constructed with alignedPair_destruct as its element destructor;
 * every temporary structure created here is released before returning.
 */
stSortedSet *makeEndAlignment(StateMachine *sM, End *end, int64_t spanningTrees, int64_t maxSequenceLength, bool useProgressiveMerging, float gapGamma, PairwiseAlignmentParameters *pairwiseAlignmentBandingParameters) {
    //Make an alignment of the sequences in the ends

    //Get the adjacency sequences to be aligned.
    Cap *cap;
    End_InstanceIterator *it = end_getInstanceIterator(end);
    //sequences and seqFrags are filled in lock-step, so index k refers to the same cap in both.
    stList *sequences = stList_construct3(0, (void (*)(void *))adjacencySequence_destruct);
    stList *seqFrags = stList_construct3(0, (void (*)(void *))seqFrag_destruct);
    //Maps an End to a heap-allocated int64_t counter; the counters are freed with the hash.
    stHash *endInstanceNumbers = stHash_construct2(NULL, free);
    while((cap = end_getNext(it)) != NULL) {
        if(cap_getSide(cap)) {
            cap = cap_getReverse(cap);
        }
        AdjacencySequence *adjacencySequence = adjacencySequence_construct(cap, maxSequenceLength);
        stList_append(sequences, adjacencySequence);
        assert(cap_getAdjacency(cap) != NULL);
        End *otherEnd = end_getPositiveOrientation(cap_getEnd(cap_getAdjacency(cap)));
        stList_append(seqFrags, seqFrag_construct(adjacencySequence->string, 0, end_getName(otherEnd)));
        //Increase count of seqfrags with a given end.
        int64_t *c = stHash_search(endInstanceNumbers, otherEnd);
        if(c == NULL) {
            //First time this end is seen: start its counter at zero.
            c = st_calloc(1, sizeof(int64_t));
            assert(*c == 0);
            stHash_insert(endInstanceNumbers, otherEnd, c);
        }
        (*c)++;
    }
    end_destructInstanceIterator(it);

    //Get the alignment.
    //NOTE(review): the literal 100000000 is the fourth makeAlignment() argument; its meaning is
    //not visible in this function - confirm against makeAlignment()'s declaration.
    MultipleAlignment *mA = makeAlignment(sM, seqFrags, spanningTrees, 100000000, useProgressiveMerging, gapGamma, pairwiseAlignmentBandingParameters);

    //Build an array of weights to reweight pairs in the alignment.
    int64_t *pairwiseAlignmentsPerSequenceNonCommonEnds = st_calloc(stList_length(seqFrags), sizeof(int64_t));
    int64_t *pairwiseAlignmentsPerSequenceCommonEnds = st_calloc(stList_length(seqFrags), sizeof(int64_t));

    //First build array on number of pairwise alignments to each sequence, distinguishing alignments between sequences sharing
    //common ends.
    for(int64_t i=0; i<stList_length(mA->chosenPairwiseAlignments); i++) {
        //Elements 1 and 2 of each tuple are the indices of the two aligned sequences.
        stIntTuple *pairwiseAlignment = stList_get(mA->chosenPairwiseAlignments, i);
        int64_t seq1 = stIntTuple_get(pairwiseAlignment, 1);
        int64_t seq2 = stIntTuple_get(pairwiseAlignment, 2);
        assert(seq1 != seq2);
        SeqFrag *seqFrag1 = stList_get(seqFrags, seq1);
        SeqFrag *seqFrag2 = stList_get(seqFrags, seq2);
        //Pick the counter array matching whether both fragments have the same rightEndId.
        int64_t *pairwiseAlignmentsPerSequence = seqFrag1->rightEndId == seqFrag2->rightEndId ? pairwiseAlignmentsPerSequenceCommonEnds : pairwiseAlignmentsPerSequenceNonCommonEnds;
        pairwiseAlignmentsPerSequence[seq1]++;
        pairwiseAlignmentsPerSequence[seq2]++;
    }

    //Now calculate score adjustments.
    //For each sequence and each class (common / non-common) the multiplier is
    //(number of other sequences in that class) / (number of chosen pairwise alignments in that class).
    double *scoreAdjustmentsNonCommonEnds = st_malloc(stList_length(seqFrags) * sizeof(double));
    double *scoreAdjustmentsCommonEnds = st_malloc(stList_length(seqFrags) * sizeof(double));
    for(int64_t i=0; i<stList_length(seqFrags); i++) {
        SeqFrag *seqFrag = stList_get(seqFrags, i);
        End *otherEnd = flower_getEnd(end_getFlower(end), seqFrag->rightEndId);
        assert(otherEnd != NULL);
        assert(stHash_search(endInstanceNumbers, otherEnd) != NULL);
        //commonInstanceNumber counts this sequence itself as well, hence the "-1" used below.
        int64_t commonInstanceNumber = *(int64_t *)stHash_search(endInstanceNumbers, otherEnd);
        int64_t nonCommonInstanceNumber = stList_length(seqFrags) - commonInstanceNumber;
        assert(commonInstanceNumber > 0 && nonCommonInstanceNumber >= 0);
        assert(pairwiseAlignmentsPerSequenceNonCommonEnds[i] <= nonCommonInstanceNumber);
        assert(pairwiseAlignmentsPerSequenceNonCommonEnds[i] >= 0);
        assert(pairwiseAlignmentsPerSequenceCommonEnds[i] < commonInstanceNumber);
        assert(pairwiseAlignmentsPerSequenceCommonEnds[i] >= 0);
        //Earlier single-multiplier formula, left disabled:
        //scoreAdjustmentsNonCommonEnds[i] = ((double)nonCommonInstanceNumber + commonInstanceNumber - 1)/(pairwiseAlignmentsPerSequenceNonCommonEnds[i] + pairwiseAlignmentsPerSequenceCommonEnds[i]);
        //scoreAdjustmentsCommonEnds[i] = scoreAdjustmentsNonCommonEnds[i];
        if(pairwiseAlignmentsPerSequenceNonCommonEnds[i] > 0) {
            scoreAdjustmentsNonCommonEnds[i] = ((double)nonCommonInstanceNumber)/pairwiseAlignmentsPerSequenceNonCommonEnds[i];
            assert(scoreAdjustmentsNonCommonEnds[i] >= 1.0);
            assert(scoreAdjustmentsNonCommonEnds[i] <= nonCommonInstanceNumber);
        } else {
            //Sentinel: no alignment of this class exists for sequence i, so the value must never be read
            //(asserted in the conversion loop below).
            scoreAdjustmentsNonCommonEnds[i] = INT64_MIN;
        }
        if(pairwiseAlignmentsPerSequenceCommonEnds[i] > 0) {
            scoreAdjustmentsCommonEnds[i] = ((double)commonInstanceNumber-1)/pairwiseAlignmentsPerSequenceCommonEnds[i];
            assert(scoreAdjustmentsCommonEnds[i] >= 1.0);
            assert(scoreAdjustmentsCommonEnds[i] <= commonInstanceNumber-1);
        } else {
            //Same sentinel as above for the common-ends class.
            scoreAdjustmentsCommonEnds[i] = INT64_MIN;
        }
    }

    //Convert the alignment pairs to an alignment of the caps..
    stSortedSet *sortedAlignment = stSortedSet_construct3((int (*)(const void *, const void *))alignedPair_cmpFn, (void (*)(void *))alignedPair_destruct);
    //The tuples are popped off mA->alignedPairs and destroyed one by one, leaving that list empty.
    while(stList_length(mA->alignedPairs) > 0) {
        //Tuple layout as read below: [0] score, [1] sequence index 1, [2] offset 1, [3] sequence index 2, [4] offset 2.
        stIntTuple *alignedPair = stList_pop(mA->alignedPairs);
        assert(stIntTuple_length(alignedPair) == 5);
        int64_t seqIndex1 = stIntTuple_get(alignedPair, 1);
        int64_t seqIndex2 = stIntTuple_get(alignedPair, 3);
        AdjacencySequence *i = stList_get(sequences, seqIndex1);
        AdjacencySequence *j = stList_get(sequences, seqIndex2);
        assert(i != j);
        int64_t offset1 = stIntTuple_get(alignedPair, 2);
        int64_t offset2 = stIntTuple_get(alignedPair, 4);
        int64_t score = stIntTuple_get(alignedPair, 0);
        if(score <= 0) { //Happens when indel probs are included
            score = 1; //This is the minimum
        }
        assert(score > 0 && score <= PAIR_ALIGNMENT_PROB_1);
        SeqFrag *seqFrag1 = stList_get(seqFrags, seqIndex1);
        SeqFrag *seqFrag2 = stList_get(seqFrags, seqIndex2);
        assert(seqFrag1 != seqFrag2);
        //Use the multiplier table of the class this particular pair belongs to.
        double *scoreAdjustments = seqFrag1->rightEndId == seqFrag2->rightEndId ? scoreAdjustmentsCommonEnds : scoreAdjustmentsNonCommonEnds;
        assert(scoreAdjustments[seqIndex1] != INT64_MIN);
        assert(scoreAdjustments[seqIndex2] != INT64_MIN);
        //Offsets are added to the start when the strand flag is set and subtracted otherwise.
        AlignedPair *alignedPair2 = alignedPair_construct(
                i->subsequenceIdentifier, i->start + (i->strand ? offset1 : -offset1), i->strand,
                j->subsequenceIdentifier, j->start + (j->strand ? offset2 : -offset2), j->strand,
                score*scoreAdjustments[seqIndex1], score*scoreAdjustments[seqIndex2]); //Do the reweighting here.
        assert(stSortedSet_search(sortedAlignment, alignedPair2) == NULL);
        assert(stSortedSet_search(sortedAlignment, alignedPair2->reverse) == NULL);
        //Both orientations of the pair are stored in the set.
        stSortedSet_insert(sortedAlignment, alignedPair2);
        stSortedSet_insert(sortedAlignment, alignedPair2->reverse);
        stIntTuple_destruct(alignedPair);
    }

    //Cleanup
    stList_destruct(seqFrags);
    stList_destruct(sequences);
    free(pairwiseAlignmentsPerSequenceNonCommonEnds);
    free(pairwiseAlignmentsPerSequenceCommonEnds);
    free(scoreAdjustmentsNonCommonEnds);
    free(scoreAdjustmentsCommonEnds);
    multipleAlignment_destruct(mA);
    stHash_destruct(endInstanceNumbers);
    return sortedAlignment;
}
int main(int argc, char *argv[]) { ////////////////////////////////////////////// //Parse the inputs ////////////////////////////////////////////// parseBasicArguments(argc, argv, "coverageStats"); assert(referenceEventString != NULL); assert(otherReferenceEventString != NULL); assert(outgroupEventString != NULL); /////////////////////////////////////////////////////////////////////////// // Calculate and print to file a crap load of numbers. /////////////////////////////////////////////////////////////////////////// Sequence *referenceSequence = NULL; Sequence *otherReferenceSequence = NULL; Flower_SequenceIterator *sequenceIt = flower_getSequenceIterator(flower); Sequence *sequence; while ((sequence = flower_getNextSequence(sequenceIt)) != NULL) { const char *eventHeader = event_getHeader(sequence_getEvent(sequence)); if (eventHeader != NULL && strcmp(eventHeader, referenceEventString) == 0) { if (referenceSequence == NULL || sequence_getLength(sequence) >= sequence_getLength(referenceSequence)) { referenceSequence = sequence; } } if (eventHeader != NULL && strcmp(eventHeader, otherReferenceEventString) == 0) { if (otherReferenceSequence == NULL || sequence_getLength(sequence) >= sequence_getLength(otherReferenceSequence)) { otherReferenceSequence = sequence; } } } flower_destructSequenceIterator(sequenceIt); assert(referenceSequence != NULL); assert(otherReferenceSequence != NULL); FILE *fileHandle = fopen(outputFile, "w"); fprintf( fileHandle, "<coverageStats referenceSequenceLength=\"%i\" otherReferenceSequenceLength=\"%i\">\n", sequence_getLength(referenceSequence), sequence_getLength(otherReferenceSequence)); EventTree_Iterator *eventIt = eventTree_getIterator( flower_getEventTree(flower)); eventNumber = eventTree_getEventNumber(flower_getEventTree(flower)); Event * event; totalBaseCoverages = st_calloc(sizeof(int32_t), eventNumber + 1); totalReferenceBases = 0; totalOtherReferenceBases = 0; int32_t totalSamples = 0; ignoreOtherReferenceBlocks = 0; while 
((event = eventTree_getNext(eventIt)) != NULL) { sampleEventString = event_getHeader(event); if (sampleEventString != NULL && strcmp(sampleEventString, "ROOT") != 0 && strcmp(sampleEventString, "") != 0) { baseCoverages = st_calloc(sizeof(int32_t), eventNumber + 1); baseCoverages[0] = strcmp(sampleEventString, referenceEventString) != 0 ? getTotalLengthOfAdjacencies(flower, sampleEventString) : 0; referenceBases = 0; otherReferenceBases = 0; getMAFs(flower, fileHandle, getMAFBlock2); if(strcmp(sampleEventString, referenceEventString) == 0) { for(int32_t i=2; i<eventNumber + 1; i++) { baseCoverages[i-1] = baseCoverages[i]; } baseCoverages[eventNumber] = 0; } printStatsForSample( strcmp(sampleEventString, referenceEventString) != 0 && strcmp(sampleEventString, outgroupEventString) != 0, fileHandle, 1); free(baseCoverages); totalSamples += (strcmp(sampleEventString, referenceEventString) != 0 && strcmp(sampleEventString, outgroupEventString) != 0) ? 1 : 0; } } eventTree_destructIterator(eventIt); //Do average base coverages.. sampleEventString = "average"; baseCoverages = totalBaseCoverages; referenceBases = totalReferenceBases; otherReferenceBases = totalOtherReferenceBases; printStatsForSample(0, fileHandle, totalSamples); //Do all.. 
sampleEventString = referenceEventString; baseCoverages = st_calloc(sizeof(int32_t), eventNumber + 1); baseCoverages[0] = totalBaseCoverages[0]; referenceBases = 0; getMAFs(flower, fileHandle, getMAFBlock2); for(int32_t i=2; i<eventNumber + 1; i++) { baseCoverages[i-1] = baseCoverages[i]; } baseCoverages[eventNumber] = 0; otherReferenceBases = sequence_getLength(otherReferenceSequence); sampleEventString = "all"; printStatsForSample(0, fileHandle, 1); free(baseCoverages); //Do blocks without other reference ignoreOtherReferenceBlocks = 1; sampleEventString = referenceEventString; baseCoverages = st_calloc(sizeof(int32_t), eventNumber + 1); baseCoverages[0] = totalBaseCoverages[0] - getTotalLengthOfAdjacencies(flower, otherReferenceEventString); referenceBases = 0; otherReferenceBases = 0; getMAFs(flower, fileHandle, getMAFBlock2); for(int32_t i=2; i<eventNumber + 1; i++) { baseCoverages[i-1] = baseCoverages[i]; } baseCoverages[eventNumber] = 0; sampleEventString = "minusOtherReference"; printStatsForSample(0, fileHandle, 1); free(baseCoverages); fprintf(fileHandle, "</coverageStats>\n"); st_logInfo("Finished writing out the stats.\n"); fclose(fileHandle); return 0; }
/*
 * Allocates a CactusDisk, opens its key-value database and either loads the stored disk
 * parameters or initialises a brand-new disk.
 *
 * conf              - database configuration, passed to stKVDatabase_construct().
 * create            - true to create a new disk, false to open an existing one.
 * sequencesFileName - when creating: optional name of a file (inside the directory returned
 *                     by stKVDatabaseConf_getDir()) in which sequences are stored; must be
 *                     NULL when opening an existing disk.
 *
 * Side effect: reseeds the random number generator through st_randomSeed().
 *
 * Throws CACTUS_DISK_EXCEPTION_ID when:
 *  - create is true but the parameter record already exists;
 *  - create is false but no parameter record exists;
 *  - a sequences file name is given while opening an existing disk;
 *  - a sequences file name is given but the conf has no directory;
 *  - the sequences file cannot be created.
 * NOTE(review): as before, the partially built CactusDisk is not released on these paths.
 *
 * Returns the constructed CactusDisk.
 */
static CactusDisk *cactusDisk_constructPrivate(stKVDatabaseConf *conf, bool create, const char *sequencesFileName) {
    //sequencesFileName = NULL; //Disable the ability to store the sequences on disk.
    CactusDisk *cactusDisk = st_calloc(1, sizeof(CactusDisk));

    //construct lists of in memory objects
    cactusDisk->metaSequences = stSortedSet_construct3(cactusDisk_constructMetaSequencesP, NULL);
    cactusDisk->flowers = stSortedSet_construct3(cactusDisk_constructFlowersP, NULL);
    cactusDisk->flowerNamesMarkedForDeletion = stSortedSet_construct3((int (*)(const void *, const void *)) strcmp, free);
    cactusDisk->updateRequests = stList_construct3(0, (void (*)(void *)) stKVDatabaseBulkRequest_destruct);

    //Now open the database
    cactusDisk->database = stKVDatabase_construct(conf, create);
    cactusDisk->cache = stCache_construct();
    cactusDisk->stringCache = stCache_construct();

    //initialise the unique ids.
    //The bits are combined in unsigned arithmetic: left-shifting a signed clock_t/time_t can
    //overflow (or shift a negative value), which is undefined behaviour.
    const uint64_t seedBits = ((uint64_t) clock() << 24) | ((uint64_t) time(NULL) << 16) | ((uint64_t) getpid() & 65535); //Likely to be unique
    int64_t seed = (int64_t) seedBits;
    st_logDebug("The cactus disk is seeding the random number generator with the value %" PRIi64 "\n", seed);
    st_randomSeed(seed);
    cactusDisk->uniqueNumber = 0;
    cactusDisk->maxUniqueNumber = 0;

    //Now load any stuff..
    if (containsRecord(cactusDisk, CACTUS_DISK_PARAMETER_KEY)) {
        if (create) {
            stThrowNew(CACTUS_DISK_EXCEPTION_ID, "Tried to create a cactus disk, but the cactus disk already exists");
        }
        if (sequencesFileName != NULL) {
            stThrowNew(CACTUS_DISK_EXCEPTION_ID, "A sequences file name is specified, but the cactus disk is not being created");
        }
        void *record = getRecord(cactusDisk, CACTUS_DISK_PARAMETER_KEY, "cactus_disk parameters");
        //The loader receives &record, so keep the original pointer for free().
        void *record2 = record;
        cactusDisk_loadFromBinaryRepresentation(&record, cactusDisk, conf);
        free(record2);
    } else {
        //Previously only an assert: with NDEBUG an "open" request silently created a new disk.
        if (!create) {
            stThrowNew(CACTUS_DISK_EXCEPTION_ID, "Tried to open a cactus disk, but the cactus disk does not exist");
        }
        if (sequencesFileName == NULL) {
            cactusDisk->storeSequencesInAFile = 0;
            cactusDisk->sequencesFileName = NULL;
            cactusDisk->sequencesReadFileHandle = NULL;
            cactusDisk->sequencesWriteFileHandle = NULL;
            cactusDisk->absSequencesFileName = NULL;
        } else {
            if (stKVDatabaseConf_getDir(conf) == NULL) {
                stThrowNew(CACTUS_DISK_EXCEPTION_ID, "The database conf does not contain a directory in which the sequence file is to be found!\n");
            }
            cactusDisk->storeSequencesInAFile = 1;
            cactusDisk->sequencesFileName = stString_copy(sequencesFileName);
            cactusDisk->absSequencesFileName = stString_print("%s/%s", stKVDatabaseConf_getDir(conf), cactusDisk->sequencesFileName);
            //Make sure the file exists (opening with "w" creates or truncates it).
            cactusDisk->sequencesReadFileHandle = fopen(cactusDisk->absSequencesFileName, "w");
            if (cactusDisk->sequencesReadFileHandle == NULL) {
                //Previously only an assert, so fclose(NULL) was reached in NDEBUG builds.
                stThrowNew(CACTUS_DISK_EXCEPTION_ID, "Could not create the sequences file %s", cactusDisk->absSequencesFileName);
            }
            fclose(cactusDisk->sequencesReadFileHandle); //Flush it first time.
            cactusDisk->sequencesReadFileHandle = NULL;
            cactusDisk->sequencesWriteFileHandle = NULL;
        }
    }

    return cactusDisk;
}
int main(int argc, char *argv[]) { static struct option long_options[] = { {"listen", required_argument, 0, 'l'}, {"port", required_argument, 0, 'p'}, {"help", no_argument, 0, 'h'}, {"verbose", no_argument, 0, 'v'}, {"ping-parent", no_argument, 0, 'x'}, {"gcc-command", required_argument, 0, 'g'}, {"vx32sdk", required_argument, 0, 's'}, {"tmpdir", required_argument, 0, 't'}, {"engine", required_argument, 0, 'e'}, {"no-vx32", no_argument, 0, 'n'}, {0, 0, 0, 0} }; struct server *server = (struct server*)st_calloc(1, sizeof(struct server)); server->info_handler = &info_handler; server->quit_handler = &quit_handler; server->process_multi= &process_multi; INIT_LIST_HEAD(&server->root); server->host = "127.0.0.1"; server->port = 22122; struct config *config = (struct config*)st_calloc(1, sizeof(struct config)); server->userdata = config; config->tmpdir = "/tmp"; config->vx32sdk_path = "./untrusted/"; config->vx32sdk_gcc_command = strdup(flatten_argv(NELEM(default_vx32sdk_gcc_command), default_vx32sdk_gcc_command, " ")); config->syscall_limit = 4; /* 4 syscalls per request allowed */ int option_index; int arg; char *engine_name = NULL; while((arg = getopt_long_only(argc, argv, "hxvnl:p:g:s:t:e:", long_options, &option_index)) != EOF) { switch(arg) { case 'h': print_help(server, config); exit(-1); break; case 'v': server->trace = 1; break; case 'x': server->ping_parent = 1; break; case 'l': server->host = optarg; break; case 'p': server->port = atoi(optarg); if(server->port < 0 || server->port > 65536) fatal("Port number broken: %i", server->port); break; case 'g': free(config->vx32sdk_gcc_command); config->vx32sdk_gcc_command = strdup(optarg); break; case 's': config->vx32sdk_path = optarg; break; case 't': config->tmpdir = optarg; break; case 'e': engine_name = optarg; break; case 'n': config->vx32_disabled = 1; break; case 0: default: fatal("\nUnknown option: \"%s\"\n", argv[optind-1]); } } int i; storage_engine_create *engine_create = NULL; storage_engine_destroy 
*engine_destroy = NULL; for(i=0; i<NELEM(engines); i++) { if(engine_name && 0 == strcmp(engine_name, engines[i].name)) { engine_create = engines[i].create; engine_destroy = engines[i].destroy; } } if(NULL == engine_create) fatal("\nYou must specify a storage engine:" " --engine=[%s]\n", flatten_engine_names() ); log_info("Process pid %i", getpid()); signal(SIGPIPE, SIG_IGN); commands_initialize(); process_initialize(config); char *params = flatten_argv(argc-optind, &argv[optind], ", "); log_info("Loading database engine \"%s\" with parameters \"%s\"", engine_name, params); config->api = engine_create(argc-optind, &argv[optind]); do_event_loop(server); log_info("Quit"); process_destroy(); commands_destroy(); pool_free(); engine_destroy(config->api); free(config->vx32sdk_gcc_command); free(config); free(server); exit(0); return(0); }