static void assignEventsAndSequences(Event *parentEvent, stTree *tree, stSet *outgroupNameSet, char *argv[], int64_t *j) { Event *myEvent = NULL; // To distinguish from the global "event" variable. assert(tree != NULL); totalEventNumber++; if (stTree_getChildNumber(tree) > 0) { myEvent = event_construct3(stTree_getLabel(tree), stTree_getBranchLength(tree), parentEvent, eventTree); for (int64_t i = 0; i < stTree_getChildNumber(tree); i++) { assignEventsAndSequences(myEvent, stTree_getChild(tree, i), outgroupNameSet, argv, j); } } if (stTree_getChildNumber(tree) == 0 || (stTree_getLabel(tree) != NULL && (stSet_search(outgroupNameSet, (char *)stTree_getLabel(tree)) != NULL))) { // This event is a leaf and/or an outgroup, so it has // associated sequence. assert(stTree_getLabel(tree) != NULL); assert(stTree_getBranchLength(tree) != INFINITY); if (stTree_getChildNumber(tree) == 0) { // Construct the leaf event myEvent = event_construct3(stTree_getLabel(tree), stTree_getBranchLength(tree), parentEvent, eventTree); } char *fileName = argv[*j]; if (!stFile_exists(fileName)) { st_errAbort("File does not exist: %s\n", fileName); } // Set the global "event" variable, which is needed for the // function provided to fastaReadToFunction. event = myEvent; if (stFile_isDir(fileName)) { st_logInfo("Processing directory: %s\n", fileName); stList *filesInDir = stFile_getFileNamesInDirectory(fileName); for (int64_t i = 0; i < stList_length(filesInDir); i++) { char *absChildFileName = stFile_pathJoin(fileName, stList_get(filesInDir, i)); assert(stFile_exists(absChildFileName)); setCompleteStatus(absChildFileName); //decide if the sequences in the file should be free or attached. FILE *fileHandle = fopen(absChildFileName, "r"); fastaReadToFunction(fileHandle, processSequence); fclose(fileHandle); free(absChildFileName); } stList_destruct(filesInDir); } else { st_logInfo("Processing file: %s\n", fileName); setCompleteStatus(fileName); //decide if the sequences in the file should be free or attached. FILE *fileHandle = fopen(fileName, "r"); fastaReadToFunction(fileHandle, processSequence); fclose(fileHandle); } (*j)++; } }
int main(int argc, char *argv[]) { int64_t j = 0; char *npReadFile = NULL; char *templateModelFile = stString_print("../models/testModelR9_template.model"); char *complementModelFile = stString_print("../models/testModelR9_complement_pop2.model"); double threshold = 0.8; int key; while (1) { static struct option long_options[] = { {"help", no_argument, 0, 'h'}, {"templateModel", required_argument, 0, 'T'}, {"complementModel", required_argument, 0, 'C'}, {"npRead", required_argument, 0, 'q'}, {"threshold", required_argument, 0, 'D'}, {0, 0, 0, 0} }; int option_index = 0; key = getopt_long(argc, argv, "h:T:C:q:f:b:D:m:", long_options, &option_index); if (key == -1) { //usage(); break; } switch (key) { case 'h': usage(); return 1; case 'T': templateModelFile = stString_copy(optarg); break; case 'C': complementModelFile = stString_copy(optarg); break; case 'q': npReadFile = stString_copy(optarg); break; case 'D': j = sscanf(optarg, "%lf", &threshold); assert (j == 1); assert (threshold >= 0); break; default: usage(); return 1; } } if (!stFile_exists(npReadFile)) { st_errAbort("Could not find npRead here: %s\n", npReadFile); } // read in the .npRead file NanoporeRead *npRead = nanopore_loadNanoporeReadFromFile(npReadFile); // build state machines (to use the look up table) StateMachine *sMt = getStateMachine3(templateModelFile); //StateMachine *sMc = getStateMachine3(complementModelFile); // make 1D map of events (mean, noise) to kmers stList *templateMap = signalUtils_templateOneDAssignmentsFromRead(npRead, sMt, ASSIGNMENT_THRESHOLD); //stList *complementMap = signalUtils_complementOneDAssignmentsFromRead(npRead, sMc, ASSIGNMENT_THRESHOLD); // convert template to log normal // NB only need this if you're estimating the NOISE parameteres //nanopore_convert_to_lognormal_params(sMt->alphabetSize, sMt->kmerLength, sMt->EMISSION_MATCH_MATRIX, templateMap); // convert complement to log normal //nanopore_convert_to_lognormal_params(sMc->alphabetSize, sMc->kmerLength, sMc->EMISSION_MATCH_MATRIX, complementMap); // error log report st_uglyf("SENTINEL - Before: shift: %f scale: %f var: %f [template]\n", npRead->templateParams.shift, npRead->templateParams.scale, npRead->templateParams.var); // compute template params //nanopore_compute_noise_scale_params(sMt->EMISSION_MATCH_MATRIX, templateMap, &npRead->templateParams); // compute complement params //nanopore_compute_noise_scale_params(sMc->EMISSION_MATCH_MATRIX, complementMap, &npRead->complementParams); // error log report signalUtils_estimateNanoporeParams(sMt, npRead, &npRead->templateParams, ASSIGNMENT_THRESHOLD, signalUtils_templateOneDAssignmentsFromRead, nanopore_dontAdjustEvents); //signalUtils_estimateNanoporeParams(sMc, npRead, &npRead->complementParams, ASSIGNMENT_THRESHOLD, // signalUtils_complementOneDAssignmentsFromRead, nanopore_dontAdjustEvents); st_uglyf("SENTINEL - After: shift: %f scale: %f var: %f [template]\n", npRead->templateParams.shift, npRead->templateParams.scale, npRead->templateParams.var); //st_uglyf("SENTINEL - After: shift_sd: %f scale_sd: %f var_sd: %f [template]\n", // npRead->complementParams.shift_sd, npRead->complementParams.scale_sd, npRead->complementParams.var_sd); stList *templateKmers = lineTokensFromFile(npReadFile, 10); //stList *complementKmers = lineTokensFromFile(npReadFile, 12); //printEventNoisesAndParams(npRead, templateKmers, complementKmers); printEventMeansAndParams(npRead, templateKmers, NULL); stList_destruct(templateKmers); //stList_destruct(complementKmers); stList_destruct(templateMap); //stList_destruct(complementMap); nanopore_nanoporeReadDestruct(npRead); stateMachine_destruct(sMt); //stateMachine_destruct(sMc); (void) j; // silence unused variable warning. return 0; }