Esempio n. 1
0
static void assignEventsAndSequences(Event *parentEvent, stTree *tree,
                                     stSet *outgroupNameSet,
                                     char *argv[], int64_t *j) {
    Event *myEvent = NULL; // To distinguish from the global "event" variable.
    assert(tree != NULL);
    totalEventNumber++;
    if (stTree_getChildNumber(tree) > 0) {
        myEvent = event_construct3(stTree_getLabel(tree),
                                   stTree_getBranchLength(tree), parentEvent,
                                   eventTree);
        for (int64_t i = 0; i < stTree_getChildNumber(tree); i++) {
            assignEventsAndSequences(myEvent, stTree_getChild(tree, i),
                                     outgroupNameSet, argv, j);
        }
    }
    if (stTree_getChildNumber(tree) == 0 || (stTree_getLabel(tree) != NULL && (stSet_search(outgroupNameSet, (char *)stTree_getLabel(tree)) != NULL))) {
        // This event is a leaf and/or an outgroup, so it has
        // associated sequence.
        assert(stTree_getLabel(tree) != NULL);

        assert(stTree_getBranchLength(tree) != INFINITY);
        if (stTree_getChildNumber(tree) == 0) {
            // Construct the leaf event
            myEvent = event_construct3(stTree_getLabel(tree), stTree_getBranchLength(tree), parentEvent, eventTree);
        }

        char *fileName = argv[*j];

        if (!stFile_exists(fileName)) {
            st_errAbort("File does not exist: %s\n", fileName);
        }

        // Set the global "event" variable, which is needed for the
        // function provided to fastaReadToFunction.
        event = myEvent;
        if (stFile_isDir(fileName)) {
            st_logInfo("Processing directory: %s\n", fileName);
            stList *filesInDir = stFile_getFileNamesInDirectory(fileName);
            for (int64_t i = 0; i < stList_length(filesInDir); i++) {
                char *absChildFileName = stFile_pathJoin(fileName, stList_get(filesInDir, i));
                assert(stFile_exists(absChildFileName));
                setCompleteStatus(absChildFileName); //decide if the sequences in the file should be free or attached.
                FILE *fileHandle = fopen(absChildFileName, "r");
                fastaReadToFunction(fileHandle, processSequence);
                fclose(fileHandle);
                free(absChildFileName);
            }
            stList_destruct(filesInDir);
        } else {
            st_logInfo("Processing file: %s\n", fileName);
            setCompleteStatus(fileName); //decide if the sequences in the file should be free or attached.
            FILE *fileHandle = fopen(fileName, "r");
            fastaReadToFunction(fileHandle, processSequence);
            fclose(fileHandle);
        }
        (*j)++;
    }
}
/*
 * Command-line entry point.
 *
 * Loads the nanopore read named by --npRead, builds a state machine from the
 * template model file, calls signalUtils_estimateNanoporeParams on the read's
 * template parameters, logs shift/scale/var before and after, and prints the
 * event means via printEventMeansAndParams.
 *
 * Options:
 *   -h, --help                 print usage and exit with status 1
 *   -T, --templateModel FILE   template model file
 *   -C, --complementModel FILE complement model file (parsed; the complement
 *                              code paths below are commented out)
 *   -q, --npRead FILE          nanopore read file (required)
 *   -D, --threshold VALUE      non-negative number
 *
 * Returns 0 on success, 1 after printing usage; aborts via st_errAbort on
 * invalid input.
 */
int main(int argc, char *argv[]) {
    char *npReadFile = NULL;
    char *templateModelFile = stString_print("../models/testModelR9_template.model");
    char *complementModelFile = stString_print("../models/testModelR9_complement_pop2.model");
    // NOTE(review): threshold is parsed and validated but never passed to
    // anything below; ASSIGNMENT_THRESHOLD is used instead - confirm intent.
    double threshold = 0.8;

    static struct option long_options[] = {
            {"help",                    no_argument,        0,  'h'},
            {"templateModel",           required_argument,  0,  'T'},
            {"complementModel",         required_argument,  0,  'C'},
            {"npRead",                  required_argument,  0,  'q'},
            {"threshold",               required_argument,  0,  'D'},
            {0, 0, 0, 0} };

    while (1) {
        int option_index = 0;

        // The short-option string mirrors long_options: -h takes no argument
        // and only options that are actually handled are listed.
        int key = getopt_long(argc, argv, "hT:C:q:D:",
                              long_options, &option_index);

        if (key == -1) {
            break;
        }
        switch (key) {
            case 'h':
                usage();
                return 1;
            case 'T':
                // Release the previous value (default or earlier -T).
                free(templateModelFile);
                templateModelFile = stString_copy(optarg);
                break;
            case 'C':
                free(complementModelFile);
                complementModelFile = stString_copy(optarg);
                break;
            case 'q':
                free(npReadFile);
                npReadFile = stString_copy(optarg);
                break;
            case 'D':
                // Explicit check instead of assert so that validation still
                // happens when compiled with NDEBUG. The negated comparison
                // also rejects NaN.
                if (sscanf(optarg, "%lf", &threshold) != 1 || !(threshold >= 0)) {
                    st_errAbort("Invalid threshold (expected a non-negative number): %s\n", optarg);
                }
                break;
            default:
                usage();
                return 1;
        }
    }

    // Without this guard a missing -q would hand NULL to stFile_exists and
    // to the "%s" conversion below.
    if (npReadFile == NULL) {
        st_errAbort("No npRead file given; use --npRead\n");
    }
    if (!stFile_exists(npReadFile)) {
        st_errAbort("Could not find npRead here: %s\n", npReadFile);
    }
    // read in the .npRead file
    NanoporeRead *npRead = nanopore_loadNanoporeReadFromFile(npReadFile);

    // build state machines (to use the look up table)
    StateMachine *sMt = getStateMachine3(templateModelFile);
    //StateMachine *sMc = getStateMachine3(complementModelFile);

    // make 1D map of events (mean, noise) to kmers
    stList *templateMap = signalUtils_templateOneDAssignmentsFromRead(npRead, sMt, ASSIGNMENT_THRESHOLD);
    //stList *complementMap = signalUtils_complementOneDAssignmentsFromRead(npRead, sMc, ASSIGNMENT_THRESHOLD);

    // convert template to log normal
    // NB only need this if you're estimating the NOISE parameters
    //nanopore_convert_to_lognormal_params(sMt->alphabetSize, sMt->kmerLength, sMt->EMISSION_MATCH_MATRIX, templateMap);
    // convert complement to log normal
    //nanopore_convert_to_lognormal_params(sMc->alphabetSize, sMc->kmerLength, sMc->EMISSION_MATCH_MATRIX, complementMap);

    // error log report
    st_uglyf("SENTINEL - Before: shift: %f scale: %f var: %f [template]\n",
             npRead->templateParams.shift, npRead->templateParams.scale, npRead->templateParams.var);

    // compute template params
    //nanopore_compute_noise_scale_params(sMt->EMISSION_MATCH_MATRIX, templateMap, &npRead->templateParams);
    // compute complement params
    //nanopore_compute_noise_scale_params(sMc->EMISSION_MATCH_MATRIX, complementMap, &npRead->complementParams);

    // error log report

    signalUtils_estimateNanoporeParams(sMt, npRead, &npRead->templateParams, ASSIGNMENT_THRESHOLD,
                                       signalUtils_templateOneDAssignmentsFromRead, nanopore_dontAdjustEvents);
    //signalUtils_estimateNanoporeParams(sMc, npRead, &npRead->complementParams, ASSIGNMENT_THRESHOLD,
    //                                   signalUtils_complementOneDAssignmentsFromRead, nanopore_dontAdjustEvents);

    st_uglyf("SENTINEL - After: shift: %f scale: %f var: %f [template]\n",
             npRead->templateParams.shift, npRead->templateParams.scale, npRead->templateParams.var);
    //st_uglyf("SENTINEL - After: shift_sd: %f scale_sd: %f var_sd: %f [template]\n",
    //         npRead->complementParams.shift_sd, npRead->complementParams.scale_sd, npRead->complementParams.var_sd);

    // Token index 10 is passed through unchanged from the original code.
    stList *templateKmers = lineTokensFromFile(npReadFile, 10);
    //stList *complementKmers = lineTokensFromFile(npReadFile, 12);
    //printEventNoisesAndParams(npRead, templateKmers, complementKmers);
    printEventMeansAndParams(npRead, templateKmers, NULL);

    stList_destruct(templateKmers);
    //stList_destruct(complementKmers);
    stList_destruct(templateMap);
    //stList_destruct(complementMap);
    nanopore_nanoporeReadDestruct(npRead);
    stateMachine_destruct(sMt);
    //stateMachine_destruct(sMc);

    // Release the option strings allocated during argument parsing.
    free(npReadFile);
    free(templateModelFile);
    free(complementModelFile);

    return 0;
}