int extract_main(int argc, char *argv[]) { char *opref = NULL, *oname, *p; int c, i; Config config; //Defaults config.keepCpG = 1; config.keepCHG = 0; config.keepCHH = 0; config.minMapq = 10; config.minPhred = 5; config.keepDupes = 0; config.keepSingleton = 0, config.keepDiscordant = 0; config.merge = 0; config.maxDepth = 2000; config.fai = NULL; config.fp = NULL; config.bai = NULL; config.reg = NULL; config.bedName = NULL; config.bed = NULL; config.fraction = 0; config.counts = 0; config.logit = 0; for(i=0; i<16; i++) config.bounds[i] = 0; static struct option lopts[] = { {"opref", 1, NULL, 'o'}, {"fraction", 0, NULL, 'f'}, {"counts", 0, NULL, 'c'}, {"logit", 0, NULL, 'm'}, {"noCpG", 0, NULL, 1}, {"CHG", 0, NULL, 2}, {"CHH", 0, NULL, 3}, {"keepDupes", 0, NULL, 4}, {"keepSingleton",0, NULL, 5}, {"keepDiscordant",0,NULL, 6}, {"OT", 1, NULL, 7}, {"OB", 1, NULL, 8}, {"CTOT", 1, NULL, 9}, {"CTOB", 1, NULL, 10}, {"mergeContext", 0, NULL, 11}, {"help", 0, NULL, 'h'}, {0, 0, NULL, 0} }; while((c = getopt_long(argc, argv, "q:p:r:l:o:D:f:c:m:", lopts,NULL)) >=0) { switch(c) { case 'h' : extract_usage(); return 0; case 'o' : opref = strdup(optarg); break; case 'D' : config.maxDepth = atoi(optarg); break; case 'r': config.reg = strdup(optarg); break; case 'l' : config.bedName = optarg; break; case 1 : config.keepCpG = 0; break; case 2 : config.keepCHG = 1; break; case 3 : config.keepCHH = 1; break; case 4 : config.keepDupes = 1; break; case 5 : config.keepSingleton = 1; break; case 6 : config.keepDiscordant = 1; break; case 7 : parseBounds(optarg, config.bounds, 0); break; case 8 : parseBounds(optarg, config.bounds, 1); break; case 9 : parseBounds(optarg, config.bounds, 2); break; case 10 : parseBounds(optarg, config.bounds, 3); break; case 11 : config.merge = 1; break; case 'q' : config.minMapq = atoi(optarg); break; case 'p' : config.minPhred = atoi(optarg); break; case 'm' : config.logit = 1; break; case 'f' : config.fraction = 1; break; case 'c' : config.counts = 1; break; case '?' : default : fprintf(stderr, "Invalid option '%c'\n", c); extract_usage(); return 1; } } if(argc == 1) { extract_usage(); return 0; } if(argc-optind != 2) { fprintf(stderr, "You must supply a reference genome in fasta format and an input BAM file!!!\n"); extract_usage(); return -1; } //Are the options reasonable? if(config.minPhred < 1) { fprintf(stderr, "-p %i is invalid. resetting to 1, which is the lowest possible value.\n", config.minPhred); config.minPhred = 1; } if(config.minMapq < 0) { fprintf(stderr, "-q %i is invalid. Resetting to 0, which is the lowest possible value.\n", config.minMapq); config.minMapq = 0; } if(config.fraction+config.counts+config.logit > 1) { fprintf(stderr, "More than one of --fraction, --counts, and --logit were specified. These are mutually exclusive.\n"); extract_usage(); return 1; } //Has more than one output format been requested? if(config.fraction + config.counts + config.logit > 1) { fprintf(stderr, "You may specify AT MOST one of -c/--counts, -f/--fraction, or -m/--logit.\n"); return -6; } //Is there still a metric to output? if(!(config.keepCpG + config.keepCHG + config.keepCHH)) { fprintf(stderr, "You haven't specified any metrics to output!\nEither don't use the --noCpG option or specify --CHG and/or --CHH.\n"); return -1; } //Open the files if((config.fai = fai_load(argv[optind])) == NULL) { fprintf(stderr, "Couldn't open the index for %s!\n", argv[optind]); extract_usage(); return -2; } if((config.fp = hts_open(argv[optind+1], "rb")) == NULL) { fprintf(stderr, "Couldn't open %s for reading!\n", argv[optind+1]); return -4; } if((config.bai = sam_index_load(config.fp, argv[optind+1])) == NULL) { fprintf(stderr, "Couldn't load the index for %s, will attempt to build it.\n", argv[optind+1]); if(bam_index_build(argv[optind+1], 0) < 0) { fprintf(stderr, "Couldn't build the index for %s! File corrupted?\n", argv[optind+1]); return -5; } if((config.bai = sam_index_load(config.fp, argv[optind+1])) == NULL) { fprintf(stderr, "Still couldn't load the index, quiting.\n"); return -5; } } //Output files config.output_fp = malloc(sizeof(FILE *) * 3); assert(config.output_fp); if(opref == NULL) { opref = strdup(argv[optind+1]); assert(opref); p = strrchr(opref, '.'); if(p != NULL) *p = '\0'; fprintf(stderr, "writing to prefix:'%s'\n", opref); } if(config.fraction) { oname = malloc(sizeof(char) * (strlen(opref)+19)); } else if(config.counts) { oname = malloc(sizeof(char) * (strlen(opref)+21)); } else if(config.logit) { oname = malloc(sizeof(char) * (strlen(opref)+20)); } else { oname = malloc(sizeof(char) * (strlen(opref)+14)); } assert(oname); if(config.keepCpG) { if(config.fraction) { sprintf(oname, "%s_CpG.meth.bedGraph", opref); } else if(config.counts) { sprintf(oname, "%s_CpG.counts.bedGraph", opref); } else if(config.logit) { sprintf(oname, "%s_CpG.logit.bedGraph", opref); } else { sprintf(oname, "%s_CpG.bedGraph", opref); } config.output_fp[0] = fopen(oname, "w"); if(config.output_fp[0] == NULL) { fprintf(stderr, "Couldn't open the output CpG metrics file for writing! Insufficient permissions?\n"); return -3; } printHeader(config.output_fp[0], "CpG", opref, config); } if(config.keepCHG) { if(config.fraction) { sprintf(oname, "%s_CHG.meth.bedGraph", opref); } else if(config.counts) { sprintf(oname, "%s_CHG.counts.bedGraph", opref); } else if(config.logit) { sprintf(oname, "%s_CHG.logit.bedGraph", opref); } else { sprintf(oname, "%s_CHG.bedGraph", opref); } config.output_fp[1] = fopen(oname, "w"); if(config.output_fp[1] == NULL) { fprintf(stderr, "Couldn't open the output CHG metrics file for writing! Insufficient permissions?\n"); return -3; } printHeader(config.output_fp[1], "CHG", opref, config); } if(config.keepCHH) { if(config.fraction) { sprintf(oname, "%s_CHH.meth.bedGraph", opref); } else if(config.counts) { sprintf(oname, "%s_CHH.counts.bedGraph", opref); } else if(config.logit) { sprintf(oname, "%s_CHH.logit.bedGraph", opref); } else { sprintf(oname, "%s_CHH.bedGraph", opref); } config.output_fp[2] = fopen(oname, "w"); if(config.output_fp[2] == NULL) { fprintf(stderr, "Couldn't open the output CHH metrics file for writing! Insufficient permissions?\n"); return -3; } printHeader(config.output_fp[2], "CHH", opref, config); } //Run the pileup extractCalls(&config); //Close things up hts_close(config.fp); fai_destroy(config.fai); if(config.keepCpG) fclose(config.output_fp[0]); if(config.keepCHG) fclose(config.output_fp[1]); if(config.keepCHH) fclose(config.output_fp[2]); hts_idx_destroy(config.bai); free(opref); if(config.reg) free(config.reg); if(config.bed) destroyBED(config.bed); free(oname); free(config.output_fp); return 0; }
int main(int argc,char *argv[]) { QCoreApplication app(argc,argv); //important for qApp->applicationDirPath() in processtracker // test_armadillo(); // test_pca_2(); // return 0; CLParams CLP; QStringList required; CLP=get_command_line_params(argc,argv,required); ProcessTracker PT; register_processors(PT); if (CLP.unnamed_parameters.count()>1) { printf("Only one command parameter may be specified.\n"); return -1; } QString command=CLP.unnamed_parameters.value(0); if (command.isEmpty()) { printf("\nmountainsort processors:\n"); for (int i=0; i<PT.processorCount(); i++) { QString cmd=PT.processor(i).command; printf("%s ",cmd.toLatin1().data()); } printf("\n\n"); return -1; } PTProcessor PP=PT.findProcessor(command); printf("%s version %s\n",PP.command.toLatin1().data(),PP.version.toLatin1().data()); if (!CLP.named_parameters.value("force").toInt()) { if (PT.processAlreadyCompleted(CLP)) { printf("Process already completed.\n"); return 0; } } CLP.named_parameters.remove("force"); QTime timer; timer.start(); if (command=="extract") { QString input_path=CLP.named_parameters["input"]; QString output_path=CLP.named_parameters["output"]; int num_channels=CLP.named_parameters["num_channels"].toInt(); long t1=CLP.named_parameters["t1"].toLong(); long t2=CLP.named_parameters["t2"].toLong(); QStringList channels_str=CLP.named_parameters["channels"].split(","); int M=channels_str.count(); int channels[M]; for (int m=0; m<M; m++) channels[m]=channels_str[m].toInt(); if ((input_path.isEmpty())||(output_path.isEmpty())) {extract_usage(); return -1;} if (M==0) {extract_usage(); return -1;} if (!extract(input_path.toLatin1().data(),output_path.toLatin1().data(),num_channels,M,channels,t1,t2)) { printf("Error in extract.\n"); return -1; } } else if (command=="bandpass_filter") { QString input_path=CLP.named_parameters["input"]; QString output_path=CLP.named_parameters["output"]; double samplefreq=CLP.named_parameters["samplefreq"].toDouble(); double freq_min=CLP.named_parameters["freq_min"].toDouble(); double freq_max=CLP.named_parameters["freq_max"].toDouble(); double outlier_threshold=CLP.named_parameters["outlier_threshold"].toDouble(); if ((input_path.isEmpty())||(output_path.isEmpty())) {bandpass_filter_usage(); return -1;} if ((samplefreq==0)||(freq_min==0)||(freq_max==0)) {bandpass_filter_usage(); return -1;} if (!bandpass_filter(input_path.toLatin1().data(),output_path.toLatin1().data(),samplefreq,freq_min,freq_max,outlier_threshold)) { printf("Error in bandpass_filter.\n"); return -1; } } else if (command=="normalize_channels") { QString input_path=CLP.named_parameters["input"]; QString output_path=CLP.named_parameters["output"]; if ((input_path.isEmpty())||(output_path.isEmpty())) {normalize_channels_usage(); return -1;} if (!normalize_channels(input_path.toLatin1().data(),output_path.toLatin1().data())) { printf("Error in normalize_channels.\n"); return -1; } } else if (command=="whiten") { QString input_path=CLP.named_parameters["input"]; QString output_path=CLP.named_parameters["output"]; int ncomp=CLP.named_parameters["ncomp"].toInt(); if ((input_path.isEmpty())||(output_path.isEmpty())) {whiten_usage(); return -1;} if (ncomp==0) {whiten_usage(); return -1;} if (!whiten(input_path.toLatin1().data(),output_path.toLatin1().data(),ncomp)) { printf("Error in whiten.\n"); return -1; } } else if (command=="detect") { QString input_path=CLP.named_parameters["input"]; QString output_path=CLP.named_parameters["output"]; int inner_window_width=CLP.named_parameters["inner_window_width"].toInt(); int outer_window_width=CLP.named_parameters["outer_window_width"].toInt(); float threshold=CLP.named_parameters["threshold"].toFloat(); if ((input_path.isEmpty())||(output_path.isEmpty())) {detect_usage(); return -1;} if (inner_window_width==0) {detect_usage(); return -1;} if (outer_window_width==0) {detect_usage(); return -1;} if (threshold==0) {detect_usage(); return -1;} if (!detect(input_path.toLatin1().data(),output_path.toLatin1().data(),inner_window_width,outer_window_width,threshold)) { printf("Error in detect.\n"); return -1; } } else if (command=="features") { QString input_path=CLP.named_parameters["input"]; QString detect_path=CLP.named_parameters["detect"]; QString adjacency_path=CLP.named_parameters["adjacency"]; QString output_path=CLP.named_parameters["output"]; int num_features=CLP.named_parameters["num_features"].toInt(); int clip_size=CLP.named_parameters["clip_size"].toInt(); if ((input_path.isEmpty())||(detect_path.isEmpty())||(adjacency_path.isEmpty())||(output_path.isEmpty())) {features_usage(); return -1;} if (num_features==0) {features_usage(); return -1;} if (clip_size==0) {features_usage(); return -1;} if (!features(input_path.toLatin1().data(),detect_path.toLatin1().data(),adjacency_path.toLatin1().data(),output_path.toLatin1().data(),num_features,clip_size)) { printf("Error in features.\n"); return -1; } } else if (command=="cluster") { QString input_path=CLP.named_parameters["input"]; QString output_path=CLP.named_parameters["output"]; if ((input_path.isEmpty())||(output_path.isEmpty())) {cluster_usage(); return -1;} if (!cluster(input_path.toLatin1().data(),output_path.toLatin1().data())) { printf("Error in cluster.\n"); return -1; } } else if (command=="split_clusters") { QString input_path=CLP.named_parameters["input"]; QString cluster_path=CLP.named_parameters["cluster"]; QString output_path=CLP.named_parameters["output"]; int num_features=CLP.named_parameters["num_features"].toInt(); int clip_size=CLP.named_parameters["clip_size"].toInt(); if ((input_path.isEmpty())||(cluster_path.isEmpty())||(output_path.isEmpty())) {cluster_usage(); return -1;} if (num_features==0) {cluster_usage(); return -1;} if (clip_size==0) {cluster_usage(); return -1;} if (!split_clusters(input_path.toLatin1().data(),cluster_path.toLatin1().data(),output_path.toLatin1().data(),num_features,clip_size)) { printf("Error in cluster.\n"); return -1; } } else if (command=="templates") { QString input_path=CLP.named_parameters["input"]; QString cluster_path=CLP.named_parameters["cluster"]; QString output_path=CLP.named_parameters["output"]; int clip_size=CLP.named_parameters["clip_size"].toInt(); if ((input_path.isEmpty())||(output_path.isEmpty())) {templates_usage(); return -1;} if (clip_size==0) {templates_usage(); return -1;} if (!templates(input_path.toLatin1().data(),cluster_path.toLatin1().data(),output_path.toLatin1().data(),clip_size)) { printf("Error in templates.\n"); return -1; } } else if (command=="consolidate") { QString cluster_path=CLP.named_parameters["cluster"]; QString templates_path=CLP.named_parameters["templates"]; QString cluster_out_path=CLP.named_parameters["cluster_out"]; QString templates_out_path=CLP.named_parameters["templates_out"]; QString load_channels_out_path=CLP.named_parameters["load_channels_out"]; if ((cluster_path.isEmpty())||(templates_path.isEmpty())) {consolidate_usage(); return -1;} if ((cluster_out_path.isEmpty())||(templates_out_path.isEmpty())) {consolidate_usage(); return -1;} if (load_channels_out_path.isEmpty()) {consolidate_usage(); return -1;} if (!consolidate(cluster_path.toLatin1().data(),templates_path.toLatin1().data(),cluster_out_path.toLatin1().data(),templates_out_path.toLatin1().data(),load_channels_out_path.toLatin1().data())) { printf("Error in consolidate.\n"); return -1; } } else if (command=="fit") { QString input_path=CLP.named_parameters["input"]; QString cluster_path=CLP.named_parameters["cluster"]; QString templates_path=CLP.named_parameters["templates"]; QString cluster_out_path=CLP.named_parameters["cluster_out"]; if ((input_path.isEmpty())||(cluster_path.isEmpty())||(templates_path.isEmpty())) {fit_usage(); return -1;} if ((cluster_out_path.isEmpty())) {fit_usage(); return -1;} if (!fit(input_path.toLatin1().data(),templates_path.toLatin1().data(),cluster_path.toLatin1().data(),cluster_out_path.toLatin1().data())) { printf("Error in fit.\n"); return -1; } } else if (command=="extract_clips") { QString input_path=CLP.named_parameters["input"]; QString cluster_path=CLP.named_parameters["cluster"]; QString output_path=CLP.named_parameters["output"]; QString index_out_path=CLP.named_parameters["index_out"]; int clip_size=CLP.named_parameters["clip_size"].toInt(); if ((input_path.isEmpty())||(cluster_path.isEmpty())) {extract_usage(); return -1;} if ((output_path.isEmpty())||(index_out_path.isEmpty())) {extract_usage(); return -1;} if (!extract_clips(input_path.toLatin1().data(),cluster_path.toLatin1().data(),output_path.toLatin1().data(),index_out_path.toLatin1().data(),clip_size)) { printf("Error in extract_clips.\n"); return -1; } } else { printf("Unknown command: %s\n",command.toLatin1().data()); return -1; } PT.reportProcessCompleted(CLP); printf("Elapsed time for %s: %.2f seconds\n",command.toLatin1().data(),timer.elapsed()*1.0/1000); return 0; }