/*
 * Entry point of the [bamstat] mode.
 *
 * Declares the mode-specific command-line options on top of the basic ones
 * provided by D.declareBasicOptions(), parses argv into D.options, checks that
 * --bam, --bed and --out are present, copies the filter parameters into D and
 * then runs: readAnnotationsBED -> readSequences -> writeOutput.
 *
 * argv: command-line tokens for this mode (handed to boost's command_line_parser).
 *
 * The process terminates with EXIT_FAILURE when the command line cannot be
 * parsed and with EXIT_SUCCESS after printing the help.
 */
void bamstat_main(vector < string > & argv) {
	bamstat_data D;

	//-------------------------
	// 1. DECLARE ALL OPTIONS
	//-------------------------
	D.declareBasicOptions();

	boost::program_options::options_description opt_files ("\x1B[32mI/O\33[0m");
	opt_files.add_options()
		("bam", boost::program_options::value< string >(), "Sequence data in BAM/SAM format.")
		("bed", boost::program_options::value< string >(), "Annotation data in BED format.")
		("out", boost::program_options::value< string >(), "Output file.");

	boost::program_options::options_description opt_parameters ("\x1B[32mFilters\33[0m");
	opt_parameters.add_options()
		("filter-mapping-quality", boost::program_options::value< unsigned int >()->default_value(10), "Minimal phred mapping quality for a read to be considered.")
		("filter-keep-duplicates", "Keep duplicate sequencing reads in the process.");

	D.option_descriptions.add(opt_files).add(opt_parameters);

	//-------------------
	// 2. PARSE OPTIONS
	//-------------------
	try {
		boost::program_options::store(boost::program_options::command_line_parser(argv).options(D.option_descriptions).run(), D.options);
		boost::program_options::notify(D.options);
	} catch ( const boost::program_options::error& e ) {
		cerr << "Error parsing [bamstat] command line :" << string(e.what()) << endl;
		// A malformed command line is a failure: report it through the exit status.
		exit(EXIT_FAILURE);
	}

	//---------------------
	// 3. PRINT HELP/HEADER
	//---------------------
	vrb.ctitle("CALCULATE BASIC QC METRICS FOR A BAM FILE");
	if (D.options.count("help")) {
		cout << D.option_descriptions << endl;
		exit(EXIT_SUCCESS);
	}

	//-----------------
	// 4. COMMON CHECKS
	//-----------------
	// NOTE(review): the .as<string>() calls in section 5 rely on vrb.error() not returning — confirm.
	if (!D.options.count("bam")) vrb.error("Sequence data needs to be specified with --bam [file.bam]");
	if (!D.options.count("bed")) vrb.error("Annotation data needs to be specified with --bed [file.bed]");
	if (!D.options.count("out")) vrb.error("Output needs to be specified with --out [file.out]");

	//TO DO CHECK PARAMETER VALUES
	D.param_min_mapQ = D.options["filter-mapping-quality"].as < unsigned int > ();
	D.param_dup_rd = (D.options.count("filter-keep-duplicates") != 0);
	vrb.bullet("Mapping quality >= " + stb.str(D.param_min_mapQ));
	vrb.bullet("Keep duplicate reads = " + stb.str(D.param_dup_rd));

	//------------------------------------------
	// 5. READ FILES / INITIALIZE / RUN ANALYSIS
	//------------------------------------------
	D.processBasicOptions();
	D.readAnnotationsBED(D.options["bed"].as < string > ());
	D.readSequences(D.options["bam"].as < string > ());
	D.writeOutput(D.options["out"].as < string > ());
}
/*
 * Entry point of the [ase] mode.
 *
 * Declares the mode-specific command-line options on top of the basic ones
 * provided by D.declareBasicOptions(), parses argv into D.options, checks that
 * --vcf, --bam, --ind and --out are present, copies the filter parameters into
 * D, resolves the target sample and then runs readGenotypes -> readSequences.
 *
 * argv: command-line tokens for this mode (handed to boost's command_line_parser).
 *
 * The process terminates with EXIT_FAILURE when the command line cannot be
 * parsed and with EXIT_SUCCESS after printing the help.
 */
void ase_main(vector < string > & argv) {
	ase_data D;

	//-------------------------
	// 1. DECLARE ALL OPTIONS
	//-------------------------
	D.declareBasicOptions();

	boost::program_options::options_description opt_files ("\x1B[32mI/O\33[0m");
	opt_files.add_options()
		("vcf", boost::program_options::value< string >(), "Genotypes in VCF/BCF format.")
		("bam", boost::program_options::value< string >(), "Sequence data in BAM/SAM format.")
		("ind", boost::program_options::value< string >(), "Sample to be processed.")
		("reg", boost::program_options::value< string >()->default_value(""), "Genomic region(s) to be processed.")
		("out", boost::program_options::value< string >(), "Output file.");

	boost::program_options::options_description opt_parameters ("\x1B[32mFilters\33[0m");
	opt_parameters.add_options()
		("filter-mapping-quality", boost::program_options::value< unsigned int >()->default_value(10), "Minimal phred mapping quality for a read to be considered.")
		("filter-base-quality", boost::program_options::value< unsigned int >()->default_value(10), "Minimal phred quality for a base to be considered.")
		("filter-binomial-pvalue", boost::program_options::value< double >()->default_value(1.0, "1.0"), "Binomial p-value threshold for ASE in output.")
		("filter-minimal-coverage", boost::program_options::value< unsigned int >()->default_value(10), "Minimal coverage for a genotype to be considered.")
		("filter-imputation-qual", boost::program_options::value< double >()->default_value(0.90, "0.90"), "Minimal imputation information score for a variant to be considered.")
		("filter-imputation-prob", boost::program_options::value< double >()->default_value(0.99, "0.99"), "Minimal posterior probability for a genotype to be considered.")
		("filter-remove-duplicates", "Remove duplicate sequencing reads in the process.");

	D.option_descriptions.add(opt_files).add(opt_parameters);

	//-------------------
	// 2. PARSE OPTIONS
	//-------------------
	try {
		boost::program_options::store(boost::program_options::command_line_parser(argv).options(D.option_descriptions).run(), D.options);
		boost::program_options::notify(D.options);
	} catch ( const boost::program_options::error& e ) {
		cerr << "Error parsing [ase] command line :" << string(e.what()) << endl;
		// A malformed command line is a failure: report it through the exit status.
		exit(EXIT_FAILURE);
	}

	//---------------------
	// 3. PRINT HELP/HEADER
	//---------------------
	vrb.ctitle("CALLING ALLELE SPECIFIC SITES");
	if (D.options.count("help")) {
		cout << D.option_descriptions << endl;
		exit(EXIT_SUCCESS);
	}

	//-----------------
	// 4. COMMON CHECKS
	//-----------------
	// NOTE(review): the .as<string>() calls in section 5 rely on vrb.error() not returning — confirm.
	if (!D.options.count("vcf")) vrb.error("Genotype data needs to be specified with --vcf [file.vcf]");
	if (!D.options.count("bam")) vrb.error("Sequence data needs to be specified with --bam [file.bam]");
	if (!D.options.count("ind")) vrb.error("Sample ID needs to be specified with --ind [sample_id]");
	if (!D.options.count("out")) vrb.error("Output needs to be specified with --out [file.out]");

	//TO DO CHECK PARAMETER VALUES
	D.param_min_mapQ = D.options["filter-mapping-quality"].as < unsigned int > ();
	D.param_min_baseQ = D.options["filter-base-quality"].as < unsigned int > ();
	D.param_min_cov = D.options["filter-minimal-coverage"].as < unsigned int > ();
	D.param_min_pval = D.options["filter-binomial-pvalue"].as < double > ();
	D.param_min_gp = D.options["filter-imputation-prob"].as < double > ();
	D.param_min_iq = D.options["filter-imputation-qual"].as < double > ();

	// param_dup_rd is set when --filter-remove-duplicates is ABSENT, so the
	// value reported to the user must be the flag itself, not param_dup_rd
	// (printing param_dup_rd showed the opposite of what was requested).
	const bool remove_duplicates = (D.options.count("filter-remove-duplicates") != 0);
	D.param_dup_rd = !remove_duplicates;

	vrb.bullet("Mapping quality >= " + stb.str(D.param_min_mapQ));
	vrb.bullet("Base quality >= " + stb.str(D.param_min_baseQ));
	vrb.bullet("Coverage >= " + stb.str(D.param_min_cov));
	vrb.bullet("Binomial p-value threshold = " + stb.str(D.param_min_pval));
	vrb.bullet("Genotype probability >= " + stb.str(D.param_min_gp));
	vrb.bullet("Imputation quality >= " + stb.str(D.param_min_iq));
	vrb.bullet("Remove duplicate reads = " + stb.str(remove_duplicates));

	//------------------------------------------
	// 5. READ FILES / INITIALIZE / RUN ANALYSIS
	//------------------------------------------
	D.processBasicOptions();
	D.readSampleFromVCF(D.options["vcf"].as < string > ());
	D.readSampleFromSTR(D.options["ind"].as < string > ());
	D.mergeSampleLists();

	// Exactly one target sample must remain after merging the two sample lists.
	if (D.sample_count == 0) vrb.error("Could not find [" + D.options["ind"].as < string > () + "] in VCF/BCF file");
	else if (D.sample_count >= 2) vrb.error("More than one sample specified with --ind");
	else vrb.bullet("Target sample is [" + D.sample_id[0] + "]");

	D.readGenotypes(D.options["vcf"].as < string > (), D.options["reg"].as < string > ());
	D.readSequences(D.options["bam"].as < string > (), D.options["out"].as < string > ());
}
int main(int argc, char ** argv) { data D; //------------------------- // 1. DECLARE ALL OPTIONS //------------------------- bpo::options_description opt_basic ("\33[33mBasic options\33[0m"); opt_basic.add_options() ("help", "Produces this help") ("silent", "Silent mode on terminal") ("seed", bpo::value< int >()->default_value(time(NULL)), "Random number seed. Useful to replicate runs."); bpo::options_description opt_files ("\33[33mInput/Output files\33[0m"); opt_files.add_options() ("log,L", bpo::value< string >()->default_value("fastQTL_date_time_UUID.log"), "Screen output is copied in this file.") ("vcf,V", bpo::value< string >(), "Genotypes in VCF format.") ("bed,B", bpo::value< string >(), "Phenotypes in BED format.") ("cov,C", bpo::value< string >(), "Covariates in TXT format.") ("grp,G", bpo::value< string >(), "Phenotype groups in TXT format.") ("out,O", bpo::value< string >(), "Output file."); bpo::options_description opt_exclusion ("\33[33mExclusion/Inclusion files\33[0m"); opt_exclusion.add_options() ("exclude-samples", bpo::value< string >(), "List of samples to exclude.") ("include-samples", bpo::value< string >(), "List of samples to include.") ("exclude-sites", bpo::value< string >(), "List of sites to exclude.") ("include-sites", bpo::value< string >(), "List of sites to include.") ("exclude-phenotypes", bpo::value< string >(), "List of phenotypes to exclude.") ("include-phenotypes", bpo::value< string >(), "List of phenotypes to include.") ("exclude-covariates", bpo::value< string >(), "List of covariates to exclude.") ("include-covariates", bpo::value< string >(), "List of covariates to include."); bpo::options_description opt_parameters ("\33[33mParameters\33[0m"); opt_parameters.add_options() ("normal", "Normal transform the phenotypes.") ("window,W", bpo::value< double >()->default_value(1e6), "Cis-window size.") ("threshold,T", bpo::value< double >()->default_value(1.0), "P-value threshold used in nominal pass of association") 
("maf-threshold", bpo::value< double >()->default_value(0.0), "Minor allele frequency threshold used when parsing genotypes") ("ma-sample-threshold", bpo::value< int >()->default_value(0), "Minimum number of samples carrying the minor allele; used when parsing genotypes") ("global-af-threshold", bpo::value< double >()->default_value(0.0), "AF threshold for all samples in VCF (used to filter AF in INFO field)") ("interaction-maf-threshold", bpo::value< double >()->default_value(0.0), "MAF threshold for interactions, applied to lower and upper half of samples"); bpo::options_description opt_modes ("\33[33mModes\33[0m"); opt_modes.add_options() ("permute,P", bpo::value< vector < int > >()->multitoken(), "Permutation pass to calculate corrected p-values for molecular phenotypes.") ("psequence", bpo::value< string >(), "Permutation sequence.") ("map", bpo::value< string >(), "Map best QTL candidates per molecular phenotype.") ("map-full", "Scan full cis-window to discover independent signals.") ("interaction", bpo::value< string >(), "Test for interactions with variable specified in file.") ("report-best-only", bpo::bool_switch()->default_value(false), "Report best variant only (nominal mode)"); bpo::options_description opt_parallel ("\33[33mParallelization\33[0m"); opt_parallel.add_options() ("chunk,K", bpo::value< vector < int > >()->multitoken(), "Specify which chunk needs to be processed") ("commands", bpo::value< vector < string > >()->multitoken(), "Generates all commands") ("region,R", bpo::value< string >(), "Region of interest."); bpo::options_description descriptions; descriptions.add(opt_basic).add(opt_files).add(opt_exclusion).add(opt_parameters).add(opt_modes).add(opt_parallel); //------------------- // 2. 
PARSE OPTIONS //------------------- bpo::variables_map options; try { bpo::store(bpo::command_line_parser(argc, argv).options(descriptions).run(), options); bpo::notify(options); } catch ( const boost::program_options::error& e ) { cerr << "Error parsing command line :" << string(e.what()) << endl; exit(0); } //----------------------- // 3. PRINT HEADER/HELP //----------------------- if (! options.count("silent")) { cout << endl; cout << "\33[33mF\33[0mast \33[33mQTL\33[0m" << endl; cout << " * Authors : Olivier DELANEAU, Halit ONGEN, Alfonso BUIL & Manolis DERMITZAKIS" << endl; cout << " * Contact : [email protected]" << endl; cout << " * Webpage : http://fastqtl.sourceforge.net/" << endl; cout << " * Version : v2.184_gtex" << endl; if (options.count("help")) { cout << descriptions<< endl; exit(1); } } //-------------- // 4. LOG FILE //-------------- struct timeval start_time, stop_time; gettimeofday(&start_time, 0); START_DATE = time(0); //localtime(&START_DATE); //string logfile = "fastQTL_" + sutils::date2str(&START_DATE, "%d%m%Y_%Hh%Mm%Ss") + "_" + putils::getRandomID() + ".log"; if (!options["log"].defaulted()) { if (!LOG.open(options["log"].as < string > ())) { cerr << "Impossible to open log file[" << options["log"].as < string > () << "] check writing permissions!" << endl; exit(1); } } else LOG.muteL(); if (options.count("silent")) LOG.muteC(); //------------------------ // 5. 
OPTIONS COMBINATIONS //------------------------ if (!options.count("vcf")) LOG.error("Genotype data needs to be specified with --vcf [file.vcf]"); if (!options.count("bed")) LOG.error("Phenotype data needs to be specified with --bed [file.bed]"); if (!options.count("out")) LOG.error("Output needs to be specified with --out [file.out]"); int nParallel = options.count("chunk") + options.count("commands") + options.count("region"); if (nParallel != 1) LOG.error("Please, specify one of these options [--region, --chunk, --commands]"); int nMode = options.count("permute") + options.count("map"); if (nMode > 1) LOG.error("Please, specify only one of these options [--permute, --map]"); //--------------- // 6. CHECK FILES //--------------- if (!futils::isFile(options["vcf"].as < string > ())) LOG.error(options["vcf"].as < string > () + " is impossible to open, check file existence or reading permissions"); if (!futils::isFile(options["bed"].as < string > ())) LOG.error(options["bed"].as < string > () + " is impossible to open, check file existence or reading permissions"); if (options.count("cov") && !futils::isFile(options["cov"].as < string > ())) LOG.error(options["cov"].as < string > () + " is impossible to open, check file existence or reading permissions"); if (options.count("interaction") && !futils::isFile(options["interaction"].as < string > ())) LOG.error(options["interaction"].as < string > () + " is impossible to open, check file existence or reading permissions"); if (options.count("grp") && !futils::isFile(options["grp"].as < string > ())) LOG.error(options["grp"].as < string > () + " is impossible to open, check file existence or reading permissions"); if (options.count("map") && !futils::isFile(options["map"].as < string > ())) LOG.error(options["map"].as < string > () + " is impossible to open, check file existence or reading permissions"); if (!futils::createFile(options["out"].as < string > ())) LOG.error(options["out"].as < string > () + " is impossible 
to create, check writing permissions"); //----------------------------------- // 6. CHECK INCLUSION/EXCLUSION FILES //----------------------------------- if (options.count("exclude-samples") && !futils::isFile(options["exclude-samples"].as < string > ())) LOG.error(options["exclude-samples"].as < string > () + " is impossible to open, check file existence or reading permissions"); if (options.count("include-samples") && !futils::isFile(options["include-samples"].as < string > ())) LOG.error(options["include-samples"].as < string > () + " is impossible to open, check file existence or reading permissions"); if (options.count("exclude-sites") && !futils::isFile(options["exclude-sites"].as < string > ())) LOG.error(options["exclude-sites"].as < string > () + " is impossible to open, check file existence or reading permissions"); if (options.count("include-sites") && !futils::isFile(options["include-sites"].as < string > ())) LOG.error(options["include-sites"].as < string > () + " is impossible to open, check file existence or reading permissions"); if (options.count("exclude-phenotypes") && !futils::isFile(options["exclude-phenotypes"].as < string > ())) LOG.error(options["exclude-phenotypes"].as < string > () + " is impossible to open, check file existence or reading permissions"); if (options.count("include-phenotypes") && !futils::isFile(options["include-phenotypes"].as < string > ())) LOG.error(options["include-phenotypes"].as < string > () + " is impossible to open, check file existence or reading permissions"); if (options.count("exclude-covariates") && !futils::isFile(options["exclude-covariates"].as < string > ())) LOG.error(options["exclude-covariates"].as < string > () + " is impossible to open, check file existence or reading permissions"); if (options.count("include-covariates") && !futils::isFile(options["include-covariates"].as < string > ())) LOG.error(options["include-covariates"].as < string > () + " is impossible to open, check file existence or 
reading permissions"); //---------------------------- // 7. CHECK METHODS/PARAMETERS //---------------------------- if (options.count("interaction")) { if (options.count("permute")) { LOG.println("\nPerform permutation-based interaction analysis (used to calculate corrected p-values for MPs)"); vector < int > nPerm = options["permute"].as < vector < int > > (); if (nPerm.size() != 1) LOG.error("Interactions only work with a fixed number of permutations!"); else { if (nPerm[0] < 50) LOG.warning("Permutation number seems to be low, check parameters"); LOG.println(" * Perform " + sutils::int2str(nPerm[0]) + " permutations"); } } else { LOG.println("\nPerform nominal interaction analysis"); } LOG.println(" * Test interaction with term from [" + options["interaction"].as < string > () + "]"); } else if (options.count("permute")) { LOG.println("\nPerform permutation based analysis (used to calculate corrected p-values for MPs)"); vector < int > nPerm = options["permute"].as < vector < int > > (); if (nPerm.size() > 3 || nPerm.size() < 1) LOG.error ("Option --permute takes 1, 2 or 3 arguments"); if (nPerm.size() == 1) { if (nPerm[0] <= 0) LOG.error("Permutation number needs to be positive integer"); if (nPerm[0] < 50) LOG.warning("Permutation number seems to be low, check parameters"); LOG.println(" * Perform " + sutils::int2str(nPerm[0]) + " permutations"); } else if (nPerm.size() == 2) { if (nPerm[0] <= 0 || nPerm[1] <= 0) LOG.error("Permutation number needs to be positive"); if (nPerm[1] <= nPerm[0]) LOG.error("For adaptive permutation scheme, arg1 needs to be smaller than arg2!"); LOG.println(" * Perform between " + sutils::int2str(nPerm[0]) + " and " + sutils::int2str(nPerm[1]) + " permutations"); } else { if (nPerm[0] <= 0 || nPerm[1] <= 0 || nPerm[2] <= 0) LOG.error("Permutation number needs to be positive"); if (nPerm[2] <= nPerm[0]) LOG.error("For adaptive permutation scheme, arg1 needs to be smaller than arg3!"); if (nPerm[0] <= nPerm[1]) LOG.error("For adaptive 
permutation scheme, arg2 needs to be smaller than arg1!"); LOG.println(" * Perform between " + sutils::int2str(nPerm[0]) + " and " + sutils::int2str(nPerm[2]) + " permutations and stop when " + sutils::int2str(nPerm[1]) + " best associations are found"); } if (options.count("grp")) LOG.println(" * Using MP groups from [" + options["grp"].as < string > () + "]"); } else if (options.count("map")) { LOG.println("\nPerform conditional based analysis (used to map significant QTLs for MPs"); LOG.println(" * Using per MP p-value threshold from [" + options["map"].as < string > () + "]"); if (options.count("map-full")) LOG.println(" * Scanning all variants in cis and not only nominally significant ones"); } else { LOG.println("\nPerform nominal analysis (used to get raw p-values of association)"); double threshold = options["threshold"].as < double > (); if (threshold <= 0.0 || threshold > 1.0) LOG.error("Incorrect --threshold value : 0 < X <= 1"); LOG.println(" * Using p-value threshold = " + sutils::double2str(threshold, 10)); } if (options["seed"].as < int > () < 0) LOG.error("Random number generator needs a positive seed value"); else srand(options["seed"].as < int > ()); LOG.println(" * Random number generator is seeded with " + sutils::int2str(options["seed"].as < int > ())); if (options["window"].as < double > () <= 0) LOG.error ("Incorrect value for option --window (null or negative value)"); if (options["window"].as < double > () > 1e9) LOG.error ("Cis-window cannot be larger than 1e9bp"); LOG.println(" * Considering variants within " + sutils::double2str(options["window"].as < double > ()) + " bp of the MPs"); D.cis_window = options["window"].as < double > (); D.maf_threshold = options["maf-threshold"].as < double > (); if (D.maf_threshold < 0.0 || D.maf_threshold >= 0.5) LOG.error("Incorrect --maf-threshold value : 0 <= X < 0.5"); LOG.println(" * Using minor allele frequency threshold = " + sutils::double2str(D.maf_threshold, 4)); D.ma_sample_threshold = 
options["ma-sample-threshold"].as < int > (); if (D.ma_sample_threshold < 0) LOG.error("Incorrect --ma-sample-threshold : 0 <= X"); LOG.println(" * Using minor allele sample threshold = " + sutils::int2str(D.ma_sample_threshold)); D.global_af_threshold = options["global-af-threshold"].as < double > (); if (D.global_af_threshold < 0.0 || D.global_af_threshold >= 0.5) LOG.error("Incorrect --global-af-threshold value : 0 <= X < 0.5"); LOG.println(" * Using INFO field AF threshold = " + sutils::double2str(D.global_af_threshold, 4)); D.interaction_maf_threshold = options["interaction-maf-threshold"].as < double > (); if (D.interaction_maf_threshold < 0.0 || D.interaction_maf_threshold >= 0.5) LOG.error("Incorrect --interaction-maf-threshold : 0 <= X < 0.5"); LOG.println(" * Applying interaction MAF AF threshold = " + sutils::double2str(D.interaction_maf_threshold, 4)); if (options.count("chunk")) { vector < int > nChunk = options["chunk"].as < vector < int > > (); if (nChunk.size() != 2) LOG.error ("--chunk needs 2 integer arguments"); if (nChunk[0] > nChunk[1]) LOG.error ("arg1 for --chunk needs to be smaller or equal to arg2"); LOG.println (" * Chunk processed " + sutils::int2str(nChunk[0]) + " / " + sutils::int2str(nChunk[1])); } else if (options.count("commands")) { vector < string > nCommands = options["commands"].as < vector < string > > (); if (nCommands.size() != 2) LOG.error ("--commands needs 2 arguments"); LOG.println (" * " + nCommands[0] + " commands output in [" + nCommands[1] +"]"); } else LOG.println (" * Focus on region [" + options["region"].as < string > () +"]"); //-------------------------------- // 7. 
READ EXCLUDE / INCLUDE FILES //-------------------------------- if (options.count("exclude-samples")) D.readSamplesToExclude(options["exclude-samples"].as < string > ()); if (options.count("include-samples")) D.readSamplesToInclude(options["include-samples"].as < string > ()); if (options.count("exclude-sites")) D.readGenotypesToExclude(options["exclude-sites"].as < string > ()); if (options.count("include-sites")) D.readGenotypesToInclude(options["include-sites"].as < string > ()); if (options.count("exclude-phenotypes")) D.readPhenotypesToExclude(options["exclude-phenotypes"].as < string > ()); if (options.count("include-phenotypes")) D.readPhenotypesToInclude(options["include-phenotypes"].as < string > ()); if (options.count("exclude-covariates")) D.readCovariatesToExclude(options["exclude-covariates"].as < string > ()); if (options.count("include-covariates")) D.readCovariatesToInclude(options["include-covariates"].as < string > ()); if (options.count("commands")) { //--------------------- // 8. GENERATE COMMANDS //--------------------- int nChunks = atoi(options["commands"].as < vector < string > > ()[0].c_str()); D.scanPhenotypes(options["bed"].as < string > ()); if (options.count("grp")) D.readGroups(options["grp"].as < string > ()); // need to read this before determining chunks (clusterizePhenotypes) D.clusterizePhenotypes(nChunks); D.writeCommands(options["commands"].as < vector < string > > ()[1], nChunks, argc, argv); } else { //-------------- // 9. 
SET REGION //-------------- if (options.count("chunk")) { D.scanPhenotypes(options["bed"].as < string > ()); if (options.count("grp")) D.readGroups(options["grp"].as < string > ()); // need to read this before determining chunks (clusterizePhenotypes) D.clusterizePhenotypes(options["chunk"].as < vector < int > > ()[1]); D.setPhenotypeRegion(options["chunk"].as < vector < int > > ()[0] - 1); D.clear(); } else if (!D.setPhenotypeRegion(options["region"].as < string > ())) LOG.error("Impossible to interpret region [" + options["region"].as < string > () + "]"); D.deduceGenotypeRegion(options["window"].as < double > ()); //--------------- // 10. READ FILES //--------------- D.readPhenotypes(options["bed"].as < string > ()); if (options.count("interaction")) D.readInteractions(options["interaction"].as < string > ()); // used by optional MAF filter, read first D.readGenotypesVCF(options["vcf"].as < string > ()); if (options.count("cov")) D.readCovariates(options["cov"].as < string > ()); if (options.count("map")) D.readThresholds(options["map"].as < string > ()); if (options.count("grp")) D.readGroups(options["grp"].as < string > ()); // read groups again, this time limited to phenotypes //------------------------ // 11. INITIALIZE ANALYSIS //------------------------ D.imputeGenotypes(); D.imputePhenotypes(); if (options.count("normal")) D.normalTranformPhenotypes(); D.initResidualizer(); //----------------- // 12. 
RUN ANALYSIS //----------------- if (options.count("interaction")) { if (options.count("permute")) { D.runPermutationInteraction(options["out"].as < string > (), options["permute"].as < vector < int > > ()[0]); } else if (options["report-best-only"].as<bool>()) { D.runNominalInteractionBest(options["out"].as < string > ()); } else { D.runNominalInteraction(options["out"].as < string > (), options["threshold"].as < double > ()); } } else if (options.count("permute") && options.count("grp")) D.runPermutationPerGroup(options["out"].as < string > (), options["permute"].as < vector < int > > ()); else if (options.count("permute")) { if (options.count("psequence")) D.runPermutation(options["out"].as < string > (), options["psequence"].as < string > ()); else D.runPermutation(options["out"].as < string > (), options["permute"].as < vector < int > > ()); } else if (options.count("map")) { D.runMapping(options["out"].as < string > (), options.count("map-full")); } else if (options["report-best-only"].as<bool>()) { D.runNominalBest(options["out"].as < string > ()); } else { D.runNominal(options["out"].as < string > (), options["threshold"].as < double > ()); } } //---------------- // 13. TERMINATION //---------------- D.clear(); gettimeofday(&stop_time, 0); int n_seconds = (int)floor(stop_time.tv_sec - start_time.tv_sec); LOG.println("\nRunning time: " + sutils::int2str(n_seconds) + " seconds"); if (!options["log"].defaulted()) LOG.close(); }
/*
 * Entry point of the [trans] mode.
 *
 * Declares the mode-specific command-line options on top of the basic ones
 * provided by D.declareBasicOptions(), parses argv into D.options, checks that
 * --vcf, --bed and --out are present and that exactly one of --nominal,
 * --adjust, --permute, --sample is given, sets the mode and parameters on D,
 * reads and prepares the phenotype data and finally calls D.runTransPass().
 *
 * argv: command-line tokens for this mode (handed to boost's command_line_parser).
 *
 * The process terminates with EXIT_FAILURE when the command line cannot be
 * parsed and with EXIT_SUCCESS after printing the help.
 */
void trans_main(vector < string > & argv) {
	trans_data D;

	//-------------------------
	// 1. DECLARE ALL OPTIONS
	//-------------------------
	D.declareBasicOptions(); //Mandatory

	boost::program_options::options_description opt_files ("\x1B[32mI/O\33[0m");
	opt_files.add_options()
		("vcf", boost::program_options::value< string >(), "Genotypes in VCF/BCF format.")
		("bed", boost::program_options::value< string >(), "Phenotypes in BED format.")
		("cov", boost::program_options::value< string >(), "Covariates in TXT format.")
		("out", boost::program_options::value< string >(), "Output file.");

	boost::program_options::options_description opt_parameters ("\x1B[32mParameters\33[0m");
	opt_parameters.add_options()
		("normal", "Quantile normalize phenotype data.")
		("window", boost::program_options::value< double >()->default_value(5e6, "5e6"), "Cis-window of variants to be excluded.")
		("threshold", boost::program_options::value< double >()->default_value(1e-5, "1e-5"), "P-value threshold below which hits are reported.")
		("bins", boost::program_options::value< unsigned int >()->default_value(1000), "Number of bins to use to categorize all p-values above --threshold.");

	boost::program_options::options_description opt_modes ("\x1B[32mAnalysis type\33[0m");
	opt_modes.add_options()
		("nominal", "MODE1: NOMINAL PASS [Pvalues are not adjusted].")
		("adjust", boost::program_options::value< string >(), "MODE2: ADJUSTED PASS [Pvalues are adjusted].")
		("permute", "MODE3: PERMUTATION PASS [Permute all phenotypes once].")
		("sample", boost::program_options::value< unsigned int >(), "MODE4: PERMUTATION PASS [Permute randomly chosen phenotypes multiple times].");

	boost::program_options::options_description opt_parallel ("\x1B[32mParallelization\33[0m");
	opt_parallel.add_options()
		("chunk", boost::program_options::value< vector < int > >()->multitoken(), "Specify which chunk needs to be processed");

	D.option_descriptions.add(opt_files).add(opt_parameters).add(opt_modes).add(opt_parallel);

	//-------------------
	// 2. PARSE OPTIONS
	//-------------------
	try {
		boost::program_options::store(boost::program_options::command_line_parser(argv).options(D.option_descriptions).run(), D.options);
		boost::program_options::notify(D.options);
	} catch ( const boost::program_options::error& e ) {
		cerr << "Error parsing [trans] command line :" << string(e.what()) << endl;
		// A malformed command line is a failure: report it through the exit status.
		exit(EXIT_FAILURE);
	}

	//---------------------
	// 3. PRINT HELP/HEADER
	//---------------------
	vrb.ctitle("MAPPING QTL IN TRANS");
	if (D.options.count("help")) {
		cout << D.option_descriptions << endl;
		exit(EXIT_SUCCESS);
	}

	//-----------------
	// 4. COMMON CHECKS
	//-----------------
	// NOTE(review): later .as<...>() calls rely on vrb.error() not returning — confirm.
	if (!D.options.count("vcf")) vrb.error("Genotype data needs to be specified with --vcf [file.vcf]");
	if (!D.options.count("bed")) vrb.error("Phenotype data needs to be specified with --bed [file.bed]");
	if (!D.options.count("out")) vrb.error("Output needs to be specified with --out [file.out]");

	// Exactly one analysis mode must be selected.
	int nMode = D.options.count("nominal") + D.options.count("adjust") + D.options.count("permute") + D.options.count("sample");
	if (nMode != 1) vrb.error("Please, specify only one of these options [--nominal, --adjust, --permute, --sample]");
	if (D.options.count("chunk") && D.options.count("sample")) vrb.error("--chunk cannot be combined with --sample");

	//---------
	// 5. MODES
	//---------
	//MODE1: NOMINAL PASS NON ADJUSTED
	if (D.options.count("nominal")) {
		D.mode = TRANS_MODE1;
		vrb.bullet("TASK: Perform a full nominal pass, do not adjust p-values");
	}

	//MODE2: NOMINAL PASS ADJUSTED
	if (D.options.count("adjust")) {
		D.mode = TRANS_MODE2;
		vrb.bullet("TASK: Test and adjust p-values using [" + D.options["adjust"].as < string > () +"]");
	}

	//MODE3: PERMUTATION PASS
	if (D.options.count("permute")) {
		D.mode = TRANS_MODE3;
		vrb.bullet("TASK: Permute all phenotype once and test");
	}

	//MODE4: PERMUTATION PASS
	if (D.options.count("sample")) {
		D.mode = TRANS_MODE4;
		vrb.bullet("TASK: Permute randomly chosen phenotypes " + stb.str(D.options["sample"].as < unsigned int > ()) + " times and test");
	}

	//--------------
	// 6. SET PARAMS
	//--------------
	if (D.options["window"].as < double > () <= 0 || D.options["window"].as < double > () > 1e9) vrb.error ("Incorrect cis-window size");
	vrb.bullet("Cis-window size is " + stb.str(static_cast<int>(D.options["window"].as < double > ())) + " bp");
	D.cis_window = D.options["window"].as < double > ();

	if (D.options.count("chunk")) {
		vector < int > nChunk = D.options["chunk"].as < vector < int > > ();
		// nChunk[0]/nChunk[1] are only evaluated once the size test has passed.
		if (nChunk.size() != 2 || nChunk[0] > nChunk[1]) vrb.error("Incorrect --chunk arguments!");
		vrb.bullet("Chunk = [" + stb.str(nChunk[0]) + "/" + stb.str(nChunk[1]) + "]");
	}

	D.n_bins = D.options["bins"].as < unsigned int > ();
	vrb.bullet("#bins = " + stb.str(D.n_bins));
	vrb.bullet("threshold = " + stb.str(D.options["threshold"].as < double > ()));

	D.processBasicOptions();

	if (D.options.count("chunk")) {
		D.scanPhenotypes(D.options["bed"].as < string > ());
		D.setPhenotypeLines(D.options["chunk"].as < vector < int > > ()[0], D.options["chunk"].as < vector < int > > ()[1]);
		D.clear();
	}

	//---------------------------
	// 7. READ FILES & INITIALIZE
	//---------------------------
	//D.processBasicOptions();
	D.readSampleFromVCF(D.options["vcf"].as < string > ());
	D.checkSampleInBED(D.options["bed"].as < string > ());
	if (D.options.count("cov")) D.checkSampleInCOV(D.options["cov"].as < string > ());

	D.readPhenotypes(D.options["bed"].as < string > ());
	D.imputePhenotypes();

	if (D.options.count("cov")) {
		D.readCovariates(D.options["cov"].as < string > ());
		D.residualizePhenotypes();
	}

	if (D.options.count("normal")) D.normalTranformPhenotypes();
	if (D.options.count("permute")) D.shufflePhenotypes();
	if (D.options.count("sample")) D.samplePhenotypes(D.options["sample"].as < unsigned int > ());
	if (D.options.count("adjust")) D.buildNullDistribution(D.options["adjust"].as < string > ());

	D.getCorrelationThreshold(D.options["threshold"].as < double > ());
	D.normalizePhenotypes();

	//----------------
	// 8. RUN ANALYSIS
	//----------------
	D.runTransPass(D.options["vcf"].as < string > (), D.options["out"].as < string > ());
}
void quan_main(vector < string > & argv) { quan_data D; //------------------------- // 1. DECLARE ALL OPTIONS //------------------------- D.declareBasicOptions(); boost::program_options::options_description opt_files ("\x1B[32mI/O\33[0m"); opt_files.add_options() ("gtf", boost::program_options::value< string >(), "Annotation in GTF format") ("bam", boost::program_options::value< vector < string > > ()->multitoken(), "Sequence data in BAM/SAM format.") ("samples",boost::program_options::value< vector < string > > ()->multitoken(), "Sample names or a file with sample names. [Optional]") ("out-prefix", boost::program_options::value< string >(), "Output file prefix."); boost::program_options::options_description opt_parameters ("\x1B[32mParameters\33[0m"); opt_parameters.add_options() ("rpkm", "Print RPKM values.") ("debug", "Print debug info to stderr.") ("gene-types", boost::program_options::value< vector < string > > ()->multitoken(), "Gene types to quantify. (Requires gene_type attribute in GTF. It will also use transcript_type if present).") ("max-read-length", boost::program_options::value< unsigned int >()->default_value(1000), "Group genes separated by this much together. Set this larger than your read length"); boost::program_options::options_description opt_filters ("\x1B[32mFilters\33[0m"); opt_filters.add_options() ("filter-mapping-quality", boost::program_options::value< unsigned int >()->default_value(10), "Minimal mapping quality for a read to be considered.") ("filter-mismatch", boost::program_options::value< double >()->default_value(-1.0,"OFF"), "Maximum mismatches allowed in a read. If between 0 and 1 taken as the fraction of read length. (Requires NM attribute)") ("filter-mismatch-total", boost::program_options::value< double >()->default_value(-1.0,"OFF"), "Maximum total mismatches allowed in paired reads. If between 0 and 1 taken as the fraction of combined read length. 
(Requires NM attribute)") ("check-proper-pairing", "If provided only properly paired reads according to the aligner that are in correct orientation will be considered. Otherwise all pairs in correct orientation will be considered.") ("check-consistency", "If provided checks the consistency of split reads with annotation, rather than pure overlap of one of the blocks of the split read.") ("no-merge", "If provided overlapping mate pairs will not be merged.") ("legacy-options", "Exactly replicate Dermitzakis lab original quantification script. (DO NOT USE UNLESS YOU HAVE A GOOD REASON). Sets --no-merge as well.") ("filter-failed-qc", "Remove fastq reads that fail sequencing QC (as indicated by the sequencer)") ("filter-min-exon", boost::program_options::value< unsigned int >()->default_value(0), "Minimal exon length to consider. Exons smaller than this will not be printed out in the exon quantifications, but will still count towards gene quantifications.") ("filter-remove-duplicates", "Remove duplicate sequencing reads in the process."); boost::program_options::options_description opt_parallel ("\x1B[32mParallelization\33[0m"); opt_parallel.add_options() ("chunk", boost::program_options::value< vector < int > >()->multitoken(), "Specify which chunk needs to be processed") ("region", boost::program_options::value< string >(), "Region of interest."); D.option_descriptions.add(opt_files).add(opt_parameters).add(opt_filters).add(opt_parallel); //------------------- // 2. PARSE OPTIONS //------------------- boost::program_options::variables_map options; try { boost::program_options::store(boost::program_options::command_line_parser(argv).options(D.option_descriptions).run(), D.options); boost::program_options::notify(D.options); } catch ( const boost::program_options::error& e ) { cerr << "Error parsing [quan] command line :" << string(e.what()) << endl; exit(0); } //--------------------- // 3. 
PRINT HELP/HEADER //--------------------- vrb.ctitle("QUANTIFY GENES AND EXONS FROM BAM FILES"); if (D.options.count("help")) { cout << D.option_descriptions << endl; exit(EXIT_SUCCESS); } //----------------- // 4. COMMON CHECKS //----------------- if (!D.options.count("gtf")) vrb.error("Genotype data needs to be specified with --gtf [file.gtf]"); if (!D.options.count("bam")) vrb.error("Sequence data needs to be specified with --bam [file.bam]"); if (!D.options.count("out-prefix")) vrb.error("Output needs to be specified with --out [file.out]"); D.min_mapQ = D.options["filter-mapping-quality"].as < unsigned int > (); vrb.bullet("Minimum mapping quality: " + stb.str(D.min_mapQ)); D.max_read_length = D.options["max-read-length"].as < unsigned int > (); vrb.bullet("Maximum read length: " + stb.str(D.max_read_length)); double intpart; D.max_mismatch_count_total = D.options["filter-mismatch-total"].as < double > (); if(D.max_mismatch_count_total >= 0 && modf(D.max_mismatch_count_total, &intpart) != 0.0) { if(D.max_mismatch_count_total > 1) vrb.error("--filter-mismatch-total cannot be greater than 1 when not an integer"); else D.fraction_mmt = true; } if ( D.max_mismatch_count_total >= 0) vrb.bullet("Maximum mismatch count per mate-pair: " + stb.str(D.max_mismatch_count_total)); D.max_mismatch_count = D.options["filter-mismatch"].as < double > (); if(D.max_mismatch_count >= 0 && modf(D.max_mismatch_count, &intpart) != 0.0) { if(D.max_mismatch_count > 1) vrb.error("--filter-mismatch cannot be greater than 1 when not an integer"); else D.fraction_mm = true; } if (D.max_mismatch_count < 0 && D.max_mismatch_count_total >= 0 && !D.fraction_mmt) D.max_mismatch_count = D.max_mismatch_count_total; if ( D.max_mismatch_count >= 0) vrb.bullet("Maximum mismatch count per read: " + stb.str(D.max_mismatch_count)); if (D.options.count("check-proper-pairing")){ vrb.bullet("Checking properly paired flag"); D.proper_pair = true; } if (D.options.count("check-consistency")){ 
vrb.bullet("Checking if all blocks of a split read are consistent with the annotation"); D.check_consistency = true; } if (D.options.count("filter-remove-duplicates")){ vrb.bullet("Filtering reads flagged as duplicate"); D.dup_remove = true; } if (D.options.count("filter-failed-qc")){ vrb.bullet("Filtering reads flagged as failing QC"); D.fail_qc = true; } if (D.options.count("no-merge")){ vrb.bullet("Not merging overlapping mate pairs"); D.merge = false; } if (D.options.count("legacy-options")){ if (!D.options.count("no-merge")) vrb.bullet("Not merging overlapping mate pairs"); D.min_exon = 2; vrb.bullet("Excluding exons smaller than " + stb.str(D.min_exon) ); vrb.warning("You are using --legacy-options, do you know what you are doing?"); D.old_wrong_split = true; D.merge = false; }else{ D.min_exon = D.options["filter-min-exon"].as < unsigned int > (); vrb.bullet("Excluding exons smaller than " + stb.str(D.min_exon) ); } if (D.options.count("gene-types")){ vector < string > t = D.options["gene-types"].as < vector < string > > (); D.gene_types = set < string > (t.begin(),t.end()); const char* const delim = " "; ostringstream temp; copy(D.gene_types.begin(), D.gene_types.end(), ostream_iterator<string>(temp, delim)); vrb.bullet("Genes included: " + temp.str()); } if (D.options.count("debug")) D.debug = true; int k=1,K=1; if (D.options.count("chunk")) { vector < int > nChunk = D.options["chunk"].as < vector < int > > (); if (nChunk.size() != 2 || nChunk[0] > nChunk[1]) vrb.error("Incorrect --chunk arguments!"); vrb.bullet("Chunk = [" + stb.str(nChunk[0]) + "/" + stb.str(nChunk[1]) + "]"); k=nChunk[0] , K = nChunk[1]; } else if(D.options.count("region")) vrb.bullet("Region = [" + D.options["region"].as < string > () +"]"); //TO DO CHECK PARAMETER VALUES //------------------------------------------ // 5. 
READ FILES / INITIALIZE / RUN ANALYSIS //------------------------------------------ D.processBasicOptions(); D.bams = D.options["bam"].as < vector < string > > (); if (D.options.count("samples")) { vector < string > n = D.options["samples"].as <vector < string > > (); D.read_Sample_Names(n); }else D.samples = D.bams; D.readGTF(D.options["gtf"].as < string > (),D.bams.size()); if (D.options.count("region")) D.setRegion(D.options["region"].as < string >()); if (D.options.count("chunk")) D.setChunk(k, K); D.readBams(); D.printBEDcount(D.options["out-prefix"].as < string > ()); if (D.options.count("rpkm")) D.printBEDrpkm(D.options["out-prefix"].as < string > ()); D.printStats(D.options["out-prefix"].as < string > ()); }
/*
 * Entry point of the [extract] mode (data extraction).
 *
 * Declares the mode-specific command line options, parses `argv` into
 * D.options, validates them, optionally parses --region into D.regionData,
 * then reads the sample lists of every provided input (BED files, VCF,
 * covariates), merges them, reads the data itself, imputes missing values
 * and writes the result to --out.
 *
 * @param argv  Command line tokens for this mode, handed as-is to the
 *              boost::program_options command line parser.
 *
 * The process terminates with EXIT_FAILURE when the command line cannot be
 * parsed and with EXIT_SUCCESS after printing the help for --help.
 * NOTE(review): the checks below rely on vrb.error() not returning -- confirm.
 */
void extract_main(vector < string > & argv) {
	extract_data D;

	//-------------------------
	// 1. DECLARE ALL OPTIONS
	//-------------------------
	D.declareBasicOptions();

	boost::program_options::options_description opt_files ("\x1B[32mI/O\33[0m");
	opt_files.add_options()
		("vcf", boost::program_options::value< string >(), "Genotypes in VCF/BCF format.")
		("bed", boost::program_options::value< vector < string > >()->multitoken(), "Phenotypes in BED format.")
		("cov", boost::program_options::value< string >(), "Covariates in TXT format.")
		("out", boost::program_options::value< string >(), "Output file.");

	boost::program_options::options_description opt_parallel ("\x1B[32mParallelization\33[0m");
	opt_parallel.add_options()
		("region", boost::program_options::value< string >(), "Region of interest.");

	D.option_descriptions.add(opt_files).add(opt_parallel);

	//-------------------
	// 2. PARSE OPTIONS
	//-------------------
	try {
		boost::program_options::store(boost::program_options::command_line_parser(argv).options(D.option_descriptions).run(), D.options);
		boost::program_options::notify(D.options);
	} catch ( const boost::program_options::error& e ) {
		cerr << "Error parsing [extract] command line :" << string(e.what()) << endl;
		// A malformed command line is a failure: report it through the exit status.
		exit(EXIT_FAILURE);
	}

	//---------------------
	// 3. PRINT HELP/HEADER
	//---------------------
	vrb.ctitle("DATA EXTRACTION");
	if (D.options.count("help")) {
		cout << D.option_descriptions << endl;
		exit(EXIT_SUCCESS);
	}

	//-----------------
	// 4. COMMON CHECKS
	//-----------------
	if ((D.options.count("vcf") + D.options.count("bed") + D.options.count("cov")) == 0) vrb.error("At least one input file has to be specified using either --vcf [file.vcf], --bed [file.bed] or --cov [file.txt]");
	// --out is dereferenced unconditionally at the end, so it must be present.
	if (!D.options.count("out")) vrb.error("Output needs to be specified with --out [file.out]");
	if (!D.options.count("region")) vrb.warning("Please use --region to speed up data extraction for phenotype and genotype data!");

	//--------------
	// 5. SET REGION
	//--------------
	if (D.options.count("region") && !D.regionData.parse(D.options["region"].as < string > ())) vrb.error("Impossible to interpret region [" + D.options["region"].as < string > () + "]");

	//--------------
	// 6. READ FILES
	//--------------
	D.processBasicOptions();

	// --bed is optional (a VCF or covariate file alone is a valid input), and
	// calling as<>() on an option that was not supplied throws, so the list
	// stays empty unless --bed was given.
	vector < string > bed_list;
	if (D.options.count("bed")) bed_list = D.options["bed"].as < vector < string > > ();

	// First pass: collect the sample lists of every input, then merge them.
	for (size_t b = 0 ; b < bed_list.size() ; b ++) D.readSampleFromBED(bed_list[b]);
	if (D.options.count("vcf")) D.readSampleFromVCF(D.options["vcf"].as < string > ());
	if (D.options.count("cov")) D.readSampleFromCOV(D.options["cov"].as < string > ());
	D.mergeSampleLists();

	// Second pass: read the actual data of every input.
	for (size_t b = 0 ; b < bed_list.size() ; b ++) D.readBED(bed_list[b]);
	if (D.options.count("vcf")) D.readVCF(D.options["vcf"].as < string > ());
	if (D.options.count("cov")) D.readCOV(D.options["cov"].as < string > ());

	D.imputeMissing();
	D.writeOUT(D.options["out"].as < string > ());
}