/**
 * Reads the general BaseCaller context options from the command line into
 * context_vars, num_threads_ and num_bamwriter_threads_.
 *
 * Asserts that bc_files.options_set is already true.
 *
 * @param opts  Parsed command line options.
 * @return      Always true.
 */
bool BaseCallerParameters::InitContextVarsFromOptArgs(OptArgs& opts)
{
  assert(bc_files.options_set);

  // The fallback run identifier is created from the full output directory string.
  char run_id_from_dir[6];
  ion_run_to_readname(run_id_from_dir,
                      (char*)bc_files.output_directory.c_str(),
                      bc_files.output_directory.length());

  context_vars.run_id = opts.GetFirstString('-', "run-id", run_id_from_dir);
  num_threads_ = opts.GetFirstInt('n', "num-threads", max(2*numCores(), 4));
  num_bamwriter_threads_ = opts.GetFirstInt('-', "num-threads-bamwriter", 6);

  context_vars.flow_signals_type = opts.GetFirstString('-', "flow-signals-type", "none");
  context_vars.extra_trim_left = opts.GetFirstInt('-', "extra-trim-left", 0);
  context_vars.only_process_unfiltered_set = opts.GetFirstBoolean('-', "only-process-unfiltered-set", false);

  // Treephaser related settings
  context_vars.dephaser = opts.GetFirstString('-', "dephaser", "treephaser-sse");
  context_vars.keynormalizer = opts.GetFirstString('-', "keynormalizer", "gain");
  context_vars.windowSize = opts.GetFirstInt('-', "window-size", DPTreephaser::kWindowSizeDefault_);
  context_vars.skip_droop = opts.GetFirstBoolean('-', "skip-droop", true);
  context_vars.skip_recal_during_norm = opts.GetFirstBoolean('-', "skip-recal-during-normalization", false);
  context_vars.diagonal_state_prog = opts.GetFirstBoolean('-', "diagonal-state-prog", false);

  // Diagonal state progression is only paired with treephaser-swan here:
  // any other requested dephaser is reported and then replaced.
  const bool force_swan = context_vars.diagonal_state_prog
                          && context_vars.dephaser != "treephaser-swan";
  if (force_swan) {
    cout << " === BaseCaller Option Incompatibility: Using dephaser treephaser-swan with diagonal state progression instead of "
         << context_vars.dephaser << endl;
    context_vars.dephaser = "treephaser-swan";
  }

  context_vars.process_tfs = true;
  context_vars.options_set = true;
  return true;
}
/**
 * Reads the read-sampling options (unfiltered set size, downsampling and
 * calibration training) into sampling_opts.
 *
 * Asserts that context_vars.options_set is already true. When calibration
 * training is requested, several sampling and context settings are
 * overwritten regardless of what was given on the command line.
 *
 * @param opts       Parsed command line options.
 * @param num_wells  Number of wells; used to turn the downsample fraction
 *                   into an absolute number of reads.
 * @return           Always true. The process exits with EXIT_FAILURE when
 *                   calibration training is combined with diagonal state
 *                   progression.
 */
bool BaseCallerParameters::InitializeSamplingFromOptArgs(OptArgs& opts, const int num_wells)
{
  assert(context_vars.options_set);

  // Pure phase estimation ignores every option below, so neither parse them
  // nor print anything about them.
  if (context_vars.just_phase_estimation) {
    sampling_opts.options_set = true;
    return true;
  }

  sampling_opts.num_unfiltered = opts.GetFirstInt('-', "num-unfiltered", 100000);
  sampling_opts.downsample_size = opts.GetFirstInt('-', "downsample-size", 0);
  sampling_opts.downsample_fraction = opts.GetFirstDouble('-', "downsample-fraction", 1.0);
  sampling_opts.calibration_training = opts.GetFirstInt('-', "calibration-training", -1);
  sampling_opts.have_calib_panel = !bc_files.calibration_panel_file.empty();
  sampling_opts.MaskNotWanted = MaskNone;

  // Reconcile downsample_size with downsample_fraction. The activation flag
  // is determined from the values as given, before they are reconciled.
  const bool fraction_requested = sampling_opts.downsample_fraction < 1.0;
  const bool downsample = sampling_opts.downsample_size > 0 || fraction_requested;

  if (fraction_requested) {
    const int size_from_fraction = (int)((float)num_wells*sampling_opts.downsample_fraction);
    if (sampling_opts.downsample_size == 0)
      sampling_opts.downsample_size = size_from_fraction;
    else
      sampling_opts.downsample_size = min(sampling_opts.downsample_size, size_from_fraction);
  }

  if (downsample)
    cout << "Downsampling activated: Randomly choosing " << sampling_opts.downsample_size
         << " reads on this chip." << endl;

  // Nothing more to do unless calibration training was requested.
  if (sampling_opts.calibration_training < 0) {
    sampling_opts.options_set = true;
    return true;
  }

  // Calibration training needs additional changes and overrides command line options.
  if (context_vars.diagonal_state_prog) {
    cerr << " === BaseCaller Option Incompatibility: Calibration training not supported for diagonal state progression. Aborting!"
         << endl;
    exit(EXIT_FAILURE);
  }

  // The training set can not be larger than an explicitly requested downsample size.
  if (sampling_opts.downsample_size > 0)
    sampling_opts.calibration_training = min(sampling_opts.calibration_training, sampling_opts.downsample_size);

  sampling_opts.downsample_size = max(sampling_opts.calibration_training, 0);
  sampling_opts.MaskNotWanted = (MaskType)(MaskFilteredBadResidual|MaskFilteredBadPPF|MaskFilteredBadKey);
  sampling_opts.num_unfiltered = 0;
  context_vars.process_tfs = false;
  context_vars.flow_signals_type = "scaled-residual";

  cout << "=== BaseCaller Calibration Training ===" << endl;
  cout << " - Generating a training set up to " << sampling_opts.downsample_size
       << " randomly selected reads." << endl;
  if (sampling_opts.have_calib_panel)
    cout << " - Adding calibration panel reads specified in " << bc_files.calibration_panel_file << endl;
  cout << endl;

  sampling_opts.options_set = true;
  return true;
}
/**
 * Resolves one integer parameter and prints the value together with where it
 * came from.
 *
 * Precedence, lowest to highest: builtin default, parameters json file,
 * command line option. The json key is the long option name with every '-'
 * replaced by '_'. A json string value is converted with atoi().
 *
 * @param opts               Parsed command line options.
 * @param json               Parameters json object.
 * @param short_name         Short (single character) option name.
 * @param long_name_hyphens  Long option name, written with hyphens.
 * @param default_value      Value used when neither source provides one.
 * @return                   The resolved value.
 */
int RetrieveParameterInt(OptArgs &opts, Json::Value& json, char short_name, const string& long_name_hyphens, int default_value)
{
  // Derive the json key: same name, underscores instead of hyphens.
  string json_key(long_name_hyphens);
  for (string::iterator ch = json_key.begin(); ch != json_key.end(); ++ch) {
    if (*ch == '-')
      *ch = '_';
  }

  int result = default_value;
  string origin = "builtin default";

  if (json.isMember(json_key)) {
    result = json[json_key].isString() ? atoi(json[json_key].asCString())
                                       : json[json_key].asInt();
    origin = "parameters json file";
  }

  // The command line wins; the value found so far serves as its default.
  if (opts.HasOption(short_name, long_name_hyphens)) {
    result = opts.GetFirstInt(short_name, long_name_hyphens, result);
    origin = "command line option";
  }

  cout << setw(35) << long_name_hyphens << " = "
       << setw(10) << result
       << " (integer, " << origin << ")" << endl;
  return result;
}
/**
 * Initializes the recalibration model from command line options and, when the
 * model was initialized successfully and saving is enabled, stores the model
 * file in the BAM comments.
 *
 * The model file name is cleared when diagonal state progression is
 * requested, so InitializeModel() is then called with an empty name.
 *
 * @param opts          Parsed command line options.
 * @param bam_comments  BAM comment lines handed to SaveModelFileToBamComments().
 * @param run_id        Run identifier handed to SaveModelFileToBamComments().
 * @param chip_subset   Supplies the column and row offsets.
 */
void RecalibrationModel::Initialize(OptArgs& opts, vector<string> &bam_comments, const string & run_id, const ion::ChipSubset & chip_subset)
{
  string model_path = opts.GetFirstString('-', "model-file", "");
  int hp_threshold = opts.GetFirstInt('-', "recal-model-hp-thres", 4);
  bool store_in_bam = opts.GetFirstBoolean('-', "save-hpmodel", true);
  bool diagonal_progression = opts.GetFirstBoolean('-', "diagonal-state-prog", false);

  if (diagonal_progression)
    model_path.clear();

  // The model is always initialized; saving is an optional follow-up step.
  if (!InitializeModel(model_path, hp_threshold))
    return;
  if (!store_in_bam)
    return;

  SaveModelFileToBamComments(model_path, bam_comments, run_id,
                             chip_subset.GetColOffset(), chip_subset.GetRowOffset());
}
/**
 * Reads the phasing estimation options from the command line.
 *
 * When --libcf-ie-dr is given, the estimator is switched to "override" and a
 * single region with the supplied cf, ie and dr values is set up. The process
 * exits with EXIT_FAILURE if that option can not be parsed as three
 * comma-separated numbers.
 *
 * @param opts  Parsed command line options.
 */
void PhaseEstimator::InitializeFromOptArgs(OptArgs& opts)
{
  phasing_estimator_ = opts.GetFirstString('-', "phasing-estimator", "spatial-refiner-2");
  string cf_ie_dr_override = opts.GetFirstString('-', "libcf-ie-dr", "");
  residual_threshold_ = opts.GetFirstDouble('-', "phasing-residual-filter", 1.0);
  max_phasing_levels_ = opts.GetFirstInt('-', "max-phasing-levels", max_phasing_levels_default_);

  const string normalizer_name = opts.GetFirstString('-', "keynormalizer", "keynorm-old");
  use_pid_norm_ = (normalizer_name == "keynorm-new");

  windowSize_ = opts.GetFirstInt('-', "window-size", DPTreephaser::kWindowSizeDefault_);

  if (cf_ie_dr_override.empty())
    return;

  // --libcf-ie-dr overrides other phasing-related options
  phasing_estimator_ = "override";
  result_regions_x_ = 1;
  result_regions_y_ = 1;
  result_cf_.assign(1, 0.0);
  result_ie_.assign(1, 0.0);
  result_dr_.assign(1, 0.0);

  const int fields_parsed = sscanf(cf_ie_dr_override.c_str(), "%f,%f,%f",
                                   &result_cf_[0], &result_ie_[0], &result_dr_[0]);
  if (fields_parsed != 3) {
    fprintf(stderr, "Option Error: libcf-ie-dr %s\n", cf_ie_dr_override.c_str());
    exit(EXIT_FAILURE);
  }
}
/**
 * Reads the molecular tagging options from the command line.
 *
 * A min-tag-fam-size of 0 disables molecular tagging; a negative value is an
 * error. Unknown trim or filter method names are errors as well. Every error
 * is reported on stderr and ends the process with EXIT_FAILURE.
 *
 * @param opts  Parsed command line options.
 * @return      The populated parameter structure.
 */
TagTrimmerParameters MolecularTagTrimmer::ReadOpts(OptArgs& opts)
{
  TagTrimmerParameters params;

  // Tag structure options
  params.min_family_size = opts.GetFirstInt('-', "min-tag-fam-size", 3);
  params.suppress_mol_tags = opts.GetFirstBoolean('-', "suppress-mol-tags", false);
  //params.cl_a_handle = opts.GetFirstString ('-', "tag-handle", "");
  //params.handle_cutoff = opts.GetFirstInt ('-', "handle-cutoff", 2);

  params.master_tags.prefix_mol_tag = opts.GetFirstString('-', "prefix-mol-tag", "");
  params.master_tags.suffix_mol_tag = opts.GetFirstString('-', "suffix-mol-tag", "");
  ValidateTagString(params.master_tags.prefix_mol_tag);
  ValidateTagString(params.master_tags.suffix_mol_tag);

  // Family size: below zero is rejected, zero is the switch that turns tagging off.
  if (params.min_family_size < 0) {
    cerr << "MolecularTagTrimmer Error: min-tag-fam-size must be at least 1. " << endl;
    exit(EXIT_FAILURE);
  }
  if (params.min_family_size == 0)
    params.suppress_mol_tags = true;

  params.command_line_tags = params.master_tags.HasTags();

  // Trimming method selection
  const string trim_name = opts.GetFirstString('-', "tag-trim-method", "sloppy-trim");
  if (trim_name == "sloppy-trim") {
    params.tag_trim_method = kSloppyTrim;
  } else if (trim_name == "strict-trim") {
    params.tag_trim_method = kStrictTrim;
  } else {
    cerr << "MolecularTagTrimmer Error: Unknown tag trimming option " << trim_name << endl;
    exit(EXIT_FAILURE);
  }

  // Read filtering method selection
  const string filter_name = opts.GetFirstString('-', "tag-filter-method", "need-all");
  if (filter_name == "need-all") {
    params.tag_filter_method = kneed_all_tags;
  } else if (filter_name == "need-prefix") {
    params.tag_filter_method = kneed_only_prefix_tag;
  } else if (filter_name == "need-suffix") {
    params.tag_filter_method = kneed_only_suffix_tag;
  } else {
    cerr << "MolecularTagTrimmer Error: Unknown tag filtering option " << filter_name << endl;
    exit(EXIT_FAILURE);
  }

  return params;
}
/**
 * Fills the program control settings.
 *
 * The debug level is read from the command line only. Every other setting is
 * resolved through RetrieveParameterInt() / RetrieveParameterBool(), which
 * receive both the command line options and the parameters json.
 *
 * @param opts        Parsed command line options.
 * @param tvc_params  Parameters json object.
 */
void ProgramControlSettings::SetOpts(OptArgs &opts, Json::Value &tvc_params)
{
  DEBUG = opts.GetFirstInt('d', "debug", 0);

  // Threading
  nThreads = RetrieveParameterInt(opts, tvc_params, 'n', "num-threads", 12);
  nVariantsPerThread = RetrieveParameterInt(opts, tvc_params, 'N', "num-variants-per-thread", 250);

  use_SSE_basecaller = RetrieveParameterBool(opts, tvc_params, '-', "use-sse-basecaller", true);

  // Diagnostic output selection
  rich_json_diagnostic = RetrieveParameterBool(opts, tvc_params, '-', "do-json-diagnostic", false);
  minimal_diagnostic = RetrieveParameterBool(opts, tvc_params, '-', "do-minimal-diagnostic", false);

  // Processing switches
  inputPositionsOnly = RetrieveParameterBool(opts, tvc_params, '-', "process-input-positions-only", false);
  suppress_recalibration = RetrieveParameterBool(opts, tvc_params, '-', "suppress-recalibration", true);
  resolve_clipped_bases = RetrieveParameterBool(opts, tvc_params, '-', "resolve-clipped-bases", false);
}
/**
 * Loads the recalibration model named by --model-file.
 *
 * The model stays disabled when no file is given, when the name is "off",
 * when the file can not be opened, when its header can not be parsed, or
 * when --recal-model-hp-thres exceeds MAX_HPXLEN.
 *
 * File layout as read here: one comment line, one header line holding ten
 * integers (flow start/end/span, x min/max/span, y min/max/span, max
 * calibrated HP), followed by records of ten fields each
 * (base, flow start, flow end, x min, x max, y min, y max, HP, A, B).
 *
 * @param opts  Parsed command line options.
 */
void RecalibrationModel::Initialize(OptArgs& opts)
{
  is_enabled_ = false;

  string model_file_name = opts.GetFirstString ('-', "model-file", "");
  if (model_file_name.empty() or model_file_name == "off") {
    printf("RecalibrationModel: disabled\n\n");
    return;
  }

  ifstream model_file;
  model_file.open(model_file_name.c_str());
  if (model_file.fail()) {
    printf("RecalibrationModel: disabled (cannot open %s)\n\n", model_file_name.c_str());
    model_file.close();
    return;
  }

  recalModelHPThres = opts.GetFirstInt('-', "recal-model-hp-thres", 4);

  string comment_line;
  getline(model_file, comment_line); // skip the comment line

  // Header. A failed extraction would leave these values uninitialized, so
  // the model is not set up from a header that could not be read completely.
  int flowStart, flowEnd, flowSpan, xMin, xMax, xSpan, yMin, yMax, ySpan, max_hp_calibrated;
  if (!(model_file >> flowStart >> flowEnd >> flowSpan
                   >> xMin >> xMax >> xSpan
                   >> yMin >> yMax >> ySpan
                   >> max_hp_calibrated)) {
    printf("RecalibrationModel: disabled (cannot parse header of %s)\n\n", model_file_name.c_str());
    return;
  }

  stratification.SetupRegion(xMin, xMax, xSpan, yMin, yMax, ySpan);
  // calculate number of partitions and initialize the stratifiedAs and stratifiedBs
  SetupStratification(flowStart, flowEnd, flowSpan, xMin, xMax, xSpan, yMin, yMax, ySpan, max_hp_calibrated);

  // Records. The extraction itself is the loop condition: testing good()
  // before reading would run the body one extra time after the final failed
  // read, feeding stale (or, for a file without records, uninitialized)
  // values to FillIndexes().
  char flowBase;
  int refHP;
  float paramA, paramB;
  while (model_file >> flowBase >> flowStart >> flowEnd
                    >> xMin >> xMax >> yMin >> yMax
                    >> refHP >> paramA >> paramB) {
    // populate it to stratifiedAs and stratifiedBs
    int nucInd = NuctoInt(flowBase);
    // boundary check
    int offsetRegion = stratification.OffsetRegion(xMin, yMin);
    FillIndexes(offsetRegion, nucInd, refHP, flowStart, flowEnd, paramA, paramB);
  }

  model_file.close();

  printf("Recalibration: enabled (using calibration file %s)\n\n", model_file_name.c_str());
  // NOTE(review): the "enabled" message above is printed even when the
  // threshold check below turns the model off again.
  is_enabled_ = true;
  if (recalModelHPThres > MAX_HPXLEN)
    is_enabled_ = false;
}
/**
 * Sets up the flow order and the library / test fragment key sequences.
 *
 * The flow order and flow limit default to the supplied values and can be
 * overridden on the command line; the number of flows is capped at NumFlows.
 * Keys are read from --lib-key / --tf-key, with --librarykey / --tfkey
 * accepted as backward compatible spellings that take precedence.
 *
 * @param opts       Parsed command line options.
 * @param FlowOrder  Default flow order string.
 * @param NumFlows   Default and maximum number of flows.
 * @return           Always true.
 */
bool BaseCallerContext::SetKeyAndFlowOrder(OptArgs& opts, const char * FlowOrder, const int NumFlows)
{
  const string requested_order = opts.GetFirstString('-', "flow-order", FlowOrder);
  const int requested_flows = opts.GetFirstInt('f', "flowlimit", NumFlows);
  flow_order.SetFlowOrder(requested_order, requested_flows);

  // Never use more flows than the supplied maximum.
  if (flow_order.num_flows() > NumFlows)
    flow_order.SetNumFlows(NumFlows);
  assert(flow_order.is_ok());

  //! @todo Get default key from wells
  string library_key = opts.GetFirstString('-', "lib-key", "TCAG");
  string test_fragment_key = opts.GetFirstString('-', "tf-key", "ATCG");

  // Backward compatible option names
  library_key = opts.GetFirstString('-', "librarykey", library_key);
  test_fragment_key = opts.GetFirstString('-', "tfkey", test_fragment_key);

  keys.resize(2);
  keys[0].Set(flow_order, library_key, "lib");
  keys[1].Set(flow_order, test_fragment_key, "tf");
  return true;
}
int IonstatsTestFragments(int argc, const char *argv[]) { OptArgs opts; opts.ParseCmdLine(argc, argv); string input_bam_filename = opts.GetFirstString('i', "input", ""); string fasta_filename = opts.GetFirstString('r', "ref", ""); string output_json_filename = opts.GetFirstString('o', "output", "ionstats_tf.json"); int histogram_length = opts.GetFirstInt ('h', "histogram-length", 400); if(argc < 2 or input_bam_filename.empty() or fasta_filename.empty()) { IonstatsTestFragmentsHelp(); return 1; } // // Prepare for metric calculation // map<string,string> tf_sequences; PopulateReferenceSequences(tf_sequences, fasta_filename); BamReader input_bam; if (!input_bam.Open(input_bam_filename)) { fprintf(stderr, "[ionstats] ERROR: cannot open %s\n", input_bam_filename.c_str()); return 1; } int num_tfs = input_bam.GetReferenceCount(); SamHeader sam_header = input_bam.GetHeader(); if(!sam_header.HasReadGroups()) { fprintf(stderr, "[ionstats] ERROR: no read groups in %s\n", input_bam_filename.c_str()); return 1; } string flow_order; string key; for (SamReadGroupIterator rg = sam_header.ReadGroups.Begin(); rg != sam_header.ReadGroups.End(); ++rg) { if(rg->HasFlowOrder()) flow_order = rg->FlowOrder; if(rg->HasKeySequence()) key = rg->KeySequence; } // Need these metrics stratified by TF. 
vector<ReadLengthHistogram> called_histogram(num_tfs); vector<ReadLengthHistogram> aligned_histogram(num_tfs); vector<ReadLengthHistogram> AQ10_histogram(num_tfs); vector<ReadLengthHistogram> AQ17_histogram(num_tfs); vector<SimpleHistogram> error_by_position(num_tfs); vector<MetricGeneratorSNR> system_snr(num_tfs); vector<MetricGeneratorHPAccuracy> hp_accuracy(num_tfs); for (int tf = 0; tf < num_tfs; ++tf) { called_histogram[tf].Initialize(histogram_length); aligned_histogram[tf].Initialize(histogram_length); AQ10_histogram[tf].Initialize(histogram_length); AQ17_histogram[tf].Initialize(histogram_length); error_by_position[tf].Initialize(histogram_length); } vector<uint16_t> flow_signal_fz(flow_order.length()); vector<int16_t> flow_signal_zm(flow_order.length()); const RefVector& refs = input_bam.GetReferenceData(); // Missing: // - hp accuracy - tough, copy verbatim from TFMapper? BamAlignment alignment; vector<char> MD_op; vector<int> MD_len; MD_op.reserve(1024); MD_len.reserve(1024); string MD_tag; // // Main loop over mapped reads in the input BAM // while(input_bam.GetNextAlignment(alignment)) { if (!alignment.IsMapped() or !alignment.GetTag("MD",MD_tag)) continue; // The check below eliminates unexpected alignments if (alignment.IsReverseStrand() or alignment.Position > 5) continue; int current_tf = alignment.RefID; // // Step 1. Parse MD tag // MD_op.clear(); MD_len.clear(); for (const char *MD_ptr = MD_tag.c_str(); *MD_ptr;) { int item_length = 0; if (*MD_ptr >= '0' and *MD_ptr <= '9') { // Its a match MD_op.push_back('M'); for (; *MD_ptr and *MD_ptr >= '0' and *MD_ptr <= '9'; ++MD_ptr) item_length = 10*item_length + *MD_ptr - '0'; } else { if (*MD_ptr == '^') { // Its a deletion MD_ptr++; MD_op.push_back('D'); } else // Its a substitution MD_op.push_back('X'); for (; *MD_ptr and *MD_ptr >= 'A' and *MD_ptr <= 'Z'; ++MD_ptr) item_length++; } MD_len.push_back(item_length); } // // Step 2. 
Synchronously scan through Cigar and MD, doing error accounting // int MD_idx = alignment.IsReverseStrand() ? MD_op.size()-1 : 0; int cigar_idx = alignment.IsReverseStrand() ? alignment.CigarData.size()-1 : 0; int increment = alignment.IsReverseStrand() ? -1 : 1; int AQ10_bases = 0; int AQ17_bases = 0; int num_bases = 0; int num_errors = 0; while (cigar_idx < (int)alignment.CigarData.size() and MD_idx < (int) MD_op.size() and cigar_idx >= 0 and MD_idx >= 0) { if (alignment.CigarData[cigar_idx].Length == 0) { // Try advancing cigar cigar_idx += increment; continue; } if (MD_len[MD_idx] == 0) { // Try advancing MD MD_idx += increment; continue; } // Match if (alignment.CigarData[cigar_idx].Type == 'M' and MD_op[MD_idx] == 'M') { int advance = min((int)alignment.CigarData[cigar_idx].Length, MD_len[MD_idx]); num_bases += advance; alignment.CigarData[cigar_idx].Length -= advance; MD_len[MD_idx] -= advance; // Insertion (read has a base, reference doesn't) } else if (alignment.CigarData[cigar_idx].Type == 'I') { int advance = alignment.CigarData[cigar_idx].Length; for (int cnt = 0; cnt < advance; ++cnt) { error_by_position[current_tf].Add(num_bases); num_bases++; num_errors++; } alignment.CigarData[cigar_idx].Length -= advance; // Deletion (reference has a base, read doesn't) } else if (alignment.CigarData[cigar_idx].Type == 'D' and MD_op[MD_idx] == 'D') { int advance = min((int)alignment.CigarData[cigar_idx].Length, MD_len[MD_idx]); for (int cnt = 0; cnt < advance; ++cnt) { error_by_position[current_tf].Add(num_bases); num_errors++; } alignment.CigarData[cigar_idx].Length -= advance; MD_len[MD_idx] -= advance; // Substitution } else if (MD_op[MD_idx] == 'X') { int advance = min((int)alignment.CigarData[cigar_idx].Length, MD_len[MD_idx]); for (int cnt = 0; cnt < advance; ++cnt) { error_by_position[current_tf].Add(num_bases); num_bases++; num_errors++; } alignment.CigarData[cigar_idx].Length -= advance; MD_len[MD_idx] -= advance; } else { printf("ionstats tf: Unexpected 
OP combination: %s Cigar=%c, MD=%c !\n", alignment.Name.c_str(), alignment.CigarData[cigar_idx].Type, MD_op[MD_idx]); break; } if (num_errors*10 <= num_bases) AQ10_bases = num_bases; if (num_errors*50 <= num_bases) AQ17_bases = num_bases; } // // Step 3. Profit // called_histogram[current_tf].Add(alignment.Length); aligned_histogram[current_tf].Add(num_bases); AQ10_histogram[current_tf].Add(AQ10_bases); AQ17_histogram[current_tf].Add(AQ17_bases); if(alignment.GetTag("ZM", flow_signal_zm)) system_snr[current_tf].Add(flow_signal_zm, key.c_str(), flow_order); else if(alignment.GetTag("FZ", flow_signal_fz)) system_snr[current_tf].Add(flow_signal_fz, key.c_str(), flow_order); // HP accuracy - keeping it simple if (!alignment.IsReverseStrand()) { string genome = key + tf_sequences[refs[current_tf].RefName]; string calls = key + alignment.QueryBases; const char *genome_ptr = genome.c_str(); const char *calls_ptr = calls.c_str(); for (int flow = 0; flow < (int)flow_order.length() and *genome_ptr and *calls_ptr; ++flow) { int genome_hp = 0; int calls_hp = 0; while (*genome_ptr == flow_order[flow]) { genome_hp++; genome_ptr++; } while (*calls_ptr == flow_order[flow]) { calls_hp++; calls_ptr++; } hp_accuracy[current_tf].Add(genome_hp, calls_hp); } } } // // Processing complete, generate ionstats_tf.json // Json::Value output_json(Json::objectValue); output_json["meta"]["creation_date"] = get_time_iso_string(time(NULL)); output_json["meta"]["format_name"] = "ionstats_tf"; output_json["meta"]["format_version"] = "1.0"; output_json["results_by_tf"] = Json::objectValue; for (int tf = 0; tf < num_tfs; ++tf) { if (aligned_histogram[tf].num_reads() < 1000) continue; called_histogram[tf].SaveToJson(output_json["results_by_tf"][refs[tf].RefName]["full"]); aligned_histogram[tf].SaveToJson(output_json["results_by_tf"][refs[tf].RefName]["aligned"]); AQ10_histogram[tf].SaveToJson(output_json["results_by_tf"][refs[tf].RefName]["AQ10"]); 
AQ17_histogram[tf].SaveToJson(output_json["results_by_tf"][refs[tf].RefName]["AQ17"]); error_by_position[tf].SaveToJson(output_json["results_by_tf"][refs[tf].RefName]["error_by_position"]); system_snr[tf].SaveToJson(output_json["results_by_tf"][refs[tf].RefName]); hp_accuracy[tf].SaveToJson(output_json["results_by_tf"][refs[tf].RefName]); output_json["results_by_tf"][refs[tf].RefName]["sequence"] = tf_sequences[refs[tf].RefName]; } input_bam.Close(); ofstream out(output_json_filename.c_str(), ios::out); if (out.good()) { out << output_json.toStyledString(); return 0; } else { fprintf(stderr, "ERROR: unable to write to '%s'\n", output_json_filename.c_str()); return 1; } }
int main (int argc, const char *argv[]) { printf ("------------- bamrealignment --------------\n"); OptArgs opts; opts.ParseCmdLine(argc, argv); vector<int> score_vals(4); string input_bam = opts.GetFirstString ('i', "input", ""); string output_bam = opts.GetFirstString ('o', "output", ""); opts.GetOption(score_vals, "4,-6,-5,-2", 's', "scores"); int clipping = opts.GetFirstInt ('c', "clipping", 2); bool anchors = opts.GetFirstBoolean ('a', "anchors", true); int bandwidth = opts.GetFirstInt ('b', "bandwidth", 10); bool verbose = opts.GetFirstBoolean ('v', "verbose", false); bool debug = opts.GetFirstBoolean ('d', "debug", false); int format = opts.GetFirstInt ('f', "format", 1); int num_threads = opts.GetFirstInt ('t', "threads", 8); string log_fname = opts.GetFirstString ('l', "log", ""); if (input_bam.empty() or output_bam.empty()) return PrintHelp(); opts.CheckNoLeftovers(); std::ofstream logf; if (log_fname.size ()) { logf.open (log_fname.c_str ()); if (!logf.is_open ()) { fprintf (stderr, "bamrealignment: Failed to open log file %s\n", log_fname.c_str()); return 1; } } BamReader reader; if (!reader.Open(input_bam)) { fprintf(stderr, "bamrealignment: Failed to open input file %s\n", input_bam.c_str()); return 1; } SamHeader header = reader.GetHeader(); RefVector refs = reader.GetReferenceData(); BamWriter writer; writer.SetNumThreads(num_threads); if (format == 1) writer.SetCompressionMode(BamWriter::Uncompressed); else writer.SetCompressionMode(BamWriter::Compressed); if (!writer.Open(output_bam, header, refs)) { fprintf(stderr, "bamrealignment: Failed to open output file %s\n", output_bam.c_str()); return 1; } // The meat starts here ------------------------------------ if (verbose) cout << "Verbose option is activated, each alignment will print to screen." 
<< endl << " After a read hit RETURN to continue to the next one," << endl << " or press q RETURN to quit the program," << endl << " or press s Return to silence verbose," << endl << " or press c RETURN to continue printing without further prompt." << endl << endl; unsigned int readcounter = 0; unsigned int mapped_readcounter = 0; unsigned int realigned_readcounter = 0; unsigned int modified_alignment_readcounter = 0; unsigned int pos_update_readcounter = 0; unsigned int failed_clip_realigned_readcount = 0; unsigned int already_perfect_readcount = 0; unsigned int bad_md_tag_readcount = 0; unsigned int error_recreate_ref_readcount = 0; unsigned int error_clip_anchor_readcount = 0; unsigned int error_sw_readcount = 0; unsigned int error_unclip_readcount = 0; unsigned int start_position_shift; int orig_position; int new_position; string md_tag, new_md_tag, input = "x"; vector<CigarOp> new_cigar_data; vector<MDelement> new_md_data; bool position_shift = false; time_t start_time = time(NULL); Realigner aligner; aligner.verbose_ = verbose; aligner.debug_ = debug; if (!aligner.SetScores(score_vals)) cout << "bamrealignment: Four scores need to be provided: match, mismatch, gap open, gap extend score!" << endl; aligner.SetAlignmentBandwidth(bandwidth); BamAlignment alignment; while(reader.GetNextAlignment(alignment)){ readcounter ++; position_shift = false; if ( (readcounter % 100000) == 0 ) cout << "Processed " << readcounter << " reads. Elapsed time: " << (time(NULL) - start_time) << endl; if (alignment.IsMapped()) { orig_position = alignment.Position; mapped_readcounter++; aligner.SetClipping(clipping, !alignment.IsReverseStrand()); if (aligner.verbose_) { cout << endl; if (alignment.IsReverseStrand()) cout << "The read is from the reverse strand." << endl; else cout << "The read is from the forward strand." << endl; } if (!alignment.GetTag("MD", md_tag)) { if (aligner.verbose_) cout << "Warning: Skipping read " << alignment.Name << ". It is mapped but missing MD tag." 
<< endl; if (logf.is_open ()) logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "MISSMD" << '\n'; bad_md_tag_readcount++; } else if (aligner.CreateRefFromQueryBases(alignment.QueryBases, alignment.CigarData, md_tag, anchors)) { bool clipfail = false; if (Realigner::CR_ERR_CLIP_ANCHOR == aligner.GetCreateRefError ()) { clipfail = true; failed_clip_realigned_readcount ++; } if (!aligner.computeSWalignment(new_cigar_data, new_md_data, start_position_shift)) { if (aligner.verbose_) cout << "Error in the alignment! Not updating read information." << endl; if (logf.is_open ()) logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "SWERR" << '\n'; error_sw_readcount++; writer.SaveAlignment(alignment); // Write alignment unchanged continue; } if (!aligner.addClippedBasesToTags(new_cigar_data, new_md_data, alignment.QueryBases.size())) { if (aligner.verbose_) cout << "Error when adding clipped anchors back to tags! Not updating read information." 
<< endl; if (logf.is_open ()) logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "UNCLIPERR" << '\n'; writer.SaveAlignment(alignment); // Write alignment unchanged error_unclip_readcount ++; continue; } new_md_tag = aligner.GetMDstring(new_md_data); realigned_readcounter++; // adjust start position of read if (!aligner.LeftAnchorClipped() and start_position_shift != 0) { new_position = aligner.updateReadPosition(alignment.CigarData, (int)start_position_shift, alignment.Position); if (new_position != alignment.Position) { pos_update_readcounter++; position_shift = true; alignment.Position = new_position; } } if (position_shift || alignment.CigarData.size () != new_cigar_data.size () || md_tag != new_md_tag) { if (logf.is_open ()) { logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "MOD"; if (position_shift) logf << "-SHIFT"; if (clipfail) logf << " NOCLIP"; logf << '\n'; } modified_alignment_readcounter++; } else { if (logf.is_open ()) { logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "UNMOD"; if (clipfail) logf << " NOCLIP"; logf << '\n'; } } if (aligner.verbose_){ cout << alignment.Name << endl; cout << "------------------------------------------" << endl; // Wait for input to continue or quit program if (input.size() == 0) input = 'x'; else if (input[0] != 'c' and input[0] != 'C') getline(cin, input); if (input.size()>0){ if (input[0] == 'q' or input[0] == 'Q') return 1; else if (input[0] == 's' or input[0] == 'S') aligner.verbose_ = false; } } // Finally update alignment information alignment.CigarData = new_cigar_data; alignment.EditTag("MD", "Z" , new_md_tag); } // end of CreateRef else if else { switch (aligner.GetCreateRefError ()) { case 
Realigner::CR_ERR_RECREATE_REF: if (logf.is_open ()) logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "RECRERR" << '\n'; error_recreate_ref_readcount++; break; case Realigner::CR_ERR_CLIP_ANCHOR: if (logf.is_open ()) logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "CLIPERR" << '\n'; error_clip_anchor_readcount++; break; default: // On a good run this writes way too many reads to the log file - don't want to create a too large txt file // if (logf.is_open ()) //logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "PERFECT" << '\n'; already_perfect_readcount++; break; } if (aligner.verbose_) { cout << alignment.Name << endl; cout << "------------------------------------------" << endl; // Wait for input to continue or quit program if (input.size() == 0) input = 'x'; else if (input[0] != 'c' and input[0] != 'C') getline(cin, input); if (input.size()>0){ if (input[0] == 'q' or input[0] == 'Q') return 1; else if (input[0] == 's' or input[0] == 'S') aligner.verbose_ = false; } } } // --- Debug output for Rajesh --- if (debug && aligner.invalid_cigar_in_input) { aligner.verbose_ = true; cout << "Invalid cigar string / md tag pair in read " << alignment.Name << endl; // Rerun reference generation to display error aligner.CreateRefFromQueryBases(alignment.QueryBases, alignment.CigarData, md_tag, anchors); aligner.verbose_ = verbose; aligner.invalid_cigar_in_input = false; } // --- --- --- } // end of if isMapped writer.SaveAlignment(alignment); } // end while loop over reads if (aligner.invalid_cigar_in_input) cerr << "WARNING bamrealignment: There were invalid cigar string / md tag pairs in the input bam file." 
<< endl; // ---------------------------------------------------------------- // program end -- output summary information cout << " File: " << input_bam << endl << " Total reads: " << readcounter << endl << " Mapped reads: " << mapped_readcounter << endl; if (bad_md_tag_readcount) cout << " Skipped: bad MD tags: " << bad_md_tag_readcount << endl; if (error_recreate_ref_readcount) cout << " Skipped: unable to recreate ref: " << error_recreate_ref_readcount << endl; if (error_clip_anchor_readcount) cout << " Skipped: error clipping anchor: " << error_clip_anchor_readcount << endl; cout << " Skipped: already perfect: " << already_perfect_readcount << endl << " Total reads realigned: " << mapped_readcounter - already_perfect_readcount - bad_md_tag_readcount - error_recreate_ref_readcount - error_clip_anchor_readcount << endl; if (failed_clip_realigned_readcount) cout << " (including " << failed_clip_realigned_readcount << " that failed to clip)" << endl; if (error_sw_readcount) cout << " Failed to complete SW alignment: " << error_sw_readcount << endl; if (error_unclip_readcount) cout << " Failed to unclip anchor: " << error_unclip_readcount << endl; cout << " Succesfully realigned: " << realigned_readcounter << endl << " Modified alignments: " << modified_alignment_readcounter << endl << " Shifted position: " << pos_update_readcounter << endl; cout << "Processing time: " << (time(NULL)-start_time) << " seconds." << endl; cout << "INFO: The output BAM file may be unsorted." << endl; cout << "------------------------------------------" << endl; return 0; }
int IonstatsAlignment(int argc, const char *argv[]) { OptArgs opts; opts.ParseCmdLine(argc, argv); string input_bam_filename = opts.GetFirstString('i', "input", ""); string output_json_filename = opts.GetFirstString('o', "output", "ionstats_alignment.json"); int histogram_length = opts.GetFirstInt ('h', "histogram-length", 400); if(argc < 2 or input_bam_filename.empty()) { IonstatsAlignmentHelp(); return 1; } // // Prepare for metric calculation // BamReader input_bam; if (!input_bam.Open(input_bam_filename)) { fprintf(stderr, "[ionstats] ERROR: cannot open %s\n", input_bam_filename.c_str()); return 1; } ReadLengthHistogram called_histogram; ReadLengthHistogram aligned_histogram; ReadLengthHistogram AQ7_histogram; ReadLengthHistogram AQ10_histogram; ReadLengthHistogram AQ17_histogram; ReadLengthHistogram AQ20_histogram; ReadLengthHistogram AQ47_histogram; SimpleHistogram error_by_position; called_histogram.Initialize(histogram_length); aligned_histogram.Initialize(histogram_length); AQ7_histogram.Initialize(histogram_length); AQ10_histogram.Initialize(histogram_length); AQ17_histogram.Initialize(histogram_length); AQ20_histogram.Initialize(histogram_length); AQ47_histogram.Initialize(histogram_length); error_by_position.Initialize(histogram_length); BamAlignment alignment; vector<char> MD_op; vector<int> MD_len; MD_op.reserve(1024); MD_len.reserve(1024); string MD_tag; // // Main loop over mapped reads in the input BAM // while(input_bam.GetNextAlignment(alignment)) { // Record read length called_histogram.Add(alignment.Length); if (!alignment.IsMapped() or !alignment.GetTag("MD",MD_tag)) continue; // // Step 1. 
Parse MD tag // MD_op.clear(); MD_len.clear(); for (const char *MD_ptr = MD_tag.c_str(); *MD_ptr;) { int item_length = 0; if (*MD_ptr >= '0' and *MD_ptr <= '9') { // Its a match MD_op.push_back('M'); for (; *MD_ptr and *MD_ptr >= '0' and *MD_ptr <= '9'; ++MD_ptr) item_length = 10*item_length + *MD_ptr - '0'; } else { if (*MD_ptr == '^') { // Its a deletion MD_ptr++; MD_op.push_back('D'); } else // Its a substitution MD_op.push_back('X'); for (; *MD_ptr and *MD_ptr >= 'A' and *MD_ptr <= 'Z'; ++MD_ptr) item_length++; } MD_len.push_back(item_length); } // // Step 2. Synchronously scan through Cigar and MD, doing error accounting // int MD_idx = alignment.IsReverseStrand() ? MD_op.size()-1 : 0; int cigar_idx = alignment.IsReverseStrand() ? alignment.CigarData.size()-1 : 0; int increment = alignment.IsReverseStrand() ? -1 : 1; int AQ7_bases = 0; int AQ10_bases = 0; int AQ17_bases = 0; int AQ20_bases = 0; int AQ47_bases = 0; int num_bases = 0; int num_errors = 0; while (cigar_idx < (int)alignment.CigarData.size() and MD_idx < (int) MD_op.size() and cigar_idx >= 0 and MD_idx >= 0) { if (alignment.CigarData[cigar_idx].Length == 0) { // Try advancing cigar cigar_idx += increment; continue; } if (MD_len[MD_idx] == 0) { // Try advancing MD MD_idx += increment; continue; } // Match if (alignment.CigarData[cigar_idx].Type == 'M' and MD_op[MD_idx] == 'M') { int advance = min((int)alignment.CigarData[cigar_idx].Length, MD_len[MD_idx]); num_bases += advance; alignment.CigarData[cigar_idx].Length -= advance; MD_len[MD_idx] -= advance; // Insertion (read has a base, reference doesn't) } else if (alignment.CigarData[cigar_idx].Type == 'I') { int advance = alignment.CigarData[cigar_idx].Length; for (int cnt = 0; cnt < advance; ++cnt) { error_by_position.Add(num_bases); num_bases++; num_errors++; } alignment.CigarData[cigar_idx].Length -= advance; // Deletion (reference has a base, read doesn't) } else if (alignment.CigarData[cigar_idx].Type == 'D' and MD_op[MD_idx] == 'D') { int 
advance = min((int)alignment.CigarData[cigar_idx].Length, MD_len[MD_idx]); for (int cnt = 0; cnt < advance; ++cnt) { error_by_position.Add(num_bases); num_errors++; } alignment.CigarData[cigar_idx].Length -= advance; MD_len[MD_idx] -= advance; // Substitution } else if (MD_op[MD_idx] == 'X') { int advance = min((int)alignment.CigarData[cigar_idx].Length, MD_len[MD_idx]); for (int cnt = 0; cnt < advance; ++cnt) { error_by_position.Add(num_bases); num_bases++; num_errors++; } alignment.CigarData[cigar_idx].Length -= advance; MD_len[MD_idx] -= advance; } else { printf("ionstats alignment: Unexpected OP combination: %s Cigar=%c, MD=%c !\n", alignment.Name.c_str(), alignment.CigarData[cigar_idx].Type, MD_op[MD_idx]); break; } if (num_errors*5 <= num_bases) AQ7_bases = num_bases; if (num_errors*10 <= num_bases) AQ10_bases = num_bases; if (num_errors*50 <= num_bases) AQ17_bases = num_bases; if (num_errors*100 <= num_bases) AQ20_bases = num_bases; if (num_errors == 0) AQ47_bases = num_bases; } // // Step 3. 
Profit // if (num_bases >= 20) aligned_histogram.Add(num_bases); if (AQ7_bases >= 20) AQ7_histogram.Add(AQ7_bases); if (AQ10_bases >= 20) AQ10_histogram.Add(AQ10_bases); if (AQ17_bases >= 20) AQ17_histogram.Add(AQ17_bases); if (AQ20_bases >= 20) AQ20_histogram.Add(AQ20_bases); if (AQ47_bases >= 20) AQ47_histogram.Add(AQ47_bases); } input_bam.Close(); // // Processing complete, generate ionstats_alignment.json // Json::Value output_json(Json::objectValue); output_json["meta"]["creation_date"] = get_time_iso_string(time(NULL)); output_json["meta"]["format_name"] = "ionstats_alignment"; output_json["meta"]["format_version"] = "1.0"; called_histogram.SaveToJson(output_json["full"]); aligned_histogram.SaveToJson(output_json["aligned"]); AQ7_histogram.SaveToJson(output_json["AQ7"]); AQ10_histogram.SaveToJson(output_json["AQ10"]); AQ17_histogram.SaveToJson(output_json["AQ17"]); AQ20_histogram.SaveToJson(output_json["AQ20"]); AQ47_histogram.SaveToJson(output_json["AQ47"]); error_by_position.SaveToJson(output_json["error_by_position"]); ofstream out(output_json_filename.c_str(), ios::out); if (out.good()) { out << output_json.toStyledString(); return 0; } else { fprintf(stderr, "ERROR: unable to write to '%s'\n", output_json_filename.c_str()); return 1; } return 0; }
int IonstatsBasecaller(int argc, const char *argv[]) { OptArgs opts; opts.ParseCmdLine(argc, argv); string input_bam_filename = opts.GetFirstString('i', "input", ""); string output_json_filename = opts.GetFirstString('o', "output", "ionstats_basecaller.json"); int histogram_length = opts.GetFirstInt ('h', "histogram-length", 400); if(argc < 2 or input_bam_filename.empty()) { IonstatsBasecallerHelp(); return 1; } BamReader input_bam; if (!input_bam.Open(input_bam_filename)) { fprintf(stderr, "[ionstats] ERROR: cannot open %s\n", input_bam_filename.c_str()); return 1; } SamHeader sam_header = input_bam.GetHeader(); if(!sam_header.HasReadGroups()) { fprintf(stderr, "[ionstats] ERROR: no read groups in %s\n", input_bam_filename.c_str()); return 1; } ReadLengthHistogram total_full_histo; ReadLengthHistogram total_insert_histo; ReadLengthHistogram total_Q17_histo; ReadLengthHistogram total_Q20_histo; total_full_histo.Initialize(histogram_length); total_insert_histo.Initialize(histogram_length); total_Q17_histo.Initialize(histogram_length); total_Q20_histo.Initialize(histogram_length); MetricGeneratorSNR system_snr; BaseQVHistogram qv_histogram; string flow_order; string key; for (SamReadGroupIterator rg = sam_header.ReadGroups.Begin(); rg != sam_header.ReadGroups.End(); ++rg) { if(rg->HasFlowOrder()) flow_order = rg->FlowOrder; if(rg->HasKeySequence()) key = rg->KeySequence; } double qv_to_error_rate[256]; for (int qv = 0; qv < 256; qv++) qv_to_error_rate[qv] = pow(10.0,-0.1*(double)qv); BamAlignment alignment; string read_group; vector<uint16_t> flow_signal_fz(flow_order.length()); vector<int16_t> flow_signal_zm(flow_order.length()); while(input_bam.GetNextAlignment(alignment)) { // Record read length unsigned int full_length = alignment.Length; total_full_histo.Add(full_length); // Record insert length int insert_length = 0; if (alignment.GetTag("ZA",insert_length)) total_insert_histo.Add(insert_length); // Compute and record Q17 and Q20 int Q17_length = 0; int 
Q20_length = 0; double num_accumulated_errors = 0.0; for(int pos = 0; pos < alignment.Length; ++pos) { num_accumulated_errors += qv_to_error_rate[(int)alignment.Qualities[pos] - 33]; if (num_accumulated_errors / (pos + 1) <= 0.02) Q17_length = pos + 1; if (num_accumulated_errors / (pos + 1) <= 0.01) Q20_length = pos + 1; } total_Q17_histo.Add(Q17_length); total_Q20_histo.Add(Q20_length); // Record data for system snr if(alignment.GetTag("ZM", flow_signal_zm)) system_snr.Add(flow_signal_zm, key.c_str(), flow_order); else if(alignment.GetTag("FZ", flow_signal_fz)) system_snr.Add(flow_signal_fz, key.c_str(), flow_order); // Record qv histogram qv_histogram.Add(alignment.Qualities); } input_bam.Close(); Json::Value output_json(Json::objectValue); output_json["meta"]["creation_date"] = get_time_iso_string(time(NULL)); output_json["meta"]["format_name"] = "ionstats_basecaller"; output_json["meta"]["format_version"] = "1.0"; system_snr.SaveToJson(output_json); qv_histogram.SaveToJson(output_json); total_full_histo.SaveToJson(output_json["full"]); total_insert_histo.SaveToJson(output_json["insert"]); total_Q17_histo.SaveToJson(output_json["Q17"]); total_Q20_histo.SaveToJson(output_json["Q20"]); ofstream out(output_json_filename.c_str(), ios::out); if (out.good()) { out << output_json.toStyledString(); return 0; } else { fprintf(stderr, "ERROR: unable to write to '%s'\n", output_json_filename.c_str()); return 1; } }
/// @brief Builds the filter configuration from command line options and sizes the
///        per-well filter mask.
///
/// Terminates the process with EXIT_FAILURE when min-read-length is below 1,
/// cr-filter-max-value is not positive, or beverly-filter is neither "off" nor a
/// "filter_ratio,trim_ratio,min_length" triple.
///
/// @param opts        parsed command line options
/// @param _flowOrder  flow order string, stored as-is
/// @param _numFlows   number of flows, stored as-is
/// @param _keys       key sequences, one per read class; exactly two are expected (asserted)
/// @param _maskPtr    mask whose H() and W() determine the size of the filter mask
BaseCallerFilters::BaseCallerFilters(OptArgs& opts,
    const string& _flowOrder, int _numFlows, const vector<KeySequence>& _keys, Mask *_maskPtr)
{
  // Plain copies of the constructor arguments
  flowOrder = _flowOrder;
  numFlows  = _numFlows;
  maskPtr   = _maskPtr;

  // General filtering switches
  keypassFilter                 = opts.GetFirstBoolean('k', "keypass-filter", true);
  percentPositiveFlowsFilterTFs = opts.GetFirstBoolean('-', "clonal-filter-tf", false);
  clonalFilterTraining          = opts.GetFirstBoolean('-', "clonal-filter-train", false);
  clonalFilterSolving           = opts.GetFirstBoolean('-', "clonal-filter-solve", false);
  minReadLength                 = opts.GetFirstInt    ('-', "min-read-length", 8);
  cafieResFilterCalling         = opts.GetFirstBoolean('-', "cr-filter", false);
  cafieResFilterTFs             = opts.GetFirstBoolean('-', "cr-filter-tf", false);
  generate_bead_summary_        = opts.GetFirstBoolean('-', "bead-summary", false);

  // TODO: get this to work right. May require "unwound" flow order, so incompatible with current wells.FlowOrder()
  //flt_control.cafieResMaxValueByFlowOrder[std::string ("TACG") ] = 0.06;  // regular flow order
  //flt_control.cafieResMaxValueByFlowOrder[std::string ("TACGTACGTCTGAGCATCGATCGATGTACAGC") ] = 0.08;  // xdb flow order
  cafieResMaxValue = opts.GetFirstDouble('-', "cr-filter-max-value", 0.08);

  // SFFTrim options
  trim_adapter         = opts.GetFirstString ('-', "trim-adapter", "ATCACCGACTGCCCATAGAGAGGCTGAGAC");
  trim_adapter_cutoff  = opts.GetFirstDouble ('-', "trim-adapter-cutoff", 0.0);
  trim_adapter_closest = opts.GetFirstBoolean('-', "trim-adapter-pick-closest", false);
  trim_qual_wsize      = opts.GetFirstInt    ('-', "trim-qual-window-size", 30);
  trim_qual_cutoff     = opts.GetFirstDouble ('-', "trim-qual-cutoff", 100.0);
  trim_min_read_len    = opts.GetFirstInt    ('-', "trim-min-read-len", 8);

  // Reject option values that make no sense
  if (minReadLength < 1) {
    fprintf (stderr, "Option Error: min-read-length must specify a positive value (%d invalid).\n", minReadLength);
    exit (EXIT_FAILURE);
  }
  if (cafieResMaxValue <= 0) {
    fprintf (stderr, "Option Error: cr-filter-max-value must specify a positive value (%lf invalid).\n", cafieResMaxValue);
    exit (EXIT_FAILURE);
  }

  // Per-class switches: index 0 follows the main setting, index 1 additionally
  // requires the matching "-tf" option
  keys = _keys;
  numClasses = keys.size();
  assert(numClasses == 2);

  classFilterPolyclonal.resize(numClasses);
  classFilterHighResidual.resize(numClasses);
  classFilterPolyclonal[0]   = clonalFilterSolving;
  classFilterPolyclonal[1]   = clonalFilterSolving && percentPositiveFlowsFilterTFs;
  classFilterHighResidual[0] = cafieResFilterCalling;
  classFilterHighResidual[1] = cafieResFilterCalling && cafieResFilterTFs;

  // Beverly filter: either the literal "off" or three comma separated values
  string filter_beverly_args = opts.GetFirstString('-', "beverly-filter", "0.03,0.03,8");
  if (filter_beverly_args != "off") {
    int num_fields_parsed = sscanf (filter_beverly_args.c_str(), "%f,%f,%d",
        &filter_beverly_filter_ratio_,
        &filter_beverly_trim_ratio_,
        &filter_beverly_min_read_length_);
    if (num_fields_parsed != 3) {
      fprintf (stderr, "Option Error: beverly-filter %s\n", filter_beverly_args.c_str());
      fprintf (stderr, "Usage: --beverly-filter=filter_ratio,trim_ratio,min_length\n");
      exit (EXIT_FAILURE);
    }
    filter_beverly_enabled_ = true;
    printf("Beverly filter: enabled, use --beverly-filter=off to disable\n");
    printf("Beverly filter: filter_ratio = %1.5f\n", filter_beverly_filter_ratio_);
    printf("Beverly filter: trim_ratio = %1.5f\n", filter_beverly_trim_ratio_);
    printf("Beverly filter: min_length = %d\n", filter_beverly_min_read_length_);
  } else {
    filter_beverly_enabled_ = false; // Nothing, really
    printf("Beverly filter: disabled, use --beverly-filter=filter_ratio,trim_ratio,min_length\n");
  }

  // One mask entry per well, all starting out as kUninitialized
  filterMask.assign(maskPtr->H()*maskPtr->W(), kUninitialized);
}
/// @brief Configures the phase estimator from command line options and chip geometry.
///
/// Order of precedence:
///   1. If --phase-estimation-file is given and loads successfully, the estimator
///      mode becomes "override" and the function returns immediately.
///   2. If --libcf-ie-dr is given, the three values are applied through
///      SetPhaseParameters() and the function returns immediately.
///   3. Otherwise optional starting values (--initcf-ie-dr) are stored, the phasing
///      flow window is validated, the normalization method is selected and the
///      settings are printed.
///
/// Terminates the process with EXIT_FAILURE when libcf-ie-dr or initcf-ie-dr do not
/// hold exactly 3 values, or when phasing-start-flow is negative or not smaller than
/// phasing-end-flow.
///
/// @param opts             parsed command line options
/// @param chip_subset      source of chip, region size and region count information
/// @param key_norm_method  key normalization method name, stored as-is
void PhaseEstimator::InitializeFromOptArgs(OptArgs& opts, const ion::ChipSubset & chip_subset, const string & key_norm_method)
{
  // Parse command line options
  phasing_estimator_       = opts.GetFirstString ('-', "phasing-estimator", "spatial-refiner-2");
  vector<double> cf_ie_dr  = opts.GetFirstDoubleVector('-', "libcf-ie-dr", "");
  vector<double> init_cf_ie_dr = opts.GetFirstDoubleVector('-', "initcf-ie-dr", "");
  residual_threshold_      = opts.GetFirstDouble ('-', "phasing-residual-filter", 1.0);
  max_phasing_levels_      = opts.GetFirstInt    ('-', "max-phasing-levels", max_phasing_levels_default_);
  num_fullchip_iterations_ = opts.GetFirstInt    ('-', "phasing-fullchip-iterations", 3);
  num_region_iterations_   = opts.GetFirstInt    ('-', "phasing-region-iterations", 1);
  num_reads_per_region_    = opts.GetFirstInt    ('-', "phasing-num-reads", 5000);
  min_reads_per_region_    = opts.GetFirstInt    ('-', "phasing-min-reads", 1000);
  phase_file_name_         = opts.GetFirstString ('-', "phase-estimation-file", "");
  normalization_string_    = opts.GetFirstString ('-', "phase-normalization", "adaptive");
  key_norm_method_         = key_norm_method;

  // Static member variables
  norm_during_param_eval_  = opts.GetFirstBoolean('-', "phase-norm-during-eval", false);
  windowSize_              = opts.GetFirstInt    ('-', "window-size", DPTreephaser::kWindowSizeDefault_);
  phasing_start_flow_      = opts.GetFirstInt    ('-', "phasing-start-flow", 70);
  phasing_end_flow_        = opts.GetFirstInt    ('-', "phasing-end-flow", 150);
  inclusion_threshold_     = opts.GetFirstDouble ('-', "phasing-signal-cutoff", 1.4);
  maxfrac_negative_flows_  = opts.GetFirstDouble ('-', "phasing-norm-threshold", 0.2);

  // Initialize chip size - needed for loading phase parameters
  chip_size_x_   = chip_subset.GetChipSizeX();
  chip_size_y_   = chip_subset.GetChipSizeY();
  region_size_x_ = chip_subset.GetRegionSizeX();
  region_size_y_ = chip_subset.GetRegionSizeY();
  num_regions_x_ = chip_subset.GetNumRegionsX();
  num_regions_y_ = chip_subset.GetNumRegionsY();
  num_regions_   = chip_subset.NumRegions();

  // Loading existing phase estimates from a file takes precedence over all other options
  if (not phase_file_name_.empty()) {
    have_phase_estimates_ = LoadPhaseEstimationTrainSubset(phase_file_name_);
    if (have_phase_estimates_) {
      phasing_estimator_ = "override";
      printf("Phase estimator settings:\n");
      printf(" phase file name : %s\n", phase_file_name_.c_str());
      printf(" phase estimation mode : %s\n\n", phasing_estimator_.c_str());
      return;
    } else
      // A failed load is not fatal: fall through to the remaining options
      cout << "PhaseEstimator Error loading TrainSubset from file " << phase_file_name_ << endl;
  }

  // Set phase parameters if provided by command line
  if (!cf_ie_dr.empty()) {
    if (cf_ie_dr.size() != 3){
      cerr << "BaseCaller Option Error: libcf-ie-dr needs to be a comma separated vector of 3 values." << endl;
      exit (EXIT_FAILURE);
    }
    SetPhaseParameters(cf_ie_dr.at(0), cf_ie_dr.at(1), cf_ie_dr.at(2));
    return; // --libcf-ie-dr overrides other phasing-related options
  }

  // Set starting values for estimation
  if (!init_cf_ie_dr.empty()) {
    if (init_cf_ie_dr.size() != 3){
      cerr << "BaseCaller Option Error: initcf-ie-dr needs to be a comma separated vector of 3 values." << endl;
      exit (EXIT_FAILURE);
    }
    init_cf_ = init_cf_ie_dr.at(0);
    init_ie_ = init_cf_ie_dr.at(1);
    init_dr_ = init_cf_ie_dr.at(2);
  }

  if (phasing_start_flow_ >= phasing_end_flow_ or phasing_start_flow_ < 0) {
    // Leading space before "needs" keeps the flow number from running into the text
    cerr << "BaseCaller Option Error: phasing-start-flow " << phasing_start_flow_
         << " needs to be positive and smaller than phasing-end-flow " << phasing_end_flow_ << endl;
    exit (EXIT_FAILURE);
  }

  // Translate the normalization name into its numeric code
  if (normalization_string_ == "adaptive")
    norm_method_ = 1;
  else if (normalization_string_ == "pid")
    norm_method_ = 2;
  else if (normalization_string_ == "variable")
    norm_method_ = 3;
  else if (normalization_string_ == "off")
    norm_method_ = 4;
  else
    norm_method_ = 0; // "gain" and anything else is default

  printf("Phase estimator settings:\n");
  printf(" phase file name : %s\n", phase_file_name_.c_str());
  printf(" phase estimation mode : %s\n", phasing_estimator_.c_str());
  printf(" initial cf,ie,dr values: %f,%f,%f\n", init_cf_,init_ie_,init_dr_);
  printf(" reads per region target: %d-%d\n", min_reads_per_region_, num_reads_per_region_);
  printf(" normalization method : %s\n", normalization_string_.c_str());
  printf(" variable norm threshold: %f\n", maxfrac_negative_flows_);
  printf("\n");
}