Пример #1
0
// Resolves a double-valued parameter from three sources, listed here in
// increasing order of precedence: the built-in default, the parameters JSON,
// and the command line.
//
//   opts              - command line options, queried through HasOption() / GetFirstDouble()
//   json              - parameters JSON; the lookup key is long_name_hyphens with
//                       every '-' replaced by '_'
//   short_name        - short option character, passed through to opts
//   long_name_hyphens - long option name in its hyphenated form
//   default_value     - value used when neither the JSON nor the command line has the parameter
//
// Prints the resolved value together with its source to stdout and returns it.
double RetrieveParameterDouble(OptArgs &opts, Json::Value& json, char short_name, const string& long_name_hyphens, double default_value)
{
  // Build the JSON key: same name, hyphens turned into underscores.
  string json_key(long_name_hyphens);
  for (string::iterator ch = json_key.begin(); ch != json_key.end(); ++ch) {
    if (*ch == '-')
      *ch = '_';
  }

  double result = default_value;
  string origin("builtin default");

  if (json.isMember(json_key)) {
    // String entries are converted with atof(); all other entries go through asDouble().
    result = json[json_key].isString() ? atof(json[json_key].asCString())
                                       : json[json_key].asDouble();
    origin = "parameters json file";
  }

  // The command line wins; the value resolved so far is handed over as its fallback.
  if (opts.HasOption(short_name, long_name_hyphens)) {
    result = opts.GetFirstDouble(short_name, long_name_hyphens, result);
    origin = "command line option";
  }

  cout << setw(35) << long_name_hyphens << " = " << setw(10) << result << " (double,  " << origin << ")" << endl;
  return result;
}
Пример #2
0
// Reads the read-sampling options (unfiltered read count, downsampling,
// calibration training) from the command line into sampling_opts.
//
//   opts      - command line options
//   num_wells - well count used to turn --downsample-fraction into a read count
//
// Requires context_vars.options_set to be true already (asserted).
// Always returns true; an unsupported option combination terminates the
// process via exit(EXIT_FAILURE) instead of returning false.
bool BaseCallerParameters::InitializeSamplingFromOptArgs(OptArgs& opts, const int num_wells)
{
  assert(context_vars.options_set);

  // When only phase estimation is run none of these options matter, so skip
  // both the parsing and the console output.
  if (context_vars.just_phase_estimation) {
    sampling_opts.options_set = true;
    return true;
  }

  sampling_opts.num_unfiltered = opts.GetFirstInt('-', "num-unfiltered", 100000);
  sampling_opts.downsample_size = opts.GetFirstInt('-', "downsample-size", 0);
  sampling_opts.downsample_fraction = opts.GetFirstDouble('-', "downsample-fraction", 1.0);
  sampling_opts.calibration_training = opts.GetFirstInt('-', "calibration-training", -1);
  sampling_opts.have_calib_panel = !bc_files.calibration_panel_file.empty();
  sampling_opts.MaskNotWanted = MaskNone;

  // Reconcile downsample_size with downsample_fraction. Whether downsampling
  // is active is decided from the values as given, before the size is adjusted.
  const bool fraction_given = sampling_opts.downsample_fraction < 1.0;
  const bool downsampling_active = fraction_given || sampling_opts.downsample_size > 0;

  if (fraction_given) {
    const int wells_by_fraction =
        static_cast<int>(static_cast<float>(num_wells) * sampling_opts.downsample_fraction);
    // A size of 0 means "not set": take the fraction-derived count as is,
    // otherwise the smaller of the two limits applies.
    sampling_opts.downsample_size = (sampling_opts.downsample_size == 0)
        ? wells_by_fraction
        : min(sampling_opts.downsample_size, wells_by_fraction);
  }

  if (downsampling_active) {
    cout << "Downsampling activated: Randomly choosing " << sampling_opts.downsample_size << " reads on this chip." << endl;
  }

  // Without calibration training there is nothing left to adjust.
  if (sampling_opts.calibration_training < 0) {
    sampling_opts.options_set = true;
    return true;
  }

  // Calibration training needs additional changes and overwrites command line options.
  if (context_vars.diagonal_state_prog) {
    cerr << " === BaseCaller Option Incompatibility: Calibration training not supported for diagonal state progression. Aborting!" << endl;
    exit(EXIT_FAILURE);
  }

  // An active downsample size caps the training set size.
  if (sampling_opts.downsample_size > 0) {
    sampling_opts.calibration_training = min(sampling_opts.calibration_training, sampling_opts.downsample_size);
  }

  sampling_opts.downsample_size = max(sampling_opts.calibration_training, 0);
  sampling_opts.num_unfiltered = 0;
  sampling_opts.MaskNotWanted = static_cast<MaskType>(MaskFilteredBadResidual | MaskFilteredBadPPF | MaskFilteredBadKey);
  context_vars.process_tfs = false;
  context_vars.flow_signals_type = "scaled-residual";

  cout << "=== BaseCaller Calibration Training ===" << endl;
  cout << " - Generating a training set up to " << sampling_opts.downsample_size << " randomly selected reads." << endl;
  if (sampling_opts.have_calib_panel) {
    cout << " - Adding calibration panel reads specified in " << bc_files.calibration_panel_file << endl;
  }
  cout << endl;

  sampling_opts.options_set = true;
  return true;
}
Пример #3
0
// Resolves a float-valued parameter from three sources, listed here in
// increasing order of precedence: the built-in default, the parameters JSON,
// and the command line.
//
//   opts              - command line options, queried through HasOption() / GetFirstDouble()
//   json              - parameters JSON; the lookup key is
//                       GetRidOfDomainAndHyphens(long_name_hyphens)
//   short_name        - short option character, passed through to opts
//   long_name_hyphens - long option name as used on the command line
//   default_value     - value used when neither the JSON nor the command line has the parameter
//
// Prints the resolved value together with its source to stdout and returns it.
float RetrieveParameterFloat(OptArgs &opts, Json::Value& json, char short_name, const string& long_name_hyphens, const float default_value)
{
  const string json_key = GetRidOfDomainAndHyphens(long_name_hyphens);

  float result = default_value;
  string origin("builtin default");

  if (json.isMember(json_key)) {
    Json::Value& entry = json[json_key];
    // String entries are converted with atof(); all other entries go through asFloat().
    if (entry.isString()) {
      result = atof(entry.asCString());
    } else {
      result = entry.asFloat();
    }
    origin = "parameters json file";
  }

  // The command line wins; the value resolved so far is handed over as its fallback.
  if (opts.HasOption(short_name, long_name_hyphens)) {
    result = static_cast<float>(opts.GetFirstDouble(short_name, long_name_hyphens, result));
    origin = "command line option";
  }

  cout << setw(35) << long_name_hyphens << " = " << setw(10) << result << " (float,  " << origin << ")" << endl;
  return result;
}
Пример #4
0
// Configures the phase estimator from command line options.
//
// When --libcf-ie-dr is supplied, the estimator mode is switched to
// "override" and a single 1x1 result region is filled with the three
// comma-separated values parsed from that option. A value that does not
// parse as three numbers is reported on stderr and terminates the process
// with EXIT_FAILURE.
void PhaseEstimator::InitializeFromOptArgs(OptArgs& opts)
{
  phasing_estimator_ = opts.GetFirstString('-', "phasing-estimator", "spatial-refiner-2");
  const string cf_ie_dr_override = opts.GetFirstString('-', "libcf-ie-dr", "");
  residual_threshold_ = opts.GetFirstDouble('-', "phasing-residual-filter", 1.0);
  max_phasing_levels_ = opts.GetFirstInt('-', "max-phasing-levels", max_phasing_levels_default_);
  use_pid_norm_ = (opts.GetFirstString('-', "keynormalizer", "keynorm-old") == "keynorm-new");
  windowSize_ = opts.GetFirstInt('-', "window-size", DPTreephaser::kWindowSizeDefault_);

  // No explicit phasing values given: nothing more to set up here.
  if (cf_ie_dr_override.empty())
    return;

  // --libcf-ie-dr overrides other phasing-related options
  phasing_estimator_ = "override";
  result_regions_x_ = 1;
  result_regions_y_ = 1;
  result_cf_.assign(1, 0.0);
  result_ie_.assign(1, 0.0);
  result_dr_.assign(1, 0.0);

  const int num_parsed = sscanf(cf_ie_dr_override.c_str(), "%f,%f,%f",
                                &result_cf_[0], &result_ie_[0], &result_dr_[0]);
  if (num_parsed != 3) {
    fprintf(stderr, "Option Error: libcf-ie-dr %s\n", cf_ie_dr_override.c_str());
    exit(EXIT_FAILURE);
  }
}
Пример #5
0
// Builds the read filter configuration from command line options.
//
//   opts       - command line options
//   _flowOrder - flow order string, stored in flowOrder
//   _numFlows  - number of flows, stored in numFlows
//   _keys      - key sequences, one per read class; exactly two are expected (asserted)
//   _maskPtr   - mask whose H() and W() size the per-well filterMask; it is
//                dereferenced here without a null check
//
// Invalid values for min-read-length, cr-filter-max-value or beverly-filter
// are reported on stderr and terminate the process with EXIT_FAILURE.
BaseCallerFilters::BaseCallerFilters(OptArgs& opts,
    const string& _flowOrder, int _numFlows, const vector<KeySequence>& _keys, Mask *_maskPtr)
{
  // Plain copies of the constructor arguments.
  flowOrder = _flowOrder;
  numFlows = _numFlows;
  maskPtr = _maskPtr;

  // Filter switches
  keypassFilter = opts.GetFirstBoolean('k', "keypass-filter", true);
  percentPositiveFlowsFilterTFs = opts.GetFirstBoolean('-', "clonal-filter-tf", false);
  clonalFilterTraining = opts.GetFirstBoolean('-', "clonal-filter-train", false);
  clonalFilterSolving = opts.GetFirstBoolean('-', "clonal-filter-solve", false);
  minReadLength = opts.GetFirstInt('-', "min-read-length", 8);
  cafieResFilterCalling = opts.GetFirstBoolean('-', "cr-filter", false);
  cafieResFilterTFs = opts.GetFirstBoolean('-', "cr-filter-tf", false);
  generate_bead_summary_ = opts.GetFirstBoolean('-', "bead-summary", false);

  // TODO: get this to work right. May require "unwound" flow order, so incompatible with current wells.FlowOrder()
  //flt_control.cafieResMaxValueByFlowOrder[std::string ("TACG") ] = 0.06;  // regular flow order
  //flt_control.cafieResMaxValueByFlowOrder[std::string ("TACGTACGTCTGAGCATCGATCGATGTACAGC") ] = 0.08;  // xdb flow order

  cafieResMaxValue = opts.GetFirstDouble('-', "cr-filter-max-value", 0.08);

  // SFFTrim options
  trim_adapter = opts.GetFirstString('-', "trim-adapter", "ATCACCGACTGCCCATAGAGAGGCTGAGAC");
  trim_adapter_cutoff = opts.GetFirstDouble('-', "trim-adapter-cutoff", 0.0);
  trim_adapter_closest = opts.GetFirstBoolean('-', "trim-adapter-pick-closest", false);
  trim_qual_wsize = opts.GetFirstInt('-', "trim-qual-window-size", 30);
  trim_qual_cutoff = opts.GetFirstDouble('-', "trim-qual-cutoff", 100.0);
  trim_min_read_len = opts.GetFirstInt('-', "trim-min-read-len", 8);

  // Validate options
  if (minReadLength < 1) {
    fprintf(stderr, "Option Error: min-read-length must specify a positive value (%d invalid).\n", minReadLength);
    exit(EXIT_FAILURE);
  }
  if (cafieResMaxValue <= 0) {
    fprintf(stderr, "Option Error: cr-filter-max-value must specify a positive value (%lf invalid).\n", cafieResMaxValue);
    exit(EXIT_FAILURE);
  }

  // Per-class filter switches. For class 1 a filter is active only when the
  // corresponding "-tf" option is set in addition to the filter itself.
  keys = _keys;
  numClasses = keys.size();
  assert(numClasses == 2);

  classFilterPolyclonal.resize(numClasses);
  classFilterHighResidual.resize(numClasses);
  classFilterPolyclonal[0] = clonalFilterSolving;
  classFilterPolyclonal[1] = clonalFilterSolving && percentPositiveFlowsFilterTFs;
  classFilterHighResidual[0] = cafieResFilterCalling;
  classFilterHighResidual[1] = cafieResFilterCalling && cafieResFilterTFs;

  // Beverly filter: either the literal "off" or "filter_ratio,trim_ratio,min_length".
  const string beverly_spec = opts.GetFirstString('-', "beverly-filter", "0.03,0.03,8");
  if (beverly_spec != "off") {
    const int num_parsed = sscanf(beverly_spec.c_str(), "%f,%f,%d",
        &filter_beverly_filter_ratio_,
        &filter_beverly_trim_ratio_,
        &filter_beverly_min_read_length_);
    if (num_parsed != 3) {
      fprintf(stderr, "Option Error: beverly-filter %s\n", beverly_spec.c_str());
      fprintf(stderr, "Usage: --beverly-filter=filter_ratio,trim_ratio,min_length\n");
      exit(EXIT_FAILURE);
    }
    filter_beverly_enabled_ = true;
    printf("Beverly filter: enabled, use --beverly-filter=off to disable\n");
    printf("Beverly filter: filter_ratio = %1.5f\n", filter_beverly_filter_ratio_);
    printf("Beverly filter: trim_ratio = %1.5f\n", filter_beverly_trim_ratio_);
    printf("Beverly filter: min_length = %d\n", filter_beverly_min_read_length_);
  } else {
    filter_beverly_enabled_ = false;
    printf("Beverly filter: disabled, use --beverly-filter=filter_ratio,trim_ratio,min_length\n");
  }

  // One entry per mask position, all starting out as kUninitialized.
  filterMask.assign(maskPtr->H()*maskPtr->W(), kUninitialized);
}
Пример #6
0
// Configures the phase estimator from command line options and chip geometry.
//
//   opts            - command line options
//   chip_subset     - provides chip size, region size and region counts
//   key_norm_method - stored in key_norm_method_
//
// Sources of phase parameters are tried in this order:
//   1. --phase-estimation-file: if the file loads, the estimator mode becomes
//      "override" and the function returns. If loading fails, a message is
//      printed to stdout and processing continues with the remaining options.
//   2. --libcf-ie-dr: the three values are passed to SetPhaseParameters() and
//      the function returns.
//   3. --initcf-ie-dr: optional starting values for the estimation.
//
// The phasing flow window is validated only when neither (1) nor (2) returned
// early. Malformed option values are reported on stderr and terminate the
// process with EXIT_FAILURE.
void PhaseEstimator::InitializeFromOptArgs(OptArgs& opts, const ion::ChipSubset & chip_subset, const string & key_norm_method)
{
  // Parse command line options
  phasing_estimator_      = opts.GetFirstString ('-', "phasing-estimator", "spatial-refiner-2");
  vector<double> cf_ie_dr = opts.GetFirstDoubleVector('-', "libcf-ie-dr", "");
  vector<double> init_cf_ie_dr = opts.GetFirstDoubleVector('-', "initcf-ie-dr", "");
  residual_threshold_     = opts.GetFirstDouble ('-', "phasing-residual-filter", 1.0);
  max_phasing_levels_     = opts.GetFirstInt    ('-', "max-phasing-levels", max_phasing_levels_default_);
  num_fullchip_iterations_= opts.GetFirstInt    ('-', "phasing-fullchip-iterations", 3);
  num_region_iterations_  = opts.GetFirstInt    ('-', "phasing-region-iterations", 1);
  num_reads_per_region_   = opts.GetFirstInt    ('-', "phasing-num-reads", 5000);
  min_reads_per_region_   = opts.GetFirstInt    ('-', "phasing-min-reads", 1000);
  phase_file_name_        = opts.GetFirstString ('-', "phase-estimation-file", "");
  normalization_string_   = opts.GetFirstString ('-', "phase-normalization", "adaptive");
  key_norm_method_        = key_norm_method;

  // Static member variables
  norm_during_param_eval_ = opts.GetFirstBoolean('-', "phase-norm-during-eval", false);
  windowSize_             = opts.GetFirstInt    ('-', "window-size", DPTreephaser::kWindowSizeDefault_);
  phasing_start_flow_     = opts.GetFirstInt    ('-', "phasing-start-flow", 70);
  phasing_end_flow_       = opts.GetFirstInt    ('-', "phasing-end-flow", 150);
  inclusion_threshold_    = opts.GetFirstDouble ('-', "phasing-signal-cutoff", 1.4);
  maxfrac_negative_flows_ = opts.GetFirstDouble ('-', "phasing-norm-threshold", 0.2);

  // Initialize chip size - needed for loading phase parameters
  chip_size_x_   = chip_subset.GetChipSizeX();
  chip_size_y_   = chip_subset.GetChipSizeY();
  region_size_x_ = chip_subset.GetRegionSizeX();
  region_size_y_ = chip_subset.GetRegionSizeY();
  num_regions_x_ = chip_subset.GetNumRegionsX();
  num_regions_y_ = chip_subset.GetNumRegionsY();
  num_regions_   = chip_subset.NumRegions();

  // Loading existing phase estimates from a file takes precedence over all other options
  if (not phase_file_name_.empty()) {
    have_phase_estimates_ = LoadPhaseEstimationTrainSubset(phase_file_name_);
    if (have_phase_estimates_) {
      phasing_estimator_ = "override";
      printf("Phase estimator settings:\n");
      printf("  phase file name        : %s\n", phase_file_name_.c_str());
      printf("  phase estimation mode  : %s\n\n", phasing_estimator_.c_str());
      return;
    } else
      // A failed load is not fatal: report it and continue with the other options.
      cout << "PhaseEstimator Error loading TrainSubset from file " << phase_file_name_ << endl;
  }

  // Set phase parameters if provided by command line
  if (!cf_ie_dr.empty()) {
    if (cf_ie_dr.size() != 3){
      cerr << "BaseCaller Option Error: libcf-ie-dr needs to be a comma separated vector of 3 values." << endl;
      exit (EXIT_FAILURE);
    }
    SetPhaseParameters(cf_ie_dr.at(0), cf_ie_dr.at(1), cf_ie_dr.at(2));
    return; // --libcf-ie-dr overrides other phasing-related options
  }

  // Set starting values for estimation
  if (!init_cf_ie_dr.empty()) {
    if (init_cf_ie_dr.size() != 3){
      cerr << "BaseCaller Option Error: initcf-ie-dr needs to be a comma separated vector of 3 values." << endl;
      exit (EXIT_FAILURE);
    }
    init_cf_ = init_cf_ie_dr.at(0);
    init_ie_ = init_cf_ie_dr.at(1);
    init_dr_ = init_cf_ie_dr.at(2);
  }

  // The phasing window must start at flow 0 or later and end after it starts.
  if (phasing_start_flow_ >= phasing_end_flow_ or phasing_start_flow_ < 0) {
    cerr << "BaseCaller Option Error: phasing-start-flow " << phasing_start_flow_
         << " needs to be non-negative and smaller than phasing-end-flow " << phasing_end_flow_ << endl;
    exit (EXIT_FAILURE);
  }

  // Translate the normalization option string into its numeric method code.
  if (normalization_string_ == "adaptive")
    norm_method_ = 1;
  else if (normalization_string_ == "pid")
    norm_method_ = 2;
  else if (normalization_string_ == "variable")
    norm_method_ = 3;
  else if (normalization_string_ == "off")
    norm_method_ = 4;
  else
    norm_method_ = 0; // "gain" and anything else is default

  printf("Phase estimator settings:\n");
  printf("  phase file name        : %s\n", phase_file_name_.c_str());
  printf("  phase estimation mode  : %s\n", phasing_estimator_.c_str());
  printf("  initial cf,ie,dr values: %f,%f,%f\n", init_cf_,init_ie_,init_dr_);
  printf("  reads per region target: %d-%d\n", min_reads_per_region_, num_reads_per_region_);
  printf("  normalization method   : %s\n", normalization_string_.c_str());
  printf("  variable norm threshold: %f\n", maxfrac_negative_flows_);
  printf("\n");
}