Пример #1
0
// Reads the general BaseCaller context options from the command line into
// context_vars, num_threads_ and num_bamwriter_threads_.
//
// Precondition (asserted): the file options in bc_files have already been set.
// If diagonal state progression is requested together with a dephaser other
// than "treephaser-swan", the dephaser is overridden and a notice is printed.
// Marks context_vars.options_set and always returns true.
bool BaseCallerParameters::InitContextVarsFromOptArgs(OptArgs& opts)
{
    assert(bc_files.options_set);

    // The default run identifier is derived from the full output directory string.
    char run_id_from_dir[6];
    ion_run_to_readname(run_id_from_dir,
                        const_cast<char*>(bc_files.output_directory.c_str()),
                        bc_files.output_directory.length());

    context_vars.run_id    = opts.GetFirstString('-', "run-id", run_id_from_dir);
    num_threads_           = opts.GetFirstInt('n', "num-threads", max(2*numCores(), 4));
    num_bamwriter_threads_ = opts.GetFirstInt('-', "num-threads-bamwriter", 6);

    context_vars.flow_signals_type =
        opts.GetFirstString('-', "flow-signals-type", "none");
    context_vars.extra_trim_left =
        opts.GetFirstInt('-', "extra-trim-left", 0);
    context_vars.only_process_unfiltered_set =
        opts.GetFirstBoolean('-', "only-process-unfiltered-set", false);

    // Treephaser options
    context_vars.dephaser =
        opts.GetFirstString('-', "dephaser", "treephaser-sse");
    context_vars.keynormalizer =
        opts.GetFirstString('-', "keynormalizer", "gain");
    context_vars.windowSize =
        opts.GetFirstInt('-', "window-size", DPTreephaser::kWindowSizeDefault_);
    context_vars.skip_droop =
        opts.GetFirstBoolean('-', "skip-droop", true);
    context_vars.skip_recal_during_norm =
        opts.GetFirstBoolean('-', "skip-recal-during-normalization", false);
    context_vars.diagonal_state_prog =
        opts.GetFirstBoolean('-', "diagonal-state-prog", false);

    // Diagonal state progression is only paired with treephaser-swan here:
    // override any other dephaser choice and tell the user.
    const bool override_dephaser =
        context_vars.diagonal_state_prog && context_vars.dephaser != "treephaser-swan";
    if (override_dephaser) {
        cout << " === BaseCaller Option Incompatibility: Using dephaser treephaser-swan with diagonal state progression instead of "
             << context_vars.dephaser << endl;
        context_vars.dephaser = "treephaser-swan";
    }

    context_vars.process_tfs = true;
    context_vars.options_set = true;
    return true;
}
Пример #2
0
// Reads the read-sampling options (unfiltered set size, downsampling and
// calibration training) into sampling_opts.
//
// Precondition (asserted): the context options have already been set.
// num_wells is used to turn --downsample-fraction into an absolute read count.
// Exits the process if calibration training is combined with diagonal state
// progression. Marks sampling_opts.options_set and returns true otherwise.
bool BaseCallerParameters::InitializeSamplingFromOptArgs(OptArgs& opts, const int num_wells)
{
    assert(context_vars.options_set);

    // For pure phase estimation none of these options matter, so stay quiet.
    if (context_vars.just_phase_estimation) {
        sampling_opts.options_set = true;
        return true;
    }

    sampling_opts.num_unfiltered       = opts.GetFirstInt   ('-', "num-unfiltered", 100000);
    sampling_opts.downsample_size      = opts.GetFirstInt   ('-', "downsample-size", 0);
    sampling_opts.downsample_fraction  = opts.GetFirstDouble('-', "downsample-fraction", 1.0);
    sampling_opts.calibration_training = opts.GetFirstInt   ('-', "calibration-training", -1);

    sampling_opts.have_calib_panel = !bc_files.calibration_panel_file.empty();
    sampling_opts.MaskNotWanted    = MaskNone;

    // Reconcile downsample_size with downsample_fraction: when both are given
    // the smaller resulting read count wins.
    const bool fraction_requested = sampling_opts.downsample_fraction < 1.0;
    const bool downsampling_on    = sampling_opts.downsample_size > 0 || fraction_requested;

    if (fraction_requested) {
        const int size_from_fraction = (int)((float)num_wells*sampling_opts.downsample_fraction);
        if (sampling_opts.downsample_size == 0)
            sampling_opts.downsample_size = size_from_fraction;
        else
            sampling_opts.downsample_size = min(sampling_opts.downsample_size, size_from_fraction);
    }

    if (downsampling_on)
        cout << "Downsampling activated: Randomly choosing " << sampling_opts.downsample_size << " reads on this chip." << endl;

    // Without calibration training there is nothing more to adjust.
    if (sampling_opts.calibration_training < 0) {
        sampling_opts.options_set = true;
        return true;
    }

    // Calibration training overwrites several command line options.
    if (context_vars.diagonal_state_prog) {
        cerr << " === BaseCaller Option Incompatibility: Calibration training not supported for diagonal state progression. Aborting!" << endl;
        exit(EXIT_FAILURE);
    }

    // The training set can never be larger than an explicitly requested downsample.
    if (sampling_opts.downsample_size > 0)
        sampling_opts.calibration_training = min(sampling_opts.calibration_training, sampling_opts.downsample_size);

    sampling_opts.downsample_size  = max(sampling_opts.calibration_training, 0);
    sampling_opts.MaskNotWanted    = static_cast<MaskType>(MaskFilteredBadResidual|MaskFilteredBadPPF|MaskFilteredBadKey);
    sampling_opts.num_unfiltered   = 0;
    context_vars.process_tfs       = false;
    context_vars.flow_signals_type = "scaled-residual";

    cout << "=== BaseCaller Calibration Training ===" << endl;
    cout << " - Generating a training set up to " << sampling_opts.downsample_size << " randomly selected reads." << endl;
    if (sampling_opts.have_calib_panel)
        cout << " - Adding calibration panel reads specified in " << bc_files.calibration_panel_file << endl;
    cout << endl;

    sampling_opts.options_set = true;
    return true;
}
Пример #3
0
// Resolves an integer parameter from three sources, in increasing priority:
// the builtin default, the parameters json (key = option name with '-' replaced
// by '_'), and the command line option. Prints the chosen value together with
// its source and returns it.
int RetrieveParameterInt(OptArgs &opts, Json::Value& json, char short_name, const string& long_name_hyphens, int default_value)
{
  // Json keys use underscores where the command line uses hyphens.
  string json_key(long_name_hyphens);
  for (string::size_type pos = json_key.find('-'); pos != string::npos; pos = json_key.find('-', pos + 1))
    json_key[pos] = '_';

  int result = default_value;
  const char *origin = "builtin default";

  if (json.isMember(json_key)) {
    // The json file may store the number either as a string or as a number.
    if (json[json_key].isString())
      result = atoi(json[json_key].asCString());
    else
      result = json[json_key].asInt();
    origin = "parameters json file";
  }

  // The command line has the last word; the json/default value is its fallback.
  if (opts.HasOption(short_name, long_name_hyphens)) {
    result = opts.GetFirstInt(short_name, long_name_hyphens, result);
    origin = "command line option";
  }

  cout << setw(35) << long_name_hyphens << " = " << setw(10) << result << " (integer, " << origin << ")" << endl;
  return result;
}
Пример #4
0
// Reads the recalibration options, initializes the model and, if the model was
// initialized successfully and --save-hpmodel is on, stores the model file in
// the BAM comments. With --diagonal-state-prog the model file name is cleared
// before initialization.
void RecalibrationModel::Initialize(OptArgs& opts, vector<string> &bam_comments, const string & run_id, const ion::ChipSubset & chip_subset)
{
  string hp_model_file     = opts.GetFirstString ('-', "model-file", "");
  int    hp_threshold      = opts.GetFirstInt    ('-', "recal-model-hp-thres", 4);
  const bool store_in_bam  = opts.GetFirstBoolean('-', "save-hpmodel", true);
  const bool diagonal_prog = opts.GetFirstBoolean('-', "diagonal-state-prog", false);

  if (diagonal_prog)
    hp_model_file.clear();

  // The model is always initialized; saving additionally depends on the option.
  const bool model_ready = InitializeModel(hp_model_file, hp_threshold);
  if (!model_ready || !store_in_bam)
    return;

  SaveModelFileToBamComments(hp_model_file, bam_comments, run_id, chip_subset.GetColOffset(), chip_subset.GetRowOffset());
}
Пример #5
0
// Reads the phasing estimation options from the command line.
//
// If --libcf-ie-dr is given ("cf,ie,dr"), the estimator is switched to
// "override" with a single 1x1 region holding the supplied values; an
// unparsable value terminates the process with an error message.
void PhaseEstimator::InitializeFromOptArgs(OptArgs& opts)
{
  phasing_estimator_  = opts.GetFirstString('-', "phasing-estimator", "spatial-refiner-2");
  const string cf_ie_dr_override = opts.GetFirstString('-', "libcf-ie-dr", "");
  residual_threshold_ = opts.GetFirstDouble('-', "phasing-residual-filter", 1.0);
  max_phasing_levels_ = opts.GetFirstInt   ('-', "max-phasing-levels", max_phasing_levels_default_);

  const string key_normalizer = opts.GetFirstString('-', "keynormalizer", "keynorm-old");
  use_pid_norm_       = (key_normalizer == "keynorm-new");
  windowSize_         = opts.GetFirstInt   ('-', "window-size", DPTreephaser::kWindowSizeDefault_);

  if (cf_ie_dr_override.empty())
    return;

  // --libcf-ie-dr overrides other phasing-related options
  phasing_estimator_ = "override";
  result_regions_x_  = 1;
  result_regions_y_  = 1;
  result_cf_.assign(1, 0.0);
  result_ie_.assign(1, 0.0);
  result_dr_.assign(1, 0.0);

  const int fields_read = sscanf (cf_ie_dr_override.c_str(), "%f,%f,%f", &result_cf_[0], &result_ie_[0], &result_dr_[0]);
  if (fields_read != 3) {
    fprintf (stderr, "Option Error: libcf-ie-dr %s\n", cf_ie_dr_override.c_str());
    exit (EXIT_FAILURE);
  }
}
Пример #6
0
// Builds a TagTrimmerParameters object from the command line options.
//
// A minimum family size of 0 disables molecular tagging; a negative value, an
// unknown --tag-trim-method or an unknown --tag-filter-method terminates the
// process with an error message.
TagTrimmerParameters MolecularTagTrimmer::ReadOpts(OptArgs& opts)
{
  TagTrimmerParameters params;

  params.min_family_size   = opts.GetFirstInt     ('-', "min-tag-fam-size", 3);
  params.suppress_mol_tags = opts.GetFirstBoolean ('-', "suppress-mol-tags", false);
  // Disabled options, kept for reference:
  //   "tag-handle"    -> cl_a_handle   (string, default "")
  //   "handle-cutoff" -> handle_cutoff (int, default 2)

  // Tag structures given on the command line
  params.master_tags.prefix_mol_tag = opts.GetFirstString ('-', "prefix-mol-tag", "");
  params.master_tags.suffix_mol_tag = opts.GetFirstString ('-', "suffix-mol-tag", "");
  ValidateTagString(params.master_tags.prefix_mol_tag);
  ValidateTagString(params.master_tags.suffix_mol_tag);

  // A family size of zero is overloaded to mean "disable molecular tagging".
  if (params.min_family_size == 0) {
    params.suppress_mol_tags = true;
  }
  else if (params.min_family_size < 1) {
    cerr << "MolecularTagTrimmer Error: min-tag-fam-size must be at least 1. " << endl;
    exit(EXIT_FAILURE);
  }

  params.command_line_tags = params.master_tags.HasTags();

  // Trimming method selection
  const string trim_choice = opts.GetFirstString ('-', "tag-trim-method", "sloppy-trim");
  const bool sloppy_trim = (trim_choice == "sloppy-trim");
  if (!sloppy_trim && trim_choice != "strict-trim") {
    cerr << "MolecularTagTrimmer Error: Unknown tag trimming option " << trim_choice << endl;
    exit(EXIT_FAILURE);
  }
  if (sloppy_trim)
    params.tag_trim_method = kSloppyTrim;
  else
    params.tag_trim_method = kStrictTrim;

  // Read filtering method selection
  const string filter_choice = opts.GetFirstString ('-', "tag-filter-method", "need-all");
  if (filter_choice == "need-all") {
    params.tag_filter_method = kneed_all_tags;
  }
  else if (filter_choice == "need-prefix") {
    params.tag_filter_method = kneed_only_prefix_tag;
  }
  else if (filter_choice == "need-suffix") {
    params.tag_filter_method = kneed_only_suffix_tag;
  }
  else {
    cerr << "MolecularTagTrimmer Error: Unknown tag filtering option " << filter_choice << endl;
    exit(EXIT_FAILURE);
  }

  return params;
}
Пример #7
0
// Fills the program control settings. The debug level is taken from the command
// line only; every other setting goes through the RetrieveParameter* helpers,
// which receive both the command line options and the tvc parameters json.
void ProgramControlSettings::SetOpts(OptArgs &opts, Json::Value &tvc_params)
{
  DEBUG = opts.GetFirstInt('d', "debug", 0);

  // Threading and work partitioning
  nThreads =
      RetrieveParameterInt(opts, tvc_params, 'n', "num-threads", 12);
  nVariantsPerThread =
      RetrieveParameterInt(opts, tvc_params, 'N', "num-variants-per-thread", 250);
  use_SSE_basecaller =
      RetrieveParameterBool(opts, tvc_params, '-', "use-sse-basecaller", true);

  // Diagnostic output selection
  rich_json_diagnostic =
      RetrieveParameterBool(opts, tvc_params, '-', "do-json-diagnostic", false);
  minimal_diagnostic =
      RetrieveParameterBool(opts, tvc_params, '-', "do-minimal-diagnostic", false);

  // Processing switches
  inputPositionsOnly =
      RetrieveParameterBool(opts, tvc_params, '-', "process-input-positions-only", false);
  suppress_recalibration =
      RetrieveParameterBool(opts, tvc_params, '-', "suppress-recalibration", true);
  resolve_clipped_bases =
      RetrieveParameterBool(opts, tvc_params, '-', "resolve-clipped-bases", false);
}
Пример #8
0
// Loads the recalibration model named by --model-file.
//
// The model stays disabled (is_enabled_ == false) when no file is given, the
// file name is "off", the file cannot be opened, its header line cannot be
// parsed, or --recal-model-hp-thres exceeds MAX_HPXLEN.
//
// File layout as read here: one comment line, one header line with ten
// integers (flow start/end/span, x min/max/span, y min/max/span, max
// calibrated hp), followed by one record per line:
//   flowBase flowStart flowEnd xMin xMax yMin yMax refHP paramA paramB
void RecalibrationModel::Initialize(OptArgs& opts)
{
    is_enabled_ = false;

    string model_file_name = opts.GetFirstString ('-', "model-file", "");
    if (model_file_name.empty() or model_file_name == "off") {
        printf("RecalibrationModel: disabled\n\n");
        return;
    }

    ifstream model_file;
    model_file.open(model_file_name.c_str());
    if (model_file.fail()) {
        printf("RecalibrationModel: disabled (cannot open %s)\n\n", model_file_name.c_str());
        model_file.close();
        return;
    }

    recalModelHPThres = opts.GetFirstInt('-', "recal-model-hp-thres", 4);

    string comment_line;
    getline(model_file, comment_line); // skip the comment line

    int flowStart = 0, flowEnd = 0, flowSpan = 0;
    int xMin = 0, xMax = 0, xSpan = 0;
    int yMin = 0, yMax = 0, ySpan = 0;
    int max_hp_calibrated = 0;

    // Do not set up the stratification from a header that failed to parse.
    if (!(model_file >> flowStart >> flowEnd >> flowSpan >> xMin >> xMax >> xSpan >> yMin >> yMax >> ySpan >> max_hp_calibrated)) {
        printf("RecalibrationModel: disabled (cannot parse header of %s)\n\n", model_file_name.c_str());
        model_file.close();
        return;
    }

    stratification.SetupRegion(xMin, xMax, xSpan, yMin, yMax, ySpan);
    // Calculate number of partitions and initialize the stratifiedAs and stratifiedBs
    SetupStratification(flowStart,flowEnd, flowSpan,xMin,xMax,xSpan,yMin,yMax,ySpan,max_hp_calibrated);

    // Parse the records into stratifiedAs and stratifiedBs.
    // The extraction itself is the loop condition: a record is only used when
    // every field was read successfully. Testing good() before the read would
    // process one extra, stale record once the end of the file is reached.
    float paramA = 0.0f, paramB = 0.0f;
    int refHP = 0;
    char flowBase = 0;
    while (model_file >> flowBase >> flowStart >> flowEnd >> xMin >> xMax >> yMin >> yMax >> refHP >> paramA >> paramB) {
        int nucInd = NuctoInt(flowBase);
        int offsetRegion = stratification.OffsetRegion(xMin,yMin);
        FillIndexes(offsetRegion,nucInd, refHP, flowStart, flowEnd, paramA, paramB);
    }

    model_file.close();

    printf("Recalibration: enabled (using calibration file %s)\n\n", model_file_name.c_str());
    is_enabled_ = true;
    // A threshold beyond the supported homopolymer length disables the model again.
    if (recalModelHPThres > MAX_HPXLEN) is_enabled_ = false;
}
Пример #9
0
// Sets up the flow order and the library / test fragment key sequences.
//
// FlowOrder and NumFlows are the defaults for --flow-order and --flowlimit; the
// number of flows is capped at NumFlows. The keys default to "TCAG" (library)
// and "ATCG" (test fragments); the legacy options --librarykey / --tfkey take
// precedence over --lib-key / --tf-key. Always returns true.
bool BaseCallerContext::SetKeyAndFlowOrder(OptArgs& opts, const char * FlowOrder, const int NumFlows)
{
    const string requested_order = opts.GetFirstString ('-', "flow-order", FlowOrder);
    const int    requested_flows = opts.GetFirstInt    ('f', "flowlimit", NumFlows);
    flow_order.SetFlowOrder(requested_order, requested_flows);

    // Never use more flows than were passed in.
    if (flow_order.num_flows() > NumFlows)
      flow_order.SetNumFlows(NumFlows);
    assert(flow_order.is_ok());

    //! @todo Get default key from wells
    string library_key       = opts.GetFirstString ('-', "lib-key", "TCAG");
    string test_fragment_key = opts.GetFirstString ('-', "tf-key", "ATCG");
    // Backward compatible option names override the values read above.
    library_key              = opts.GetFirstString ('-', "librarykey", library_key);
    test_fragment_key        = opts.GetFirstString ('-', "tfkey", test_fragment_key);

    keys.resize(2);
    keys[0].Set(flow_order, library_key, "lib");
    keys[1].Set(flow_order, test_fragment_key, "tf");
    return true;
}
Пример #10
0
// Computes per-test-fragment (TF) statistics from a BAM file and writes them to
// a json file.
//
// Options: -i/--input (BAM, required), -r/--ref (fasta, required),
// -o/--output (default "ionstats_tf.json"), -h/--histogram-length (default 400).
//
// Only mapped, forward-strand reads with an MD tag and a start position <= 5
// are evaluated. For every such read the cigar and the MD tag are scanned in
// parallel to count bases and errors, which feed the called / aligned / AQ10 /
// AQ17 histograms, the error-by-position histogram, the SNR metric and the
// homopolymer accuracy metric of the reference the read maps to. References
// with fewer than 1000 evaluated reads are left out of the json.
//
// Reads whose MD tag contains a character other than a digit, '^' or an
// uppercase letter are skipped and counted; one warning is printed at the end.
//
// Returns 0 on success and 1 on usage errors or when a file cannot be
// opened / written.
int IonstatsTestFragments(int argc, const char *argv[])
{
  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  string input_bam_filename   = opts.GetFirstString('i', "input", "");
  string fasta_filename       = opts.GetFirstString('r', "ref", "");
  string output_json_filename = opts.GetFirstString('o', "output", "ionstats_tf.json");
  int histogram_length        = opts.GetFirstInt   ('h', "histogram-length", 400);

  if(argc < 2 or input_bam_filename.empty() or fasta_filename.empty()) {
    IonstatsTestFragmentsHelp();
    return 1;
  }

  //
  // Prepare for metric calculation
  //

  map<string,string> tf_sequences;
  PopulateReferenceSequences(tf_sequences, fasta_filename);


  BamReader input_bam;
  if (!input_bam.Open(input_bam_filename)) {
    fprintf(stderr, "[ionstats] ERROR: cannot open %s\n", input_bam_filename.c_str());
    return 1;
  }

  int num_tfs = input_bam.GetReferenceCount();


  SamHeader sam_header = input_bam.GetHeader();
  if(!sam_header.HasReadGroups()) {
    fprintf(stderr, "[ionstats] ERROR: no read groups in %s\n", input_bam_filename.c_str());
    return 1;
  }

  // Flow order and key are taken from the read groups; if several read groups
  // define them, the last one wins.
  string flow_order;
  string key;
  for (SamReadGroupIterator rg = sam_header.ReadGroups.Begin(); rg != sam_header.ReadGroups.End(); ++rg) {
    if(rg->HasFlowOrder())
      flow_order = rg->FlowOrder;
    if(rg->HasKeySequence())
      key = rg->KeySequence;
  }


  // Need these metrics stratified by TF.

  vector<ReadLengthHistogram> called_histogram(num_tfs);
  vector<ReadLengthHistogram> aligned_histogram(num_tfs);
  vector<ReadLengthHistogram> AQ10_histogram(num_tfs);
  vector<ReadLengthHistogram> AQ17_histogram(num_tfs);
  vector<SimpleHistogram> error_by_position(num_tfs);
  vector<MetricGeneratorSNR> system_snr(num_tfs);
  vector<MetricGeneratorHPAccuracy> hp_accuracy(num_tfs);

  for (int tf = 0; tf < num_tfs; ++tf) {
    called_histogram[tf].Initialize(histogram_length);
    aligned_histogram[tf].Initialize(histogram_length);
    AQ10_histogram[tf].Initialize(histogram_length);
    AQ17_histogram[tf].Initialize(histogram_length);
    error_by_position[tf].Initialize(histogram_length);
  }

  vector<uint16_t> flow_signal_fz(flow_order.length());
  vector<int16_t> flow_signal_zm(flow_order.length());

  const RefVector& refs = input_bam.GetReferenceData();

  // Missing:
  //  - hp accuracy - tough, copy verbatim from TFMapper?


  BamAlignment alignment;
  vector<char>  MD_op;
  vector<int>   MD_len;
  MD_op.reserve(1024);
  MD_len.reserve(1024);
  string MD_tag;

  // Number of reads skipped because their MD tag could not be parsed
  unsigned int num_malformed_md = 0;

  //
  // Main loop over mapped reads in the input BAM
  //

  while(input_bam.GetNextAlignment(alignment)) {


    if (!alignment.IsMapped() or !alignment.GetTag("MD",MD_tag))
      continue;

    // The check below eliminates unexpected alignments
    if (alignment.IsReverseStrand() or alignment.Position > 5)
      continue;

    int current_tf = alignment.RefID;

    // All per-TF containers are indexed by the reference id; never index
    // outside of them, whatever the BAM record claims.
    if (current_tf < 0 or current_tf >= num_tfs)
      continue;

    //
    // Step 1. Parse MD tag
    //

    MD_op.clear();
    MD_len.clear();

    bool MD_tag_ok = true;

    for (const char *MD_ptr = MD_tag.c_str(); *MD_ptr;) {

      int item_length = 0;
      if (*MD_ptr >= '0' and *MD_ptr <= '9') {    // Its a match
        MD_op.push_back('M');
        for (; *MD_ptr and *MD_ptr >= '0' and *MD_ptr <= '9'; ++MD_ptr)
          item_length = 10*item_length + *MD_ptr - '0';
      } else if (*MD_ptr == '^' or (*MD_ptr >= 'A' and *MD_ptr <= 'Z')) {
        if (*MD_ptr == '^') {                     // Its a deletion
          MD_ptr++;
          MD_op.push_back('D');
        } else                                    // Its a substitution
          MD_op.push_back('X');
        for (; *MD_ptr and *MD_ptr >= 'A' and *MD_ptr <= 'Z'; ++MD_ptr)
          item_length++;
      } else {
        // Any other character would never be consumed by the branches above,
        // so the scan could not make progress. Give up on this tag instead.
        MD_tag_ok = false;
        break;
      }
      MD_len.push_back(item_length);
    }

    if (!MD_tag_ok) {
      num_malformed_md++;
      continue;
    }

    //
    // Step 2. Synchronously scan through Cigar and MD, doing error accounting
    //

    int MD_idx = alignment.IsReverseStrand() ? MD_op.size()-1 : 0;
    int cigar_idx = alignment.IsReverseStrand() ? alignment.CigarData.size()-1 : 0;
    int increment = alignment.IsReverseStrand() ? -1 : 1;

    int AQ10_bases = 0;
    int AQ17_bases = 0;
    int num_bases = 0;
    int num_errors = 0;

    while (cigar_idx < (int)alignment.CigarData.size() and MD_idx < (int) MD_op.size() and cigar_idx >= 0 and MD_idx >= 0) {

      if (alignment.CigarData[cigar_idx].Length == 0) { // Try advancing cigar
        cigar_idx += increment;
        continue;
      }
      if (MD_len[MD_idx] == 0) { // Try advancing MD
        MD_idx += increment;
        continue;
      }

      // Match
      if (alignment.CigarData[cigar_idx].Type == 'M' and MD_op[MD_idx] == 'M') {
        int advance = min((int)alignment.CigarData[cigar_idx].Length, MD_len[MD_idx]);
        num_bases += advance;
        alignment.CigarData[cigar_idx].Length -= advance;
        MD_len[MD_idx] -= advance;

      // Insertion (read has a base, reference doesn't)
      } else if (alignment.CigarData[cigar_idx].Type == 'I') {
        int advance = alignment.CigarData[cigar_idx].Length;
        for (int cnt = 0; cnt < advance; ++cnt) {
          error_by_position[current_tf].Add(num_bases);
          num_bases++;
          num_errors++;
        }
        alignment.CigarData[cigar_idx].Length -= advance;

      // Deletion (reference has a base, read doesn't)
      } else if (alignment.CigarData[cigar_idx].Type == 'D' and MD_op[MD_idx] == 'D') {
        int advance = min((int)alignment.CigarData[cigar_idx].Length, MD_len[MD_idx]);
        for (int cnt = 0; cnt < advance; ++cnt) {
          error_by_position[current_tf].Add(num_bases);
          num_errors++;
        }
        alignment.CigarData[cigar_idx].Length -= advance;
        MD_len[MD_idx] -= advance;

      // Substitution
      } else if (MD_op[MD_idx] == 'X') {
        int advance = min((int)alignment.CigarData[cigar_idx].Length, MD_len[MD_idx]);
        for (int cnt = 0; cnt < advance; ++cnt) {
          error_by_position[current_tf].Add(num_bases);
          num_bases++;
          num_errors++;
        }
        alignment.CigarData[cigar_idx].Length -= advance;
        MD_len[MD_idx] -= advance;

      } else {
        printf("ionstats tf: Unexpected OP combination: %s Cigar=%c, MD=%c !\n",
            alignment.Name.c_str(), alignment.CigarData[cigar_idx].Type, MD_op[MD_idx]);
        break;
      }

      // Remember the longest prefix whose error rate is at most 10% / 2%
      if (num_errors*10 <= num_bases)   AQ10_bases = num_bases;
      if (num_errors*50 <= num_bases)   AQ17_bases = num_bases;
    }

    //
    // Step 3. Profit
    //

    called_histogram[current_tf].Add(alignment.Length);
    aligned_histogram[current_tf].Add(num_bases);
    AQ10_histogram[current_tf].Add(AQ10_bases);
    AQ17_histogram[current_tf].Add(AQ17_bases);

    // The ZM tag is preferred; FZ is only used when ZM is absent
    if(alignment.GetTag("ZM", flow_signal_zm))
      system_snr[current_tf].Add(flow_signal_zm, key.c_str(), flow_order);
    else if(alignment.GetTag("FZ", flow_signal_fz))
      system_snr[current_tf].Add(flow_signal_fz, key.c_str(), flow_order);


    // HP accuracy - keeping it simple

    if (!alignment.IsReverseStrand()) {

      string genome = key + tf_sequences[refs[current_tf].RefName];
      string calls = key + alignment.QueryBases;
      const char *genome_ptr = genome.c_str();
      const char *calls_ptr = calls.c_str();

      // Walk both sequences flow by flow and compare the homopolymer lengths
      for (int flow = 0; flow < (int)flow_order.length() and *genome_ptr and *calls_ptr; ++flow) {
        int genome_hp = 0;
        int calls_hp = 0;
        while (*genome_ptr == flow_order[flow]) {
          genome_hp++;
          genome_ptr++;
        }
        while (*calls_ptr == flow_order[flow]) {
          calls_hp++;
          calls_ptr++;
        }
        hp_accuracy[current_tf].Add(genome_hp, calls_hp);
      }
    }
  }

  if (num_malformed_md > 0)
    fprintf(stderr, "[ionstats] WARNING: skipped %u reads with malformed MD tags in %s\n",
        num_malformed_md, input_bam_filename.c_str());

  //
  // Processing complete, generate ionstats_tf.json
  //

  Json::Value output_json(Json::objectValue);
  output_json["meta"]["creation_date"] = get_time_iso_string(time(NULL));
  output_json["meta"]["format_name"] = "ionstats_tf";
  output_json["meta"]["format_version"] = "1.0";

  output_json["results_by_tf"] = Json::objectValue;

  for (int tf = 0; tf < num_tfs; ++tf) {

    // TFs with too few reads are not reported
    if (aligned_histogram[tf].num_reads() < 1000)
      continue;

    called_histogram[tf].SaveToJson(output_json["results_by_tf"][refs[tf].RefName]["full"]);
    aligned_histogram[tf].SaveToJson(output_json["results_by_tf"][refs[tf].RefName]["aligned"]);
    AQ10_histogram[tf].SaveToJson(output_json["results_by_tf"][refs[tf].RefName]["AQ10"]);
    AQ17_histogram[tf].SaveToJson(output_json["results_by_tf"][refs[tf].RefName]["AQ17"]);
    error_by_position[tf].SaveToJson(output_json["results_by_tf"][refs[tf].RefName]["error_by_position"]);
    system_snr[tf].SaveToJson(output_json["results_by_tf"][refs[tf].RefName]);
    hp_accuracy[tf].SaveToJson(output_json["results_by_tf"][refs[tf].RefName]);

    output_json["results_by_tf"][refs[tf].RefName]["sequence"] = tf_sequences[refs[tf].RefName];
  }

  input_bam.Close();

  ofstream out(output_json_filename.c_str(), ios::out);
  if (out.good()) {
    out << output_json.toStyledString();
    return 0;
  } else {
    fprintf(stderr, "ERROR: unable to write to '%s'\n", output_json_filename.c_str());
    return 1;
  }
}
Пример #11
0
// bamrealignment: reads a BAM file, tries to realign every mapped read that has
// an MD tag and writes all reads to the output BAM.
//
// Options: -i/--input and -o/--output (both required, otherwise the result of
// PrintHelp() is returned), -s/--scores (default "4,-6,-5,-2"), -c/--clipping,
// -a/--anchors, -b/--bandwidth, -v/--verbose (interactive, prompts on stdin),
// -d/--debug, -f/--format (1 = uncompressed output, anything else =
// compressed), -t/--threads, -l/--log (optional per-read log file).
//
// Unmapped reads and reads that fail at any step are written unchanged.
// Returns 0 after a complete run, 1 if a file cannot be opened or the user
// quits from the verbose prompt.
int main (int argc, const char *argv[])
{
  printf ("------------- bamrealignment --------------\n");

  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  vector<int> score_vals(4);

  string input_bam  = opts.GetFirstString  ('i', "input", "");
  string output_bam = opts.GetFirstString  ('o', "output", "");
  opts.GetOption(score_vals, "4,-6,-5,-2", 's', "scores");
  int    clipping   = opts.GetFirstInt     ('c', "clipping", 2);
  bool   anchors    = opts.GetFirstBoolean ('a', "anchors", true);
  int    bandwidth  = opts.GetFirstInt     ('b', "bandwidth", 10);
  bool   verbose    = opts.GetFirstBoolean ('v', "verbose", false);
  bool   debug      = opts.GetFirstBoolean ('d', "debug", false);
  int    format     = opts.GetFirstInt     ('f', "format", 1);
  int  num_threads  = opts.GetFirstInt     ('t', "threads", 8);
  string log_fname  = opts.GetFirstString  ('l', "log", "");
  

  if (input_bam.empty() or output_bam.empty())
    return PrintHelp();

  opts.CheckNoLeftovers();

  // The log file is optional; it is only opened when a name was given
  std::ofstream logf;
  if (log_fname.size ())
  {
    logf.open (log_fname.c_str ());
    if (!logf.is_open ())
    {
      fprintf (stderr, "bamrealignment: Failed to open log file %s\n", log_fname.c_str());
      return 1;
    }
  }

  BamReader reader;
  if (!reader.Open(input_bam)) {
    fprintf(stderr, "bamrealignment: Failed to open input file %s\n", input_bam.c_str());
    return 1;
  }

  SamHeader header = reader.GetHeader();
  RefVector refs   = reader.GetReferenceData();

  BamWriter writer;
  writer.SetNumThreads(num_threads);
  if (format == 1)
    writer.SetCompressionMode(BamWriter::Uncompressed);
  else
    writer.SetCompressionMode(BamWriter::Compressed);

  if (!writer.Open(output_bam, header, refs)) {
    fprintf(stderr, "bamrealignment: Failed to open output file %s\n", output_bam.c_str());
    return 1;
  }


  // The meat starts here ------------------------------------

  if (verbose)
    cout << "Verbose option is activated, each alignment will print to screen." << endl
         << "  After a read hit RETURN to continue to the next one," << endl
         << "  or press q RETURN to quit the program," << endl
         << "  or press s Return to silence verbose," << endl
         << "  or press c RETURN to continue printing without further prompt." << endl << endl;

  // Counters for the summary printed at the end of the run
  unsigned int readcounter = 0;
  unsigned int mapped_readcounter = 0;
  unsigned int realigned_readcounter = 0;
  unsigned int modified_alignment_readcounter = 0;
  unsigned int pos_update_readcounter = 0;
  unsigned int failed_clip_realigned_readcount = 0;
  
  unsigned int already_perfect_readcount = 0;
  
  unsigned int bad_md_tag_readcount = 0;
  unsigned int error_recreate_ref_readcount = 0;
  unsigned int error_clip_anchor_readcount = 0;
  unsigned int error_sw_readcount = 0;
  unsigned int error_unclip_readcount = 0;
  
  unsigned int start_position_shift;
  int orig_position;
  int new_position;

  // "input" holds the last answer typed at the verbose prompt
  string  md_tag, new_md_tag, input = "x";
  vector<CigarOp>    new_cigar_data;
  vector<MDelement>  new_md_data;
  bool position_shift = false;
  time_t start_time = time(NULL);

  Realigner aligner;
  aligner.verbose_ = verbose;
  aligner.debug_   = debug;
  // A rejected score vector is only reported; processing continues regardless
  if (!aligner.SetScores(score_vals))
    cout << "bamrealignment: Four scores need to be provided: match, mismatch, gap open, gap extend score!" << endl;

  aligner.SetAlignmentBandwidth(bandwidth);

  BamAlignment alignment;
  while(reader.GetNextAlignment(alignment)){
    readcounter ++;
    position_shift = false;
    
    if ( (readcounter % 100000) == 0 )
       cout << "Processed " << readcounter << " reads. Elapsed time: " << (time(NULL) - start_time) << endl;

    if (alignment.IsMapped()) {
      
      
      
      orig_position = alignment.Position;
      mapped_readcounter++;
      aligner.SetClipping(clipping, !alignment.IsReverseStrand());
      if (aligner.verbose_) {
    	cout << endl;
        if (alignment.IsReverseStrand())
          cout << "The read is from the reverse strand." << endl;
        else
          cout << "The read is from the forward strand." << endl;
      }

      // Case 1: no MD tag - the read is counted and written unchanged
      if (!alignment.GetTag("MD", md_tag)) {
    	if (aligner.verbose_)
          cout << "Warning: Skipping read " << alignment.Name << ". It is mapped but missing MD tag." << endl;
	if (logf.is_open ())
	  logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "MISSMD" << '\n';
	bad_md_tag_readcount++;
      // Case 2: the reference could be recreated - attempt the realignment
      } else if (aligner.CreateRefFromQueryBases(alignment.QueryBases, alignment.CigarData, md_tag, anchors)) {
	// A clip anchor error reported together with success is noted in the log as NOCLIP
	bool clipfail = false;
	if (Realigner::CR_ERR_CLIP_ANCHOR == aligner.GetCreateRefError ())
	{
	  clipfail = true;
	  failed_clip_realigned_readcount ++;
	}

        if (!aligner.computeSWalignment(new_cigar_data, new_md_data, start_position_shift)) {
          if (aligner.verbose_)
            cout << "Error in the alignment! Not updating read information." << endl;
	  if (logf.is_open ())
	    logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "SWERR" << '\n';
	  error_sw_readcount++;
          writer.SaveAlignment(alignment);  // Write alignment unchanged
          continue;
        }

        if (!aligner.addClippedBasesToTags(new_cigar_data, new_md_data, alignment.QueryBases.size())) {
          if (aligner.verbose_)
            cout << "Error when adding clipped anchors back to tags! Not updating read information." << endl;
	  if (logf.is_open ())
	    logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "UNCLIPERR" << '\n';
          writer.SaveAlignment(alignment);  // Write alignment unchanged
	  error_unclip_readcount ++;
          continue;
        }
        new_md_tag = aligner.GetMDstring(new_md_data);
        realigned_readcounter++;

        // adjust start position of read
        if (!aligner.LeftAnchorClipped() and start_position_shift != 0) {
          new_position = aligner.updateReadPosition(alignment.CigarData, (int)start_position_shift, alignment.Position);
          if (new_position != alignment.Position) {
            pos_update_readcounter++;
            position_shift = true;
            alignment.Position = new_position;
          }
        }
        
        // A read counts as modified if its position, its number of cigar operations or its MD tag changed
        if (position_shift || alignment.CigarData.size () != new_cigar_data.size () || md_tag != new_md_tag)
	{
	  if (logf.is_open ())
	  {
	    logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "MOD";
	    if (position_shift)
	      logf << "-SHIFT";
	    if (clipfail)
	      logf << " NOCLIP";
	    logf << '\n';
	  }
	  modified_alignment_readcounter++;
	}
	else
	{
            if (logf.is_open ())
	    {
	      logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "UNMOD";
              if (clipfail)
	        logf << " NOCLIP";
	      logf << '\n';
	    }
	}

        if (aligner.verbose_){
          cout << alignment.Name << endl;
          cout << "------------------------------------------" << endl;
          // Wait for input to continue or quit program
          if (input.size() == 0)
            input = 'x';
          else if (input[0] != 'c' and input[0] != 'C')
            getline(cin, input);
          if (input.size()>0){
            if (input[0] == 'q' or input[0] == 'Q')
              return 1;
            else if (input[0] == 's' or input[0] == 'S')
              aligner.verbose_ = false;
          }
        }

        // Finally update alignment information
        alignment.CigarData = new_cigar_data;
        alignment.EditTag("MD", "Z" , new_md_tag);

      } // end of CreateRef else if
      // Case 3: CreateRefFromQueryBases returned false - classify by its error code
      else {
	switch (aligner.GetCreateRefError ())
	{
	  case Realigner::CR_ERR_RECREATE_REF:
            if (logf.is_open ())
	      logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "RECRERR" << '\n';
	    error_recreate_ref_readcount++;
	    break;
	  case Realigner::CR_ERR_CLIP_ANCHOR:
            if (logf.is_open ())
	      logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "CLIPERR" << '\n';
	    error_clip_anchor_readcount++;
	    break;
	  default:
		  //  On a good run this writes way too many reads to the log file - don't want to create a too large txt file
          //  if (logf.is_open ())
	      //logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "PERFECT" << '\n';
	    already_perfect_readcount++;
	    break;
	}
	
	if (aligner.verbose_) {
	  cout << alignment.Name << endl;
	  cout << "------------------------------------------" << endl;
	  // Wait for input to continue or quit program
	  if (input.size() == 0)
	    input = 'x';
	  else if (input[0] != 'c' and input[0] != 'C')
	    getline(cin, input);
	  if (input.size()>0){
	    if (input[0] == 'q' or input[0] == 'Q')
	      return 1;
	    else if (input[0] == 's' or input[0] == 'S')
	      aligner.verbose_ = false;
	  }
	}
      }

      // --- Debug output for Rajesh ---
      if (debug && aligner.invalid_cigar_in_input) {
        aligner.verbose_ = true;
        cout << "Invalid cigar string / md tag pair in read " << alignment.Name << endl;
        // Rerun reference generation to display error
        aligner.CreateRefFromQueryBases(alignment.QueryBases, alignment.CigarData, md_tag, anchors);

        aligner.verbose_ = verbose;
        aligner.invalid_cigar_in_input = false;
      }
      // --- --- ---


    } // end of if isMapped

    // Reached by unmapped reads and by every mapped read that did not "continue" above
    writer.SaveAlignment(alignment);

  } // end while loop over reads

  if (aligner.invalid_cigar_in_input)
    cerr << "WARNING bamrealignment: There were invalid cigar string / md tag pairs in the input bam file." << endl;

  // ----------------------------------------------------------------
  // program end -- output summary information
  cout   << "                            File: " << input_bam    << endl
         << "                     Total reads: " << readcounter  << endl
         << "                    Mapped reads: " << mapped_readcounter << endl;
  if (bad_md_tag_readcount)
    cout << "            Skipped: bad MD tags: " << bad_md_tag_readcount << endl;
  if (error_recreate_ref_readcount)
    cout << " Skipped: unable to recreate ref: " << error_recreate_ref_readcount << endl;
  if (error_clip_anchor_readcount)
    cout << "  Skipped: error clipping anchor: " << error_clip_anchor_readcount << endl;
  cout  <<  "       Skipped:  already perfect: " << already_perfect_readcount << endl
        <<  "           Total reads realigned: " << mapped_readcounter - already_perfect_readcount - bad_md_tag_readcount - error_recreate_ref_readcount - error_clip_anchor_readcount << endl;
  if (failed_clip_realigned_readcount)
    cout << "                      (including  " << failed_clip_realigned_readcount << " that failed to clip)" << endl;
  if (error_sw_readcount)
    cout << " Failed to complete SW alignment: " << error_sw_readcount << endl;
  if (error_unclip_readcount)
    cout << "         Failed to unclip anchor: " << error_unclip_readcount << endl;
  cout   << "           Succesfully realigned: " << realigned_readcounter << endl
         << "             Modified alignments: " << modified_alignment_readcounter << endl
         << "                Shifted position: " << pos_update_readcounter << endl;
  
  cout << "Processing time: " << (time(NULL)-start_time) << " seconds." << endl;
  cout << "INFO: The output BAM file may be unsorted." << endl;
  cout << "------------------------------------------" << endl;
  return 0;
}
Пример #12
0
// Computes alignment-based read statistics for a BAM file and writes them as JSON.
//
// Command line options (read through OptArgs):
//   -i, --input             input BAM file; the help text is shown if it is missing
//   -o, --output            output JSON file (default "ionstats_alignment.json")
//   -h, --histogram-length  value passed to Initialize() of every histogram (default 400)
//
// Every read contributes its length to the "full" histogram. For reads that are mapped and
// carry an MD tag, the cigar and the MD tag are consumed in lock-step in read direction
// (back to front for reverse-strand reads). While doing so the function tracks the number of
// bases and errors seen so far and remembers the longest prefix that still satisfies
//   AQ7  : errors*5   <= bases
//   AQ10 : errors*10  <= bases
//   AQ17 : errors*50  <= bases
//   AQ20 : errors*100 <= bases
//   AQ47 : no errors at all
// Prefix lengths below 20 bases are not added to the histograms.
//
// Returns 0 on success; 1 if the help text was shown, the input BAM cannot be opened or the
// output stream is not usable.
int IonstatsAlignment(int argc, const char *argv[])
{
  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  string input_bam_filename   = opts.GetFirstString('i', "input", "");
  string output_json_filename = opts.GetFirstString('o', "output", "ionstats_alignment.json");
  int histogram_length        = opts.GetFirstInt   ('h', "histogram-length", 400);

  if(argc < 2 or input_bam_filename.empty()) {
    IonstatsAlignmentHelp();
    return 1;
  }

  //
  // Prepare for metric calculation
  //

  BamReader input_bam;
  if (!input_bam.Open(input_bam_filename)) {
    fprintf(stderr, "[ionstats] ERROR: cannot open %s\n", input_bam_filename.c_str());
    return 1;
  }

  ReadLengthHistogram called_histogram;
  ReadLengthHistogram aligned_histogram;
  ReadLengthHistogram AQ7_histogram;
  ReadLengthHistogram AQ10_histogram;
  ReadLengthHistogram AQ17_histogram;
  ReadLengthHistogram AQ20_histogram;
  ReadLengthHistogram AQ47_histogram;
  SimpleHistogram error_by_position;

  called_histogram.Initialize(histogram_length);
  aligned_histogram.Initialize(histogram_length);
  AQ7_histogram.Initialize(histogram_length);
  AQ10_histogram.Initialize(histogram_length);
  AQ17_histogram.Initialize(histogram_length);
  AQ20_histogram.Initialize(histogram_length);
  AQ47_histogram.Initialize(histogram_length);
  error_by_position.Initialize(histogram_length);

  // Scratch buffers for the parsed MD tag; allocated once and reused for every read
  BamAlignment alignment;
  vector<char>  MD_op;
  vector<int>   MD_len;
  MD_op.reserve(1024);
  MD_len.reserve(1024);
  string MD_tag;

  //
  // Main loop over mapped reads in the input BAM
  //

  while(input_bam.GetNextAlignment(alignment)) {

    // Record read length (done for every read, mapped or not)
    called_histogram.Add(alignment.Length);

    if (!alignment.IsMapped() or !alignment.GetTag("MD",MD_tag))
      continue;

    //
    // Step 1. Parse MD tag into parallel (operation, length) lists:
    //         'M' = run of matches, 'D' = deleted reference bases, 'X' = substituted bases
    //

    MD_op.clear();
    MD_len.clear();

    for (const char *MD_ptr = MD_tag.c_str(); *MD_ptr;) {

      const char *item_start = MD_ptr;
      int item_length = 0;
      if (*MD_ptr >= '0' and *MD_ptr <= '9') {    // It's a match
        MD_op.push_back('M');
        for (; *MD_ptr and *MD_ptr >= '0' and *MD_ptr <= '9'; ++MD_ptr)
          item_length = 10*item_length + *MD_ptr - '0';
      } else {
        if (*MD_ptr == '^') {                     // It's a deletion
          MD_ptr++;
          MD_op.push_back('D');
        } else                                    // It's a substitution
          MD_op.push_back('X');
        for (; *MD_ptr and *MD_ptr >= 'A' and *MD_ptr <= 'Z'; ++MD_ptr)
          item_length++;
      }
      MD_len.push_back(item_length);

      // Guarantee forward progress. A character that is neither a digit, '^' nor an
      // upper-case letter is consumed by none of the branches above; without skipping it
      // this loop would never terminate. The zero-length entry recorded for it is ignored
      // by the scan in step 2.
      if (MD_ptr == item_start)
        ++MD_ptr;
    }

    //
    // Step 2. Synchronously scan through Cigar and MD, doing error accounting
    //

    // Reverse-strand reads are walked from the last element towards the first
    int MD_idx = alignment.IsReverseStrand() ? MD_op.size()-1 : 0;
    int cigar_idx = alignment.IsReverseStrand() ? alignment.CigarData.size()-1 : 0;
    int increment = alignment.IsReverseStrand() ? -1 : 1;

    int AQ7_bases = 0;
    int AQ10_bases = 0;
    int AQ17_bases = 0;
    int AQ20_bases = 0;
    int AQ47_bases = 0;
    int num_bases = 0;
    int num_errors = 0;

    while (cigar_idx < (int)alignment.CigarData.size() and MD_idx < (int) MD_op.size() and cigar_idx >= 0 and MD_idx >= 0) {

      // Elements are consumed in place: their remaining length is decremented until it
      // reaches zero, at which point the corresponding index moves on.
      if (alignment.CigarData[cigar_idx].Length == 0) { // Try advancing cigar
        cigar_idx += increment;
        continue;
      }
      if (MD_len[MD_idx] == 0) { // Try advancing MD
        MD_idx += increment;
        continue;
      }

      // Match
      if (alignment.CigarData[cigar_idx].Type == 'M' and MD_op[MD_idx] == 'M') {
        int advance = min((int)alignment.CigarData[cigar_idx].Length, MD_len[MD_idx]);
        num_bases += advance;
        alignment.CigarData[cigar_idx].Length -= advance;
        MD_len[MD_idx] -= advance;

      // Insertion (read has a base, reference doesn't); the MD tag has no entry for it
      } else if (alignment.CigarData[cigar_idx].Type == 'I') {
        int advance = alignment.CigarData[cigar_idx].Length;
        for (int cnt = 0; cnt < advance; ++cnt) {
          error_by_position.Add(num_bases);
          num_bases++;
          num_errors++;
        }
        alignment.CigarData[cigar_idx].Length -= advance;

      // Deletion (reference has a base, read doesn't); counts as error, not as read base
      } else if (alignment.CigarData[cigar_idx].Type == 'D' and MD_op[MD_idx] == 'D') {
        int advance = min((int)alignment.CigarData[cigar_idx].Length, MD_len[MD_idx]);
        for (int cnt = 0; cnt < advance; ++cnt) {
          error_by_position.Add(num_bases);
          num_errors++;
        }
        alignment.CigarData[cigar_idx].Length -= advance;
        MD_len[MD_idx] -= advance;

      // Substitution
      } else if (MD_op[MD_idx] == 'X') {
        int advance = min((int)alignment.CigarData[cigar_idx].Length, MD_len[MD_idx]);
        for (int cnt = 0; cnt < advance; ++cnt) {
          error_by_position.Add(num_bases);
          num_bases++;
          num_errors++;
        }
        alignment.CigarData[cigar_idx].Length -= advance;
        MD_len[MD_idx] -= advance;

      } else {
        // Cigar and MD disagree; stop scanning this read and keep what was counted so far
        printf("ionstats alignment: Unexpected OP combination: %s Cigar=%c, MD=%c !\n",
            alignment.Name.c_str(), alignment.CigarData[cigar_idx].Type, MD_op[MD_idx]);
        break;
      }

      // Remember the longest prefix that still meets each accuracy threshold
      if (num_errors*5 <= num_bases)    AQ7_bases = num_bases;
      if (num_errors*10 <= num_bases)   AQ10_bases = num_bases;
      if (num_errors*50 <= num_bases)   AQ17_bases = num_bases;
      if (num_errors*100 <= num_bases)  AQ20_bases = num_bases;
      if (num_errors == 0)              AQ47_bases = num_bases;
    }

    //
    // Step 3. Record the per-read results; lengths below 20 bases are not recorded
    //

    if (num_bases >= 20)    aligned_histogram.Add(num_bases);
    if (AQ7_bases >= 20)    AQ7_histogram.Add(AQ7_bases);
    if (AQ10_bases >= 20)   AQ10_histogram.Add(AQ10_bases);
    if (AQ17_bases >= 20)   AQ17_histogram.Add(AQ17_bases);
    if (AQ20_bases >= 20)   AQ20_histogram.Add(AQ20_bases);
    if (AQ47_bases >= 20)   AQ47_histogram.Add(AQ47_bases);
  }

  input_bam.Close();


  //
  // Processing complete, generate ionstats_alignment.json
  //

  Json::Value output_json(Json::objectValue);
  output_json["meta"]["creation_date"] = get_time_iso_string(time(NULL));
  output_json["meta"]["format_name"] = "ionstats_alignment";
  output_json["meta"]["format_version"] = "1.0";

  called_histogram.SaveToJson(output_json["full"]);
  aligned_histogram.SaveToJson(output_json["aligned"]);
  AQ7_histogram.SaveToJson(output_json["AQ7"]);
  AQ10_histogram.SaveToJson(output_json["AQ10"]);
  AQ17_histogram.SaveToJson(output_json["AQ17"]);
  AQ20_histogram.SaveToJson(output_json["AQ20"]);
  AQ47_histogram.SaveToJson(output_json["AQ47"]);
  error_by_position.SaveToJson(output_json["error_by_position"]);

  ofstream out(output_json_filename.c_str(), ios::out);
  if (!out.good()) {
    fprintf(stderr, "ERROR: unable to write to '%s'\n", output_json_filename.c_str());
    return 1;
  }

  out << output_json.toStyledString();
  return 0;
}
Пример #13
0
// Computes basecaller-level read statistics for a BAM file and writes them as JSON.
//
// Command line options (read through OptArgs):
//   -i, --input             input BAM file; the help text is shown if it is missing
//   -o, --output            output JSON file (default "ionstats_basecaller.json")
//   -h, --histogram-length  value passed to Initialize() of every histogram (default 400)
//
// For every read the function records the read length, the insert length (ZA tag, when
// present), the predicted Q17 and Q20 lengths derived from the base qualities, the flow
// signal (ZM tag, falling back to FZ) for the system SNR metric, and the quality values.
//
// Returns 0 on success; 1 if the help text was shown, the input BAM cannot be opened, its
// header has no read groups or the output stream is not usable.
int IonstatsBasecaller(int argc, const char *argv[])
{
  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  string input_bam_filename   = opts.GetFirstString('i', "input", "");
  string output_json_filename = opts.GetFirstString('o', "output", "ionstats_basecaller.json");
  int histogram_length        = opts.GetFirstInt   ('h', "histogram-length", 400);

  if(argc < 2 or input_bam_filename.empty()) {
    IonstatsBasecallerHelp();
    return 1;
  }


  BamReader input_bam;
  if (!input_bam.Open(input_bam_filename)) {
    fprintf(stderr, "[ionstats] ERROR: cannot open %s\n", input_bam_filename.c_str());
    return 1;
  }

  SamHeader sam_header = input_bam.GetHeader();
  if(!sam_header.HasReadGroups()) {
    fprintf(stderr, "[ionstats] ERROR: no read groups in %s\n", input_bam_filename.c_str());
    return 1;
  }


  ReadLengthHistogram total_full_histo;
  ReadLengthHistogram total_insert_histo;
  ReadLengthHistogram total_Q17_histo;
  ReadLengthHistogram total_Q20_histo;

  total_full_histo.Initialize(histogram_length);
  total_insert_histo.Initialize(histogram_length);
  total_Q17_histo.Initialize(histogram_length);
  total_Q20_histo.Initialize(histogram_length);

  MetricGeneratorSNR system_snr;
  BaseQVHistogram qv_histogram;

  // Flow order and key sequence come from the read groups; if several read groups provide
  // them, the last one visited wins.
  string flow_order;
  string key;
  for (SamReadGroupIterator rg = sam_header.ReadGroups.Begin(); rg != sam_header.ReadGroups.End(); ++rg) {
    if(rg->HasFlowOrder())
      flow_order = rg->FlowOrder;
    if(rg->HasKeySequence())
      key = rg->KeySequence;
  }

  // Lookup table: quality value -> error probability, 10^(-qv/10)
  double qv_to_error_rate[256];
  for (int qv = 0; qv < 256; qv++)
    qv_to_error_rate[qv] =  pow(10.0,-0.1*(double)qv);


  BamAlignment alignment;
  string read_group;
  vector<uint16_t> flow_signal_fz(flow_order.length());
  vector<int16_t> flow_signal_zm(flow_order.length());

  while(input_bam.GetNextAlignment(alignment)) {

    // Record read length
    unsigned int full_length = alignment.Length;
    total_full_histo.Add(full_length);

    // Record insert length
    int insert_length = 0;
    if (alignment.GetTag("ZA",insert_length))
      total_insert_histo.Add(insert_length);

    // Compute and record Q17 and Q20: the longest prefix whose mean predicted error rate
    // stays at or below 2% and 1%, respectively
    int Q17_length = 0;
    int Q20_length = 0;
    double num_accumulated_errors = 0.0;
    for(int pos = 0; pos < alignment.Length; ++pos) {
      // Quality characters are offset by 33. Clamp the decoded value to the table range:
      // a character below 33 (or a negative char value) would otherwise index outside
      // of qv_to_error_rate.
      int qv = (int)alignment.Qualities[pos] - 33;
      if (qv < 0)
        qv = 0;
      if (qv > 255)
        qv = 255;
      num_accumulated_errors += qv_to_error_rate[qv];
      if (num_accumulated_errors / (pos + 1) <= 0.02)
        Q17_length = pos + 1;
      if (num_accumulated_errors / (pos + 1) <= 0.01)
        Q20_length = pos + 1;
    }
    total_Q17_histo.Add(Q17_length);
    total_Q20_histo.Add(Q20_length);

    // Record data for system snr; the ZM tag is preferred, FZ is the fallback
    if(alignment.GetTag("ZM", flow_signal_zm))
      system_snr.Add(flow_signal_zm, key.c_str(), flow_order);
    else if(alignment.GetTag("FZ", flow_signal_fz))
      system_snr.Add(flow_signal_fz, key.c_str(), flow_order);

    // Record qv histogram
    qv_histogram.Add(alignment.Qualities);
  }

  input_bam.Close();



  // Assemble the JSON report
  Json::Value output_json(Json::objectValue);
  output_json["meta"]["creation_date"] = get_time_iso_string(time(NULL));
  output_json["meta"]["format_name"] = "ionstats_basecaller";
  output_json["meta"]["format_version"] = "1.0";

  system_snr.SaveToJson(output_json);
  qv_histogram.SaveToJson(output_json);
  total_full_histo.SaveToJson(output_json["full"]);
  total_insert_histo.SaveToJson(output_json["insert"]);
  total_Q17_histo.SaveToJson(output_json["Q17"]);
  total_Q20_histo.SaveToJson(output_json["Q20"]);


  ofstream out(output_json_filename.c_str(), ios::out);
  if (out.good()) {
    out << output_json.toStyledString();
    return 0;
  } else {
    fprintf(stderr, "ERROR: unable to write to '%s'\n", output_json_filename.c_str());
    return 1;
  }

}
Пример #14
0
// Configures the basecaller read filters from the command line.
//
//   opts        source of all filter and trimming options read below
//   _flowOrder  stored in flowOrder
//   _numFlows   stored in numFlows
//   _keys       stored in keys; exactly two entries are expected (asserted)
//   _maskPtr    stored in maskPtr; its H() and W() determine the size of filterMask
//
// Terminates the process with EXIT_FAILURE when min-read-length, cr-filter-max-value or
// beverly-filter carries an invalid value.
BaseCallerFilters::BaseCallerFilters(OptArgs& opts,
    const string& _flowOrder, int _numFlows, const vector<KeySequence>& _keys, Mask *_maskPtr)
{
  // Caller-provided context
  flowOrder = _flowOrder;
  numFlows  = _numFlows;
  maskPtr   = _maskPtr;

  // Filter switches
  keypassFilter                 = opts.GetFirstBoolean('k', "keypass-filter", true);
  percentPositiveFlowsFilterTFs = opts.GetFirstBoolean('-', "clonal-filter-tf", false);
  clonalFilterTraining          = opts.GetFirstBoolean('-', "clonal-filter-train", false);
  clonalFilterSolving           = opts.GetFirstBoolean('-', "clonal-filter-solve", false);
  minReadLength                 = opts.GetFirstInt    ('-', "min-read-length", 8);
  cafieResFilterCalling         = opts.GetFirstBoolean('-', "cr-filter", false);
  cafieResFilterTFs             = opts.GetFirstBoolean('-', "cr-filter-tf", false);
  generate_bead_summary_        = opts.GetFirstBoolean('-', "bead-summary", false);

  // TODO: get this to work right. May require "unwound" flow order, so incompatible with current wells.FlowOrder()
  //flt_control.cafieResMaxValueByFlowOrder[std::string ("TACG") ] = 0.06;  // regular flow order
  //flt_control.cafieResMaxValueByFlowOrder[std::string ("TACGTACGTCTGAGCATCGATCGATGTACAGC") ] = 0.08;  // xdb flow order

  cafieResMaxValue              = opts.GetFirstDouble ('-', "cr-filter-max-value", 0.08);

  // Adapter and quality trimming (SFFTrim options)
  trim_adapter                  = opts.GetFirstString ('-', "trim-adapter", "ATCACCGACTGCCCATAGAGAGGCTGAGAC");
  trim_adapter_cutoff           = opts.GetFirstDouble ('-', "trim-adapter-cutoff", 0.0);
  trim_adapter_closest          = opts.GetFirstBoolean('-', "trim-adapter-pick-closest", false);
  trim_qual_wsize               = opts.GetFirstInt    ('-', "trim-qual-window-size", 30);
  trim_qual_cutoff              = opts.GetFirstDouble ('-', "trim-qual-cutoff", 100.0);
  trim_min_read_len             = opts.GetFirstInt    ('-', "trim-min-read-len", 8);

  // Reject values that make no sense before going any further
  if (minReadLength < 1) {
    fprintf (stderr, "Option Error: min-read-length must specify a positive value (%d invalid).\n", minReadLength);
    exit (EXIT_FAILURE);
  }
  if (cafieResMaxValue <= 0) {
    fprintf (stderr, "Option Error: cr-filter-max-value must specify a positive value (%lf invalid).\n", cafieResMaxValue);
    exit (EXIT_FAILURE);
  }

  // Per-class filter switches. Class 0 follows the main switch alone; class 1 additionally
  // requires the matching "-tf" option.
  keys = _keys;
  numClasses = keys.size();
  assert(numClasses == 2);

  const bool polyclonal_class1    = clonalFilterSolving && percentPositiveFlowsFilterTFs;
  const bool high_residual_class1 = cafieResFilterCalling && cafieResFilterTFs;

  classFilterPolyclonal.resize(numClasses);
  classFilterHighResidual.resize(numClasses);
  classFilterPolyclonal[0]   = clonalFilterSolving;
  classFilterPolyclonal[1]   = polyclonal_class1;
  classFilterHighResidual[0] = cafieResFilterCalling;
  classFilterHighResidual[1] = high_residual_class1;

  // Beverly filter: either the literal "off" or "filter_ratio,trim_ratio,min_length"
  const string beverly_spec = opts.GetFirstString('-', "beverly-filter", "0.03,0.03,8");
  if (beverly_spec != "off") {
    const int fields_parsed = sscanf (beverly_spec.c_str(), "%f,%f,%d",
        &filter_beverly_filter_ratio_,
        &filter_beverly_trim_ratio_,
        &filter_beverly_min_read_length_);
    if (fields_parsed != 3) {
      fprintf (stderr, "Option Error: beverly-filter %s\n", beverly_spec.c_str());
      fprintf (stderr, "Usage: --beverly-filter=filter_ratio,trim_ratio,min_length\n");
      exit (EXIT_FAILURE);
    }
    filter_beverly_enabled_ = true;
    printf("Beverly filter: enabled, use --beverly-filter=off to disable\n");
    printf("Beverly filter: filter_ratio = %1.5f\n", filter_beverly_filter_ratio_);
    printf("Beverly filter: trim_ratio = %1.5f\n", filter_beverly_trim_ratio_);
    printf("Beverly filter: min_length = %d\n", filter_beverly_min_read_length_);

  } else {
    filter_beverly_enabled_ = false;
    printf("Beverly filter: disabled, use --beverly-filter=filter_ratio,trim_ratio,min_length\n");
  }

  // One filter state per mask position, all starting out as kUninitialized
  filterMask.assign(maskPtr->H()*maskPtr->W(), kUninitialized);
}
Пример #15
0
// Configures the phase estimator from command line options and the chip layout.
//
//   opts             source of the phasing-related options read below
//   chip_subset      provides chip size, region size and region counts
//   key_norm_method  stored in key_norm_method_
//
// Precedence of the phase parameter sources:
//   1. --phase-estimation-file: if loading succeeds, the estimator mode becomes "override"
//      and the function returns immediately.
//   2. --libcf-ie-dr: fixed cf,ie,dr values are set and the function returns immediately.
//   3. Otherwise the remaining options are validated, --initcf-ie-dr (if given) provides the
//      starting values, and the settings are printed.
//
// Terminates the process with EXIT_FAILURE if libcf-ie-dr or initcf-ie-dr does not hold
// exactly 3 values, or if the phasing flow window is invalid.
void PhaseEstimator::InitializeFromOptArgs(OptArgs& opts, const ion::ChipSubset & chip_subset, const string & key_norm_method)
{
  // Parse command line options
  phasing_estimator_      = opts.GetFirstString ('-', "phasing-estimator", "spatial-refiner-2");
  vector<double> cf_ie_dr = opts.GetFirstDoubleVector('-', "libcf-ie-dr", "");
  vector<double> init_cf_ie_dr = opts.GetFirstDoubleVector('-', "initcf-ie-dr", "");
  residual_threshold_     = opts.GetFirstDouble ('-', "phasing-residual-filter", 1.0);
  max_phasing_levels_     = opts.GetFirstInt    ('-', "max-phasing-levels", max_phasing_levels_default_);
  num_fullchip_iterations_= opts.GetFirstInt    ('-', "phasing-fullchip-iterations", 3);
  num_region_iterations_  = opts.GetFirstInt    ('-', "phasing-region-iterations", 1);
  num_reads_per_region_   = opts.GetFirstInt    ('-', "phasing-num-reads", 5000);
  min_reads_per_region_   = opts.GetFirstInt    ('-', "phasing-min-reads", 1000);
  phase_file_name_        = opts.GetFirstString ('-', "phase-estimation-file", "");
  normalization_string_   = opts.GetFirstString ('-', "phase-normalization", "adaptive");
  key_norm_method_        = key_norm_method;

  // Static member variables
  norm_during_param_eval_ = opts.GetFirstBoolean('-', "phase-norm-during-eval", false);
  windowSize_             = opts.GetFirstInt    ('-', "window-size", DPTreephaser::kWindowSizeDefault_);
  phasing_start_flow_     = opts.GetFirstInt    ('-', "phasing-start-flow", 70);
  phasing_end_flow_       = opts.GetFirstInt    ('-', "phasing-end-flow", 150);
  inclusion_threshold_    = opts.GetFirstDouble ('-', "phasing-signal-cutoff", 1.4);
  maxfrac_negative_flows_ = opts.GetFirstDouble ('-', "phasing-norm-threshold", 0.2);

  // Initialize chip size - needed for loading phase parameters
  chip_size_x_   = chip_subset.GetChipSizeX();
  chip_size_y_   = chip_subset.GetChipSizeY();
  region_size_x_ = chip_subset.GetRegionSizeX();
  region_size_y_ = chip_subset.GetRegionSizeY();
  num_regions_x_ = chip_subset.GetNumRegionsX();
  num_regions_y_ = chip_subset.GetNumRegionsY();
  num_regions_   = chip_subset.NumRegions();

  // Loading existing phase estimates from a file takes precedence over all other options
  if (not phase_file_name_.empty()) {
    have_phase_estimates_ = LoadPhaseEstimationTrainSubset(phase_file_name_);
    if (have_phase_estimates_) {
      phasing_estimator_ = "override";
      printf("Phase estimator settings:\n");
      printf("  phase file name        : %s\n", phase_file_name_.c_str());
      printf("  phase estimation mode  : %s\n\n", phasing_estimator_.c_str());
      return;
    } else
      // Loading failed: report it and fall through to the regular option handling
      cout << "PhaseEstimator Error loading TrainSubset from file " << phase_file_name_ << endl;
  }

  // Set phase parameters if provided by command line
  if (!cf_ie_dr.empty()) {
    if (cf_ie_dr.size() != 3){
      cerr << "BaseCaller Option Error: libcf-ie-dr needs to be a comma separated vector of 3 values." << endl;
      exit (EXIT_FAILURE);
    }
    SetPhaseParameters(cf_ie_dr.at(0), cf_ie_dr.at(1), cf_ie_dr.at(2));
    return; // --libcf-ie-dr overrides other phasing-related options
  }

  // Set starting values for estimation
  if (!init_cf_ie_dr.empty()) {
    if (init_cf_ie_dr.size() != 3){
      cerr << "BaseCaller Option Error: initcf-ie-dr needs to be a comma separated vector of 3 values." << endl;
      exit (EXIT_FAILURE);
    }
    init_cf_ = init_cf_ie_dr.at(0);
    init_ie_ = init_cf_ie_dr.at(1);
    init_dr_ = init_cf_ie_dr.at(2);
  }

  // The phasing flow window must satisfy 0 <= start < end
  if (phasing_start_flow_ >= phasing_end_flow_ or phasing_start_flow_ < 0) {
    cerr << "BaseCaller Option Error: phasing-start-flow " << phasing_start_flow_
         << " needs to be non-negative and smaller than phasing-end-flow " << phasing_end_flow_ << endl;
    exit (EXIT_FAILURE);
  }

  // Translate the normalization name into its numeric code
  if (normalization_string_ == "adaptive")
    norm_method_ = 1;
  else if (normalization_string_ == "pid")
    norm_method_ = 2;
  else if (normalization_string_ == "variable")
    norm_method_ = 3;
  else if (normalization_string_ == "off")
    norm_method_ = 4;
  else
    norm_method_ = 0; // "gain" and anything else is default

  printf("Phase estimator settings:\n");
  printf("  phase file name        : %s\n", phase_file_name_.c_str());
  printf("  phase estimation mode  : %s\n", phasing_estimator_.c_str());
  printf("  initial cf,ie,dr values: %f,%f,%f\n", init_cf_,init_ie_,init_dr_);
  printf("  reads per region target: %d-%d\n", min_reads_per_region_, num_reads_per_region_);
  printf("  normalization method   : %s\n", normalization_string_.c_str());
  printf("  variable norm threshold: %f\n", maxfrac_negative_flows_);
  printf("\n");
}