C++ (Cpp) OptArgs примеры использования

Язык программирования: C++ (Cpp)

Класс/Тип: OptArgs

Примеров на hotexamples.com: 30

C++ (Cpp) OptArgs - 30 примеров найдено. Это лучшие примеры C++ (Cpp) кода для OptArgs, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

GetFirstString(23)

GetFirstInt(15)

GetOption(15)

GetFirstBoolean(14)

CheckNoLeftovers(11)

GetFirstDouble(6)

GetLeftoverArguments(5)

Пример #1

Показать файл

Файл: CommandLineOpts.cpp Проект: Brainiarc7/TS

void CommandLineOpts::PostProcessArgs(OptArgs &opts)
{
	sys_context.FindExpLogPath();
	SetGlobalChipID ( sys_context.explog_path );

	if(ChipIdDecoder::IsProtonChip())
	{
		if(!opts.HasOption('-', "clonal-filter-bkgmodel"))
		{
			bkg_control.polyclonal_filter.enable = false;
		}
		if(!opts.HasOption('-', "xtalk-correction"))
		{
			bkg_control.enable_trace_xtalk_correction = false;
		}
		if(!opts.HasOption('-', "col-flicker-correct"))
		{
			img_control.col_flicker_correct = true;
		}
		if(!opts.HasOption('-', "col-flicker-correct-aggressive"))
		{
			img_control.aggressive_cnc = true;
		}
		if(!opts.HasOption('-', "img-gain-correct"))
		{
			img_control.gain_correct_images = true;
		}
	}
}

Пример #2

Показать файл

Файл: MergeImages.cpp Проект: Lingrui/TS

int main(int argc, const char *argv[]) {
    OptArgs opts;
    opts.ParseCmdLine(argc, argv);
    bool help;
    string topFile, bottomFile, outFile;
    opts.GetOption(topFile, "", '-', "top");
    opts.GetOption(bottomFile, "", '-', "bottom");
    opts.GetOption(outFile, "", '-', "merged");
    opts.GetOption(help, "false", 'h', "help");
    if (help || argc == 1) {
        usage();
    }
    ION_ASSERT(!topFile.empty() && !bottomFile.empty() && !outFile.empty(),
               "Need top, bottom and merged files. use --help for details.");
    MergeAcq merger;
    Image top;
    Image bottom;
    Image combo;
    cout << "Loading images." << endl;
    ION_ASSERT(top.LoadRaw(topFile.c_str()), "Couldn't load file.");
    ION_ASSERT(bottom.LoadRaw(bottomFile.c_str()), "Couldn't load file.");
    merger.SetFirstImage(&bottom);
    merger.SetSecondImage(&top, bottom.GetRows(), 0); // starting vertically raised but columns the same.
    cout << "Merging." << endl;
    merger.Merge(combo);
    Acq acq;
    cout << "Saving. " << endl;
    acq.SetData(&combo);
    acq.WriteVFC(outFile.c_str(), 0, 0, combo.GetCols(), combo.GetRows());
    cout << "Done." << endl;
    return 0;
}

Пример #3

Показать файл

Файл: ExtendParameters.cpp Проект: vswilliamson/TS

bool RetrieveParameterBool(OptArgs &opts, Json::Value& json, char short_name, const string& long_name_hyphens, bool default_value)
{
  string long_name_underscores = long_name_hyphens;
  for (unsigned int i = 0; i < long_name_underscores.size(); ++i)
    if (long_name_underscores[i] == '-')
      long_name_underscores[i] = '_';

  bool value = default_value;
  string source = "builtin default";

  if (json.isMember(long_name_underscores)) {
    if (json[long_name_underscores].isString())
      value = atoi(json[long_name_underscores].asCString());
    else
      value = json[long_name_underscores].asInt();
    source = "parameters json file";
  }

  if (opts.HasOption(short_name, long_name_hyphens)) {
    value = opts.GetFirstBoolean(short_name, long_name_hyphens, value);
    source = "command line option";
  }

  cout << setw(35) << long_name_hyphens << " = " << setw(10) << (value ? "true" : "false") << " (boolean, " << source << ")" << endl;
  return value;
}

Пример #4

Показать файл

bool BaseCallerParameters::InitializeSamplingFromOptArgs(OptArgs& opts, const int num_wells)
{
	assert(context_vars.options_set);

    // If we are just doing phase estimation none of the options matter, so don't spam output
	if (context_vars.just_phase_estimation){
	  sampling_opts.options_set = true;
	  return true;
	}

    sampling_opts.num_unfiltered           = opts.GetFirstInt    ('-', "num-unfiltered", 100000);
    sampling_opts.downsample_size          = opts.GetFirstInt    ('-', "downsample-size", 0);
    sampling_opts.downsample_fraction      = opts.GetFirstDouble ('-', "downsample-fraction", 1.0);

    sampling_opts.calibration_training     = opts.GetFirstInt    ('-', "calibration-training", -1);
    sampling_opts.have_calib_panel         = (not bc_files.calibration_panel_file.empty());
    sampling_opts.MaskNotWanted            = MaskNone;

    // Reconcile parameters downsample_size and downsample_fraction
    bool downsample = sampling_opts.downsample_size > 0 or sampling_opts.downsample_fraction < 1.0;
    if (sampling_opts.downsample_fraction < 1.0) {
      if (sampling_opts.downsample_size == 0)
    	sampling_opts.downsample_size = (int)((float)num_wells*sampling_opts.downsample_fraction);
      else
        sampling_opts.downsample_size = min(sampling_opts.downsample_size, (int)((float)num_wells*sampling_opts.downsample_fraction));
    }
    if (downsample)
      cout << "Downsampling activated: Randomly choosing " << sampling_opts.downsample_size << " reads on this chip." << endl;

    // Calibration training requires additional changes & overwrites command line options
    if (sampling_opts.calibration_training >= 0) {
      if (context_vars.diagonal_state_prog) {
        cerr << " === BaseCaller Option Incompatibility: Calibration training not supported for diagonal state progression. Aborting!" << endl;
        exit(EXIT_FAILURE);
      }
      if (sampling_opts.downsample_size>0)
        sampling_opts.calibration_training = min(sampling_opts.calibration_training, sampling_opts.downsample_size);

      sampling_opts.downsample_size  = max(sampling_opts.calibration_training, 0);
      sampling_opts.MaskNotWanted    = (MaskType)(MaskFilteredBadResidual|MaskFilteredBadPPF|MaskFilteredBadKey);
	  sampling_opts.num_unfiltered   = 0;
      context_vars.process_tfs       = false;
      context_vars.flow_signals_type = "scaled-residual";
      cout << "=== BaseCaller Calibration Training ===" << endl;
      cout << " - Generating a training set up to " << sampling_opts.downsample_size << " randomly selected reads." << endl;
      if (sampling_opts.have_calib_panel)
        cout << " - Adding calibration panel reads specified in " << bc_files.calibration_panel_file << endl;
      cout << endl;
    }

	sampling_opts.options_set = true;
    return true;
};

Пример #5

Показать файл

Файл: RecalibrationModel.cpp Проект: vswilliamson/TS

void RecalibrationModel::Initialize(OptArgs& opts, vector<string> &bam_comments, const string & run_id, const ion::ChipSubset & chip_subset)
{
  string model_file_name    = opts.GetFirstString ('-', "model-file", "");
  int model_threshold       = opts.GetFirstInt('-', "recal-model-hp-thres", 4);
  bool save_hpmodel         = opts.GetFirstBoolean('-', "save-hpmodel", true);
  bool diagonal_state_prog  = opts.GetFirstBoolean('-', "diagonal-state-prog", false);

  if (diagonal_state_prog)
    model_file_name.clear();

  if (InitializeModel(model_file_name, model_threshold) and save_hpmodel)
    SaveModelFileToBamComments(model_file_name, bam_comments, run_id, chip_subset.GetColOffset(), chip_subset.GetRowOffset());
}

Пример #6

Показать файл

TagTrimmerParameters MolecularTagTrimmer::ReadOpts(OptArgs& opts)
{
  // Reading command line options to set tag structures
  TagTrimmerParameters my_params;

  my_params.min_family_size            = opts.GetFirstInt     ('-', "min-tag-fam-size", 3);
  my_params.suppress_mol_tags          = opts.GetFirstBoolean ('-', "suppress-mol-tags", false);
  //my_params.cl_a_handle                = opts.GetFirstString  ('-', "tag-handle", "");
  //my_params.handle_cutoff              = opts.GetFirstInt     ('-', "handle-cutoff", 2);

  my_params.master_tags.prefix_mol_tag = opts.GetFirstString  ('-', "prefix-mol-tag", "");
  my_params.master_tags.suffix_mol_tag = opts.GetFirstString  ('-', "suffix-mol-tag", "");

  ValidateTagString(my_params.master_tags.prefix_mol_tag);
  ValidateTagString(my_params.master_tags.suffix_mol_tag);

  // Overload to disable molecular tagging
  if (my_params.min_family_size == 0)
    my_params.suppress_mol_tags = true;
  else if (my_params.min_family_size < 1) {
    cerr << "MolecularTagTrimmer Error: min-tag-fam-size must be at least 1. " << endl;
    exit(EXIT_FAILURE);
  }

  my_params.command_line_tags = my_params.master_tags.HasTags();

  // Options for read filtering & and trimming method selection
  string trim_method          = opts.GetFirstString  ('-', "tag-trim-method", "sloppy-trim");
  if (trim_method == "sloppy-trim")
    my_params.tag_trim_method = kSloppyTrim;
  else if (trim_method == "strict-trim")
    my_params.tag_trim_method = kStrictTrim;
  else {
    cerr << "MolecularTagTrimmer Error: Unknown tag trimming option " << trim_method << endl;
    exit(EXIT_FAILURE);
  }

  string filter_method        = opts.GetFirstString  ('-', "tag-filter-method", "need-all");
  if (filter_method == "need-all")
    my_params.tag_filter_method = kneed_all_tags;
  else if (filter_method == "need-prefix")
    my_params.tag_filter_method = kneed_only_prefix_tag;
  else if (filter_method == "need-suffix")
    my_params.tag_filter_method = kneed_only_suffix_tag;
  else {
    cerr << "MolecularTagTrimmer Error: Unknown tag filtering option " << filter_method << endl;
    exit(EXIT_FAILURE);
  }
  return my_params;
}

Пример #7

Показать файл

Файл: RecalibrationModel.cpp Проект: GerritvanNiekerk/TS

void RecalibrationModel::Initialize(OptArgs& opts)
{
    is_enabled_ = false;

    string model_file_name = opts.GetFirstString ('-', "model-file", "");
    if (model_file_name.empty() or model_file_name == "off") {
        printf("RecalibrationModel: disabled\n\n");
        return;
    }

    ifstream model_file;
    model_file.open(model_file_name.c_str());
    if (model_file.fail()) {
        printf("RecalibrationModel: disabled (cannot open %s)\n\n", model_file_name.c_str());
        model_file.close();
        return;
    }

    recalModelHPThres = opts.GetFirstInt('-', "recal-model-hp-thres", 4);

    string comment_line;
    getline(model_file, comment_line); //skip the comment time

    int flowStart, flowEnd, flowSpan, xMin, xMax, xSpan, yMin, yMax, ySpan, max_hp_calibrated;
    model_file >> flowStart >> flowEnd >> flowSpan >> xMin >> xMax >> xSpan >> yMin >> yMax >> ySpan >>  max_hp_calibrated;
    stratification.SetupRegion(xMin, xMax, xSpan, yMin, yMax, ySpan);
    //calculate number of partitions and initialize the stratifiedAs and stratifiedBs
    SetupStratification(flowStart,flowEnd, flowSpan,xMin,xMax,xSpan,yMin,yMax,ySpan,max_hp_calibrated);

    //TODO: parse model_file into stratifiedAs and stratifiedBs
    while (model_file.good()) {
        float paramA, paramB;
        int refHP;
        char flowBase;
        model_file >> flowBase >> flowStart >> flowEnd >> xMin >> xMax >> yMin >> yMax >> refHP >> paramA >> paramB;
        //populate it to stratifiedAs and startifiedBs
        int nucInd = NuctoInt(flowBase);
        //boundary check
        int offsetRegion = stratification.OffsetRegion(xMin,yMin);
        FillIndexes(offsetRegion,nucInd, refHP, flowStart, flowEnd, paramA, paramB);
    }

    model_file.close();

    printf("Recalibration: enabled (using calibration file %s)\n\n", model_file_name.c_str());
    is_enabled_ = true;
    if (recalModelHPThres > MAX_HPXLEN) is_enabled_ = false;
}

Пример #8

Показать файл

Файл: ExtendParameters.cpp Проект: vswilliamson/TS

void ExtendParameters::SetupFileIO(OptArgs &opts) {
  // freeBayes slot
  fasta                                 = opts.GetFirstString('r', "reference", "");
  if (fasta.empty()) {
    cerr << "Fatal ERROR: Reference file not specified via -r" << endl;
    exit(1);
  }
  ValidateAndCanonicalizePath(fasta);

  // freeBayes slot
  variantPriorsFile                     = opts.GetFirstString('c', "input-vcf", "");
  if (variantPriorsFile.empty()) {
    cerr << "INFO: No input VCF (Hotspot) file specified via -c,--input-vcf" << endl;
  }
  else
	ValidateAndCanonicalizePath(variantPriorsFile);

  sseMotifsFileName                     = opts.GetFirstString('e', "error-motifs", "");
  sseMotifsProvided = true;
  if (sseMotifsFileName.empty()) {
    sseMotifsProvided = false;
    cerr << "INFO: Systematic error motif file not specified via -e" << endl;
  }
  else
	ValidateAndCanonicalizePath(sseMotifsFileName);

  opts.GetOption(bams, "", 'b', "input-bam");
  if (bams.empty()) {
    cerr << "FATAL ERROR: BAM file not specified via -b" << endl;
    exit(-1);
  }
  for (unsigned int i_bam = 0; i_bam < bams.size(); ++i_bam)
    ValidateAndCanonicalizePath(bams[i_bam]);

  outputDir                             = opts.GetFirstString('O', "output-dir", ".");
  ValidateAndCanonicalizePath(outputDir);

  outputFile                            = opts.GetFirstString('o', "output-vcf", "");
  if (outputFile.empty()) {
    cerr << "Fatal ERROR: Output VCF filename not specified via -o" << endl;
    exit(1);
  }

  // Are those file names?
  postprocessed_bam                     = opts.GetFirstString('-', "postprocessed-bam", "");
  sampleName                            = opts.GetFirstString('g', "sample-name", "");
  force_sample_name                     = opts.GetFirstString('-', "force-sample-name", "");

}

Пример #9

Показать файл

Файл: ExtendParameters.cpp Проект: vswilliamson/TS

void ExtendParameters::SetFreeBayesParameters(OptArgs &opts, Json::Value& fb_params) {
  // FreeBayes parameters
  // primarily used in candidate generation

  targets                               = opts.GetFirstString('t', "target-file", "");
  trim_ampliseq_primers                 = opts.GetFirstBoolean('-', "trim-ampliseq-primers", false);
  if (targets.empty() and trim_ampliseq_primers) {
    cerr << "ERROR: --trim-ampliseq-primers enabled but no --target-file provided" << endl;
    exit(1);
  }

  allowIndels                           = RetrieveParameterBool  (opts, fb_params, '-', "allow-indels", true);
  allowSNPs                             = RetrieveParameterBool  (opts, fb_params, '-', "allow-snps", true);
  allowMNPs                             = RetrieveParameterBool  (opts, fb_params, '-', "allow-mnps", true);
  allowComplex                          = RetrieveParameterBool  (opts, fb_params, '-', "allow-complex", false);
  // deprecated:
  // leftAlignIndels                       = RetrieveParameterBool  (opts, fb_params, '-', "left-align-indels", false);
  RetrieveParameterBool  (opts, fb_params, '-', "left-align-indels", false);
  
  //useBestNAlleles = 0;
  useBestNAlleles                       = RetrieveParameterInt   (opts, fb_params, 'm', "use-best-n-alleles", 2);
  onlyUseInputAlleles                   = RetrieveParameterBool  (opts, fb_params, '-', "use-input-allele-only", false);
  min_mapping_qv                        = RetrieveParameterInt   (opts, fb_params, 'M', "min-mapping-qv", 4);
  read_snp_limit                        = RetrieveParameterInt   (opts, fb_params, 'U', "read-snp-limit", 10);
  readMaxMismatchFraction               = RetrieveParameterDouble(opts, fb_params, 'z', "read-max-mismatch-fraction", 1.0);
  maxComplexGap                         = RetrieveParameterInt   (opts, fb_params, '!', "max-complex-gap", 1);
  // read from json or command line, otherwise default to snp frequency
  minAltFraction                        = RetrieveParameterDouble(opts, fb_params, '-', "gen-min-alt-allele-freq", my_controls.filter_snps.min_allele_freq);
  minCoverage                           = RetrieveParameterInt   (opts, fb_params, '-', "gen-min-coverage", my_controls.filter_snps.min_cov);
  minIndelAltFraction                   = RetrieveParameterDouble(opts, fb_params, '-', "gen-min-indel-alt-allele-freq", my_controls.filter_hp_indel.min_allele_freq);
  //set up debug levels

  if (program_flow.DEBUG > 0)
    debug = true;

  if (program_flow.inputPositionsOnly) {
    processInputPositionsOnly = true;
  }

  if (variantPriorsFile.empty() && (processInputPositionsOnly || onlyUseInputAlleles) ) {
    cerr << "ERROR: Parameter error - Process-input-positions-only: " << processInputPositionsOnly << " use-input-allele-only: " << onlyUseInputAlleles << " :  Specified without Input VCF File " << endl;
    exit(1);
  }
}

Пример #10

Показать файл

Файл: OptBase.cpp Проект: biocyberman/TS

string RetrieveParameterString(OptArgs &opts, Json::Value& json, char short_name, const string& long_name_hyphens, const string& default_value)
{
  string long_name_underscores = GetRidOfDomainAndHyphens(long_name_hyphens);
  string value = default_value;
  string source = "builtin default";

  if (json.isMember(long_name_underscores)) {
    value = json[long_name_underscores].asCString();
    source = "parameters json file";
  }

  if (opts.HasOption(short_name, long_name_hyphens)) {
    value = opts.GetFirstString(short_name, long_name_hyphens, value);
    source = "command line option";
  }

  cout << setw(35) << long_name_hyphens << " = " << setw(10) << value << " (string, " << source << ")" << endl;
  return value;
}

Пример #11

Показать файл

Файл: ionstats.cpp Проект: Brainiarc7/TS

int IonstatsReduceH5(int argc, const char *argv[])
{
  OptArgs opts;
  opts.ParseCmdLine(argc-1, argv+1);

  string output_h5_filename = opts.GetFirstString  ('o', "output", "");
  bool merge_proton_blocks  = opts.GetFirstBoolean ('b', "merge-proton-blocks", "true");
  vector<string>  input_h5_filename;
  opts.GetLeftoverArguments(input_h5_filename);

  if(input_h5_filename.empty() or output_h5_filename.empty()) {
    IonstatsReduceH5Help();
    return 1;
  }

  if(merge_proton_blocks)
    cout << "NOTE:" << argv[0] << " " << argv[1] << ": --merge-proton-blocks=true so any Proton block-specific read group suffixes will be merged" << endl;

  return IonstatsAlignmentReduceH5(output_h5_filename, input_h5_filename, merge_proton_blocks);
}

Пример #12

Показать файл

Файл: ionstats.cpp Проект: Brainiarc7/TS

int IonstatsReduce(int argc, const char *argv[])
{
  OptArgs opts;
  opts.ParseCmdLine(argc, argv);

  string output_json_filename = opts.GetFirstString('o', "output", "");
  vector<string>  input_jsons;
  opts.GetLeftoverArguments(input_jsons);

  if(input_jsons.empty() or output_json_filename.empty()) {
    IonstatsReduceHelp();
    return 1;
  }

  ifstream in(input_jsons[0].c_str(), ifstream::in);
  if (!in.good()) {
    fprintf(stderr, "[ionstats] ERROR: cannot open %s\n", input_jsons[0].c_str());
    return 1;
  }
  Json::Value first_input_json;
  in >> first_input_json;
  in.close();

  if (!first_input_json.isMember("meta")) {
    fprintf(stderr, "[ionstats] ERROR: %s is not a valid input file for ionstats reduce\n", input_jsons[0].c_str());
    return 1;
  }
  string format_name = first_input_json["meta"].get("format_name","").asString();

  if (format_name == "ionstats_basecaller")
    return IonstatsBasecallerReduce(output_json_filename, input_jsons);
  if (format_name == "ionstats_tf")
    return IonstatsTestFragmentsReduce(output_json_filename, input_jsons);
  if (format_name == "ionstats_alignment")
    return IonstatsAlignmentReduce(output_json_filename, input_jsons);

  fprintf(stderr, "[ionstats] ERROR: %s is not a valid input file for ionstats reduce\n", input_jsons[0].c_str());
  return 1;
}

Пример #13

Показать файл

Файл: ExtendParameters.cpp Проект: vswilliamson/TS

void ProgramControlSettings::SetOpts(OptArgs &opts, Json::Value &tvc_params) {

  DEBUG                                 = opts.GetFirstInt   ('d', "debug", 0);
  nThreads                              = RetrieveParameterInt   (opts, tvc_params, 'n', "num-threads", 12);
  nVariantsPerThread                    = RetrieveParameterInt   (opts, tvc_params, 'N', "num-variants-per-thread", 250);
  use_SSE_basecaller                    = RetrieveParameterBool  (opts, tvc_params, '-', "use-sse-basecaller", true);
  // decide diagnostic
  rich_json_diagnostic                  = RetrieveParameterBool  (opts, tvc_params, '-', "do-json-diagnostic", false);
  minimal_diagnostic                    = RetrieveParameterBool  (opts, tvc_params, '-', "do-minimal-diagnostic", false);


  inputPositionsOnly                    = RetrieveParameterBool  (opts, tvc_params, '-', "process-input-positions-only", false);
  suppress_recalibration                = RetrieveParameterBool  (opts, tvc_params, '-', "suppress-recalibration", true);
  resolve_clipped_bases                 = RetrieveParameterBool  (opts, tvc_params, '-', "resolve-clipped-bases", false);
}

Пример #14

Показать файл

Файл: ExtendParameters.cpp Проект: vswilliamson/TS

void ExtendParameters::ParametersFromJSON(OptArgs &opts, Json::Value &tvc_params, Json::Value &freebayes_params, Json::Value &params_meta) {
  string parameters_file                = opts.GetFirstString('-', "parameters-file", "");
  Json::Value parameters_json(Json::objectValue);
  if (not parameters_file.empty()) {
    ifstream in(parameters_file.c_str(), ifstream::in);

    if (!in.good()) {
      fprintf(stderr, "[tvc] FATAL ERROR: cannot open %s\n", parameters_file.c_str());
      exit(-1);
    }
    
    in >> parameters_json;
    in.close();
    if (parameters_json.isMember("pluginconfig"))
      parameters_json = parameters_json["pluginconfig"];

    tvc_params = parameters_json.get("torrent_variant_caller", Json::objectValue);
    freebayes_params = parameters_json.get("freebayes", Json::objectValue);
    params_meta = parameters_json.get("meta", Json::objectValue);
  }

Пример #15

Показать файл

bool BaseCallerParameters::InitializeFilesFromOptArgs(OptArgs& opts)
{
    bc_files.input_directory        = opts.GetFirstString ('i', "input-dir", ".");
    bc_files.output_directory       = opts.GetFirstString ('o', "output-dir", ".");
    bc_files.unfiltered_untrimmed_directory = bc_files.output_directory + "/unfiltered.untrimmed";
    bc_files.unfiltered_trimmed_directory   = bc_files.output_directory + "/unfiltered.trimmed";

    CreateResultsFolder ((char*)bc_files.output_directory.c_str());
    CreateResultsFolder ((char*)bc_files.unfiltered_untrimmed_directory.c_str());
    CreateResultsFolder ((char*)bc_files.unfiltered_trimmed_directory.c_str());

    ValidateAndCanonicalizePath(bc_files.input_directory);
    ValidateAndCanonicalizePath(bc_files.output_directory);
    ValidateAndCanonicalizePath(bc_files.unfiltered_untrimmed_directory);
    ValidateAndCanonicalizePath(bc_files.unfiltered_trimmed_directory);

    bc_files.filename_wells         = opts.GetFirstString ('-', "wells", bc_files.input_directory + "/1.wells");
    bc_files.filename_mask          = opts.GetFirstString ('-', "mask", bc_files.input_directory + "/analysis.bfmask.bin");

    ValidateAndCanonicalizePath(bc_files.filename_wells);
    ValidateAndCanonicalizePath(bc_files.filename_mask, bc_files.input_directory + "/bfmask.bin");

    bc_files.filename_filter_mask   = bc_files.output_directory + "/bfmask.bin";
    bc_files.filename_json          = bc_files.output_directory + "/BaseCaller.json";
    bc_files.filename_phase         = bc_files.output_directory + "/PhaseEstimates.json";

    printf("\n");
    printf("Input files summary:\n");
    printf("     --input-dir %s\n", bc_files.input_directory.c_str());
    printf("         --wells %s\n", bc_files.filename_wells.c_str());
    printf("          --mask %s\n", bc_files.filename_mask.c_str());
    printf("\n");
    printf("Output directories summary:\n");
    printf("    --output-dir %s\n", bc_files.output_directory.c_str());
    printf("        unf.untr %s\n", bc_files.unfiltered_untrimmed_directory.c_str());
    printf("          unf.tr %s\n", bc_files.unfiltered_trimmed_directory.c_str());
    printf("\n");

    bc_files.lib_datasets_file      = opts.GetFirstString ('-', "datasets", "");
    bc_files.calibration_panel_file = opts.GetFirstString ('-', "calibration-panel", "");
    if (not bc_files.lib_datasets_file.empty())
      ValidateAndCanonicalizePath(bc_files.lib_datasets_file);
    if (not bc_files.calibration_panel_file.empty())
      ValidateAndCanonicalizePath(bc_files.calibration_panel_file);

    bc_files.options_set = true;
    return true;
};

Пример #16

Показать файл

Файл: BaseCallerParameters.cpp Проект: Lingrui/TS

bool BaseCallerContext::SetKeyAndFlowOrder(OptArgs& opts, const char * FlowOrder, const int NumFlows)
{
    flow_order.SetFlowOrder( opts.GetFirstString ('-', "flow-order", FlowOrder),
                             opts.GetFirstInt    ('f', "flowlimit", NumFlows));
    if (flow_order.num_flows() > NumFlows)
      flow_order.SetNumFlows(NumFlows);
    assert(flow_order.is_ok());

    string lib_key                = opts.GetFirstString ('-', "lib-key", "TCAG"); //! @todo Get default key from wells
    string tf_key                 = opts.GetFirstString ('-', "tf-key", "ATCG");
    lib_key                       = opts.GetFirstString ('-', "librarykey", lib_key);   // Backward compatible opts
    tf_key                        = opts.GetFirstString ('-', "tfkey", tf_key);
    keys.resize(2);
    keys[0].Set(flow_order, lib_key, "lib");
    keys[1].Set(flow_order, tf_key, "tf");
    return true;
};

Пример #17

Показать файл

Файл: PhaseEstimator.cpp Проект: Brainiarc7/TS

void PhaseEstimator::InitializeFromOptArgs(OptArgs& opts)
{
  phasing_estimator_      = opts.GetFirstString ('-', "phasing-estimator", "spatial-refiner-2");
  string arg_cf_ie_dr     = opts.GetFirstString ('-', "libcf-ie-dr", "");
  residual_threshold_     = opts.GetFirstDouble ('-', "phasing-residual-filter", 1.0);
  max_phasing_levels_     = opts.GetFirstInt    ('-', "max-phasing-levels", max_phasing_levels_default_);
  use_pid_norm_           = opts.GetFirstString ('-', "keynormalizer", "keynorm-old") == "keynorm-new";
  windowSize_             = opts.GetFirstInt    ('-', "window-size", DPTreephaser::kWindowSizeDefault_);

  if (!arg_cf_ie_dr.empty()) {
    phasing_estimator_ = "override";
    result_regions_x_ = 1;
    result_regions_y_ = 1;
    result_cf_.assign(1, 0.0);
    result_ie_.assign(1, 0.0);
    result_dr_.assign(1, 0.0);
    if (3 != sscanf (arg_cf_ie_dr.c_str(), "%f,%f,%f", &result_cf_[0], &result_ie_[0], &result_dr_[0])) {
      fprintf (stderr, "Option Error: libcf-ie-dr %s\n", arg_cf_ie_dr.c_str());
      exit (EXIT_FAILURE);
    }
    return; // --libcf-ie-dr overrides other phasing-related options
  }
}

Пример #18

Показать файл

Файл: ionstats_tf.cpp Проект: Brainiarc7/TS

int IonstatsTestFragments(int argc, const char *argv[])
{
  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  string input_bam_filename   = opts.GetFirstString('i', "input", "");
  string fasta_filename       = opts.GetFirstString('r', "ref", "");
  string output_json_filename = opts.GetFirstString('o', "output", "ionstats_tf.json");
  int histogram_length        = opts.GetFirstInt   ('h', "histogram-length", 400);

  if(argc < 2 or input_bam_filename.empty() or fasta_filename.empty()) {
    IonstatsTestFragmentsHelp();
    return 1;
  }

  //
  // Prepare for metric calculation
  //

  map<string,string> tf_sequences;
  PopulateReferenceSequences(tf_sequences, fasta_filename);


  BamReader input_bam;
  if (!input_bam.Open(input_bam_filename)) {
    fprintf(stderr, "[ionstats] ERROR: cannot open %s\n", input_bam_filename.c_str());
    return 1;
  }

  int num_tfs = input_bam.GetReferenceCount();


  SamHeader sam_header = input_bam.GetHeader();
  if(!sam_header.HasReadGroups()) {
    fprintf(stderr, "[ionstats] ERROR: no read groups in %s\n", input_bam_filename.c_str());
    return 1;
  }

  string flow_order;
  string key;
  for (SamReadGroupIterator rg = sam_header.ReadGroups.Begin(); rg != sam_header.ReadGroups.End(); ++rg) {
    if(rg->HasFlowOrder())
      flow_order = rg->FlowOrder;
    if(rg->HasKeySequence())
      key = rg->KeySequence;
  }


  // Need these metrics stratified by TF.

  vector<ReadLengthHistogram> called_histogram(num_tfs);
  vector<ReadLengthHistogram> aligned_histogram(num_tfs);
  vector<ReadLengthHistogram> AQ10_histogram(num_tfs);
  vector<ReadLengthHistogram> AQ17_histogram(num_tfs);
  vector<SimpleHistogram> error_by_position(num_tfs);
  vector<MetricGeneratorSNR> system_snr(num_tfs);
  vector<MetricGeneratorHPAccuracy> hp_accuracy(num_tfs);

  for (int tf = 0; tf < num_tfs; ++tf) {
    called_histogram[tf].Initialize(histogram_length);
    aligned_histogram[tf].Initialize(histogram_length);
    AQ10_histogram[tf].Initialize(histogram_length);
    AQ17_histogram[tf].Initialize(histogram_length);
    error_by_position[tf].Initialize(histogram_length);
  }

  vector<uint16_t> flow_signal_fz(flow_order.length());
  vector<int16_t> flow_signal_zm(flow_order.length());

  const RefVector& refs = input_bam.GetReferenceData();

  // Missing:
  //  - hp accuracy - tough, copy verbatim from TFMapper?


  BamAlignment alignment;
  vector<char>  MD_op;
  vector<int>   MD_len;
  MD_op.reserve(1024);
  MD_len.reserve(1024);
  string MD_tag;

  //
  // Main loop over mapped reads in the input BAM
  //

  while(input_bam.GetNextAlignment(alignment)) {


    if (!alignment.IsMapped() or !alignment.GetTag("MD",MD_tag))
      continue;

    // The check below eliminates unexpected alignments
    if (alignment.IsReverseStrand() or alignment.Position > 5)
      continue;

    int current_tf = alignment.RefID;

    //
    // Step 1. Parse MD tag
    //

    MD_op.clear();
    MD_len.clear();

    for (const char *MD_ptr = MD_tag.c_str(); *MD_ptr;) {

      int item_length = 0;
      if (*MD_ptr >= '0' and *MD_ptr <= '9') {    // Its a match
        MD_op.push_back('M');
        for (; *MD_ptr and *MD_ptr >= '0' and *MD_ptr <= '9'; ++MD_ptr)
          item_length = 10*item_length + *MD_ptr - '0';
      } else {
        if (*MD_ptr == '^') {                     // Its a deletion
          MD_ptr++;
          MD_op.push_back('D');
        } else                                    // Its a substitution
          MD_op.push_back('X');
        for (; *MD_ptr and *MD_ptr >= 'A' and *MD_ptr <= 'Z'; ++MD_ptr)
          item_length++;
      }
      MD_len.push_back(item_length);
    }

    //
    // Step 2. Synchronously scan through Cigar and MD, doing error accounting
    //

    int MD_idx = alignment.IsReverseStrand() ? MD_op.size()-1 : 0;
    int cigar_idx = alignment.IsReverseStrand() ? alignment.CigarData.size()-1 : 0;
    int increment = alignment.IsReverseStrand() ? -1 : 1;

    int AQ10_bases = 0;
    int AQ17_bases = 0;
    int num_bases = 0;
    int num_errors = 0;

    while (cigar_idx < (int)alignment.CigarData.size() and MD_idx < (int) MD_op.size() and cigar_idx >= 0 and MD_idx >= 0) {

      if (alignment.CigarData[cigar_idx].Length == 0) { // Try advancing cigar
        cigar_idx += increment;
        continue;
      }
      if (MD_len[MD_idx] == 0) { // Try advancing MD
        MD_idx += increment;
        continue;
      }

      // Match
      if (alignment.CigarData[cigar_idx].Type == 'M' and MD_op[MD_idx] == 'M') {
        int advance = min((int)alignment.CigarData[cigar_idx].Length, MD_len[MD_idx]);
        num_bases += advance;
        alignment.CigarData[cigar_idx].Length -= advance;
        MD_len[MD_idx] -= advance;

      // Insertion (read has a base, reference doesn't)
      } else if (alignment.CigarData[cigar_idx].Type == 'I') {
        int advance = alignment.CigarData[cigar_idx].Length;
        for (int cnt = 0; cnt < advance; ++cnt) {
          error_by_position[current_tf].Add(num_bases);
          num_bases++;
          num_errors++;
        }
        alignment.CigarData[cigar_idx].Length -= advance;

      // Deletion (reference has a base, read doesn't)
      } else if (alignment.CigarData[cigar_idx].Type == 'D' and MD_op[MD_idx] == 'D') {
        int advance = min((int)alignment.CigarData[cigar_idx].Length, MD_len[MD_idx]);
        for (int cnt = 0; cnt < advance; ++cnt) {
          error_by_position[current_tf].Add(num_bases);
          num_errors++;
        }
        alignment.CigarData[cigar_idx].Length -= advance;
        MD_len[MD_idx] -= advance;

      // Substitution
      } else if (MD_op[MD_idx] == 'X') {
        int advance = min((int)alignment.CigarData[cigar_idx].Length, MD_len[MD_idx]);
        for (int cnt = 0; cnt < advance; ++cnt) {
          error_by_position[current_tf].Add(num_bases);
          num_bases++;
          num_errors++;
        }
        alignment.CigarData[cigar_idx].Length -= advance;
        MD_len[MD_idx] -= advance;

      } else {
        printf("ionstats tf: Unexpected OP combination: %s Cigar=%c, MD=%c !\n",
            alignment.Name.c_str(), alignment.CigarData[cigar_idx].Type, MD_op[MD_idx]);
        break;
      }

      if (num_errors*10 <= num_bases)   AQ10_bases = num_bases;
      if (num_errors*50 <= num_bases)   AQ17_bases = num_bases;
    }

    //
    // Step 3. Profit
    //

    called_histogram[current_tf].Add(alignment.Length);
    aligned_histogram[current_tf].Add(num_bases);
    AQ10_histogram[current_tf].Add(AQ10_bases);
    AQ17_histogram[current_tf].Add(AQ17_bases);

    if(alignment.GetTag("ZM", flow_signal_zm))
      system_snr[current_tf].Add(flow_signal_zm, key.c_str(), flow_order);
    else if(alignment.GetTag("FZ", flow_signal_fz))
      system_snr[current_tf].Add(flow_signal_fz, key.c_str(), flow_order);


    // HP accuracy - keeping it simple

    if (!alignment.IsReverseStrand()) {

      string genome = key + tf_sequences[refs[current_tf].RefName];
      string calls = key + alignment.QueryBases;
      const char *genome_ptr = genome.c_str();
      const char *calls_ptr = calls.c_str();

      for (int flow = 0; flow < (int)flow_order.length() and *genome_ptr and *calls_ptr; ++flow) {
        int genome_hp = 0;
        int calls_hp = 0;
        while (*genome_ptr == flow_order[flow]) {
          genome_hp++;
          genome_ptr++;
        }
        while (*calls_ptr == flow_order[flow]) {
          calls_hp++;
          calls_ptr++;
        }
        hp_accuracy[current_tf].Add(genome_hp, calls_hp);
      }
    }
  }



  //
  // Processing complete, generate ionstats_tf.json
  //

  Json::Value output_json(Json::objectValue);
  output_json["meta"]["creation_date"] = get_time_iso_string(time(NULL));
  output_json["meta"]["format_name"] = "ionstats_tf";
  output_json["meta"]["format_version"] = "1.0";

  output_json["results_by_tf"] = Json::objectValue;

  for (int tf = 0; tf < num_tfs; ++tf) {

    if (aligned_histogram[tf].num_reads() < 1000)
      continue;

    called_histogram[tf].SaveToJson(output_json["results_by_tf"][refs[tf].RefName]["full"]);
    aligned_histogram[tf].SaveToJson(output_json["results_by_tf"][refs[tf].RefName]["aligned"]);
    AQ10_histogram[tf].SaveToJson(output_json["results_by_tf"][refs[tf].RefName]["AQ10"]);
    AQ17_histogram[tf].SaveToJson(output_json["results_by_tf"][refs[tf].RefName]["AQ17"]);
    error_by_position[tf].SaveToJson(output_json["results_by_tf"][refs[tf].RefName]["error_by_position"]);
    system_snr[tf].SaveToJson(output_json["results_by_tf"][refs[tf].RefName]);
    hp_accuracy[tf].SaveToJson(output_json["results_by_tf"][refs[tf].RefName]);

    output_json["results_by_tf"][refs[tf].RefName]["sequence"] = tf_sequences[refs[tf].RefName];
  }

  input_bam.Close();

  ofstream out(output_json_filename.c_str(), ios::out);
  if (out.good()) {
    out << output_json.toStyledString();
    return 0;
  } else {
    fprintf(stderr, "ERROR: unable to write to '%s'\n", output_json_filename.c_str());
    return 1;
  }
}

Пример #19

Показать файл

Файл: AnalyzeHPErrs.cpp Проект: alecw/TS

int main(int argc, const char *argv[]) {
  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  int hpLength;
  string statsOut;
  string alignmentOut;
  string pairedOut;
  string flowsOut;
  string summaryOut;
  string samFile;
  string qScoreCol;
  string wellsFile;
  string bfmaskFile;
  string snrFile;
  string binnedHpSigFile;
  string flowErrFile;
  string gcErrFile;
  int gcWin;
  string flowOrder;
  string keySeq;
  int numFlows;
  bool help;
  int qLength;
  double colCenter;
  double rowCenter;
  int colSize;
  int rowSize;
  int sampleSize;
  string wellsToUse;
  string run1, run2;
  opts.GetOption(run1, "", '-', "sff1");
  opts.GetOption(run2, "", '-', "sff2");
  opts.GetOption(wellsToUse, "", '-', "use-wells");
  opts.GetOption(samFile, "", '-', "sam-parsed");
  opts.GetOption(statsOut, "", '-', "stats-out");
  opts.GetOption(flowsOut, "", '-', "flows-out");
  opts.GetOption(alignmentOut, "", '-', "align-out");
  opts.GetOption(summaryOut, "", '-', "summary-out");
  opts.GetOption(pairedOut, "", '-', "paired-out");
  opts.GetOption(numFlows, "40", '-', "num-flows");
  opts.GetOption(hpLength, "6", '-', "max-hp");
  opts.GetOption(qScoreCol, "q7Len", '-', "qscore-col");
  opts.GetOption(qLength, "25", '-', "min-qlength");
  opts.GetOption(help,   "false", 'h', "help");
  opts.GetOption(wellsFile,   "", '-', "wells-file");
  opts.GetOption(bfmaskFile,   "", '-', "bfmask-file");
  opts.GetOption(snrFile,   "", '-', "snr-file");
  opts.GetOption(binnedHpSigFile,   "", '-', "binned-hp-sig-file");
  opts.GetOption(flowErrFile, "", '-', "flow-err-file");
  opts.GetOption(gcErrFile, "", '-', "gc-err-file");
  opts.GetOption(flowOrder, "", '-', "flow-order");
  opts.GetOption(keySeq, "", '-', "key-seq");
  opts.GetOption(colCenter, "0.5", '-', "col-center");
  opts.GetOption(rowCenter, "0.5", '-', "row-center");
  opts.GetOption(colSize, "0", '-', "col-size");
  opts.GetOption(rowSize, "0", '-', "row-size");
  opts.GetOption(gcErrFile, "", '-', "gc-err-file");
  opts.GetOption(gcWin, "40", '-', "gc-win");
  opts.GetOption(sampleSize, "100000", '-', "sample-size");
  if (help || samFile.empty() || statsOut.empty() || summaryOut.empty()) {
    usage();
  }
  opts.CheckNoLeftovers();

  // Some checks to make sure sensible bounds have been set
  if(colCenter < 0 || colCenter > 1) {
    cerr << "AnalyzeHPErrs - col-center must be in the range [0,1]" << endl;
    exit(1);
  }
  if(rowCenter < 0 || rowCenter > 1) {
    cerr << "AnalyzeHPErrs - row-center must be in the range [0,1]" << endl;
    exit(1);
  }
  if(colSize < 0) {
    cerr << "AnalyzeHPErrs - col-size cannot be negative." << endl;
    exit(1);
  }
  if(rowSize < 0) {
    cerr << "AnalyzeHPErrs - row-size cannot be negative." << endl;
    exit(1);
  }

  // Determine rows & cols if a bfmask file was supplied
  int nRow=0;
  int nCol=0;
  if(!bfmaskFile.empty()) {
    if(GetRowColFromBfmask(bfmaskFile, &nRow, &nCol)) {
      cerr << "AnalyzeHPErrs - problem determining rows & columns from bfmask file " << bfmaskFile << endl;
      exit(1);
    }
  }
	
  // Set up fds object
  FlowDiffStats* fds;
  if (!run1.empty()) {
    SffDiffStats* sds = new SffDiffStats(hpLength, nCol, nRow, qScoreCol, run1, run2);
    if (!pairedOut.empty())
      sds->SetPairedOut(pairedOut);
    fds = dynamic_cast<FlowDiffStats*>(sds);
  }
  else {
    GenomeDiffStats* gds = new GenomeDiffStats(hpLength, nCol, nRow, qScoreCol);
    if(alignmentOut != "") {
      gds->SetAlignmentsOut(alignmentOut);
    }
    if (!flowsOut.empty()) {
      gds->SetFlowsOut(flowsOut);
    }
    fds = dynamic_cast<FlowDiffStats*>(gds);
  }

  if (gcErrFile != "") {
    fds->SetFlowGCOut(gcErrFile);
    fds->SetGCWindowSize(gcWin);
  }

  if(keySeq != "") {
    fds->SetKeySeq(keySeq);
  }
  if(flowOrder != "") {
    fds->SetFlowOrder(flowOrder);
  }
  fds->SetStatsOut(statsOut);

  if (!wellsToUse.empty()) {
    std::vector<int> wells;
    std::vector<bool> use;
    ReadSetFromFile(wellsToUse, 0, wells);
    use.resize(nRow * nCol, false);
    int count = 0;
    ReservoirSample<int> wellSample(sampleSize);
    for (size_t i = 0; i < wells.size(); i++) {
      wellSample.Add(wells[i]);
    }
    wells = wellSample.GetData();
    for (size_t i = 0; i < wells.size(); i++) {
      use[wells[i]] = true;
      count++;
    }
    cout << "Read: " << count << " reads." << endl;
    fds->SetWellToAnalyze(use);
  }


  // Set integer-value row & column bounds
  int minRow=-1;
  int maxRow=-1;
  int minCol=-1;
  int maxCol=-1;
  if(colSize > 0 || rowSize > 0) {
    if(bfmaskFile.empty()) {
      cerr << "AnalyzeHPErrs - must specify bfmask file when restricting row or column ranges" << endl;
      exit(1);
    }
    if(rowSize > 0) {
      minRow = floor(nRow * rowCenter - rowSize / 2.0);
      maxRow = minRow + rowSize;
      minRow = std::max(0,minRow);
      maxRow = std::min(nRow,maxRow);
    }
    if(colSize > 0) {
      minCol = floor(nCol * colCenter - colSize / 2.0);
      maxCol = minCol + colSize;
      minCol = std::max(0,minCol);
      maxCol = std::min(nCol,maxCol);
    }
  }

  if (wellsFile != "") {
    std::vector<int32_t> xSubset, ySubset;
    fds->FillInSubset(samFile, qLength, minRow, maxRow, minCol, maxCol, xSubset, ySubset);
    if(bfmaskFile.empty()) {
      cerr << "AnalyzeHPErrs - must specify bfmask file when specifying wells file" << endl;
      exit(1);
    }
    fds->SetWellsFile(wellsFile, nRow, nCol, numFlows, xSubset, ySubset);
  }
  if (snrFile != "") {
    cout << "Opening snr file: " << snrFile << endl;
    fds->SetSNROut(snrFile);
  }
  if (binnedHpSigFile != "") {
    cout << "Opening binned HP signal file: " << binnedHpSigFile << endl;
    fds->SetBinnedHpSigOut(binnedHpSigFile);
  }
  if (flowErrFile != "") {
    cout << "Opening flow err file: " << flowErrFile << endl;
    fds->SetFlowErrOut(flowErrFile);
  }
  ofstream summary;
  summary.open(summaryOut.c_str());
  cout << "Reading and analyzing alignments from: " << samFile << endl;
  if(minCol > -1 || maxCol > -1)
    cout << "  Restricting to " << (maxCol-minCol) << " cols in the range [" << minCol << "," << maxCol << ")" << endl;
  if(minRow > -1 || maxRow > -1)
    cout << "  Restricting to " << (maxRow-minRow) << " rows in the range [" << minRow << "," << maxRow << ")" << endl;

  fds->SetAlignmentInFile(samFile);
  fds->FilterAndCompare(numFlows, summary, qLength, minRow, maxRow, minCol, maxCol);

  summary.close();
  delete fds;
  cout << "Done." << endl;
  return 0;
}

Пример #20

Показать файл

Файл: PerBaseQual.cpp Проект: GerritvanNiekerk/TS

void PerBaseQual::Init(OptArgs& opts, const string& chip_type, const string &output_directory, bool recalib)
{
	if(phred_table_)
	{
	  delete [] phred_table_;
	  phred_table_ = 0;
	}

  string phred_table_file       = opts.GetFirstString ('-', "phred-table-file", "");
  save_predictors_              = opts.GetFirstBoolean('-', "save-predictors", false);

  // Determine the correct phred table filename to use

  bool binTable = true;

  if (phred_table_file.empty()) {
    ChipIdDecoder::SetGlobalChipId(chip_type.c_str());
    ChipIdEnum chip_id = ChipIdDecoder::GetGlobalChipId();
    switch(chip_id){
    case ChipId314:
      phred_table_file = "phredTable.txt_314.binary";
      break;
    case ChipId316:
      phred_table_file = "phredTable.txt_316.binary";
      break;
    case ChipId316v2:
      phred_table_file = "phredTable.txt_318.binary";
      break; 
    case ChipId318:
      phred_table_file = "phredTable.txt_318.binary";
      break;
    case ChipId900: // Proton chip
      phred_table_file = "phredTable.txt_900.binary";
      break;
    default:
      phred_table_file = "phredTable.txt_314.binary";
      fprintf(stderr, "PerBaseQual: No default phred table for chip_type=%s, trying %s instead\n",
          chip_type.c_str(), phred_table_file.c_str());
      break;
    }

    if (recalib)
	{
		phred_table_file = phred_table_file.substr(0, phred_table_file.length() - 7);
        phred_table_file += ".Recal.binary";
	}

    char* full_filename = GetIonConfigFile(phred_table_file.c_str());
    if(!full_filename)
	{
		printf("WARNING: cannot find binary phred table file %s, try to use non-binary phred table\n", phred_table_file.c_str());
		phred_table_file = phred_table_file.substr(0, phred_table_file.length() - 7); // get rid of .binary
		binTable = false;
		char* full_filename2 = GetIonConfigFile(phred_table_file.c_str());
		if(!full_filename2)
			ION_ABORT("ERROR: Can't find phred table file " + phred_table_file);

		phred_table_file = full_filename2;
		free(full_filename2);
	}
	else
	{
		phred_table_file = full_filename;
		free(full_filename);
	}
  }

  cout << endl << "PerBaseQual::Init... phred_table_file=" << phred_table_file << endl;
  binTable = hasBinaryExtension(phred_table_file);

  // Load the phred table
  if(binTable)
  {
      cout << endl << "PerBaseQual::Init... load binary phred_table_file=" << phred_table_file << endl;
	  vector<size_t> vNumCuts(kNumPredictors, 0);

	  if(H5Fis_hdf5(phred_table_file.c_str()) > 0) 
	  {
			hid_t root = H5Fopen(phred_table_file.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
			if(root < 0)
			{
				ION_ABORT("ERROR: cannot open HDF5 file " + phred_table_file);
			}

		    hid_t grpQvTable = H5Gopen(root, "/QvTable", H5P_DEFAULT);
			if (grpQvTable < 0) 
			{
				H5Fclose(root);
				ION_ABORT("ERROR: fail to open HDF5 group QvTable");
			}

			if(H5Aexists(grpQvTable, "NumPredictors") <= 0)
			{
				H5Gclose(grpQvTable);
				H5Fclose(root);
				ION_ABORT("ERROR: HDF5 attribute NumPredictors does not exist");
			}

			hid_t attrNumPreds = H5Aopen(grpQvTable, "NumPredictors", H5P_DEFAULT);
			if (attrNumPreds < 0) 
			{
				H5Gclose(grpQvTable);
				H5Fclose(root);
				ION_ABORT("ERROR: fail to open HDF5 attribute NumPredictors");
			}

			unsigned int numPredictors = 0;
			herr_t ret = H5Aread(attrNumPreds, H5T_NATIVE_UINT, &numPredictors);
			H5Aclose(attrNumPreds);
			if(ret < 0 || numPredictors != (unsigned int)kNumPredictors)
			{
				H5Gclose(grpQvTable);
				H5Fclose(root);
				ION_ABORT("ERROR: HDF5 attribute NumPredictors is wrong");
			}

			char buf[100];
			for(size_t i = 0; i < (size_t)kNumPredictors; ++i)
			{
				offsets_.push_back(1);

				sprintf(buf, "ThresholdsOfPredictor%d", (int)i);

				if(H5Aexists(grpQvTable, buf) <= 0)
				{
					H5Gclose(grpQvTable);
					H5Fclose(root);
					ION_ABORT("ERROR: HDF5 attribute ThresholdsOfPredictor does not exist");
				}

				hid_t attrCuts = H5Aopen(grpQvTable, buf, H5P_DEFAULT);
				if (attrCuts < 0) 
				{
					H5Gclose(grpQvTable);
					H5Fclose(root);
					ION_ABORT("ERROR: fail to open HDF5 attribute ThresholdsOfPredictor");
				}

				hsize_t size = H5Aget_storage_size(attrCuts);
				size /= sizeof(float);

				float* fcuts = new float[size];

				ret = H5Aread(attrCuts, H5T_NATIVE_FLOAT, fcuts);
				H5Aclose(attrCuts);
				if(ret < 0)
				{
					H5Gclose(grpQvTable);
					H5Fclose(root);
					ION_ABORT("ERROR: fail to read HDF5 attribute ThresholdsOfPredictor");
				}

				vector<float> vCuts(size);
				copy(fcuts, fcuts + size, vCuts.begin());

				phred_cuts_.push_back(vCuts);

				delete [] fcuts;
				fcuts = 0;
			}

			hid_t dsQvs = H5Dopen(grpQvTable, "Qvs", H5P_DEFAULT);
			if (dsQvs < 0) 
			{
				H5Gclose(grpQvTable);
				H5Fclose(root);
				ION_ABORT("ERROR: fail to open HDF5 dataset Qvs");
			}

			hsize_t tbSize = H5Dget_storage_size(dsQvs);

			phred_table_ = new unsigned char[tbSize];

			ret = H5Dread(dsQvs, H5T_NATIVE_UCHAR, H5S_ALL, H5S_ALL, H5P_DEFAULT, phred_table_);
			H5Dclose(dsQvs);
			H5Gclose(grpQvTable);
			H5Fclose(root);		
			if (ret < 0)
			{
				delete [] phred_table_;
				phred_table_ = 0;

				ION_ABORT("ERROR: fail to read HDF5 dataset Qvs");
			}
	  }
	  else
	  {
		printf("WARNING: binary phred table file %s is not a HDF5 file, try binary file mode.\n", phred_table_file.c_str());
		ifstream source;
		source.open(phred_table_file.c_str(), ios::in|ios::binary|ios::ate);
		if (!source.is_open())
			ION_ABORT("ERROR: Cannot open file: " + phred_table_file);

		long totalSize = source.tellg();
		char* tbBlock = new char [totalSize];

		source.seekg (0, ios::beg);
		source.read (tbBlock, totalSize);
		source.close();

		long headerSize = 0;
		char* ptr = tbBlock;
		int numPredictors = ptr[0]; //kNumPredictors
		if(numPredictors != kNumPredictors)
		{
			delete [] tbBlock;
			tbBlock = 0;
			ION_ABORT("ERROR: Wrong number of predictors load from " + phred_table_file);
		}

		ptr += 4;
		headerSize += 4;
		
		for(int i = 0; i < kNumPredictors; ++i)
		{
			vNumCuts[i] = ptr[0];
			ptr += 4;
			headerSize += 4;

			offsets_.push_back(1);
		}

		long tbSize = 1;
		for(int i = 0; i < kNumPredictors; ++i)
		{
			vector<float> vCuts;
			tbSize *= vNumCuts[i];
			for(size_t j = 0; j < vNumCuts[i]; ++j)
			{
				float tmp;
				memcpy(&tmp, ptr, 4);
				vCuts.push_back(tmp); 
				ptr += 4;
				headerSize += 4;
			}
			
			phred_cuts_.push_back(vCuts);
		}

		if(tbSize != (totalSize - headerSize))
		{
			delete [] tbBlock;
			tbBlock = 0;
			ION_ABORT("ERROR: Wrong QV table size");
		}	

		phred_table_ = new unsigned char[tbSize];
		memcpy(phred_table_, ptr, tbSize * sizeof(unsigned char));

		delete [] tbBlock;
		tbBlock = 0;
	  }

	  for(size_t i = kNumPredictors - 2; i > 0; --i)
	  {
		offsets_[i] *= phred_cuts_[i + 1].size();
		offsets_[i - 1] = offsets_[i];
	  }
	  offsets_[0] *= phred_cuts_[1].size();
  }
  else
  {
	  ifstream source;
	  source.open(phred_table_file.c_str());
	  if (!source.is_open())
		ION_ABORT("ERROR: Cannot open file: " + phred_table_file);

	  while (!source.eof()) {
		string line;
		getline(source, line);

		if (line.empty())
		  break;

		if (line[0] == '#')
		  continue;

		stringstream strs(line);
		float temp;
		for (int k = 0; k < kNumPredictors; ++k) {
		  strs >> temp;
		  phred_thresholds_[k].push_back(temp);
		}
		strs >> temp; //skip n-th entry
		strs >> temp;
		phred_quality_.push_back(temp);
	  }

	  source.close();

	  for (int k = 0; k < kNumPredictors; ++k)
		phred_thresholds_max_[k] = *max_element(phred_thresholds_[k].begin(), phred_thresholds_[k].end()); 
  }
 
  // Prepare for predictor dump here

  if (save_predictors_) {
    string predictors_filename = output_directory + "/Predictors.txt";
    cout << endl << "Saving PerBaseQual predictors to file " << predictors_filename << endl << endl;
    predictor_dump_.open(predictors_filename.c_str());
    if (!predictor_dump_.is_open())
      ION_ABORT("ERROR: Cannot open file: " + predictors_filename);
  }
}

Пример #21

Показать файл

Файл: tvcvalidator.cpp Проект: GerritvanNiekerk/TS

int main(int argc, const char* argv[])
{
  printf ("tvcvalidator %s-%s (%s) - Prototype tvc validation tool\n\n",
      IonVersion::GetVersion().c_str(), IonVersion::GetRelease().c_str(), IonVersion::GetSvnRev().c_str());

  if (argc == 1) {
    VariantValidatorHelp();
    return 1;
  }

  OptArgs opts;
  opts.ParseCmdLine(argc, argv);

  if (opts.GetFirstBoolean('v', "version", false)) {
    return 0;
  }
  if (opts.GetFirstBoolean('h', "help", false)) {
    VariantValidatorHelp();
    return 0;
  }

  string input_vcf_filename = opts.GetFirstString ('i', "input-vcf", "");
  string truth_filename = opts.GetFirstString ('t', "truth-file", "");
  string truth_dir = opts.GetFirstString ('d', "truth-dir", "/results/plugins/validateVariantCaller/files");

  // TODO: reference optional, only used to verify reference allele in input-vcf and truth files
  //string reference_filename = opts.GetFirstString ('r', "reference", "");

  opts.CheckNoLeftovers();


  //
  // Step 1. Load input VCF file into memory
  //

  if (input_vcf_filename.empty()) {
    VariantValidatorHelp();
    cerr << "ERROR: Input VCF file not specified " << endl;
    return 1;
  }

  VariantCallerResults results_vcf;
  results_vcf.load_vcf(input_vcf_filename);
  printf("Loaded VCF %s with %d variant calls\n", input_vcf_filename.c_str(), (int)results_vcf.variants.size());



  //
  // Step 2. Parse truth files, compare them to the input vcf, and compute match scores
  //

  if (not truth_filename.empty()) {
    ValidatorTruth truth;
    truth.ReadTruthFile(truth_filename);
    truth.CompareToCalls(results_vcf);
    return 0;
  }

  truth_dir += "/*.bed";
  glob_t glob_result;
  glob(truth_dir.c_str(), GLOB_TILDE, NULL, &glob_result);
  for(unsigned int i = 0; i < glob_result.gl_pathc; ++i) {

    ValidatorTruth truth;
    truth.ReadTruthFile(string(glob_result.gl_pathv[i]));
    truth.CompareToCalls(results_vcf);

  }
  globfree(&glob_result);


  return 0;
}

Пример #22

Показать файл

Файл: bamrealignment.cpp Проект: Lingrui/TS

int main (int argc, const char *argv[])
{
  printf ("------------- bamrealignment --------------\n");

  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  vector<int> score_vals(4);

  string input_bam  = opts.GetFirstString  ('i', "input", "");
  string output_bam = opts.GetFirstString  ('o', "output", "");
  opts.GetOption(score_vals, "4,-6,-5,-2", 's', "scores");
  int    clipping   = opts.GetFirstInt     ('c', "clipping", 2);
  bool   anchors    = opts.GetFirstBoolean ('a', "anchors", true);
  int    bandwidth  = opts.GetFirstInt     ('b', "bandwidth", 10);
  bool   verbose    = opts.GetFirstBoolean ('v', "verbose", false);
  bool   debug      = opts.GetFirstBoolean ('d', "debug", false);
  int    format     = opts.GetFirstInt     ('f', "format", 1);
  int  num_threads  = opts.GetFirstInt     ('t', "threads", 8);
  string log_fname  = opts.GetFirstString  ('l', "log", "");
  

  if (input_bam.empty() or output_bam.empty())
    return PrintHelp();

  opts.CheckNoLeftovers();

  std::ofstream logf;
  if (log_fname.size ())
  {
    logf.open (log_fname.c_str ());
    if (!logf.is_open ())
    {
      fprintf (stderr, "bamrealignment: Failed to open log file %s\n", log_fname.c_str());
      return 1;
    }
  }

  BamReader reader;
  if (!reader.Open(input_bam)) {
    fprintf(stderr, "bamrealignment: Failed to open input file %s\n", input_bam.c_str());
    return 1;
  }

  SamHeader header = reader.GetHeader();
  RefVector refs   = reader.GetReferenceData();

  BamWriter writer;
  writer.SetNumThreads(num_threads);
  if (format == 1)
    writer.SetCompressionMode(BamWriter::Uncompressed);
  else
    writer.SetCompressionMode(BamWriter::Compressed);

  if (!writer.Open(output_bam, header, refs)) {
    fprintf(stderr, "bamrealignment: Failed to open output file %s\n", output_bam.c_str());
    return 1;
  }


  // The meat starts here ------------------------------------

  if (verbose)
    cout << "Verbose option is activated, each alignment will print to screen." << endl
         << "  After a read hit RETURN to continue to the next one," << endl
         << "  or press q RETURN to quit the program," << endl
         << "  or press s Return to silence verbose," << endl
         << "  or press c RETURN to continue printing without further prompt." << endl << endl;

  unsigned int readcounter = 0;
  unsigned int mapped_readcounter = 0;
  unsigned int realigned_readcounter = 0;
  unsigned int modified_alignment_readcounter = 0;
  unsigned int pos_update_readcounter = 0;
  unsigned int failed_clip_realigned_readcount = 0;
  
  unsigned int already_perfect_readcount = 0;
  
  unsigned int bad_md_tag_readcount = 0;
  unsigned int error_recreate_ref_readcount = 0;
  unsigned int error_clip_anchor_readcount = 0;
  unsigned int error_sw_readcount = 0;
  unsigned int error_unclip_readcount = 0;
  
  unsigned int start_position_shift;
  int orig_position;
  int new_position;

  string  md_tag, new_md_tag, input = "x";
  vector<CigarOp>    new_cigar_data;
  vector<MDelement>  new_md_data;
  bool position_shift = false;
  time_t start_time = time(NULL);

  Realigner aligner;
  aligner.verbose_ = verbose;
  aligner.debug_   = debug;
  if (!aligner.SetScores(score_vals))
    cout << "bamrealignment: Four scores need to be provided: match, mismatch, gap open, gap extend score!" << endl;

  aligner.SetAlignmentBandwidth(bandwidth);

  BamAlignment alignment;
  while(reader.GetNextAlignment(alignment)){
    readcounter ++;
    position_shift = false;
    
    if ( (readcounter % 100000) == 0 )
       cout << "Processed " << readcounter << " reads. Elapsed time: " << (time(NULL) - start_time) << endl;

    if (alignment.IsMapped()) {
      
      
      
      orig_position = alignment.Position;
      mapped_readcounter++;
      aligner.SetClipping(clipping, !alignment.IsReverseStrand());
      if (aligner.verbose_) {
    	cout << endl;
        if (alignment.IsReverseStrand())
          cout << "The read is from the reverse strand." << endl;
        else
          cout << "The read is from the forward strand." << endl;
      }

      if (!alignment.GetTag("MD", md_tag)) {
    	if (aligner.verbose_)
          cout << "Warning: Skipping read " << alignment.Name << ". It is mapped but missing MD tag." << endl;
	if (logf.is_open ())
	  logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "MISSMD" << '\n';
	bad_md_tag_readcount++;
      } else if (aligner.CreateRefFromQueryBases(alignment.QueryBases, alignment.CigarData, md_tag, anchors)) {
	bool clipfail = false;
	if (Realigner::CR_ERR_CLIP_ANCHOR == aligner.GetCreateRefError ())
	{
	  clipfail = true;
	  failed_clip_realigned_readcount ++;
	}

        if (!aligner.computeSWalignment(new_cigar_data, new_md_data, start_position_shift)) {
          if (aligner.verbose_)
            cout << "Error in the alignment! Not updating read information." << endl;
	  if (logf.is_open ())
	    logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "SWERR" << '\n';
	  error_sw_readcount++;
          writer.SaveAlignment(alignment);  // Write alignment unchanged
          continue;
        }

        if (!aligner.addClippedBasesToTags(new_cigar_data, new_md_data, alignment.QueryBases.size())) {
          if (aligner.verbose_)
            cout << "Error when adding clipped anchors back to tags! Not updating read information." << endl;
	  if (logf.is_open ())
	    logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "UNCLIPERR" << '\n';
          writer.SaveAlignment(alignment);  // Write alignment unchanged
	  error_unclip_readcount ++;
          continue;
        }
        new_md_tag = aligner.GetMDstring(new_md_data);
        realigned_readcounter++;

        // adjust start position of read
        if (!aligner.LeftAnchorClipped() and start_position_shift != 0) {
          new_position = aligner.updateReadPosition(alignment.CigarData, (int)start_position_shift, alignment.Position);
          if (new_position != alignment.Position) {
            pos_update_readcounter++;
            position_shift = true;
            alignment.Position = new_position;
          }
        }
        
        if (position_shift || alignment.CigarData.size () != new_cigar_data.size () || md_tag != new_md_tag)
	{
	  if (logf.is_open ())
	  {
	    logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "MOD";
	    if (position_shift)
	      logf << "-SHIFT";
	    if (clipfail)
	      logf << " NOCLIP";
	    logf << '\n';
	  }
	  modified_alignment_readcounter++;
	}
	else
	{
            if (logf.is_open ())
	    {
	      logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "UNMOD";
              if (clipfail)
	        logf << " NOCLIP";
	      logf << '\n';
	    }
	}

        if (aligner.verbose_){
          cout << alignment.Name << endl;
          cout << "------------------------------------------" << endl;
          // Wait for input to continue or quit program
          if (input.size() == 0)
            input = 'x';
          else if (input[0] != 'c' and input[0] != 'C')
            getline(cin, input);
          if (input.size()>0){
            if (input[0] == 'q' or input[0] == 'Q')
              return 1;
            else if (input[0] == 's' or input[0] == 'S')
              aligner.verbose_ = false;
          }
        }

        // Finally update alignment information
        alignment.CigarData = new_cigar_data;
        alignment.EditTag("MD", "Z" , new_md_tag);

      } // end of CreateRef else if
      else {
	switch (aligner.GetCreateRefError ())
	{
	  case Realigner::CR_ERR_RECREATE_REF:
            if (logf.is_open ())
	      logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "RECRERR" << '\n';
	    error_recreate_ref_readcount++;
	    break;
	  case Realigner::CR_ERR_CLIP_ANCHOR:
            if (logf.is_open ())
	      logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "CLIPERR" << '\n';
	    error_clip_anchor_readcount++;
	    break;
	  default:
		  //  On a good run this writes way too many reads to the log file - don't want to create a too large txt file
          //  if (logf.is_open ())
	      //logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "PERFECT" << '\n';
	    already_perfect_readcount++;
	    break;
	}
	
	if (aligner.verbose_) {
	  cout << alignment.Name << endl;
	  cout << "------------------------------------------" << endl;
	  // Wait for input to continue or quit program
	  if (input.size() == 0)
	    input = 'x';
	  else if (input[0] != 'c' and input[0] != 'C')
	    getline(cin, input);
	  if (input.size()>0){
	    if (input[0] == 'q' or input[0] == 'Q')
	      return 1;
	    else if (input[0] == 's' or input[0] == 'S')
	      aligner.verbose_ = false;
	  }
	}
      }

      // --- Debug output for Rajesh ---
      if (debug && aligner.invalid_cigar_in_input) {
        aligner.verbose_ = true;
        cout << "Invalid cigar string / md tag pair in read " << alignment.Name << endl;
        // Rerun reference generation to display error
        aligner.CreateRefFromQueryBases(alignment.QueryBases, alignment.CigarData, md_tag, anchors);

        aligner.verbose_ = verbose;
        aligner.invalid_cigar_in_input = false;
      }
      // --- --- ---


    } // end of if isMapped

    writer.SaveAlignment(alignment);

  } // end while loop over reads

  if (aligner.invalid_cigar_in_input)
    cerr << "WARNING bamrealignment: There were invalid cigar string / md tag pairs in the input bam file." << endl;

  // ----------------------------------------------------------------
  // program end -- output summary information
  cout   << "                            File: " << input_bam    << endl
         << "                     Total reads: " << readcounter  << endl
         << "                    Mapped reads: " << mapped_readcounter << endl;
  if (bad_md_tag_readcount)
    cout << "            Skipped: bad MD tags: " << bad_md_tag_readcount << endl;
  if (error_recreate_ref_readcount)
    cout << " Skipped: unable to recreate ref: " << error_recreate_ref_readcount << endl;
  if (error_clip_anchor_readcount)
    cout << "  Skipped: error clipping anchor: " << error_clip_anchor_readcount << endl;
  cout  <<  "       Skipped:  already perfect: " << already_perfect_readcount << endl
        <<  "           Total reads realigned: " << mapped_readcounter - already_perfect_readcount - bad_md_tag_readcount - error_recreate_ref_readcount - error_clip_anchor_readcount << endl;
  if (failed_clip_realigned_readcount)
    cout << "                      (including  " << failed_clip_realigned_readcount << " that failed to clip)" << endl;
  if (error_sw_readcount)
    cout << " Failed to complete SW alignment: " << error_sw_readcount << endl;
  if (error_unclip_readcount)
    cout << "         Failed to unclip anchor: " << error_unclip_readcount << endl;
  cout   << "           Succesfully realigned: " << realigned_readcounter << endl
         << "             Modified alignments: " << modified_alignment_readcounter << endl
         << "                Shifted position: " << pos_update_readcounter << endl;
  
  cout << "Processing time: " << (time(NULL)-start_time) << " seconds." << endl;
  cout << "INFO: The output BAM file may be unsorted." << endl;
  cout << "------------------------------------------" << endl;
  return 0;
}

Пример #23

Показать файл

Файл: Calibration.cpp Проект: Lingrui/TS

int main (int argc, const char *argv[])
{
  time_t program_start_time;
  time(&program_start_time);
  Json::Value calibration_json(Json::objectValue);
  DumpStartingStateOfProgram (argc,argv,program_start_time, calibration_json["Calibration"]);

  //
  // Step 1. Process command line options
  //

  OptArgs opts;
  opts.ParseCmdLine(argc, argv);

  CalibrationContext calib_context;
  if (not calib_context.InitializeFromOpts(opts)){
    PrintHelp_CalModules();
  }

  HistogramCalibration master_histogram(opts, calib_context);
  calib_context.hist_calibration_master = &master_histogram;

  LinearCalibrationModel master_linear_model(opts, calib_context);
  calib_context.linear_model_master = &master_linear_model;

  opts.CheckNoLeftovers();

  //
  // Step 2. Execute threaded calibration
  //

  time_t calibration_start_time;
  time(&calibration_start_time);

  pthread_mutex_init(&calib_context.read_mutex,  NULL);
  pthread_mutex_init(&calib_context.write_mutex, NULL);

  pthread_t worker_id[calib_context.num_threads];
  for (int worker = 0; worker < calib_context.num_threads; worker++)
  if (pthread_create(&worker_id[worker], NULL, CalibrationWorker, &calib_context)) {
    cerr << "Calibration ERROR: Problem starting thread" << endl;
    exit (EXIT_FAILURE);
  }

  for (int worker = 0; worker < calib_context.num_threads; worker++)
    pthread_join(worker_id[worker], NULL);

  pthread_mutex_destroy(&calib_context.read_mutex);
  pthread_mutex_destroy(&calib_context.write_mutex);

  time_t calibration_end_time;
  time(&calibration_end_time);


  //
  // Step 3. Create models, write output, and close modules
  //

  // HP histogram calibration
  if (master_histogram.CreateCalibrationModel())
    master_histogram.ExportModelToJson(calibration_json["HPHistogram"]);

  // Linear Model
  if (master_linear_model.CreateCalibrationModel())
    master_linear_model.ExportModelToJson(calibration_json["LinearModel"], "");


  // Transfer stuff from calibration context and close bam reader
  calib_context.Close(calibration_json["Calibration"]);

  time_t program_end_time;
  time(&program_end_time);

  calibration_json["Calibration"]["end_time"] = get_time_iso_string(program_end_time);
  calibration_json["Calibration"]["total_duration"] = (Json::Int)difftime(program_end_time,program_start_time);
  calibration_json["Calibration"]["calibration_duration"] = (Json::Int)difftime(calibration_end_time,calibration_start_time);

  SaveJson(calibration_json, calib_context.filename_json);
  return EXIT_SUCCESS;
}

Пример #24

Показать файл

Файл: TraceDriver.cpp Проект: Lingrui/TS

int main(int argc, const char *argv[]) {
  OptArgs opts;  
  TraceConfig config;
  string inputDir;
  string outputDir;
  bool help;

  opts.ParseCmdLine(argc, argv);
  opts.GetOption(inputDir, "", '-', "source-dir");
  opts.GetOption(outputDir, "", '-', "output-dir");
  opts.GetOption(config.precision, "5", '-', "precision");
  opts.GetOption(config.numEvec, "7", '-', "num-evec");
  opts.GetOption(config.doDebug, "false", '-', "debug-files");
  opts.GetOption(config.compressionType, "delta", '-', "compression");
  opts.GetOption(config.numFlows, "-1", '-', "num-flows");
  opts.GetOption(config.numCores, "6", '-', "num-cores");
  opts.GetOption(config.errCon,"0",'-',"err-con");
  opts.GetOption(config.rankGood,"0",'-',"rank-good");
  opts.GetOption(config.pivot,"0",'-',"pivot");
  opts.GetOption(help, "false", 'h', "help");
  opts.GetOption(config.isThumbnail, "false", '-', "thumbnail");
  opts.GetOption(config.use_hard_est, "false",'-', "use-hard-est");
  opts.GetOption(config.t0_hard, "0", '-', "t0-hard");
  opts.GetOption(config.tmid_hard, "0", '-', "tmid-hard");
  opts.GetOption(config.sigma_hard, "0", '-', "sigma-hard");
  opts.GetOption(config.row_step, "100", '-', "row-step");
  opts.GetOption(config.col_step, "100", '-', "col-step");
  opts.GetOption(config.bg_param, "", '-', "region-param");
  opts.GetOption(config.grind_acq_0, "0", '-', "grind-acq0");
  if(help || inputDir.empty() || outputDir.empty()) {
    usage();
  }
  char *explog_path = NULL;
  explog_path = MakeExpLogPathFromDatDir(inputDir.c_str());
  int numFlows = config.numFlows;
  if (numFlows < 0) { 
    numFlows = GetTotalFlows(explog_path); 
  }

  // Check and setup our compression type
  TraceChunkSerializer serializer;
  serializer.SetRecklessAbandon(true);
  if (config.compressionType == "svd") {
    SvdDatCompress *dc = new SvdDatCompress(config.precision, config.numEvec);
    serializer.SetCompressor(dc);
    cout << "Doing lossy svd compression. (" << serializer.GetCompressionType() << ")" << endl;
  }
  // else if (config.compressionType == "svd+") {
  //   SvdDatCompressPlus *dc = new SvdDatCompressPlus();
  //   serializer.SetCompressor(dc);
  //   cout << "Doing lossy svd compression. (" << serializer.GetCompressionType() << ")" << endl;
  // }
  // else if (config.compressionType == "svd++") {
  //   SvdDatCompressPlusPlus *dc = new SvdDatCompressPlusPlus();
  //   if (config.errCon >0 )
  //     dc->SetErrCon(config.errCon);
  //   if (config.rankGood > 0 )
  //     dc->SetRankGood(config.rankGood);
  //   if (config.pivot > 0)
  //     dc->SetPivot(config.pivot);
  //   serializer.SetCompressor(dc);
  //   cout << "Doing lossy svd compression for good traces and delta for bad ones. (" << serializer.GetCompressionType() << ")" << endl;
  // }
  else if (config.compressionType == "delta") {
    VencoLossless *venco = new VencoLossless();
    serializer.SetCompressor(venco);
    cout << "Doing lossless delta compression. (" << serializer.GetCompressionType() << ")" << endl;
  }
  else if (config.compressionType == "delta-plain") {
    DeltaComp *delta = new DeltaComp();
    serializer.SetCompressor(delta);
    cout << "Doing lossless delta plain compression. (" << serializer.GetCompressionType() << ")" << endl;
  }
  else if (config.compressionType == "delta-plain-fst") {
    DeltaCompFst *delta = new DeltaCompFst();
    serializer.SetCompressor(delta);
    cout << "Doing lossless delta plain fast compression. (" << serializer.GetCompressionType() << ")" << endl;
  }
  else if (config.compressionType == "delta-plain-fst-smx") {
   DeltaCompFstSmX *delta = new DeltaCompFstSmX();
    serializer.SetCompressor(delta);
    cout << "Doing lossless delta plain fast compression. (" << serializer.GetCompressionType() << ")" << endl;
  }
  else if (config.compressionType == "none") {
    TraceCompressor *vanilla = new TraceNoCompress();
    serializer.SetCompressor(vanilla);
    cout << "Doing no compression. (" << serializer.GetCompressionType() << ")" << endl;
  }
  else {
    ION_ABORT("Don't recognize compression type: " + config.compressionType);
  }

  const char *id = GetChipId(explog_path);
  if (explog_path) free (explog_path);
  ChipIdDecoder::SetGlobalChipId(id);
  ImageTransformer::CalibrateChannelXTCorrection(inputDir.c_str(), "lsrowimage.dat");

  Image bfImg1;
  string bfFile = inputDir + "/beadfind_pre_0003.dat";
  bfImg1.LoadRaw(bfFile.c_str());
  const RawImage *bf1raw = bfImg1.GetImage(); 
  Mask mask(bf1raw->cols, bf1raw->rows);
  ImageTransformer::XTChannelCorrect(bfImg1.raw,bfImg1.results_folder);

  bfImg1.FilterForPinned (&mask, MaskEmpty, false);

  Image bfImg2;
  string bfFile2 = inputDir + "/beadfind_pre_0001.dat";
  bfImg2.LoadRaw(bfFile2.c_str());
  ImageTransformer::XTChannelCorrect(bfImg2.raw,bfImg1.results_folder);

  bfImg2.FilterForPinned (&mask, MaskEmpty, false);
  const RawImage *bf2raw = bfImg2.GetImage(); 


  GridMesh<T0Prior> t0Prior;
  T0Calc bfT0;
  /* Calc t0 and get prior. */
  cout << "Doing beadfind t0" << endl;
  GenerateBfT0Prior(config, bf1raw->image, bf1raw->baseFrameRate, bf1raw->rows, bf1raw->cols,
                    bf1raw->frames, bf1raw->timestamps,
                    config.row_step, config.col_step, &mask, bfT0, t0Prior);

  GridMesh<T0Prior> t0Prior2;
  T0Calc bfT02;
  GenerateBfT0Prior(config, bf2raw->image, bf2raw->baseFrameRate, bf2raw->rows, bf2raw->cols,
                    bf2raw->frames, bf2raw->timestamps,
                    config.row_step, config.col_step, &mask, bfT02, t0Prior2);

  SigmaTMidNucEstimation sigmaEst;
  sigmaEst.Init(config.rate_sigma_intercept, config.rate_sigma_slope, 
                config.t0_tmid_intercept, config.t0_tmid_slope, bf1raw->baseFrameRate);
  GridMesh<SigmaEst> sigmaTMid;
  bfImg1.Close();
  bfImg2.Close();

  // Calculate individual well t0 by looking at neighboring regions
  vector<float> wellT0;
  bfT0.CalcIndividualT0(wellT0, 0);
  vector<float> wellT02;
  bfT02.CalcIndividualT0(wellT02, 0);
  for (size_t i =0; i< wellT0.size();i++) {
    if (wellT0[i] > 0 && wellT02[i] > 0) {
      wellT0[i] = (wellT0[i] + wellT02[i])/2.0f;
    }
    else {
      wellT0[i] = max(wellT0[i], wellT02[i]);
    }
  }

  // Average the region level t0, should we do this first and then just do sinle well level?
  for (size_t bIx = 0; bIx < bfT0.GetNumRegions(); bIx++) {
    double t1 = bfT0.GetT0(bIx);
    double t2 = bfT02.GetT0(bIx);
    if (t1 > 0 && t2 > 0) {
      t1 = (t1 + t2)/2.0;
    }
    else {
      t1 = max(t1,t2);
    }
    bfT0.SetT0(bIx, t1);
  }

  // Single thread first dat
  for (size_t datIx = 0; datIx < 1; ++datIx) {
    cout << "Doing: " << datIx << endl;
    char buffer[2048];
    snprintf(buffer, sizeof(buffer), "%s/acq_%.4d.dat", inputDir.c_str(), (int) datIx);
    string datFile = buffer;
    /* Use prior to calculate t0 and slope. */
    Image datImg;
    T0Calc t0;
    datImg.LoadRaw(datFile.c_str());
    //    ImageTransformer::XTChannelCorrect(datImg.raw,datImg.results_folder);
    const RawImage *datRaw = datImg.GetImage(); 

    /* Estimate sigma and t_mid_nuc */
    if (datIx == 0) {
      cout << "Doing acquisition t0" << endl;

      GenerateAcqT0Prior(config, datRaw->image, datRaw->baseFrameRate, datRaw->rows, datRaw->cols,
                         datRaw->frames, datRaw->timestamps,
                         config.row_step, config.col_step, &mask, t0, t0Prior);
      
      ClockTimer timer;
      cout << "Estimating sigma." << endl;
      sigmaTMid.Init(datRaw->rows, datRaw->cols, config.row_step, config.col_step);
      for (size_t bIx = 0; bIx < t0.GetNumRegions(); bIx++) {
        t0.SetT0(bIx, bfT0.GetT0(bIx));
      }
      int neighbors = 2;
      if (config.isThumbnail) {
        cout << "Doing thumbnail version of slope." << endl;
        neighbors = 1;
      }
      EstimateSigmaValue(t0, sigmaEst, sigmaTMid, neighbors);
      timer.PrintMilliSeconds(cout,"Sigma Est took:");
      string sigmaFile = outputDir + "/sigma_tmid_est.txt";
      OutputSigmaTmidEstimates(sigmaTMid, sigmaFile.c_str());
    }

    /* For each region do shifting */
    ClockTimer timer;
    cout << "Shifting traces" << endl;
    timer.StartTimer();
    //    ShiftTraces(bfT0, wellT0, datRaw->frames, datRaw->baseFrameRate, datRaw->timestamps, datRaw->image);
    timer.PrintMilliSeconds(cout,"Shift took:");
    if (!config.bg_param.empty()) {
      DataCube<int> rowsCols;
      DataCube<float> tmidSigma;
      DataCube<float> fitTmidSigma;
      string path = config.bg_param + ":/region/region_location";
      if (!H5File::ReadDataCube(path, rowsCols)) {
        ION_ABORT("Couldn't read file: " + path);
      }
      path = config.bg_param + ":/region/region_init_param";
      if (!H5File::ReadDataCube(path, fitTmidSigma)) {
        ION_ABORT("Couldn't read file: " + path);
      }
      for (size_t i = 0; i < rowsCols.GetNumX(); i++) {
        int row = rowsCols.At(i,1,0);
        int col = rowsCols.At(i,0,0);
        SigmaEst &est = sigmaTMid.GetItemByRowCol(row, col);
        float tmid_est =  fitTmidSigma.At(i,0,0);
        float sigma_est = fitTmidSigma.At(i,1,0);
        est.mTMidNuc = tmid_est;
        est.mSigma = sigma_est;
      }
      string fitSigmaFile = outputDir + "/bg_fit_sigma_tmid_est.txt";
      OutputSigmaTmidEstimates(sigmaTMid, fitSigmaFile.c_str());

      // path = config.bg_param + ":/region/region_init_param";
      // if (!H5File::ReadMatrix(path, tmidSigma)) {
      //   ION_ABORT("Couldn't read file: " + path);
      // }
      // for (size_t i = 0; i < rowsCols.n_rows; i++) {
      //   int row = rowsCols.at(i,0);
      //   int col = rowsCols.at(i,1);
      //   SigmaEst &est = sigmaTMid.GetItemByRowCol(row, col);
      //   float tmid_est =  tmidSigma.at(i,0);
      //   float sigma_est = tmidSigma.at(i,1);
      //   est.mTMidNuc = tmid_est;
      //   est.mSigma = sigma_est;
      // }
      // string sigmaFile = outputDir + "/supplied_sigma_tmid_est.txt";
      // OutputSigmaTmidEstimates(sigmaTMid, sigmaFile.c_str());
    }
    else if (config.use_hard_est) {
      for (size_t i = 0; i < bfT0.GetNumRegions(); i++) {
        bfT0.SetT0(i,config.t0_hard * datRaw->baseFrameRate + config.time_start_slop);
      }
      for (size_t i = 0; i < sigmaTMid.GetNumBin(); i++) {
        SigmaEst &est = sigmaTMid.GetItem(i);
        est.mTMidNuc = config.tmid_hard;
        est.mSigma = config.sigma_hard;
        est.mT0 = config.t0_hard;
      }
    }
    /* Use t0 and sigma to get the time compression bkgModel wants. */
    cout << "Generating chunks" << endl;
    //    GridMesh<TraceChunk> traceChunks;
    SynchDat sdat;
    if (datIx == 0  && config.grind_acq_0 > 0) {
      int nTimes = config.grind_acq_0;
      timer.StartTimer();
      size_t processMicroSec = 0;
      size_t hdf5MicroSec = 0;
      size_t compressMicroSec = 0;
      size_t convertMicroSec = 0;
      for (int i = 0; i <nTimes; i++) {
        //GridMesh<TraceChunk> traceChunken;
        SynchDat sdatIn;
        AddMetaData(sdat, datRaw, datIx);
	ClockTimer convTimer;
        GenerateDataChunks(config, bfT0, datRaw, config.row_step, config.col_step, sigmaTMid, sdatIn.mChunks,datImg);
	convertMicroSec += convTimer.GetMicroSec();
        snprintf(buffer, sizeof(buffer), "%s/acq_%.4d.sdat", outputDir.c_str(), (int)datIx);
        serializer.Write(buffer, sdatIn);
	processMicroSec += serializer.computeMicroSec;
	hdf5MicroSec += serializer.ioMicroSec;
	compressMicroSec += serializer.compressMicroSec;
      }
      size_t usec = timer.GetMicroSec();
      cout << "Took: " << usec / 1.0e6 << " seconds, " << usec / (nTimes * 1.0f) << " usec per write." << endl;
      timer.PrintMilliSeconds(cout,"Chunks took:");
      cout << "Read took: " << processMicroSec / (1e3 * nTimes) << " milli seconds per sdat compute." << endl;
      cout << "Read took: " << hdf5MicroSec / (1e3 * nTimes) << " milli seconds per sdat hdf5." << endl;
      cout << "Read took: " << compressMicroSec / (1e3 * nTimes) << " milli seconds per sdat compressing." << endl;
      cout << "Read took: " << convertMicroSec / (1e3 * nTimes) << " milli seconds per sdat converting." << endl;
      exit(0);
    }
    else {
      timer.StartTimer();
      AddMetaData(sdat, datRaw, datIx);
      GenerateDataChunks(config, bfT0, datRaw, config.row_step, config.col_step, sigmaTMid, sdat.mChunks,datImg);
      timer.PrintMilliSeconds(cout,"Chunks took:");
        if (datIx == 0 && config.doDebug) {
          OutputTraceChunks(sdat.mChunks,"flow_0_data_chunks.txt");
        }
    }
    datImg.Close();    

    /* Serialize onto disk. */
    snprintf(buffer, sizeof(buffer), "%s/acq_%.4d.sdat", outputDir.c_str(), (int)datIx);
    serializer.Write(buffer, sdat);
    /* Read back in first flow for checking */
    if (datIx == 0) {
      TraceChunkSerializer readSerializer;
      readSerializer.SetRecklessAbandon(true);
      //      GridMesh<TraceChunk> traceChunksIn;  
      SynchDat sdatIn;
      readSerializer.Read(buffer, sdatIn);
      if (datIx == 0 && config.doDebug) {
        OutputTraceChunks(sdatIn.mChunks, "flow_0_data_chunks_read.txt");
      }
      SampleQuantiles<float> s(50000);
      SampleQuantiles<float> s2(50000);
      SampleQuantiles<float> sAbs(50000);
      SampleStats<double> ss;
      int diffCount = 0;
      for (size_t bIx = 0; bIx < sdatIn.mChunks.mBins.size(); bIx++) {
        if (sdatIn.mChunks.mBins[bIx].mT0 != sdat.mChunks.mBins[bIx].mT0) {
          cout << "Got: " << sdatIn.mChunks.mBins[bIx].mT0 << " vs: " << sdat.mChunks.mBins[bIx].mT0 << endl;
          exit(1);
        }
        for (size_t i = 0; i < sdatIn.mChunks.mBins[bIx].mData.size(); i++) {
          double diff = (double)sdatIn.mChunks.mBins[bIx].mData[i] - (double)sdat.mChunks.mBins[bIx].mData[i];
          if (!std::isfinite(diff)) {
            cout << "NaNs!!" << endl;
          }
          if (diffCount < 10 && fabs(diff) > .00001) { // != 0) {
            diffCount++;
            cout << "Bin: " << bIx << " well: " << i << " diff is: " << diff << endl;
          }
          s.AddValue(diff);
          sAbs.AddValue(fabs(diff));
          ss.AddValue(sqrt(diff * diff));
          s2.AddValue(sqrt(diff * diff));
        }
      }
      cout << "Median rms: " << s2.GetMedian()  << " Avg: " << ss.GetMean() << " diff: " << s.GetMedian() << endl;
      cout << "Abs(diff) Quantiles:" << endl;
      for (size_t i = 0; i <= 100; i+=10) {
        cout << i << "\t" << sAbs.GetQuantile(i/100.0) << endl;
      }
    }      
  }
  // do the next N flows multithreaded
  if (numFlows > 1) {
    PJobQueue jQueue (config.numCores, numFlows-1);  
    vector<CreateSDat> jobs(numFlows -1);
    // for (int i = 0; i < 4; i++) {
    //   char buffer[2048];
    //   snprintf(buffer, sizeof(buffer), "%s/beadfind_pre_%.4d.dat", inputDir.c_str(), (int) i);
    //   string input = buffer;
    //   snprintf(buffer, sizeof(buffer), "%s/beadfind_pre_%.4d.sdat", outputDir.c_str(), (int)i);
    //   string output = buffer;
    //   jobs[i].Init(&config, input, output, &wellT0, &bfT0, &sigmaTMid);
    //   jQueue.AddJob(jobs[i]);
    // }

    // jQueue.WaitUntilDone();
    for (int i = 1; i < numFlows; i++) {
      char buffer[2048];
      snprintf(buffer, sizeof(buffer), "%s/acq_%.4d.dat", inputDir.c_str(), (int) i);
      string input = buffer;
      snprintf(buffer, sizeof(buffer), "%s/acq_%.4d.sdat", outputDir.c_str(), (int)i);
      string output = buffer;
      jobs[i-1].Init(&config, input, output, &wellT0, &bfT0, &sigmaTMid, i);
      jQueue.AddJob(jobs[i-1]);
    }
    jQueue.WaitUntilDone();
  }
  /* Serialize into backbround models */
  cout << "Done." << endl;
}

Пример #25

Показать файл

Файл: justBeadFind.cpp Проект: Brainiarc7/TS

/*************************************************************************************************
 *************************************************************************************************
 *
 *  Start of Main Function
 *
 *************************************************************************************************
 ************************************************************************************************/
int main (int argc, char *argv[])
{
  init_salute();

  ofstream null_ostream("/dev/null"); // must stay live for entire scope, or crash when writing
  TheSilenceOfTheArmadillos(null_ostream);

  TrackProgress my_progress;  
  DumpStartingStateOfProgram (argc,argv,my_progress);
   
  if(argc < 2)
  {
      PrintHelp();
  }

  for(int i = 1; i < argc; ++i)
  {
	  string s = argv[i];
	  if(s == "-" || s == "--")
	  {
	      cerr << "ERROR: command line input \"-\" must be followed by a short option name (a letter) and \"--\" must be followed by a long option name." << endl; 
		  exit ( EXIT_FAILURE );
	  }
	  else if(s == "-?" || s == "-h" || s == "--help")
	  {
	      PrintHelp();
	  }
  }

  ValidateOpts validater;
  validater.Validate(argc, argv);

  char** argv2 = new char*[argc];
  int datind = TrapAndDeprecateOldArgs(argc, argv, argv2);

  OptArgs opts;
  opts.ParseCmdLine(argc, (const char**)argv2);

  for(int k = 0; k < argc ; ++k)
  {
	  delete [] argv2[k];
  }
  delete [] argv2;
   
  Json::Value json_params;
  CommandLineOpts inception_state;
  inception_state.SetOpts(opts, json_params);

  if(datind < 0) // there is no "--dat-source-directory"
  {
	  inception_state.sys_context.dat_source_directory = argv[argc - 1];
	  cout << "dat_source_directory = " << inception_state.sys_context.dat_source_directory << endl;
  }

  inception_state.PostProcessArgs(opts);

  SeqListClass my_keys;
  ImageSpecClass my_image_spec;
  SlicedPrequel my_prequel_setup;  

  SetUpOrLoadInitialState(inception_state, my_keys, my_progress, my_image_spec, my_prequel_setup);

  // Start logging process parameters & timing now that we have somewhere to log
  my_progress.InitFPLog(inception_state);

  // Write processParameters.parse file now that processing is about to begin
  my_progress.WriteProcessParameters(inception_state);
  
  // Do separator
  Region wholeChip(0, 0, my_image_spec.cols, my_image_spec.rows);
  IsolatedBeadFind( my_prequel_setup, my_image_spec, wholeChip, inception_state,
        inception_state.sys_context.GetResultsFolder(), inception_state.sys_context.analysisLocation,  my_keys, my_progress);

  exit (EXIT_SUCCESS);
}

Пример #26

Показать файл

Файл: prepare_hotspots.cpp Проект: GerritvanNiekerk/TS

int PrepareHotspots(int argc, const char *argv[])
{
  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  string input_bed_filename       = opts.GetFirstString ('b', "input-bed", "");
  string input_vcf_filename       = opts.GetFirstString ('v', "input-vcf", "");
  string output_bed_filename      = opts.GetFirstString ('d', "output-bed", "");
  string output_vcf_filename      = opts.GetFirstString ('o', "output-vcf", "");
  string reference_filename       = opts.GetFirstString ('r', "reference", "");
  bool left_alignment             = opts.GetFirstBoolean('a', "left-alignment", false);
  bool filter_bypass              = opts.GetFirstBoolean('f', "filter-bypass", false);
  bool allow_block_substitutions  = opts.GetFirstBoolean('s', "allow-block-substitutions", false);
  opts.CheckNoLeftovers();

  if((input_bed_filename.empty() == input_vcf_filename.empty()) or
      (output_bed_filename.empty() and output_vcf_filename.empty()) or reference_filename.empty()) {
    PrepareHotspotsHelp();
    return 1;
  }


  // Populate chromosome list from reference.fai
  // Use mmap to fetch the entire reference

  int ref_handle = open(reference_filename.c_str(),O_RDONLY);

  struct stat ref_stat;
  fstat(ref_handle, &ref_stat);
  char *ref = (char *)mmap(0, ref_stat.st_size, PROT_READ, MAP_SHARED, ref_handle, 0);


  FILE *fai = fopen((reference_filename+".fai").c_str(), "r");
  if (!fai) {
    fprintf(stderr, "ERROR: Cannot open %s.fai\n", reference_filename.c_str());
    return 1;
  }

  vector<Reference>  ref_index;
  map<string,int> ref_map;
  char line[1024], chrom_name[1024];
  while (fgets(line, 1024, fai) != NULL) {
    Reference ref_entry;
    long chr_start;
    if (5 != sscanf(line, "%s\t%ld\t%ld\t%d\t%d", chrom_name, &ref_entry.size, &chr_start,
                    &ref_entry.bases_per_line, &ref_entry.bytes_per_line))
      continue;
    ref_entry.chr = chrom_name;
    ref_entry.start = ref + chr_start;
    ref_index.push_back(ref_entry);
    ref_map[ref_entry.chr] = (int) ref_index.size() - 1;
  }
  fclose(fai);


  // Load input BED or load input VCF, group by chromosome

  deque<LineStatus> line_status;
  vector<deque<Allele> > alleles(ref_index.size());

  if (!input_bed_filename.empty()) {

    FILE *input = fopen(input_bed_filename.c_str(),"r");
    if (!input) {
      fprintf(stderr,"ERROR: Cannot open %s\n", input_bed_filename.c_str());
      return 1;
    }

    char line2[65536];

    int line_number = 0;
    bool line_overflow = false;
    while (fgets(line2, 65536, input) != NULL) {
      if (line2[0] and line2[strlen(line2)-1] != '\n' and strlen(line2) == 65535) {
        line_overflow = true;
        continue;
      }
      line_number++;
      if (line_overflow) {
        line_overflow = false;
        line_status.push_back(LineStatus(line_number));
        line_status.back().filter_message_prefix = "Malformed hotspot BED line: line length exceeds 64K";
        continue;
      }

      if (strncmp(line2, "browser", 7) == 0)
        continue;

      if (strncmp(line2, "track", 5) == 0) {
        if (string::npos != string(line2).find("allowBlockSubstitutions=true"))
          allow_block_substitutions = true;
        continue;
      }

      char *current_chr = strtok(line2, "\t\r\n");
      char *current_start = strtok(NULL, "\t\r\n");
      char *current_end = strtok(NULL, "\t\r\n");
      char *current_id = strtok(NULL, "\t\r\n");
      char *penultimate = strtok(NULL, "\t\r\n");
      char *ultimate = strtok(NULL, "\t\r\n");
      for (char *next = strtok(NULL, "\t\r\n"); next; next = strtok(NULL, "\t\r\n")) {
        penultimate = ultimate;
        ultimate = next;
      }

      if (!current_chr or !current_start or !current_end or !current_id or !penultimate or !ultimate) {
        line_status.push_back(LineStatus(line_number));
        line_status.back().filter_message_prefix = "Malformed hotspot BED line: expected at least 6 fields";
        continue;
      }

      Allele allele;

      string string_chr(current_chr);
      if (ref_map.find(string_chr) != ref_map.end())
        allele.chr_idx = ref_map[string_chr];
      else if (ref_map.find("chr"+string_chr) != ref_map.end())
        allele.chr_idx = ref_map["chr"+string_chr];
      else if (string_chr == "MT" and ref_map.find("chrM") != ref_map.end())
        allele.chr_idx = ref_map["chrM"];
      else {
        line_status.push_back(LineStatus(line_number));
        line_status.back().filter_message_prefix = "Unknown chromosome name: ";
        line_status.back().filter_message = string_chr;
        continue;
      }

      allele.pos = strtol(current_start,NULL,10);
      allele.id = current_id;

      char *current_ref = NULL;
      char *current_alt = NULL;
      for (char *next = strtok(penultimate, ";"); next; next = strtok(NULL, ";")) {
        if (strncmp(next,"REF=",4) == 0)
          current_ref = next;
        else if (strncmp(next,"OBS=",4) == 0)
          current_alt = next;
      }
      if (!current_ref or !current_alt) {
        line_status.push_back(LineStatus(line_number));
        line_status.back().filter_message_prefix = "Malformed hotspot BED line: REF and OBS fields required in penultimate column";
        continue;
      }
      for (char *pos = current_ref+4; *pos; ++pos)
        allele.ref += toupper(*pos);
      for (char *pos = current_alt+4; *pos; ++pos)
        allele.alt += toupper(*pos);
      allele.filtered = false;
      line_status.push_back(LineStatus(line_number));
      allele.line_status = &line_status.back();
      allele.opos = allele.pos;
      allele.oref = allele.ref;
      allele.oalt = allele.alt;
      alleles[allele.chr_idx].push_back(allele);
      line_status.back().allele = &alleles[allele.chr_idx].back();
    }

    fclose(input);
  }


  if (!input_vcf_filename.empty()) {

    FILE *input = fopen(input_vcf_filename.c_str(),"r");
    if (!input) {
      fprintf(stderr,"ERROR: Cannot open %s\n", input_vcf_filename.c_str());
      return 1;
    }

    char line2[65536];
    int line_number = 0;
    bool line_overflow = false;
    while (fgets(line2, 65536, input) != NULL) {
      if (line2[0] and line2[strlen(line2)-1] != '\n' and strlen(line2) == 65535) {
        line_overflow = true;
        continue;
      }
      line_number++;
      if (line_overflow) {
        line_overflow = false;
        line_status.push_back(LineStatus(line_number));
        line_status.back().filter_message_prefix = "Malformed hotspot VCF line: line length exceeds 64K";
        continue;
      }

      if (strncmp(line2, "##allowBlockSubstitutions=true", 30) == 0) {
        allow_block_substitutions = true;
        continue;
      }
      if (line2[0] == '#')
        continue;

      char *current_chr = strtok(line2, "\t\r\n");
      char *current_start = strtok(NULL, "\t\r\n");
      char *current_id = strtok(NULL, "\t\r\n");
      char *current_ref = strtok(NULL, "\t\r\n");
      char *current_alt = strtok(NULL, "\t\r\n");

      if (!current_chr or !current_start or !current_id or !current_ref or !current_alt) {
        line_status.push_back(LineStatus(line_number));
        line_status.back().filter_message_prefix = "Malformed hotspot VCF line: expected at least 5 fields";
        continue;
      }


      string string_chr(current_chr);
      int chr_idx = 0;
      if (ref_map.find(string_chr) != ref_map.end())
        chr_idx = ref_map[string_chr];
      else if (ref_map.find("chr"+string_chr) != ref_map.end())
        chr_idx = ref_map["chr"+string_chr];
      else if (string_chr == "MT" and ref_map.find("chrM") != ref_map.end())
        chr_idx = ref_map["chrM"];
      else {
        line_status.push_back(LineStatus(line_number));
        line_status.back().filter_message_prefix = "Unknown chromosome name: ";
        line_status.back().filter_message = string_chr;
        continue;
      }

      for (char *pos = current_ref; *pos; ++pos)
        *pos = toupper(*pos);
      for (char *pos = current_alt; *pos; ++pos)
        *pos = toupper(*pos);


      for (char *sub_alt = strtok(current_alt,","); sub_alt; sub_alt = strtok(NULL,",")) {

        Allele allele;
        allele.chr_idx = chr_idx;
        allele.ref = current_ref;
        allele.alt = sub_alt;
        allele.pos = strtol(current_start,NULL,10)-1;
        allele.id = current_id;
        if (allele.id == ".")
          allele.id = "hotspot";

        allele.filtered = false;
        line_status.push_back(LineStatus(line_number));
        allele.line_status = &line_status.back();
        allele.opos = allele.pos;
        allele.oref = allele.ref;
        allele.oalt = allele.alt;
        alleles[allele.chr_idx].push_back(allele);
        line_status.back().allele = &alleles[allele.chr_idx].back();
      }
    }

    fclose(input);
  }

  // Process by chromosome:
  //   - Verify reference allele
  //   - Left align
  //   - Sort
  //   - Filter for block substitutions, write

  FILE *output_vcf = NULL;
  if (!output_vcf_filename.empty()) {
    output_vcf = fopen(output_vcf_filename.c_str(), "w");
    if (!output_vcf) {
      fprintf(stderr,"ERROR: Cannot open %s for writing\n", output_vcf_filename.c_str());
      return 1;
    }
    fprintf(output_vcf, "##fileformat=VCFv4.1\n");
    if (allow_block_substitutions)
      fprintf(output_vcf, "##allowBlockSubstitutions=true\n");
    fprintf(output_vcf, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n");
  }
  FILE *output_bed = NULL;
  if (!output_bed_filename.empty()) {
    output_bed = fopen(output_bed_filename.c_str(), "w");
    if (!output_bed) {
      fprintf(stderr,"ERROR: Cannot open %s for writing\n", output_bed_filename.c_str());
      if (output_vcf)
        fclose(output_vcf);
      return 1;
    }
    if (allow_block_substitutions)
      fprintf(output_bed, "track name=\"hotspot\" type=bedDetail allowBlockSubstitutions=true\n");
    else
      fprintf(output_bed, "track name=\"hotspot\" type=bedDetail\n");
  }


  for (int chr_idx = 0; chr_idx < (int)ref_index.size(); ++chr_idx) {

    for (deque<Allele>::iterator A = alleles[chr_idx].begin(); A != alleles[chr_idx].end(); ++A) {

      // Invalid characters

      bool valid = true;
      for (const char *c = A->ref.c_str(); *c ; ++c)
        if (*c != 'A' and *c != 'C' and *c != 'G' and *c != 'T')
          valid = false;
      for (const char *c = A->alt.c_str(); *c ; ++c)
        if (*c != 'A' and *c != 'C' and *c != 'G' and *c != 'T')
          valid = false;
      if (not valid) {
        A->filtered = true;
        A->line_status->filter_message_prefix = "REF and/or ALT contain characters other than ACGT: ";
        A->line_status->filter_message = "REF = " + A->ref + " ALT = " + A->alt;
        continue;
      }

      // Filter REF == ALT

      if (A->ref == A->alt) {
        A->filtered = true;
        A->line_status->filter_message_prefix = "REF and ALT alleles equal";
        continue;
      }

      // Confirm reference allele.

      string ref_expected;
      for (int idx = 0; idx < (int) A->ref.size(); ++idx)
        ref_expected += ref_index[chr_idx].base(A->pos + idx);
      if (A->ref != ref_expected) {
        A->filtered = true;
        A->line_status->filter_message_prefix = "Provided REF allele does not match reference: ";
        A->line_status->filter_message = "Expected " + ref_expected + ", found " + A->ref;
        continue;
      }

      // Trim

      int ref_start = 0;
      int ref_end = A->ref.size();
      int alt_end = A->alt.size();

      // Option 1: trim all trailing bases

      //while(ref_end and alt_end and A->ref[ref_end-1] == A->alt[alt_end-1]) {
      //  --ref_end;
      //  --alt_end;
      //}

      // Option 2: trim all leading basees

      //while (ref_start < ref_end and ref_start < alt_end and A->ref[ref_start] == A->alt[ref_start])
      //  ++ref_start;


      // Option 3: trim anchor base if vcf

      if (!input_vcf_filename.empty()) {
        if (ref_end and alt_end and (ref_end == 1 or alt_end == 1) and A->ref[0] == A->alt[0])
          ref_start = 1;
      }

      A->pos += ref_start;
      A->ref = A->ref.substr(ref_start, ref_end-ref_start);
      A->alt = A->alt.substr(ref_start, alt_end-ref_start);
      ref_end -= ref_start;
      alt_end -= ref_start;

      // Left align
      if (left_alignment) {
        while (A->pos > 0) {
          char nuc = ref_index[chr_idx].base(A->pos-1);
          if (ref_end > 0 and A->ref[ref_end-1] != nuc)
            break;
          if (alt_end > 0 and A->alt[alt_end-1] != nuc)
            break;
          A->ref = string(1,nuc) + A->ref;
          A->alt = string(1,nuc) + A->alt;
          A->pos--;
        }
      }
      A->ref.resize(ref_end);
      A->alt.resize(alt_end);


      // Filter block substitutions: take 1

      if (ref_end > 0 and alt_end > 0 and ref_end != alt_end and not allow_block_substitutions and not filter_bypass) {
        A->filtered = true;
        A->line_status->filter_message_prefix = "Block substitutions not supported";
        continue;
      }

    }



    if (output_bed) {
      // Sort - without anchor base
      sort(alleles[chr_idx].begin(), alleles[chr_idx].end(), compare_alleles);

      // Write
      for (deque<Allele>::iterator I = alleles[chr_idx].begin(); I != alleles[chr_idx].end(); ++I) {
        if (I->filtered)
          continue;
        if (I->pos)
          fprintf(output_bed, "%s\t%ld\t%ld\t%s\t0\t+\tREF=%s;OBS=%s;ANCHOR=%c\tNONE\n",
              ref_index[chr_idx].chr.c_str(), I->pos, I->pos + I->ref.size(), I->id.c_str(),
              I->ref.c_str(), I->alt.c_str(), ref_index[chr_idx].base(I->pos-1));
        else
          fprintf(output_bed, "%s\t%ld\t%ld\t%s\t0\t+\tREF=%s;OBS=%s;ANCHOR=\tNONE\n",
              ref_index[chr_idx].chr.c_str(), I->pos, I->pos + I->ref.size(), I->id.c_str(),
              I->ref.c_str(), I->alt.c_str());
      }
    }


    if (output_vcf) {

      // Add anchor base to indels
      for (deque<Allele>::iterator I = alleles[chr_idx].begin(); I != alleles[chr_idx].end(); ++I) {
        if (I->filtered)
          continue;
        if (not I->ref.empty() and not I->alt.empty())
          continue;
        if (I->pos == 0) {
          I->filtered = true;
          I->line_status->filter_message_prefix = "INDELs at chromosome start not supported";
          continue;
        }
        I->pos--;
        I->ref = string(1,ref_index[chr_idx].base(I->pos)) + I->ref;
        I->alt = string(1,ref_index[chr_idx].base(I->pos)) + I->alt;
      }

      // Sort - with anchor base
      sort(alleles[chr_idx].begin(), alleles[chr_idx].end(), compare_alleles);


      // Merge alleles, remove block substitutions, write
      for (deque<Allele>::iterator A = alleles[chr_idx].begin(); A != alleles[chr_idx].end(); ) {

        string max_ref;
        deque<Allele>::iterator B = A;
        for (; B != alleles[chr_idx].end() and B->pos == A->pos; ++B)
          if (!B->filtered and max_ref.size() < B->ref.size())
            max_ref = B->ref;

        bool filtered = true;
        for (deque<Allele>::iterator I = A; I != B; ++I) {
          if (I->filtered)
            continue;

          string new_alt = I->alt + max_ref.substr(I->ref.size());

          if (new_alt.size() > 1 and max_ref.size() > 1 and new_alt.size() != max_ref.size() and not allow_block_substitutions and not filter_bypass) {
            I->filtered = true;
            I->line_status->filter_message_prefix = "Block substitutions not supported (post-merge)";
            continue;
          }

          I->ref = max_ref;
          I->alt = new_alt;
          filtered = false;
        }

        if (not filtered) {

          fprintf(output_vcf, "%s\t%ld\t.\t%s\t",
              ref_index[chr_idx].chr.c_str(), A->pos+1, max_ref.c_str());

          bool comma = false;
          set<string> unique_alt_alleles;
          for (deque<Allele>::iterator I = A; I != B; ++I) {
            if (I->filtered)
              continue;
            if (unique_alt_alleles.count(I->alt) > 0)
              continue;
            unique_alt_alleles.insert(I->alt);
            if (comma)
              fprintf(output_vcf, ",");
            comma = true;
            fprintf(output_vcf, "%s", I->alt.c_str());
          }

          fprintf(output_vcf, "\t.\t.\tOID=");
          comma = false;
          for (deque<Allele>::iterator I = A; I != B; ++I) {
            if (I->filtered)
              continue;
            if (comma)
              fprintf(output_vcf, ",");
            comma = true;
            fprintf(output_vcf, "%s", I->id.c_str());
          }

          fprintf(output_vcf, ";OPOS=");
          comma = false;
          for (deque<Allele>::iterator I = A; I != B; ++I) {
            if (I->filtered)
              continue;
            if (comma)
              fprintf(output_vcf, ",");
            comma = true;
            fprintf(output_vcf, "%ld", I->opos+1);
          }

          fprintf(output_vcf, ";OREF=");
          comma = false;
          for (deque<Allele>::iterator I = A; I != B; ++I) {
            if (I->filtered)
              continue;
            if (comma)
              fprintf(output_vcf, ",");
            comma = true;
            fprintf(output_vcf, "%s", I->oref.c_str());
          }

          fprintf(output_vcf, ";OALT=");
          comma = false;
          for (deque<Allele>::iterator I = A; I != B; ++I) {
            if (I->filtered)
              continue;
            if (comma)
              fprintf(output_vcf, ",");
            comma = true;
            fprintf(output_vcf, "%s", I->oalt.c_str());
          }

          fprintf(output_vcf, ";OMAPALT=");
          comma = false;
          for (deque<Allele>::iterator I = A; I != B; ++I) {
            if (I->filtered)
              continue;
            if (comma)
              fprintf(output_vcf, ",");
            comma = true;
            fprintf(output_vcf, "%s", I->alt.c_str());
          }

          fprintf(output_vcf, "\n");
        }

        A = B;
      }
    }
  }



  if (output_bed) {
    fflush(output_bed);
    fclose(output_bed);
  }
  if (output_vcf) {
    fflush(output_vcf);
    fclose(output_vcf);
  }


  int lines_ignored = 0;
  for (deque<LineStatus>::iterator L = line_status.begin(); L != line_status.end(); ++L) {
    if (L->filter_message_prefix) {
      if (L->allele)
        printf("Line %d ignored: [%s:%ld %s] %s%s\n", L->line_number, ref_index[L->allele->chr_idx].chr.c_str(), L->allele->opos+1, L->allele->id.c_str(),
            L->filter_message_prefix, L->filter_message.c_str());
      else
        printf("Line %d ignored: %s%s\n", L->line_number, L->filter_message_prefix, L->filter_message.c_str());
      lines_ignored++;
    }
  }
  printf("Ignored %d out of %d lines\n", lines_ignored, (int)line_status.size());


  munmap(ref, ref_stat.st_size);
  close(ref_handle);

  return 0;
}

Пример #27

Показать файл

Файл: prepare_hotspots.cpp Проект: iontorrent/TS

int PrepareHotspots(int argc, const char *argv[])
{
  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  string input_bed_filename       = opts.GetFirstString ('b', "input-bed", "");
  string input_vcf_filename       = opts.GetFirstString ('v', "input-vcf", "");
  string input_real_vcf_filename  = opts.GetFirstString ('p', "input-real-vcf", "");
  string output_hot_vcf		  = opts.GetFirstString ('q', "output-fake-hot-vcf", "");
  string output_bed_filename      = opts.GetFirstString ('d', "output-bed", "");
  string output_vcf_filename      = opts.GetFirstString ('o', "output-vcf", "");
  string reference_filename       = opts.GetFirstString ('r', "reference", "");
  string unmerged_bed 		  = opts.GetFirstString ('u', "unmerged-bed", "");
  bool left_alignment             = opts.GetFirstBoolean('a', "left-alignment", false);
  bool filter_bypass              = opts.GetFirstBoolean('f', "filter-bypass", false);
  bool allow_block_substitutions  = opts.GetFirstBoolean('s', "allow-block-substitutions", true);
  bool strict_check               = opts.GetFirstBoolean('S', "strict-check", true);
  opts.CheckNoLeftovers();

  if((input_bed_filename.empty() == (input_vcf_filename.empty() and input_real_vcf_filename.empty())) or
      (output_bed_filename.empty() and output_vcf_filename.empty()) or reference_filename.empty()) {
    PrepareHotspotsHelp();
    return 1;
  }
  if ((not input_real_vcf_filename.empty()) and (output_vcf_filename.empty() or not input_vcf_filename.empty())) {
    PrepareHotspotsHelp();
    return 1;
  }


  // Populate chromosome list from reference.fai
  // Use mmap to fetch the entire reference

  int ref_handle = open(reference_filename.c_str(),O_RDONLY);

  struct stat ref_stat;
  fstat(ref_handle, &ref_stat);
  char *ref = (char *)mmap(0, ref_stat.st_size, PROT_READ, MAP_SHARED, ref_handle, 0);


  FILE *fai = fopen((reference_filename+".fai").c_str(), "r");
  if (!fai) {
    fprintf(stderr, "ERROR: Cannot open %s.fai\n", reference_filename.c_str());
    return 1;
  }

  vector<Reference>  ref_index;
  map<string,int> ref_map;
  char line[1024], chrom_name[1024];
  while (fgets(line, 1024, fai) != NULL) {
    Reference ref_entry;
    long chr_start;
    if (5 != sscanf(line, "%1020s\t%ld\t%ld\t%d\t%d", chrom_name, &ref_entry.size, &chr_start,
                    &ref_entry.bases_per_line, &ref_entry.bytes_per_line))
      continue;
    ref_entry.chr = chrom_name;
    ref_entry.start = ref + chr_start;
    ref_index.push_back(ref_entry);
    ref_map[ref_entry.chr] = (int) ref_index.size() - 1;
  }
  fclose(fai);
  junction junc;
  if (!unmerged_bed.empty()) {
    FILE *fp = fopen(unmerged_bed.c_str(), "r");
    if (!fp) {
	fprintf(stderr, "ERROR: Cannot open %s\n", unmerged_bed.c_str());
	return 1;
    }
    char line2[65536];

    junc.init(ref_index.size());
    bool line_overflow = false;
    while (fgets(line2, 65536, fp) != NULL) {
      if (line2[0] and line2[strlen(line2)-1] != '\n' and strlen(line2) == 65535) {
        line_overflow = true;
	continue;
      }
      if (line_overflow) {
        line_overflow = false;
        continue;
      }
     if (strstr(line2, "track")) continue;
      char chr[100];
      int b, e;
      sscanf(line2, "%s %d %d", chr,  &b, &e);
      junc.add(ref_map[chr], b, e);
    }
    fclose(fp);
  }

  // Load input BED or load input VCF, group by chromosome

  deque<LineStatus> line_status;
  vector<deque<Allele> > alleles(ref_index.size());

  if (!input_bed_filename.empty()) {

    FILE *input = fopen(input_bed_filename.c_str(),"r");
    if (!input) {
      fprintf(stderr,"ERROR: Cannot open %s\n", input_bed_filename.c_str());
      return 1;
    }

    char line2[65536];

    int line_number = 0;
    bool line_overflow = false;
    while (fgets(line2, 65536, input) != NULL) {
      if (line2[0] and line2[strlen(line2)-1] != '\n' and strlen(line2) == 65535) {
        line_overflow = true;
        continue;
      }
      line_number++;
      if (line_overflow) {
        line_overflow = false;
        line_status.push_back(LineStatus(line_number));
        line_status.back().filter_message_prefix = "Malformed hotspot BED line: line length exceeds 64K";
        continue;
      }

      if (strncmp(line2, "browser", 7) == 0)
        continue;

      if (strncmp(line2, "track", 5) == 0) {
        if (string::npos != string(line2).find("allowBlockSubstitutions=true"))
          allow_block_substitutions = true;
        continue;
      }

      // OID= table has special meaning
      if (string::npos != string(line2).find("OID=")) {
	line_status.push_back(LineStatus(line_number));
        line_status.back().filter_message_prefix = "Bed line contains OID=";
        continue;
      }

      char *current_chr = strtok(line2, "\t\r\n");
      char *current_start = strtok(NULL, "\t\r\n");
      char *current_end = strtok(NULL, "\t\r\n");
      char *current_id = strtok(NULL, "\t\r\n");
      char *penultimate = strtok(NULL, "\t\r\n");
      char *ultimate = strtok(NULL, "\t\r\n");
      for (char *next = strtok(NULL, "\t\r\n"); next; next = strtok(NULL, "\t\r\n")) {
        penultimate = ultimate;
        ultimate = next;
      }

      if (!current_chr or !current_start or !current_end or !current_id or !penultimate or !ultimate) {
        line_status.push_back(LineStatus(line_number));
        line_status.back().filter_message_prefix = "Malformed hotspot BED line: expected at least 6 fields";
        continue;
      }

      Allele allele;

      string string_chr(current_chr);
      if (ref_map.find(string_chr) != ref_map.end())
        allele.chr_idx = ref_map[string_chr];
      else if (ref_map.find("chr"+string_chr) != ref_map.end())
        allele.chr_idx = ref_map["chr"+string_chr];
      else if (string_chr == "MT" and ref_map.find("chrM") != ref_map.end())
        allele.chr_idx = ref_map["chrM"];
      else {
        line_status.push_back(LineStatus(line_number));
        line_status.back().filter_message_prefix = "Unknown chromosome name: ";
        line_status.back().filter_message = string_chr;
        continue;
      }

      allele.pos = strtol(current_start,NULL,10);
      allele.id = current_id;

      char *current_ref = NULL;
      char *current_alt = NULL;
      for (char *next = strtok(penultimate, ";"); next; next = strtok(NULL, ";")) {
        if (strncmp(next,"REF=",4) == 0)
          current_ref = next;
        else if (strncmp(next,"OBS=",4) == 0)
          current_alt = next;
        else if (strncmp(next,"ANCHOR=",7) == 0) {
          // ignore ANCHOR
        } else {
          char *value = next;
          while (*value and *value != '=')
            ++value;
          if (*value == '=')
            *value++ = 0;
          allele.custom_tags[next] = value;
        }
      }
      if (!current_ref or !current_alt) {
        line_status.push_back(LineStatus(line_number));
        line_status.back().filter_message_prefix = "Malformed hotspot BED line: REF and OBS fields required in penultimate column";
        continue;
      }
      for (char *pos = current_ref+4; *pos; ++pos)
        allele.ref += toupper(*pos);
      for (char *pos = current_alt+4; *pos; ++pos)
        allele.alt += toupper(*pos);
      // here is the place to check the length of the hotspot cover the amplicon junction. ZZ
      /*
      if (junc.contain(allele.chr_idx, allele.pos, (unsigned int) allele.ref.size())) {
	line_status.push_back(LineStatus(line_number));
        line_status.back().filter_message_prefix = "hotspot BED line contain the complete overlapping region of two amplicon, the variant cannot be detected by tvc";
        continue;
      }
      if (not junc.contained_in_ampl(allele.chr_idx, allele.pos, (unsigned int) allele.ref.size())) {
        line_status.push_back(LineStatus(line_number));
        line_status.back().filter_message_prefix = "hotspot BED line is not contained in any amplicon, the variant cannot be detected by tvc";
        continue;
      }
      */

      allele.filtered = false;
      line_status.push_back(LineStatus(line_number));
      allele.line_status = &line_status.back();
      allele.opos = allele.pos;
      allele.oref = allele.ref;
      allele.oalt = allele.alt;
      alleles[allele.chr_idx].push_back(allele);
      //line_status.back().allele = &alleles[allele.chr_idx].back();
      line_status.back().chr_idx = allele.chr_idx;
      line_status.back().opos = allele.opos;
      line_status.back().id = allele.id;
    }

    fclose(input);
  }



  if (!input_vcf_filename.empty() or !input_real_vcf_filename.empty()) {

    bool real_vcf = false;
    FILE *input;
    FILE *out_real = NULL;
    FILE *out_hot = NULL;
    int fake_ = 0;
    int hn = 1;
    if (!input_real_vcf_filename.empty()) {
	real_vcf = true;
	input = fopen(input_real_vcf_filename.c_str(),"r");
	if (!input) {
	    fprintf(stderr,"ERROR: Cannot open %s\n", input_real_vcf_filename.c_str());
            return 1;
	}
	out_real = fopen(output_vcf_filename.c_str(), "w");
	if (!out_real) {
            fprintf(stderr,"ERROR: Cannot open %s\n", output_vcf_filename.c_str());
            return 1;
        }
	if (!output_hot_vcf.empty()) {
	    out_hot = fopen(output_hot_vcf.c_str(), "w");
	    if (!out_hot) {
		fprintf(stderr,"ERROR: Cannot open %s\n", output_hot_vcf.c_str());
		return 1;
	    } 
   	} else out_hot = stdout;
	fprintf(out_hot, "##fileformat=VCFv4.1\n##allowBlockSubstitutions=true\n#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO\n");
    } else {
        input = fopen(input_vcf_filename.c_str(),"r");
        if (!input) {
            fprintf(stderr,"ERROR: Cannot open %s\n", input_vcf_filename.c_str());
            return 1;
    	}
    }

    char line2[65536];
    char line3[65536];
    int line_number = 0;
    bool line_overflow = false;
    list<one_vcfline> vcflist;

    char last_chr[1024] = "";
    while (fgets(line2, 65536, input) != NULL) {
      if (line2[0] and line2[strlen(line2)-1] != '\n' and strlen(line2) == 65535) {
        line_overflow = true;
        continue;
      }
      line_number++;
      if (line_overflow) {
        line_overflow = false;
        line_status.push_back(LineStatus(line_number));
        line_status.back().filter_message_prefix = "Malformed hotspot VCF line: line length exceeds 64K";
        continue;
      }

      if (strncmp(line2, "##allowBlockSubstitutions=true", 30) == 0) {
        allow_block_substitutions = true;
        continue;
      }
      if (line2[0] == '#') {
	if (out_real) { fprintf(out_real, "%s", line2);}
        continue;
      }

      if (real_vcf) strcpy(line3, line2);
      char *current_chr = strtok(line2, "\t\r\n");
      char *current_start = strtok(NULL, "\t\r\n");
      char *current_id = strtok(NULL, "\t\r\n");
      char *current_ref = strtok(NULL, "\t\r\n");
      char *current_alt = strtok(NULL, "\t\r\n");
      strtok(NULL, "\t\r\n"); // Ignore QUAL
      strtok(NULL, "\t\r\n"); // Ignore FILTER
      char *current_info = strtok(NULL, "\t\r\n");
      strtok(NULL, "\t\r\n");
      char *gt = strtok(NULL, "\t\r\n");

      if (!current_chr or !current_start or !current_id or !current_ref or !current_alt) {
        line_status.push_back(LineStatus(line_number));
        if (real_vcf) line_status.back().filter_message_prefix = "Malformed real VCF line: expected at least 5 fields";
	else line_status.back().filter_message_prefix = "Malformed hotspot VCF line: expected at least 5 fields";
        continue;
      }


      string string_chr(current_chr);
      int chr_idx = 0;
      if (ref_map.find(string_chr) != ref_map.end())
        chr_idx = ref_map[string_chr];
      else if (ref_map.find("chr"+string_chr) != ref_map.end())
        chr_idx = ref_map["chr"+string_chr];
      else if (string_chr == "MT" and ref_map.find("chrM") != ref_map.end())
        chr_idx = ref_map["chrM"];
      else {
        line_status.push_back(LineStatus(line_number));
        line_status.back().filter_message_prefix = "Unknown chromosome name: ";
        line_status.back().filter_message = string_chr;
        continue;
      }

      for (char *pos = current_ref; *pos; ++pos)
        *pos = toupper(*pos);
      for (char *pos = current_alt; *pos; ++pos)
        *pos = toupper(*pos);


      // Process custom tags
      vector<string>  bstrand;
      vector<string>  hp_max_length;
      string raw_oid;
      string raw_omapalt;
      string raw_oalt;
      string raw_oref;
      string raw_opos;

      if (current_info) {
        string raw_bstrand;
        string raw_hp_max_length;
        for (char *next = strtok(current_info, ";"); next; next = strtok(NULL, ";")) {

          char *value = next;
          while (*value and *value != '=')
            ++value;
          if (*value == '=')
            *value++ = 0;

          if (strcmp(next, "TYPE") == 0)
            continue;
          if (strcmp(next, "HRUN") == 0)
            continue;
          if (strcmp(next, "HBASE") == 0)
            continue;
          if (strcmp(next, "FR") == 0)
            continue;
          if (strcmp(next, "OPOS") == 0) {
	    raw_opos = value;
            continue;
	  }
          if (strcmp(next, "OREF") == 0) {
	    raw_oref = value;
            continue;
	  }
          if (strcmp(next, "OALT") == 0) {
	    raw_oalt = value;
            continue;
	  }
          if (strcmp(next, "OID") == 0) {
            raw_oid = value;
            continue;
          }
          if (strcmp(next, "OMAPALT") == 0) {
            raw_omapalt = value;
            continue;
          }
          if (strcmp(next, "BSTRAND") == 0) {
            raw_bstrand = value;
            continue;
          }
          if (strcmp(next, "hp_max_length") == 0) {
            raw_hp_max_length = value;
            continue;
          }
        }

        if (not raw_bstrand.empty())
          split(raw_bstrand, ',', bstrand);
        if (not raw_hp_max_length.empty())
          split(raw_hp_max_length, ',', hp_max_length);

      }

      if (real_vcf) {
	//fprintf(stderr, "%s\n", gt);
        if (gt == NULL) continue;
	// get gt
	int g1 = atoi(gt), g2;
	gt = strchr(gt, '/');
	if (gt) g2 = atoi(gt+1);
	else {fprintf(stderr, "GT not formatted right\n"); exit(1);}
	//if (g1 == 0 and g2 == 0) continue;
	unsigned int cur_pos = atoi(current_start);
	one_vcfline newline(current_ref, current_alt, cur_pos, g1, g2, line3);
	bool new_chr = false;
	if (strcmp(current_chr, last_chr) != 0) {
	    new_chr = true;
	}
	while (not vcflist.empty()) {
	    if ((not new_chr) and vcflist.front().pos+strlen(vcflist.front().ref) > cur_pos) break;
	    if (vcflist.front().produce_hot_vcf(last_chr, out_real, hn, out_hot)) fake_++;
	    vcflist.pop_front();
	}
	if (new_chr) strcpy(last_chr, current_chr);
	for (list<one_vcfline>::iterator it = vcflist.begin(); it != vcflist.end(); it++) {
	    it->check_subset(newline);
	}
	if (not newline.alts.empty()) vcflist.push_back(newline);
	continue;
      } 
      unsigned int allele_idx = 0;
      for (char *sub_alt = strtok(current_alt,","); sub_alt; sub_alt = strtok(NULL,",")) {

        Allele allele;
        allele.chr_idx = chr_idx;
        allele.ref = current_ref;
        allele.alt = sub_alt;
        allele.pos = strtol(current_start,NULL,10)-1;
        allele.id = current_id;
        if (allele.id == ".")
          allele.id = "hotspot";

        allele.filtered = false;
        line_status.push_back(LineStatus(line_number));
        allele.line_status = &line_status.back();
        allele.opos = allele.pos;
        allele.oref = allele.ref;
        allele.oalt = allele.alt;

        if (allele_idx < bstrand.size()) {
          if (bstrand[allele_idx] != ".")
            allele.custom_tags["BSTRAND"] = bstrand[allele_idx];
        }

        if (allele_idx < hp_max_length.size()) {
          if (hp_max_length[allele_idx] != ".")
            allele.custom_tags["hp_max_length"] = hp_max_length[allele_idx];
        }

        alleles[allele.chr_idx].push_back(allele);
        //line_status.back().allele = &alleles[allele.chr_idx].back();
        line_status.back().chr_idx = allele.chr_idx;
        line_status.back().opos = allele.opos;
        line_status.back().id = allele.id;
        allele_idx++;
      }
    }

    fclose(input);
    if (real_vcf) {
        while (not vcflist.empty()) {
            if (vcflist.front().produce_hot_vcf(last_chr, out_real, hn, out_hot)) fake_++;
            vcflist.pop_front();
        }
	fclose(out_real);
	fclose(out_hot);
	if (fake_ > 0) 
            return 0;
	else return 1;
    }
  }


  // Process by chromosome:
  //   - Verify reference allele
  //   - Left align
  //   - Sort
  //   - Filter for block substitutions, write

  FILE *output_vcf = NULL;
  if (!output_vcf_filename.empty()) {
    output_vcf = fopen(output_vcf_filename.c_str(), "w");
    if (!output_vcf) {
      fprintf(stderr,"ERROR: Cannot open %s for writing\n", output_vcf_filename.c_str());
      return 1;
    }
    fprintf(output_vcf, "##fileformat=VCFv4.1\n");
    if (allow_block_substitutions)
      fprintf(output_vcf, "##allowBlockSubstitutions=true\n");
    fprintf(output_vcf, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n");
  }
  FILE *output_bed = NULL;
  if (!output_bed_filename.empty()) {
    output_bed = fopen(output_bed_filename.c_str(), "w");
    if (!output_bed) {
      fprintf(stderr,"ERROR: Cannot open %s for writing\n", output_bed_filename.c_str());
      if (output_vcf)
        fclose(output_vcf);
      return 1;
    }
    if (allow_block_substitutions)
      fprintf(output_bed, "track name=\"hotspot\" type=bedDetail allowBlockSubstitutions=true\n");
    else
      fprintf(output_bed, "track name=\"hotspot\" type=bedDetail\n");
  }


  for (int chr_idx = 0; chr_idx < (int)ref_index.size(); ++chr_idx) {

    for (deque<Allele>::iterator A = alleles[chr_idx].begin(); A != alleles[chr_idx].end(); ++A) {

      // check bed file
      if (junc.contain(A->chr_idx, A->pos, (unsigned int) A->ref.size())) {
	A->filtered = true;
        A->line_status->filter_message_prefix = "hotspot BED line contain the complete overlapping region of two amplicon, the variant cannot be detected by tvc";
        continue;
      }
      if (not junc.contained_in_ampl(A->chr_idx, A->pos, (unsigned int) A->ref.size())) {
	A->filtered = true;
        A->line_status->filter_message_prefix = "hotspot BED line is not contained in any amplicon, the variant cannot be detected by tvc";
        continue;
      }


      // Invalid characters

      bool valid = true;
      for (const char *c = A->ref.c_str(); *c ; ++c)
        if (*c != 'A' and *c != 'C' and *c != 'G' and *c != 'T')
          valid = false;
      for (const char *c = A->alt.c_str(); *c ; ++c)
        if (*c != 'A' and *c != 'C' and *c != 'G' and *c != 'T')
          valid = false;
      if (not valid) {
        A->filtered = true;
        A->line_status->filter_message_prefix = "REF and/or ALT contain characters other than ACGT: ";
        A->line_status->filter_message = "REF = " + A->ref + " ALT = " + A->alt;
        continue;
      }

      // Filter REF == ALT

      if (A->ref == A->alt) {
        A->filtered = true;
        A->line_status->filter_message_prefix = "REF and ALT alleles equal";
        continue;
      }

      // Confirm reference allele.

      string ref_expected;
      for (int idx = 0; idx < (int) A->ref.size(); ++idx)
        ref_expected += ref_index[chr_idx].base(A->pos + idx);
      if (A->ref != ref_expected) {
        A->filtered = true;
        A->line_status->filter_message_prefix = "Provided REF allele does not match reference: ";
        A->line_status->filter_message = "Expected " + ref_expected + ", found " + A->ref;
        continue;
      }

      // Trim

      int ref_start = 0;
      int ref_end = A->ref.size();
      int alt_end = A->alt.size();

      // Option 1: trim all trailing bases;

      //while(ref_end and alt_end and A->ref[ref_end-1] == A->alt[alt_end-1]) {
      //  --ref_end;
      //  --alt_end;
      //}

      // Option 2: trim all leading basees;

      //while (ref_start < ref_end and ref_start < alt_end and A->ref[ref_start] == A->alt[ref_start])
      //  ++ref_start;

      // Option 3: trim anchor base if vcf

      if (!input_vcf_filename.empty()) {
        if (ref_end and alt_end and (ref_end == 1 or alt_end == 1) and A->ref[0] == A->alt[0])
          ref_start = 1;
      }

      A->pos += ref_start;
      A->ref = A->ref.substr(ref_start, ref_end-ref_start);
      A->alt = A->alt.substr(ref_start, alt_end-ref_start);
      ref_end -= ref_start;
      alt_end -= ref_start;
      // Left align
      if (left_alignment && A->custom_tags.find("BSTRAND") == A->custom_tags.end()) { // black list variant not to be left aligned.
	string trailing;
	int can_do = 0, need_do = 0;
	int ref_end_orig= ref_end, alt_end_orig = alt_end;
	while(ref_end and alt_end and A->ref[ref_end-1] == A->alt[alt_end-1]) {
	    ref_end--; alt_end--;
	} 
	if (ref_end == 0 || alt_end == 0) {
	    can_do = need_do = 1; // indel type, ZZ
	} else {
	    int tmp_start = ref_start;
	    int ref_end_0 = ref_end, alt_end_0 = alt_end; // end after remove trailing match ZZ
	    while (tmp_start < ref_end and tmp_start < alt_end and A->ref[tmp_start] == A->alt[tmp_start])
     		++tmp_start;
	    if (tmp_start == ref_end || tmp_start == alt_end) {
		can_do = 1; need_do = 0; // indel but indel is not at the left. ZZ
	    } else {
		ref_end--; alt_end--;
		while(ref_end and alt_end and A->ref[ref_end-1] == A->alt[alt_end-1]) {
            	    ref_end--; alt_end--;
        	}
		if (ref_end == 0 || alt_end == 0) {
		   // complex with 1 bp MM at right end
		    can_do = need_do = 1;
		    if (ref_end + alt_end == 0) need_do = 0; // SNP
		} else {
		  int tmp_start0 = tmp_start; // start after removing leading matches
		  tmp_start++;
		  while (tmp_start < ref_end_orig and tmp_start < alt_end_orig and A->ref[tmp_start] == A->alt[tmp_start])
			tmp_start++;
		  if (tmp_start >= ref_end_0 || tmp_start >= alt_end_0 || ref_end <= tmp_start0 || alt_end <= tmp_start0) {
			// 1MM plus indel in middle, by definition cannot move the indel left enough to change A->pos
		    	can_do = 1; need_do = 0;
		  } // else real complex 
		}
	    }
	}
	if (!can_do or !need_do) {
	    // do nothing
	    // if !can_do need add some more DP
	    ref_end = ref_end_orig;
	    alt_end = alt_end_orig;
	} else {
	 // left align the indel part, here either ref_end = 0 or alt_end = 0
	  int opos = A->pos;
          while (A->pos > 0) {
            char nuc = ref_index[chr_idx].base(A->pos-1);
            if (ref_end > 0 and A->ref[ref_end-1] != nuc)
              break;
            if (alt_end > 0 and A->alt[alt_end-1] != nuc)
              break;
            A->ref = string(1,nuc) + A->ref;
            A->alt = string(1,nuc) + A->alt;
            A->pos--;
          }
	  if (ref_end != ref_end_orig) {
	    // trailing part is aligned, the whole ref and alt need to be kept. ZZ
	    ref_end = A->ref.size();
	    alt_end = A->alt.size();
	  } 
	  if (junc.contain(chr_idx, A->pos, ref_end) or not junc.contained_in_ampl(chr_idx, A->pos, ref_end)) {
		// after left align the hotspot contain an overlap region, revert to the original ZZ
		if (opos != A->pos) {
		    A->ref.erase(0, opos-A->pos);
		    A->alt.erase(0, opos-A->pos);
		    A->pos = opos;
		    ref_end = ref_end_orig;
		    alt_end = alt_end_orig;
		}
	  }
       }
      }
      A->ref.resize(ref_end);
      A->alt.resize(alt_end);


      // Filter block substitutions: take 1

      if (ref_end > 0 and alt_end > 0 and ref_end != alt_end and not allow_block_substitutions and not filter_bypass) {
        A->filtered = true;
        A->line_status->filter_message_prefix = "Block substitutions not supported";
        continue;
      }

    }



    if (output_bed) {
      // Sort - without anchor base
      stable_sort(alleles[chr_idx].begin(), alleles[chr_idx].end(), compare_alleles);

      // Write
      for (deque<Allele>::iterator I = alleles[chr_idx].begin(); I != alleles[chr_idx].end(); ++I) {
        if (I->filtered)
          continue;

        fprintf(output_bed, "%s\t%ld\t%ld\t%s\tREF=%s;OBS=%s",
            ref_index[chr_idx].chr.c_str(), I->pos, I->pos + I->ref.size(), I->id.c_str(),
            I->ref.c_str(), I->alt.c_str());

        for (map<string,string>::iterator C = I->custom_tags.begin(); C != I->custom_tags.end(); ++C)
          fprintf(output_bed, ";%s=%s", C->first.c_str(), C->second.c_str());

        fprintf(output_bed, "\tNONE\n");

        /*
        if (I->pos)
          fprintf(output_bed, "%s\t%ld\t%ld\t%s\t0\t+\tREF=%s;OBS=%s;ANCHOR=%c\tNONE\n",
              ref_index[chr_idx].chr.c_str(), I->pos, I->pos + I->ref.size(), I->id.c_str(),
              I->ref.c_str(), I->alt.c_str(), ref_index[chr_idx].base(I->pos-1));
        else
          fprintf(output_bed, "%s\t%ld\t%ld\t%s\t0\t+\tREF=%s;OBS=%s;ANCHOR=\tNONE\n",
              ref_index[chr_idx].chr.c_str(), I->pos, I->pos + I->ref.size(), I->id.c_str(),
              I->ref.c_str(), I->alt.c_str());
        */
      }
    }


    if (output_vcf) {

      // Add anchor base to indels
      for (deque<Allele>::iterator I = alleles[chr_idx].begin(); I != alleles[chr_idx].end(); ++I) {
        if (I->filtered)
          continue;
        if (not I->ref.empty() and not I->alt.empty())
          continue;
        if (I->pos == 0) {
          I->filtered = true;
          I->line_status->filter_message_prefix = "INDELs at chromosome start not supported";
          continue;
        }
        I->pos--;
        I->ref = string(1,ref_index[chr_idx].base(I->pos)) + I->ref;
        I->alt = string(1,ref_index[chr_idx].base(I->pos)) + I->alt;
      }

      // Sort - with anchor base
      stable_sort(alleles[chr_idx].begin(), alleles[chr_idx].end(), compare_alleles);


      // Merge alleles, remove block substitutions, write
      for (deque<Allele>::iterator A = alleles[chr_idx].begin(); A != alleles[chr_idx].end(); ) {

        string max_ref;
        deque<Allele>::iterator B = A;
        for (; B != alleles[chr_idx].end() and B->pos == A->pos; ++B)
          if (!B->filtered and max_ref.size() < B->ref.size())
            max_ref = B->ref;

        bool filtered = true;
        map<string,set<string> > unique_alts_and_ids;
        for (deque<Allele>::iterator I = A; I != B; ++I) {
          if (I->filtered)
            continue;

          string new_alt = I->alt + max_ref.substr(I->ref.size());

          if (new_alt.size() > 1 and max_ref.size() > 1 and new_alt.size() != max_ref.size() and not allow_block_substitutions and not filter_bypass) {
            I->filtered = true;
            I->line_status->filter_message_prefix = "Block substitutions not supported (post-merge)";
            continue;
          }

          I->ref = max_ref;
          I->alt = new_alt;

          // Filter alleles with duplicate ALT + ID pairs
          map<string,set<string> >::iterator alt_iter = unique_alts_and_ids.find(new_alt);
          if (alt_iter != unique_alts_and_ids.end()) {
            if (alt_iter->second.count(I->id) > 0) {
              I->filtered = true;
              I->line_status->filter_message_prefix = "Duplicate allele and ID";
              continue;
            }
          }
          unique_alts_and_ids[new_alt].insert(I->id);

          filtered = false;
        }

        if (not filtered) {



          fprintf(output_vcf, "%s\t%ld\t.\t%s\t",
              ref_index[chr_idx].chr.c_str(), A->pos+1, max_ref.c_str());

          bool comma = false;

          map<string,map<string,string> > unique_alts_and_tags;
          set<string> unique_tags;
	  set<string> unique_alt_alleles;

          for (deque<Allele>::iterator I = A; I != B; ++I) {
            if (I->filtered)
              continue;
            unique_alts_and_tags[I->alt].insert(I->custom_tags.begin(), I->custom_tags.end());
            for (map<string,string>::iterator S = I->custom_tags.begin(); S != I->custom_tags.end(); ++S)
              unique_tags.insert(S->first);
            if (unique_alt_alleles.count(I->alt) > 0)
              continue;
            unique_alt_alleles.insert(I->alt);
            if (comma)
              fprintf(output_vcf, ",");
            comma = true;
            fprintf(output_vcf, "%s", I->alt.c_str());
          }
	  /*
          for (deque<Allele>::iterator I = A; I != B; ++I) {
            if (I->filtered)
              continue;
            map<string,map<string,string> >::iterator Q = unique_alts_and_tags.find(I->alt);
            if (comma)
              fprintf(output_vcf, ",");
            comma = true;
            if (Q == unique_alts_and_tags.end()) {fprintf(output_vcf, "."); continue;}
            fprintf(output_vcf, "%s", Q->first.c_str());
          }
          */

          fprintf(output_vcf, "\t.\t.\tOID=");
          comma = false;
          for (deque<Allele>::iterator I = A; I != B; ++I) {
            if (I->filtered)
              continue;
            if (comma)
              fprintf(output_vcf, ",");
            comma = true;
            fprintf(output_vcf, "%s", I->id.c_str());
          }

          fprintf(output_vcf, ";OPOS=");
          comma = false;
          for (deque<Allele>::iterator I = A; I != B; ++I) {
            if (I->filtered)
              continue;
            if (comma)
              fprintf(output_vcf, ",");
            comma = true;
            fprintf(output_vcf, "%ld", I->opos+1);
          }

          fprintf(output_vcf, ";OREF=");
          comma = false;
          for (deque<Allele>::iterator I = A; I != B; ++I) {
            if (I->filtered)
              continue;
            if (comma)
              fprintf(output_vcf, ",");
            comma = true;
            fprintf(output_vcf, "%s", I->oref.c_str());
          }

          fprintf(output_vcf, ";OALT=");
          comma = false;
          for (deque<Allele>::iterator I = A; I != B; ++I) {
            if (I->filtered)
              continue;
            if (comma)
              fprintf(output_vcf, ",");
            comma = true;
            fprintf(output_vcf, "%s", I->oalt.c_str());
          }

          fprintf(output_vcf, ";OMAPALT=");
          comma = false;
          for (deque<Allele>::iterator I = A; I != B; ++I) {
            if (I->filtered)
              continue;
            if (comma)
              fprintf(output_vcf, ",");
            comma = true;
            fprintf(output_vcf, "%s", I->alt.c_str());
          }

          for (set<string>::iterator S = unique_tags.begin(); S != unique_tags.end(); ++S) {
            fprintf(output_vcf, ";%s=", S->c_str());
            comma=false;
            for (deque<Allele>::iterator I = A; I != B; ++I) {
              if (I->filtered)
                continue;
              map<string,map<string,string> >::iterator Q = unique_alts_and_tags.find(I->alt);
              if (comma)
                fprintf(output_vcf, ",");
              comma = true;
              if (Q == unique_alts_and_tags.end()) {fprintf(output_vcf, "."); continue;}
              map<string,string>::iterator W = Q->second.find(*S);
              if (W == Q->second.end())
                fprintf(output_vcf, ".");
              else
                fprintf(output_vcf, "%s", W->second.c_str());
            }
          }
//            fprintf(output_vcf, ";%s=%s", S->first.c_str(), S->second.c_str());

          fprintf(output_vcf, "\n");
        }

        A = B;
      }
    }
  }



  if (output_bed) {
    fflush(output_bed);
    fclose(output_bed);
  }
  if (output_vcf) {
    fflush(output_vcf);
    fclose(output_vcf);
  }


  int lines_ignored = 0;
  for (deque<LineStatus>::iterator L = line_status.begin(); L != line_status.end(); ++L) {
    if (L->filter_message_prefix) {
      if (L->chr_idx >= 0)
        printf("Line %d ignored: [%s:%ld %s] %s%s\n", L->line_number, ref_index[L->chr_idx].chr.c_str(), L->opos+1, L->id.c_str(),
            L->filter_message_prefix, L->filter_message.c_str());
      else
        printf("Line %d ignored: %s%s\n", L->line_number, L->filter_message_prefix, L->filter_message.c_str());
      lines_ignored++;
    }
  }
  printf("Ignored %d out of %d lines\n", lines_ignored, (int)line_status.size());


  munmap(ref, ref_stat.st_size);
  close(ref_handle);
  if (lines_ignored > 0 and strict_check) return 1;

  return 0;
}

Пример #28

Показать файл

Файл: sff2bam.cpp Проект: Brainiarc7/TS

int main(int argc, const char *argv[])
{
    OptArgs opts;
    opts.ParseCmdLine(argc, argv);
    bool help, combineSffs;
    string sffFile;
    string bamFile;
    vector<string> infiles;
    opts.GetOption(help,"false", 'h', "help");
    opts.GetOption(combineSffs,"false", 'c', "combine-sffs");
    opts.GetOption(bamFile,"",'o',"out-filename");
    opts.GetLeftoverArguments(infiles);

    if(help || infiles.empty())
    {
        usage();
    }

	if((!combineSffs) && infiles.size() > 1)
	{
        cerr << "sff2bam ERROR: if you want to combine all sff files into a single bam file, please use option -c true." << endl;
        usage();
	}

    sffFile= infiles.front();

    if(bamFile.length() < 1)
    {
        bamFile = sffFile.substr(0, sffFile.length() - 3);
        bamFile += "bam";
    }

    sff_file_t* sff_file = sff_fopen(sffFile.c_str(), "r", NULL, NULL);
    if(!sff_file)
    {
        cerr << "sff2bam ERROR: fail to open " << sffFile << endl;
        exit(1);
    }

	// All sff files must have the same flow and key
	if(combineSffs && infiles.size() > 1)
	{
        for(size_t n = 1; n < infiles.size(); ++n)
		{
			sff_file_t* sff_file2 = sff_fopen(infiles[n].c_str(), "r", NULL, NULL);
			if(!sff_file2)
			{
				sff_fclose(sff_file);
				cerr << "sff2bam ERROR: fail to open " << infiles[n] << endl;
				exit(1);
			}

			if(strcmp(sff_file2->header->flow->s, sff_file->header->flow->s) != 0 ||
				strcmp(sff_file2->header->key->s, sff_file->header->key->s) != 0)
			{
				sff_fclose(sff_file);
				sff_fclose(sff_file2);
				cerr << "sff2bam ERROR: " << sffFile << " and " << infiles[n] << " have different flows or keys." << endl;
				exit(1);
			}

			sff_fclose(sff_file2);
		}
	}

    sff_t* sff = NULL;
    // Open 1st read for read group name
    sff = sff_read(sff_file);
    if(!sff)
    {
        sff_fclose(sff_file);
        cerr << "sff2bam ERROR: fail to read " << sffFile << endl;
        exit(1);
    }

    // Set up BAM header
    SamHeader sam_header;
    sam_header.Version = "1.4";
    sam_header.SortOrder = "unsorted";

    SamProgram sam_program("sff2bam");
    sam_program.Name = "sff2bam";
    sam_program.Version = SFF2BAM_VERSION;
    sam_program.CommandLine = "sff2bam";
    sam_header.Programs.Add(sam_program);

    string rgname = sff->rheader->name->s;
    int index = rgname.find(":");
    rgname = rgname.substr(0, index);

    SamReadGroup read_group(rgname);
    read_group.FlowOrder = sff->gheader->flow->s;
    read_group.KeySequence = sff->gheader->key->s;

    sam_header.ReadGroups.Add(read_group);

    RefVector refvec;
    BamWriter bamWriter;
    bamWriter.SetCompressionMode(BamWriter::Compressed);

    if(!bamWriter.Open(bamFile, sam_header, refvec))
    {
        sff_fclose(sff_file);
        cerr << "sff2bam ERROR: failed to open " << bamFile << endl;
        exit(1);
    }

    // Save 1st read
    BamAlignment bam_alignment0;
    bam_alignment0.SetIsMapped(false);
    bam_alignment0.Name = sff->rheader->name->s;
    size_t nBases = sff->rheader->n_bases + 1 - sff->rheader->clip_qual_left;
    if(sff->rheader->clip_qual_right > 0)
    {
        nBases = sff->rheader->clip_qual_right - sff->rheader->clip_qual_left;
    }
    if(nBases > 0)
    {
        bam_alignment0.QueryBases.reserve(nBases);
        bam_alignment0.Qualities.reserve(nBases);
        for (int base = sff->rheader->clip_qual_left - 1; base < sff->rheader->clip_qual_right - 1; ++base)
        {
            bam_alignment0.QueryBases.push_back(sff->read->bases->s[base]);
            bam_alignment0.Qualities.push_back(sff->read->quality->s[base] + 33);
        }
    }

    int clip_flow = 0;
    for (unsigned int base = 0; base < sff->rheader->clip_qual_left && base < sff->rheader->n_bases; ++base)
    {
        clip_flow += sff->read->flow_index[base];
    }
    if (clip_flow > 0)
    {
        clip_flow--;
    }

    bam_alignment0.AddTag("RG","Z", rgname);
    bam_alignment0.AddTag("PG","Z", string("sff2bam"));
    bam_alignment0.AddTag("ZF","i", clip_flow); // TODO: trim flow
    vector<uint16_t> flowgram0(sff->gheader->flow_length);
    copy(sff->read->flowgram, sff->read->flowgram + sff->gheader->flow_length, flowgram0.begin());
    bam_alignment0.AddTag("FZ", flowgram0);
    sff_destroy(sff);
    sff = NULL;

    bamWriter.SaveAlignment(bam_alignment0);

    // Save rest reads
    while(NULL != (sff = sff_read(sff_file)))
    {
        BamAlignment bam_alignment;
        bam_alignment.SetIsMapped(false);
        bam_alignment.Name = sff->rheader->name->s;   
        nBases = sff->rheader->n_bases + 1 - sff->rheader->clip_qual_left;
        if(sff->rheader->clip_qual_right > 0)
        {
            nBases = sff->rheader->clip_qual_right - sff->rheader->clip_qual_left;
        }
        if(nBases > 0)
        {
            bam_alignment.QueryBases.reserve(nBases);
            bam_alignment.Qualities.reserve(nBases);
            for (int base = sff->rheader->clip_qual_left - 1; base < sff->rheader->clip_qual_right - 1; ++base)
            {
                bam_alignment.QueryBases.push_back(sff->read->bases->s[base]);
                bam_alignment.Qualities.push_back(sff->read->quality->s[base] + 33);
            }
        }

        clip_flow = 0;
        for (unsigned int base = 0; base <= sff->rheader->clip_qual_left && base < sff->rheader->n_bases; ++base)
        {
            clip_flow += sff->read->flow_index[base];
        }
        if (clip_flow > 0)
        {
            clip_flow--;
        }

        bam_alignment.AddTag("RG","Z", rgname);
        bam_alignment.AddTag("PG","Z", string("sff2bam"));
        bam_alignment.AddTag("ZF","i", clip_flow); // TODO: trim flow
        vector<uint16_t> flowgram(sff->gheader->flow_length);
        copy(sff->read->flowgram, sff->read->flowgram + sff->gheader->flow_length, flowgram.begin());
        bam_alignment.AddTag("FZ", flowgram);
        sff_destroy(sff);
        sff = NULL;

        bamWriter.SaveAlignment(bam_alignment);
    }

	sff_fclose(sff_file);

	if(combineSffs && infiles.size() > 1)
	{
        for(size_t n = 1; n < infiles.size(); ++n)
		{
			sff_file_t* sff_file2 = sff_fopen(infiles[n].c_str(), "r", NULL, NULL);

			while(NULL != (sff = sff_read(sff_file2)))
			{
				BamAlignment bam_alignment;
				bam_alignment.SetIsMapped(false);
				bam_alignment.Name = sff->rheader->name->s;   
				nBases = sff->rheader->n_bases + 1 - sff->rheader->clip_qual_left;
				if(sff->rheader->clip_qual_right > 0)
				{
					nBases = sff->rheader->clip_qual_right - sff->rheader->clip_qual_left;
				}
				if(nBases > 0)
				{
					bam_alignment.QueryBases.reserve(nBases);
					bam_alignment.Qualities.reserve(nBases);
					for (int base = sff->rheader->clip_qual_left - 1; base < sff->rheader->clip_qual_right - 1; ++base)
					{
						bam_alignment.QueryBases.push_back(sff->read->bases->s[base]);
						bam_alignment.Qualities.push_back(sff->read->quality->s[base] + 33);
					}
				}

				clip_flow = 0;
				for (unsigned int base = 0; base <= sff->rheader->clip_qual_left && base < sff->rheader->n_bases; ++base)
				{
					clip_flow += sff->read->flow_index[base];
				}
				if (clip_flow > 0)
				{
					clip_flow--;
				}

				bam_alignment.AddTag("RG","Z", rgname);
				bam_alignment.AddTag("PG","Z", string("sff2bam"));
				bam_alignment.AddTag("ZF","i", clip_flow); // TODO: trim flow
				vector<uint16_t> flowgram(sff->gheader->flow_length);
				copy(sff->read->flowgram, sff->read->flowgram + sff->gheader->flow_length, flowgram.begin());
				bam_alignment.AddTag("FZ", flowgram);
				sff_destroy(sff);
				sff = NULL;

				bamWriter.SaveAlignment(bam_alignment);
			}

			sff_fclose(sff_file2);
		}
	}

    bamWriter.Close();    

    return 0;
}

Пример #29

Показать файл

Файл: BaseCallerParameters.cpp Проект: Lingrui/TS

bool BaseCallerParameters::InitContextVarsFromOptArgs(OptArgs& opts){

    assert(bc_files.options_set);
    char default_run_id[6]; // Create a run identifier from full output directory string
    ion_run_to_readname (default_run_id, (char*)bc_files.output_directory.c_str(), bc_files.output_directory.length());
    context_vars.run_id                      = opts.GetFirstString ('-', "run-id", default_run_id);
	num_threads_                             = opts.GetFirstInt    ('n', "num-threads", max(2*numCores(), 4));
	num_bamwriter_threads_                   = opts.GetFirstInt    ('-', "num-threads-bamwriter", 6);

    context_vars.flow_signals_type           = opts.GetFirstString ('-', "flow-signals-type", "none");
    context_vars.extra_trim_left             = opts.GetFirstInt    ('-', "extra-trim-left", 0);
    context_vars.only_process_unfiltered_set = opts.GetFirstBoolean('-', "only-process-unfiltered-set", false);

    // Treephaser options
    context_vars.dephaser                    = opts.GetFirstString ('-', "dephaser", "treephaser-sse");
    context_vars.keynormalizer               = opts.GetFirstString ('-', "keynormalizer", "gain");
    context_vars.windowSize                  = opts.GetFirstInt    ('-', "window-size", DPTreephaser::kWindowSizeDefault_);
    context_vars.skip_droop                  = opts.GetFirstBoolean('-', "skip-droop", true);
    context_vars.skip_recal_during_norm      = opts.GetFirstBoolean('-', "skip-recal-during-normalization", false);
    context_vars.diagonal_state_prog         = opts.GetFirstBoolean('-', "diagonal-state-prog", false);

    // Not every combination of options is possible here:
    if (context_vars.diagonal_state_prog and context_vars.dephaser != "treephaser-swan") {
      cout << " === BaseCaller Option Incompatibility: Using dephaser treephaser-swan with diagonal state progression instead of "
           << context_vars.dephaser << endl;
      context_vars.dephaser = "treephaser-swan";
    }

    context_vars.process_tfs      = true;
    context_vars.options_set      = true;
    return true;
};

Пример #30

Показать файл

Файл: Calibration.cpp Проект: iontorrent/TS

int main (int argc, const char *argv[])
{
  time_t program_start_time;
  time(&program_start_time);
  Json::Value calibration_json(Json::objectValue);
  DumpStartingStateOfProgram (argc,argv,program_start_time, calibration_json["Calibration"]);

  //
  // Step 1. Process command line options
  //

  OptArgs opts;
  opts.ParseCmdLine(argc, argv);

  // enable floating point exceptions during program execution
  if (opts.GetFirstBoolean('-', "float-exceptions", true)) {
    cout << "Calibration: Floating point exceptions enabled." << endl;
    feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
  } //*/

  CalibrationContext calib_context;
  if (not calib_context.InitializeFromOpts(opts)){
    PrintHelp_CalModules();
  }

  HistogramCalibration master_histogram(opts, calib_context);
  calib_context.hist_calibration_master = &master_histogram;

  LinearCalibrationModel master_linear_model(opts, calib_context);
  calib_context.linear_model_master = &master_linear_model;

  opts.CheckNoLeftovers();

  //
  // Step 2. Execute threaded calibration
  //
  int calibration_thread_time = 0;

  if (calib_context.successive_fit) {

    // first train linear model
    if (master_linear_model.DoTraining()) {
      int l_thread_time = 0;
      for (int i_iteration=0; i_iteration<calib_context.num_train_iterations; i_iteration++) {
        cout << " -Training Iteration " << i_iteration+1;
        l_thread_time = ExecuteThreadedCalibrationTraining(calib_context);

        // Activate master linear model after every round of training
        master_linear_model.CreateCalibrationModel(false); // make linear model
        master_linear_model.SetModelGainsAndOffsets(); // expand for use in basecalling

        calibration_thread_time += l_thread_time;
        calib_context.bam_reader.Rewind(); // reset all files for another pass
        cout << " Duration = " << l_thread_time << endl;
      }
    }

    // Then apply it during polish model training
    if (master_histogram.DoTraining()) {
      calib_context.local_fit_linear_model = false;
      calib_context.local_fit_polish_model = true;
      calibration_thread_time += ExecuteThreadedCalibrationTraining(calib_context);
    }
  }
  else {
    // Single pass in which both models are fit jointly
    calibration_thread_time=ExecuteThreadedCalibrationTraining(calib_context);
  }


  //
  // Step 3. Create models, write output, and close modules
  //

  // Linear Model
  if (master_linear_model.CreateCalibrationModel())
    master_linear_model.ExportModelToJson(calibration_json["LinearModel"], "");

  // HP histogram calibration
  if (master_histogram.CreateCalibrationModel())
    master_histogram.ExportModelToJson(calibration_json["HPHistogram"]);


  // Transfer stuff from calibration context and close bam reader
  calib_context.Close(calibration_json["Calibration"]);

  time_t program_end_time;
  time(&program_end_time);

  calibration_json["Calibration"]["end_time"] = get_time_iso_string(program_end_time);
  calibration_json["Calibration"]["total_duration"] = (Json::Int)difftime(program_end_time,program_start_time);
  calibration_json["Calibration"]["calibration_duration"] = (Json::Int)calibration_thread_time;

  SaveJson(calibration_json, calib_context.filename_json);
  return EXIT_SUCCESS;
}