예제 #1
0
// Resets the writer's bookkeeping and opens one compressed BAM file per dataset.
//
// For every dataset a SAM header is assembled (version 1.4, unsorted, one "bc"
// program entry and one read group entry per read group named in the dataset's
// "read_groups" list) and a freshly allocated BamWriter is opened on
// base_directory + "/" + the dataset's "basecaller_bam" value, using an empty
// reference list.
//
// Parameters:
//   base_directory          - directory prefix of every BAM file name
//   datasets                - supplies dataset / barcode / read group counts and JSON metadata
//   num_regions             - number of regions; sizes the region bookkeeping
//   flow_order              - its full_nucs() string becomes each read group's flow order
//   key                     - start of each read group's key sequence; the read group's
//                             "barcode_sequence" and "barcode_adapter" values are appended
//   basecaller_name         - program name written to the header
//   basecalller_version     - program version written to the header
//   basecaller_command_line - program command line written to the header
//   production_date         - production date of every read group
//   platform_unit           - not referenced in this function; the platform unit is read
//                             from the read group JSON instead
//   save_filtered_reads     - copied into save_filtered_reads_
//
// NOTE(review): the result of BamWriter::Open is not inspected here.
// NOTE(review): the resize() calls keep values already present in the vectors, and
// bam_writer_ entries are overwritten without being released here - presumably
// Open is called once per object; confirm against callers.
void OrderedDatasetWriter::Open(const string& base_directory, BarcodeDatasets& datasets, int num_regions, const ion::FlowOrder& flow_order, const string& key,
    const string& basecaller_name, const string& basecalller_version, const string& basecaller_command_line,
    const string& production_date, const string& platform_unit, bool save_filtered_reads)
{
  save_filtered_reads_ = save_filtered_reads;

  // Region bookkeeping: nothing written yet, every ready flag cleared
  // (one flag more than there are regions), dropboxes rebuilt from scratch.
  num_regions_written_ = 0;
  num_regions_ = num_regions;
  region_dropbox_.clear();
  region_dropbox_.resize(num_regions_);
  region_ready_.assign(num_regions_+1, false);

  qv_histogram_.assign(50, 0);

  // Sizes reported by the dataset description
  num_datasets_ = datasets.num_datasets();
  num_barcodes_ = datasets.num_barcodes();
  num_read_groups_ = datasets.num_read_groups();

  // Per-dataset state
  bam_filename_.resize(num_datasets_);
  bam_writer_.resize(num_datasets_, NULL);
  num_reads_.resize(num_datasets_, 0);

  // Per-read-group state. A read group's dataset index stays at -1 until the
  // read group is found in some dataset's "read_groups" list below.
  read_group_dataset_.assign(num_read_groups_, -1);
  read_group_num_Q20_bases_.assign(num_read_groups_, 0);
  read_group_stats_.resize(num_read_groups_);   // filtering and trimming accounting
  read_group_name_.resize(num_read_groups_);
  read_group_num_barcode_errors_.resize(num_read_groups_);

  for (int group = 0; group < num_read_groups_; ++group) {
    read_group_num_barcode_errors_[group].assign(3, 0);
    read_group_name_[group] = datasets.read_group_name(group);
  }

  for (int dataset_idx = 0; dataset_idx < num_datasets_; ++dataset_idx) {

    Json::Value& dataset_json = datasets.dataset(dataset_idx);

    bam_filename_[dataset_idx] = base_directory + "/" + dataset_json["basecaller_bam"].asString();

    // Header: general fields plus the program entry describing the basecaller

    SamHeader header;
    header.Version = "1.4";
    header.SortOrder = "unsorted";

    SamProgram program("bc");
    program.Name = basecaller_name;
    program.Version = basecalller_version;
    program.CommandLine = basecaller_command_line;
    header.Programs.Add(program);

    // Header: one read group entry per name listed for this dataset

    Json::Value& listed_groups = dataset_json["read_groups"];
    for (Json::Value::iterator name_it = listed_groups.begin(); name_it != listed_groups.end(); ++name_it) {
      const string group_name = (*name_it).asString();
      Json::Value& group_json = datasets.read_groups()[group_name];

      // Remember which dataset this read group is written to
      read_group_dataset_[datasets.read_group_name_to_id(group_name)] = dataset_idx;

      const string barcode_sequence = group_json.get("barcode_sequence","").asString();
      const string barcode_adapter  = group_json.get("barcode_adapter","").asString();

      SamReadGroup group_entry(group_name);
      group_entry.FlowOrder            = flow_order.full_nucs();
      group_entry.KeySequence          = key + barcode_sequence + barcode_adapter;
      group_entry.ProductionDate       = production_date;
      group_entry.Sample               = group_json.get("sample","").asString();
      group_entry.Library              = group_json.get("library","").asString();
      group_entry.Description          = group_json.get("description","").asString();
      group_entry.PlatformUnit         = group_json.get("platform_unit","").asString();
      group_entry.SequencingCenter     = datasets.json().get("sequencing_center","").asString();
      group_entry.SequencingTechnology = "IONTORRENT";

      header.ReadGroups.Add(group_entry);
    }

    // Create the writer for this dataset and open it in compressed mode
    // (BamWriter::Uncompressed is the alternative compression mode).

    RefVector no_references;
    BamWriter* writer = new BamWriter();
    bam_writer_[dataset_idx] = writer;
    writer->SetCompressionMode(BamWriter::Compressed);
    writer->Open(bam_filename_[dataset_idx], header, no_references);
  }

}
예제 #2
0
// Resets the writer's bookkeeping and prepares one SAM header per dataset.
//
// The headers are stored in sam_header_; each gets version 1.4, sort order
// "unsorted", a "bc" program entry filled from basecaller_json["BaseCaller"],
// one read group entry per name in the dataset's "read_groups" list, and a copy
// of every string in comments. No BamWriter is created in this function:
// bam_writer_ is only resized, with NULL as the fill value for new entries.
//
// Parameters:
//   base_directory        - directory prefix of every BAM file name
//   datasets              - supplies dataset / barcode / read group counts and JSON metadata
//   read_class_idx        - only its sign is used: negative sets save_filtered_reads_ to true
//                           (per the original author: untrimmed/unfiltered BAM files with
//                           library key, all reads saved), otherwise false
//   num_regions           - number of regions; sizes the region bookkeeping
//   flow_order            - its full_nucs() string becomes each read group's flow order
//   key                   - start of each read group's key sequence
//   bead_adapters         - handed to SetBeadAdapters on every read group's stats and on combined_stats_
//   num_bamwriter_threads - stored in num_bamwriter_threads_
//   basecaller_json       - "BaseCaller" -> "version", "git_hash", "command_line", "start_time" are read
//   comments              - appended to the Comments of every header
//   tag_trimmer           - queried for per-read-group tags when datasets.IsLibraryDataset() is true
//   trim_barcodes         - when true, the read group's "barcode_sequence" and "barcode_adapter"
//                           values are appended to the key sequence
//
// NOTE(review): sam_header_ and the other resize()d vectors keep entries that
// already exist - presumably Open is called once per object; confirm against callers.
void OrderedDatasetWriter::Open(const string& base_directory, BarcodeDatasets& datasets, int read_class_idx,
     int num_regions, const ion::FlowOrder& flow_order, const string& key, const vector<string> & bead_adapters,
     int num_bamwriter_threads, const Json::Value & basecaller_json, vector<string>& comments,
     MolecularTagTrimmer& tag_trimmer, bool trim_barcodes)
{
  // Only the sign of read_class_idx matters here; the value is not referenced again.
  save_filtered_reads_ = (read_class_idx < 0);
  num_bamwriter_threads_ = num_bamwriter_threads;

  // Region bookkeeping: nothing written yet, every ready flag cleared
  // (one flag more than there are regions), dropboxes rebuilt from scratch.
  num_regions_written_ = 0;
  num_regions_ = num_regions;
  region_dropbox_.clear();
  region_dropbox_.resize(num_regions_);
  region_ready_.assign(num_regions_+1, false);

  qv_histogram_.assign(50, 0);

  // Sizes reported by the dataset description
  num_datasets_ = datasets.num_datasets();
  num_barcodes_ = datasets.num_barcodes();
  num_read_groups_ = datasets.num_read_groups();

  // Per-dataset state
  bam_filename_.resize(num_datasets_);
  bam_writer_.resize(num_datasets_, NULL);
  sam_header_.resize(num_datasets_);
  num_reads_.resize(num_datasets_, 0);

  // Per-read-group counters. A read group's dataset index stays at -1 until the
  // read group is found in some dataset's "read_groups" list below.
  read_group_dataset_.assign(num_read_groups_, -1);
  read_group_num_Q20_bases_.assign(num_read_groups_, 0);
  read_group_barcode_filt_zero_err_.assign(num_read_groups_, 0);
  read_group_barcode_adapter_rejected_.assign(num_read_groups_, 0);

  // Per-read-group containers, filled in by the loop that follows
  read_group_name_.resize(num_read_groups_);
  read_group_num_barcode_errors_.resize(num_read_groups_);
  read_group_barcode_distance_hist_.resize(num_read_groups_);
  read_group_barcode_bias_.resize(num_read_groups_);
  read_group_stats_.resize(num_read_groups_);   // filtering and trimming accounting

  for (int group = 0; group < num_read_groups_; ++group) {
    read_group_name_[group] = datasets.read_group_name(group);
    read_group_num_barcode_errors_[group].assign(3, 0);
    read_group_barcode_bias_[group].assign(datasets.GetBCmaxFlows(), 0.0);
    read_group_barcode_distance_hist_[group].assign(5, 0);
    read_group_stats_[group].SetBeadAdapters(bead_adapters);
  }
  combined_stats_.SetBeadAdapters(bead_adapters);

  for (int dataset_idx = 0; dataset_idx < num_datasets_; ++dataset_idx) {

    Json::Value& dataset_json = datasets.dataset(dataset_idx);
    const Json::Value& basecaller_info = basecaller_json["BaseCaller"];

    bam_filename_[dataset_idx] = base_directory + "/" + dataset_json["basecaller_bam"].asString();

    // Header: general fields plus the program entry describing the basecaller

    SamHeader& header = sam_header_[dataset_idx];
    header.Version = "1.4";
    header.SortOrder = "unsorted";

    SamProgram program("bc");
    program.Name        = "BaseCaller";
    program.Version     = basecaller_info["version"].asString() + "/" + basecaller_info["git_hash"].asString();
    program.CommandLine = basecaller_info["command_line"].asString();
    header.Programs.Add(program);

    // Header: one read group entry per name listed for this dataset

    Json::Value& listed_groups = dataset_json["read_groups"];
    for (Json::Value::iterator name_it = listed_groups.begin(); name_it != listed_groups.end(); ++name_it) {
      const string group_name = (*name_it).asString();
      Json::Value& group_json = datasets.read_groups()[group_name];

      // Remember which dataset this read group is written to
      read_group_dataset_[datasets.read_group_name_to_id(group_name)] = dataset_idx;

      // The barcode information only becomes part of the key sequence if the
      // barcode was hard clipped.
      string key_sequence = key;
      if (trim_barcodes) {
        key_sequence += group_json.get("barcode_sequence","").asString();
        key_sequence += group_json.get("barcode_adapter","").asString();
      }

      SamReadGroup group_entry(group_name);
      group_entry.FlowOrder            = flow_order.full_nucs();
      group_entry.KeySequence          = key_sequence;
      group_entry.ProductionDate       = basecaller_info["start_time"].asString();
      group_entry.Sample               = group_json.get("sample","").asString();
      group_entry.Library              = group_json.get("library","").asString();
      group_entry.Description          = group_json.get("description","").asString();
      group_entry.PlatformUnit         = group_json.get("platform_unit","").asString();
      group_entry.SequencingCenter     = datasets.json().get("sequencing_center","").asString();
      group_entry.SequencingTechnology = "IONTORRENT";

      // Custom tags "zt" / "yt": prefix and suffix tag of this read group
      if (datasets.IsLibraryDataset()) {
        MolTag group_tags = tag_trimmer.GetReadGroupTags(group_name);
        AddCustomReadGroupTag(group_entry, "zt", group_tags.prefix_mol_tag);
        AddCustomReadGroupTag(group_entry, "yt", group_tags.suffix_mol_tag);
      }

      header.ReadGroups.Add(group_entry);
    }

    // Header: append all caller-supplied comment lines in their given order
    header.Comments.insert(header.Comments.end(), comments.begin(), comments.end());
  }

}