示例#1
0
/*
 * Build the placeholder record stored in args->missed_line.
 *
 * For every sample in args->aux.hdr, the two consecutive slots of
 * args->aux.gts belonging to that sample are set to bcf_gt_missing followed
 * by bcf_int32_vector_end.  These genotypes are then attached to a record
 * obtained from bcf_init1(), and the record's QUAL is marked as missing.
 *
 * NOTE(review): the results of bcf_init1() and bcf_update_genotypes() are
 * not checked here -- confirm that callers can tolerate a failure.
 * Assumes args->aux.gts has room for at least two entries per sample; the
 * buffer is allocated outside this function.
 */
static void init_missed_line(args_t *args)
{
    const int n_smpl = bcf_hdr_nsamples(args->aux.hdr);
    int smpl;

    for (smpl = 0; smpl < n_smpl; ++smpl)
    {
        const int first = 2*smpl;   /* index of this sample's first slot */
        args->aux.gts[first]     = bcf_gt_missing;
        args->aux.gts[first + 1] = bcf_int32_vector_end;
    }

    args->missed_line = bcf_init1();
    bcf_update_genotypes(args->aux.hdr, args->missed_line, args->aux.gts, 2*n_smpl);
    bcf_float_set_missing(args->missed_line->qual);
}
/**
 * @brief Construct individual field objects and create a mapping from field logical (header) index to
 *        physical locations in the m_*_fields vectors.
 */
void VariantBuilderIndividualRegion::build_lookup_tables() {
  // Must copy float missing/eov values into local variables; using bcf_float_missing and bcf_float_vector_end
  // values directly doesn't work
  auto float_missing = 0.0f; bcf_float_set_missing(float_missing);
  auto float_vector_end = 0.0f; bcf_float_set_vector_end(float_vector_end);
  const auto num_samples = m_header.n_samples();

  // Create an entry for all possible BCF_DT_ID indices in the VariantHeader
  for ( auto i = 0; i < m_header.m_header->n[BCF_DT_ID]; ++i ) {
    if ( m_header.has_individual_field(i) ) {
      const auto field_type = m_header.individual_field_type(i);
      switch ( field_type ) {
        case BCF_HT_INT :
          m_int_fields.emplace_back(num_samples, i, field_type, bcf_int32_missing, bcf_int32_vector_end, int_field_short_value_threshold);
          m_field_lookup_table[i] = m_int_fields.size() - 1;
          break;
        case BCF_HT_REAL :
          m_float_fields.emplace_back(num_samples, i, field_type, float_missing, float_vector_end, float_field_short_value_threshold);
          m_field_lookup_table[i] = m_float_fields.size() - 1;
          break;
        case BCF_HT_STR :
          // GT is a string field in the header, although we encode it as an int field
          if ( i == m_gt_field_index ) {
            // Note that the missing value for genotypes is 0, not bcf_int32_missing
            m_int_fields.emplace_back(num_samples, i, field_type, 0, bcf_int32_vector_end, int_field_short_value_threshold);
            m_field_lookup_table[i] = m_int_fields.size() - 1;
          }
          else {
            // TODO: should we use bcf_str_missing here? htslib generally uses '.' instead of it...
            m_string_fields.emplace_back(num_samples, i, field_type, '.', bcf_str_vector_end, string_field_short_value_threshold);
            m_field_lookup_table[i] = m_string_fields.size() - 1;
          }
          break;
        default:
          throw logic_error(string{"Found format field in header with unsupported type: "} + to_string(field_type));
      }
    }
    else {
      m_field_lookup_table[i] = -1;
    }
  }
}
示例#3
0
void VariantBuilder::build_from_scratch(const std::shared_ptr<bcf1_t>& new_variant_body) const {
  // Missing rid/pos are errors that will be caught in post-build validation (if validation is turned on)
  new_variant_body->rid = m_contig.is_set() ? m_contig.field_value() : missing_values::int32;
  new_variant_body->pos = m_start_pos.is_set() ? m_start_pos.field_value() : missing_values::int32;

  if ( m_qual.is_set() ) {
    new_variant_body->qual = m_qual.field_value();
  }
  else {
    bcf_float_set_missing(new_variant_body->qual);
  }

  // Set rlen to the reference block size if alignment stop was set, otherwise set it to the ref allele length.
  // If m_start_pos is not set, we'll get an error in the post-validation stage.
  new_variant_body->rlen = m_stop_pos.is_set() ? (m_stop_pos.field_value() - m_start_pos.field_value() + 1) :
                                                 int32_t(m_shared_region.ref_allele_length());
  new_variant_body->n_allele = 1 + m_shared_region.num_alt_alleles();

  // Shared region (always encoded, since at a minimum the ref allele will be present)
  new_variant_body->n_info = m_shared_region.num_present_info_fields();
  auto shared_buffer = utils::initialize_htslib_buffer(m_shared_region.estimate_total_size());
  m_shared_region.encode_into(&shared_buffer);
  new_variant_body->shared = shared_buffer;

  // Individual region (conditionally encoded)
  new_variant_body->n_sample = m_header.n_samples();
  new_variant_body->n_fmt = m_individual_region.num_present_fields();

  if ( m_individual_region.num_present_fields() > 0 ) {
    auto indiv_buffer = utils::initialize_htslib_buffer(m_individual_region.estimate_total_size());
    m_individual_region.encode_into(&indiv_buffer);
    new_variant_body->indiv = indiv_buffer;
  }
  else {
    new_variant_body->indiv = {0, 0, 0};
  }
}
示例#4
0
/**
 * @brief Create a float multi-sample vector configured with the BCF float missing and
 *        end-of-vector marker values.
 *
 * @param num_samples forwarded unchanged to the VariantBuilderMultiSampleVector constructor
 * @param max_values_per_sample forwarded unchanged to the VariantBuilderMultiSampleVector constructor
 * @return the newly constructed VariantBuilderMultiSampleVector<float>
 */
VariantBuilderMultiSampleVector<float> VariantBuilder::get_float_multi_sample_vector(const uint32_t num_samples, const uint32_t max_values_per_sample) const {
  // The marker values are written into locals through the htslib setters before being handed on
  float missing_marker = 0.0f;
  bcf_float_set_missing(missing_marker);

  float end_of_vector_marker = 0.0f;
  bcf_float_set_vector_end(end_of_vector_marker);

  return VariantBuilderMultiSampleVector<float>{num_samples, max_values_per_sample, missing_marker, end_of_vector_marker};
}