Esempio n. 1
0
/**
 * @brief Recompute the alignment result and its CIGAR
 *
 * Runs ksw_align() on the stored ref / alt buffers, releases any CIGAR kept
 * from an earlier run, then runs ksw_global() on the sub-ranges reported by
 * ksw_align() to fill in a fresh CIGAR. The result is flagged as valid once
 * both passes have run.
 */
void KlibAlignment::update()
{
    // First pass: ref is handed in as the first sequence, alt as the second.
    _impl->result = ksw_align(_impl->reflen, _impl->ref.get(),
                              _impl->altlen, _impl->alt.get(),
                              5, _impl->mat, _impl->gapo, _impl->gape,
                              KSW_XSTART,     // add flags here
                              &(_impl->qprofile));

    // Throw away the CIGAR left behind by a previous update, if there is one.
    if(_impl->cigar != NULL)
    {
        free(_impl->cigar);
        _impl->cigar_len = 0;
        _impl->cigar = NULL;
    }

    // Second pass works on [qb, qe] of ref and [tb, te] of alt; the "+ 1"
    // treats the end coordinates as inclusive.
    const auto & hit = _impl->result;
    ksw_global(hit.qe - hit.qb + 1, _impl->ref.get() + hit.qb,
               hit.te - hit.tb + 1, _impl->alt.get() + hit.tb,
               5, _impl->mat, _impl->gapo, _impl->gape,
               _impl->altlen,
               &_impl->cigar_len, &_impl->cigar);

    _impl->valid_result = true;
}
Esempio n. 2
0
/**
 * Align one read against every target sequence.
 *
 * The read and each target are translated through conf->table. The read is
 * aligned to the target with ksw_align(), and ksw_global() is then run on the
 * sub-ranges that ksw_align() reported in order to obtain a CIGAR. The CIGAR
 * is walked to fill in aln.nm: operations that consume both sequences are
 * compared position by position, every other operation adds its full length.
 *
 * @param read    Read to align; only read->seq is accessed.
 * @param targets Target sequences; one aln_t is pushed per target.
 * @param conf    Supplies the encoding table, scoring matrix and gap penalties.
 * @return All alignments after ks_introsort(dec_score, ...). Each element
 *         carries the CIGAR buffer produced by ksw_global(); it is not freed
 *         here.
 */
static aln_v align_read(const kseq_t *read,
                        const kseq_v targets,
                        const align_config_t *conf)
{
    kseq_t *r;
    const int32_t read_len = read->seq.l;

    aln_v result;
    kv_init(result);
    kv_resize(aln_t, result, kv_size(targets));

    // Encode read
    uint8_t *read_num = calloc(read_len, sizeof(uint8_t));

    for(size_t k = 0; k < read_len; ++k)
        read_num[k] = conf->table[(int)read->seq.s[k]];

    // Align to each target. qry is handed to every ksw_align() call and
    // released once after the loop.
    kswq_t *qry = NULL;
    for(size_t j = 0; j < kv_size(targets); j++) {
        // Encode target
        r = &kv_A(targets, j);
        uint8_t *ref_num = calloc(r->seq.l, sizeof(uint8_t));
        for(size_t k = 0; k < r->seq.l; ++k)
            ref_num[k] = conf->table[(int)r->seq.s[k]];

        aln_t aln;
        aln.target_idx = j;
        aln.loc = ksw_align(read_len, read_num,
                            r->seq.l, ref_num,
                            conf->m,
                            conf->mat,
                            conf->gap_o,
                            conf->gap_e,
                            KSW_XSTART,
                            &qry);
        ksw_global(aln.loc.qe - aln.loc.qb + 1,
                   &read_num[aln.loc.qb],
                   aln.loc.te - aln.loc.tb + 1,
                   &ref_num[aln.loc.tb],
                   conf->m,
                   conf->mat,
                   conf->gap_o,
                   conf->gap_e,
                   50, /* TODO: Magic number - band width */
                   &aln.n_cigar,
                   &aln.cigar);

        aln.nm = 0;
        size_t qi = aln.loc.qb, ri = aln.loc.tb;
        for(size_t k = 0; k < aln.n_cigar; k++) {
            // NOTE(review): htslib's bam_cigar_type() expects the bare 4-bit
            // operation code, so the length bits are masked off before the
            // lookup - confirm against the macro used in this project.
            const int32_t oplen = bam_cigar_oplen(aln.cigar[k]),
                          optype = bam_cigar_type(aln.cigar[k] & 0xf);

            // Bit 1 = consumes the read, bit 2 = consumes the target. Only
            // when BOTH are set do read and target positions pair up, so
            // only then can they be compared for mismatches.
            if((optype & 3) == 3) {
                for(size_t p = 0; p < oplen; p++) {
                    if(UNLIKELY(read_num[qi + p] != ref_num[ri + p]))
                        aln.nm++;
                }
            } else {
                // One-sided operation: its whole length counts as edits.
                aln.nm += oplen;
            }
            if(optype & 1) qi += oplen;
            if(optype & 2) ri += oplen;
        }

        kv_push(aln_t, result, aln);
        free(ref_num);
    }
    free(qry);
    free(read_num);
    ks_introsort(dec_score, kv_size(result), result.a);

    return result;
}
Esempio n. 3
0
/**
 * Align an already-encoded read against a single target.
 *
 * The target is translated through conf->table and aligned with ksw_align().
 * If the resulting score is below min_score the function returns early with
 * cigar == NULL, n_cigar == 0 and nm == 0. Otherwise ksw_global() is run on
 * the sub-ranges reported by ksw_align() to obtain a CIGAR, which is walked
 * to fill in aln.nm: operations that consume both sequences are compared
 * position by position, every other operation adds its full length.
 *
 * @param target    Target sequence; target->name.s is stored in the result by
 *                  pointer, not copied.
 * @param read_len  Number of entries in read_num.
 * @param read_num  Read, already translated to numeric codes.
 * @param qry       Passed through unchanged to ksw_align().
 * @param conf      Supplies the encoding table, scoring matrix, gap penalties
 *                  and band width.
 * @param min_score Alignments scoring below this get no CIGAR.
 * @return The alignment record. aln.cigar, when non-NULL, is the buffer
 *         produced by ksw_global(); it is not freed here.
 */
static aln_t align_read_against_one(kseq_t *target, const int read_len,
                                    uint8_t *read_num, kswq_t **qry,
                                    const align_config_t *conf,
                                    const int min_score) {
  uint8_t *ref_num = calloc(target->seq.l, sizeof(uint8_t));
  for (size_t k = 0; k < target->seq.l; ++k)
    ref_num[k] = conf->table[(int)target->seq.s[k]];

  aln_t aln;
  /* Give the CIGAR-related fields defined values up front so the early
   * return below does not hand back uninitialised data. */
  aln.cigar = NULL;
  aln.n_cigar = 0;
  aln.nm = 0;
  aln.loc = ksw_align(read_len, read_num, target->seq.l, ref_num, conf->m,
                      conf->mat, conf->gap_o, conf->gap_e, KSW_XSTART, qry);

  aln.target_name = target->name.s;

  if (aln.loc.score < min_score) {
    free(ref_num);
    return aln;
  }

  ksw_global(aln.loc.qe - aln.loc.qb + 1, &read_num[aln.loc.qb],
             aln.loc.te - aln.loc.tb + 1, &ref_num[aln.loc.tb], conf->m,
             conf->mat, conf->gap_o, conf->gap_e, conf->bandwidth, &aln.n_cigar,
             &aln.cigar);

  aln.nm = 0;
  size_t qi = aln.loc.qb, ri = aln.loc.tb;
  for (int k = 0; k < aln.n_cigar; k++) {
    /* NOTE(review): htslib's bam_cigar_type() expects the bare 4-bit
     * operation code, so the length bits are masked off before the lookup -
     * confirm against the macro used in this project. */
    const int32_t oplen = bam_cigar_oplen(aln.cigar[k]),
                  optype = bam_cigar_type(aln.cigar[k] & 0xf);

    /* Bit 1 = consumes the read, bit 2 = consumes the target. Only when BOTH
     * are set do read and target positions pair up, so only then can they be
     * compared for mismatches. */
    if ((optype & 3) == 3) {
      for (int j = 0; j < oplen; j++) {
        if (UNLIKELY(read_num[qi + j] != ref_num[ri + j]))
          aln.nm++;
      }
    } else {
      /* One-sided operation: its whole length counts as edits. */
      aln.nm += oplen;
    }
    if (optype & 1)
      qi += oplen;
    if (optype & 2)
      ri += oplen;
  }

  free(ref_num);

  /* Disabled consistency check, kept for reference: */
  /* size_t cigar_len = aln.loc.qb; */
  /* for (int c = 0; c < aln.n_cigar; c++) { */
  /*   int32_t length = (0xfffffff0 & *(aln.cigar + c)) >> 4; */
  /*   cigar_len += length; */
  /* } */
  /* cigar_len += read_len - aln.loc.qe - 1; */
  /* if(cigar_len != (size_t)read_len) { */
  /*   /\* printf("[ig_align] Error: cigar length (score %d) not equal to read length for XXX (target %s): %zu vs %d\n", aln.loc.score, target->name.s, cigar_len, read_len); *\/ */
  /*   // NOTE: */
  /*   //   It is *really* *scary* that it's spitting out cigars that are not the same length as the query sequence. */
  /*   //   Nonetheless, fixing it seems to involve delving into the depths of ksw_align() and ksw_global(), which would be very time consuming, and the length discrepancy seems to only appear in very poor matches. */
  /*   //   I.e., poor enough that we will subsequently ignore them in partis/python/waterer.py, so it seems to not screw anything up downstream to just set the length-discrepant matches' scores to zero, such that ig-sw doesn't write them to its sam output. */
  /*   //   Note also that it is not always the lowest- or highest-scoring matches that have discrepant lengths (i.e. setting their scores to zero promotes matches with poorer scores, but which do not have discrepant lengths). */
  /*   /\* aln.loc.score = 0; *\/ */
  /*   aln.cigar = NULL; */
  /* } */

  return aln;
}