Esempio n. 1
0
/**
 * @brief Recompute the alignment and its CIGAR from the sequences held in _impl
 *
 * Runs ksw_align over ref/alt, releases the CIGAR left over from a previous
 * run (if any), then calls ksw_global on the sub-ranges reported by ksw_align
 * to fill in cigar / cigar_len. Finally marks the stored result as valid.
 */
void KlibAlignment::update()
{
    // ref goes in first and alt second; below, the qb/qe fields of the result
    // are applied to ref and the tb/te fields to alt
    _impl->result = ksw_align(_impl->reflen, _impl->ref.get(),
                              _impl->altlen, _impl->alt.get(),
                              5, _impl->mat, _impl->gapo, _impl->gape,
                              KSW_XSTART,     // add flags here
                              &(_impl->qprofile));

    // release the CIGAR of an earlier run before ksw_global stores a new one
    if(_impl->cigar != NULL)
    {
        free(_impl->cigar);
        _impl->cigar_len = 0;
        _impl->cigar = NULL;
    }

    // inclusive begin/end coordinates -> lengths of the aligned sub-ranges
    const int ref_span = _impl->result.qe - _impl->result.qb + 1;
    const int alt_span = _impl->result.te - _impl->result.tb + 1;

    ksw_global(ref_span, _impl->ref.get() + _impl->result.qb,
               alt_span, _impl->alt.get() + _impl->result.tb,
               5, _impl->mat, _impl->gapo, _impl->gape,
               _impl->altlen,
               &_impl->cigar_len, &_impl->cigar);

    _impl->valid_result = true;
}
Esempio n. 2
0
/*
 * Look for the mate of hit `h` by aligning `mseq` with ksw_align inside a
 * reference window placed from the insert-size statistics in `st`; the outcome
 * is written to `a`.
 *
 *   opt     supplies the gap penalties (q, r) and the score threshold t
 *   l_pac   upper bound applied to the window end
 *   pac     packed reference, four 2-bit bases per byte (see the decoding loop)
 *   st      insert-size statistics; avg and std are read
 *   h       anchoring hit; k, beg, end and is_rev are read
 *   l_mseq  length of mseq
 *   mseq    mate sequence, translated through nst_nt4_table
 *   a       output hit; per the note below the caller is expected to have zeroed it
 *   g_mat   5x5 scoring matrix handed to ksw_align
 *
 * Returns early, having touched only n_seeds, flag and is_rev in `a`, when the
 * window is shorter than the mate or when the work buffer cannot be allocated.
 */
void bsw2_pair1(const bsw2opt_t *opt, int64_t l_pac, const uint8_t *pac, const bsw2pestat_t *st, const bsw2hit_t *h, int l_mseq, const char *mseq, bsw2hit_t *a, int8_t g_mat[25])
{
	int64_t k, beg, end;
	uint8_t *seq, *ref;
	int i;
	// compute the region start and end
	a->n_seeds = 1; a->flag |= BSW2_FLAG_MATESW; // before calling this routine, *a has been cleared with memset(0); the flag is set with 1<<6/7
	if (h->is_rev == 0) {
		// window extends to the right of the anchor; the mate is searched on the opposite strand
		beg = (int64_t)(h->k + st->avg - EXT_STDDEV * st->std - l_mseq + .499);
		if (beg < h->k) beg = h->k;
		end = (int64_t)(h->k + st->avg + EXT_STDDEV * st->std + .499);
		a->is_rev = 1; a->flag |= 16;
	} else {
		// window extends to the left of the anchor's end
		beg = (int64_t)(h->k + h->end - h->beg - st->avg - EXT_STDDEV * st->std + .499);
		end = (int64_t)(h->k + h->end - h->beg - st->avg + EXT_STDDEV * st->std + l_mseq + .499);
		if (end > h->k + (h->end - h->beg)) end = h->k + (h->end - h->beg);
		a->is_rev = 0;
	}
	if (beg < 1) beg = 1;
	if (end > l_pac) end = l_pac;
	if (end - beg < l_mseq) return;
	// generate the sequence: one buffer holds the query (first l_mseq bytes)
	// immediately followed by the decoded reference window
	seq = (uint8_t*)malloc(l_mseq + (end - beg));
	if (seq == NULL) return; // out of memory: give up like the "window too short" case above
	ref = seq + l_mseq;
	for (k = beg; k < end; ++k)
		ref[k - beg] = pac[k>>2] >> ((~k&3)<<1) & 0x3;
	if (h->is_rev == 0) {
		for (i = 0; i < l_mseq; ++i) { // on the reverse strand
			int c = nst_nt4_table[(int)mseq[i]];
			seq[l_mseq - 1 - i] = c > 3? 4 : 3 - c; // reverse and complement; codes above 3 collapse to 4
		}
	} else {
		for (i = 0; i < l_mseq; ++i) // on the forward strand
			seq[i] = nst_nt4_table[(int)mseq[i]];
	}
	{
		// NOTE(review): opt->t is OR-ed into the flag word, presumably as the
		// score threshold used with KSW_XSUBO, and KSW_XBYTE is requested only
		// while l_mseq * g_mat[0] stays below 250 -- confirm against ksw.h
		int flag = KSW_XSUBO | KSW_XSTART | (l_mseq * g_mat[0] < 250? KSW_XBYTE : 0) | opt->t;
		kswr_t aln;
		aln = ksw_align(l_mseq, seq, end - beg, ref, 5, g_mat, opt->q, opt->r, flag, 0);
		a->G = aln.score;
		a->G2 = aln.score2;
		// scores below the threshold are reported as 0
		if (a->G < opt->t) a->G = 0;
		if (a->G2 < opt->t) a->G2 = 0;
		if (a->G2) a->flag |= BSW2_FLAG_TANDEM;
		a->k = beg + aln.tb;           // window-relative target start -> reference coordinate
		a->len = aln.te - aln.tb + 1;
		a->beg = aln.qb;
		a->end = aln.qe + 1;           // stored as an exclusive end
		/*
		printf("[Q] "); for (i = 0; i < l_mseq; ++i) putchar("ACGTN"[(int)seq[i]]); putchar('\n');
		printf("[R] "); for (i = 0; i < end - beg; ++i) putchar("ACGTN"[(int)ref[i]]); putchar('\n');
		printf("G=%d,G2=%d,beg=%d,end=%d,k=%lld,len=%d\n", a->G, a->G2, a->beg, a->end, a->k, a->len);
		*/
	}
	// the query was reversed above when a->is_rev is set, so mirror the query
	// interval back into the coordinates of the original mseq
	if (a->is_rev) i = a->beg, a->beg = l_mseq - a->end, a->end = l_mseq - i;
	free(seq);
}
Esempio n. 3
0
/*
 * Mask adapter sequence in `seq` in place.
 *
 * The read is encoded once through seq_nt4_table, then every adapter in
 * opt->adaps is aligned against that encoding with ksw_align. An adapter hit
 * that passes the filters below bumps the adapter's counter and overwrites the
 * affected part of `seq` with 'X'. The encoded copy is not refreshed after
 * masking, so every adapter is aligned against the original read.
 */
void ta_trim1(ta_opt_t *opt, char *seq)
{
	kstring_t enc = {0,0,0};
	int pos, a_idx, span;

	enc.l = strlen(seq);
	enc.s = malloc(enc.l);
	for (pos = 0; pos < enc.l; ++pos)
		enc.s[pos] = seq_nt4_table[(uint8_t)seq[pos]];

	for (a_idx = 0; a_idx < opt->n_adaps; ++a_idx) {
		ta_adap_t *adap = &opt->adaps[a_idx];
		kswr_t r;
		double diff;
		int type;

		// adapter is the query (q* fields), the read is the target (t* fields)
		r = ksw_align(adap->len, adap->seq, enc.l, (uint8_t*)enc.s, 5, opt->mat, opt->go, opt->ge, KSW_XBYTE|KSW_XSTART|(opt->min_len * opt->sa), 0);
		++r.te; ++r.qe; // ends become exclusive: [qb,qe) and [tb,te)

		// shorter of the two aligned spans
		span = r.te - r.tb;
		if (r.qe - r.qb < span) span = r.qe - r.qb;
		diff = (double)(span * opt->sa - r.score) / opt->sb / span;
		//printf("%d:%.3f [%d,%d):%d <=> [%d,%d):%d\n", r.score, diff, r.qb, r.qe, adap->len, r.tb, r.te, (int)enc.l);

		// classify the overlap and reject alignments that leave too much of
		// the expected overlap unaligned
		if (r.qb <= r.tb) {
			if (r.qb * opt->sa > opt->sa + opt->sb) continue;
			if (adap->len - r.qe <= enc.l - r.te) { // contained
				if ((adap->len - r.qe) * opt->sa > opt->sa + opt->sb) continue;
				type = 1;
			} else { // 3'-end overlap
				if ((enc.l - r.te) * opt->sa > opt->sa + opt->sb) continue;
				type = 2;
			}
		} else { // 5'-end overlap
			if ((adap->len - r.qe) * opt->sa > opt->sa + opt->sb || r.tb * opt->sa > opt->sa + opt->sb) continue;
			type = 3;
		}

		// an alignment anchored at the matching read end whose score equals
		// span * sa is promoted to an exact match
		if (adap->type == 5 && r.tb == 0 && r.qe == adap->len && (r.te - r.tb) * opt->sa == r.score)
			type = 4;
		else if (adap->type == 3 && r.qb == 0 && r.te == enc.l && (r.te - r.tb) * opt->sa == r.score)
			type = 4;

		if (type == 4) { // exact match
			if (r.te - r.tb < opt->min_len) continue;
		} else if (r.score < opt->min_sc || diff > opt->max_diff) { // inexact match
			continue;
		}

		__sync_fetch_and_add(&adap->cnt, 1);

		switch (adap->type) {
		case 5: // mask from the start of the read up to the projected adapter end
			span = r.te + (adap->len - r.qe);
			if (!(span < enc.l)) span = enc.l;
			for (pos = 0; pos < span; ++pos) seq[pos] = 'X';
			break;
		case 3: // mask from the projected adapter start to the end of the read
			span = 0;
			if (r.tb > r.qb) span = r.tb - r.qb;
			for (pos = span; pos < enc.l; ++pos) seq[pos] = 'X';
			break;
		default:
			break;
		}
	}
	free(enc.s);
}
Esempio n. 4
0
/**
 * Align one read against every target sequence.
 *
 * For each target the read is aligned with ksw_align, the aligned sub-ranges
 * are passed to ksw_global to obtain a CIGAR, and the edit count `nm` is
 * derived from that CIGAR. The collected alignments are sorted with the
 * dec_score ordering before being returned.
 *
 * @param read     read to align; seq.s / seq.l are used
 * @param targets  vector of target sequences
 * @param conf     encoding table, scoring matrix and gap penalties
 * @return one aln_t per target. Each aln.cigar is whatever ksw_global
 *         allocated and is not released here -- NOTE(review): presumably the
 *         caller frees it; confirm.
 */
static aln_v align_read(const kseq_t *read,
                        const kseq_v targets,
                        const align_config_t *conf)
{
    kseq_t *r;
    const int32_t read_len = read->seq.l;

    aln_v result;
    kv_init(result);
    kv_resize(aln_t, result, kv_size(targets));

    // Encode the read once; the encoding is shared by all targets
    uint8_t *read_num = calloc(read_len, sizeof(uint8_t));

    for(size_t k = 0; k < read_len; ++k)
        read_num[k] = conf->table[(int)read->seq.s[k]];

    // Align to each target; the same qry pointer is handed to every ksw_align
    // call and released once after the loop
    kswq_t *qry = NULL;
    for(size_t j = 0; j < kv_size(targets); j++) {
        // Encode target
        r = &kv_A(targets, j);
        uint8_t *ref_num = calloc(r->seq.l, sizeof(uint8_t));
        for(size_t k = 0; k < r->seq.l; ++k)
            ref_num[k] = conf->table[(int)r->seq.s[k]];

        aln_t aln;
        aln.target_idx = j;
        aln.loc = ksw_align(read_len, read_num,
                            r->seq.l, ref_num,
                            conf->m,
                            conf->mat,
                            conf->gap_o,
                            conf->gap_e,
                            KSW_XSTART,
                            &qry);
        // Global alignment restricted to the sub-ranges found above
        ksw_global(aln.loc.qe - aln.loc.qb + 1,
                   &read_num[aln.loc.qb],
                   aln.loc.te - aln.loc.tb + 1,
                   &ref_num[aln.loc.tb],
                   conf->m,
                   conf->mat,
                   conf->gap_o,
                   conf->gap_e,
                   50, /* TODO: Magic number - band width */
                   &aln.n_cigar,
                   &aln.cigar);

        // Walk the CIGAR to count edits: mismatching bases inside aligned
        // runs plus the full length of every other operation
        aln.nm = 0;
        size_t qi = aln.loc.qb, ri = aln.loc.tb;
        for(size_t k = 0; k < aln.n_cigar; k++) {
            const int32_t oplen = bam_cigar_oplen(aln.cigar[k]),
                          optype = bam_cigar_type(aln.cigar[k]);

            // Both bits must be set for the op to consume read AND reference;
            // testing `optype & 3` alone also matched insertions/deletions
            // and compared unrelated bases for them
            if((optype & 3) == 3) { // consumes both - check for mismatches
                for(int32_t m = 0; m < oplen; m++) {
                    if(UNLIKELY(read_num[qi + m] != ref_num[ri + m]))
                        aln.nm++;
                }
            } else {
                aln.nm += oplen;
            }
            if(optype & 1) qi += oplen;
            if(optype & 2) ri += oplen;
        }

        kv_push(aln_t, result, aln);
        free(ref_num);
    }
    free(qry);
    free(read_num);
    ks_introsort(dec_score, kv_size(result), result.a);

    return result;
}
Esempio n. 5
0
/**
 * @brief Align a slave DR against the master DR in both orientations and
 *        report the offset of the better-scoring alignment.
 *
 * @param slaveDR  slave sequence; encoded by prepareSlaveForAlignment into a
 *                 forward and a reverse buffer
 * @param flags    receives score_equal when both orientations score the same,
 *                 reversed when the reverse orientation wins, and failed when
 *                 the winning alignment is rejected
 * @return tb - qb of the winning alignment when it is accepted; 0 when the
 *         orientations tie or the alignment is rejected (inspect flags to
 *         tell these apart from a genuine zero offset)
 */
int Aligner::getOffsetAgainstMaster(std::string& slaveDR, AlignerFlag_t& flags) {

    int slave_dr_length = static_cast<int>(slaveDR.length());
    uint8_t* slave_dr_forward = new uint8_t[slave_dr_length+1];
    uint8_t* slave_dr_reverse = new uint8_t[slave_dr_length+1];
    
    prepareSlaveForAlignment(slaveDR, slave_dr_forward, slave_dr_reverse);

    // query profile 
    kswq_t *slave_forward_query_profile = 0;
    kswq_t *slave_reverse_query_profile = 0;
    
    // alignment of slave against master
    kswr_t forward_return = ksw_align(slave_dr_length, 
                                      slave_dr_forward, 
                                      AL_masterDRLength, 
                                      AL_masterDR, 
                                      5, 
                                      AL_scoringMatrix, 
                                      AL_gapOpening, 
                                      AL_gapExtension, 
                                      AL_xtra, 
                                      &slave_forward_query_profile);
    
    
    kswr_t reverse_return = ksw_align(slave_dr_length, 
                                      slave_dr_reverse, 
                                      AL_masterDRLength, 
                                      AL_masterDR, 
                                      5, 
                                      AL_scoringMatrix, 
                                      AL_gapOpening, 
                                      AL_gapExtension, 
                                      AL_xtra, 
                                      &slave_reverse_query_profile);
    
    
    // free the query profile
    free(slave_forward_query_profile); 
    free(slave_reverse_query_profile);
    // the buffers come from new[], so they must be released with delete[]
    delete[] slave_dr_reverse;
    delete[] slave_dr_forward;
    // figure out which alignment was better
    if (reverse_return.score == forward_return.score) {
        flags[score_equal] = true;
        return 0;
    }
    kswr_t best_alignment_info;
    if(reverse_return.score > forward_return.score) {
        best_alignment_info = reverse_return;
        flags[reversed] = true;
    } else {
        best_alignment_info = forward_return;
    }
    // the score has to reach at least half of the slave length
    int min_query_seq_coverage = static_cast<int>(slave_dr_length / 2);
    // this is not the way you are supposed to use goto
    // actually you're never supposed to use goto
    // but you know what, I don't care!
    // http://xkcd.com/292/
    if(min_query_seq_coverage > best_alignment_info.score) {
        logWarn("Alignment Warning: Slave Alignment Failure",4);
        logWarn("\tfailed query coverage test", 4);
        logWarn("\trequired: "<<min_query_seq_coverage, 4);
        goto FAILED;
    }
    if(best_alignment_info.score < AL_minAlignmentScore) {
        logWarn("Alignment Warning: Slave Alignment Failure",4);
        logWarn("\tfailed minimum score test", 4);
        goto FAILED;
    }
    //if (best_alignment_info.qb != 0 && best_alignment_info.qe != slave_dr_length - 1) {
    //    logWarn("Alignment Warning: Slave Alignment Failure",4);
    //    logWarn("\tfailed internal only test", 4);
    //    goto FAILED;
    //}
    //std::cerr << best_alignment_info.tb - best_alignment_info.qb << " : ";
    //this->printAlignment(best_alignment_info, slaveDR, std::cerr);
    return best_alignment_info.tb - best_alignment_info.qb;

FAILED:
    // shared diagnostics for every rejection path above
    logWarn("\tmaster: "<<mStringCheck->getString(AL_masterDRToken) , 4);
    logWarn("\ttb: "<< best_alignment_info.tb, 4);
    logWarn("\tte: "<< best_alignment_info.te+1, 4);
    logWarn("\tslave: "<< slaveDR, 4);
    logWarn("\tqb: "<< best_alignment_info.qb, 4);
    logWarn("\tqe: "<< best_alignment_info.qe+1, 4);
    logWarn("\tscore: "<< best_alignment_info.score, 4);
    logWarn("\t2nd-score: "<< best_alignment_info.score2, 4);
    logWarn("\t2nd-te: "<< best_alignment_info.te2, 4);
    logWarn("\toffset: "<<best_alignment_info.tb - best_alignment_info.qb,4);
    logWarn("******", 4);
    flags[failed] = true;
    return 0;
}
Esempio n. 6
0
/**
 * Align an already-encoded read against a single target.
 *
 * The target is encoded through conf->table and aligned with ksw_align. When
 * the score reaches min_score, ksw_global is run on the aligned sub-ranges to
 * obtain a CIGAR and the edit count `nm` is derived from it.
 *
 * @param target     target sequence; seq and name are read
 * @param read_len   number of entries in read_num
 * @param read_num   read encoded as matrix indices
 * @param qry        query-profile pointer passed through to ksw_align
 * @param conf       encoding table, scoring matrix, gap penalties, bandwidth
 * @param min_score  alignments scoring below this get no CIGAR
 * @return the alignment; for a score below min_score, cigar is NULL and
 *         n_cigar / nm are 0. target_name points into `target`, it is not
 *         copied.
 */
static aln_t align_read_against_one(kseq_t *target, const int read_len,
                                    uint8_t *read_num, kswq_t **qry,
                                    const align_config_t *conf,
                                    const int min_score) {
  uint8_t *ref_num = calloc(target->seq.l, sizeof(uint8_t));
  for (size_t k = 0; k < target->seq.l; ++k)
    ref_num[k] = conf->table[(int)target->seq.s[k]];

  aln_t aln;
  // Give the CIGAR-related fields defined values up front so the early return
  // below never hands back uninitialized members
  aln.cigar = NULL;
  aln.n_cigar = 0;
  aln.nm = 0;
  aln.loc = ksw_align(read_len, read_num, target->seq.l, ref_num, conf->m,
                      conf->mat, conf->gap_o, conf->gap_e, KSW_XSTART, qry);

  aln.target_name = target->name.s;

  if (aln.loc.score < min_score) {
    free(ref_num);
    return aln;
  }

  // Global alignment restricted to the sub-ranges found above
  ksw_global(aln.loc.qe - aln.loc.qb + 1, &read_num[aln.loc.qb],
             aln.loc.te - aln.loc.tb + 1, &ref_num[aln.loc.tb], conf->m,
             conf->mat, conf->gap_o, conf->gap_e, conf->bandwidth, &aln.n_cigar,
             &aln.cigar);

  // Walk the CIGAR to count edits: mismatching bases inside aligned runs plus
  // the full length of every other operation
  size_t qi = aln.loc.qb, ri = aln.loc.tb;
  for (int k = 0; k < aln.n_cigar; k++) {
    const int32_t oplen = bam_cigar_oplen(aln.cigar[k]),
                  optype = bam_cigar_type(aln.cigar[k]);

    // Both bits must be set for the op to consume read AND reference; testing
    // `optype & 3` alone also matched insertions/deletions and compared
    // unrelated bases for them
    if ((optype & 3) == 3) { // consumes both - check for mismatches
      for (int j = 0; j < oplen; j++) {
        if (UNLIKELY(read_num[qi + j] != ref_num[ri + j]))
          aln.nm++;
      }
    } else {
      aln.nm += oplen;
    }
    if (optype & 1)
      qi += oplen;
    if (optype & 2)
      ri += oplen;
  }

  free(ref_num);

  /* Disabled sanity check, kept for reference. It verified that the clipped
   * query ends plus the CIGAR add up to read_len:
   *
   *   size_t cigar_len = aln.loc.qb;
   *   for (int c = 0; c < aln.n_cigar; c++) {
   *     int32_t length = (0xfffffff0 & *(aln.cigar + c)) >> 4;
   *     cigar_len += length;
   *   }
   *   cigar_len += read_len - aln.loc.qe - 1;
   *   if (cigar_len != (size_t)read_len) {
   *     // printf("[ig_align] Error: cigar length (score %d) not equal to read length for XXX (target %s): %zu vs %d\n", aln.loc.score, target->name.s, cigar_len, read_len);
   *     // aln.loc.score = 0;
   *     aln.cigar = NULL;
   *   }
   *
   * NOTE (from the original author): CIGARs whose length differs from the
   * query length do get produced, which is worrying. Fixing that appears to
   * require digging into ksw_align() and ksw_global(); the discrepancy seems
   * to show up only in very poor matches, which partis/python/waterer.py
   * ignores later anyway, so zeroing the score of such matches (so ig-sw does
   * not write them to its sam output) did not appear to hurt downstream. It is
   * not always the lowest- or highest-scoring matches that are affected, so
   * zeroing their scores promotes poorer-scoring matches that have consistent
   * lengths.
   */

  return aln;
}
Esempio n. 7
0
    uint8_t* slave_dr_reverse = new uint8_t[slave_dr_length+1];
    
    prepareSlaveForAlignment(slaveDR, slave_dr_forward, slave_dr_reverse);

    // query profile 
    kswq_t *slave_forward_query_profile = 0;
    kswq_t *slave_reverse_query_profile = 0;
    
    
    
    // alignment of slave against master
    kswr_t forward_return = ksw_align(slave_dr_length, 
                                      slave_dr_forward, 
                                      AL_masterDRLength, 
                                      AL_masterDR, 
                                      5, 
                                      AL_scoringMatrix, 
                                      AL_gapOpening, 
                                      AL_gapExtension, 
                                      AL_xtra, 
                                      &slave_forward_query_profile);
    
    
    kswr_t reverse_return = ksw_align(slave_dr_length, 
                                      slave_dr_reverse, 
                                      AL_masterDRLength, 
                                      AL_masterDR, 
                                      5, 
                                      AL_scoringMatrix, 
                                      AL_gapOpening, 
                                      AL_gapExtension, 
                                      AL_xtra,