Beispiel #1
0
/*
 * Generate CIGAR array(s) in b->cigar[], with their lengths in b->n_cigar[].
 *
 * opt: scoring options; opt->a and opt->r bound the target length and
 *      __gen_ap() derives the alignment parameters from opt.
 * lq:  query length.
 * seq: two query buffers; seq[1] is used for hits whose flag has bit 0x10
 *      set (presumably the reverse strand -- confirm against callers),
 *      seq[0] otherwise.
 * pac: 2-bit packed reference, four bases per byte.
 * b:   hit set. b->hits is trimmed to b->n entries, and b->cigar /
 *      b->n_cigar are replaced by fresh zeroed arrays of b->max entries.
 *
 * Hits with a non-zero `l` field are skipped and keep a NULL / 0 entry.
 * The function has no way to report errors, so an allocation failure
 * terminates the process via abort() rather than dereferencing NULL.
 *
 * NOTE(review): only the old b->cigar pointer array is freed here, not the
 * per-hit buffers it may have pointed to -- confirm callers release those.
 */
static void gen_cigar(const bsw2opt_t *opt, int lq, uint8_t *seq[2], uint8_t *pac, bwtsw2_t *b)
{
	uint8_t *target;
	int i, matrix[25];
	AlnParam par;
	path_t *path;

	par.matrix = matrix;
	__gen_ap(par, opt);
	i = ((lq + 1) / 2 * opt->a + opt->r) / opt->r + lq; // maximum possible target length
	target = calloc(i, 1);
	path = calloc(i + lq, sizeof(path_t));
	if (target == NULL || path == NULL) abort(); // out of memory
	// memory clean up for b
	if (b->n < b->max) {
		if (b->n == 0) {
			// realloc(ptr, 0) is not portable; release the array explicitly
			free(b->hits);
			b->hits = NULL;
			b->max = 0;
		} else {
			bsw2hit_t *shrunk = realloc(b->hits, b->n * sizeof(bsw2hit_t));
			if (shrunk != NULL) { // a failed shrink leaves the old, larger array valid
				b->hits = shrunk;
				b->max = b->n;
			}
		}
	}
	free(b->cigar);
	free(b->n_cigar);
	b->cigar = calloc(b->max, sizeof *b->cigar);
	b->n_cigar = calloc(b->max, sizeof *b->n_cigar);
	// calloc(0, ...) may legitimately return NULL, so only a non-empty request can fail
	if (b->max > 0 && (b->cigar == NULL || b->n_cigar == NULL)) abort();
	// generate CIGAR
	for (i = 0; i < b->n; ++i) {
		bsw2hit_t *p = b->hits + i;
		uint8_t *query;
		uint32_t k;
		int path_len, beg, end;
		if (p->l) continue;
		// with flag bit 0x10 set, the query interval is mirrored within [0, lq)
		beg = (p->flag & 0x10)? lq - p->end : p->beg;
		end = (p->flag & 0x10)? lq - p->beg : p->end;
		query = seq[(p->flag & 0x10)? 1 : 0] + beg;
		// unpack p->len reference bases starting at p->k (2 bits each, high bits first)
		for (k = p->k; k < p->k + p->len; ++k) // in principle, no out-of-boundary here
			target[k - p->k] = pac[k>>2] >> (~k&3)*2 & 0x3;
		aln_global_core(target, p->len, query, end - beg, &par, path, &path_len);
		b->cigar[i] = aln_path2cigar32(path, path_len, &b->n_cigar[i]);
		if (beg != 0 || end < lq) { // write soft clipping
			// room for up to two extra operations: leading and trailing clip
			uint32_t *grown = realloc(b->cigar[i], sizeof *grown * (b->n_cigar[i] + 2));
			if (grown == NULL) abort(); // out of memory
			b->cigar[i] = grown;
			if (beg != 0) {
				memmove(grown + 1, grown, b->n_cigar[i] * sizeof *grown);
				grown[0] = beg<<4 | 4; // length in the upper bits, op code 4 in the low nibble
				++b->n_cigar[i];
			}
			if (end < lq) {
				grown[b->n_cigar[i]] = (lq - end)<<4 | 4;
				++b->n_cigar[i];
			}
		}
	}
	free(target); free(path);
}
Beispiel #2
0
/*
 * Generate one CIGAR per hit, stored in the matching b->aux[] entry
 * (q->cigar / q->n_cigar).
 *
 * opt:  scoring options; opt->a and opt->r bound the target length and
 *       __gen_ap() derives the alignment parameters from opt.
 * lq:   query length.
 * seq:  two query buffers; seq[1] is used for hits whose flag has bit 0x10
 *       set (presumably the reverse strand -- confirm against callers),
 *       seq[0] otherwise.
 * pac:  2-bit packed reference, four bases per byte.
 * b:    hit set; b->hits[i] is read and b->aux[i] is written.
 * name: only referenced by the disabled debugging block below.
 *
 * NOTE(review): allocation results (calloc/realloc) are not checked here.
 */
static void gen_cigar(const bsw2opt_t *opt, int lq, uint8_t *seq[2], const uint8_t *pac, bwtsw2_t *b, const char *name)
{
	uint8_t *target;
	int i, matrix[25];
	AlnParam par;
	path_t *path;

	par.matrix = matrix;
	__gen_ap(par, opt);
	i = ((lq + 1) / 2 * opt->a + opt->r) / opt->r + lq; // maximum possible target length
	target = calloc(i, 1);
	path = calloc(i + lq, sizeof(path_t));
	// generate CIGAR
	for (i = 0; i < b->n; ++i) {
		bsw2hit_t *p = b->hits + i;
		bsw2aux_t *q = b->aux + i;
		uint8_t *query;
		bwtint_t k;
		int path_len, beg, end;
		// hits with a non-zero `l` field are skipped; their aux entry is left untouched
		if (p->l) continue;
		// with flag bit 0x10 set, the query interval is mirrored within [0, lq)
		beg = (p->flag & 0x10)? lq - p->end : p->beg;
		end = (p->flag & 0x10)? lq - p->beg : p->end;
		query = seq[(p->flag & 0x10)? 1 : 0] + beg;
		// unpack p->len reference bases starting at p->k (2 bits each, high bits first)
		for (k = p->k; k < p->k + p->len; ++k) // in principle, no out-of-boundary here
			target[k - p->k] = pac[k>>2] >> (~k&3)*2 & 0x3;
		aln_global_core(target, p->len, query, end - beg, &par, path, &path_len);
		q->cigar = aln_path2cigar32(path, path_len, &q->n_cigar);
		// NOTE(review): the disabled block below uses `score`, which is not declared in
		// this function; re-enabling it requires capturing aln_global_core()'s return value.
#if 0
		if (name && score != p->G) { // debugging only
			int j, glen = 0;
			for (j = 0; j < q->n_cigar; ++j)
				if ((q->cigar[j]&0xf) == 1 || (q->cigar[j]&0xf) == 2)
					glen += q->cigar[j]>>4;
			fprintf(stderr, "[E::%s] %s - unequal score: %d != %d; (qlen, aqlen, arlen, glen, bw) = (%d, %d, %d, %d, %d)\n",
					__func__, name, score, p->G, lq, end - beg, p->len, glen, opt->bw);
		}
#endif
		if (beg != 0 || end < lq) { // write soft clipping
			// grow by two slots: one for a leading clip, one for a trailing clip
			q->cigar = realloc(q->cigar, 4 * (q->n_cigar + 2));
			if (beg != 0) {
				// shift existing operations right to make room at index 0
				memmove(q->cigar + 1, q->cigar, q->n_cigar * 4);
				q->cigar[0] = beg<<4 | 4; // length in the upper bits, op code 4 in the low nibble
				++q->n_cigar;
			}
			if (end < lq) {
				q->cigar[q->n_cigar] = (lq - end)<<4 | 4;
				++q->n_cigar;
			}
		}
	}
	// NOTE(review): this excerpt ends here; the release of target/path and the
	// closing brace of the function are not visible.
Beispiel #3
0
/*
 * Globally align a query against a window of the packed reference and
 * return the resulting CIGAR.
 *
 * l_pac:      the size of the reference genome
 * pacseq:     the packed reference genome (2 bits per base, four per byte)
 * len:        the size of the query sequence
 * seq:        the query sequence
 * _pos:       in/out; the position of the 5' end of the read on the positive
 *             strand of the genome. It is advanced past a leading deletion.
 * ref_len:    the size of reference genome sequence aligned to the query
 * n_cigar:    out; number of operations in the returned CIGAR
 * band_width: overrides the band width of the default aln_param_bwa settings
 *
 * Returns the CIGAR array produced by bwa_aln_path2cigar(); it is handed to
 * the caller and not freed here. A deletion at either end is stripped and an
 * insertion at either end is rewritten with op code 3 (per the original
 * comment, "S"). End fix-ups are skipped when the CIGAR is NULL or empty.
 * An allocation failure terminates the process via abort().
 */
static jigsaw_cigar_t *refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const ubyte_t *seq, bwtint_t *_pos,
									int ref_len, int *n_cigar /*, int is_end_correct*/, int band_width)
{
	jigsaw_cigar_t *cigar = 0;
	ubyte_t *ref_seq;
	int l = 0, path_len;
	AlnParam ap = aln_param_bwa;
	path_t *path;
	// A *_pos beyond l_pac is reinterpreted through a signed 32-bit cast
	// (presumably a negative coordinate stored in an unsigned type -- TODO confirm).
	int64_t k, pos = *_pos > l_pac? (int64_t)((int32_t)*_pos) : *_pos;

	ap.band_width = band_width; // override band width

	ref_seq = (ubyte_t*)calloc(ref_len, 1);
	// calloc(0, ...) may legitimately return NULL; only a non-empty request can fail
	if (ref_seq == 0 && ref_len > 0) abort();
	for (k = pos; k < pos + ref_len && k < l_pac; ++k)
		ref_seq[l++] = pacseq[k>>2] >> ((~k&3)<<1) & 3;

	// l is the number of reference bases actually fetched (may be < ref_len near the end)
	path = (path_t*)calloc(l+len, sizeof(path_t));
	if (path == 0 && l + len > 0) abort();

	aln_global_core(ref_seq, l, (ubyte_t*)seq, len, &ap, path, &path_len);
	cigar = bwa_aln_path2cigar(path, path_len, n_cigar);

	if (cigar != 0) {
		// Every step re-checks *n_cigar: stripping an end deletion can empty the
		// CIGAR, and indexing cigar[*n_cigar-1] would then read out of bounds.
		if (*n_cigar > 0 && __cigar_op(cigar[0]) == FROM_D) { // deletion at the 5'-end
			pos += __cigar_len(cigar[0]);
			for (k = 0; k < *n_cigar - 1; ++k) cigar[k] = cigar[k+1];
			--(*n_cigar);
		}
		if (*n_cigar > 0 && __cigar_op(cigar[*n_cigar-1]) == FROM_D) --(*n_cigar); // deletion at the 3'-end

		// change "I" at either end of the read to S. just in case. This should rarely happen...
		if (*n_cigar > 0) {
			if (__cigar_op(cigar[*n_cigar-1]) == FROM_I) cigar[*n_cigar-1] = __cigar_create(3, (__cigar_len(cigar[*n_cigar-1])));
			if (__cigar_op(cigar[0]) == FROM_I) cigar[0] = __cigar_create(3, (__cigar_len(cigar[0])));
		}
	}

	*_pos = (bwtint_t)pos;
	free(ref_seq); free(path);
	return cigar;
}
Beispiel #4
0
/*
 * Generate one CIGAR per hit, stored in the matching b->aux[] entry
 * (q->cigar / q->n_cigar).
 *
 * opt: scoring options; opt->a and opt->r bound the target length and
 *      __gen_ap() derives the alignment parameters from opt.
 * lq:  query length.
 * seq: two query buffers; seq[1] is used for hits whose flag has bit 0x10
 *      set (presumably the reverse strand -- confirm against callers),
 *      seq[0] otherwise.
 * pac: 2-bit packed reference, four bases per byte.
 * b:   hit set; b->hits[i] is read and b->aux[i] is written.
 *
 * Hits with a non-zero `l` field are skipped and their aux entry is left
 * untouched. The function has no way to report errors, so an allocation
 * failure terminates the process via abort() rather than dereferencing NULL.
 */
static void gen_cigar(const bsw2opt_t *opt, int lq, uint8_t *seq[2], const uint8_t *pac, bwtsw2_t *b)
{
	uint8_t *target;
	int i, matrix[25];
	AlnParam par;
	path_t *path;

	par.matrix = matrix;
	__gen_ap(par, opt);
	i = ((lq + 1) / 2 * opt->a + opt->r) / opt->r + lq; // maximum possible target length
	target = calloc(i, 1);
	path = calloc(i + lq, sizeof(path_t));
	if (target == NULL || path == NULL) abort(); // out of memory
	// generate CIGAR
	for (i = 0; i < b->n; ++i) {
		bsw2hit_t *p = b->hits + i;
		bsw2aux_t *q = b->aux + i;
		uint8_t *query;
		bwtint_t k;
		int path_len, beg, end;
		if (p->l) continue;
		// with flag bit 0x10 set, the query interval is mirrored within [0, lq)
		beg = (p->flag & 0x10)? lq - p->end : p->beg;
		end = (p->flag & 0x10)? lq - p->beg : p->end;
		query = seq[(p->flag & 0x10)? 1 : 0] + beg;
		// unpack p->len reference bases starting at p->k (2 bits each, high bits first)
		for (k = p->k; k < p->k + p->len; ++k) // in principle, no out-of-boundary here
			target[k - p->k] = pac[k>>2] >> (~k&3)*2 & 0x3;
		aln_global_core(target, p->len, query, end - beg, &par, path, &path_len);
		q->cigar = aln_path2cigar32(path, path_len, &q->n_cigar);
		if (beg != 0 || end < lq) { // write soft clipping
			// room for up to two extra operations: leading and trailing clip
			void *grown = realloc(q->cigar, 4 * (q->n_cigar + 2));
			if (grown == NULL) abort(); // out of memory
			q->cigar = grown;
			if (beg != 0) {
				// shift existing operations right to make room at index 0
				memmove(q->cigar + 1, q->cigar, q->n_cigar * 4);
				q->cigar[0] = beg<<4 | 4; // length in the upper bits, op code 4 in the low nibble
				++q->n_cigar;
			}
			if (end < lq) {
				q->cigar[q->n_cigar] = (lq - end)<<4 | 4;
				++q->n_cigar;
			}
		}
	}
	free(target); free(path);
}