Ejemplo n.º 1
0
/*
 * Read up to n_needed records from the BAM stream bs->fp and convert every
 * selected record into a bwa_seq_t.
 *
 * Record selection uses the bs->which bit mask: bit 1 accepts records flagged
 * BAM_FREAD1, bit 2 accepts records flagged BAM_FREAD2, and bit 4 accepts
 * records carrying neither flag.
 *
 * For each accepted record the bases are translated through
 * bam_nt16_nt4_table, the qualities are stored as (value + 33) capped at 126,
 * and both are passed through seq_reverse() when the record is on the reverse
 * strand. Optional quality trimming is applied when trim_qual >= 1.
 *
 * On return *n holds the number of converted reads. The function returns the
 * calloc'ed array of reads, or 0 when no read was converted.
 */
static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_comp, int trim_qual)
{
	bwa_seq_t *reads;
	bam1_t *rec = bam_init1();
	long trimmed_bases = 0, total_bases = 0;
	int count = 0, status;

	reads = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t));
	for (;;) {
		bwa_seq_t *cur;
		uint8_t *packed, *phred;
		int is_read1, is_read2, wanted, k;

		status = bam_read1(bs->fp, rec);
		if (status < 0) break;

		// decide whether this record belongs to the requested read set
		is_read1 = (rec->core.flag & BAM_FREAD1) != 0;
		is_read2 = (rec->core.flag & BAM_FREAD2) != 0;
		wanted = ((bs->which & 1) && is_read1)
			|| ((bs->which & 2) && is_read2)
			|| ((bs->which & 4) && !is_read1 && !is_read2);
		if (!wanted) continue;

		cur = &reads[count];
		++count;
		cur->tid = -1; // not assigned to a thread yet
		cur->full_len = cur->clip_len = cur->len = rec->core.l_qseq;
		total_bases += cur->full_len;

		packed = bam1_seq(rec);
		phred = bam1_qual(rec);
		cur->seq = (ubyte_t*)calloc(cur->len + 1, 1);
		cur->qual = (ubyte_t*)calloc(cur->len + 1, 1);
		for (k = 0; k != cur->full_len; ++k) {
			int shifted = phred[k] + 33;
			cur->seq[k] = bam_nt16_nt4_table[(int)bam1_seqi(packed, k)];
			cur->qual[k] = shifted < 126 ? shifted : 126;
		}

		if (bam1_strand(rec)) { // record is stored on the reverse strand
			seq_reverse(cur->len, cur->seq, 1);
			seq_reverse(cur->len, cur->qual, 0);
		}
		if (trim_qual >= 1)
			trimmed_bases += bwa_trim_read(trim_qual, cur);

		cur->rseq = (ubyte_t*)calloc(cur->full_len, 1);
		memcpy(cur->rseq, cur->seq, cur->len);
		seq_reverse(cur->len, cur->seq, 0); // *IMPORTANT*: will be reversed back in bwa_refine_gapped()
		seq_reverse(cur->len, cur->rseq, is_comp);
		cur->name = strdup((const char*)bam1_qname(rec));

		if (count == n_needed) break;
	}
	// -1 is treated as normal end of input; any other negative value is fatal
	if (status < 0 && status != -1) err_fatal_simple("Error reading bam file");

	*n = count;
	if (count && trim_qual >= 1)
		fprintf(stderr, "[bwa_read_seq] %.1f%% bases are trimmed.\n", 100.0f * trimmed_bases/total_bases);

	bam_destroy1(rec);
	if (count == 0) {
		free(reads);
		return 0;
	}
	return reads;
}
Ejemplo n.º 2
0
/**
 * Populate the seq, rseq, len and full_len fields of a bwa_seq_t from a
 * string of base characters.
 *
 * @param sequence    record to fill; its seq and rseq pointers are overwritten
 *                    with freshly allocated buffers of read_length bytes
 * @param bases       base characters; the first read_length entries are read
 * @param read_length number of bases to copy
 *
 * Each base is translated through nst_nt4_table. seq then receives
 * seq_reverse(..., 0) and rseq receives seq_reverse(..., 1).
 */
void BWA::copy_bases_into_sequence(bwa_seq_t* sequence, const char* bases, const unsigned read_length) 
{
  // seq, rseq will ultimately be freed by bwa_cal_sa_reg_gap
  // NOTE(review): these buffers come from new[]; confirm that whatever releases
  // them uses delete[] rather than free().
  sequence->seq = new ubyte_t[read_length];
  sequence->rseq = new ubyte_t[read_length];

  // Index through unsigned char: plain char may be signed, and converting a
  // negative char straight to unsigned produces a huge out-of-range index.
  for(unsigned i = 0; i < read_length; i++) sequence->seq[i] = nst_nt4_table[(unsigned char)bases[i]];
  memcpy(sequence->rseq,sequence->seq,read_length);

  // BWA expects the read bases to arrive reversed.
  seq_reverse(read_length,sequence->seq,0);
  seq_reverse(read_length,sequence->rseq,1);

  sequence->full_len = sequence->len = read_length;
}
Ejemplo n.º 3
0
/*
 * Build an rld_t index from the reads in seq[0..n-1].
 *
 * Every read with l_seq > 0 is translated in place through seq_nt6_table.
 * Reads that then contain the code 5 are discarded. For each remaining read,
 * the read after seq_reverse() and then after seq_revcomp6() is appended to a
 * single buffer, each copy followed by its terminating zero byte. The buffer
 * is inserted into a multi-rope (mr_insert_multi), whose run-length blocks are
 * re-encoded into the returned rld_t.
 *
 * n      number of entries in seq
 * seq    reads to index; consumed by this function (see below)
 * is_mt  forwarded unchanged to mr_insert_multi()
 *
 * Returns the new index, or 0 when no read has l_seq > 0.
 *
 * Ownership: unless the early "return 0" is taken, the qual and seq buffers of
 * every read with l_seq > 0 are freed, and so is the seq array itself.
 * NOTE(review): entries with l_seq == 0 are skipped without freeing their
 * seq/qual pointers, and the early return frees nothing; confirm that callers
 * expect this.
 */
struct rld_t *fml_fmi_gen(int n, bseq1_t *seq, int is_mt)
{
	mrope_t *mr;
	kstring_t str = {0,0,0};
	mritr_t itr;
	rlditr_t di;
	const uint8_t *block;
	rld_t *e = 0;
	int k;

	// nothing to do if every read is empty
	for (k = 0; k < n; ++k)
		if (seq[k].l_seq > 0)
			break;
	if (k == n) return 0;

	mr = mr_init(ROPE_DEF_MAX_NODES, ROPE_DEF_BLOCK_LEN, MR_SO_RCLO);
	for (k = 0; k < n; ++k) {
		int i;
		bseq1_t *s = &seq[k];
		if (s->l_seq == 0) continue;
		free(s->qual);
		// Index through uint8_t: a byte >= 0x80 held in a signed char would
		// otherwise become a negative table index.
		for (i = 0; i < s->l_seq; ++i)
			s->seq[i] = seq_nt6_table[(uint8_t)s->seq[i]];
		for (i = 0; i < s->l_seq; ++i)
			if (s->seq[i] == 5) break;
		if (i < s->l_seq) { // the read contains code 5: drop it
			free(s->seq);
			continue;
		}
		// when is_rev_same() reports true, the last symbol is removed
		if (is_rev_same(s->l_seq, s->seq))
			--s->l_seq, s->seq[s->l_seq] = 0;
		seq_reverse(s->l_seq, (uint8_t*)s->seq);
		kputsn(s->seq, s->l_seq + 1, &str); // +1 copies the trailing zero byte too
		seq_revcomp6(s->l_seq, (uint8_t*)s->seq);
		kputsn(s->seq, s->l_seq + 1, &str);
		free(s->seq);
	}
	free(seq);
	mr_insert_multi(mr, str.l, (uint8_t*)str.s, is_mt);
	free(str.s);

	// re-encode the rope's run-length blocks into the rld_t
	e = rld_init(6, 3);
	rld_itr_init(e, &di, 0);
	mr_itr_first(mr, &itr, 1);
	while ((block = mr_itr_next_block(&itr)) != 0) {
		const uint8_t *q = block + 2, *end = block + 2 + *rle_nptr(block);
		while (q < end) {
			int c = 0;
			int64_t l;
			rle_dec1(q, c, l);
			rld_enc(e, &di, l, c);
		}
	}
	rld_enc_finish(e, &di);

	mr_destroy(mr);
	return e;
}
Ejemplo n.º 4
0
// Mostly stolen from bwa_read_bam.
/*
 * Convert one BAM record into a bwa_seq_t.
 *
 * raw        source record
 * p          destination; tid, len, full_len, clip_len, seq, rseq, qual and
 *            max_entries are overwritten
 * is_comp    forwarded to seq_reverse() when rseq is prepared
 * trim_qual  when >= 1, bwa_trim_read() is applied; its return value (the
 *            trimmed-base tally) is discarded here
 *
 * The read name is not copied; p->name is left untouched.
 */
void bam1_to_seq(bam1_t *raw, bwa_seq_t *p, int is_comp, int trim_qual)
{
    const int n_bases = raw->core.l_qseq;
    uint8_t *packed, *phred;
    int k;

    p->tid = -1; // not assigned to a thread yet
    p->full_len = p->clip_len = p->len = n_bases;

    packed = bam1_seq(raw);
    phred = bam1_qual(raw);
    p->seq = (ubyte_t*)calloc(p->len + 1, 1);
    p->qual = (ubyte_t*)calloc(p->len + 1, 1);
    for (k = 0; k != p->full_len; ++k) {
        int shifted = phred[k] + 33;
        p->seq[k] = bam_nt16_nt4_table[(int)bam1_seqi(packed, k)];
        p->qual[k] = shifted < 126 ? shifted : 126; // stored as value + 33, capped at 126
    }

    if (bam1_strand(raw)) { // record is stored on the reverse strand
        seq_reverse(p->len, p->seq, 1);
        seq_reverse(p->len, p->qual, 0);
    }

    if (trim_qual >= 1)
        bwa_trim_read(trim_qual, p);

    p->rseq = (ubyte_t*)calloc(p->full_len, 1);
    memcpy(p->rseq, p->seq, p->len);
    seq_reverse(p->len, p->seq, 0); // *IMPORTANT*: will be reversed back in bwa_refine_gapped()
    seq_reverse(p->len, p->rseq, is_comp);
    p->max_entries = 0;
}
Ejemplo n.º 5
0
/*
 * Read up to n_needed reads from bs and convert them to bwa_seq_t records.
 *
 * bs         input handle; BAM input (bs->is_bam) is delegated to bwa_read_bam()
 * n_needed   maximum number of reads to return
 * n          out: number of reads actually returned (not written when the
 *            barcode length check fails)
 * mode       bit flags: BWA_MODE_COMPREAD is forwarded to seq_reverse() for
 *            rseq, BWA_MODE_IL13 subtracts 31 from every quality character,
 *            BWA_MODE_CFY skips reads whose comment has 'Y' right after the
 *            first ':'; bits 24 and up hold the barcode length
 * trim_qual  when >= 1, reads with qualities are passed to bwa_trim_read()
 *
 * Returns a calloc'ed array of reads, or 0 when no read was produced or the
 * barcode length exceeds BWA_MAX_BCLEN.
 */
bwa_seq_t *bwa_read_seq(bwa_seqio_t *bs, int n_needed, int *n, int mode, int trim_qual)
{
	bwa_seq_t *seqs, *p;
	kseq_t *seq = bs->ks;
	int n_seqs, l, i, is_comp = mode&BWA_MODE_COMPREAD, is_64 = mode&BWA_MODE_IL13, l_bc = mode>>24;
	long n_trimmed = 0, n_tot = 0;

	if (l_bc > BWA_MAX_BCLEN) {
		fprintf(stderr, "[%s] the maximum barcode length is %d.\n", __func__, BWA_MAX_BCLEN);
		return 0;
	}
	if (bs->is_bam) return bwa_read_bam(bs, n_needed, n, is_comp, trim_qual); // l_bc has no effect for BAM input
	n_seqs = 0;
	seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t));
	while ((l = kseq_read(seq)) >= 0) {
		if ((mode & BWA_MODE_CFY) && (seq->comment.l != 0)) {
			// skip reads that are marked to be filtered by Casava
			char *s = strchr(seq->comment.s, ':');
			if (s && *(++s) == 'Y') {
				continue;
			}
		}
		if (is_64 && seq->qual.l)
			for (i = 0; i < seq->qual.l; ++i) seq->qual.s[i] -= 31;
		if (seq->seq.l <= l_bc) continue; // sequence length equals or smaller than the barcode length
		p = &seqs[n_seqs++];
		if (l_bc) { // then trim barcode
			for (i = 0; i < l_bc; ++i) {
				// <ctype.h> functions require a value representable as unsigned char
				unsigned char base = (unsigned char)seq->seq.s[i];
				p->bc[i] = (seq->qual.l && seq->qual.s[i]-33 < BARCODE_LOW_QUAL)? tolower(base) : toupper(base);
			}
			p->bc[i] = 0;
			// shift the remaining bases (and qualities) to the front of the buffers
			for (; i < seq->seq.l; ++i)
				seq->seq.s[i - l_bc] = seq->seq.s[i];
			seq->seq.l -= l_bc; seq->seq.s[seq->seq.l] = 0;
			if (seq->qual.l) {
				for (i = l_bc; i < seq->qual.l; ++i)
					seq->qual.s[i - l_bc] = seq->qual.s[i];
				seq->qual.l -= l_bc; seq->qual.s[seq->qual.l] = 0;
			}
			l = seq->seq.l;
		} else p->bc[0] = 0;
		p->tid = -1; // no assigned to a thread
		p->qual = 0;
		p->full_len = p->clip_len = p->len = l;
		n_tot += p->full_len;
		p->seq = (ubyte_t*)calloc(p->len, 1);
		// Index through unsigned char so that input bytes >= 0x80 cannot become
		// a negative table index where plain char is signed.
		for (i = 0; i != p->full_len; ++i)
			p->seq[i] = nst_nt4_table[(unsigned char)seq->seq.s[i]];
		if (seq->qual.l) { // copy quality
			p->qual = (ubyte_t*)strdup((char*)seq->qual.s);
			if (trim_qual >= 1) n_trimmed += bwa_trim_read(trim_qual, p);
		}
		p->rseq = (ubyte_t*)calloc(p->full_len, 1);
		memcpy(p->rseq, p->seq, p->len);
		seq_reverse(p->len, p->seq, 0); // *IMPORTANT*: will be reversed back in bwa_refine_gapped()
		seq_reverse(p->len, p->rseq, is_comp);
		p->name = strdup((const char*)seq->name.s);
		{ // trim /[12]$
			int t = strlen(p->name);
			if (t > 2 && p->name[t-2] == '/' && (p->name[t-1] == '1' || p->name[t-1] == '2')) p->name[t-2] = '\0';
		}
		if (n_seqs == n_needed) break;
	}
	*n = n_seqs;
	if (n_seqs && trim_qual >= 1)
		fprintf(stderr, "[bwa_read_seq] %.1f%% bases are trimmed. \n", 100.0f * n_trimmed/n_tot);
	if (n_seqs == 0) {
		free(seqs);
		return 0;
	}
	return seqs;
}
Ejemplo n.º 6
0
void BWA::generate_alignments_from_paths(const char* bases, 
                                         const unsigned read_length, 
                                         bwt_aln1_t* paths, 
                                         const unsigned num_paths, 
                                         const unsigned best_count,
                                         const unsigned second_best_count,
                                         Alignment*& alignments, 
                                         unsigned& num_alignments) 
{
  bwa_seq_t* sequence = create_sequence(bases,read_length);

  sequence->aln = paths;
  sequence->n_aln = num_paths;

  // (Ab)use bwa_aln2seq to propagate values stored in the path out into the sequence itself.
  bwa_aln2seq(sequence->n_aln,sequence->aln,sequence);

  // But overwrite key parts of the sequence in case the user passed back only a smaller subset
  // of the paths.
  sequence->c1 = best_count;
  sequence->c2 = second_best_count;
  sequence->type = sequence->c1 > 1 ? BWA_TYPE_REPEAT : BWA_TYPE_UNIQUE;

  num_alignments = 0;
  for(unsigned i = 0; i < (unsigned)sequence->n_aln; i++)
    num_alignments += (sequence->aln + i)->l - (sequence->aln + i)->k + 1;

  alignments = new Alignment[num_alignments];
  unsigned alignment_idx = 0;

  for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) {
    // Stub in a 'working' path, so that only the desired alignment is local-aligned.
    const bwt_aln1_t* path = paths + path_idx;
    bwt_aln1_t working_path = *path;

    // Loop through all alignments, aligning each one individually.
    for(unsigned sa_idx = path->k; sa_idx <= path->l; sa_idx++) {
      working_path.k = working_path.l = sa_idx;
      sequence->aln = &working_path;
      sequence->n_aln = 1;

      sequence->sa = sa_idx;
      sequence->strand = path->a;
      sequence->score = path->score;

      // Each time through bwa_refine_gapped, seq gets reversed.  Revert the reverse.
      // TODO: Fix the interface to bwa_refine_gapped so its easier to work with.
      if(alignment_idx > 0)
        seq_reverse(sequence->len, sequence->seq, 0);

      // Copy the local alignment data into the alignment object.
      *(alignments + alignment_idx) = generate_final_alignment_from_sequence(sequence);

      alignment_idx++;
    }
  }

  sequence->aln = NULL;
  sequence->n_aln = 0;

  bwa_free_read_seq(1,sequence);
}
Ejemplo n.º 7
0
/*
 * Refine gapped alignments and compute MD strings for seqs[n_seqs1 .. n_seqs2-1].
 *
 * iidx     not referenced in this function body
 * bns      reference annotations; bns->l_pac is passed to the helpers
 * n_seqs1  first read index to process (inclusive)
 * n_seqs2  last read index to process (exclusive)
 * seqs     array of reads, updated in place (seq, cigar, md, nm, multi[].cigar)
 * pacseq   packed reference passed to refine_gapped_core()/bwa_cal_md1()
 * ntbns    non-NULL selects the colour-space branch
 *
 * NOTE(review): ntpac is initialised to 0 and never assigned in this function,
 * yet the colour-space branch passes it to bwa_cs2nt_core(),
 * refine_gapped_core() and bwa_cal_md1(). Confirm whether colour-space input
 * is expected to reach this function.
 */
void bwa_rg_tpx(int iidx, const bntseq_t *bns, int n_seqs1, int n_seqs2, 
                bwa_seq_t *seqs, ubyte_t *pacseq, bntseq_t *ntbns)
{
	ubyte_t *ntpac = 0;
	int i, j;
	kstring_t *str;
#ifdef _TIMING
	struct timeval st;
	uint64_t s1, e1;
	double pos1_time = 0.0;
#endif

#ifdef _TIMING
	// start timestamp in microseconds
	gettimeofday(&st, NULL);
	s1 = st.tv_sec * 1000000L + (time_t)st.tv_usec;
#endif

	// pass 1: compute CIGARs for the gapped hits of every read
	for (i = n_seqs1; i < n_seqs2; ++i) {
		bwa_seq_t *s = seqs + i;
		seq_reverse(s->len, s->seq, 0); // IMPORTANT: s->seq is reversed here!!!
		for (j = 0; j < s->n_multi; ++j) {
			bwt_multi1_t *q = s->multi + j;
			int n_cigar;
			if (q->gap == 0) continue;
			q->cigar = refine_gapped_core(bns->l_pac, pacseq, s->len, q->strand? s->rseq : s->seq, &q->pos,
										  (q->strand? 1 : -1) * q->gap, &n_cigar, 1);
			q->n_cigar = n_cigar;
		}
		// the primary hit is refined only when it is a real, gapped alignment
		if (s->type == BWA_TYPE_NO_MATCH || s->type == BWA_TYPE_MATESW || s->n_gapo == 0) continue;
		s->cigar = refine_gapped_core(bns->l_pac, pacseq, s->len, s->strand? s->rseq : s->seq, &s->pos,
									  (s->strand? 1 : -1) * (s->n_gapo + s->n_gape), &s->n_cigar, 1);
	}

	if (ntbns) { // in color space
		for (i = n_seqs1; i < n_seqs2; ++i) {
			bwa_seq_t *s = seqs + i;
			bwa_cs2nt_core(s, bns->l_pac, ntpac);
			for (j = 0; j < s->n_multi; ++j) {
				bwt_multi1_t *q = s->multi + j;
				int n_cigar;
				if (q->gap == 0) continue;
				// replace the CIGAR computed in pass 1
				free(q->cigar);
				q->cigar = refine_gapped_core(bns->l_pac, ntpac, s->len, q->strand? s->rseq : s->seq, &q->pos,
											  (q->strand? 1 : -1) * q->gap, &n_cigar, 0);
				q->n_cigar = n_cigar;
			}
			if (s->type != BWA_TYPE_NO_MATCH && s->cigar) { // update cigar again
				free(s->cigar);
				s->cigar = refine_gapped_core(bns->l_pac, ntpac, s->len, s->strand? s->rseq : s->seq, &s->pos,
											  (s->strand? 1 : -1) * (s->n_gapo + s->n_gape), &s->n_cigar, 0);
			}
		}
	}

	// generate MD tag
	// str is a scratch buffer handed to every bwa_cal_md1() call
	str = (kstring_t*)calloc(1, sizeof(kstring_t));

	for (i = n_seqs1; i < n_seqs2; ++i) {
		bwa_seq_t *s = seqs + i;
		if (s->type != BWA_TYPE_NO_MATCH) {
			int nm;
			s->md = bwa_cal_md1(s->n_cigar, s->cigar, s->len, s->pos, s->strand? s->rseq : s->seq,
								bns->l_pac, ntbns? ntpac : pacseq, str, &nm);
			s->nm = nm;
		}
	}

        free(str->s); free(str);

	// correct for trimmed reads
	if (!ntbns) // trimming is only enabled for Illumina reads
		for (i = n_seqs1; i < n_seqs2; ++i) bwa_correct_trimmed(seqs + i);
       
#ifdef _TIMING
	// elapsed wall-clock time in seconds
	gettimeofday(&st, NULL);
	e1 = st.tv_sec * 1000000L + (time_t)st.tv_usec;
	pos1_time = (double)((double)e1 - (double)s1) / 1000000.0;

	// pe_lock is held around the fprintf when pthreads are available
# ifdef HAVE_PTHREAD
	pthread_mutex_lock(&pe_lock);
# endif // HAVE_PTHREAD
	fprintf(stderr,"bwapese1 time = %lf (sec)\n",pos1_time);
# ifdef HAVE_PTHREAD
	pthread_mutex_unlock(&pe_lock);
# endif // HAVE_PTHREAD
#endif
 
	return;
}
/*
 * Align the read stored in prof against ref.
 *
 * prof         query profile; must carry a byte and/or word profile
 * ref          reference sequence
 * refLen       length of ref
 * weight_gapO  gap-open weight passed to the kernels
 * weight_gapE  gap-extension weight passed to the kernels
 * flag         selects how much of the result is computed (see the parameter
 *              comment below)
 * filters      score threshold used together with flag
 * filterd      length threshold used together with flag
 * maskLen      when < 15, no second-best information is reported
 *
 * Returns a calloc'ed s_align, or NULL when prof carries no profile, when the
 * byte kernel's score reaches 255 and no word profile exists, or when
 * banded_sw() yields no path. Every temporary buffer is released on every
 * return path.
 */
s_align* ssw_align (const s_profile* prof,
					const int8_t* ref,
				  	int32_t refLen,
				  	const uint8_t weight_gapO,
				  	const uint8_t weight_gapE,
					const uint8_t flag,	//  (from high to low) bit 5: return the best alignment beginning position; 6: if (ref_end1 - ref_begin1 <= filterd) && (read_end1 - read_begin1 <= filterd), return cigar; 7: if max score >= filters, return cigar; 8: always return cigar; if 6 & 7 are both setted, only return cigar when both filter fulfilled
					const uint16_t filters,
					const int32_t filterd,
					const int32_t maskLen) {

	alignment_end* bests = 0, *bests_reverse = 0;
	__m128i* vP = 0;
	int32_t word = 0, band_width = 0, readLen = prof->readLen;
	int8_t* read_reverse = 0;
	cigar* path;
	s_align* r = (s_align*)calloc(1, sizeof(s_align));
	r->ref_begin1 = -1;
	r->read_begin1 = -1;
	r->cigar = 0;
	r->cigarLen = 0;
	if (maskLen < 15) {
		fprintf(stderr, "When maskLen < 15, the function ssw_align doesn't return 2nd best alignment information.\n");
	}

	// Find the alignment scores and ending positions
	if (prof->profile_byte) {
		bests = sw_sse2_byte(ref, 0, refLen, readLen, weight_gapO, weight_gapE, prof->profile_byte, -1, prof->bias, maskLen);
		if (prof->profile_word && bests[0].score == 255) {
			// byte score hit 255: redo the search with the word kernel
			free(bests);
			bests = sw_sse2_word(ref, 0, refLen, readLen, weight_gapO, weight_gapE, prof->profile_word, -1, maskLen);
			word = 1;
		} else if (bests[0].score == 255) {
			fprintf(stderr, "Please set 2 to the score_size parameter of the function ssw_init, otherwise the alignment results will be incorrect.\n");
			free(bests); // previously leaked on this error path
			free(r);
			return NULL;
		}
	}else if (prof->profile_word) {
		bests = sw_sse2_word(ref, 0, refLen, readLen, weight_gapO, weight_gapE, prof->profile_word, -1, maskLen);
		word = 1;
	}else {
		fprintf(stderr, "Please call the function ssw_init before ssw_align.\n");
		free(r);
		return NULL;
	}
	r->score1 = bests[0].score;
	r->ref_end1 = bests[0].ref;
	r->read_end1 = bests[0].read;
	if (maskLen >= 15) {
		r->score2 = bests[1].score;
		r->ref_end2 = bests[1].ref;
	} else {
		r->score2 = 0;
		r->ref_end2 = -1;
	}
	free(bests);
	if (flag == 0 || (flag == 2 && r->score1 < filters)) goto end;

	// Find the beginning position of the best alignment.
	read_reverse = seq_reverse(prof->read, r->read_end1);
	if (word == 0) {
		vP = qP_byte(read_reverse, prof->mat, r->read_end1 + 1, prof->n, prof->bias);
		bests_reverse = sw_sse2_byte(ref, 1, r->ref_end1 + 1, r->read_end1 + 1, weight_gapO, weight_gapE, vP, r->score1, prof->bias, maskLen);
	} else {
		vP = qP_word(read_reverse, prof->mat, r->read_end1 + 1, prof->n);
		bests_reverse = sw_sse2_word(ref, 1, r->ref_end1 + 1, r->read_end1 + 1, weight_gapO, weight_gapE, vP, r->score1, maskLen);
	}
	free(vP);
	free(read_reverse);
	r->ref_begin1 = bests_reverse[0].ref;
	r->read_begin1 = r->read_end1 - bests_reverse[0].read;
	free(bests_reverse);
	if ((7&flag) == 0 || ((2&flag) != 0 && r->score1 < filters) || ((4&flag) != 0 && (r->ref_end1 - r->ref_begin1 > filterd || r->read_end1 - r->read_begin1 > filterd))) goto end;

	// Generate cigar.
	refLen = r->ref_end1 - r->ref_begin1 + 1;
	readLen = r->read_end1 - r->read_begin1 + 1;
	band_width = abs(refLen - readLen) + 1;
	path = banded_sw(ref + r->ref_begin1, prof->read + r->read_begin1, refLen, readLen, r->score1, weight_gapO, weight_gapE, band_width, prof->mat, prof->n);
	if (path == 0) {
		free(r);
		r = NULL;
	}
	else {
		// ownership of path->seq moves to r; only the wrapper struct is freed
		r->cigar = path->seq;
		r->cigarLen = path->length;
		free(path);
	}

end:
	return r;
}
Ejemplo n.º 9
0
/*
 * Read the next batch of reads through kseq_read1() into a reusable array.
 *
 * bs         input handle; BAM input is not supported here (the process exits)
 * iter, tid, thrds  forwarded unchanged to kseq_read1()
 * _seqs      in/out: read array; allocated with READ_SEQ_SIZE slots on the
 *            first call (when *n_avail == 0) and reused afterwards
 * n_avail    in/out: number of slots in *_seqs
 * mode       bit flags as in bwa_read_seq(); bits 24 and up hold the barcode
 *            length
 * trim_qual  when >= 1, reads with qualities are passed to bwa_trim_read()
 *
 * Returns the number of reads stored, or 0 when the barcode length exceeds
 * BWA_MAX_BCLEN. Aborts when more than READ_SEQ_SIZE reads arrive.
 *
 * Per-slot seq/rseq buffers are kept between calls and grown only when a read
 * is longer than the slot's recorded capacity (llen).
 */
int bwa_read_seq1(bwa_seqio_t *bs, int iter, int tid, int thrds, bwa_seq_t **_seqs, int *n_avail, int mode, int trim_qual)
{
	bwa_seq_t *p;
	bwa_seq_t *seqs = *_seqs;
	kseq_t *seq = bs->ks;
	int n_seqs, l, i, is_comp = mode&BWA_MODE_COMPREAD, is_64 = mode&BWA_MODE_IL13, l_bc = mode>>24;
	long n_trimmed = 0, n_tot = 0;
	bool first;

	if (l_bc > BWA_MAX_BCLEN) {
		fprintf(stderr, "[%s] the maximum barcode length is %d.\n", __func__, BWA_MAX_BCLEN);
		return 0;
	}
	if (bs->is_bam) {
		fprintf (stderr, "IS BAM! --- Port bwa_read_bam function\n");
		bwa_free_read_seq(*n_avail, seqs);
		exit(0);
	}
	if (*n_avail == 0) {
		// first call: allocate the reusable array
		seqs = (bwa_seq_t*)calloc(READ_SEQ_SIZE, sizeof(bwa_seq_t));
		*_seqs = seqs;
		*n_avail = READ_SEQ_SIZE;
	}
	n_seqs = 0;
	first = true;
	while ((l = kseq_read1(seq, iter, tid, thrds, &first)) >= 0) {
		if ((mode & BWA_MODE_CFY) && (seq->comment.l != 0)) {
			// skip reads that are marked to be filtered by Casava
			char *s = strchr(seq->comment.s, ':');
			if (s && *(++s) == 'Y') {
				continue;
			}
		}
		if (is_64 && seq->qual.l)
			for (i = 0; i < seq->qual.l; ++i) seq->qual.s[i] -= 31;
		if (seq->seq.l <= l_bc) continue; // sequence length equals or smaller than the barcode length
		p = &seqs[n_seqs++];
		if(n_seqs > READ_SEQ_SIZE) { // checked before p is dereferenced
			fprintf (stderr, "READ_SEQ_SIZE not big enough\n");
			abort();
		}
		init_bwa_seq_t(p);
		if (l_bc) { // then trim barcode
			for (i = 0; i < l_bc; ++i) {
				// <ctype.h> functions require a value representable as unsigned char
				unsigned char base = (unsigned char)seq->seq.s[i];
				p->bc[i] = (seq->qual.l && seq->qual.s[i]-33 < BARCODE_LOW_QUAL)? tolower(base) : toupper(base);
			}
			p->bc[i] = 0;
			for (; i < seq->seq.l; ++i)
				seq->seq.s[i - l_bc] = seq->seq.s[i];
			seq->seq.l -= l_bc; seq->seq.s[seq->seq.l] = 0;
			if (seq->qual.l) {
				for (i = l_bc; i < seq->qual.l; ++i)
					seq->qual.s[i - l_bc] = seq->qual.s[i];
				seq->qual.l -= l_bc; seq->qual.s[seq->qual.l] = 0;
			}
			l = seq->seq.l;
		} else p->bc[0] = 0;
		p->tid = -1; // no assigned to a thread
		p->full_len = p->clip_len = p->len = l;
		n_tot += p->full_len;
		if (p->llen < p->len) {
			// Grow both buffers. The old rseq is released too; previously
			// only seq was freed, so rseq leaked on every growth.
			free(p->seq);
			free(p->rseq);
			p->llen = p->len;
			p->seq = (ubyte_t*)calloc(p->len, 1);
			p->rseq = (ubyte_t*)calloc(p->full_len, 1);
		}
		// Index through unsigned char so that input bytes >= 0x80 cannot become
		// a negative table index where plain char is signed.
		for (i = 0; i != p->full_len; ++i)
			p->seq[i] = nst_nt4_table[(unsigned char)seq->seq.s[i]];
		if (seq->qual.l) { // copy quality
			free(p->qual);
			p->qual = (ubyte_t*)strdup((char*)seq->qual.s);
			if (trim_qual >= 1) n_trimmed += bwa_trim_read(trim_qual, p);
		}
		memcpy(p->rseq, p->seq, p->len);
		seq_reverse(p->len, p->seq, 0); // *IMPORTANT*: will be reversed back in bwa_refine_gapped()
		seq_reverse(p->len, p->rseq, is_comp);
		free(p->name);
		p->name = strdup((const char*)seq->name.s);
		{ // trim /[12]$
			int t = strlen(p->name);
			if (t > 2 && p->name[t-2] == '/' && (p->name[t-1] == '1' || p->name[t-1] == '2')) p->name[t-2] = '\0';
		}
		if(kseq_end(seq)) break;
	}
	if (n_seqs && trim_qual >= 1)
		fprintf(stderr, "[bwa_read_seq1] %.1f%% bases are trimmed.\n", 100.0f * n_trimmed/n_tot);
	return n_seqs;
}
Ejemplo n.º 10
0
/*
 * Read up to n_needed records from the BAM stream in bs and convert every
 * selected record into a bwa_seq_t. The record accessors are chosen at
 * compile time: the htslib names when USE_HTSLIB is defined, the legacy
 * bam1_* names otherwise.
 *
 * bs         input handle; bs->which is a bit mask (1: records flagged
 *            BAM_FREAD1, 2: records flagged BAM_FREAD2, 4: records with
 *            neither flag)
 * n_needed   maximum number of reads to return
 * n          out: number of reads returned
 * is_comp    forwarded to seq_reverse() when rseq is prepared
 * trim_qual  when >= 1, each read is passed to bwa_trim_read()
 *
 * Returns a calloc'ed array of reads, or 0 when no read was converted.
 */
static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_comp, int trim_qual)
{
	bwa_seq_t *seqs, *p;
	int n_seqs, l, i;
	long n_trimmed = 0, n_tot = 0;
	bam1_t *b;
	int res;

	b = bam_init1();
	n_seqs = 0;
	seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t));
	// both preprocessor branches open the same loop body
#ifdef USE_HTSLIB
	while ((res = sam_read1(bs->fp, bs->h, b)) >= 0) {
#else
	while ((res = bam_read1(bs->fp, b)) >= 0) {
#endif
		uint8_t *s, *q;
		int go = 0;
		// decide whether this record belongs to the requested read set
		if ((bs->which & 1) && (b->core.flag & BAM_FREAD1)) go = 1;
		if ((bs->which & 2) && (b->core.flag & BAM_FREAD2)) go = 1;
		if ((bs->which & 4) && !(b->core.flag& BAM_FREAD1) && !(b->core.flag& BAM_FREAD2))go = 1;
		if (go == 0) continue;
		l = b->core.l_qseq;
		p = &seqs[n_seqs++];
		p->tid = -1; // no assigned to a thread
		p->qual = 0;
		p->full_len = p->clip_len = p->len = l;
		n_tot += p->full_len;
#ifdef USE_HTSLIB
		s = bam_get_seq(b); q = bam_get_qual(b);
#else
		s = bam1_seq(b); q = bam1_qual(b);
#endif
		p->seq = (ubyte_t*)calloc(p->len + 1, 1);
		p->qual = (ubyte_t*)calloc(p->len + 1, 1);
		for (i = 0; i != p->full_len; ++i) {
#ifdef USE_HTSLIB
			p->seq[i] = bam_nt16_nt4_table[(int)bam_seqi(s, i)];
#else
			p->seq[i] = bam_nt16_nt4_table[(int)bam1_seqi(s, i)];
#endif
			// quality is stored as value + 33, capped at 126
			p->qual[i] = q[i] + 33 < 126? q[i] + 33 : 126;
		}
#ifdef USE_HTSLIB
		if (bam_is_rev(b)) { // then reverse 
#else
		if (bam1_strand(b)) { // then reverse 
#endif
			seq_reverse(p->len, p->seq, 1);
			seq_reverse(p->len, p->qual, 0);
		}
		if (trim_qual >= 1) n_trimmed += bwa_trim_read(trim_qual, p);
		p->rseq = (ubyte_t*)calloc(p->full_len, 1);
		memcpy(p->rseq, p->seq, p->len);
		seq_reverse(p->len, p->seq, 0); // *IMPORTANT*: will be reversed back in bwa_refine_gapped()
		seq_reverse(p->len, p->rseq, is_comp);
#ifdef USE_HTSLIB
		p->name = strdup((const char*)bam_get_qname(b));
#else
		p->name = strdup((const char*)bam1_qname(b));
#endif
		if (n_seqs == n_needed) break;
	}
	// -1 is treated as normal end of input; any other negative value is fatal
	if (res < 0 && res != -1) err_fatal_simple("Error reading bam file");
	*n = n_seqs;
	if (n_seqs && trim_qual >= 1)
		fprintf(stderr, "[bwa_read_seq] %.1f%% bases are trimmed.\n", 100.0f * n_trimmed/n_tot);
	if (n_seqs == 0) {
		free(seqs);
		bam_destroy1(b);
		return 0;
	}
	bam_destroy1(b);
	return seqs;
}

/* Barcode bases whose quality character minus 33 is below this value are
 * stored in lower case by bwa_read_seq(); all others are stored in upper case. */
#define BARCODE_LOW_QUAL 13

/*
 * Read up to n_needed reads from bs and convert them to bwa_seq_t records.
 *
 * bs         input handle; BAM input (bs->is_bam) is delegated to bwa_read_bam()
 * n_needed   maximum number of reads to return
 * n          out: number of reads actually returned (not written when the
 *            barcode length check fails)
 * mode       bit flags: BWA_MODE_COMPREAD is forwarded to seq_reverse() for
 *            rseq, BWA_MODE_IL13 subtracts 31 from every quality character,
 *            BWA_MODE_CFY skips reads whose comment has 'Y' right after the
 *            first ':'; bits 24 and up hold the barcode length
 * trim_qual  when >= 1, reads with qualities are passed to bwa_trim_read()
 *
 * Returns a calloc'ed array of reads (release it with bwa_free_read_seq()),
 * or 0 when no read was produced or the barcode length exceeds BWA_MAX_BCLEN.
 */
bwa_seq_t *bwa_read_seq(bwa_seqio_t *bs, int n_needed, int *n, int mode, int trim_qual)
{
	bwa_seq_t *seqs, *p;
	kseq_t *seq = bs->ks;
	int n_seqs, l, i, is_comp = mode&BWA_MODE_COMPREAD, is_64 = mode&BWA_MODE_IL13, l_bc = mode>>24;
	long n_trimmed = 0, n_tot = 0;

	if (l_bc > BWA_MAX_BCLEN) {
		fprintf(stderr, "[%s] the maximum barcode length is %d.\n", __func__, BWA_MAX_BCLEN);
		return 0;
	}
	if (bs->is_bam) return bwa_read_bam(bs, n_needed, n, is_comp, trim_qual); // l_bc has no effect for BAM input
	n_seqs = 0;
	seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t));
	while ((l = kseq_read(seq)) >= 0) {
		if ((mode & BWA_MODE_CFY) && (seq->comment.l != 0)) {
			// skip reads that are marked to be filtered by Casava
			char *s = strchr(seq->comment.s, ':');
			if (s && *(++s) == 'Y') {
				continue;
			}
		}
		if (is_64 && seq->qual.l)
			for (i = 0; i < seq->qual.l; ++i) seq->qual.s[i] -= 31;
		if (seq->seq.l <= l_bc) continue; // sequence length equals or smaller than the barcode length
		p = &seqs[n_seqs++];
		if (l_bc) { // then trim barcode
			for (i = 0; i < l_bc; ++i) {
				// <ctype.h> functions require a value representable as unsigned char
				unsigned char base = (unsigned char)seq->seq.s[i];
				p->bc[i] = (seq->qual.l && seq->qual.s[i]-33 < BARCODE_LOW_QUAL)? tolower(base) : toupper(base);
			}
			p->bc[i] = 0;
			// shift the remaining bases (and qualities) to the front of the buffers
			for (; i < seq->seq.l; ++i)
				seq->seq.s[i - l_bc] = seq->seq.s[i];
			seq->seq.l -= l_bc; seq->seq.s[seq->seq.l] = 0;
			if (seq->qual.l) {
				for (i = l_bc; i < seq->qual.l; ++i)
					seq->qual.s[i - l_bc] = seq->qual.s[i];
				seq->qual.l -= l_bc; seq->qual.s[seq->qual.l] = 0;
			}
			l = seq->seq.l;
		} else p->bc[0] = 0;
		p->tid = -1; // no assigned to a thread
		p->qual = 0;
		p->full_len = p->clip_len = p->len = l;
		n_tot += p->full_len;
		p->seq = (ubyte_t*)calloc(p->full_len, 1);
		// Index through unsigned char so that input bytes >= 0x80 cannot become
		// a negative table index where plain char is signed.
		for (i = 0; i != p->full_len; ++i)
			p->seq[i] = nst_nt4_table[(unsigned char)seq->seq.s[i]];
		if (seq->qual.l) { // copy quality
			p->qual = (ubyte_t*)strdup((char*)seq->qual.s);
			if (trim_qual >= 1) n_trimmed += bwa_trim_read(trim_qual, p);
		}
		p->rseq = (ubyte_t*)calloc(p->full_len, 1);
		memcpy(p->rseq, p->seq, p->len);
		seq_reverse(p->len, p->seq, 0); // *IMPORTANT*: will be reversed back in bwa_refine_gapped()
		seq_reverse(p->len, p->rseq, is_comp);
		p->name = strdup((const char*)seq->name.s);
		{ // trim /[12]$
			int t = strlen(p->name);
			if (t > 2 && p->name[t-2] == '/' && (p->name[t-1] == '1' || p->name[t-1] == '2')) p->name[t-2] = '\0';
		}
		if (n_seqs == n_needed) break;
	}
	*n = n_seqs;
	if (n_seqs && trim_qual >= 1)
		fprintf(stderr, "[bwa_read_seq] %.1f%% bases are trimmed.\n", 100.0f * n_trimmed/n_tot);
	if (n_seqs == 0) {
		free(seqs);
		return 0;
	}
	return seqs;
}

/*
 * Release an array of n_seqs reads together with every buffer each read owns:
 * the per-hit CIGARs in multi[], then name, seq, rseq, qual, aln, md, multi
 * and cigar. The array itself is freed last.
 */
void bwa_free_read_seq(int n_seqs, bwa_seq_t *seqs)
{
	int idx;

	for (idx = 0; idx != n_seqs; ++idx) {
		bwa_seq_t *cur = &seqs[idx];
		int hit;

		// free(NULL) is a no-op, so hits without a CIGAR need no guard
		for (hit = 0; hit < cur->n_multi; ++hit)
			free(cur->multi[hit].cigar);

		free(cur->name);
		free(cur->seq);
		free(cur->rseq);
		free(cur->qual);
		free(cur->aln);
		free(cur->md);
		free(cur->multi);
		free(cur->cigar);
	}
	free(seqs);
}
Ejemplo n.º 11
0
/*
 * Read up to n_needed reads from bs, dropping the first mid bases (and
 * qualities) of every read.
 *
 * bs        input handle; reads come from bs->ks through kseq_read()
 * n_needed  maximum number of reads to return
 * n         out: number of reads returned
 * is_comp   forwarded to seq_reverse() when rseq is prepared
 * mid       number of leading bases to skip; assumed to be >= 0
 *
 * Reads longer than MAX_READ_LENGTH are truncated to that length before the
 * prefix is removed. Reads shorter than mid are skipped, because their length
 * after removing the prefix would be negative.
 *
 * Returns a calloc'ed array of reads, or 0 when no read was produced.
 */
bwa_seq_t *bwa_read_seq(bwa_seqio_t *bs, unsigned int n_needed, unsigned int *n, int is_comp, int mid)
{
	bwa_seq_t *seqs, *p;
	kseq_t *seq = bs->ks;
	int n_seqs, l, i;

	n_seqs = 0;
	seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t));
	while ((l = kseq_read(seq)) >= 0) {
		if ( l > MAX_READ_LENGTH ) l = MAX_READ_LENGTH; //put a limit on sequence length
		// A read shorter than the prefix used to yield a negative length,
		// which broke the allocation and made the copy loop run unbounded.
		if (l < mid) continue;
		p = &seqs[n_seqs++];
		p->tid = -1; // no assigned to a thread
		p->qual = 0;
		p->len = l - mid;
		p->seq = (ubyte_t*)calloc(p->len, 1);
		// Index through unsigned char so that input bytes >= 0x80 cannot become
		// a negative table index where plain char is signed.
		for (i = 0; i != p->len; ++i)
			p->seq[i] = nst_nt4_table[(unsigned char)seq->seq.s[i+mid]];
		p->rseq = (ubyte_t*)calloc(p->len, 1);
		memcpy(p->rseq, p->seq, p->len);
		seq_reverse(p->len, p->seq, 0); // *IMPORTANT*: will be reversed back in bwa_refine_gapped()
		seq_reverse(p->len, p->rseq, is_comp);

		p->name = strdup((const char*)seq->name.s);
		{ // trim /[12]$
			int t = strlen(p->name);
			if (t > 2 && p->name[t-2] == '/' && (p->name[t-1] == '1' || p->name[t-1] == '2')) p->name[t-2] = '\0';
		}
		if (seq->qual.l) // copy quality
		{
			// one extra zeroed byte keeps the quality string NUL-terminated
			p->qual = (ubyte_t*)calloc(p->len + 1, 1);
			for (i = 0; i != p->len; ++i)
			{
				p->qual[i] = seq->qual.s[mid+i];
			}
		}

		if (n_seqs == n_needed) break;
	}
	*n = n_seqs;
	if (n_seqs == 0) {
		free(seqs);
		return 0;
	}
	return seqs;
}
Ejemplo n.º 12
0
/*
 * Search for the mate sequence mseq in a reference window derived from hit h
 * and the insert-size statistics in st, and store the result in *a.
 *
 * opt     options; t is the score threshold, q and r the gap open/extend costs
 * l_pac   length of the packed reference; the window is clipped to [1, l_pac]
 * pac     2-bit packed reference
 * st      insert-size statistics (avg, std)
 * h       anchoring hit; its strand decides on which side the window lies
 * l_mseq  length of mseq
 * mseq    mate sequence as characters
 * a       out: resulting hit; expected to be zeroed by the caller
 * g_mat   5x5 scoring matrix
 *
 * Returns without writing coordinates to *a when the window is shorter than
 * the mate or, in the SSE2 build, when the forward score is below opt->t.
 */
void bsw2_pair1(const bsw2opt_t *opt, int64_t l_pac, const uint8_t *pac, const bsw2pestat_t *st, const bsw2hit_t *h, int l_mseq, const char *mseq, bsw2hit_t *a, int8_t g_mat[25])
{
	extern void seq_reverse(int len, ubyte_t *seq, int is_comp);
	int64_t k, beg, end;
	uint8_t *seq, *ref;
	int i;
	// compute the region start and end
	a->n_seeds = 1; a->flag |= BSW2_FLAG_MATESW; // before calling this routine, *a has been cleared with memset(0); the flag is set with 1<<6/7
	if (h->is_rev == 0) {
		beg = (int64_t)(h->k + st->avg - EXT_STDDEV * st->std - l_mseq + .499);
		if (beg < h->k) beg = h->k;
		end = (int64_t)(h->k + st->avg + EXT_STDDEV * st->std + .499);
		a->is_rev = 1; a->flag |= 16;
	} else {
		beg = (int64_t)(h->k + h->end - h->beg - st->avg - EXT_STDDEV * st->std + .499);
		end = (int64_t)(h->k + h->end - h->beg - st->avg + EXT_STDDEV * st->std + l_mseq + .499);
		if (end > h->k + (h->end - h->beg)) end = h->k + (h->end - h->beg);
		a->is_rev = 0;
	}
	if (beg < 1) beg = 1;
	if (end > l_pac) end = l_pac;
	if (end - beg < l_mseq) return;
	// generate the sequence
	// one allocation holds the query (first l_mseq bytes) followed by the reference window
	seq = malloc(l_mseq + (end - beg));
	ref = seq + l_mseq;
	for (k = beg; k < end; ++k)
		ref[k - beg] = pac[k>>2] >> ((~k&3)<<1) & 0x3;
	// Index nst_nt4_table through uint8_t so that bytes >= 0x80 cannot become
	// a negative index where plain char is signed.
	if (h->is_rev == 0) {
		for (i = 0; i < l_mseq; ++i) { // on the reverse strand
			int c = nst_nt4_table[(uint8_t)mseq[i]];
			seq[l_mseq - 1 - i] = c > 3? 4 : 3 - c;
		}
	} else {
		for (i = 0; i < l_mseq; ++i) // on the forward strand
			seq[i] = nst_nt4_table[(uint8_t)mseq[i]];
	}
#ifndef _NO_SSE2
	{
		ksw_query_t *q;
		ksw_aux_t aux[2];
		// forward Smith-Waterman
		aux[0].T = opt->t; aux[0].gapo = opt->q; aux[0].gape = opt->r; aux[1] = aux[0];
		q = ksw_qinit(l_mseq * g_mat[0] < 250? 1 : 2, l_mseq, seq, 5, g_mat);
		ksw_sse2(q, end - beg, ref, &aux[0]);
		free(q);
		if (aux[0].score < opt->t) {
			free(seq);
			return;
		}
		++aux[0].qe; ++aux[0].te;
		// reverse Smith-Waterman
		seq_reverse(aux[0].qe, seq, 0);
		seq_reverse(aux[0].te, ref, 0);
		q = ksw_qinit(aux[0].qe * g_mat[0] < 250? 1 : 2, aux[0].qe, seq, 5, g_mat);
		ksw_sse2(q, aux[0].te, ref, &aux[1]);
		free(q);
		++aux[1].qe; ++aux[1].te;
		// write output
		a->G = aux[0].score;
		a->G2 = aux[0].score2 > aux[1].score2? aux[0].score2 : aux[1].score2;
		if (a->G2 < opt->t) a->G2 = 0;
		if (a->G2) a->flag |= BSW2_FLAG_TANDEM;
		a->k = beg + (aux[0].te - aux[1].te);
		a->len = aux[1].te;
		a->beg = aux[0].qe - aux[1].qe;
		a->end = aux[0].qe;
	}
#else
	{
		AlnParam ap;
		path_t path[2];
		int matrix[25];
		for (i = 0; i < 25; ++i) matrix[i] = g_mat[i];
		ap.gap_open = opt->q; ap.gap_ext = opt->r; ap.gap_end = opt->r;
		ap.matrix = matrix; ap.row = 5; ap.band_width = 50;
		a->G = aln_local_core(ref, end - beg, seq, l_mseq, &ap, path, 0, opt->t, &a->G2);
		if (a->G < opt->t) a->G = 0;
		if (a->G2 < opt->t) a->G2 = 0;
		a->k = beg + path[0].i - 1;
		a->len = path[1].i - path[0].i + 1;
		a->beg = path[0].j - 1;
		a->end = path[1].j;
	}
#endif
	// for a reverse-strand result, mirror the query interval within l_mseq
	if (a->is_rev) i = a->beg, a->beg = l_mseq - a->end, a->end = l_mseq - i;
	free(seq);
}
Ejemplo n.º 13
0
/*
 * Print one tab-separated alignment record for read p to stdout.
 *
 * bns        reference annotations used to turn packed coordinates into a
 *            sequence name and offset
 * p          read to print; p->pos and p->strand are overwritten from the mate
 *            when p is unmatched but the mate is matched, and p->qual is
 *            reversed in place when p->strand is set
 * mate       the read's mate, or NULL
 * mode       BWA_MODE_COMPREAD selects the tag name "NM" instead of "CM"
 * max_top2   X1 is printed only when p->c1 <= max_top2
 * bwa_rg_id  when non-NULL, printed as the RG:Z tag
 *
 * Two layouts exist: the first branch handles the case where the read or its
 * mate is matched; the else branch prints a fully unmatched record.
 */
void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, int mode, int max_top2, const char *bwa_rg_id)
{
	int j;
	//if (strcmp (p->name, "HWUSI-EAS1600:WT2_250_read_1:11_30_09:3:1:83:1066#0") == 0)
	//{
	//	fprintf (stderr, "found %s\n", p->name);
	//}
	if (p->type != BWA_TYPE_NO_MATCH || (mate && mate->type != BWA_TYPE_NO_MATCH)) {
		int seqid, nn, am = 0, flag = p->extra_flag;
		char XT;

		if (p->type == BWA_TYPE_NO_MATCH) {
			// unmatched read with a matched mate: borrow the mate's coordinate
			p->pos = mate->pos;
			p->strand = mate->strand;
			flag |= SAM_FSU;
			j = 1;
		} else j = pos_end(p) - p->pos; // j is the length of the reference in the alignment

		// get seqid
		nn = bns_coor_pac2real(bns, p->pos, j, &seqid);
		if (p->type != BWA_TYPE_NO_MATCH && p->pos + j - bns->anns[seqid].offset > bns->anns[seqid].len)
			flag |= SAM_FSU; // flag UNMAP as this alignment bridges two adjacent reference sequences

		// update flag and print it
		if (p->strand) flag |= SAM_FSR;
		if (mate) {
			if (mate->type != BWA_TYPE_NO_MATCH) {
				if (mate->strand) flag |= SAM_FMR;
			} else flag |= SAM_FMU;
		}
		printf("%s\t%d\t%s\t", p->name, flag, bns->anns[seqid].name);
		// position is printed 1-based relative to the reference sequence's offset
		printf("%d\t%d\t", (int)(p->pos - bns->anns[seqid].offset + 1), p->mapQ);

		// print CIGAR
		if (p->cigar) {
			for (j = 0; j != p->n_cigar; ++j)
				printf("%d%c", __cigar_len(p->cigar[j]), "MIDSN"[__cigar_op(p->cigar[j])]);
		} else if (p->type == BWA_TYPE_NO_MATCH) printf("*");
		else printf("%dM", p->len);

		// print mate coordinate
		if (mate && mate->type != BWA_TYPE_NO_MATCH) {
			int m_seqid, m_is_N;
			long long isize;
			am = mate->seQ < p->seQ? mate->seQ : p->seQ; // smaller single-end mapping quality
			// redundant calculation here, but should not matter too much
			m_is_N = bns_coor_pac2real(bns, mate->pos, mate->len, &m_seqid);
			printf("\t%s\t", (seqid == m_seqid)? "=" : bns->anns[m_seqid].name);
			// insert size is reported only when both ends are on the same sequence
			isize = (seqid == m_seqid)? pos_5(mate) - pos_5(p) : 0;
			if (p->type == BWA_TYPE_NO_MATCH) isize = 0;
			printf("%d\t%lld\t", (int)(mate->pos - bns->anns[m_seqid].offset + 1), isize);
		} else if (mate) printf("\t=\t%d\t0\t", (int)(p->pos - bns->anns[seqid].offset + 1));
		else printf("\t*\t0\t0\t");

		// print sequence and quality
		// strand set: walk seq backwards and print through the "TGCAN" table
		if (p->strand == 0)
			for (j = 0; j != p->full_len; ++j) putchar("ACGTN"[(int)p->seq[j]]);
		else for (j = 0; j != p->full_len; ++j) putchar("TGCAN"[p->seq[p->full_len - 1 - j]]);
		putchar('\t');
		if (p->qual) {
			if (p->strand) seq_reverse(p->len, p->qual, 0); // reverse quality
			printf("%s", p->qual);
		} else printf("*");

		if (bwa_rg_id) printf("\tRG:Z:%s", bwa_rg_id);
		if (p->clip_len < p->full_len) printf("\tXC:i:%d", p->clip_len);
		if (p->type != BWA_TYPE_NO_MATCH) {
			int i;
			// calculate XT tag
			XT = "NURM"[p->type];
			if (nn > 10) XT = 'N';
			// print tags
			printf("\tXT:A:%c\t%s:i:%d", XT, (mode & BWA_MODE_COMPREAD)? "NM" : "CM", p->nm);
			// print XS tag, to be compatible with Cufflinks
			// sense_strand == 2 is printed as '.', any other non-zero value as '-', zero as '+'
			if (p->sense_strand != 2 ) printf("\tXS:A:%c", p->sense_strand ? '-':'+' );
			else printf("\tXS:A:.");
			if (nn) printf("\tXN:i:%d", nn);
			if (mate) printf("\tSM:i:%lu\tAM:i:%d", p->seQ, am);
			if (p->type != BWA_TYPE_MATESW) { // X0 and X1 are not available for this type of alignment
				printf("\tX0:i:%lu", p->c1);
				if (p->c1 <= max_top2) printf("\tX1:i:%lu", p->c2);
			}
			printf("\tXM:i:%d\tXO:i:%d\tXG:i:%d", p->n_mm, p->n_gapo_t + p->n_gapo_q, p->n_gapo_t+p->n_gape_t+p->n_gapo_q+p->n_gape_q);
			if (p->md) printf("\tMD:Z:%s", p->md);
			// print multiple hits
			if (p->n_multi) {
				// the "XA:Z:" prefix is emitted lazily, before the first hit that is actually printed
				bool header_printed = 0;
				for (i = 0; i < p->n_multi; ++i) {
					bwt_multi1_t *q = p->multi + i;
					j = pos_end_multi(q, p->len) - q->pos;
					nn = bns_coor_pac2real(bns, q->pos, j, &seqid);
					if(pos_end_multi(q, p->len) - bns->anns[seqid].offset > bns->anns[seqid].len) continue; //the alignment bridges adjacent sequences (chroms)
//TODO: need to avoid this at the first place in the junction discovery step, but this should be rare for mm or human
					if (! header_printed) {
						header_printed = 1;
						printf("\tXA:Z:");
					}
					int k;
					// each hit is printed as name,strand+position,CIGAR,nm,sense-strand;
					printf("%s,%c%d,", bns->anns[seqid].name, q->strand? '-' : '+',
						   (int)(q->pos - bns->anns[seqid].offset + 1));
					if (q->cigar) {
						for (k = 0; k < q->n_cigar; ++k)
							printf("%d%c", __cigar_len(q->cigar[k]), "MIDSN"[__cigar_op(q->cigar[k])]);
					} else printf("%dM", p->len);
					printf(",%d", q->nm); //q->gap_t + q->gap_q + q->mm);
					if (q->sense_strand != 2) printf(",%c;", q->sense_strand? '-' : '+' );
					else printf(",.;");
				}
			}
		}
		putchar('\n');
	} else { // this read has no match
		ubyte_t *s = p->strand? p->rseq : p->seq;
		int flag = p->extra_flag | SAM_FSU;
		if (mate && mate->type == BWA_TYPE_NO_MATCH) flag |= SAM_FMU;
		printf("%s\t%d\t*\t0\t0\t*\t*\t0\t0\t", p->name, flag);
		for (j = 0; j != p->len; ++j) putchar("ACGTN"[(int)s[j]]);
		putchar('\t');
		if (p->qual) {
			if (p->strand) seq_reverse(p->len, p->qual, 0); // reverse quality
			printf("%s", p->qual);
		} else printf("*");
		if (p->clip_len < p->full_len) printf("\tXC:i:%d", p->clip_len);
		putchar('\n');
	}
}
Ejemplo n.º 14
0
/**
 * Wrap `read` in a freshly allocated bwa_seq_t, run mybwa_cal_sa_reg_gap() on
 * it, and report the [k, l] interval of the first alignment record (of at
 * most two) whose `a` field equals `strand`.
 *
 * @param read      bases of the read; `read_len` characters are translated
 *                  through nst_nt4_table
 * @param read_len  number of characters to take from `read`
 * @param strand    value matched against aln[i].a
 *                  (presumably 0 = forward, 1 = reverse -- confirm with caller)
 * @param num       out: *sa_l - *sa_k + 1 on a hit, 0 otherwise
 * @param sa_k      out: aln[i].k on a hit, 1 otherwise
 * @param sa_l      out: aln[i].l on a hit, 0 otherwise
 * @return the bwa_seq_t that was searched, or NULL when `read` is NULL,
 *         `read_len` is negative, or an allocation fails (the outputs then
 *         hold the "no hit" values).
 *
 * NOTE(review): who releases the returned record and its buffers is not
 * visible from this function -- confirm against mybwa_cal_sa_reg_gap() and
 * the callers.
 */
extern "C" bwa_seq_t * bwa_seed2genome_map(const char* read, int read_len, int strand, uint64_t *num, uint64_t *sa_k, uint64_t *sa_l)
{
	// "No hit" is reported as an empty interval (k > l) with zero occurrences.
	*sa_k = 1;
	*sa_l = 0;
	*num = 0;

	if (read == NULL || read_len < 0)
		return NULL;

	bwa_seq_t *p = (bwa_seq_t*)calloc(1, sizeof(bwa_seq_t));
	if (p == NULL)
		return NULL;

	const int n_seqs = 1;

	p->tid = -1; // not assigned to a thread
	p->qual = NULL;
	p->cigar = NULL;
	p->full_len = p->clip_len = p->len = read_len;

	// One spare byte keeps the request non-zero, so a NULL result always
	// means a real allocation failure, even for an empty read.
	p->seq = (ubyte_t*)calloc((size_t)read_len + 1, 1);
	p->rseq = (ubyte_t*)calloc((size_t)read_len + 1, 1);
	p->name = strdup("seq");
	if (p->seq == NULL || p->rseq == NULL || p->name == NULL) {
		free(p->name);
		free(p->rseq);
		free(p->seq);
		free(p);
		return NULL;
	}

	// Index the table with an unsigned byte: plain char may be signed, and a
	// byte >= 0x80 would otherwise become a negative index.
	for (int i = 0; i != p->full_len; ++i)
		p->seq[i] = nst_nt4_table[(unsigned char)read[i]];

	memcpy(p->rseq, p->seq, p->len);
	seq_reverse(p->len, p->seq, 0); // *IMPORTANT*: will be reversed back in bwa_refine_gapped()
	seq_reverse(p->len, p->rseq, bwt_opt->mode & BWA_MODE_COMPREAD);

	mybwa_cal_sa_reg_gap(0, bwt_bwt, n_seqs, p, bwt_opt);

	if (p->n_aln > 0) {
		assert(p->n_aln <= 2);
		// Only the first two records are ever inspected; take the first one
		// that matches the requested strand.
		for (int i = 0; i < p->n_aln && i < 2; ++i) {
			if (p->aln[i].a == strand) {
				*sa_k = p->aln[i].k;
				*sa_l = p->aln[i].l;
				*num = *sa_l - *sa_k + 1;
				break;
			}
		}
	}

	return p;
}