/**
 * Convert suffix-array coordinates into positions for a batch of reads and
 * assign each an approximate mapping quality.
 *
 * Two passes are made; the first index is destroyed before the second is
 * loaded:
 *   1. <prefix>.bwt  + <prefix>.sa   -> reads with p->strand set
 *   2. <prefix>.rbwt + <prefix>.rsa  -> reads with p->strand clear
 * Only reads typed BWA_TYPE_UNIQUE or BWA_TYPE_REPEAT are updated.
 *
 * @param prefix  index file prefix; may be of any length
 * @param n_seqs  number of entries in seqs
 * @param seqs    reads updated in place (pos, seQ, mapQ)
 * @param max_mm  difference limit passed to bwa_approx_mapQ() when fnr <= 0
 * @param fnr     when > 0, the limit is computed per read with bwa_cal_maxdiff()
 */
void bwa_cal_pac_pos(const char *prefix, int n_seqs, bwa_seq_t *seqs, int max_mm, float fnr)
{
	int i;
	char *str;
	bwt_t *bwt;

	/* The file name buffer is sized from the prefix instead of being a fixed
	 * 1024-byte array, so a long prefix can no longer overflow it. The longest
	 * suffix appended below is ".rbwt" (5 characters plus the terminator). */
	str = (char*)calloc(strlen(prefix) + 10, 1);
	if (str == 0) {
		fprintf(stderr, "[bwa_cal_pac_pos] insufficient memory\n");
		exit(1);
	}

	// load forward SA
	strcpy(str, prefix); strcat(str, ".bwt");
	bwt = bwt_restore_bwt(str);
	strcpy(str, prefix); strcat(str, ".sa");
	bwt_restore_sa(str, bwt);
	for (i = 0; i != n_seqs; ++i) {
		bwa_seq_t *p = seqs + i;
		int max_diff = fnr > 0.0? bwa_cal_maxdiff(p->len, BWA_AVG_ERR, fnr) : max_mm;
		if ((p->type == BWA_TYPE_UNIQUE || p->type == BWA_TYPE_REPEAT) && p->strand) { // reverse strand only
			p->pos = bwt_sa(bwt, p->sa);
			p->seQ = p->mapQ = bwa_approx_mapQ(p, max_diff);
		}
	}
	bwt_destroy(bwt);

	// load reverse BWT and SA
	strcpy(str, prefix); strcat(str, ".rbwt");
	bwt = bwt_restore_bwt(str);
	strcpy(str, prefix); strcat(str, ".rsa");
	bwt_restore_sa(str, bwt);
	for (i = 0; i != n_seqs; ++i) {
		bwa_seq_t *p = seqs + i;
		int max_diff = fnr > 0.0? bwa_cal_maxdiff(p->len, BWA_AVG_ERR, fnr) : max_mm;
		if ((p->type == BWA_TYPE_UNIQUE || p->type == BWA_TYPE_REPEAT) && !p->strand) { // forward strand only
			/* NB: For gapped alignment, p->pos may not be correct,
			 *     which will be fixed in refine_gapped_core(). This
			 *     line also determines the way "x" is calculated in
			 *     refine_gapped_core() when (ext < 0 && is_end == 0). */
			p->pos = bwt->seq_len - (bwt_sa(bwt, p->sa) + p->len);
			p->seQ = p->mapQ = bwa_approx_mapQ(p, max_diff);
		}
	}
	bwt_destroy(bwt);

	free(str);
}
/**
 * Search every read handled by this worker against the two BWT indices and
 * record the result of bwt_match_gap() in the read.
 *
 * When built with HAVE_PTHREAD and opt->n_threads > 1, reads are claimed under
 * g_seq_lock: an unassigned read (tid < 0) makes this worker take it and up to
 * THREAD_BLOCK_SIZE - 1 following reads; a read owned by another worker is
 * skipped.
 *
 * @param tid     identifier of the calling worker
 * @param bwt     the two indices; bwt[0] is paired with p->seq, bwt[1] with p->rseq
 * @param n_seqs  number of entries in seqs
 * @param seqs    reads; each processed read gets aln/n_aln set and has its
 *                name, seq, rseq and qual buffers freed and nulled
 * @param opt     search options; copied locally, never modified
 */
void bwa_cal_sa_reg_gap(int tid, bwt_t *const bwt[2], int n_seqs, bwa_seq_t *seqs, const gap_opt_t *opt)
{
	int idx, longest = 0, width_cap = 0;
	gap_stack_t *stack;
	bwt_width_t *w[2] = { 0, 0 };
	bwt_width_t *seed_w[2];
	const ubyte_t *seq[2];
	gap_opt_t local_opt = *opt;

	/* The stack is dimensioned for the longest read in the batch. */
	for (idx = 0; idx != n_seqs; ++idx) {
		if (seqs[idx].len > longest)
			longest = seqs[idx].len;
	}
	if (opt->fnr > 0.0)
		local_opt.max_diff = bwa_cal_maxdiff(longest, BWA_AVG_ERR, opt->fnr);
	if (local_opt.max_diff < local_opt.max_gapo)
		local_opt.max_gapo = local_opt.max_diff;
	stack = gap_init_stack(local_opt.max_diff, local_opt.max_gapo, local_opt.max_gape, &local_opt);

	seed_w[0] = (bwt_width_t*)calloc(opt->seed_len + 1, sizeof(bwt_width_t));
	seed_w[1] = (bwt_width_t*)calloc(opt->seed_len + 1, sizeof(bwt_width_t));

	for (idx = 0; idx != n_seqs; ++idx) {
		bwa_seq_t *read = &seqs[idx];
		int seeded;
#ifdef HAVE_PTHREAD
		if (opt->n_threads > 1) {
			int skip = 0;
			pthread_mutex_lock(&g_seq_lock);
			if (read->tid < 0) { /* unassigned: claim a block of reads */
				int j;
				for (j = idx; j < n_seqs && j < idx + THREAD_BLOCK_SIZE; ++j)
					seqs[j].tid = tid;
			} else if (read->tid != tid) {
				skip = 1; /* owned by another worker */
			}
			pthread_mutex_unlock(&g_seq_lock);
			if (skip)
				continue;
		}
#endif
		/* Reset the result fields before searching. */
		read->sa = 0;
		read->type = BWA_TYPE_NO_MATCH;
		read->c1 = read->c2 = 0;
		read->n_aln = 0;
		read->aln = 0;
		seq[0] = read->seq;
		seq[1] = read->rseq;

		/* Grow (and clear) the width arrays when a longer read shows up. */
		if (width_cap < read->len) {
			size_t bytes;
			width_cap = read->len;
			bytes = (width_cap + 1) * sizeof(bwt_width_t);
			w[0] = (bwt_width_t*)realloc(w[0], bytes);
			w[1] = (bwt_width_t*)realloc(w[1], bytes);
			memset(w[0], 0, bytes);
			memset(w[1], 0, bytes);
		}
		bwt_cal_width(bwt[0], read->len, seq[0], w[0]);
		bwt_cal_width(bwt[1], read->len, seq[1], w[1]);
		if (opt->fnr > 0.0)
			local_opt.max_diff = bwa_cal_maxdiff(read->len, BWA_AVG_ERR, opt->fnr);

		/* Seeding applies only to reads strictly longer than the seed; the
		 * seed widths are computed over the last seed_len bases. */
		seeded = read->len > opt->seed_len;
		if (seeded) {
			local_opt.seed_len = opt->seed_len;
			bwt_cal_width(bwt[0], opt->seed_len, seq[0] + (read->len - opt->seed_len), seed_w[0]);
			bwt_cal_width(bwt[1], opt->seed_len, seq[1] + (read->len - opt->seed_len), seed_w[1]);
		} else {
			local_opt.seed_len = 0x7fffffff;
		}

		/* core function */
		read->aln = bwt_match_gap(bwt, read->len, seq, w, seeded ? seed_w : 0,
								  &local_opt, &read->n_aln, stack);

		/* The per-read input buffers are not needed past this point. */
		free(read->name);
		free(read->seq);
		free(read->rseq);
		free(read->qual);
		read->name = 0;
		read->seq = read->rseq = read->qual = 0;
	}

	free(seed_w[0]);
	free(seed_w[1]);
	free(w[0]);
	free(w[1]);
	gap_destroy_stack(stack);
}
/**
 * Search every read in the batch against the two BWT indices with gap
 * opens, gap extensions and seed differences forced to zero in a local copy
 * of the options, and record the result of bwt_match_gap() in each read.
 *
 * NOTE(review): max_diff is also forced to 0 here, but it is recomputed with
 * bwa_cal_maxdiff() whenever opt->fnr > 0, so the zero only survives when
 * fnr <= 0 -- confirm this is intended.
 * NOTE(review): tid is accepted but never used in this function.
 *
 * @param tid     unused
 * @param bwt     the two indices; bwt[0] is paired with p->seq, bwt[1] with p->rseq
 * @param n_seqs  number of entries in seqs
 * @param seqs    reads; each gets aln/n_aln set and has its name, seq, rseq
 *                and qual buffers freed and nulled
 * @param opt     search options; copied locally, never modified
 */
void mybwa_cal_sa_reg_gap(int tid, bwt_t *const bwt[2], int n_seqs, bwa_seq_t *seqs, const gap_opt_t *opt)
{
	int idx, longest = 0, width_cap = 0;
	gap_stack_t *stack;
	bwt_width_t *w[2] = { 0, 0 };
	bwt_width_t *seed_w[2];
	const ubyte_t *seq[2];
	gap_opt_t local_opt = *opt;

	/* no mismatches or gaps in seed */
	local_opt.max_gape = 0;
	local_opt.max_gapo = 0;
	local_opt.max_seed_diff = 0;
	local_opt.max_diff = 0;

	/* The stack is dimensioned for the longest read in the batch. */
	for (idx = 0; idx != n_seqs; ++idx) {
		if (seqs[idx].len > longest)
			longest = seqs[idx].len;
	}
	if (opt->fnr > 0.0)
		local_opt.max_diff = bwa_cal_maxdiff(longest, BWA_AVG_ERR, opt->fnr);
	if (local_opt.max_diff < local_opt.max_gapo)
		local_opt.max_gapo = local_opt.max_diff;
	stack = gap_init_stack(local_opt.max_diff, local_opt.max_gapo, local_opt.max_gape, &local_opt);

	seed_w[0] = (bwt_width_t*)calloc(opt->seed_len + 1, sizeof(bwt_width_t));
	seed_w[1] = (bwt_width_t*)calloc(opt->seed_len + 1, sizeof(bwt_width_t));

	for (idx = 0; idx != n_seqs; ++idx) {
		bwa_seq_t *read = &seqs[idx];
		int seeded;

		/* Reset the result fields before searching. */
		read->sa = 0;
		read->type = BWA_TYPE_NO_MATCH;
		read->c1 = read->c2 = 0;
		read->n_aln = 0;
		read->aln = 0;
		seq[0] = read->seq;
		seq[1] = read->rseq;

		/* Grow (and clear) the width arrays when a longer read shows up. */
		if (width_cap < read->len) {
			size_t bytes;
			width_cap = read->len;
			bytes = (width_cap + 1) * sizeof(bwt_width_t);
			w[0] = (bwt_width_t*)realloc(w[0], bytes);
			w[1] = (bwt_width_t*)realloc(w[1], bytes);
			memset(w[0], 0, bytes);
			memset(w[1], 0, bytes);
		}
		bwt_cal_width(bwt[0], read->len, seq[0], w[0]);
		bwt_cal_width(bwt[1], read->len, seq[1], w[1]);
		if (opt->fnr > 0.0)
			local_opt.max_diff = bwa_cal_maxdiff(read->len, BWA_AVG_ERR, opt->fnr);

		/* Seeding applies only to reads strictly longer than the seed; the
		 * seed widths are computed over the last seed_len bases. */
		seeded = read->len > opt->seed_len;
		if (seeded) {
			local_opt.seed_len = opt->seed_len;
			bwt_cal_width(bwt[0], opt->seed_len, seq[0] + (read->len - opt->seed_len), seed_w[0]);
			bwt_cal_width(bwt[1], opt->seed_len, seq[1] + (read->len - opt->seed_len), seed_w[1]);
		} else {
			local_opt.seed_len = 0x7fffffff;
		}

		/* core function */
		read->aln = bwt_match_gap(bwt, read->len, seq, w, seeded ? seed_w : 0,
								  &local_opt, &read->n_aln, stack);

		/* The per-read input buffers are not needed past this point. */
		free(read->name);
		free(read->seq);
		free(read->rseq);
		free(read->qual);
		read->name = 0;
		read->seq = read->rseq = read->qual = 0;
	}

	free(seed_w[0]);
	free(seed_w[1]);
	free(w[0]);
	free(w[1]);
	gap_destroy_stack(stack);
}
/**
 * Fill in the position and approximate mapping quality of one read.
 *
 * Reads whose type is neither BWA_TYPE_UNIQUE nor BWA_TYPE_REPEAT are left
 * untouched.
 *
 * @param bwt      index used to turn seq->sa into a position via bwt_sa()
 * @param seq      read updated in place (pos, seQ, mapQ)
 * @param max_mm   difference limit used when fnr <= 0
 * @param fnr      when > 0, the limit comes from bwa_cal_maxdiff() instead
 * @param g_log_n  forwarded unchanged to bwa_approx_mapQ()
 */
void bwa_cal_pac_pos_core(const bwt_t *bwt, bwa_seq_t *seq, const int max_mm, const float fnr, const int *g_log_n)
{
	int diff_limit = max_mm;

	if (!(seq->type == BWA_TYPE_UNIQUE || seq->type == BWA_TYPE_REPEAT))
		return;

	if (fnr > 0.0)
		diff_limit = bwa_cal_maxdiff(seq->len, BWA_AVG_ERR, fnr);

	seq->pos = bwt_sa(bwt, seq->sa);
	seq->mapQ = bwa_approx_mapQ(seq, diff_limit, g_log_n);
	seq->seQ = seq->mapQ;
}
/**
 * Search the reads handled by this worker against a single BWT index and
 * record the result of bwt_match_gap() in each read.
 *
 * When built with HAVE_PTHREAD, reads are distributed round-robin: this
 * worker only processes reads whose index i satisfies
 * i % opt->n_threads == tid.
 *
 * @param tid     identifier of the calling worker
 * @param bwt     the index to search
 * @param n_seqs  number of entries in seqs
 * @param seqs    reads; each processed read gets aln/n_aln set and has its
 *                name, seq, rseq and qual buffers freed and nulled
 * @param opt     search options; copied locally, never modified
 */
void bwa_cal_sa_reg_gap(int tid, bwt_t *const bwt, int n_seqs, bwa_seq_t *seqs, const gap_opt_t *opt)
{
	int idx, k, longest = 0, width_cap = 0;
	gap_stack_t *stack;
	bwt_width_t *w = 0;
	bwt_width_t *seed_w;
	gap_opt_t local_opt = *opt;

	/* The stack is dimensioned for the longest read in the batch. */
	for (idx = 0; idx != n_seqs; ++idx) {
		if (seqs[idx].len > longest)
			longest = seqs[idx].len;
	}
	if (opt->fnr > 0.0)
		local_opt.max_diff = bwa_cal_maxdiff(longest, BWA_AVG_ERR, opt->fnr);
	if (local_opt.max_diff < local_opt.max_gapo)
		local_opt.max_gapo = local_opt.max_diff;
	stack = gap_init_stack(local_opt.max_diff, local_opt.max_gapo, local_opt.max_gape, &local_opt);

	seed_w = (bwt_width_t*)calloc(opt->seed_len + 1, sizeof(bwt_width_t));

	for (idx = 0; idx != n_seqs; ++idx) {
		bwa_seq_t *read = &seqs[idx];
		int seeded;
#ifdef HAVE_PTHREAD
		if (idx % opt->n_threads != tid)
			continue;
#endif
		/* Reset the result fields before searching. */
		read->sa = 0;
		read->type = BWA_TYPE_NO_MATCH;
		read->c1 = read->c2 = 0;
		read->n_aln = 0;
		read->aln = 0;

		/* Grow (and clear) the width array when a longer read shows up. */
		if (width_cap < read->len) {
			size_t bytes;
			width_cap = read->len;
			bytes = (width_cap + 1) * sizeof(bwt_width_t);
			w = (bwt_width_t*)realloc(w, bytes);
			memset(w, 0, bytes);
		}
		bwt_cal_width(bwt, read->len, read->seq, w);
		if (opt->fnr > 0.0)
			local_opt.max_diff = bwa_cal_maxdiff(read->len, BWA_AVG_ERR, opt->fnr);

		/* Seeding applies only to reads strictly longer than the seed; the
		 * seed widths are computed over the last seed_len bases. */
		seeded = read->len > opt->seed_len;
		if (seeded) {
			local_opt.seed_len = opt->seed_len;
			bwt_cal_width(bwt, opt->seed_len, read->seq + (read->len - opt->seed_len), seed_w);
		} else {
			local_opt.seed_len = 0x7fffffff;
		}

		/* The widths above use the read as given; the search itself runs on
		 * the complemented read. Codes above 3 become 4, others map c -> 3 - c. */
		for (k = 0; k < read->len; ++k) {
			int base = read->seq[k];
			read->seq[k] = base > 3 ? 4 : 3 - base;
		}

		/* core function */
		read->aln = bwt_match_gap(bwt, read->len, read->seq, w, seeded ? seed_w : 0,
								  &local_opt, &read->n_aln, stack);

		/* clean up the unused data in the record */
		free(read->name);
		free(read->seq);
		free(read->rseq);
		free(read->qual);
		read->name = 0;
		read->seq = read->rseq = read->qual = 0;
	}

	free(seed_w);
	free(w);
	gap_destroy_stack(stack);
}
/**
 * Derive a read's position and strand from its suffix array coordinate and
 * assign its approximate mapping quality.
 *
 * Note that the position will be approximate based on whether indels appear
 * in the read and whether calculations are performed from the start or end
 * of the read.
 *
 * Reads whose type is neither BWA_TYPE_UNIQUE nor BWA_TYPE_REPEAT are left
 * untouched.
 *
 * @param bns     reference metadata forwarded to bwa_sa2pos()
 * @param bwt     index forwarded to bwa_sa2pos()
 * @param seq     read updated in place (pos, strand, seQ, mapQ)
 * @param max_mm  difference limit used when fnr <= 0
 * @param fnr     when > 0, the limit comes from bwa_cal_maxdiff() instead
 */
void bwa_cal_pac_pos_core(const bntseq_t *bns, const bwt_t *bwt, bwa_seq_t *seq, const int max_mm, const float fnr)
{
	int max_diff, strand;

	if (seq->type != BWA_TYPE_UNIQUE && seq->type != BWA_TYPE_REPEAT) return;
	max_diff = fnr > 0.0? bwa_cal_maxdiff(seq->len, BWA_AVG_ERR, fnr) : max_mm;

	seq->pos = bwa_sa2pos(bns, bwt, seq->sa, seq->len, &strand);
	seq->strand = strand;

	/* The quality is computed once, after pos and strand are final. An
	 * identical call used to precede bwa_sa2pos(), but its result was always
	 * overwritten here. This assumes bwa_approx_mapQ() has no side effects. */
	seq->seQ = seq->mapQ = bwa_approx_mapQ(seq, max_diff);
}
/**
 * Fill in the position and approximate mapping quality of one read, picking
 * the index according to the read's strand flag.
 *
 * Reads whose type is neither BWA_TYPE_UNIQUE nor BWA_TYPE_REPEAT are left
 * untouched.
 *
 * @param forward_bwt  index consulted when seq->strand is set
 * @param reverse_bwt  index consulted when seq->strand is clear
 * @param seq          read updated in place (pos, seQ, mapQ)
 * @param max_mm       difference limit used when fnr <= 0
 * @param fnr          when > 0, the limit comes from bwa_cal_maxdiff() instead
 */
void bwa_cal_pac_pos_core(const bwt_t *forward_bwt, const bwt_t *reverse_bwt, bwa_seq_t *seq, const int max_mm, const float fnr)
{
	int diff_limit = max_mm;

	if (!(seq->type == BWA_TYPE_UNIQUE || seq->type == BWA_TYPE_REPEAT))
		return;

	if (fnr > 0.0)
		diff_limit = bwa_cal_maxdiff(seq->len, BWA_AVG_ERR, fnr);

	if (!seq->strand) {
		/* Forward strand only.
		 * NB: For gapped alignment, seq->pos may not be correct, which
		 * will be fixed in refine_gapped_core(). This line also
		 * determines the way "x" is calculated in
		 * refine_gapped_core() when (ext < 0 && is_end == 0). */
		seq->pos = reverse_bwt->seq_len - (bwt_sa(reverse_bwt, seq->sa) + seq->len);
	} else {
		/* Reverse strand only. */
		seq->pos = bwt_sa(forward_bwt, seq->sa);
	}

	seq->mapQ = bwa_approx_mapQ(seq, diff_limit);
	seq->seQ = seq->mapQ;
}
int bwa_aln(int argc, char *argv[]) { int c, opte = -1; gap_opt_t *opt; opt = gap_init_opt(); while ((c = getopt(argc, argv, "n:o:e:i:d:l:k:cLR:m:t:NM:O:E:q:f:b012IYB:")) >= 0) { switch (c) { case 'n': if (strstr(optarg, ".")) opt->fnr = atof(optarg), opt->max_diff = -1; else opt->max_diff = atoi(optarg), opt->fnr = -1.0; break; case 'o': opt->max_gapo = atoi(optarg); break; case 'e': opte = atoi(optarg); break; case 'M': opt->s_mm = atoi(optarg); break; case 'O': opt->s_gapo = atoi(optarg); break; case 'E': opt->s_gape = atoi(optarg); break; case 'd': opt->max_del_occ = atoi(optarg); break; case 'i': opt->indel_end_skip = atoi(optarg); break; case 'l': opt->seed_len = atoi(optarg); break; case 'k': opt->max_seed_diff = atoi(optarg); break; case 'm': opt->max_entries = atoi(optarg); break; case 't': opt->n_threads = atoi(optarg); break; case 'L': opt->mode |= BWA_MODE_LOGGAP; break; case 'R': opt->max_top2 = atoi(optarg); break; case 'q': opt->trim_qual = atoi(optarg); break; case 'c': opt->mode &= ~BWA_MODE_COMPREAD; break; case 'N': opt->mode |= BWA_MODE_NONSTOP; opt->max_top2 = 0x7fffffff; break; case 'f': xreopen(optarg, "wb", stdout); break; case 'b': opt->mode |= BWA_MODE_BAM; break; case '0': opt->mode |= BWA_MODE_BAM_SE; break; case '1': opt->mode |= BWA_MODE_BAM_READ1; break; case '2': opt->mode |= BWA_MODE_BAM_READ2; break; case 'I': opt->mode |= BWA_MODE_IL13; break; case 'Y': opt->mode |= BWA_MODE_CFY; break; case 'B': opt->mode |= atoi(optarg) << 24; break; default: return 1; } } if (opte > 0) { opt->max_gape = opte; opt->mode &= ~BWA_MODE_GAPE; } if (optind + 2 > argc) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: bwa aln [options] <prefix> <in.fq>\n\n"); fprintf(stderr, "Options: -n NUM max #diff (int) or missing prob under %.2f err rate (float) [%.2f]\n", BWA_AVG_ERR, opt->fnr); fprintf(stderr, " -o INT maximum number or fraction of gap opens [%d]\n", opt->max_gapo); fprintf(stderr, " -e INT maximum number of gap extensions, -1 for disabling long 
gaps [-1]\n"); fprintf(stderr, " -i INT do not put an indel within INT bp towards the ends [%d]\n", opt->indel_end_skip); fprintf(stderr, " -d INT maximum occurrences for extending a long deletion [%d]\n", opt->max_del_occ); fprintf(stderr, " -l INT seed length [%d]\n", opt->seed_len); fprintf(stderr, " -k INT maximum differences in the seed [%d]\n", opt->max_seed_diff); fprintf(stderr, " -m INT maximum entries in the queue [%d]\n", opt->max_entries); fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads); fprintf(stderr, " -M INT mismatch penalty [%d]\n", opt->s_mm); fprintf(stderr, " -O INT gap open penalty [%d]\n", opt->s_gapo); fprintf(stderr, " -E INT gap extension penalty [%d]\n", opt->s_gape); fprintf(stderr, " -R INT stop searching when there are >INT equally best hits [%d]\n", opt->max_top2); fprintf(stderr, " -q INT quality threshold for read trimming down to %dbp [%d]\n", BWA_MIN_RDLEN, opt->trim_qual); fprintf(stderr, " -f FILE file to write output to instead of stdout\n"); fprintf(stderr, " -B INT length of barcode\n"); // fprintf(stderr, " -c input sequences are in the color space\n"); fprintf(stderr, " -L log-scaled gap penalty for long deletions\n"); fprintf(stderr, " -N non-iterative mode: search for all n-difference hits (slooow)\n"); fprintf(stderr, " -I the input is in the Illumina 1.3+ FASTQ-like format\n"); fprintf(stderr, " -b the input read file is in the BAM format\n"); fprintf(stderr, " -0 use single-end reads only (effective with -b)\n"); fprintf(stderr, " -1 use the 1st read in a pair (effective with -b)\n"); fprintf(stderr, " -2 use the 2nd read in a pair (effective with -b)\n"); fprintf(stderr, " -Y filter Casava-filtered sequences\n"); fprintf(stderr, "\n"); return 1; } if (opt->fnr > 0.0) { int i, k; for (i = 17, k = 0; i <= 250; ++i) { int l = bwa_cal_maxdiff(i, BWA_AVG_ERR, opt->fnr); if (l != k) fprintf(stderr, "[bwa_aln] %dbp reads: max_diff = %d\n", i, l); k = l; } } bwa_aln_core(argv[optind], argv[optind+1], opt); 
free(opt); return 0; }