/**
 * Construct a BWA instance from a set of index files.
 *
 * Restores `bns` from the .ann/.amb/.pac files, reads bns->l_pac/4+1 bytes
 * from the start of the pac stream into `reference`, closes that stream,
 * then restores the forward and reverse BWTs together with their suffix
 * arrays into bwts[0] and bwts[1].  Finally the default options, the random
 * seed and the bwase subsystem are (re)initialized.
 *
 * NOTE(review): the result of fread() is not checked here.
 */
BWA::BWA(const char* ann_filename,
         const char* amb_filename,
         const char* pac_filename,
         const char* forward_bwt_filename,
         const char* forward_sa_filename,
         const char* reverse_bwt_filename,
         const char* reverse_sa_filename)
{
    // Load the bns and pull the reference out of its pac stream.
    bns = bns_restore_core(ann_filename, amb_filename, pac_filename);
    reference = new ubyte_t[bns->l_pac/4+1];
    rewind(bns->fp_pac);
    fread(reference, 1, bns->l_pac/4+1, bns->fp_pac);
    fclose(bns->fp_pac);
    bns->fp_pac = NULL;

    // Index 0 is the forward direction, index 1 the reverse direction.
    const char* const bwt_files[2] = { forward_bwt_filename, reverse_bwt_filename };
    const char* const sa_files[2]  = { forward_sa_filename,  reverse_sa_filename  };
    for (int direction = 0; direction < 2; ++direction) {
        bwts[direction] = bwt_restore_bwt(bwt_files[direction]);
        bwt_restore_sa(sa_files[direction], bwts[direction]);
    }

    load_default_options();

    // A fresh set of files always gets a freshly initialized random seed.
    initialize_random_seed();

    // Bring up the bwase subsystem.
    bwase_initialize();
}
/*
 * Fill in pos, seQ and mapQ for every read whose type is BWA_TYPE_UNIQUE or
 * BWA_TYPE_REPEAT.
 *
 *   prefix  index prefix; "<prefix>.bwt"/".sa" are loaded first and released,
 *           then "<prefix>.rbwt"/".rsa"
 *   n_seqs  number of entries in seqs
 *   seqs    reads, updated in place
 *   max_mm  maximum difference passed to bwa_approx_mapQ() when fnr <= 0
 *   fnr     when > 0, the maximum difference is instead computed per read
 *           with bwa_cal_maxdiff(len, BWA_AVG_ERR, fnr)
 *
 * The file-name buffer is sized from the prefix, so an arbitrarily long
 * index path cannot overflow it.  Allocation failure aborts the process.
 */
void bwa_cal_pac_pos(const char *prefix, int n_seqs, bwa_seq_t *seqs, int max_mm, float fnr)
{
    int i;
    char *str;
    bwt_t *bwt;

    /* prefix + longest suffix (".rbwt") + NUL fits comfortably in +10 */
    str = (char*)calloc(strlen(prefix) + 10, 1);
    if (str == NULL) {
        abort();
    }

    // load forward SA
    strcpy(str, prefix); strcat(str, ".bwt");
    bwt = bwt_restore_bwt(str);
    strcpy(str, prefix); strcat(str, ".sa");
    bwt_restore_sa(str, bwt);
    for (i = 0; i != n_seqs; ++i) {
        bwa_seq_t *p = seqs + i;
        int max_diff = fnr > 0.0? bwa_cal_maxdiff(p->len, BWA_AVG_ERR, fnr) : max_mm;
        if ((p->type == BWA_TYPE_UNIQUE || p->type == BWA_TYPE_REPEAT) && p->strand) { // reverse strand only
            p->pos = bwt_sa(bwt, p->sa);
            p->seQ = p->mapQ = bwa_approx_mapQ(p, max_diff);
        }
    }
    bwt_destroy(bwt);

    // load reverse BWT and SA
    strcpy(str, prefix); strcat(str, ".rbwt");
    bwt = bwt_restore_bwt(str);
    strcpy(str, prefix); strcat(str, ".rsa");
    bwt_restore_sa(str, bwt);
    for (i = 0; i != n_seqs; ++i) {
        bwa_seq_t *p = seqs + i;
        int max_diff = fnr > 0.0? bwa_cal_maxdiff(p->len, BWA_AVG_ERR, fnr) : max_mm;
        if ((p->type == BWA_TYPE_UNIQUE || p->type == BWA_TYPE_REPEAT) && !p->strand) { // forward strand only
            /* NB: For gapped alignment, p->pos may not be correct,
             * which will be fixed in refine_gapped_core(). This
             * line also determines the way "x" is calculated in
             * refine_gapped_core() when (ext < 0 && is_end == 0). */
            p->pos = bwt->seq_len - (bwt_sa(bwt, p->sa) + p->len);
            p->seQ = p->mapQ = bwa_approx_mapQ(p, max_diff);
        }
    }
    bwt_destroy(bwt);

    free(str);
}
/*
 * Convert suffix-array coordinates to positions for every read and for each
 * of its entries in `multi`.
 *
 *   prefix  index prefix; "<prefix>.bwt"/".sa" are loaded first and released,
 *           then "<prefix>.rbwt"/".rsa"
 *   n_seqs  number of entries in seqs
 *   seqs    reads, updated in place
 *   max_mm  forwarded to bwa_cal_pac_pos_core()
 *   fnr     forwarded to bwa_cal_pac_pos_core()
 *
 * Reads and multi-hits with `strand` set are resolved in the first pass
 * (".bwt"/".sa"); those with `strand` clear in the second (".rbwt"/".rsa").
 *
 * The file-name buffer is sized from the prefix, so an arbitrarily long
 * index path cannot overflow it.  Allocation failure aborts the process.
 */
void bwa_cal_pac_pos(const char *prefix, int n_seqs, bwa_seq_t *seqs, int max_mm, float fnr)
{
    int i, j;
    char *str;
    bwt_t *bwt;

    /* prefix + longest suffix (".rbwt") + NUL fits comfortably in +10 */
    str = (char*)calloc(strlen(prefix) + 10, 1);
    if (str == NULL) {
        abort();
    }

    // load forward SA
    strcpy(str, prefix); strcat(str, ".bwt");
    bwt = bwt_restore_bwt(str);
    strcpy(str, prefix); strcat(str, ".sa");
    bwt_restore_sa(str, bwt);
    for (i = 0; i != n_seqs; ++i) {
        if (seqs[i].strand) {
            bwa_cal_pac_pos_core(bwt, 0, &seqs[i], max_mm, fnr);
        }
        for (j = 0; j < seqs[i].n_multi; ++j) {
            bwt_multi1_t *p = seqs[i].multi + j;
            if (p->strand) {
                p->pos = bwt_sa(bwt, p->pos);
            }
        }
    }
    bwt_destroy(bwt);

    // load reverse BWT and SA
    strcpy(str, prefix); strcat(str, ".rbwt");
    bwt = bwt_restore_bwt(str);
    strcpy(str, prefix); strcat(str, ".rsa");
    bwt_restore_sa(str, bwt);
    for (i = 0; i != n_seqs; ++i) {
        if (!seqs[i].strand) {
            bwa_cal_pac_pos_core(0, bwt, &seqs[i], max_mm, fnr);
        }
        for (j = 0; j < seqs[i].n_multi; ++j) {
            bwt_multi1_t *p = seqs[i].multi + j;
            if (!p->strand) {
                p->pos = bwt->seq_len - (bwt_sa(bwt, p->pos) + seqs[i].len);
            }
        }
    }
    bwt_destroy(bwt);

    free(str);
}
void bwt_index(char *prefix) { int block_size = 10000000; int64_t l_pac; char *str, *str2, *str3; str = (char*)calloc(strlen(prefix) + 10, 1); str2 = (char*)calloc(strlen(prefix) + 10, 1); str3 = (char*)calloc(strlen(prefix) + 10, 1); fprintf(stderr, "[bwt_index] Building bwt-index for genome...\n"); { // for&rev.pac .ann .amb gzFile fp = gzopen(prefix, "r"); l_pac = bns_fasta2bntseq(fp, prefix, 0); gzclose(fp); } { // .bwt strcpy(str, prefix); strcat(str, ".pac"); strcpy(str2, prefix); strcat(str2, ".bwt"); bwt_bwtgen2(str, str2, block_size); } { // update .bwt bwt_t *bwt; strcpy(str, prefix); strcat(str, ".bwt"); bwt = bwt_restore_bwt(str); bwt_bwtupdate_core(bwt); bwt_dump_bwt(str, bwt); bwt_destroy(bwt); } { // forward.pac gzFile fp = gzopen(prefix, "r"); l_pac = bns_fasta2bntseq(fp, prefix, 1); gzclose(fp); } { // .sa bwt_t *bwt; strcpy(str, prefix); strcat(str, ".bwt"); strcpy(str3, prefix); strcat(str3, ".sa"); bwt = bwt_restore_bwt(str); bwt_cal_sa(bwt, 32); bwt_dump_sa(str3, bwt); bwt_destroy(bwt); } fprintf(stderr, "[bwt_index] Building done!\n"); free(str); free(str2); free(str3); }
extern "C" void bwa_seed2genome_init(const char *prefix, gap_opt_t *opt) { if (!opt) opt=mygap_init_opt() ; opt->mode=BWA_MODE_BAM_SE ; bwt_opt = opt ; { // load BWT char *str = (char*)calloc(strlen(prefix) + 10, 1); strcpy(str, prefix); strcat(str, ".bwt"); bwt_bwt[0] = bwt_restore_bwt(str); strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt_bwt[0]); strcpy(str, prefix); strcat(str, ".rbwt"); bwt_bwt[1] = bwt_restore_bwt(str); strcpy(str, prefix); strcat(str, ".rsa"); bwt_restore_sa(str, bwt_bwt[1]); free(str); bwt_bns = bns_restore(prefix); } }
/*
 * Convert suffix-array coordinates to positions for every read and compact
 * each read's `multi` list.
 *
 *   bns     forwarded to bwa_cal_pac_pos_core() and bwa_sa2pos()
 *   prefix  index prefix; "<prefix>.bwt" and "<prefix>.sa" are loaded
 *   n_seqs  number of entries in seqs
 *   seqs    reads, updated in place
 *   max_mm  forwarded to bwa_cal_pac_pos_core()
 *   fnr     forwarded to bwa_cal_pac_pos_core()
 *
 * For each multi-hit, pos and strand are recomputed with bwa_sa2pos(); hits
 * whose position equals the read's own p->pos are dropped and n_multi is
 * reduced accordingly.
 *
 * The file-name buffer is sized from the prefix, so an arbitrarily long
 * index path cannot overflow it.  Allocation failure aborts the process.
 */
void bwa_cal_pac_pos(const bntseq_t *bns, const char *prefix, int n_seqs, bwa_seq_t *seqs, int max_mm, float fnr)
{
    int i, j, strand, n_multi;
    char *str;
    bwt_t *bwt;

    /* prefix + longest suffix (".bwt") + NUL fits comfortably in +10 */
    str = (char*)calloc(strlen(prefix) + 10, 1);
    if (str == NULL) {
        abort();
    }

    // load forward SA
    strcpy(str, prefix); strcat(str, ".bwt");
    bwt = bwt_restore_bwt(str);
    strcpy(str, prefix); strcat(str, ".sa");
    bwt_restore_sa(str, bwt);
    free(str);

    for (i = 0; i != n_seqs; ++i) {
        bwa_seq_t *p = &seqs[i];
        bwa_cal_pac_pos_core(bns, bwt, p, max_mm, fnr);
        /* keep only the hits that differ from the primary position,
         * compacting them towards the front of p->multi */
        for (j = n_multi = 0; j < p->n_multi; ++j) {
            bwt_multi1_t *q = p->multi + j;
            q->pos = bwa_sa2pos(bns, bwt, q->pos, p->len, &strand);
            q->strand = strand;
            if (q->pos != p->pos) {
                p->multi[n_multi++] = *q;
            }
        }
        p->n_multi = n_multi;
    }
    bwt_destroy(bwt);
}
int bwa_index(int argc, char *argv[]) { char *prefix = 0, *str, *str2, *str3; int c, algo_type = 0, is_color = 0, is_64 = 0; clock_t t; int64_t l_pac; while ((c = getopt(argc, argv, "6ca:p:")) >= 0) { switch (c) { case 'a': // if -a is not set, algo_type will be determined later if (strcmp(optarg, "div") == 0) algo_type = 1; else if (strcmp(optarg, "bwtsw") == 0) algo_type = 2; else if (strcmp(optarg, "is") == 0) algo_type = 3; else err_fatal(__func__, "unknown algorithm: '%s'.", optarg); break; case 'p': prefix = strdup(optarg); break; case 'c': is_color = 1; break; case '6': is_64 = 1; break; default: return 1; } } if (optind + 1 > argc) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: bwa index [-a bwtsw|is] [-c] <in.fasta>\n\n"); fprintf(stderr, "Options: -a STR BWT construction algorithm: bwtsw or is [auto]\n"); fprintf(stderr, " -p STR prefix of the index [same as fasta name]\n"); fprintf(stderr, " -6 index files named as <in.fasta>.64.* instead of <in.fasta>.* \n"); // fprintf(stderr, " -c build color-space index\n"); fprintf(stderr, "\n"); fprintf(stderr, "Warning: `-a bwtsw' does not work for short genomes, while `-a is' and\n"); fprintf(stderr, " `-a div' do not work not for long genomes. Please choose `-a'\n"); fprintf(stderr, " according to the length of the genome.\n\n"); return 1; } if (prefix == 0) { prefix = malloc(strlen(argv[optind]) + 4); strcpy(prefix, argv[optind]); if (is_64) strcat(prefix, ".64"); } str = (char*)calloc(strlen(prefix) + 10, 1); str2 = (char*)calloc(strlen(prefix) + 10, 1); str3 = (char*)calloc(strlen(prefix) + 10, 1); if (is_color == 0) { // nucleotide indexing gzFile fp = xzopen(argv[optind], "r"); t = clock(); fprintf(stderr, "[bwa_index] Pack FASTA... 
"); l_pac = bns_fasta2bntseq(fp, prefix, 0); fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); gzclose(fp); } else { // color indexing gzFile fp = xzopen(argv[optind], "r"); strcat(strcpy(str, prefix), ".nt"); t = clock(); fprintf(stderr, "[bwa_index] Pack nucleotide FASTA... "); l_pac = bns_fasta2bntseq(fp, str, 0); fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); gzclose(fp); { char *tmp_argv[3]; tmp_argv[0] = argv[0]; tmp_argv[1] = str; tmp_argv[2] = prefix; t = clock(); fprintf(stderr, "[bwa_index] Convert nucleotide PAC to color PAC... "); bwa_pac2cspac(3, tmp_argv); fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); } } if (algo_type == 0) algo_type = l_pac > 50000000? 2 : 3; // set the algorithm for generating BWT { strcpy(str, prefix); strcat(str, ".pac"); strcpy(str2, prefix); strcat(str2, ".bwt"); t = clock(); fprintf(stderr, "[bwa_index] Construct BWT for the packed sequence...\n"); if (algo_type == 2) bwt_bwtgen(str, str2); else if (algo_type == 1 || algo_type == 3) { bwt_t *bwt; bwt = bwt_pac2bwt(str, algo_type == 3); bwt_dump_bwt(str2, bwt); bwt_destroy(bwt); } fprintf(stderr, "[bwa_index] %.2f seconds elapse.\n", (float)(clock() - t) / CLOCKS_PER_SEC); } { bwt_t *bwt; strcpy(str, prefix); strcat(str, ".bwt"); t = clock(); fprintf(stderr, "[bwa_index] Update BWT... "); bwt = bwt_restore_bwt(str); bwt_bwtupdate_core(bwt); bwt_dump_bwt(str, bwt); bwt_destroy(bwt); fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); } { gzFile fp = xzopen(argv[optind], "r"); t = clock(); fprintf(stderr, "[bwa_index] Pack forward-only FASTA... "); l_pac = bns_fasta2bntseq(fp, prefix, 1); fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); gzclose(fp); } { bwt_t *bwt; strcpy(str, prefix); strcat(str, ".bwt"); strcpy(str3, prefix); strcat(str3, ".sa"); t = clock(); fprintf(stderr, "[bwa_index] Construct SA from BWT and Occ... 
"); bwt = bwt_restore_bwt(str); bwt_cal_sa(bwt, 32); bwt_dump_sa(str3, bwt); bwt_destroy(bwt); fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); } free(str3); free(str2); free(str); free(prefix); return 0; }
void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt) { int i, n_seqs, tot_seqs = 0; bwa_seq_t *seqs; bwa_seqio_t *ks; clock_t t; bwt_t *bwt; // initialization ks = bwa_open_reads(opt->mode, fn_fa); { // load BWT char *str = (char*)calloc(strlen(prefix) + 10, 1); strcpy(str, prefix); strcat(str, ".bwt"); bwt = bwt_restore_bwt(str); free(str); } // core loop err_fwrite(opt, sizeof(gap_opt_t), 1, stdout); while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt->mode, opt->trim_qual)) != 0) { tot_seqs += n_seqs; t = clock(); fprintf(stderr, "[bwa_aln_core] calculate SA coordinate... "); #ifdef HAVE_PTHREAD if (opt->n_threads <= 1) { // no multi-threading at all bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt); } else { pthread_t *tid; pthread_attr_t attr; thread_aux_t *data; int j; pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); data = (thread_aux_t*)calloc(opt->n_threads, sizeof(thread_aux_t)); tid = (pthread_t*)calloc(opt->n_threads, sizeof(pthread_t)); for (j = 0; j < opt->n_threads; ++j) { data[j].tid = j; data[j].bwt = bwt; data[j].n_seqs = n_seqs; data[j].seqs = seqs; data[j].opt = opt; pthread_create(&tid[j], &attr, worker, data + j); } for (j = 0; j < opt->n_threads; ++j) pthread_join(tid[j], 0); free(data); free(tid); } #else bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt); #endif fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock(); t = clock(); fprintf(stderr, "[bwa_aln_core] write to the disk... "); for (i = 0; i < n_seqs; ++i) { bwa_seq_t *p = seqs + i; err_fwrite(&p->n_aln, 4, 1, stdout); if (p->n_aln) err_fwrite(p->aln, sizeof(bwt_aln1_t), p->n_aln, stdout); } fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock(); bwa_free_read_seq(n_seqs, seqs); fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs); } // destroy bwt_destroy(bwt); bwa_seq_close(ks); }
void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt) { int i, n_seqs, tot_seqs = 0; bwa_seq_t *seqs; bwa_seqio_t *ks; clock_t t; bwt_t *bwt; // initialization ks = bwa_open_reads(opt->mode, fn_fa); { // load BWT char *str = (char*)calloc(strlen(prefix) + 10, 1); strcpy(str, prefix); strcat(str, ".bwt"); bwt = bwt_restore_bwt(str); free(str); } // core loop err_fwrite(opt, sizeof(gap_opt_t), 1, stdout); while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt->mode, opt->trim_qual)) != 0) { tot_seqs += n_seqs; t = clock(); fprintf(stderr, "[bwa_aln_core] calculate SA coordinate... "); #ifdef THREAD if (opt->n_threads <= 1) { // no multi-threading at all bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt); } else { DWORD ThreadID; HANDLE *tid; thread_aux_t *data; int j; data = (thread_aux_t*)calloc(opt->n_threads, sizeof(thread_aux_t)); tid = (HANDLE*)calloc(opt->n_threads, sizeof(HANDLE)); for (j = 0; j < opt->n_threads; ++j) { data[j].tid = j; data[j].bwt = bwt; data[j].n_seqs = n_seqs; data[j].seqs = seqs; data[j].opt = opt; //create threads tid[j] = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) worker, data + j, 0, &ThreadID); if (tid[j] == NULL) { printf("CreateThread error: %d\n", GetLastError()); return; } } // Wait for all threads to terminate WaitForMultipleObjects(opt->n_threads, &tid[0], TRUE, INFINITE); free(data); free(tid); } #else bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt); #endif fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock(); t = clock(); fprintf(stderr, "[bwa_aln_core] write to the disk... 
"); for (i = 0; i < n_seqs; ++i) { bwa_seq_t *p = seqs + i; err_fwrite(&p->n_aln, 4, 1, stdout); if (p->n_aln) err_fwrite(p->aln, sizeof(bwt_aln1_t), p->n_aln, stdout); } fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock(); bwa_free_read_seq(n_seqs, seqs); fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs); } // destroy bwt_destroy(bwt); bwa_seq_close(ks); }
int bwa_bwtsw2(int argc, char *argv[]) { bsw2opt_t *opt; bwt_t *target[2]; char buf[1024]; bntseq_t *bns; int c; opt = bsw2_init_opt(); srand48(11); optind = 1; while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:y:s:c:N:Hf:")) >= 0) { switch (c) { case 'q': opt->q = atoi(optarg); break; case 'r': opt->r = atoi(optarg); break; case 'a': opt->a = atoi(optarg); break; case 'b': opt->b = atoi(optarg); break; case 'w': opt->bw = atoi(optarg); break; case 'T': opt->t = atoi(optarg); break; case 't': opt->n_threads = atoi(optarg); break; case 'z': opt->z = atoi(optarg); break; case 'y': opt->yita = atof(optarg); break; case 's': opt->is = atoi(optarg); break; case 'm': opt->mask_level = atof(optarg); break; case 'c': opt->coef = atof(optarg); break; case 'N': opt->t_seeds = atoi(optarg); break; case 'H': opt->hard_clip = 1; break; case 'f': xreopen(optarg, "w", stdout); break; } } opt->qr = opt->q + opt->r; if (optind + 2 > argc) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: bwa bwasw [options] <target.prefix> <query.fa>\n\n"); fprintf(stderr, "Options: -a INT score for a match [%d]\n", opt->a); fprintf(stderr, " -b INT mismatch penalty [%d]\n", opt->b); fprintf(stderr, " -q INT gap open penalty [%d]\n", opt->q); fprintf(stderr, " -r INT gap extension penalty [%d]\n", opt->r); // fprintf(stderr, " -y FLOAT error recurrence coef. 
(4..16) [%.1f]\n", opt->yita); fprintf(stderr, "\n"); fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads); fprintf(stderr, " -s INT size of a chunk of reads [%d]\n", opt->chunk_size); fprintf(stderr, "\n"); fprintf(stderr, " -w INT band width [%d]\n", opt->bw); fprintf(stderr, " -m FLOAT mask level [%.2f]\n", opt->mask_level); fprintf(stderr, "\n"); fprintf(stderr, " -T INT score threshold divided by a [%d]\n", opt->t); fprintf(stderr, " -s INT maximum seeding interval size [%d]\n", opt->is); fprintf(stderr, " -z INT Z-best [%d]\n", opt->z); fprintf(stderr, " -N INT # seeds to trigger reverse alignment [%d]\n", opt->t_seeds); fprintf(stderr, " -c FLOAT coefficient of length-threshold adjustment [%.1f]\n", opt->coef); fprintf(stderr, " -H in SAM output, use hard clipping rather than soft\n"); fprintf(stderr, " -f FILE file to output results to instead of stdout\n\n"); fprintf(stderr, "Note: For long Illumina, 454 and Sanger reads, assembly contigs, fosmids and\n"); fprintf(stderr, " BACs, the default setting usually works well. For the current PacBio\n"); fprintf(stderr, " reads (end of 2010), '-b5 -q2 -r1 -z10' is recommended. 
One may also\n"); fprintf(stderr, " increase '-z' for better sensitivity.\n"); fprintf(stderr, "\n"); if (0) { double c, theta, eps, delta; c = opt->a / log(opt->yita); theta = exp(-opt->b / c) / opt->yita; eps = exp(-opt->q / c); delta = exp(-opt->r / c); fprintf(stderr, "mismatch: %lf, gap_open: %lf, gap_ext: %lf\n\n", theta, eps, delta); } return 1; } // adjust opt for opt->a opt->t *= opt->a; opt->coef *= opt->a; strcpy(buf, argv[optind]); target[0] = bwt_restore_bwt(strcat(buf, ".bwt")); strcpy(buf, argv[optind]); bwt_restore_sa(strcat(buf, ".sa"), target[0]); strcpy(buf, argv[optind]); target[1] = bwt_restore_bwt(strcat(buf, ".rbwt")); strcpy(buf, argv[optind]); bwt_restore_sa(strcat(buf, ".rsa"), target[1]); bns = bns_restore(argv[optind]); bsw2_aln(opt, bns, target, argv[optind+1]); bns_destroy(bns); bwt_destroy(target[0]); bwt_destroy(target[1]); free(opt); fflush(stdout); xreopen("/dev/tty","w",stdout); return 0; }
int bwa_bwtsw2(int argc, char *argv[]) { extern char *bwa_infer_prefix(const char *hint); bsw2opt_t *opt; bwt_t *target; char buf[1024], *prefix; bntseq_t *bns; int c; opt = bsw2_init_opt(); srand48(11); while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:s:c:N:Hf:MI:SG:C")) >= 0) { switch (c) { case 'q': opt->q = atoi(optarg); break; case 'r': opt->r = atoi(optarg); break; case 'a': opt->a = atoi(optarg); break; case 'b': opt->b = atoi(optarg); break; case 'w': opt->bw = atoi(optarg); break; case 'T': opt->t = atoi(optarg); break; case 't': opt->n_threads = atoi(optarg); break; case 'z': opt->z = atoi(optarg); break; case 's': opt->is = atoi(optarg); break; case 'm': opt->mask_level = atof(optarg); break; case 'c': opt->coef = atof(optarg); break; case 'N': opt->t_seeds = atoi(optarg); break; case 'M': opt->multi_2nd = 1; break; case 'H': opt->hard_clip = 1; break; case 'f': xreopen(optarg, "w", stdout); break; case 'I': opt->max_ins = atoi(optarg); break; case 'S': opt->skip_sw = 1; break; case 'C': opt->cpy_cmt = 1; break; case 'G': opt->max_chain_gap = atoi(optarg); break; } } opt->qr = opt->q + opt->r; if (optind + 2 > argc) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: bwa bwasw [options] <target.prefix> <query.fa> [query2.fa]\n\n"); fprintf(stderr, "Options: -a INT score for a match [%d]\n", opt->a); fprintf(stderr, " -b INT mismatch penalty [%d]\n", opt->b); fprintf(stderr, " -q INT gap open penalty [%d]\n", opt->q); fprintf(stderr, " -r INT gap extension penalty [%d]\n", opt->r); fprintf(stderr, " -w INT band width [%d]\n", opt->bw); fprintf(stderr, " -m FLOAT mask level [%.2f]\n", opt->mask_level); fprintf(stderr, "\n"); fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads); fprintf(stderr, " -f FILE file to output results to instead of stdout\n"); fprintf(stderr, " -H in SAM output, use hard clipping instead of soft clipping\n"); fprintf(stderr, " -C copy FASTA/Q comment to SAM output\n"); fprintf(stderr, " -M mark multi-part alignments 
as secondary\n"); fprintf(stderr, " -S skip Smith-Waterman read pairing\n"); fprintf(stderr, " -I INT ignore pairs with insert >=INT for inferring the size distr [%d]\n", opt->max_ins); fprintf(stderr, "\n"); fprintf(stderr, " -T INT score threshold divided by a [%d]\n", opt->t); fprintf(stderr, " -c FLOAT coefficient of length-threshold adjustment [%.1f]\n", opt->coef); fprintf(stderr, " -z INT Z-best [%d]\n", opt->z); fprintf(stderr, " -s INT maximum seeding interval size [%d]\n", opt->is); fprintf(stderr, " -N INT # seeds to trigger rev aln; 2*INT is also the chaining threshold [%d]\n", opt->t_seeds); fprintf(stderr, " -G INT maximum gap size during chaining [%d]\n", opt->max_chain_gap); fprintf(stderr, "\n"); fprintf(stderr, "Note: For long Illumina, 454 and Sanger reads, assembly contigs, fosmids and\n"); fprintf(stderr, " BACs, the default setting usually works well. For the current PacBio\n"); fprintf(stderr, " reads (end of 2010), '-b5 -q2 -r1 -z10' is recommended. One may also\n"); fprintf(stderr, " increase '-z' for better sensitivity.\n"); fprintf(stderr, "\n"); return 1; } // adjust opt for opt->a opt->t *= opt->a; opt->coef *= opt->a; if ((prefix = bwa_infer_prefix(argv[optind])) == 0) { fprintf(stderr, "[%s] fail to locate the index\n", __func__); return 0; } strcpy(buf, prefix); target = bwt_restore_bwt(strcat(buf, ".bwt")); strcpy(buf, prefix); bwt_restore_sa(strcat(buf, ".sa"), target); bns = bns_restore(prefix); bsw2_aln(opt, bns, target, argv[optind+1], optind+2 < argc? argv[optind+2] : 0); bns_destroy(bns); bwt_destroy(target); free(opt); free(prefix); return 0; }
int bwa_bwtsw2(int argc, char *argv[]) { bsw2opt_t *opt; bwt_t *target[2]; char buf[1024]; bntseq_t *bns; int c; opt = bsw2_init_opt(); srand48(11); while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:y:s:c:N:H")) >= 0) { switch (c) { case 'q': opt->q = atoi(optarg); break; case 'r': opt->r = atoi(optarg); break; case 'a': opt->a = atoi(optarg); break; case 'b': opt->b = atoi(optarg); break; case 'w': opt->bw = atoi(optarg); break; case 'T': opt->t = atoi(optarg); break; case 't': opt->n_threads = atoi(optarg); break; case 'z': opt->z = atoi(optarg); break; case 'y': opt->yita = atof(optarg); break; case 's': opt->is = atoi(optarg); break; case 'm': opt->mask_level = atof(optarg); break; case 'c': opt->coef = atof(optarg); break; case 'N': opt->t_seeds = atoi(optarg); break; case 'H': opt->hard_clip = 1; break; } } opt->qr = opt->q + opt->r; if (optind + 2 > argc) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: bwa dbwtsw [options] <target.prefix> <query.fa>\n\n"); fprintf(stderr, "Options: -a INT score for a match [%d]\n", opt->a); fprintf(stderr, " -b INT mismatch penalty [%d]\n", opt->b); fprintf(stderr, " -q INT gap open penalty [%d]\n", opt->q); fprintf(stderr, " -r INT gap extension penalty [%d]\n", opt->r); // fprintf(stderr, " -y FLOAT error recurrence coef. 
(4..16) [%.1f]\n", opt->yita); fprintf(stderr, "\n"); fprintf(stderr, " -t INT nmber of threads [%d]\n", opt->n_threads); fprintf(stderr, " -s INT size of a chunk of reads [%d]\n", opt->chunk_size); fprintf(stderr, "\n"); fprintf(stderr, " -w INT band width [%d]\n", opt->bw); fprintf(stderr, " -m FLOAT mask level [%.2f]\n", opt->mask_level); fprintf(stderr, "\n"); fprintf(stderr, " -T INT score threshold divided by a [%d]\n", opt->t); fprintf(stderr, " -s INT maximum seeding interval size [%d]\n", opt->is); fprintf(stderr, " -z INT Z-best [%d]\n", opt->z); fprintf(stderr, " -N INT # seeds to trigger reverse alignment [%d]\n", opt->t_seeds); fprintf(stderr, " -c FLOAT coefficient of length-threshold adjustment [%.1f]\n", opt->coef); fprintf(stderr, " -H in SAM output, use hard clipping rather than soft\n"); fprintf(stderr, "\n"); { double c, theta, eps, delta; c = opt->a / log(opt->yita); theta = exp(-opt->b / c) / opt->yita; eps = exp(-opt->q / c); delta = exp(-opt->r / c); fprintf(stderr, "mismatch: %lf, gap_open: %lf, gap_ext: %lf\n\n", theta, eps, delta); } return 1; } // adjust opt for opt->a opt->t *= opt->a; opt->coef *= opt->a; strcpy(buf, argv[optind]); target[0] = bwt_restore_bwt(strcat(buf, ".bwt")); strcpy(buf, argv[optind]); bwt_restore_sa(strcat(buf, ".sa"), target[0]); strcpy(buf, argv[optind]); target[1] = bwt_restore_bwt(strcat(buf, ".rbwt")); strcpy(buf, argv[optind]); bwt_restore_sa(strcat(buf, ".rsa"), target[1]); bns = bns_restore(argv[optind]); bsw2_aln(opt, bns, target, argv[optind+1]); bns_destroy(bns); bwt_destroy(target[0]); bwt_destroy(target[1]); free(opt); return 0; }