Пример #1
0
BWA::BWA(const char* ann_filename,
	 const char* amb_filename,
	 const char* pac_filename,
	 const char* forward_bwt_filename, 
	 const char* forward_sa_filename, 
	 const char* reverse_bwt_filename, 
	 const char* reverse_sa_filename) 
{
  // Load the bns (?) and reference
  bns = bns_restore_core(ann_filename,amb_filename,pac_filename);
  reference = new ubyte_t[bns->l_pac/4+1];
  rewind(bns->fp_pac);
  fread(reference, 1, bns->l_pac/4+1, bns->fp_pac);
  fclose(bns->fp_pac);
  bns->fp_pac = NULL;

  // Load the BWTs (both directions) and suffix arrays (both directions)
  bwts[0] = bwt_restore_bwt(forward_bwt_filename);
  bwt_restore_sa(forward_sa_filename, bwts[0]);
  bwts[1] = bwt_restore_bwt(reverse_bwt_filename);
  bwt_restore_sa(reverse_sa_filename, bwts[1]);
  load_default_options();

  // Always reinitialize the random seed whenever a new set of files are loaded.
  initialize_random_seed();

  // initialize the bwase subsystem
  bwase_initialize();
}
Пример #2
0
void bwa_cal_pac_pos(const char *prefix, int n_seqs, bwa_seq_t *seqs, int max_mm, float fnr)
{
	int i;
	char str[1024];
	bwt_t *bwt;
	// load forward SA
	strcpy(str, prefix); strcat(str, ".bwt");  bwt = bwt_restore_bwt(str);
	strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt);
	for (i = 0; i != n_seqs; ++i) {
		bwa_seq_t *p = seqs + i;
		int max_diff = fnr > 0.0? bwa_cal_maxdiff(p->len, BWA_AVG_ERR, fnr) : max_mm;
		if ((p->type == BWA_TYPE_UNIQUE || p->type == BWA_TYPE_REPEAT) && p->strand) { // reverse strand only
			p->pos = bwt_sa(bwt, p->sa);
			p->seQ = p->mapQ = bwa_approx_mapQ(p, max_diff);
		}
	}
	bwt_destroy(bwt);
	// load reverse BWT and SA
	strcpy(str, prefix); strcat(str, ".rbwt"); bwt = bwt_restore_bwt(str);
	strcpy(str, prefix); strcat(str, ".rsa"); bwt_restore_sa(str, bwt);
	for (i = 0; i != n_seqs; ++i) {
		bwa_seq_t *p = seqs + i;
		int max_diff = fnr > 0.0? bwa_cal_maxdiff(p->len, BWA_AVG_ERR, fnr) : max_mm;
		if ((p->type == BWA_TYPE_UNIQUE || p->type == BWA_TYPE_REPEAT) && !p->strand) { // forward strand only
			/* NB: For gapped alignment, p->pos may not be correct,
			 *     which will be fixed in refine_gapped_core(). This
			 *     line also determines the way "x" is calculated in
			 *     refine_gapped_core() when (ext < 0 && is_end == 0). */
			p->pos = bwt->seq_len - (bwt_sa(bwt, p->sa) + p->len);
			p->seQ = p->mapQ = bwa_approx_mapQ(p, max_diff);
		}
	}
	bwt_destroy(bwt);
}
Пример #3
0
void bwa_cal_pac_pos(const char *prefix, int n_seqs, bwa_seq_t *seqs, int max_mm, float fnr)
{
	int i, j;
	char str[1024];
	bwt_t *bwt;
	// load forward SA
	strcpy(str, prefix); strcat(str, ".bwt");  bwt = bwt_restore_bwt(str);
	strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt);
	for (i = 0; i != n_seqs; ++i) {
		if (seqs[i].strand) bwa_cal_pac_pos_core(bwt, 0, &seqs[i], max_mm, fnr);
		for (j = 0; j < seqs[i].n_multi; ++j) {
			bwt_multi1_t *p = seqs[i].multi + j;
			if (p->strand) p->pos = bwt_sa(bwt, p->pos);
		}
	}
	bwt_destroy(bwt);
	// load reverse BWT and SA
	strcpy(str, prefix); strcat(str, ".rbwt"); bwt = bwt_restore_bwt(str);
	strcpy(str, prefix); strcat(str, ".rsa"); bwt_restore_sa(str, bwt);
	for (i = 0; i != n_seqs; ++i) {
		if (!seqs[i].strand) bwa_cal_pac_pos_core(0, bwt, &seqs[i], max_mm, fnr);
		for (j = 0; j < seqs[i].n_multi; ++j) {
			bwt_multi1_t *p = seqs[i].multi + j;
			if (!p->strand) p->pos = bwt->seq_len - (bwt_sa(bwt, p->pos) + seqs[i].len);
		}
	}
	bwt_destroy(bwt);
}
Пример #4
0
void bwt_index(char *prefix)
{
    int block_size = 10000000;
    int64_t l_pac;
    char *str, *str2, *str3;

	str  = (char*)calloc(strlen(prefix) + 10, 1);
	str2 = (char*)calloc(strlen(prefix) + 10, 1);
	str3 = (char*)calloc(strlen(prefix) + 10, 1);

    fprintf(stderr, "[bwt_index] Building bwt-index for genome...\n");
    { // for&rev.pac .ann .amb 
        gzFile fp = gzopen(prefix, "r");
        l_pac = bns_fasta2bntseq(fp, prefix, 0);
        gzclose(fp);
    }
    { // .bwt
        strcpy(str, prefix); strcat(str, ".pac");
        strcpy(str2, prefix); strcat(str2, ".bwt");
        bwt_bwtgen2(str, str2, block_size);
    }
    { // update .bwt
        bwt_t *bwt;
        strcpy(str, prefix); strcat(str, ".bwt");
        bwt = bwt_restore_bwt(str);
        bwt_bwtupdate_core(bwt);
        bwt_dump_bwt(str, bwt);
        bwt_destroy(bwt);
    }
    { // forward.pac
        gzFile fp = gzopen(prefix, "r");
        l_pac = bns_fasta2bntseq(fp, prefix, 1);
        gzclose(fp);
    }
    { // .sa
        bwt_t *bwt;
        strcpy(str, prefix); strcat(str, ".bwt");
        strcpy(str3, prefix); strcat(str3, ".sa");
        bwt = bwt_restore_bwt(str);
        bwt_cal_sa(bwt, 32);
        bwt_dump_sa(str3, bwt);
        bwt_destroy(bwt);
    }
    fprintf(stderr, "[bwt_index] Building done!\n");
    free(str); free(str2); free(str3);
}
Пример #5
0
extern "C" void bwa_seed2genome_init(const char *prefix, gap_opt_t *opt)
{
	if (!opt)
		opt=mygap_init_opt() ;
	opt->mode=BWA_MODE_BAM_SE ;
	bwt_opt = opt ;
	
	{ // load BWT
		char *str = (char*)calloc(strlen(prefix) + 10, 1);
		strcpy(str, prefix); strcat(str, ".bwt");  bwt_bwt[0] = bwt_restore_bwt(str);
		strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt_bwt[0]);
		strcpy(str, prefix); strcat(str, ".rbwt"); bwt_bwt[1] = bwt_restore_bwt(str);
		strcpy(str, prefix); strcat(str, ".rsa"); bwt_restore_sa(str, bwt_bwt[1]);
		free(str);
		bwt_bns = bns_restore(prefix);
	}
}
Пример #6
0
void bwa_cal_pac_pos(const bntseq_t *bns, const char *prefix, int n_seqs, bwa_seq_t *seqs, int max_mm, float fnr)
{
	int i, j, strand, n_multi;
	char str[1024];
	bwt_t *bwt;
	// load forward SA
	strcpy(str, prefix); strcat(str, ".bwt");  bwt = bwt_restore_bwt(str);
	strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt);
	for (i = 0; i != n_seqs; ++i) {
		bwa_seq_t *p = &seqs[i];
		bwa_cal_pac_pos_core(bns, bwt, p, max_mm, fnr);
		for (j = n_multi = 0; j < p->n_multi; ++j) {
			bwt_multi1_t *q = p->multi + j;
			q->pos = bwa_sa2pos(bns, bwt, q->pos, p->len, &strand);
			q->strand = strand;
			if (q->pos != p->pos)
				p->multi[n_multi++] = *q;
		}
		p->n_multi = n_multi;
	}
	bwt_destroy(bwt);
}
Пример #7
0
int bwa_index(int argc, char *argv[])
{
	char *prefix = 0, *str, *str2, *str3;
	int c, algo_type = 0, is_color = 0, is_64 = 0;
	clock_t t;
	int64_t l_pac;

	while ((c = getopt(argc, argv, "6ca:p:")) >= 0) {
		switch (c) {
		case 'a': // if -a is not set, algo_type will be determined later
			if (strcmp(optarg, "div") == 0) algo_type = 1;
			else if (strcmp(optarg, "bwtsw") == 0) algo_type = 2;
			else if (strcmp(optarg, "is") == 0) algo_type = 3;
			else err_fatal(__func__, "unknown algorithm: '%s'.", optarg);
			break;
		case 'p': prefix = strdup(optarg); break;
		case 'c': is_color = 1; break;
		case '6': is_64 = 1; break;
		default: return 1;
		}
	}

	if (optind + 1 > argc) {
		fprintf(stderr, "\n");
		fprintf(stderr, "Usage:   bwa index [-a bwtsw|is] [-c] <in.fasta>\n\n");
		fprintf(stderr, "Options: -a STR    BWT construction algorithm: bwtsw or is [auto]\n");
		fprintf(stderr, "         -p STR    prefix of the index [same as fasta name]\n");
		fprintf(stderr, "         -6        index files named as <in.fasta>.64.* instead of <in.fasta>.* \n");
//		fprintf(stderr, "         -c        build color-space index\n");
		fprintf(stderr, "\n");
		fprintf(stderr,	"Warning: `-a bwtsw' does not work for short genomes, while `-a is' and\n");
		fprintf(stderr, "         `-a div' do not work not for long genomes. Please choose `-a'\n");
		fprintf(stderr, "         according to the length of the genome.\n\n");
		return 1;
	}
	if (prefix == 0) {
		prefix = malloc(strlen(argv[optind]) + 4);
		strcpy(prefix, argv[optind]);
		if (is_64) strcat(prefix, ".64");
	}
	str  = (char*)calloc(strlen(prefix) + 10, 1);
	str2 = (char*)calloc(strlen(prefix) + 10, 1);
	str3 = (char*)calloc(strlen(prefix) + 10, 1);

	if (is_color == 0) { // nucleotide indexing
		gzFile fp = xzopen(argv[optind], "r");
		t = clock();
		fprintf(stderr, "[bwa_index] Pack FASTA... ");
		l_pac = bns_fasta2bntseq(fp, prefix, 0);
		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
		gzclose(fp);
	} else { // color indexing
		gzFile fp = xzopen(argv[optind], "r");
		strcat(strcpy(str, prefix), ".nt");
		t = clock();
		fprintf(stderr, "[bwa_index] Pack nucleotide FASTA... ");
		l_pac = bns_fasta2bntseq(fp, str, 0);
		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
		gzclose(fp);
		{
			char *tmp_argv[3];
			tmp_argv[0] = argv[0]; tmp_argv[1] = str; tmp_argv[2] = prefix;
			t = clock();
			fprintf(stderr, "[bwa_index] Convert nucleotide PAC to color PAC... ");
			bwa_pac2cspac(3, tmp_argv);
			fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
		}
	}
	if (algo_type == 0) algo_type = l_pac > 50000000? 2 : 3; // set the algorithm for generating BWT
	{
		strcpy(str, prefix); strcat(str, ".pac");
		strcpy(str2, prefix); strcat(str2, ".bwt");
		t = clock();
		fprintf(stderr, "[bwa_index] Construct BWT for the packed sequence...\n");
		if (algo_type == 2) bwt_bwtgen(str, str2);
		else if (algo_type == 1 || algo_type == 3) {
			bwt_t *bwt;
			bwt = bwt_pac2bwt(str, algo_type == 3);
			bwt_dump_bwt(str2, bwt);
			bwt_destroy(bwt);
		}
		fprintf(stderr, "[bwa_index] %.2f seconds elapse.\n", (float)(clock() - t) / CLOCKS_PER_SEC);
	}
	{
		bwt_t *bwt;
		strcpy(str, prefix); strcat(str, ".bwt");
		t = clock();
		fprintf(stderr, "[bwa_index] Update BWT... ");
		bwt = bwt_restore_bwt(str);
		bwt_bwtupdate_core(bwt);
		bwt_dump_bwt(str, bwt);
		bwt_destroy(bwt);
		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
	}
	{
		gzFile fp = xzopen(argv[optind], "r");
		t = clock();
		fprintf(stderr, "[bwa_index] Pack forward-only FASTA... ");
		l_pac = bns_fasta2bntseq(fp, prefix, 1);
		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
		gzclose(fp);
	}
	{
		bwt_t *bwt;
		strcpy(str, prefix); strcat(str, ".bwt");
		strcpy(str3, prefix); strcat(str3, ".sa");
		t = clock();
		fprintf(stderr, "[bwa_index] Construct SA from BWT and Occ... ");
		bwt = bwt_restore_bwt(str);
		bwt_cal_sa(bwt, 32);
		bwt_dump_sa(str3, bwt);
		bwt_destroy(bwt);
		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
	}
	free(str3); free(str2); free(str); free(prefix);
	return 0;
}
Пример #8
0
void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt)
{
	int i, n_seqs, tot_seqs = 0;
	bwa_seq_t *seqs;
	bwa_seqio_t *ks;
	clock_t t;
	bwt_t *bwt;

	// initialization
	ks = bwa_open_reads(opt->mode, fn_fa);

	{ // load BWT
		char *str = (char*)calloc(strlen(prefix) + 10, 1);
		strcpy(str, prefix); strcat(str, ".bwt");  bwt = bwt_restore_bwt(str);
		free(str);
	}

	// core loop
	err_fwrite(opt, sizeof(gap_opt_t), 1, stdout);
	while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt->mode, opt->trim_qual)) != 0) {
		tot_seqs += n_seqs;
		t = clock();

		fprintf(stderr, "[bwa_aln_core] calculate SA coordinate... ");

#ifdef HAVE_PTHREAD
		if (opt->n_threads <= 1) { // no multi-threading at all
			bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt);
		} else {
			pthread_t *tid;
			pthread_attr_t attr;
			thread_aux_t *data;
			int j;
			pthread_attr_init(&attr);
			pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
			data = (thread_aux_t*)calloc(opt->n_threads, sizeof(thread_aux_t));
			tid = (pthread_t*)calloc(opt->n_threads, sizeof(pthread_t));
			for (j = 0; j < opt->n_threads; ++j) {
				data[j].tid = j; data[j].bwt = bwt;
				data[j].n_seqs = n_seqs; data[j].seqs = seqs; data[j].opt = opt;
				pthread_create(&tid[j], &attr, worker, data + j);
			}
			for (j = 0; j < opt->n_threads; ++j) pthread_join(tid[j], 0);
			free(data); free(tid);
		}
#else
		bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt);
#endif

		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();

		t = clock();
		fprintf(stderr, "[bwa_aln_core] write to the disk... ");
		for (i = 0; i < n_seqs; ++i) {
			bwa_seq_t *p = seqs + i;
			err_fwrite(&p->n_aln, 4, 1, stdout);
			if (p->n_aln) err_fwrite(p->aln, sizeof(bwt_aln1_t), p->n_aln, stdout);
		}
		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();

		bwa_free_read_seq(n_seqs, seqs);
		fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs);
	}

	// destroy
	bwt_destroy(bwt);
	bwa_seq_close(ks);
}
Пример #9
0
void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt)
{
    int i, n_seqs, tot_seqs = 0;
    bwa_seq_t *seqs;
    bwa_seqio_t *ks;
    clock_t t;
    bwt_t *bwt;

    // initialization
    ks = bwa_open_reads(opt->mode, fn_fa);

    { // load BWT
        char *str = (char*)calloc(strlen(prefix) + 10, 1);
        strcpy(str, prefix); strcat(str, ".bwt");  bwt = bwt_restore_bwt(str);
        free(str);
    }

    // core loop
    err_fwrite(opt, sizeof(gap_opt_t), 1, stdout);
    while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt->mode, opt->trim_qual)) != 0) {
        tot_seqs += n_seqs;
        t = clock();

        fprintf(stderr, "[bwa_aln_core] calculate SA coordinate... ");

#ifdef THREAD
        if (opt->n_threads <= 1) { // no multi-threading at all
            bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt);
        } else {
            DWORD ThreadID;
            HANDLE *tid;
            thread_aux_t *data;
            int j;
            data = (thread_aux_t*)calloc(opt->n_threads, sizeof(thread_aux_t));
            tid = (HANDLE*)calloc(opt->n_threads, sizeof(HANDLE));
            for (j = 0; j < opt->n_threads; ++j) {
                data[j].tid = j; data[j].bwt = bwt;
                data[j].n_seqs = n_seqs; data[j].seqs = seqs; data[j].opt = opt;

                //create threads
                tid[j] = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) worker, data + j, 0, &ThreadID);
                if (tid[j] == NULL)
                {
                    printf("CreateThread error: %d\n", GetLastError());
                    return;
                }
            }
            // Wait for all threads to terminate
            WaitForMultipleObjects(opt->n_threads, &tid[0], TRUE, INFINITE);
            free(data); free(tid);
        }
#else
        bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt);
#endif

        fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();

        t = clock();
        fprintf(stderr, "[bwa_aln_core] write to the disk... ");
        for (i = 0; i < n_seqs; ++i) {
            bwa_seq_t *p = seqs + i;
            err_fwrite(&p->n_aln, 4, 1, stdout);
            if (p->n_aln) err_fwrite(p->aln, sizeof(bwt_aln1_t), p->n_aln, stdout);
        }
        fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();

        bwa_free_read_seq(n_seqs, seqs);
        fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs);
    }

    // destroy
    bwt_destroy(bwt);
    bwa_seq_close(ks);
}
Пример #10
0
int bwa_bwtsw2(int argc, char *argv[])
{
	bsw2opt_t *opt;
	bwt_t *target[2];
	char buf[1024];
	bntseq_t *bns;
	int c;

	opt = bsw2_init_opt();
	srand48(11);
	optind = 1;
	while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:y:s:c:N:Hf:")) >= 0) {
		switch (c) {
		case 'q': opt->q = atoi(optarg); break;
		case 'r': opt->r = atoi(optarg); break;
		case 'a': opt->a = atoi(optarg); break;
		case 'b': opt->b = atoi(optarg); break;
		case 'w': opt->bw = atoi(optarg); break;
		case 'T': opt->t = atoi(optarg); break;
		case 't': opt->n_threads = atoi(optarg); break;
		case 'z': opt->z = atoi(optarg); break;
		case 'y': opt->yita = atof(optarg); break;
		case 's': opt->is = atoi(optarg); break;
		case 'm': opt->mask_level = atof(optarg); break;
		case 'c': opt->coef = atof(optarg); break;
		case 'N': opt->t_seeds = atoi(optarg); break;
		case 'H': opt->hard_clip = 1; break;
		case 'f': xreopen(optarg, "w", stdout); break;
		}
	}
	opt->qr = opt->q + opt->r;

	if (optind + 2 > argc) {
		fprintf(stderr, "\n");
		fprintf(stderr, "Usage:   bwa bwasw [options] <target.prefix> <query.fa>\n\n");
		fprintf(stderr, "Options: -a INT   score for a match [%d]\n", opt->a);
		fprintf(stderr, "         -b INT   mismatch penalty [%d]\n", opt->b);
		fprintf(stderr, "         -q INT   gap open penalty [%d]\n", opt->q);
		fprintf(stderr, "         -r INT   gap extension penalty [%d]\n", opt->r);
//		fprintf(stderr, "         -y FLOAT error recurrence coef. (4..16) [%.1f]\n", opt->yita);
		fprintf(stderr, "\n");
		fprintf(stderr, "         -t INT   number of threads [%d]\n", opt->n_threads);
		fprintf(stderr, "         -s INT   size of a chunk of reads [%d]\n", opt->chunk_size);
		fprintf(stderr, "\n");
		fprintf(stderr, "         -w INT   band width [%d]\n", opt->bw);
		fprintf(stderr, "         -m FLOAT mask level [%.2f]\n", opt->mask_level);
		fprintf(stderr, "\n");
		fprintf(stderr, "         -T INT   score threshold divided by a [%d]\n", opt->t);
		fprintf(stderr, "         -s INT   maximum seeding interval size [%d]\n", opt->is);
		fprintf(stderr, "         -z INT   Z-best [%d]\n", opt->z);
		fprintf(stderr, "         -N INT   # seeds to trigger reverse alignment [%d]\n", opt->t_seeds);
		fprintf(stderr, "         -c FLOAT coefficient of length-threshold adjustment [%.1f]\n", opt->coef);
		fprintf(stderr, "         -H       in SAM output, use hard clipping rather than soft\n");
        fprintf(stderr, "         -f FILE  file to output results to instead of stdout\n\n");
		fprintf(stderr, "Note: For long Illumina, 454 and Sanger reads, assembly contigs, fosmids and\n");
		fprintf(stderr, "      BACs, the default setting usually works well. For the current PacBio\n");
		fprintf(stderr, "      reads (end of 2010), '-b5 -q2 -r1 -z10' is recommended. One may also\n");
		fprintf(stderr, "      increase '-z' for better sensitivity.\n");
		fprintf(stderr, "\n");

		if (0) {
			double c, theta, eps, delta;
			c = opt->a / log(opt->yita);
			theta = exp(-opt->b / c) / opt->yita;
			eps = exp(-opt->q / c);
			delta = exp(-opt->r / c);
			fprintf(stderr, "mismatch: %lf, gap_open: %lf, gap_ext: %lf\n\n",
					theta, eps, delta);
		}
		return 1;
	}

	// adjust opt for opt->a
	opt->t *= opt->a;
	opt->coef *= opt->a;

	strcpy(buf, argv[optind]); target[0] = bwt_restore_bwt(strcat(buf, ".bwt"));
	strcpy(buf, argv[optind]); bwt_restore_sa(strcat(buf, ".sa"), target[0]);
	strcpy(buf, argv[optind]); target[1] = bwt_restore_bwt(strcat(buf, ".rbwt"));
	strcpy(buf, argv[optind]); bwt_restore_sa(strcat(buf, ".rsa"), target[1]);
	bns = bns_restore(argv[optind]);

	bsw2_aln(opt, bns, target, argv[optind+1]);

	bns_destroy(bns);
	bwt_destroy(target[0]); bwt_destroy(target[1]);
	free(opt);
	fflush(stdout);
	xreopen("/dev/tty","w",stdout);
	return 0;
}
Пример #11
0
int bwa_bwtsw2(int argc, char *argv[])
{
	extern char *bwa_infer_prefix(const char *hint);
	bsw2opt_t *opt;
	bwt_t *target;
	char buf[1024], *prefix;
	bntseq_t *bns;
	int c;

	opt = bsw2_init_opt();
	srand48(11);
	while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:s:c:N:Hf:MI:SG:C")) >= 0) {
		switch (c) {
		case 'q': opt->q = atoi(optarg); break;
		case 'r': opt->r = atoi(optarg); break;
		case 'a': opt->a = atoi(optarg); break;
		case 'b': opt->b = atoi(optarg); break;
		case 'w': opt->bw = atoi(optarg); break;
		case 'T': opt->t = atoi(optarg); break;
		case 't': opt->n_threads = atoi(optarg); break;
		case 'z': opt->z = atoi(optarg); break;
		case 's': opt->is = atoi(optarg); break;
		case 'm': opt->mask_level = atof(optarg); break;
		case 'c': opt->coef = atof(optarg); break;
		case 'N': opt->t_seeds = atoi(optarg); break;
		case 'M': opt->multi_2nd = 1; break;
		case 'H': opt->hard_clip = 1; break;
		case 'f': xreopen(optarg, "w", stdout); break;
		case 'I': opt->max_ins = atoi(optarg); break;
		case 'S': opt->skip_sw = 1; break;
		case 'C': opt->cpy_cmt = 1; break;
		case 'G': opt->max_chain_gap = atoi(optarg); break;
		}
	}
	opt->qr = opt->q + opt->r;

	if (optind + 2 > argc) {
		fprintf(stderr, "\n");
		fprintf(stderr, "Usage:   bwa bwasw [options] <target.prefix> <query.fa> [query2.fa]\n\n");
		fprintf(stderr, "Options: -a INT   score for a match [%d]\n", opt->a);
		fprintf(stderr, "         -b INT   mismatch penalty [%d]\n", opt->b);
		fprintf(stderr, "         -q INT   gap open penalty [%d]\n", opt->q);
		fprintf(stderr, "         -r INT   gap extension penalty [%d]\n", opt->r);
		fprintf(stderr, "         -w INT   band width [%d]\n", opt->bw);
		fprintf(stderr, "         -m FLOAT mask level [%.2f]\n", opt->mask_level);
		fprintf(stderr, "\n");
		fprintf(stderr, "         -t INT   number of threads [%d]\n", opt->n_threads);
		fprintf(stderr, "         -f FILE  file to output results to instead of stdout\n");
		fprintf(stderr, "         -H       in SAM output, use hard clipping instead of soft clipping\n");
		fprintf(stderr, "         -C       copy FASTA/Q comment to SAM output\n");
		fprintf(stderr, "         -M       mark multi-part alignments as secondary\n");
		fprintf(stderr, "         -S       skip Smith-Waterman read pairing\n");
		fprintf(stderr, "         -I INT   ignore pairs with insert >=INT for inferring the size distr [%d]\n", opt->max_ins);
		fprintf(stderr, "\n");
		fprintf(stderr, "         -T INT   score threshold divided by a [%d]\n", opt->t);
		fprintf(stderr, "         -c FLOAT coefficient of length-threshold adjustment [%.1f]\n", opt->coef);
		fprintf(stderr, "         -z INT   Z-best [%d]\n", opt->z);
		fprintf(stderr, "         -s INT   maximum seeding interval size [%d]\n", opt->is);
		fprintf(stderr, "         -N INT   # seeds to trigger rev aln; 2*INT is also the chaining threshold [%d]\n", opt->t_seeds);
		fprintf(stderr, "         -G INT   maximum gap size during chaining [%d]\n", opt->max_chain_gap);
		fprintf(stderr, "\n");
		fprintf(stderr, "Note: For long Illumina, 454 and Sanger reads, assembly contigs, fosmids and\n");
		fprintf(stderr, "      BACs, the default setting usually works well. For the current PacBio\n");
		fprintf(stderr, "      reads (end of 2010), '-b5 -q2 -r1 -z10' is recommended. One may also\n");
		fprintf(stderr, "      increase '-z' for better sensitivity.\n");
		fprintf(stderr, "\n");

		return 1;
	}

	// adjust opt for opt->a
	opt->t *= opt->a;
	opt->coef *= opt->a;

	if ((prefix = bwa_infer_prefix(argv[optind])) == 0) {
		fprintf(stderr, "[%s] fail to locate the index\n", __func__);
		return 0;
	}
	strcpy(buf, prefix); target = bwt_restore_bwt(strcat(buf, ".bwt"));
	strcpy(buf, prefix); bwt_restore_sa(strcat(buf, ".sa"), target);
	bns = bns_restore(prefix);

	bsw2_aln(opt, bns, target, argv[optind+1], optind+2 < argc? argv[optind+2] : 0);

	bns_destroy(bns);
	bwt_destroy(target);
	free(opt); free(prefix);
	
	return 0;
}
Пример #12
0
int bwa_bwtsw2(int argc, char *argv[])
{
	bsw2opt_t *opt;
	bwt_t *target[2];
	char buf[1024];
	bntseq_t *bns;
	int c;

	opt = bsw2_init_opt();
	srand48(11);
	while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:y:s:c:N:H")) >= 0) {
		switch (c) {
		case 'q': opt->q = atoi(optarg); break;
		case 'r': opt->r = atoi(optarg); break;
		case 'a': opt->a = atoi(optarg); break;
		case 'b': opt->b = atoi(optarg); break;
		case 'w': opt->bw = atoi(optarg); break;
		case 'T': opt->t = atoi(optarg); break;
		case 't': opt->n_threads = atoi(optarg); break;
		case 'z': opt->z = atoi(optarg); break;
		case 'y': opt->yita = atof(optarg); break;
		case 's': opt->is = atoi(optarg); break;
		case 'm': opt->mask_level = atof(optarg); break;
		case 'c': opt->coef = atof(optarg); break;
		case 'N': opt->t_seeds = atoi(optarg); break;
		case 'H': opt->hard_clip = 1; break;
		}
	}
	opt->qr = opt->q + opt->r;

	if (optind + 2 > argc) {
		fprintf(stderr, "\n");
		fprintf(stderr, "Usage:   bwa dbwtsw [options] <target.prefix> <query.fa>\n\n");
		fprintf(stderr, "Options: -a INT   score for a match [%d]\n", opt->a);
		fprintf(stderr, "         -b INT   mismatch penalty [%d]\n", opt->b);
		fprintf(stderr, "         -q INT   gap open penalty [%d]\n", opt->q);
		fprintf(stderr, "         -r INT   gap extension penalty [%d]\n", opt->r);
//		fprintf(stderr, "         -y FLOAT error recurrence coef. (4..16) [%.1f]\n", opt->yita);
		fprintf(stderr, "\n");
		fprintf(stderr, "         -t INT   nmber of threads [%d]\n", opt->n_threads);
		fprintf(stderr, "         -s INT   size of a chunk of reads [%d]\n", opt->chunk_size);
		fprintf(stderr, "\n");
		fprintf(stderr, "         -w INT   band width [%d]\n", opt->bw);
		fprintf(stderr, "         -m FLOAT mask level [%.2f]\n", opt->mask_level);
		fprintf(stderr, "\n");
		fprintf(stderr, "         -T INT   score threshold divided by a [%d]\n", opt->t);
		fprintf(stderr, "         -s INT   maximum seeding interval size [%d]\n", opt->is);
		fprintf(stderr, "         -z INT   Z-best [%d]\n", opt->z);
		fprintf(stderr, "         -N INT   # seeds to trigger reverse alignment [%d]\n", opt->t_seeds);
		fprintf(stderr, "         -c FLOAT coefficient of length-threshold adjustment [%.1f]\n", opt->coef);
		fprintf(stderr, "         -H       in SAM output, use hard clipping rather than soft\n");
		fprintf(stderr, "\n");

		{
			double c, theta, eps, delta;
			c = opt->a / log(opt->yita);
			theta = exp(-opt->b / c) / opt->yita;
			eps = exp(-opt->q / c);
			delta = exp(-opt->r / c);
			fprintf(stderr, "mismatch: %lf, gap_open: %lf, gap_ext: %lf\n\n",
					theta, eps, delta);
		}
		return 1;
	}

	// adjust opt for opt->a
	opt->t *= opt->a;
	opt->coef *= opt->a;

	strcpy(buf, argv[optind]); target[0] = bwt_restore_bwt(strcat(buf, ".bwt"));
	strcpy(buf, argv[optind]); bwt_restore_sa(strcat(buf, ".sa"), target[0]);
	strcpy(buf, argv[optind]); target[1] = bwt_restore_bwt(strcat(buf, ".rbwt"));
	strcpy(buf, argv[optind]); bwt_restore_sa(strcat(buf, ".rsa"), target[1]);
	bns = bns_restore(argv[optind]);

	bsw2_aln(opt, bns, target, argv[optind+1]);

	bns_destroy(bns);
	bwt_destroy(target[0]); bwt_destroy(target[1]);
	free(opt);
	
	return 0;
}