Пример #1
0
/**
 * Destroy the two BWT handles held in bwt_bwt[] and the bwt_bns handle,
 * skipping any that are null.
 *
 * Each handle is cleared after it is destroyed, so calling this function a
 * second time is a no-op instead of a double free (the null guards alone
 * cannot protect against that while the stale pointers remain set).
 */
extern "C" void bwa_seed2genome_destroy()
{
	for (int i = 0; i < 2; ++i) {
		if (bwt_bwt[i]) {
			bwt_destroy(bwt_bwt[i]);
			bwt_bwt[i] = 0;
		}
	}
	if (bwt_bns) {
		bns_destroy(bwt_bns);
		bwt_bns = 0;
	}
}
Пример #2
0
Файл: bntseq.c Проект: a113n/bwa
/**
 * Read every sequence from fp_fa, pack it with add1() and write the packed
 * buffer to "<prefix>.pac"; the annotations are then written by bns_dump().
 *
 * @param fp_fa    input stream handed to kseq_init()
 * @param prefix   output file prefix; ".pac" is appended to it
 * @param for_only when zero, the reverse complement of the packed sequence
 *                 is appended after the forward sequence (doubling l_pac)
 * @return the final bns->l_pac, or -1 if the prefix does not fit in the
 *         file-name buffer or the packed buffer cannot be enlarged
 */
int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix, int for_only)
{
	kseq_t *seq;
	char name[1024];
	bntseq_t *bns;
	uint8_t *pac = 0;
	int32_t m_seqs, m_holes;
	int64_t ret = -1, m_pac, l;
	bntamb1_t *q;
	FILE *fp;

	// refuse a prefix that would overflow name[] once ".pac" is appended
	if (strlen(prefix) + sizeof(".pac") > sizeof(name)) {
		fprintf(stderr, "[%s] prefix is too long\n", __func__);
		return ret;
	}

	// initialization
	seq = kseq_init(fp_fa);
	bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
	bns->seed = 11; // fixed seed for random generator
	srand48(bns->seed);
	m_seqs = m_holes = 8; m_pac = 0x10000;
	bns->anns = (bntann1_t*)calloc(m_seqs, sizeof(bntann1_t));
	bns->ambs = (bntamb1_t*)calloc(m_holes, sizeof(bntamb1_t));
	pac = calloc(m_pac/4, 1);
	q = bns->ambs;
	strcpy(name, prefix); strcat(name, ".pac");
	fp = xopen(name, "wb");
	// read sequences
	while (kseq_read(seq) >= 0) pac = add1(seq, bns, pac, &m_pac, &m_seqs, &m_holes, &q);
	// An empty input needs no reverse strand; skipping it also avoids a
	// zero-sized realloc(), whose result could not be told apart from failure.
	if (!for_only && bns->l_pac > 0) { // add the reverse complemented sequence
		uint8_t *grown;
		m_pac = (bns->l_pac * 2 + 3) / 4 * 4;
		grown = realloc(pac, m_pac/4);
		if (grown == 0) { // pac is still valid here and must be released
			fprintf(stderr, "[%s] out of memory\n", __func__);
			err_fclose(fp);
			bns_destroy(bns);
			kseq_destroy(seq);
			free(pac);
			return -1;
		}
		pac = grown;
		// clear the bytes that will receive the reverse strand
		memset(pac + (bns->l_pac+3)/4, 0, (m_pac - (bns->l_pac+3)/4*4) / 4);
		// walk the forward strand backwards, appending 3-code at the growing end
		for (l = bns->l_pac - 1; l >= 0; --l, ++bns->l_pac)
			_set_pac(pac, bns->l_pac, 3-_get_pac(pac, l));
	}
	ret = bns->l_pac;
	{ // finalize .pac file
		ubyte_t ct;
		err_fwrite(pac, 1, (bns->l_pac>>2) + ((bns->l_pac&3) == 0? 0 : 1), fp);
		// the following codes make the pac file size always (l_pac/4+1+1)
		if (bns->l_pac % 4 == 0) {
			ct = 0;
			err_fwrite(&ct, 1, 1, fp);
		}
		ct = bns->l_pac % 4;
		err_fwrite(&ct, 1, 1, fp);
		// close .pac file
		err_fflush(fp);
		err_fclose(fp);
	}
	bns_dump(bns, prefix);
	bns_destroy(bns);
	kseq_destroy(seq);
	free(pac);
	return ret;
}
Пример #3
0
/**
 * Read every sequence from fp_fa, pack it with add1() and write the
 * forward strand only to "<prefix>.bis.pac".
 *
 * @param fp_fa  input stream handed to kseq_init()
 * @param prefix output file prefix; ".bis.pac" is appended to it
 * @return bns->l_pac after all sequences were added, or -1 if the prefix
 *         does not fit in the file-name buffer
 */
int64_t dump_forward_pac(gzFile fp_fa, const char *prefix)
{
	kseq_t *seq;
	char name[1024];
	bntseq_t *bns;
	uint8_t *pac = 0;
	int32_t m_seqs, m_holes;
	int64_t ret = -1, m_pac;
	bntamb1_t *q;
	FILE *fp;

	// refuse a prefix that would overflow name[] once ".bis.pac" is appended
	if (strlen(prefix) + sizeof(".bis.pac") > sizeof(name)) {
		fprintf(stderr, "[%s] prefix is too long\n", __func__);
		return ret;
	}

	// initialization
	seq = kseq_init(fp_fa);
	bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
	bns->seed = 11; // fixed seed for random generator
	srand48(bns->seed);
	m_seqs = m_holes = 8; m_pac = 0x10000;
	bns->anns = (bntann1_t*)calloc(m_seqs, sizeof(bntann1_t));
	bns->ambs = (bntamb1_t*)calloc(m_holes, sizeof(bntamb1_t));
	pac = calloc(m_pac/4, 1);
	q = bns->ambs;
	strcpy(name, prefix); strcat(name, ".bis.pac");
	fp = xopen(name, "wb");
	// read sequences
	while (kseq_read(seq) >= 0) pac = add1(seq, bns, pac, &m_pac, &m_seqs, &m_holes, &q);

	ret = bns->l_pac;
	{ // finalize .pac file
		ubyte_t ct;
		err_fwrite(pac, 1, (bns->l_pac>>2) + ((bns->l_pac&3) == 0? 0 : 1), fp);
		// the following codes make the pac file size always (l_pac/4+1+1)
		if (bns->l_pac % 4 == 0) {
			ct = 0;
			err_fwrite(&ct, 1, 1, fp);
		}
		ct = bns->l_pac % 4;
		err_fwrite(&ct, 1, 1, fp);
		// close .pac file
		err_fflush(fp);
		err_fclose(fp);
	}
	/* Disabled: re-dump the forward bis bns, otherwise the .bis.ann and
	 * .bis.amb have twice as long pac:
	 *   strcpy(name, prefix); strcat(name, ".bis");
	 *   bis_bns_dump(bns, prefix);
	 */

	bns_destroy(bns);
	kseq_destroy(seq);
	free(pac);
	return ret;
}
Пример #4
0
/*
 * Restore the index named by argv[1], read its packed sequence from
 * bns->fp_pac and print one character per base to stdout.
 *
 * Returns 0 on success and 1 on a usage, allocation or read error.
 */
int main(int argc, char *argv[])
{
    bntseq_t *bns;
    uint8_t *pac;
    size_t n_bytes;
    int64_t i; /* 64-bit: an int counter could overflow before reaching l_pac */

    if (argc < 2) {
        fprintf(stderr, "Usage: %s <prefix>\n", argc > 0 ? argv[0] : "pac2txt");
        return 1;
    }
    bns = bns_restore(argv[1]);
    if (bns == NULL) {
        fprintf(stderr, "fail to restore the index '%s'\n", argv[1]);
        return 1;
    }
    if (bns->fp_pac == NULL) {
        fprintf(stderr, "no .pac stream for the index '%s'\n", argv[1]);
        bns_destroy(bns);
        return 1;
    }

    n_bytes = (size_t)(bns->l_pac/2+2);
    pac = calloc(n_bytes, 1);
    if (pac == NULL) {
        fprintf(stderr, "out of memory\n");
        bns_destroy(bns);
        return 1;
    }
    /* A short read is acceptable (the buffer is zero-filled and may be
     * larger than the file); only a stream error is fatal. */
    fread(pac, 1, n_bytes, bns->fp_pac);
    if (ferror(bns->fp_pac)) {
        fprintf(stderr, "fail to read the packed sequence\n");
        free(pac);
        bns_destroy(bns);
        return 1;
    }

    /* NOTE(review): assumes _get_pac() yields 0..4 so the lookup stays
     * inside "ACGT#" -- confirm against the macro definition. */
    for (i = 0; i < bns->l_pac; ++i) {
        putchar("ACGT#"[_get_pac(pac, i)]);
    }

    free(pac);
    bns_destroy(bns);
    return 0;
}
Пример #5
0
// Releases everything this object holds: the `reference` array, the
// sequence annotation handle `bns`, then both entries of `bwts`.
BWA::~BWA() {
  delete[] reference;
  bns_destroy(bns);
  for (int idx = 0; idx < 2; ++idx) {
    bwt_destroy(bwts[idx]);
  }
}
Пример #6
0
/**
 * Read every sequence from fp_fa, record its name, comment, length, offset
 * and runs of ambiguous bases in a bntseq_t, and stream the 2-bit packed
 * bases to "<prefix>.pac" through a fixed 64 KiB buffer. The annotations
 * are written by bns_dump() at the end.
 *
 * @param fp_fa  input stream handed to kseq_init()
 * @param prefix output file prefix; ".pac" is appended to it. A prefix that
 *               does not fit in the file-name buffer is rejected with a
 *               message on stderr and nothing is written.
 */
void bns_fasta2bntseq(gzFile fp_fa, const char *prefix)
{
	kseq_t *seq;
	char name[1024];
	bntseq_t *bns;
	bntamb1_t *q;
	int l_buf;
	unsigned char buf[0x10000];
	int32_t m_seqs, m_holes, l, i;
	size_t n_want, n_done;
	FILE *fp;

	// refuse a prefix that would overflow name[] once ".pac" is appended
	if (strlen(prefix) + sizeof(".pac") > sizeof(name)) {
		fprintf(stderr, "[bns_fasta2bntseq] prefix is too long\n");
		return;
	}

	// initialization
	seq = kseq_init(fp_fa);
	bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
	bns->seed = 11; // fixed seed for random generator
	srand48(bns->seed);
	m_seqs = m_holes = 8;
	bns->anns = (bntann1_t*)calloc(m_seqs, sizeof(bntann1_t));
	bns->ambs = (bntamb1_t*)calloc(m_holes, sizeof(bntamb1_t));
	q = bns->ambs;
	l_buf = 0;
	strcpy(name, prefix); strcat(name, ".pac");
	fp = xopen(name, "wb");
	memset(buf, 0, 0x10000);
	// read sequences
	while ((l = kseq_read(seq)) >= 0) {
		bntann1_t *p;
		int lasts;
		if (bns->n_seqs == m_seqs) { // grow the annotation array
			m_seqs <<= 1;
			bns->anns = (bntann1_t*)realloc(bns->anns, m_seqs * sizeof(bntann1_t));
		}
		p = bns->anns + bns->n_seqs;
		p->name = strdup((char*)seq->name.s);
		p->anno = seq->comment.s? strdup((char*)seq->comment.s) : strdup("(null)");
		p->gi = 0; p->len = l;
		p->offset = (bns->n_seqs == 0)? 0 : (p-1)->offset + (p-1)->len;
		p->n_ambs = 0;
		for (i = 0, lasts = 0; i < l; ++i) {
			// index through unsigned char: a plain char may be negative for
			// bytes >= 0x80 and would read before the start of the table
			int c = nst_nt4_table[(unsigned char)seq->seq.s[i]];
			if (c >= 4) { // N
				if (lasts == seq->seq.s[i]) { // contiguous N
					++q->len;
				} else {
					if (bns->n_holes == m_holes) { // grow the hole array
						m_holes <<= 1;
						bns->ambs = (bntamb1_t*)realloc(bns->ambs, m_holes * sizeof(bntamb1_t));
					}
					q = bns->ambs + bns->n_holes;
					q->len = 1;
					q->offset = p->offset + i;
					q->amb = seq->seq.s[i];
					++p->n_ambs;
					++bns->n_holes;
				}
			}
			lasts = seq->seq.s[i];
			{ // fill buffer
				if (c >= 4) c = lrand48()&0x3; // replace an ambiguous base by a random one
				if (l_buf == 0x40000) { // buffer full (4 bases per byte): flush it
					n_done = fwrite(buf, 1, 0x10000, fp);
					xassert(n_done == 0x10000, "fail to write the .pac file.");
					memset(buf, 0, 0x10000);
					l_buf = 0;
				}
				// first base of each byte goes into the two highest bits
				buf[l_buf>>2] |= c << ((3 - (l_buf&3)) << 1);
				++l_buf;
			}
		}
		++bns->n_seqs;
		bns->l_pac += seq->seq.l;
	}
	xassert(bns->l_pac, "zero length sequence.");
	{ // finalize .pac file
		ubyte_t ct;
		n_want = (size_t)((l_buf>>2) + ((l_buf&3) == 0? 0 : 1));
		n_done = fwrite(buf, 1, n_want, fp);
		xassert(n_done == n_want, "fail to write the .pac file.");
		// the following codes make the pac file size always (l_pac/4+1+1)
		if (bns->l_pac % 4 == 0) {
			ct = 0;
			n_done = fwrite(&ct, 1, 1, fp);
			xassert(n_done == 1, "fail to write the .pac file.");
		}
		ct = bns->l_pac % 4;
		n_done = fwrite(&ct, 1, 1, fp);
		xassert(n_done == 1, "fail to write the .pac file.");
		// close .pac file
		fclose(fp);
	}
	bns_dump(bns, prefix);
	bns_destroy(bns);
	kseq_destroy(seq);
}
Пример #7
0
/**
 * Command-line entry point: parse the options into a bsw2opt_t, restore the
 * forward (.bwt/.sa) and reverse (.rbwt/.rsa) indexes plus the sequence
 * annotations for <target.prefix>, and run bsw2_aln() on <query.fa>.
 *
 * @return 1 when the usage text is printed or the prefix is too long for
 *         the file-name buffer; 0 after the alignment has run
 */
int bwa_bwtsw2(int argc, char *argv[])
{
	bsw2opt_t *opt;
	bwt_t *target[2];
	char buf[1024];
	bntseq_t *bns;
	const char *prefix;
	int c;

	opt = bsw2_init_opt();
	srand48(11);
	optind = 1;
	// 'd' is accepted by the option string but has no handler below
	while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:y:s:c:N:Hf:")) >= 0) {
		switch (c) {
		case 'q': opt->q = atoi(optarg); break;
		case 'r': opt->r = atoi(optarg); break;
		case 'a': opt->a = atoi(optarg); break;
		case 'b': opt->b = atoi(optarg); break;
		case 'w': opt->bw = atoi(optarg); break;
		case 'T': opt->t = atoi(optarg); break;
		case 't': opt->n_threads = atoi(optarg); break;
		case 'z': opt->z = atoi(optarg); break;
		case 'y': opt->yita = atof(optarg); break;
		case 's': opt->is = atoi(optarg); break;
		case 'm': opt->mask_level = atof(optarg); break;
		case 'c': opt->coef = atof(optarg); break;
		case 'N': opt->t_seeds = atoi(optarg); break;
		case 'H': opt->hard_clip = 1; break;
		case 'f': xreopen(optarg, "w", stdout); break;
		default: break;
		}
	}
	opt->qr = opt->q + opt->r;

	if (optind + 2 > argc) {
		fprintf(stderr, "\n");
		fprintf(stderr, "Usage:   bwa bwasw [options] <target.prefix> <query.fa>\n\n");
		fprintf(stderr, "Options: -a INT   score for a match [%d]\n", opt->a);
		fprintf(stderr, "         -b INT   mismatch penalty [%d]\n", opt->b);
		fprintf(stderr, "         -q INT   gap open penalty [%d]\n", opt->q);
		fprintf(stderr, "         -r INT   gap extension penalty [%d]\n", opt->r);
//		fprintf(stderr, "         -y FLOAT error recurrence coef. (4..16) [%.1f]\n", opt->yita);
		fprintf(stderr, "\n");
		fprintf(stderr, "         -t INT   number of threads [%d]\n", opt->n_threads);
		// NOTE(review): '-s' is listed twice; the parser above only sets opt->is
		fprintf(stderr, "         -s INT   size of a chunk of reads [%d]\n", opt->chunk_size);
		fprintf(stderr, "\n");
		fprintf(stderr, "         -w INT   band width [%d]\n", opt->bw);
		fprintf(stderr, "         -m FLOAT mask level [%.2f]\n", opt->mask_level);
		fprintf(stderr, "\n");
		fprintf(stderr, "         -T INT   score threshold divided by a [%d]\n", opt->t);
		fprintf(stderr, "         -s INT   maximum seeding interval size [%d]\n", opt->is);
		fprintf(stderr, "         -z INT   Z-best [%d]\n", opt->z);
		fprintf(stderr, "         -N INT   # seeds to trigger reverse alignment [%d]\n", opt->t_seeds);
		fprintf(stderr, "         -c FLOAT coefficient of length-threshold adjustment [%.1f]\n", opt->coef);
		fprintf(stderr, "         -H       in SAM output, use hard clipping rather than soft\n");
		fprintf(stderr, "         -f FILE  file to output results to instead of stdout\n\n");
		fprintf(stderr, "Note: For long Illumina, 454 and Sanger reads, assembly contigs, fosmids and\n");
		fprintf(stderr, "      BACs, the default setting usually works well. For the current PacBio\n");
		fprintf(stderr, "      reads (end of 2010), '-b5 -q2 -r1 -z10' is recommended. One may also\n");
		fprintf(stderr, "      increase '-z' for better sensitivity.\n");
		fprintf(stderr, "\n");

		free(opt); // the options were leaked on this path before
		return 1;
	}

	// adjust opt for opt->a
	opt->t *= opt->a;
	opt->coef *= opt->a;

	prefix = argv[optind];
	// ".rbwt" is the longest suffix appended below; reject what cannot fit
	if (strlen(prefix) + sizeof(".rbwt") > sizeof(buf)) {
		fprintf(stderr, "[bwa_bwtsw2] index prefix is too long\n");
		free(opt);
		return 1;
	}
	strcpy(buf, prefix); target[0] = bwt_restore_bwt(strcat(buf, ".bwt"));
	strcpy(buf, prefix); bwt_restore_sa(strcat(buf, ".sa"), target[0]);
	strcpy(buf, prefix); target[1] = bwt_restore_bwt(strcat(buf, ".rbwt"));
	strcpy(buf, prefix); bwt_restore_sa(strcat(buf, ".rsa"), target[1]);
	bns = bns_restore(prefix);

	bsw2_aln(opt, bns, target, argv[optind+1]);

	bns_destroy(bns);
	bwt_destroy(target[0]); bwt_destroy(target[1]);
	free(opt);
	fflush(stdout);
	// reopen stdout on /dev/tty (it may have been redirected by -f above)
	xreopen("/dev/tty","w",stdout);
	return 0;
}
Пример #8
0
/**
 * Rewind fp_fa, pack every sequence with bis_add1(), then build a buffer
 * holding the converted forward strand followed by the converted reverse
 * complement and write it to "<prefix>.par.pac" (parent != 0) or
 * "<prefix>.dau.pac" (parent == 0).
 *
 * Conversion applied to every 2-bit code: code 1 becomes 3 when parent is
 * set, code 2 becomes 0 otherwise. (Presumably the C->T / G->A bisulfite
 * conversion under the usual A,C,G,T = 0..3 coding -- confirm.)
 *
 * bis_bns_dump() is called only when parent is set.
 *
 * @return the number of bases written (twice bns->l_pac), or -1 if the
 *         prefix does not fit in the file-name buffer or memory runs out
 */
int64_t bis_bns_fasta2bntseq(gzFile fp_fa, const char *prefix, uint8_t parent) {

  kseq_t *seq;
  char name[1024];
  bntseq_t *bns;
  uint8_t *pac = 0, *_pac = 0;
  int32_t m_seqs, m_holes;
  int64_t m_pac, l, k;
  bntamb1_t *q;
  FILE *fp;
  const char *suffix = parent ? ".par.pac" : ".dau.pac";

  // refuse a prefix that would overflow name[] once the suffix is appended
  if (strlen(prefix) + strlen(suffix) + 1 > sizeof(name)) {
    fprintf(stderr, "[%s] prefix is too long\n", __func__);
    return -1;
  }

  // initialization
  gzseek(fp_fa, 0, SEEK_SET);
  seq = kseq_init(fp_fa);
  bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
  bns->seed = 11; // fixed seed for random generator
  srand48(bns->seed);
  m_seqs = m_holes = 8; m_pac = 0x10000;
  bns->anns = (bntann1_t*)calloc(m_seqs, sizeof(bntann1_t));
  bns->ambs = (bntamb1_t*)calloc(m_holes, sizeof(bntamb1_t));
  _pac = calloc(m_pac/4, 1);
  if (_pac == 0) { // nothing else to release yet besides bns and seq
    fprintf(stderr, "[%s] out of memory\n", __func__);
    bns_destroy(bns);
    kseq_destroy(seq);
    return -1;
  }
  q = bns->ambs;
  strcpy(name, prefix); strcat(name, suffix);
  fp = xopen(name, "wb");

  // read sequences
  while (kseq_read(seq) >= 0) {
    _pac = bis_add1(seq, bns, _pac, &m_pac, &m_seqs, &m_holes, &q);
  }

  // room for both strands, rounded up to a whole number of bytes
  m_pac = (bns->l_pac*2+3)/4*4;
  pac = calloc(m_pac/4,sizeof(uint8_t));
  if (pac == 0 && m_pac > 0) {
    fprintf(stderr, "[%s] out of memory\n", __func__);
    err_fclose(fp);
    bns_destroy(bns);
    kseq_destroy(seq);
    free(_pac);
    return -1;
  }

  // forward strand, converted
  for (l=0; l<bns->l_pac; ++l) {
    uint8_t c = _get_pac(_pac,l);
    if (parent && c == 1) c = 3;
    if (!parent && c == 2) c = 0;
    _set_pac(pac, l, c);
  }

  // reverse complement (3-code, read backwards), converted the same way;
  // l keeps counting so it ends at the total number of bases written
  for (k=bns->l_pac-1; k>=0; --k,++l) {
    uint8_t c = 3-_get_pac(_pac,k);
    if (parent && c == 1) c = 3;
    if (!parent && c == 2) c = 0;
    _set_pac(pac, l, c);
  }
  free(_pac);

  assert(bns->l_pac<<1 == l);
  { // finalize .pac file
    ubyte_t ct;
    err_fwrite(pac, 1, (l>>2) + ((l&3) == 0? 0 : 1), fp);
    // the following codes make the pac file size always (l/4+1+1)
    if (l % 4 == 0) {
      ct = 0;
      err_fwrite(&ct, 1, 1, fp);
    }
    ct = l % 4;
    err_fwrite(&ct, 1, 1, fp);
    // close .pac file
    err_fflush(fp);
    err_fclose(fp);
  }
  if (parent) bis_bns_dump(bns, prefix);
  bns_destroy(bns);
  kseq_destroy(seq);
  free(pac);
  return l;
}
Пример #9
0
/**
 * Command-line entry point: parse the options into a bsw2opt_t, locate the
 * index with bwa_infer_prefix(), restore the BWT, suffix array and sequence
 * annotations, and run bsw2_aln() on one or two query files.
 *
 * @return 0 after the alignment has run; 1 when the usage text is printed,
 *         the index cannot be located, or its prefix is too long for the
 *         file-name buffer
 */
int bwa_bwtsw2(int argc, char *argv[])
{
	extern char *bwa_infer_prefix(const char *hint);
	bsw2opt_t *opt;
	bwt_t *target;
	char buf[1024], *prefix;
	bntseq_t *bns;
	int c;

	opt = bsw2_init_opt();
	srand48(11);
	// 'd' is accepted by the option string but has no handler below
	while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:s:c:N:Hf:MI:SG:C")) >= 0) {
		switch (c) {
		case 'q': opt->q = atoi(optarg); break;
		case 'r': opt->r = atoi(optarg); break;
		case 'a': opt->a = atoi(optarg); break;
		case 'b': opt->b = atoi(optarg); break;
		case 'w': opt->bw = atoi(optarg); break;
		case 'T': opt->t = atoi(optarg); break;
		case 't': opt->n_threads = atoi(optarg); break;
		case 'z': opt->z = atoi(optarg); break;
		case 's': opt->is = atoi(optarg); break;
		case 'm': opt->mask_level = atof(optarg); break;
		case 'c': opt->coef = atof(optarg); break;
		case 'N': opt->t_seeds = atoi(optarg); break;
		case 'M': opt->multi_2nd = 1; break;
		case 'H': opt->hard_clip = 1; break;
		case 'f': xreopen(optarg, "w", stdout); break;
		case 'I': opt->max_ins = atoi(optarg); break;
		case 'S': opt->skip_sw = 1; break;
		case 'C': opt->cpy_cmt = 1; break;
		case 'G': opt->max_chain_gap = atoi(optarg); break;
		default: break;
		}
	}
	opt->qr = opt->q + opt->r;

	if (optind + 2 > argc) {
		fprintf(stderr, "\n");
		fprintf(stderr, "Usage:   bwa bwasw [options] <target.prefix> <query.fa> [query2.fa]\n\n");
		fprintf(stderr, "Options: -a INT   score for a match [%d]\n", opt->a);
		fprintf(stderr, "         -b INT   mismatch penalty [%d]\n", opt->b);
		fprintf(stderr, "         -q INT   gap open penalty [%d]\n", opt->q);
		fprintf(stderr, "         -r INT   gap extension penalty [%d]\n", opt->r);
		fprintf(stderr, "         -w INT   band width [%d]\n", opt->bw);
		fprintf(stderr, "         -m FLOAT mask level [%.2f]\n", opt->mask_level);
		fprintf(stderr, "\n");
		fprintf(stderr, "         -t INT   number of threads [%d]\n", opt->n_threads);
		fprintf(stderr, "         -f FILE  file to output results to instead of stdout\n");
		fprintf(stderr, "         -H       in SAM output, use hard clipping instead of soft clipping\n");
		fprintf(stderr, "         -C       copy FASTA/Q comment to SAM output\n");
		fprintf(stderr, "         -M       mark multi-part alignments as secondary\n");
		fprintf(stderr, "         -S       skip Smith-Waterman read pairing\n");
		fprintf(stderr, "         -I INT   ignore pairs with insert >=INT for inferring the size distr [%d]\n", opt->max_ins);
		fprintf(stderr, "\n");
		fprintf(stderr, "         -T INT   score threshold divided by a [%d]\n", opt->t);
		fprintf(stderr, "         -c FLOAT coefficient of length-threshold adjustment [%.1f]\n", opt->coef);
		fprintf(stderr, "         -z INT   Z-best [%d]\n", opt->z);
		fprintf(stderr, "         -s INT   maximum seeding interval size [%d]\n", opt->is);
		fprintf(stderr, "         -N INT   # seeds to trigger rev aln; 2*INT is also the chaining threshold [%d]\n", opt->t_seeds);
		fprintf(stderr, "         -G INT   maximum gap size during chaining [%d]\n", opt->max_chain_gap);
		fprintf(stderr, "\n");
		fprintf(stderr, "Note: For long Illumina, 454 and Sanger reads, assembly contigs, fosmids and\n");
		fprintf(stderr, "      BACs, the default setting usually works well. For the current PacBio\n");
		fprintf(stderr, "      reads (end of 2010), '-b5 -q2 -r1 -z10' is recommended. One may also\n");
		fprintf(stderr, "      increase '-z' for better sensitivity.\n");
		fprintf(stderr, "\n");

		free(opt); // the options were leaked on this path before
		return 1;
	}

	// adjust opt for opt->a
	opt->t *= opt->a;
	opt->coef *= opt->a;

	if ((prefix = bwa_infer_prefix(argv[optind])) == 0) {
		fprintf(stderr, "[%s] fail to locate the index\n", __func__);
		free(opt);
		return 1; // a missing index is a failure, not a success
	}
	// ".bwt" is the longest suffix appended below; reject what cannot fit
	if (strlen(prefix) + sizeof(".bwt") > sizeof(buf)) {
		fprintf(stderr, "[%s] index prefix is too long\n", __func__);
		free(opt); free(prefix);
		return 1;
	}
	strcpy(buf, prefix); target = bwt_restore_bwt(strcat(buf, ".bwt"));
	strcpy(buf, prefix); bwt_restore_sa(strcat(buf, ".sa"), target);
	bns = bns_restore(prefix);

	// the second query file is optional and passed as 0 when absent
	bsw2_aln(opt, bns, target, argv[optind+1], optind+2 < argc? argv[optind+2] : 0);

	bns_destroy(bns);
	bwt_destroy(target);
	free(opt); free(prefix);

	return 0;
}
Пример #10
0
/**
 * Command-line entry point: parse the options into a bsw2opt_t, restore the
 * forward (.bwt/.sa) and reverse (.rbwt/.rsa) indexes plus the sequence
 * annotations for <target.prefix>, and run bsw2_aln() on <query.fa>.
 *
 * @return 1 when the usage text is printed or the prefix is too long for
 *         the file-name buffer; 0 after the alignment has run
 */
int bwa_bwtsw2(int argc, char *argv[])
{
	bsw2opt_t *opt;
	bwt_t *target[2];
	char buf[1024];
	bntseq_t *bns;
	const char *prefix;
	int c;

	opt = bsw2_init_opt();
	srand48(11);
	// 'd' is accepted by the option string but has no handler below
	while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:y:s:c:N:H")) >= 0) {
		switch (c) {
		case 'q': opt->q = atoi(optarg); break;
		case 'r': opt->r = atoi(optarg); break;
		case 'a': opt->a = atoi(optarg); break;
		case 'b': opt->b = atoi(optarg); break;
		case 'w': opt->bw = atoi(optarg); break;
		case 'T': opt->t = atoi(optarg); break;
		case 't': opt->n_threads = atoi(optarg); break;
		case 'z': opt->z = atoi(optarg); break;
		case 'y': opt->yita = atof(optarg); break;
		case 's': opt->is = atoi(optarg); break;
		case 'm': opt->mask_level = atof(optarg); break;
		case 'c': opt->coef = atof(optarg); break;
		case 'N': opt->t_seeds = atoi(optarg); break;
		case 'H': opt->hard_clip = 1; break;
		default: break;
		}
	}
	opt->qr = opt->q + opt->r;

	if (optind + 2 > argc) {
		fprintf(stderr, "\n");
		fprintf(stderr, "Usage:   bwa dbwtsw [options] <target.prefix> <query.fa>\n\n");
		fprintf(stderr, "Options: -a INT   score for a match [%d]\n", opt->a);
		fprintf(stderr, "         -b INT   mismatch penalty [%d]\n", opt->b);
		fprintf(stderr, "         -q INT   gap open penalty [%d]\n", opt->q);
		fprintf(stderr, "         -r INT   gap extension penalty [%d]\n", opt->r);
//		fprintf(stderr, "         -y FLOAT error recurrence coef. (4..16) [%.1f]\n", opt->yita);
		fprintf(stderr, "\n");
		// typo fixed: this line used to read "nmber of threads"
		fprintf(stderr, "         -t INT   number of threads [%d]\n", opt->n_threads);
		// NOTE(review): '-s' is listed twice; the parser above only sets opt->is
		fprintf(stderr, "         -s INT   size of a chunk of reads [%d]\n", opt->chunk_size);
		fprintf(stderr, "\n");
		fprintf(stderr, "         -w INT   band width [%d]\n", opt->bw);
		fprintf(stderr, "         -m FLOAT mask level [%.2f]\n", opt->mask_level);
		fprintf(stderr, "\n");
		fprintf(stderr, "         -T INT   score threshold divided by a [%d]\n", opt->t);
		fprintf(stderr, "         -s INT   maximum seeding interval size [%d]\n", opt->is);
		fprintf(stderr, "         -z INT   Z-best [%d]\n", opt->z);
		fprintf(stderr, "         -N INT   # seeds to trigger reverse alignment [%d]\n", opt->t_seeds);
		fprintf(stderr, "         -c FLOAT coefficient of length-threshold adjustment [%.1f]\n", opt->coef);
		fprintf(stderr, "         -H       in SAM output, use hard clipping rather than soft\n");
		fprintf(stderr, "\n");

		{ // report the values derived from the current scoring options
			double scale, theta, eps, delta;
			scale = opt->a / log(opt->yita);
			theta = exp(-opt->b / scale) / opt->yita;
			eps = exp(-opt->q / scale);
			delta = exp(-opt->r / scale);
			fprintf(stderr, "mismatch: %lf, gap_open: %lf, gap_ext: %lf\n\n",
					theta, eps, delta);
		}
		free(opt); // the options were leaked on this path before
		return 1;
	}

	// adjust opt for opt->a
	opt->t *= opt->a;
	opt->coef *= opt->a;

	prefix = argv[optind];
	// ".rbwt" is the longest suffix appended below; reject what cannot fit
	if (strlen(prefix) + sizeof(".rbwt") > sizeof(buf)) {
		fprintf(stderr, "[bwa_bwtsw2] index prefix is too long\n");
		free(opt);
		return 1;
	}
	strcpy(buf, prefix); target[0] = bwt_restore_bwt(strcat(buf, ".bwt"));
	strcpy(buf, prefix); bwt_restore_sa(strcat(buf, ".sa"), target[0]);
	strcpy(buf, prefix); target[1] = bwt_restore_bwt(strcat(buf, ".rbwt"));
	strcpy(buf, prefix); bwt_restore_sa(strcat(buf, ".rsa"), target[1]);
	bns = bns_restore(prefix);

	bsw2_aln(opt, bns, target, argv[optind+1]);

	bns_destroy(bns);
	bwt_destroy(target[0]); bwt_destroy(target[1]);
	free(opt);

	return 0;
}
Пример #11
0
int64_t R_bns_fasta2bntseq(gzFile fp_fa, const char *prefix)
{
//	extern void seq_reverse(int len, ubyte_t *seq, int is_comp); // in bwaseqio.c
	kseq_t *seq;
	char name[1024];
	bntseq_t *bns;
	uint8_t *pac = 0;
    uint8_t *reverse_pac = NULL;
	int32_t m_seqs, m_holes;
	int64_t ret = -1, m_pac, l, m_r_pac;
	bntamb1_t *q;
	FILE *fp, *fp_r;
    int i;
	// initialization
	seq = kseq_init(fp_fa);
	bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
	bns->seed = 11; // fixed seed for random generator
	srand48(bns->seed);
	m_seqs = m_holes = 8; m_pac = 0x10000;
	bns->anns = (bntann1_t*)calloc(m_seqs, sizeof(bntann1_t));
	bns->ambs = (bntamb1_t*)calloc(m_holes, sizeof(bntamb1_t));
	pac = calloc(m_pac/NT_PER_BYTE, 1);
	q = bns->ambs;
	strcpy(name, prefix); strcat(name, ".pac");
	fp = xopen(name, "wb");
    memset(name, '\0', 1024);strcpy(name, prefix); strcat(name, ".rpac");
    fp_r = xopen(name, "wb");
	// read sequences
	while (kseq_read(seq) >= 0) pac = R_add1(seq, bns, pac, &m_pac, &m_seqs, &m_holes, &q);
    
   // add the reverse complemented sequence





    ret = bns->l_pac;
    fprintf(stderr, "[R_bns_fasta2bntseq]:reverse_pac!\n");
    m_r_pac = (bns->l_pac +NT_PER_BYTE-1)/NT_PER_BYTE *NT_PER_BYTE;
    reverse_pac = calloc(m_r_pac/NT_PER_BYTE, sizeof(uint8_t));
    for(l = bns->l_pac-1, i =0; l>=0, i < bns->l_pac; --l, ++i)
    {
        _set_pac(reverse_pac, i, _get_pac(pac, l)); 
    } 
	ubyte_t ct;
    { // finalize .pac and  file
		fwrite(pac, 1, (bns->l_pac>>1) + ((bns->l_pac&1) == 0? 0 : 1), fp);
		// the following codes make the pac file size always (l_pac/4+1+1)
		if (bns->l_pac % NT_PER_BYTE == 0) {
			ct = 0;
			fwrite(&ct, 1, 1, fp);
		}
		ct = bns->l_pac % NT_PER_BYTE;
		fwrite(&ct, 1, 1, fp);
		// close .pac file
		fclose(fp);
	}	
    { // finalize .rpac and  file
		fwrite(reverse_pac, 1, (bns->l_pac>>1) + ((bns->l_pac&1) == 0? 0 : 1), fp_r);
		// the following codes make the pac file size always (l_pac/4+1+1)
		if (bns->l_pac % NT_PER_BYTE == 0) {
			ct = 0;
			fwrite(&ct, 1, 1, fp_r);
		}
		ct = bns->l_pac % NT_PER_BYTE;
		fwrite(&ct, 1, 1, fp_r);
		// close .rpac file
		fclose(fp_r);
	}
	bns_dump(bns, prefix);
	bns_destroy(bns);
	kseq_destroy(seq);
	free(pac);
    free(reverse_pac);
	return ret;
}