Exemple #1
0
/*
 * Pack every record read from fp_fa (four bases per byte) and write the result
 * to "<prefix>.pac", then hand the collected annotations to bns_dump().
 *
 * fp_fa    - input stream; records are consumed until kseq_read() returns < 0
 * prefix   - output file name prefix (any length)
 * for_only - when zero, the reverse complement of the packed sequence is
 *            appended after the forward strand before the file is written
 *
 * Returns the number of bases stored in the .pac file (forward strand plus the
 * reverse strand when it was added), or -1 when the file name or the enlarged
 * sequence buffer could not be allocated.
 */
int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix, int for_only)
{
	static const char suffix[] = ".pac";
	kseq_t *seq;
	char *name;
	bntseq_t *bns;
	uint8_t *pac = 0;
	int32_t m_seqs, m_holes;
	int64_t ret = -1, m_pac, l;
	bntamb1_t *q;
	FILE *fp;

	// Size the file name from the prefix itself: a fixed-size stack buffer
	// would be overrun by a long prefix. sizeof(suffix) includes the NUL.
	name = (char*)malloc(strlen(prefix) + sizeof(suffix));
	if (name == 0) {
		fprintf(stderr, "[bns_fasta2bntseq] out of memory while building the output file name\n");
		return ret;
	}
	strcpy(name, prefix); strcat(name, suffix);

	// initialization
	seq = kseq_init(fp_fa);
	bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
	bns->seed = 11; // fixed seed for random generator
	srand48(bns->seed);
	m_seqs = m_holes = 8; m_pac = 0x10000;
	bns->anns = (bntann1_t*)calloc(m_seqs, sizeof(bntann1_t));
	bns->ambs = (bntamb1_t*)calloc(m_holes, sizeof(bntamb1_t));
	pac = calloc(m_pac/4, 1);
	q = bns->ambs;
	fp = xopen(name, "wb");
	free(name);
	// read sequences
	while (kseq_read(seq) >= 0) pac = add1(seq, bns, pac, &m_pac, &m_seqs, &m_holes, &q);
	// add the reverse complemented sequence; skipped for an empty input so
	// that realloc() is never asked for a zero-sized block
	if (!for_only && bns->l_pac > 0) {
		uint8_t *grown;
		m_pac = (bns->l_pac * 2 + 3) / 4 * 4;
		grown = (uint8_t*)realloc(pac, m_pac/4);
		if (grown == 0) { // the old buffer is still owned by pac and freed below
			fprintf(stderr, "[bns_fasta2bntseq] failed to allocate %lld bytes for the reverse strand\n", (long long)(m_pac/4));
			err_fclose(fp);
			goto cleanup;
		}
		pac = grown;
		// clear only the bytes that lie beyond the forward strand
		memset(pac + (bns->l_pac+3)/4, 0, (m_pac - (bns->l_pac+3)/4*4) / 4);
		// walk the forward strand backwards; l_pac grows as bases are appended
		for (l = bns->l_pac - 1; l >= 0; --l, ++bns->l_pac)
			_set_pac(pac, bns->l_pac, 3-_get_pac(pac, l));
	}
	ret = bns->l_pac;
	{ // finalize .pac file
		ubyte_t ct;
		err_fwrite(pac, 1, (bns->l_pac>>2) + ((bns->l_pac&3) == 0? 0 : 1), fp);
		// the following codes make the pac file size always (l_pac/4+1+1)
		if (bns->l_pac % 4 == 0) {
			ct = 0;
			err_fwrite(&ct, 1, 1, fp);
		}
		// last byte records how many bases the final packed byte holds
		ct = bns->l_pac % 4;
		err_fwrite(&ct, 1, 1, fp);
		// close .pac file
		err_fflush(fp);
		err_fclose(fp);
	}
	bns_dump(bns, prefix);
cleanup:
	bns_destroy(bns);
	kseq_destroy(seq);
	free(pac);
	return ret;
}
Exemple #2
0
/*
 * Pack every record read from fp_fa (four bases per byte) and write the
 * forward strand only to "<prefix>.bis.pac".
 *
 * fp_fa  - input stream; records are consumed until kseq_read() returns < 0
 * prefix - output file name prefix (any length)
 *
 * No annotation files are written here. Returns the number of packed bases,
 * or -1 when the output file name could not be allocated.
 */
int64_t dump_forward_pac(gzFile fp_fa, const char *prefix)
{
	static const char suffix[] = ".bis.pac";
	kseq_t *seq;
	char *name;
	bntseq_t *bns;
	uint8_t *pac = 0;
	int32_t m_seqs, m_holes;
	int64_t ret = -1, m_pac;
	bntamb1_t *q;
	FILE *fp;

	// Size the file name from the prefix itself: a fixed-size stack buffer
	// would be overrun by a long prefix. sizeof(suffix) includes the NUL.
	name = (char*)malloc(strlen(prefix) + sizeof(suffix));
	if (name == 0) {
		fprintf(stderr, "[dump_forward_pac] out of memory while building the output file name\n");
		return ret;
	}
	strcpy(name, prefix); strcat(name, suffix);

	// initialization
	seq = kseq_init(fp_fa);
	bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
	bns->seed = 11; // fixed seed for random generator
	srand48(bns->seed);
	m_seqs = m_holes = 8; m_pac = 0x10000;
	bns->anns = (bntann1_t*)calloc(m_seqs, sizeof(bntann1_t));
	bns->ambs = (bntamb1_t*)calloc(m_holes, sizeof(bntamb1_t));
	pac = calloc(m_pac/4, 1);
	q = bns->ambs;
	fp = xopen(name, "wb");
	free(name);
	// read sequences
	while (kseq_read(seq) >= 0) pac = add1(seq, bns, pac, &m_pac, &m_seqs, &m_holes, &q);

	ret = bns->l_pac;
	{ // finalize .pac file
		ubyte_t ct;
		err_fwrite(pac, 1, (bns->l_pac>>2) + ((bns->l_pac&3) == 0? 0 : 1), fp);
		// the following codes make the pac file size always (l_pac/4+1+1)
		if (bns->l_pac % 4 == 0) {
			ct = 0;
			err_fwrite(&ct, 1, 1, fp);
		}
		// last byte records how many bases the final packed byte holds
		ct = bns->l_pac % 4;
		err_fwrite(&ct, 1, 1, fp);
		// close .pac file
		err_fflush(fp);
		err_fclose(fp);
	}
  /* re-dump forward bis bns, otherwise the .bis.ann and .bis.amb have twice as long pac  */
  /* strcpy(name, prefix); strcat(name, ".bis"); */
  /* bis_bns_dump(bns, prefix); */

	bns_destroy(bns);
	kseq_destroy(seq);
	free(pac);
	return ret;
}
Exemple #3
0
/*
 * Compute SA coordinates for every read in fn_fa against the BWT stored in
 * "<prefix>.bwt" and stream the result to stdout.
 *
 * prefix - index file name prefix; ".bwt" is appended to locate the BWT
 * fn_fa  - read file, opened through bwa_open_reads()
 * opt    - search options; also selects the thread count (opt->n_threads)
 *
 * Output layout on stdout: the gap_opt_t structure once, then for each read a
 * 4-byte alignment count followed by that many bwt_aln1_t records. Progress
 * and timing messages go to stderr.
 */
void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt)
{
	int i, n_seqs, tot_seqs = 0;
	bwa_seq_t *seqs;
	bwa_seqio_t *ks;
	clock_t t;
	bwt_t *bwt;

	// initialization
	ks = bwa_open_reads(opt->mode, fn_fa);

	{ // load BWT
		char *str = (char*)calloc(strlen(prefix) + 10, 1);
		strcpy(str, prefix); strcat(str, ".bwt");  bwt = bwt_restore_bwt(str);
		free(str);
	}

	// core loop: reads are processed in batches of up to 0x40000
	err_fwrite(opt, sizeof(gap_opt_t), 1, stdout);
	while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt->mode, opt->trim_qual)) != 0) {
		tot_seqs += n_seqs;
		t = clock();

		fprintf(stderr, "[bwa_aln_core] calculate SA coordinate... ");

#ifdef HAVE_PTHREAD
		if (opt->n_threads <= 1) { // no multi-threading at all
			bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt);
		} else {
			pthread_t *tid;
			pthread_attr_t attr;
			thread_aux_t *data;
			int j, n_started;
			pthread_attr_init(&attr);
			pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
			data = (thread_aux_t*)calloc(opt->n_threads, sizeof(thread_aux_t));
			tid = (pthread_t*)calloc(opt->n_threads, sizeof(pthread_t));
			// describe every slice before any thread starts running
			for (j = 0; j < opt->n_threads; ++j) {
				data[j].tid = j; data[j].bwt = bwt;
				data[j].n_seqs = n_seqs; data[j].seqs = seqs; data[j].opt = opt;
			}
			// stop at the first failure so that only valid handles are joined
			for (n_started = 0; n_started < opt->n_threads; ++n_started)
				if (pthread_create(&tid[n_started], &attr, worker, data + n_started) != 0) break;
			// the attribute object is re-initialised per batch; release it each time
			pthread_attr_destroy(&attr);
			if (n_started < opt->n_threads) {
				fprintf(stderr, "\n[bwa_aln_core] only %d of %d threads could be started; running the remaining slices in the calling thread\n", n_started, opt->n_threads);
				// NOTE(review): assumes worker() simply returns when its slice is done -- confirm
				for (j = n_started; j < opt->n_threads; ++j) worker(data + j);
			}
			for (j = 0; j < n_started; ++j) pthread_join(tid[j], 0);
			free(data); free(tid);
		}
#else
		bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt);
#endif

		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);

		t = clock();
		fprintf(stderr, "[bwa_aln_core] write to the disk... ");
		for (i = 0; i < n_seqs; ++i) {
			bwa_seq_t *p = seqs + i;
			// the count is always written as 4 bytes, records only when present
			err_fwrite(&p->n_aln, 4, 1, stdout);
			if (p->n_aln) err_fwrite(p->aln, sizeof(bwt_aln1_t), p->n_aln, stdout);
		}
		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);

		bwa_free_read_seq(n_seqs, seqs);
		fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs);
	}

	// destroy
	bwt_destroy(bwt);
	bwa_seq_close(ks);
}
Exemple #4
0
/*
 * Compute SA coordinates for every read in fn_fa against the BWT stored in
 * "<prefix>.bwt" and stream the result to stdout (Win32 threading variant).
 *
 * prefix - index file name prefix; ".bwt" is appended to locate the BWT
 * fn_fa  - read file, opened through bwa_open_reads()
 * opt    - search options; also selects the thread count (opt->n_threads)
 *
 * Output layout on stdout: the gap_opt_t structure once, then for each read a
 * 4-byte alignment count followed by that many bwt_aln1_t records. Progress,
 * timing and error messages go to stderr. If a worker thread cannot be
 * created the function releases everything it owns and returns early.
 */
void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt)
{
    int i, n_seqs, tot_seqs = 0;
    bwa_seq_t *seqs;
    bwa_seqio_t *ks;
    clock_t t;
    bwt_t *bwt;

    // initialization
    ks = bwa_open_reads(opt->mode, fn_fa);

    { // load BWT
        char *str = (char*)calloc(strlen(prefix) + 10, 1);
        strcpy(str, prefix); strcat(str, ".bwt");  bwt = bwt_restore_bwt(str);
        free(str);
    }

    // core loop: reads are processed in batches of up to 0x40000
    err_fwrite(opt, sizeof(gap_opt_t), 1, stdout);
    while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt->mode, opt->trim_qual)) != 0) {
        tot_seqs += n_seqs;
        t = clock();

        fprintf(stderr, "[bwa_aln_core] calculate SA coordinate... ");

#ifdef THREAD
        if (opt->n_threads <= 1) { // no multi-threading at all
            bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt);
        } else {
            DWORD ThreadID;
            DWORD create_err = 0;
            HANDLE *tid;
            thread_aux_t *data;
            int j, n_started;
            data = (thread_aux_t*)calloc(opt->n_threads, sizeof(thread_aux_t));
            tid = (HANDLE*)calloc(opt->n_threads, sizeof(HANDLE));
            for (j = 0; j < opt->n_threads; ++j) {
                data[j].tid = j; data[j].bwt = bwt;
                data[j].n_seqs = n_seqs; data[j].seqs = seqs; data[j].opt = opt;

                //create threads
                tid[j] = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) worker, data + j, 0, &ThreadID);
                if (tid[j] == NULL)
                {
                    // capture the code before any other API call can replace it
                    create_err = GetLastError();
                    break;
                }
            }
            n_started = j;
            // Wait for every started thread one handle at a time (a single
            // WaitForMultipleObjects call is capped at MAXIMUM_WAIT_OBJECTS)
            // and release each handle once its thread has finished.
            for (j = 0; j < n_started; ++j) {
                WaitForSingleObject(tid[j], INFINITE);
                CloseHandle(tid[j]);
            }
            free(data); free(tid);
            if (n_started < opt->n_threads) {
                // stdout carries the binary result stream, so report on stderr
                fprintf(stderr, "CreateThread error: %lu\n", (unsigned long)create_err);
                bwa_free_read_seq(n_seqs, seqs);
                bwt_destroy(bwt);
                bwa_seq_close(ks);
                return;
            }
        }
#else
        bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt);
#endif

        fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);

        t = clock();
        fprintf(stderr, "[bwa_aln_core] write to the disk... ");
        for (i = 0; i < n_seqs; ++i) {
            bwa_seq_t *p = seqs + i;
            // the count is always written as 4 bytes, records only when present
            err_fwrite(&p->n_aln, 4, 1, stdout);
            if (p->n_aln) err_fwrite(p->aln, sizeof(bwt_aln1_t), p->n_aln, stdout);
        }
        fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);

        bwa_free_read_seq(n_seqs, seqs);
        fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs);
    }

    // destroy
    bwt_destroy(bwt);
    bwa_seq_close(ks);
}
Exemple #5
0
/*
 * Rewind fp_fa, pack every record (four bases per byte) with bis_add1(), and
 * write a converted double-strand sequence to "<prefix>.par.pac" (parent != 0)
 * or "<prefix>.dau.pac" (parent == 0).
 *
 * The output holds the forward strand followed by its reverse complement
 * (3 - code). On both strands code 1 is rewritten to 3 for the parent file and
 * code 2 is rewritten to 0 for the daughter file; on the reverse strand the
 * rewrite is applied after complementing.
 * NOTE(review): presumably C->T / G->A under an A=0,C=1,G=2,T=3 coding -- confirm.
 *
 * fp_fa  - input stream; rewound to offset 0 before reading
 * prefix - output file name prefix (any length)
 * parent - selects the output suffix and the conversion rule; annotations are
 *          dumped through bis_bns_dump() only when it is non-zero
 *
 * Returns the number of bases written (twice the forward length), or -1 when
 * the file name or the double-length buffer could not be allocated.
 */
int64_t bis_bns_fasta2bntseq(gzFile fp_fa, const char *prefix, uint8_t parent) {

  const char *suffix = parent ? ".par.pac" : ".dau.pac";
  kseq_t *seq;
  char *name;
  bntseq_t *bns;
  uint8_t *pac = 0, *_pac = 0;
  int32_t m_seqs, m_holes;
  int64_t m_pac, l, k;
  bntamb1_t *q;
  FILE *fp;

  // Size the file name from the prefix itself: a fixed-size stack buffer
  // would be overrun by a long prefix.
  name = (char*)malloc(strlen(prefix) + strlen(suffix) + 1);
  if (name == 0) {
    fprintf(stderr, "[bis_bns_fasta2bntseq] out of memory while building the output file name\n");
    return -1;
  }
  strcpy(name, prefix); strcat(name, suffix);

  // initialization
  gzseek(fp_fa, 0, SEEK_SET);
  seq = kseq_init(fp_fa);
  bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
  bns->seed = 11; // fixed seed for random generator
  srand48(bns->seed);
  m_seqs = m_holes = 8; m_pac = 0x10000;
  bns->anns = (bntann1_t*)calloc(m_seqs, sizeof(bntann1_t));
  bns->ambs = (bntamb1_t*)calloc(m_holes, sizeof(bntamb1_t));
  _pac = calloc(m_pac/4, 1);
  q = bns->ambs;
  fp = xopen(name, "wb");
  free(name);

  // read sequences into the unconverted forward buffer
  while (kseq_read(seq) >= 0) {
    _pac = bis_add1(seq, bns, _pac, &m_pac, &m_seqs, &m_holes, &q);
  }

  // room for both strands, rounded up to a whole number of bytes
  m_pac = (bns->l_pac*2+3)/4*4; /* in bases */
  pac = calloc(m_pac/4,sizeof(uint8_t));
  if (pac == 0 && m_pac > 0) {
    fprintf(stderr, "[bis_bns_fasta2bntseq] failed to allocate %lld bytes for the converted sequence\n", (long long)(m_pac/4));
    free(_pac);
    err_fclose(fp);
    bns_destroy(bns);
    kseq_destroy(seq);
    return -1;
  }

  // forward strand with conversion
  for (l=0; l<bns->l_pac; ++l) {
    uint8_t c = _get_pac(_pac,l);
    if (parent && c == 1) c = 3;
    if (!parent && c == 2) c = 0;
    _set_pac(pac, l, c);
  }

  // reverse complement, converted after complementing; l keeps counting up
  for (k=bns->l_pac-1; k>=0; --k,++l) {
    uint8_t c = 3-_get_pac(_pac,k);
    if (parent && c == 1) c = 3;
    if (!parent && c == 2) c = 0;
    _set_pac(pac, l, c);
  }
  free(_pac);

  assert(bns->l_pac<<1 == l);
  { // finalize .pac file
    ubyte_t ct;
    err_fwrite(pac, 1, (l>>2) + ((l&3) == 0? 0 : 1), fp);
    // the following codes make the pac file size always (l_pac/4+1+1)
    if (l % 4 == 0) {
      ct = 0;
      err_fwrite(&ct, 1, 1, fp);
    }
    // last byte records how many bases the final packed byte holds
    ct = l % 4;
    err_fwrite(&ct, 1, 1, fp);
    // close .pac file
    err_fflush(fp);
    err_fclose(fp);
  }
  if (parent) bis_bns_dump(bns, prefix);
  bns_destroy(bns);
  kseq_destroy(seq);
  free(pac);
  return l;
}