/*
 * Read every record from the FASTA stream `fp_fa`, pack the bases four to a
 * byte, and write the packed sequence to "<prefix>.pac".  bns_dump() is then
 * called with the same prefix for the collected bntseq_t.
 *
 *   fp_fa     input stream handed to kseq_init()
 *   prefix    output path prefix; ".pac" is appended to it
 *   for_only  when zero, the reverse complement of the packed sequence is
 *             appended after the forward sequence before the file is written
 *
 * Returns bns->l_pac as it stands when the file is written (it includes the
 * reverse-complement half when for_only is zero), or -1 if the output file
 * name could not be allocated.
 */
int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix, int for_only)
{
    static const char pac_ext[] = ".pac";
    kseq_t *seq;
    char *name;
    bntseq_t *bns;
    uint8_t *pac = 0;
    int32_t m_seqs, m_holes;
    int64_t ret = -1, m_pac, l;
    bntamb1_t *q;
    FILE *fp;

    /* Size the file name from the prefix: a fixed-size stack buffer would be
     * overrun by a long prefix.  sizeof(pac_ext) includes the terminating NUL. */
    name = (char*)calloc(strlen(prefix) + sizeof(pac_ext), 1);
    if (name == 0) return ret;
    strcpy(name, prefix);
    strcat(name, pac_ext);

    // initialization
    seq = kseq_init(fp_fa);
    bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
    bns->seed = 11; // fixed seed for random generator
    srand48(bns->seed);
    m_seqs = m_holes = 8;
    m_pac = 0x10000;
    bns->anns = (bntann1_t*)calloc(m_seqs, sizeof(bntann1_t));
    bns->ambs = (bntamb1_t*)calloc(m_holes, sizeof(bntamb1_t));
    pac = calloc(m_pac/4, 1); // four packed entries per byte
    q = bns->ambs;
    fp = xopen(name, "wb");
    free(name);

    // read sequences
    while (kseq_read(seq) >= 0)
        pac = add1(seq, bns, pac, &m_pac, &m_seqs, &m_holes, &q);

    if (!for_only) { // add the reverse complemented sequence
        // capacity for twice the forward length, rounded up to a whole byte
        m_pac = (bns->l_pac * 2 + 3) / 4 * 4;
        pac = realloc(pac, m_pac/4);
        // clear the bytes beyond the forward half; _set_pac is applied to them below
        memset(pac + (bns->l_pac+3)/4, 0, (m_pac - (bns->l_pac+3)/4*4) / 4);
        // walk the forward half backwards, appending 3-code at the growing end
        for (l = bns->l_pac - 1; l >= 0; --l, ++bns->l_pac)
            _set_pac(pac, bns->l_pac, 3-_get_pac(pac, l));
    }
    ret = bns->l_pac;

    { // finalize .pac file
        ubyte_t ct;
        err_fwrite(pac, 1, (bns->l_pac>>2) + ((bns->l_pac&3) == 0? 0 : 1), fp);
        // the trailer below makes the pac file size always (l_pac/4+1+1):
        // an extra zero byte when l_pac is a multiple of 4, then l_pac % 4
        if (bns->l_pac % 4 == 0) {
            ct = 0;
            err_fwrite(&ct, 1, 1, fp);
        }
        ct = bns->l_pac % 4;
        err_fwrite(&ct, 1, 1, fp);
        // close .pac file
        err_fflush(fp);
        err_fclose(fp);
    }

    bns_dump(bns, prefix);
    bns_destroy(bns);
    kseq_destroy(seq);
    free(pac);
    return ret;
}
/*
 * Read every record from the FASTA stream `fp_fa`, pack the bases four to a
 * byte, and write the forward-only packed sequence to "<prefix>.bis.pac".
 * No reverse complement is appended and no annotation dump is performed.
 *
 *   fp_fa   input stream handed to kseq_init()
 *   prefix  output path prefix; ".bis.pac" is appended to it
 *
 * Returns bns->l_pac after all records were added, or -1 if the output file
 * name could not be allocated.
 */
int64_t dump_forward_pac(gzFile fp_fa, const char *prefix)
{
    static const char pac_ext[] = ".bis.pac";
    kseq_t *seq;
    char *name;
    bntseq_t *bns;
    uint8_t *pac = 0;
    int32_t m_seqs, m_holes;
    int64_t ret = -1, m_pac;
    bntamb1_t *q;
    FILE *fp;

    /* Size the file name from the prefix: a fixed-size stack buffer would be
     * overrun by a long prefix.  sizeof(pac_ext) includes the terminating NUL. */
    name = (char*)calloc(strlen(prefix) + sizeof(pac_ext), 1);
    if (name == 0) return ret;
    strcpy(name, prefix);
    strcat(name, pac_ext);

    // initialization
    seq = kseq_init(fp_fa);
    bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
    bns->seed = 11; // fixed seed for random generator
    srand48(bns->seed);
    m_seqs = m_holes = 8;
    m_pac = 0x10000;
    bns->anns = (bntann1_t*)calloc(m_seqs, sizeof(bntann1_t));
    bns->ambs = (bntamb1_t*)calloc(m_holes, sizeof(bntamb1_t));
    pac = calloc(m_pac/4, 1); // four packed entries per byte
    q = bns->ambs;
    fp = xopen(name, "wb");
    free(name);

    // read sequences
    while (kseq_read(seq) >= 0)
        pac = add1(seq, bns, pac, &m_pac, &m_seqs, &m_holes, &q);
    ret = bns->l_pac;

    { // finalize .pac file
        ubyte_t ct;
        err_fwrite(pac, 1, (bns->l_pac>>2) + ((bns->l_pac&3) == 0? 0 : 1), fp);
        // the trailer below makes the pac file size always (l_pac/4+1+1):
        // an extra zero byte when l_pac is a multiple of 4, then l_pac % 4
        if (bns->l_pac % 4 == 0) {
            ct = 0;
            err_fwrite(&ct, 1, 1, fp);
        }
        ct = bns->l_pac % 4;
        err_fwrite(&ct, 1, 1, fp);
        // close .pac file
        err_fflush(fp);
        err_fclose(fp);
    }

    /* A re-dump of the forward bns via bis_bns_dump() used to follow here
     * (intended to avoid .bis.ann/.bis.amb describing a pac twice as long);
     * it is disabled, so this function writes the .bis.pac file only. */
    bns_destroy(bns);
    kseq_destroy(seq);
    free(pac);
    return ret;
}
void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt) { int i, n_seqs, tot_seqs = 0; bwa_seq_t *seqs; bwa_seqio_t *ks; clock_t t; bwt_t *bwt; // initialization ks = bwa_open_reads(opt->mode, fn_fa); { // load BWT char *str = (char*)calloc(strlen(prefix) + 10, 1); strcpy(str, prefix); strcat(str, ".bwt"); bwt = bwt_restore_bwt(str); free(str); } // core loop err_fwrite(opt, sizeof(gap_opt_t), 1, stdout); while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt->mode, opt->trim_qual)) != 0) { tot_seqs += n_seqs; t = clock(); fprintf(stderr, "[bwa_aln_core] calculate SA coordinate... "); #ifdef HAVE_PTHREAD if (opt->n_threads <= 1) { // no multi-threading at all bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt); } else { pthread_t *tid; pthread_attr_t attr; thread_aux_t *data; int j; pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); data = (thread_aux_t*)calloc(opt->n_threads, sizeof(thread_aux_t)); tid = (pthread_t*)calloc(opt->n_threads, sizeof(pthread_t)); for (j = 0; j < opt->n_threads; ++j) { data[j].tid = j; data[j].bwt = bwt; data[j].n_seqs = n_seqs; data[j].seqs = seqs; data[j].opt = opt; pthread_create(&tid[j], &attr, worker, data + j); } for (j = 0; j < opt->n_threads; ++j) pthread_join(tid[j], 0); free(data); free(tid); } #else bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt); #endif fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock(); t = clock(); fprintf(stderr, "[bwa_aln_core] write to the disk... "); for (i = 0; i < n_seqs; ++i) { bwa_seq_t *p = seqs + i; err_fwrite(&p->n_aln, 4, 1, stdout); if (p->n_aln) err_fwrite(p->aln, sizeof(bwt_aln1_t), p->n_aln, stdout); } fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock(); bwa_free_read_seq(n_seqs, seqs); fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs); } // destroy bwt_destroy(bwt); bwa_seq_close(ks); }
void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt) { int i, n_seqs, tot_seqs = 0; bwa_seq_t *seqs; bwa_seqio_t *ks; clock_t t; bwt_t *bwt; // initialization ks = bwa_open_reads(opt->mode, fn_fa); { // load BWT char *str = (char*)calloc(strlen(prefix) + 10, 1); strcpy(str, prefix); strcat(str, ".bwt"); bwt = bwt_restore_bwt(str); free(str); } // core loop err_fwrite(opt, sizeof(gap_opt_t), 1, stdout); while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt->mode, opt->trim_qual)) != 0) { tot_seqs += n_seqs; t = clock(); fprintf(stderr, "[bwa_aln_core] calculate SA coordinate... "); #ifdef THREAD if (opt->n_threads <= 1) { // no multi-threading at all bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt); } else { DWORD ThreadID; HANDLE *tid; thread_aux_t *data; int j; data = (thread_aux_t*)calloc(opt->n_threads, sizeof(thread_aux_t)); tid = (HANDLE*)calloc(opt->n_threads, sizeof(HANDLE)); for (j = 0; j < opt->n_threads; ++j) { data[j].tid = j; data[j].bwt = bwt; data[j].n_seqs = n_seqs; data[j].seqs = seqs; data[j].opt = opt; //create threads tid[j] = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) worker, data + j, 0, &ThreadID); if (tid[j] == NULL) { printf("CreateThread error: %d\n", GetLastError()); return; } } // Wait for all threads to terminate WaitForMultipleObjects(opt->n_threads, &tid[0], TRUE, INFINITE); free(data); free(tid); } #else bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt); #endif fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock(); t = clock(); fprintf(stderr, "[bwa_aln_core] write to the disk... 
"); for (i = 0; i < n_seqs; ++i) { bwa_seq_t *p = seqs + i; err_fwrite(&p->n_aln, 4, 1, stdout); if (p->n_aln) err_fwrite(p->aln, sizeof(bwt_aln1_t), p->n_aln, stdout); } fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock(); bwa_free_read_seq(n_seqs, seqs); fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs); } // destroy bwt_destroy(bwt); bwa_seq_close(ks); }
/*
 * Rewind `fp_fa`, pack every FASTA record four bases to a byte, and write a
 * converted forward strand followed by a converted reverse-complement strand
 * to "<prefix>.par.pac" (parent != 0) or "<prefix>.dau.pac" (parent == 0).
 *
 * Conversion applied to each stored code, on both strands:
 *   parent != 0 : code 1 is stored as 3
 *   parent == 0 : code 2 is stored as 0
 * (presumably C->T and G->A under an A=0,C=1,G=2,T=3 encoding -- confirm
 * against the table used by bis_add1()).
 *
 * bns->l_pac keeps the forward length; bis_bns_dump() is called only for the
 * parent variant.
 *
 * Returns the number of packed codes written (twice the forward length), or
 * -1 if the output file name could not be allocated.
 */
int64_t bis_bns_fasta2bntseq(gzFile fp_fa, const char *prefix, uint8_t parent)
{
    const char *ext = parent ? ".par.pac" : ".dau.pac";
    kseq_t *seq;
    char *name;
    bntseq_t *bns;
    uint8_t *pac = 0, *fwd_pac = 0;
    int32_t m_seqs, m_holes;
    int64_t m_pac, l, k;
    bntamb1_t *q;
    FILE *fp;

    /* Size the file name from the prefix: a fixed-size stack buffer would be
     * overrun by a long prefix. */
    name = (char*)calloc(strlen(prefix) + strlen(ext) + 1, 1);
    if (name == 0) return -1;
    strcpy(name, prefix);
    strcat(name, ext);

    // initialization
    gzseek(fp_fa, 0, SEEK_SET); // start again from the beginning of the stream
    seq = kseq_init(fp_fa);
    bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
    bns->seed = 11; // fixed seed for random generator
    srand48(bns->seed);
    m_seqs = m_holes = 8;
    m_pac = 0x10000;
    bns->anns = (bntann1_t*)calloc(m_seqs, sizeof(bntann1_t));
    bns->ambs = (bntamb1_t*)calloc(m_holes, sizeof(bntamb1_t));
    fwd_pac = calloc(m_pac/4, 1); // four packed entries per byte
    q = bns->ambs;
    fp = xopen(name, "wb");
    free(name);

    // read sequences into the unconverted forward buffer
    while (kseq_read(seq) >= 0) {
        fwd_pac = bis_add1(seq, bns, fwd_pac, &m_pac, &m_seqs, &m_holes, &q);
    }

    // Output capacity, counted in packed codes (not bits): twice the forward
    // length, rounded up to a multiple of 4 so that m_pac/4 is whole bytes.
    // The buffer must start zeroed because _set_pac is applied to fresh bytes.
    m_pac = (bns->l_pac*2+3)/4*4;
    pac = calloc(m_pac/4, sizeof(uint8_t));

    // forward strand, converted
    for (l = 0; l < bns->l_pac; ++l) {
        uint8_t c = _get_pac(fwd_pac, l);
        if (parent && c == 1) c = 3;
        if (!parent && c == 2) c = 0;
        _set_pac(pac, l, c);
    }
    // reverse complement (3 - code, read back to front), then the same conversion
    for (k = bns->l_pac-1; k >= 0; --k, ++l) {
        uint8_t c = 3-_get_pac(fwd_pac, k);
        if (parent && c == 1) c = 3;
        if (!parent && c == 2) c = 0;
        _set_pac(pac, l, c);
    }
    free(fwd_pac);

    assert(bns->l_pac<<1 == l);

    { // finalize .pac file
        ubyte_t ct;
        err_fwrite(pac, 1, (l>>2) + ((l&3) == 0? 0 : 1), fp);
        // the trailer below makes the pac file size always (l/4+1+1):
        // an extra zero byte when l is a multiple of 4, then l % 4
        if (l % 4 == 0) {
            ct = 0;
            err_fwrite(&ct, 1, 1, fp);
        }
        ct = l % 4;
        err_fwrite(&ct, 1, 1, fp);
        // close .pac file
        err_fflush(fp);
        err_fclose(fp);
    }

    if (parent) bis_bns_dump(bns, prefix);
    bns_destroy(bns);
    kseq_destroy(seq);
    free(pac);
    return l;
}