/*
 * Release the index objects referenced by bwt_bwt[0], bwt_bwt[1] and bwt_bns
 * (all defined outside this function). Entries that are null are skipped.
 */
extern "C" void bwa_seed2genome_destroy()
{
	for (int k = 0; k < 2; ++k) {
		if (bwt_bwt[k])
			bwt_destroy(bwt_bwt[k]);
	}
	if (bwt_bns)
		bns_destroy(bwt_bns);
}
/*
 * Read every record from fp_fa, pack it through add1() and write the packed
 * bytes to "<prefix>.pac"; the metadata gathered in the bntseq_t is then
 * written by bns_dump().
 *
 * fp_fa    : input stream handed to kseq_init()
 * prefix   : output prefix; ".pac" is appended to form the file name
 * for_only : when zero, the reverse complement of the packed sequence is
 *            appended, which doubles bns->l_pac
 *
 * Returns the final bns->l_pac (after the optional doubling), or -1 when
 * prefix is too long for the file-name buffer. In that case nothing is
 * read or written.
 */
int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix, int for_only)
{
	kseq_t *seq;
	char name[1024];
	bntseq_t *bns;
	uint8_t *pac = 0;
	int32_t m_seqs, m_holes;
	int64_t ret = -1, m_pac, l;
	bntamb1_t *q;
	FILE *fp;

	// refuse a prefix that would overflow name[] once ".pac" is appended
	if (strlen(prefix) + sizeof(".pac") > sizeof(name)) {
		fprintf(stderr, "[bns_fasta2bntseq] prefix is too long\n");
		return ret;
	}

	// initialization
	seq = kseq_init(fp_fa);
	bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
	bns->seed = 11; // fixed seed for random generator
	srand48(bns->seed);
	m_seqs = m_holes = 8; m_pac = 0x10000;
	bns->anns = (bntann1_t*)calloc(m_seqs, sizeof(bntann1_t));
	bns->ambs = (bntamb1_t*)calloc(m_holes, sizeof(bntamb1_t));
	pac = calloc(m_pac/4, 1); // four bases are packed per byte
	q = bns->ambs;
	strcpy(name, prefix); strcat(name, ".pac");
	fp = xopen(name, "wb");

	// read sequences
	while (kseq_read(seq) >= 0)
		pac = add1(seq, bns, pac, &m_pac, &m_seqs, &m_holes, &q);

	if (!for_only) { // add the reverse complemented sequence
		m_pac = (bns->l_pac * 2 + 3) / 4 * 4;
		// NOTE(review): the realloc result is not checked, as in the rest of this function
		pac = realloc(pac, m_pac/4);
		// clear the bytes beyond the forward strand before OR-ing bases into them
		memset(pac + (bns->l_pac+3)/4, 0, (m_pac - (bns->l_pac+3)/4*4) / 4);
		for (l = bns->l_pac - 1; l >= 0; --l, ++bns->l_pac)
			_set_pac(pac, bns->l_pac, 3-_get_pac(pac, l));
	}
	ret = bns->l_pac;

	{ // finalize .pac file
		ubyte_t ct;
		err_fwrite(pac, 1, (bns->l_pac>>2) + ((bns->l_pac&3) == 0? 0 : 1), fp);
		// the following codes make the pac file size always (l_pac/4+1+1)
		if (bns->l_pac % 4 == 0) {
			ct = 0;
			err_fwrite(&ct, 1, 1, fp);
		}
		ct = bns->l_pac % 4;
		err_fwrite(&ct, 1, 1, fp);
		// close .pac file
		err_fflush(fp);
		err_fclose(fp);
	}

	bns_dump(bns, prefix);
	bns_destroy(bns);
	kseq_destroy(seq);
	free(pac);
	return ret;
}
/*
 * Read every record from fp_fa, pack it through add1() and write only the
 * forward packed sequence to "<prefix>.bis.pac". No metadata is dumped
 * here; the bntseq_t built along the way is destroyed before returning.
 *
 * fp_fa  : input stream handed to kseq_init()
 * prefix : output prefix; ".bis.pac" is appended to form the file name
 *
 * Returns bns->l_pac, or -1 when prefix is too long for the file-name
 * buffer. In that case nothing is read or written.
 */
int64_t dump_forward_pac(gzFile fp_fa, const char *prefix)
{
	kseq_t *seq;
	char name[1024];
	bntseq_t *bns;
	uint8_t *pac = 0;
	int32_t m_seqs, m_holes;
	int64_t ret = -1, m_pac;
	bntamb1_t *q;
	FILE *fp;

	// refuse a prefix that would overflow name[] once ".bis.pac" is appended
	if (strlen(prefix) + sizeof(".bis.pac") > sizeof(name)) {
		fprintf(stderr, "[dump_forward_pac] prefix is too long\n");
		return ret;
	}

	// initialization
	seq = kseq_init(fp_fa);
	bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
	bns->seed = 11; // fixed seed for random generator
	srand48(bns->seed);
	m_seqs = m_holes = 8; m_pac = 0x10000;
	bns->anns = (bntann1_t*)calloc(m_seqs, sizeof(bntann1_t));
	bns->ambs = (bntamb1_t*)calloc(m_holes, sizeof(bntamb1_t));
	pac = calloc(m_pac/4, 1); // four bases are packed per byte
	q = bns->ambs;
	strcpy(name, prefix); strcat(name, ".bis.pac");
	fp = xopen(name, "wb");

	// read sequences
	while (kseq_read(seq) >= 0)
		pac = add1(seq, bns, pac, &m_pac, &m_seqs, &m_holes, &q);
	ret = bns->l_pac;

	{ // finalize .pac file
		ubyte_t ct;
		err_fwrite(pac, 1, (bns->l_pac>>2) + ((bns->l_pac&3) == 0? 0 : 1), fp);
		// the following codes make the pac file size always (l_pac/4+1+1)
		if (bns->l_pac % 4 == 0) {
			ct = 0;
			err_fwrite(&ct, 1, 1, fp);
		}
		ct = bns->l_pac % 4;
		err_fwrite(&ct, 1, 1, fp);
		// close .pac file
		err_fflush(fp);
		err_fclose(fp);
	}

	// the forward-only .ann/.amb re-dump (bis_bns_dump) is disabled in this version
	bns_destroy(bns);
	kseq_destroy(seq);
	free(pac);
	return ret;
}
/*
 * Print the packed sequence of an index to stdout, one character per base.
 *
 * argv[1] is passed to bns_restore(); l_pac/2+2 bytes are then read from
 * bns->fp_pac and each of the l_pac packed values is printed through the
 * "ACGT#" lookup string.
 *
 * Returns 0 on success, 1 on a usage error or when the index or the buffer
 * cannot be obtained. A short read is reported on stderr but is not fatal;
 * the unread part of the buffer stays zero-filled.
 */
int main(int argc, char *argv[])
{
	bntseq_t *bns;
	uint8_t *pac;
	size_t n_bytes, n_read;
	int64_t i; // 64-bit so that a sequence longer than INT_MAX bases cannot overflow the index

	if (argc < 2) {
		fprintf(stderr, "Usage: %s <prefix>\n", argc > 0 ? argv[0] : "pac2txt");
		return 1;
	}

	bns = bns_restore(argv[1]);
	if (bns == NULL) {
		fprintf(stderr, "[main] fail to restore the index '%s'\n", argv[1]);
		return 1;
	}

	n_bytes = (size_t)(bns->l_pac/2+2);
	pac = calloc(n_bytes, 1);
	if (pac == NULL) {
		fprintf(stderr, "[main] out of memory\n");
		bns_destroy(bns);
		return 1;
	}

	n_read = fread(pac, 1, n_bytes, bns->fp_pac);
	if (n_read != n_bytes)
		fprintf(stderr, "[main] warning: short read on the .pac file (%lu of %lu bytes)\n",
				(unsigned long)n_read, (unsigned long)n_bytes);

	for (i = 0; i < bns->l_pac; ++i)
		putchar("ACGT#"[_get_pac(pac, i)]);

	free(pac);
	bns_destroy(bns);
	return 0;
}
/*
 * Destructor: frees the reference array, then hands the sequence
 * annotation and the two BWT objects to their destroy functions.
 */
BWA::~BWA()
{
	delete[] reference;
	bns_destroy(bns);
	for (int k = 0; k < 2; ++k)
		bwt_destroy(bwts[k]);
}
/*
 * Stream every record of fp_fa into "<prefix>.pac" (four bases per byte,
 * flushed in 0x10000-byte chunks) while collecting per-sequence annotations
 * and runs of ambiguous bases in a bntseq_t, which is finally written by
 * bns_dump().
 *
 * fp_fa  : input stream handed to kseq_init()
 * prefix : output prefix; ".pac" is appended to form the file name
 *
 * Bases whose nst_nt4_table code is >= 4 are recorded as holes and replaced
 * in the packed output by lrand48()&3; the generator is seeded with a fixed
 * value.
 */
void bns_fasta2bntseq(gzFile fp_fa, const char *prefix)
{
	kseq_t *seq;
	char name[1024];
	bntseq_t *bns;
	bntamb1_t *q;
	int l_buf;
	unsigned char buf[0x10000];
	int32_t m_seqs, m_holes, l, i;
	FILE *fp;

	// a prefix that does not fit name[] together with ".pac" is a fatal input error
	xassert(strlen(prefix) + sizeof(".pac") <= sizeof(name), "prefix is too long.");

	// initialization
	seq = kseq_init(fp_fa);
	bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
	bns->seed = 11; // fixed seed for random generator
	srand48(bns->seed);
	m_seqs = m_holes = 8;
	bns->anns = (bntann1_t*)calloc(m_seqs, sizeof(bntann1_t));
	bns->ambs = (bntamb1_t*)calloc(m_holes, sizeof(bntamb1_t));
	q = bns->ambs;
	l_buf = 0;
	// bounded formatting: cannot write past name[] even if xassert() returns
	snprintf(name, sizeof(name), "%s.pac", prefix);
	fp = xopen(name, "wb");
	memset(buf, 0, 0x10000);

	// read sequences
	while ((l = kseq_read(seq)) >= 0) {
		bntann1_t *p;
		int lasts;
		if (bns->n_seqs == m_seqs) {
			m_seqs <<= 1;
			bns->anns = (bntann1_t*)realloc(bns->anns, m_seqs * sizeof(bntann1_t));
		}
		p = bns->anns + bns->n_seqs;
		p->name = strdup((char*)seq->name.s);
		// Test the comment LENGTH, not the buffer pointer: kseq keeps the
		// comment buffer of an earlier record allocated, so a record without
		// a comment would otherwise inherit a stale one.
		p->anno = seq->comment.l > 0? strdup((char*)seq->comment.s) : strdup("(null)");
		p->gi = 0; p->len = l;
		p->offset = (bns->n_seqs == 0)? 0 : (p-1)->offset + (p-1)->len;
		p->n_ambs = 0;
		for (i = 0, lasts = 0; i < l; ++i) {
			int c = nst_nt4_table[(int)seq->seq.s[i]];
			if (c >= 4) { // N
				if (lasts == seq->seq.s[i]) { // contiguous N
					++q->len;
				} else {
					if (bns->n_holes == m_holes) {
						m_holes <<= 1;
						bns->ambs = (bntamb1_t*)realloc(bns->ambs, m_holes * sizeof(bntamb1_t));
					}
					q = bns->ambs + bns->n_holes;
					q->len = 1;
					q->offset = p->offset + i;
					q->amb = seq->seq.s[i];
					++p->n_ambs;
					++bns->n_holes;
				}
			}
			lasts = seq->seq.s[i];
			{ // fill buffer
				if (c >= 4) c = lrand48()&0x3;
				if (l_buf == 0x40000) { // buffer holds 0x10000 bytes * 4 bases: flush it
					fwrite(buf, 1, 0x10000, fp);
					memset(buf, 0, 0x10000);
					l_buf = 0;
				}
				buf[l_buf>>2] |= c << ((3 - (l_buf&3)) << 1);
				++l_buf;
			}
		}
		++bns->n_seqs;
		bns->l_pac += seq->seq.l;
	}
	xassert(bns->l_pac, "zero length sequence.");

	{ // finalize .pac file
		ubyte_t ct;
		fwrite(buf, 1, (l_buf>>2) + ((l_buf&3) == 0? 0 : 1), fp);
		// the following codes make the pac file size always (l_pac/4+1+1)
		if (bns->l_pac % 4 == 0) {
			ct = 0;
			fwrite(&ct, 1, 1, fp);
		}
		ct = bns->l_pac % 4;
		fwrite(&ct, 1, 1, fp);
		// close .pac file
		fclose(fp);
	}

	bns_dump(bns, prefix);
	bns_destroy(bns);
	kseq_destroy(seq);
}
int bwa_bwtsw2(int argc, char *argv[]) { bsw2opt_t *opt; bwt_t *target[2]; char buf[1024]; bntseq_t *bns; int c; opt = bsw2_init_opt(); srand48(11); optind = 1; while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:y:s:c:N:Hf:")) >= 0) { switch (c) { case 'q': opt->q = atoi(optarg); break; case 'r': opt->r = atoi(optarg); break; case 'a': opt->a = atoi(optarg); break; case 'b': opt->b = atoi(optarg); break; case 'w': opt->bw = atoi(optarg); break; case 'T': opt->t = atoi(optarg); break; case 't': opt->n_threads = atoi(optarg); break; case 'z': opt->z = atoi(optarg); break; case 'y': opt->yita = atof(optarg); break; case 's': opt->is = atoi(optarg); break; case 'm': opt->mask_level = atof(optarg); break; case 'c': opt->coef = atof(optarg); break; case 'N': opt->t_seeds = atoi(optarg); break; case 'H': opt->hard_clip = 1; break; case 'f': xreopen(optarg, "w", stdout); break; } } opt->qr = opt->q + opt->r; if (optind + 2 > argc) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: bwa bwasw [options] <target.prefix> <query.fa>\n\n"); fprintf(stderr, "Options: -a INT score for a match [%d]\n", opt->a); fprintf(stderr, " -b INT mismatch penalty [%d]\n", opt->b); fprintf(stderr, " -q INT gap open penalty [%d]\n", opt->q); fprintf(stderr, " -r INT gap extension penalty [%d]\n", opt->r); // fprintf(stderr, " -y FLOAT error recurrence coef. 
(4..16) [%.1f]\n", opt->yita); fprintf(stderr, "\n"); fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads); fprintf(stderr, " -s INT size of a chunk of reads [%d]\n", opt->chunk_size); fprintf(stderr, "\n"); fprintf(stderr, " -w INT band width [%d]\n", opt->bw); fprintf(stderr, " -m FLOAT mask level [%.2f]\n", opt->mask_level); fprintf(stderr, "\n"); fprintf(stderr, " -T INT score threshold divided by a [%d]\n", opt->t); fprintf(stderr, " -s INT maximum seeding interval size [%d]\n", opt->is); fprintf(stderr, " -z INT Z-best [%d]\n", opt->z); fprintf(stderr, " -N INT # seeds to trigger reverse alignment [%d]\n", opt->t_seeds); fprintf(stderr, " -c FLOAT coefficient of length-threshold adjustment [%.1f]\n", opt->coef); fprintf(stderr, " -H in SAM output, use hard clipping rather than soft\n"); fprintf(stderr, " -f FILE file to output results to instead of stdout\n\n"); fprintf(stderr, "Note: For long Illumina, 454 and Sanger reads, assembly contigs, fosmids and\n"); fprintf(stderr, " BACs, the default setting usually works well. For the current PacBio\n"); fprintf(stderr, " reads (end of 2010), '-b5 -q2 -r1 -z10' is recommended. 
One may also\n"); fprintf(stderr, " increase '-z' for better sensitivity.\n"); fprintf(stderr, "\n"); if (0) { double c, theta, eps, delta; c = opt->a / log(opt->yita); theta = exp(-opt->b / c) / opt->yita; eps = exp(-opt->q / c); delta = exp(-opt->r / c); fprintf(stderr, "mismatch: %lf, gap_open: %lf, gap_ext: %lf\n\n", theta, eps, delta); } return 1; } // adjust opt for opt->a opt->t *= opt->a; opt->coef *= opt->a; strcpy(buf, argv[optind]); target[0] = bwt_restore_bwt(strcat(buf, ".bwt")); strcpy(buf, argv[optind]); bwt_restore_sa(strcat(buf, ".sa"), target[0]); strcpy(buf, argv[optind]); target[1] = bwt_restore_bwt(strcat(buf, ".rbwt")); strcpy(buf, argv[optind]); bwt_restore_sa(strcat(buf, ".rsa"), target[1]); bns = bns_restore(argv[optind]); bsw2_aln(opt, bns, target, argv[optind+1]); bns_destroy(bns); bwt_destroy(target[0]); bwt_destroy(target[1]); free(opt); fflush(stdout); xreopen("/dev/tty","w",stdout); return 0; }
/*
 * Rewind fp_fa, pack every record through bis_add1(), then build a packed
 * buffer holding the forward sequence followed by its reverse complement,
 * applying a base substitution to every position:
 *   parent != 0 : code 1 is replaced by 3
 *   parent == 0 : code 2 is replaced by 0
 * The result goes to "<prefix>.par.pac" (parent) or "<prefix>.dau.pac".
 * bis_bns_dump() is called only for the parent.
 *
 * Returns the number of bases written, i.e. twice bns->l_pac.
 */
int64_t bis_bns_fasta2bntseq(gzFile fp_fa, const char *prefix, uint8_t parent)
{
	char name[1024];
	int32_t m_seqs = 8, m_holes = 8;
	int64_t m_pac = 0x10000;
	int64_t out_pos, src_pos;
	uint8_t *fwd_pac = 0, *both_pac = 0;
	kseq_t *seq;
	bntseq_t *bns;
	bntamb1_t *q;
	FILE *fp;
	ubyte_t tail;

	// initialization
	gzseek(fp_fa, 0, SEEK_SET);
	seq = kseq_init(fp_fa);
	bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
	bns->seed = 11; // fixed seed for random generator
	srand48(bns->seed);
	bns->anns = (bntann1_t*)calloc(m_seqs, sizeof(bntann1_t));
	bns->ambs = (bntamb1_t*)calloc(m_holes, sizeof(bntamb1_t));
	fwd_pac = calloc(m_pac/4, 1);
	q = bns->ambs;

	strcpy(name, prefix);
	strcat(name, parent ? ".par.pac" : ".dau.pac");
	fp = xopen(name, "wb");

	// read sequences
	while (kseq_read(seq) >= 0)
		fwd_pac = bis_add1(seq, bns, fwd_pac, &m_pac, &m_seqs, &m_holes, &q);

	// room for both strands, rounded up to a whole number of bytes
	m_pac = (bns->l_pac*2+3)/4*4;
	both_pac = calloc(m_pac/4, sizeof(uint8_t));

	// forward strand with the substitution applied
	out_pos = 0;
	while (out_pos < bns->l_pac) {
		uint8_t c = _get_pac(fwd_pac, out_pos);
		if (parent) {
			if (c == 1) c = 3;
		} else {
			if (c == 2) c = 0;
		}
		_set_pac(both_pac, out_pos, c);
		++out_pos;
	}

	// reverse complement, substituted after complementing
	src_pos = bns->l_pac - 1;
	while (src_pos >= 0) {
		uint8_t c = 3-_get_pac(fwd_pac, src_pos);
		if (parent) {
			if (c == 1) c = 3;
		} else {
			if (c == 2) c = 0;
		}
		_set_pac(both_pac, out_pos, c);
		--src_pos;
		++out_pos;
	}
	free(fwd_pac);

	assert(bns->l_pac<<1 == out_pos);

	// finalize .pac file: payload, optional zero byte, then the remainder count
	err_fwrite(both_pac, 1, (out_pos + 3) >> 2, fp);
	if (out_pos % 4 == 0) {
		tail = 0;
		err_fwrite(&tail, 1, 1, fp);
	}
	tail = out_pos % 4;
	err_fwrite(&tail, 1, 1, fp);
	err_fflush(fp);
	err_fclose(fp);

	if (parent)
		bis_bns_dump(bns, prefix);
	bns_destroy(bns);
	kseq_destroy(seq);
	free(both_pac);
	return out_pos;
}
int bwa_bwtsw2(int argc, char *argv[]) { extern char *bwa_infer_prefix(const char *hint); bsw2opt_t *opt; bwt_t *target; char buf[1024], *prefix; bntseq_t *bns; int c; opt = bsw2_init_opt(); srand48(11); while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:s:c:N:Hf:MI:SG:C")) >= 0) { switch (c) { case 'q': opt->q = atoi(optarg); break; case 'r': opt->r = atoi(optarg); break; case 'a': opt->a = atoi(optarg); break; case 'b': opt->b = atoi(optarg); break; case 'w': opt->bw = atoi(optarg); break; case 'T': opt->t = atoi(optarg); break; case 't': opt->n_threads = atoi(optarg); break; case 'z': opt->z = atoi(optarg); break; case 's': opt->is = atoi(optarg); break; case 'm': opt->mask_level = atof(optarg); break; case 'c': opt->coef = atof(optarg); break; case 'N': opt->t_seeds = atoi(optarg); break; case 'M': opt->multi_2nd = 1; break; case 'H': opt->hard_clip = 1; break; case 'f': xreopen(optarg, "w", stdout); break; case 'I': opt->max_ins = atoi(optarg); break; case 'S': opt->skip_sw = 1; break; case 'C': opt->cpy_cmt = 1; break; case 'G': opt->max_chain_gap = atoi(optarg); break; } } opt->qr = opt->q + opt->r; if (optind + 2 > argc) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: bwa bwasw [options] <target.prefix> <query.fa> [query2.fa]\n\n"); fprintf(stderr, "Options: -a INT score for a match [%d]\n", opt->a); fprintf(stderr, " -b INT mismatch penalty [%d]\n", opt->b); fprintf(stderr, " -q INT gap open penalty [%d]\n", opt->q); fprintf(stderr, " -r INT gap extension penalty [%d]\n", opt->r); fprintf(stderr, " -w INT band width [%d]\n", opt->bw); fprintf(stderr, " -m FLOAT mask level [%.2f]\n", opt->mask_level); fprintf(stderr, "\n"); fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads); fprintf(stderr, " -f FILE file to output results to instead of stdout\n"); fprintf(stderr, " -H in SAM output, use hard clipping instead of soft clipping\n"); fprintf(stderr, " -C copy FASTA/Q comment to SAM output\n"); fprintf(stderr, " -M mark multi-part alignments 
as secondary\n"); fprintf(stderr, " -S skip Smith-Waterman read pairing\n"); fprintf(stderr, " -I INT ignore pairs with insert >=INT for inferring the size distr [%d]\n", opt->max_ins); fprintf(stderr, "\n"); fprintf(stderr, " -T INT score threshold divided by a [%d]\n", opt->t); fprintf(stderr, " -c FLOAT coefficient of length-threshold adjustment [%.1f]\n", opt->coef); fprintf(stderr, " -z INT Z-best [%d]\n", opt->z); fprintf(stderr, " -s INT maximum seeding interval size [%d]\n", opt->is); fprintf(stderr, " -N INT # seeds to trigger rev aln; 2*INT is also the chaining threshold [%d]\n", opt->t_seeds); fprintf(stderr, " -G INT maximum gap size during chaining [%d]\n", opt->max_chain_gap); fprintf(stderr, "\n"); fprintf(stderr, "Note: For long Illumina, 454 and Sanger reads, assembly contigs, fosmids and\n"); fprintf(stderr, " BACs, the default setting usually works well. For the current PacBio\n"); fprintf(stderr, " reads (end of 2010), '-b5 -q2 -r1 -z10' is recommended. One may also\n"); fprintf(stderr, " increase '-z' for better sensitivity.\n"); fprintf(stderr, "\n"); return 1; } // adjust opt for opt->a opt->t *= opt->a; opt->coef *= opt->a; if ((prefix = bwa_infer_prefix(argv[optind])) == 0) { fprintf(stderr, "[%s] fail to locate the index\n", __func__); return 0; } strcpy(buf, prefix); target = bwt_restore_bwt(strcat(buf, ".bwt")); strcpy(buf, prefix); bwt_restore_sa(strcat(buf, ".sa"), target); bns = bns_restore(prefix); bsw2_aln(opt, bns, target, argv[optind+1], optind+2 < argc? argv[optind+2] : 0); bns_destroy(bns); bwt_destroy(target); free(opt); free(prefix); return 0; }
int bwa_bwtsw2(int argc, char *argv[]) { bsw2opt_t *opt; bwt_t *target[2]; char buf[1024]; bntseq_t *bns; int c; opt = bsw2_init_opt(); srand48(11); while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:y:s:c:N:H")) >= 0) { switch (c) { case 'q': opt->q = atoi(optarg); break; case 'r': opt->r = atoi(optarg); break; case 'a': opt->a = atoi(optarg); break; case 'b': opt->b = atoi(optarg); break; case 'w': opt->bw = atoi(optarg); break; case 'T': opt->t = atoi(optarg); break; case 't': opt->n_threads = atoi(optarg); break; case 'z': opt->z = atoi(optarg); break; case 'y': opt->yita = atof(optarg); break; case 's': opt->is = atoi(optarg); break; case 'm': opt->mask_level = atof(optarg); break; case 'c': opt->coef = atof(optarg); break; case 'N': opt->t_seeds = atoi(optarg); break; case 'H': opt->hard_clip = 1; break; } } opt->qr = opt->q + opt->r; if (optind + 2 > argc) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: bwa dbwtsw [options] <target.prefix> <query.fa>\n\n"); fprintf(stderr, "Options: -a INT score for a match [%d]\n", opt->a); fprintf(stderr, " -b INT mismatch penalty [%d]\n", opt->b); fprintf(stderr, " -q INT gap open penalty [%d]\n", opt->q); fprintf(stderr, " -r INT gap extension penalty [%d]\n", opt->r); // fprintf(stderr, " -y FLOAT error recurrence coef. 
(4..16) [%.1f]\n", opt->yita); fprintf(stderr, "\n"); fprintf(stderr, " -t INT nmber of threads [%d]\n", opt->n_threads); fprintf(stderr, " -s INT size of a chunk of reads [%d]\n", opt->chunk_size); fprintf(stderr, "\n"); fprintf(stderr, " -w INT band width [%d]\n", opt->bw); fprintf(stderr, " -m FLOAT mask level [%.2f]\n", opt->mask_level); fprintf(stderr, "\n"); fprintf(stderr, " -T INT score threshold divided by a [%d]\n", opt->t); fprintf(stderr, " -s INT maximum seeding interval size [%d]\n", opt->is); fprintf(stderr, " -z INT Z-best [%d]\n", opt->z); fprintf(stderr, " -N INT # seeds to trigger reverse alignment [%d]\n", opt->t_seeds); fprintf(stderr, " -c FLOAT coefficient of length-threshold adjustment [%.1f]\n", opt->coef); fprintf(stderr, " -H in SAM output, use hard clipping rather than soft\n"); fprintf(stderr, "\n"); { double c, theta, eps, delta; c = opt->a / log(opt->yita); theta = exp(-opt->b / c) / opt->yita; eps = exp(-opt->q / c); delta = exp(-opt->r / c); fprintf(stderr, "mismatch: %lf, gap_open: %lf, gap_ext: %lf\n\n", theta, eps, delta); } return 1; } // adjust opt for opt->a opt->t *= opt->a; opt->coef *= opt->a; strcpy(buf, argv[optind]); target[0] = bwt_restore_bwt(strcat(buf, ".bwt")); strcpy(buf, argv[optind]); bwt_restore_sa(strcat(buf, ".sa"), target[0]); strcpy(buf, argv[optind]); target[1] = bwt_restore_bwt(strcat(buf, ".rbwt")); strcpy(buf, argv[optind]); bwt_restore_sa(strcat(buf, ".rsa"), target[1]); bns = bns_restore(argv[optind]); bsw2_aln(opt, bns, target, argv[optind+1]); bns_destroy(bns); bwt_destroy(target[0]); bwt_destroy(target[1]); free(opt); return 0; }
/*
 * Read every record from fp_fa, pack it through R_add1() and write two
 * files: "<prefix>.pac" with the packed sequence and "<prefix>.rpac" with
 * the same packed values in reversed order (no complementing is done
 * here). The metadata is written by bns_dump().
 *
 * fp_fa  : input stream handed to kseq_init()
 * prefix : output prefix; ".pac" / ".rpac" are appended
 *
 * Returns bns->l_pac, or -1 when prefix is too long for the file-name
 * buffer. In that case nothing is read or written.
 */
int64_t R_bns_fasta2bntseq(gzFile fp_fa, const char *prefix)
{
	kseq_t *seq;
	char name[1024];
	bntseq_t *bns;
	uint8_t *pac = 0;
	uint8_t *reverse_pac = NULL;
	int32_t m_seqs, m_holes;
	int64_t ret = -1, m_pac, l, m_r_pac;
	int64_t i; // 64-bit: an int index overflows once l_pac exceeds INT_MAX
	size_t n_bytes;
	bntamb1_t *q;
	FILE *fp, *fp_r;
	ubyte_t ct;

	// refuse a prefix that would overflow name[] with the longest suffix (".rpac")
	if (strlen(prefix) + sizeof(".rpac") > sizeof(name)) {
		fprintf(stderr, "[R_bns_fasta2bntseq] prefix is too long\n");
		return ret;
	}

	// initialization
	seq = kseq_init(fp_fa);
	bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
	bns->seed = 11; // fixed seed for random generator
	srand48(bns->seed);
	m_seqs = m_holes = 8; m_pac = 0x10000;
	bns->anns = (bntann1_t*)calloc(m_seqs, sizeof(bntann1_t));
	bns->ambs = (bntamb1_t*)calloc(m_holes, sizeof(bntamb1_t));
	pac = calloc(m_pac/NT_PER_BYTE, 1);
	q = bns->ambs;
	strcpy(name, prefix); strcat(name, ".pac");
	fp = xopen(name, "wb");
	strcpy(name, prefix); strcat(name, ".rpac");
	fp_r = xopen(name, "wb");

	// read sequences
	while (kseq_read(seq) >= 0)
		pac = R_add1(seq, bns, pac, &m_pac, &m_seqs, &m_holes, &q);
	ret = bns->l_pac;

	// build the reversed copy of the packed sequence
	fprintf(stderr, "[R_bns_fasta2bntseq]:reverse_pac!\n");
	m_r_pac = (bns->l_pac + NT_PER_BYTE - 1) / NT_PER_BYTE * NT_PER_BYTE;
	reverse_pac = calloc(m_r_pac/NT_PER_BYTE, sizeof(uint8_t));
	// l walks down from the last base while i walks up; the original
	// condition "l>=0, i < l_pac" only ever evaluated its right-hand side
	for (l = bns->l_pac - 1, i = 0; i < bns->l_pac; --l, ++i)
		_set_pac(reverse_pac, i, _get_pac(pac, l));

	// payload size in bytes, derived from NT_PER_BYTE so that it always
	// matches the reverse_pac allocation (equal to the former
	// (l_pac>>1)+(l_pac&1) when NT_PER_BYTE is 2)
	n_bytes = (size_t)(m_r_pac / NT_PER_BYTE);

	{ // finalize .pac file
		fwrite(pac, 1, n_bytes, fp);
		// one extra zero byte when l_pac is a multiple of NT_PER_BYTE,
		// then a final byte holding l_pac % NT_PER_BYTE
		if (bns->l_pac % NT_PER_BYTE == 0) {
			ct = 0;
			fwrite(&ct, 1, 1, fp);
		}
		ct = bns->l_pac % NT_PER_BYTE;
		fwrite(&ct, 1, 1, fp);
		// close .pac file
		fclose(fp);
	}
	{ // finalize .rpac file (same trailer layout)
		fwrite(reverse_pac, 1, n_bytes, fp_r);
		if (bns->l_pac % NT_PER_BYTE == 0) {
			ct = 0;
			fwrite(&ct, 1, 1, fp_r);
		}
		ct = bns->l_pac % NT_PER_BYTE;
		fwrite(&ct, 1, 1, fp_r);
		// close .rpac file
		fclose(fp_r);
	}

	bns_dump(bns, prefix);
	bns_destroy(bns);
	kseq_destroy(seq);
	free(pac);
	free(reverse_pac);
	return ret;
}