void bwt_index(char *prefix) { int block_size = 10000000; int64_t l_pac; char *str, *str2, *str3; str = (char*)calloc(strlen(prefix) + 10, 1); str2 = (char*)calloc(strlen(prefix) + 10, 1); str3 = (char*)calloc(strlen(prefix) + 10, 1); fprintf(stderr, "[bwt_index] Building bwt-index for genome...\n"); { // for&rev.pac .ann .amb gzFile fp = gzopen(prefix, "r"); l_pac = bns_fasta2bntseq(fp, prefix, 0); gzclose(fp); } { // .bwt strcpy(str, prefix); strcat(str, ".pac"); strcpy(str2, prefix); strcat(str2, ".bwt"); bwt_bwtgen2(str, str2, block_size); } { // update .bwt bwt_t *bwt; strcpy(str, prefix); strcat(str, ".bwt"); bwt = bwt_restore_bwt(str); bwt_bwtupdate_core(bwt); bwt_dump_bwt(str, bwt); bwt_destroy(bwt); } { // forward.pac gzFile fp = gzopen(prefix, "r"); l_pac = bns_fasta2bntseq(fp, prefix, 1); gzclose(fp); } { // .sa bwt_t *bwt; strcpy(str, prefix); strcat(str, ".bwt"); strcpy(str3, prefix); strcat(str3, ".sa"); bwt = bwt_restore_bwt(str); bwt_cal_sa(bwt, 32); bwt_dump_sa(str3, bwt); bwt_destroy(bwt); } fprintf(stderr, "[bwt_index] Building done!\n"); free(str); free(str2); free(str3); }
int bwa_index(int argc, char *argv[]) { char *prefix = 0, *str, *str2, *str3; int c, algo_type = 0, is_color = 0, is_64 = 0; clock_t t; int64_t l_pac; while ((c = getopt(argc, argv, "6ca:p:")) >= 0) { switch (c) { case 'a': // if -a is not set, algo_type will be determined later if (strcmp(optarg, "div") == 0) algo_type = 1; else if (strcmp(optarg, "bwtsw") == 0) algo_type = 2; else if (strcmp(optarg, "is") == 0) algo_type = 3; else err_fatal(__func__, "unknown algorithm: '%s'.", optarg); break; case 'p': prefix = strdup(optarg); break; case 'c': is_color = 1; break; case '6': is_64 = 1; break; default: return 1; } } if (optind + 1 > argc) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: bwa index [-a bwtsw|is] [-c] <in.fasta>\n\n"); fprintf(stderr, "Options: -a STR BWT construction algorithm: bwtsw or is [auto]\n"); fprintf(stderr, " -p STR prefix of the index [same as fasta name]\n"); fprintf(stderr, " -6 index files named as <in.fasta>.64.* instead of <in.fasta>.* \n"); // fprintf(stderr, " -c build color-space index\n"); fprintf(stderr, "\n"); fprintf(stderr, "Warning: `-a bwtsw' does not work for short genomes, while `-a is' and\n"); fprintf(stderr, " `-a div' do not work not for long genomes. Please choose `-a'\n"); fprintf(stderr, " according to the length of the genome.\n\n"); return 1; } if (prefix == 0) { prefix = malloc(strlen(argv[optind]) + 4); strcpy(prefix, argv[optind]); if (is_64) strcat(prefix, ".64"); } str = (char*)calloc(strlen(prefix) + 10, 1); str2 = (char*)calloc(strlen(prefix) + 10, 1); str3 = (char*)calloc(strlen(prefix) + 10, 1); if (is_color == 0) { // nucleotide indexing gzFile fp = xzopen(argv[optind], "r"); t = clock(); fprintf(stderr, "[bwa_index] Pack FASTA... "); l_pac = bns_fasta2bntseq(fp, prefix, 0); fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); gzclose(fp); } else { // color indexing gzFile fp = xzopen(argv[optind], "r"); strcat(strcpy(str, prefix), ".nt"); t = clock(); fprintf(stderr, "[bwa_index] Pack nucleotide FASTA... "); l_pac = bns_fasta2bntseq(fp, str, 0); fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); gzclose(fp); { char *tmp_argv[3]; tmp_argv[0] = argv[0]; tmp_argv[1] = str; tmp_argv[2] = prefix; t = clock(); fprintf(stderr, "[bwa_index] Convert nucleotide PAC to color PAC... "); bwa_pac2cspac(3, tmp_argv); fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); } } if (algo_type == 0) algo_type = l_pac > 50000000? 2 : 3; // set the algorithm for generating BWT { strcpy(str, prefix); strcat(str, ".pac"); strcpy(str2, prefix); strcat(str2, ".bwt"); t = clock(); fprintf(stderr, "[bwa_index] Construct BWT for the packed sequence...\n"); if (algo_type == 2) bwt_bwtgen(str, str2); else if (algo_type == 1 || algo_type == 3) { bwt_t *bwt; bwt = bwt_pac2bwt(str, algo_type == 3); bwt_dump_bwt(str2, bwt); bwt_destroy(bwt); } fprintf(stderr, "[bwa_index] %.2f seconds elapse.\n", (float)(clock() - t) / CLOCKS_PER_SEC); } { bwt_t *bwt; strcpy(str, prefix); strcat(str, ".bwt"); t = clock(); fprintf(stderr, "[bwa_index] Update BWT... "); bwt = bwt_restore_bwt(str); bwt_bwtupdate_core(bwt); bwt_dump_bwt(str, bwt); bwt_destroy(bwt); fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); } { gzFile fp = xzopen(argv[optind], "r"); t = clock(); fprintf(stderr, "[bwa_index] Pack forward-only FASTA... "); l_pac = bns_fasta2bntseq(fp, prefix, 1); fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); gzclose(fp); } { bwt_t *bwt; strcpy(str, prefix); strcat(str, ".bwt"); strcpy(str3, prefix); strcat(str3, ".sa"); t = clock(); fprintf(stderr, "[bwa_index] Construct SA from BWT and Occ... "); bwt = bwt_restore_bwt(str); bwt_cal_sa(bwt, 32); bwt_dump_sa(str3, bwt); bwt_destroy(bwt); fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); } free(str3); free(str2); free(str); free(prefix); return 0; }