/*
 * Build the psi/BWT structure for `fname` and, optionally, the sampled
 * SA/ISA index file `fidx`.
 *
 * Parameters:
 *   csa     - passed BY VALUE; every field written here (k, m, K, CtoA, AtoC,
 *             SA, ISA, ...) lands in this function's private copy.
 *   fname   - forwarded to the selected *_makeindex routine.
 *   fidx    - path of the index file to create.  It is released with free()
 *             on the normal return path, so the caller must pass a heap
 *             pointer and must not use it afterwards.
 *   psi_id  - ignored on entry: it is overwritten with csa.id.
 *   idx_id  - when negative, no SA/ISA samples are computed and no index file
 *             is written.
 *   coded   - forwarded to lf_dna_makeindex() only.
 *
 * The process is terminated via exit() when the encoding id is unknown,
 * when D or D2 is >= n, or when the index file cannot be opened.
 */
void csa_new_from_bwt(CSA csa, char *fname, char *fidx, int psi_id, int idx_id, bool coded)
{
  int k;
  i64 i,j,v,m;
  FILE *f2;
  i64 psize,isize;
  i64 n;

  /* The encoding id always comes from the CSA, not from the argument. */
  psi_id = csa.id;
  if (psi_id >= 0) {
    printf("create psi: id=%d\n",psi_id);
  }
  if (idx_id >= 0) {
    printf("create idx: id=%d D=%d D2=%d\n",idx_id,csa.D,csa.D2);
  }

  /* Dispatch on the low 6 bits of the id; each builder returns a byte count. */
  psize = 0;
  if (psi_id >= 0) {
    switch (psi_id & 0x3f) {
    case ID_DIFF_GAMMA:
    case ID_DIFF_GAMMA_RL:
    case ID_DIFF_GAMMA_SPARSE:
    case ID_DIFF_GAMMA_RL_SPARSE:
      psize = psi1_makeindex(&csa, fname);
      printf("n %ld\n",csa.n);
      printf("Psi %ld bytes (%1.3f bpc)\n", psize,(double)psize*8/csa.n);
      break;
    case ID_DIFF_GAMMA_RR:
      psize = psi12_makeindex(&csa, fname);
      printf("n %ld\n",csa.n);
      printf("Psi %ld bytes (%1.3f bpc)\n", psize,(double)psize*8/csa.n);
      break;
    case ID_BWT_DNA:
      psize = lf_dna_makeindex(&csa, fname, coded);
      printf("n %ld\n",csa.n);
      printf("BW %ld bytes (%1.3f bpc)\n", psize,(double)psize*8/csa.n);
      break;
    case ID_BWT_BIT:
      psize = lf_bit_makeindex(&csa, fname);
      printf("n %ld\n",csa.n);
      printf("BW %ld bytes (%1.3f bpc)\n", psize,(double)psize*8/csa.n);
      break;
    case ID_BWT_WT:
    case ID_BWT_WT_HUF:
    case ID_BWT_WT_DENSE:
    case ID_BWT_WT_SPARSE4:
    case ID_BWT_WT_RR:
      psize = lf_wt_makeindex(&csa, fname);
      printf("n %ld\n",csa.n);
      printf("BW %ld bytes (%1.3f bpc)\n", psize,(double)psize*8/csa.n);
      break;
#if 0
    case ID_BWT_HUF:
      psize = lf_bwt_makeindex(&csa, fname);
      printf("n %ld\n",csa.n);
      printf("BW %ld bytes (%1.3f bpc)\n", psize,(double)psize*8/csa.n);
      break;
#endif
    case ID_SPARSE4:
      psize = psi2_makeindex(&csa, fname);
      printf("n %ld\n",csa.n);
      printf("Psi %ld bytes (%1.3f bpc)\n", psize,(double)psize*8/csa.n);
      break;
    default:
      printf("psi_id = %d\n",psi_id);
      exit(1);
    }
  }

  /* k is the per-entry byte width used for the SA/ISA sample arrays below
     (allocation element size and last argument of putuint). */
  csa.k = (blog(csa.n+1)+1+8-1)/8;

  /* Compact the alphabet: symbols with C[i] > 0 get consecutive ranks m.
     K[m+1] holds 1 + the sum of C over all lower-ranked symbols; K[0] = 1. */
  for (i=0; i<SIGMA; i++) csa.CtoA[i] = -1;
  csa.K[-1+1] = 1;
  for (m=0,v=1,i=0; i<SIGMA; i++) {
    if (csa.C[i]>0) {
      csa.AtoC[m] = i;
      csa.CtoA[i] = m;
      csa.K[m+1] = v;
      v += csa.C[i];
      m++;
    }
  }
  csa.K[m+1] = v;
  csa.m = m;

  /* NOTE(review): these failures terminate with status 0, as in the original. */
  if (csa.D >= csa.n) {
    printf("D=%d >= n=%ld\n",csa.D,csa.n);
    exit(0);
  }
  if (csa.D2 >= csa.n) {
    printf("D2=%d >= n=%ld\n",csa.D2,csa.n);
    exit(0);
  }

  if (idx_id >= 0) {
    n = csa.n;
    k = csa.k;

    /* Sample arrays exist only for a positive sampling rate. */
    if (csa.D > 0) csa.SA = (uchar *) mymalloc(((n-1)/csa.D+1+1)*k);
    if (csa.D2 > 0) csa.ISA = (uchar *) mymalloc(((n-1)/csa.D2+1+1)*k);

    /* Nothing to sample when both rates are zero (was: goto brk). */
    if (csa.D != 0 || csa.D2 != 0) {
      switch (psi_id & 0x3f) {
      case ID_DIFF_GAMMA:
      case ID_DIFF_GAMMA_RL:
      case ID_DIFF_GAMMA_SPARSE:
      case ID_DIFF_GAMMA_RL_SPARSE:
      case ID_SPARSE4:
      case ID_DIFF_GAMMA_RR:
        /* Follow psi from position 0; at step i the current position is j
           (conceptually sa[j] = i, per the original author's note). */
        j = 0;
        for (i=0; i<=n; i++) {
          display_progressbar("making sa ",i,n);
          j = csa.psi(&csa,j);
          if (csa.D > 0 && j % csa.D == 0) {
            putuint(csa.SA,j / csa.D,i,k);
          }
          if (csa.D2 > 0 && i % csa.D2 == 0) {
            putuint(csa.ISA,i / csa.D2,j,k);
          }
        }
        break;
      case ID_BWT_DNA:
      case ID_BWT_BIT:
      case ID_BWT_WT:
      case ID_BWT_WT_HUF:
      case ID_BWT_WT_DENSE:
      case ID_BWT_WT_SPARSE4:
      case ID_BWT_WT_RR:
      case ID_BWT_HUF:
        /* Follow LF from position 0 while i counts down from n-1 to 0. */
        j = 0;
        for (i=n-1; i>=0; i--) {
          display_progressbar("making sa ",i,n);
          v = csa.LF(&csa,j);
          j = v;
          if (csa.D > 0 && j % csa.D == 0) putuint(csa.SA, j/csa.D , i, k);
          if (csa.D2 > 0 && i % csa.D2 == 0) putuint(csa.ISA, i/csa.D2, j, k);
        }
        /* csa.SA is allocated only when D > 0; writing entry 0 otherwise
           would go through an unallocated pointer. */
        if (csa.D > 0) putuint(csa.SA,0,n,k);
        break;
      default:
        break;
      }
    }

    /* Write the index file: version, header, then optional SA and ISA. */
    f2 = fopen(fidx,"wb");
    if (f2 == NULL) {
      perror("csa2_new1: ");
      exit(1);
    }

    isize = 0;

    writeint(4,VERSION,f2); /* version */
    isize += 4;

    writeint(1,ID_HEADER,f2); /* header ID */
    isize += 1;
    isize = write_header(&csa, f2, isize);

    if (csa.D > 0) {
      writeint(1,ID_SA,f2);
      isize += 1;
      isize = write_sa(&csa, f2, isize);
    }

    if (csa.D2 > 0) {
      writeint(1,ID_ISA,f2);
      isize += 1;
      isize = write_isa(&csa, f2, isize);
    }

    fclose(f2);

    if (csa.D > 0) free(csa.SA);
    if (csa.D2 > 0) free(csa.ISA);

    printf("Total %ld bytes (%1.3f bpc)\n",(psize+isize),
           (double)(psize+isize)*8/csa.n);
  }

  /* Ownership of fidx was handed to this function. */
  free(fidx);
}
void csa_new_from_bwt(int argc, char *argv[]) { i64 i,j,v,m; FILE *f2; i64 psize,isize; i64 n; int k; char *fname,*fidx; char *p; int psi_id, idx_id; CSA csa; int sigma; csa.sigma = 256; /* default alphabet size */ csa.k2 = 1; // for (i=0; i<SIGMA+2; i++) csa.C[i] = 0; // for (i=0; i<SIGMA; i++) csa.C[i] = 0; fname = NULL; fidx = NULL; psi_id = idx_id = -1; for (i=1; i<argc; i++) { p = argv[i]; if (p[0] == '-') { p++; switch (toupper(p[0])) { case 'I': // -I[n]:[D]:[D2] p++; idx_id = 0; csa_options(&csa, p); break; case 'P': // -P[n]:[L] p++; psi_id = 0; psi_options(&csa, p); break; case 'C': // -C[s] p++; sigma_options(&csa, p); break; default: printf("??? no such option %s\n",argv[i]); exit(1); } } else { fname = argv[i]; k = strlen(fname); fidx = mymalloc(k+5); sprintf(fidx,"%s.idx",fname); } } if (fname == NULL) { printf("no input file.\n"); exit(0); } printf("sigma = %d k2 = %d\n", csa.sigma, csa.k2); sigma = csa.sigma; csa.C = mymalloc(sizeof(*csa.C)*sigma); // csa.CtoA = mymalloc(sizeof(*csa.CtoA)*sigma); // csa.AtoC = mymalloc(sizeof(*csa.AtoC)*sigma); // csa.K = mymalloc(sizeof(*csa.K)*(sigma+2)); // for (i=0; i<sigma; i++) csa.C[i] = 0; psi_id = csa.id; if (psi_id >= 0) { printf("create psi: id=%d\n",psi_id); } if (idx_id >= 0) { printf("create idx: id=%d D=%d D2=%d\n",idx_id,csa.D,csa.D2); } psize = 0; if (psi_id >= 0) { switch (psi_id & 0x3f) { case ID_DIFF_GAMMA: case ID_DIFF_GAMMA_RL: case ID_DIFF_GAMMA_SPARSE: case ID_DIFF_GAMMA_RL_SPARSE: psize = psi1_makeindex(&csa, fname); printf("n %ld\n",csa.n); printf("Psi %ld bytes (%1.3f bpc)\n", psize,(double)psize*8/csa.n); break; case ID_DIFF_GAMMA_RR: psize = psi12_makeindex(&csa, fname); printf("n %ld\n",csa.n); printf("Psi %ld bytes (%1.3f bpc)\n", psize,(double)psize*8/csa.n); break; case ID_BWT_DNA: psize = lf_dna_makeindex(&csa, fname); printf("n %ld\n",csa.n); printf("BW %ld bytes (%1.3f bpc)\n", psize,(double)psize*8/csa.n); break; case ID_BWT_DNA2: psize = lf_dna2_makeindex(&csa, fname); printf("n 
%ld\n",csa.n); printf("BW %ld bytes (%1.3f bpc)\n", psize,(double)psize*8/csa.n); break; case ID_BWT_BIT: psize = lf_bit_makeindex(&csa, fname); printf("n %ld\n",csa.n); printf("BW %ld bytes (%1.3f bpc)\n", psize,(double)psize*8/csa.n); break; case ID_BWT_WT: case ID_BWT_WT_HUF: case ID_BWT_WT_DENSE: case ID_BWT_WT_SPARSE4: case ID_BWT_WT_RR: psize = lf_wt_makeindex(&csa, fname); printf("n %ld\n",csa.n); printf("BW %ld bytes (%1.3f bpc)\n", psize,(double)psize*8/csa.n); break; #if 0 case ID_BWT_HUF: psize = lf_bwt_makeindex(&csa, fname); printf("n %ld\n",csa.n); printf("BW %ld bytes (%1.3f bpc)\n", psize,(double)psize*8/csa.n); break; #endif case ID_SPARSE4: psize = psi2_makeindex(&csa, fname); printf("n %ld\n",csa.n); printf("Psi %ld bytes (%1.3f bpc)\n", psize,(double)psize*8/csa.n); break; default: printf("psi_id = %d\n",psi_id); exit(1); } } csa.k = (blog(csa.n+1)+1+8-1)/8; for (i=0; i<sigma; i++) csa.CtoA[i] = -1; csa.K[-1+1] = 1; for (m=0,v=1,i=0; i<sigma; i++) { if (csa.C[i]>0) { csa.AtoC[m] = i; csa.CtoA[i] = m; csa.K[m+1] = v; // printf("i=%ld v = %ld C[i] = %ld\n",i,v,csa.C[i]); v += csa.C[i]; m++; } } csa.K[m+1] = v; csa.m = m; if (csa.D >= csa.n) { printf("D=%d >= n=%ld\n",csa.D,csa.n); exit(0); } if (csa.D2 >= csa.n) { printf("D2=%d >= n=%ld\n",csa.D2,csa.n); exit(0); } if (idx_id >= 0) { n = csa.n; k = csa.k; //// compute SA and ISA if (csa.D > 0) csa.SA = mymalloc(((n-1)/csa.D+1+1)*k); if (csa.D2 > 0) csa.ISA = mymalloc(((n-1)/csa.D2+1+1)*k); if (csa.D == 0 && csa.D2 == 0) goto brk; switch (psi_id & 0x3f) { case ID_DIFF_GAMMA: case ID_DIFF_GAMMA_RL: case ID_DIFF_GAMMA_SPARSE: case ID_DIFF_GAMMA_RL_SPARSE: case ID_SPARSE4: case ID_DIFF_GAMMA_RR: j = 0; for (i=0; i<=n; i++) { display_progressbar("making sa ",i,n); j = csa.psi(&csa,j); // sa[j] = i; if (csa.D > 0 && j % csa.D == 0) { putuint(csa.SA,j / csa.D,i,k); } if (csa.D2 > 0 && i % csa.D2 == 0) { putuint(csa.ISA,i / csa.D2,j,k); } } // putuint(csa.SA,0,n,k); break; case ID_BWT_DNA: case 
ID_BWT_DNA2: case ID_BWT_BIT: case ID_BWT_WT: case ID_BWT_WT_HUF: case ID_BWT_WT_DENSE: case ID_BWT_WT_SPARSE4: case ID_BWT_WT_RR: case ID_BWT_HUF: j = 0; for (i=n-1; i>=0; i--) { display_progressbar("making sa ",i,n); v = csa.LF(&csa,j); // printf("LF[%ld] = %ld\n",j,v); j = v; if (csa.D > 0 && j % csa.D == 0) putuint(csa.SA, j/csa.D , i, k); if (csa.D2 > 0 && i % csa.D2 == 0) putuint(csa.ISA, i/csa.D2, j, k); } // putuint(csa.SA,0,n,k); if (csa.D > 0) putuint(csa.SA,0,n,k); // 2011-12-20 break; default: break; } brk: //// write idx f2 = fopen(fidx,"wb"); /* directory */ if (f2 == NULL) { perror("csa2_new1: "); exit(1); } isize = 0; writeint(4,VERSION,f2); /* version */ isize += 4; writeint(1,ID_HEADER,f2); // header ID isize += 1; isize = write_header(&csa, f2, isize); if (csa.D > 0) { writeint(1,ID_SA,f2); isize += 1; isize = write_sa(&csa, f2, isize); } if (csa.D2 > 0) { writeint(1,ID_ISA,f2); isize += 1; isize = write_isa(&csa, f2, isize); } fclose(f2); if (csa.D > 0) free(csa.SA); if (csa.D2 > 0) free(csa.ISA); printf("Total %ld bytes (%1.3f bpc)\n",(psize+isize), (double)(psize+isize)*8/csa.n); } free(fidx); }