/*
 * bw_to_psi (byte-alphabet variant).
 *
 * Reads one integer `last` from the text file `flst`, then makes two passes
 * over the file `fbw`, one byte per symbol:
 *   pass 1: counts each byte value into csa->C[] and the total into csa->n;
 *   pass 2: for i = 0..n, stores i into the disk buffer backed by `out`,
 *           at slot 0 when i == last, otherwise at the next free slot of the
 *           symbol read from the file (slots of symbol c start at
 *           1 + sum of csa->C[0..c-1]).
 *
 * out  - file backing the disk buffers that receive the result
 * csa  - csa->C[] and csa->n are overwritten
 * fbw  - path of the symbol file
 * flst - path of the file holding `last`
 * k    - out: number of bytes used per stored integer
 *
 * Exits the process on any I/O or consistency error.
 *
 * NOTE(review): `psi` is not declared in this function, so it has to come
 * from file scope. Another definition of bw_to_psi with the same signature
 * appears in this file; presumably only one of them is compiled - confirm.
 */
void bw_to_psi(FILE *out, CSA *csa, char *fbw, char *flst, int *k)
{
  FILE *in;
  i64 last, i, j;
  i64 C2[SIGMA];
  i64 c;
  long long last_in;

  in = fopen(flst, "r");
  if (in == NULL) {
    perror("bw_to_psi:");
    exit(1);
  }
  /* A failed conversion would otherwise leave `last` uninitialized. */
  if (fscanf(in, "%lld", &last_in) != 1) {
    fprintf(stderr, "bw_to_psi: cannot read last from %s\n", flst);
    exit(1);
  }
  fclose(in);
  last = (i64)last_in;

  for (c = 0; c < SIGMA; c++) {
    csa->C[c] = 0;
  }

  /* Symbol data is raw bytes, so open in binary mode. */
  in = fopen(fbw, "rb");
  if (in == NULL) {
    perror("bw_to_psi:");
    exit(1);
  }

  /* Pass 1: histogram of byte values and total length. */
  csa->n = 0;
  while (1) {
    display_progressbar("reading ", csa->n, 0L);
    c = fgetc(in);
    if (c == EOF) break;
    csa->C[c]++;
    csa->n++;
  }
  rewind(in);
  printf("n = %ld last = %ld\n", csa->n, last);

  /*
   * Pass 2 runs n+1 iterations and skips the read only when i == last, so
   * `last` must lie in [0, n]; otherwise the file would be read past EOF.
   */
  if (last < 0 || last > csa->n) {
    fprintf(stderr, "bw_to_psi: last = %ld out of range (n = %ld)\n",
            (long)last, (long)csa->n);
    exit(1);
  }

  *k = (blog(csa->n+1)+1+8-1)/8;
  for (c = 0; c < SIGMA; c++) {
    psi[c] = open_diskbuf(out, *k);
  }

  /* C2[c] = first slot for symbol c; slot 0 is reserved for `last`. */
  for (j = 1, c = 0; c < SIGMA; c++) {
    C2[c] = j;
    j += csa->C[c];
  }

  /* Pass 2: scatter the positions into their slots. */
  for (i = 0; i <= csa->n; i++) {
    display_progressbar("computing psi ", i, csa->n);
    if (i == last) {
      setint_diskbuf(psi[0], 0, i);
    } else {
      c = fgetc(in);
      if (c == EOF) {
        fprintf(stderr, "bw_to_psi: unexpected end of %s at i = %ld\n",
                fbw, (long)i);
        exit(1);
      }
      setint_diskbuf(psi[c], C2[c]++, i);
    }
  }
  fclose(in);

  for (c = 0; c < SIGMA; c++) {
    close_diskbuf(psi[c]);
  }
}
/*
 * bw_to_psi: build the psi array on disk from a symbol file.
 *
 * Reads one integer `last` from the text file `flst`, then makes two passes
 * over the file `fbw`, reading csa->k2 bytes per symbol with readint():
 *   pass 1: counts each symbol into a freshly allocated csa->C[] (csa->sigma
 *           entries) and the total into csa->n;
 *   pass 2: for i = 0..n, stores i into the disk buffer backed by `out`,
 *           at slot 0 when i == last, otherwise at the next free slot of the
 *           symbol read from the file (slots of symbol c start at
 *           1 + sum of csa->C[0..c-1]).
 *
 * out  - file backing the disk buffers that receive the result
 * csa  - reads csa->sigma and csa->k2; overwrites csa->C and csa->n
 * fbw  - path of the symbol file
 * flst - path of the file holding `last`
 * k    - out: number of bytes used per stored integer
 *
 * Exits the process on any I/O or consistency error.
 */
void bw_to_psi(FILE *out, CSA *csa, char *fbw, char *flst, int *k)
{
  FILE *in;
  i64 last, i, j;
  i64 *C2;
  i64 c;
  diskbuf **psi;
  int sigma;
  int k2;
  long long last_in;

  in = fopen(flst, "r");
  if (in == NULL) {
    perror("bw_to_psi:");
    exit(1);
  }
  /* A failed conversion would otherwise leave `last` uninitialized. */
  if (fscanf(in, "%lld", &last_in) != 1) {
    fprintf(stderr, "bw_to_psi: cannot read last from %s\n", flst);
    exit(1);
  }
  fclose(in);
  last = (i64)last_in;

  sigma = csa->sigma;
  k2 = csa->k2;

  csa->C = mymalloc(sizeof(*csa->C)*sigma);
  C2 = mymalloc(sizeof(*C2)*sigma);

  for (c = 0; c < sigma; c++) {
    csa->C[c] = 0;
  }

  /* Symbols are raw k2-byte integers, so open in binary mode. */
  in = fopen(fbw, "rb");
  if (in == NULL) {
    perror("bw_to_psi:");
    exit(1);
  }

  /* Pass 1: histogram of symbols and total length. */
  csa->n = 0;
  while (1) {
    display_progressbar("reading ", csa->n, 0L);
    c = readint(k2, in);
    if (c == EOF) break;
    if (c < 0 || c >= sigma) {
      /* c is i64: cast so the argument matches the %ld conversion. */
      printf("bw_to_psi: c = %ld sigma = %d\n", (long)c, sigma);
      exit(1);
    }
    csa->C[c]++;
    csa->n++;
  }
  rewind(in);
  printf("n = %ld last = %ld\n", csa->n, last);

  /*
   * Pass 2 runs n+1 iterations and skips the read only when i == last, so
   * `last` must lie in [0, n]; otherwise the file would be read past EOF.
   */
  if (last < 0 || last > csa->n) {
    fprintf(stderr, "bw_to_psi: last = %ld out of range (n = %ld)\n",
            (long)last, (long)csa->n);
    exit(1);
  }

  /* Array of sigma diskbuf pointers (not sigma diskbuf structs). */
  psi = mymalloc(sizeof(*psi)*sigma);

  *k = (blog(csa->n+1)+1+8-1)/8;
  for (c = 0; c < sigma; c++) {
    psi[c] = open_diskbuf(out, *k);
  }

  /* C2[c] = first slot for symbol c; slot 0 is reserved for `last`. */
  for (j = 1, c = 0; c < sigma; c++) {
    C2[c] = j;
    j += csa->C[c];
  }

  /* Pass 2: scatter the positions into their slots. */
  for (i = 0; i <= csa->n; i++) {
    display_progressbar("computing psi ", i, csa->n);
    if (i == last) {
      setint_diskbuf(psi[0], 0, i);
    } else {
      c = readint(k2, in);
      if (c < 0 || c >= sigma) {
        fprintf(stderr, "bw_to_psi: unexpected symbol %ld at i = %ld\n",
                (long)c, (long)i);
        exit(1);
      }
      setint_diskbuf(psi[c], C2[c]++, i);
    }
  }
  fclose(in);

  for (c = 0; c < sigma; c++) {
    close_diskbuf(psi[c]);
  }
  free(psi);
  free(C2);
}
/*
 * psi12_makeindex: build the encoded psi index files for `fname`.
 *
 * Steps:
 *   1. bw_to_psi() turns fname.bw / fname.lst into a psi array held in a
 *      temporary file.
 *   2. The array is cut into blocks of L = ps->L entries. The first value of
 *      every block is kept as a sample together with b, the running offset
 *      (counted in shorts) of that block's encoded data. Samples go either
 *      straight into the header file or, when ID_COMPPTR is set in the id,
 *      into two sparse arrays written at the end of the header file.
 *   3. Inside a block the differences d between consecutive values (wrapped
 *      by n+1 when not positive) are encoded into Btmp with ENCODENUM: a run
 *      of d == 1 is stored as its length, any other d as d-1. One flag bit at
 *      the start of the block records whether the block begins with a run.
 *   4. The result is loaded back with psi1_read().
 *
 * csa   - csa->psi_struc must point to a psi1 whose id and L are set;
 *         csa->C / csa->n are filled by bw_to_psi()
 * fname - base name; ".bw", ".lst", ".pxi" and ".pxd" are appended
 *
 * Returns the number of bytes accounted for in the two output files (the
 * sparse arrays written for ID_COMPPTR are not included in that count).
 * Exits the process on errors; note that the L >= n case exits with status 0.
 */
i64 psi12_makeindex(CSA *csa, char *fname)
{
  i64 psize, psize1, psize2;
  i64 b, b2;
  i64 i, j, x, xx;
  i64 y, d, w;
  int k;
  FILE *f1, *f2;
  char *fpsi, *fpsd;
  i64 runlen;
  i64 n, L;
  psi1 *ps;
  int id, id2;
  FILE *out;
  diskbuf *psi;
  char *fbw, *flst;
  SPARSEARRAY sx, sb;
  int mm;

  ps = (psi1 *)csa->psi_struc;
  id = ps->id;
  id2 = id & 0x3f;

  /* Longest suffix is 4 characters, plus the terminating NUL. */
  k = strlen(fname);
  fbw = mymalloc(k+5);
  flst = mymalloc(k+5);
  fpsi = mymalloc(k+5);
  fpsd = mymalloc(k+5);
  sprintf(fbw, "%s.bw", fname);
  sprintf(flst, "%s.lst", fname);

  switch (id2) {
  case ID_DIFF_GAMMA_RR:
    sprintf(fpsi, "%s.pxi", fname);
    sprintf(fpsd, "%s.pxd", fname);
    break;
  default:
    /* Without this the output names would be uninitialized memory. */
    fprintf(stderr, "psi12_makeindex: unsupported id %d\n", id2);
    exit(1);
  }

  out = create_tmp(0);
  bw_to_psi(out, csa, fbw, flst, &k);

  psi = open_diskbuf(out, k);
  ps->last = getint_diskbuf(psi, 0);
  printf("last = %ld\n", ps->last);

  n = csa->n;
  L = ps->L;
  if (L >= n) {
    printf("L=%ld >= n=%ld\n", L, n);
    exit(0);
  }

  mkdecodetable();

  f1 = fopen(fpsi, "wb");
  if (f1 == NULL) {
    perror("psi12_makeindex:");
    exit(1);
  }
  psize1 = 0;

  f2 = fopen(fpsd, "wb");
  if (f2 == NULL) {
    perror("psi12_makeindex:");
    exit(1);
  }
  psize2 = 0;

  ps->k = k = (blog(n+1)+1+8-1)/8;

  /* Header: tag, integer width, n, L, id. */
  writeint(1, ID_PSI, f2);
  writeint(1, k, f2); /* #bytes of integer */
  writeint(k, n, f2);
  writeint(k, L, f2);
  psize2 += 1+1+2*k;

  writeint(1, id, f2);
  psize2 += 1;

  if (id & ID_COMPPTR) {
    /*
     * Count the symbols that occur; csa->C holds csa->sigma entries (see
     * bw_to_psi), so that is the bound to use here.
     */
    mm = 0;
    for (i = 0; i < csa->sigma; i++) {
      if (csa->C[i] > 0) mm++;
    }
    SPARSEARRAY_construct_init(&sx, (mm+1)*(n+1), n/L+1);
    SPARSEARRAY_construct_init(&sb, n, n/L+1);
  }

  b = b2 = 0;
  mm = 0; /* from here on: number of times the sampled value went down */
  xx = 0;
  for (j = 0; j <= n/L; j++) {
    if (j % 100000 == 0) {
      printf("%ld %1.3f bpc\r", j, (double)psize2*8/(j+1)/L);
      fflush(stdout);
    }

    /* Sample: first value of the block and the offset of its encoding. */
    y = getint_diskbuf(psi, j*L);
    if (id & ID_COMPPTR) {
      /* Adding mm*(n+1) keeps the stored sample sequence increasing. */
      if (y <= xx) {
        mm++;
      }
      SPARSEARRAY_construct_set(&sx, j, mm*(n+1) + y);
      SPARSEARRAY_construct_set(&sb, j, b);
      xx = y;
    } else {
      writeint(k, y, f2);
      writeint(k, b, f2);
      psize2 += 2*k;
    }

    x = y;
    runlen = 0;
    b2 = 0;
    for (i = j*L+1; i < (j+1)*L && i <= n; i++) { /* psi[j*L] are not encoded */
      y = getint_diskbuf(psi, i);
      d = y - x;
      if (d <= 0) {
        d += n+1;
      }
      if (i == j*L+1) {
        /* Flag bit: does the block start with a run of d == 1? */
        if (d == 1) {
          setbit(Btmp, b2+1, 1);
          runlen = 1;
        } else {
          setbit(Btmp, b2+1, 0);
          runlen = 0;
        }
        b2++;
      }
      if (d > 1) {
        if (runlen > 0) {
          /* presumably ENCODENUM returns the number of bits it wrote */
          w = ENCODENUM(Btmp, b2, runlen);
          b2 += w;
          runlen = 0;
        }
        w = ENCODENUM(Btmp, b2, d-1);
        b2 += w;
        runlen = 1;
      } else {
        runlen++;
      }
      x = y;
    }
    if (runlen > 0) {
      w = ENCODENUM(Btmp, b2, runlen);
      b2 += w;
      runlen = 0;
    }

    /* Flush this block, rounded up to whole shorts. */
    fwrite(Btmp, (b2+15) / 16, sizeof(short), f1);
    psize1 += (b2+15)/16*sizeof(short);
    b += (b2+15) / 16;
    b2 = 0;
  }
  /* b2 is always 0 here: every block is flushed inside the loop. */

  fwrite(Btmp, 1, sizeof(short), f1); /* padding: getbitD reads one extra word */
  psize1 += 1*sizeof(short);

  if (id & ID_COMPPTR) {
    SPARSEARRAY_construct_end(&sx, SDARRAY_SELECT1);
    SPARSEARRAY_construct_end(&sb, SDARRAY_SELECT1);
    SPARSEARRAY_write(&sx, f2);
    SPARSEARRAY_write(&sb, f2);
  }

  psize = psize1 + psize2;
  printf("size %ld (%1.3f bpc)\n", psize, (double)psize*8 / n);

  fclose(f1);
  fclose(f2);

  close_diskbuf(psi);
  fclose(out);
  remove_tmp(0);

  psi1_read(csa, fpsd);

  free(fpsi);
  free(fpsd);
  free(fbw);
  free(flst);

  return psize;
}
/*
 * psi2_makeindex: build the sparse-array psi index file for `fname`.
 *
 * Steps:
 *   1. bw_to_psi() turns fname.bw / fname.lst into a psi array held in a
 *      temporary file.
 *   2. Every value x = psi[j], j = 0..n, is stored in a sparsearray4 as
 *      d*(n+1) + x, where d counts how many times the sequence has gone
 *      down so far; this keeps the stored sequence increasing.
 *   3. A small header (tag, integer width, n, id) followed by the sparse
 *      array is written to fname.psa.
 *   4. The result is loaded back with psi2_read().
 *
 * csa   - csa->psi_struc must point to a psi2; csa->id is written into the
 *         header; csa->C / csa->n are filled by bw_to_psi()
 * fname - base name; ".bw", ".lst" and ".psa" are appended
 *
 * Returns the number of bytes written to fname.psa as accumulated from the
 * header fields and the value returned by sparsearray4_write().
 * Exits the process if the output file cannot be created.
 */
i64 psi2_makeindex(CSA *csa, char *fname)
{
  i64 psize;
  i64 i, j, x;
  i64 y, d;
  int k;
  FILE *f2;
  char *fpsi;
  i64 n, m;
  i64 nn, mm;
  psi2 *ps;
  int id;
  FILE *out;
  diskbuf *psi;
  char *fbw, *flst;
  sparsearray4 sa;

  ps = (psi2 *)csa->psi_struc;
  id = csa->id;

  /* Longest suffix is 4 characters, plus the terminating NUL. */
  k = strlen(fname);
  fbw = mymalloc(k+5);
  flst = mymalloc(k+5);
  fpsi = mymalloc(k+5);
  sprintf(fbw, "%s.bw", fname);
  sprintf(flst, "%s.lst", fname);
  sprintf(fpsi, "%s.psa", fname);

  out = create_tmp(0);
  bw_to_psi(out, csa, fbw, flst, &k);

  /*
   * Count the symbols that occur; csa->C holds csa->sigma entries (see
   * bw_to_psi), so that is the bound to use here.
   */
  m = 0;
  for (i = 0; i < csa->sigma; i++) {
    if (csa->C[i] > 0) m++;
  }

  psi = open_diskbuf(out, k);
  ps->last = getint_diskbuf(psi, 0);
  printf("last = %ld\n", ps->last);

  n = csa->n;

  mm = n+1;           /* number of stored entries */
  nn = (m+1) * (n+1); /* upper bound on stored values */
  sparsearray4_construct_init(&sa, nn, mm);

  y = 0; /* previous value */
  d = 0; /* number of descents seen so far */
  for (j = 0; j <= n; j++) {
    display_progressbar("compressing psi ", j, n);
    x = getint_diskbuf(psi, j);
    if (x <= y) d++;
    sparsearray4_construct_set(&sa, j, d*(n+1)+x);
    y = x;
  }
  sparsearray4_construct_end(&sa, 0, SDARRAY_SELECT1);

  f2 = fopen(fpsi, "wb");
  if (f2 == NULL) {
    perror("psi2_makeindex:");
    exit(1);
  }
  psize = 0;

  ps->k = k = (blog(n+1)+1+8-1)/8;

  /* Header: tag, integer width, n, id. */
  writeint(1, ID_PSI, f2);
  writeint(1, k, f2); /* #bytes of integer */
  writeint(k, n, f2);
  psize += 1+1+k;

  writeint(1, id, f2);
  psize += 1;

  psize += sparsearray4_write(&sa, f2);

  printf("size %ld (%1.3f bpc)\n", psize, (double)psize*8 / n);

  fclose(f2);

  close_diskbuf(psi);
  fclose(out);
  remove_tmp(0);

  psi2_read(csa, fpsi);

  free(fpsi);
  free(fbw);
  free(flst);

  return psize;
}