/*
 * Build the on-disk psi index for `csa` and load it back with psi1_read().
 *
 * Input file names "<fname>.bw" and "<fname>.lst" are handed to bw_to_psi(),
 * which fills a temporary file that is then read through a diskbuf.
 * Two output files are written:
 *   "<fname>.pxi"  the encoded bit stream, one block per L psi values
 *   "<fname>.pxd"  the directory: header, then per-block (value, word offset)
 *                  pairs, or two SPARSEARRAYs when ID_COMPPTR is set in the id
 *
 * Only the ID_DIFF_GAMMA_RR encoding (low 6 bits of ps->id) is handled here;
 * any other value terminates the program with an error, as do failures to
 * open the output files.
 *
 * Returns the number of bytes accounted for in both files.  The SPARSEARRAY
 * data written in the ID_COMPPTR case is not included in that count.
 *
 * Relies on the file-scope buffer Btmp and on ps->L, ps->id and csa->n being
 * set by the caller.
 */
i64 psi12_makeindex(CSA *csa, char *fname)
{
  i64 psize, psize1, psize2;
  i64 b, b2;
  i64 i, j, x, xx;
  i64 y, d, w;
  int k;
  FILE *f1, *f2;
  char *fpsi, *fpsd;
  i64 runlen;
  i64 n, L;
  psi1 *ps;
  int id, id2;
  FILE *out;
  diskbuf *psi;
  char *fbw, *flst;
  SPARSEARRAY sx, sb;
  int mm;

  ps = (psi1 *)csa->psi_struc;
  id = ps->id;
  id2 = id & 0x3f;

  /* Longest suffix appended below is ".lst"/".pxi"/".pxd": 4 chars + NUL. */
  k = strlen(fname);
  fbw = mymalloc(k+5);
  flst = mymalloc(k+5);
  fpsi = mymalloc(k+5);
  fpsd = mymalloc(k+5);
  sprintf(fbw,"%s.bw",fname);
  sprintf(flst,"%s.lst",fname);

  switch (id2) {
  case ID_DIFF_GAMMA_RR:
    sprintf(fpsi,"%s.pxi",fname);
    sprintf(fpsd,"%s.pxd",fname);
    break;
  default:
    /* Without this, fpsi/fpsd would be used uninitialised below. */
    fprintf(stderr,"psi12_makeindex: unsupported psi id %d\n",id);
    exit(1);
  }

  out = create_tmp(0);
  bw_to_psi(out,csa,fbw,flst,&k);

  psi = open_diskbuf(out,k);
  ps->last = getint_diskbuf(psi,0);
  printf("last = %ld\n",ps->last);

  n = csa->n;
  L = ps->L;
  if (L >= n) {
    /* NOTE(review): exits with status 0 although this is an error path;
       kept as-is because callers may depend on it. */
    printf("L=%ld >= n=%ld\n",L,n);
    exit(0);
  }

  mkdecodetable();

  f1 = fopen(fpsi,"wb");
  f2 = fopen(fpsd,"wb");
  if (f1 == NULL || f2 == NULL) {
    perror("psi12_makeindex: ");
    exit(1);
  }
  psize1 = 0;
  psize2 = 0;

  ps->k = k = (blog(n+1)+1+8-1)/8;

  /* Directory header. */
  writeint(1,ID_PSI,f2);
  writeint(1,k,f2); /* #bytes of integer */
  writeint(k,n,f2);
  writeint(k,L,f2);
  psize2 += 1+1+2*k;

  writeint(1,id,f2);
  psize2 += 1;

  if (id & ID_COMPPTR) {
    /* Count the characters that occur, to size the first sparse array. */
    mm = 0;
    for (i=0; i<SIGMA; i++) {
      if (csa->C[i] > 0) mm++;
    }
    SPARSEARRAY_construct_init(&sx, (mm+1)*(n+1), n/L+1);
    SPARSEARRAY_construct_init(&sb, n, n/L+1);
  }

  b = 0;   /* running offset into f1, in 16-bit words */
  mm = 0;  /* number of times the sampled value failed to increase */
  xx = 0;  /* previous sampled value */
  for (j=0; j<=n/L; j++) {
    if (j % 100000 == 0) {
      printf("%ld %1.3f bpc\r",j,(double)psize2*8/(j+1)/L);
      fflush(stdout);
    }

    /* The first value of each block is stored verbatim in the directory. */
    y = getint_diskbuf(psi,j*L);
    if (id & ID_COMPPTR) {
      if (y <= xx) {
        mm++;
      }
      /* Adding mm*(n+1) keeps the stored sequence increasing. */
      SPARSEARRAY_construct_set(&sx, j, mm*(n+1) + y);
      SPARSEARRAY_construct_set(&sb, j, b);
      xx = y;
    } else {
      writeint(k,y,f2);
      writeint(k,b,f2);
      psize2 += 2*k;
    }

    x = y;
    runlen = 0;
    b2 = 0;  /* bits used in Btmp for this block */
    for (i=j*L+1; i<(j+1)*L && i <= n; i++) { /* psi[j*L] are not encoded */
      y = getint_diskbuf(psi,i);
      d = y - x;
      if (d <= 0) {
        d += n+1;
      }
      if (i == j*L+1) {
        /* One leading flag bit: does the block start with a difference of 1? */
        if (d == 1) {
          setbit(Btmp,b2+1,1);
          runlen = 1;
        } else {
          setbit(Btmp,b2+1,0);
          runlen = 0;
        }
        b2++;
      }
      if (d > 1) {
        if (runlen > 0) {
          /* Flush the pending run before the larger difference. */
          w = ENCODENUM(Btmp,b2,runlen);
          b2 += w;
          runlen = 0;
        }
        w = ENCODENUM(Btmp,b2,d-1);
        b2 += w;
        runlen = 1;
      } else {
        runlen++;
      }
      x = y;
    }
    if (runlen > 0) {
      w = ENCODENUM(Btmp,b2,runlen);
      b2 += w;
      runlen = 0;
    }

    /* Each block is padded to a whole number of 16-bit words. */
    fwrite(Btmp,(b2+15) / 16,sizeof(short),f1);
    psize1 += (b2+15)/16*sizeof(short);
    b += (b2+15) / 16;
  }

  /* One extra trailing word, because getbitD reads one word past the data. */
  fwrite(Btmp,1,sizeof(short),f1);
  psize1 += 1*sizeof(short);

  if (id & ID_COMPPTR) {
    SPARSEARRAY_construct_end(&sx, SDARRAY_SELECT1);
    SPARSEARRAY_construct_end(&sb, SDARRAY_SELECT1);
    SPARSEARRAY_write(&sx, f2);
    SPARSEARRAY_write(&sb, f2);
  }

  psize = psize1 + psize2;
  printf("size %ld (%1.3f bpc)\n",psize,(double)psize*8 / n);

  fclose(f1);
  fclose(f2);

  close_diskbuf(psi);
  fclose(out);
  remove_tmp(0);

  /* Load the index that was just written. */
  psi1_read(csa, fpsd);

  free(fpsi);
  free(fpsd);
  free(fbw);
  free(flst);

  return psize;
}
/*
 * Write a compressed index for the text s[0..n-1] to two files.
 *
 *   fname1  receives the ENCODENUM-coded bit stream (psi)
 *   fname2  receives the directory: parameters, character tables, sampled
 *           stream positions, and samples taken from p[]
 *
 * p is indexed 1..n here; rankb_w and rankb_w2 are sampling intervals.
 * Terminates the program if a file cannot be opened or memory runs out.
 */
void csa_new(int n, int *p, unsigned char *s, char *fname1, char *fname2, int rankb_w, int rankb_w2)
{
  int cum[SIGMA+1];      /* per-character counts, later cumulative counts */
  int start[SIGMA+2];    /* 1-based start rank of each occurring character */
  int sym[SIGMA+1];      /* compact index -> character code */
  unsigned short Btmp[64];
  const int inv_step = rankb_w*16;
  FILE *psi_fp, *dir_fp;
  int *seq, *inv;
  int c, pos, nsyms, running, next_rank;
  int prev, gap, code_len, total_bits, pending_bits;
  int inv_count;
  int psi_bytes, dir_bytes;

  psi_fp = fopen(fname1,"wb"); /* psi */
  dir_fp = fopen(fname2,"wb"); /* directory */
  if (psi_fp == NULL || dir_fp == NULL) {
    perror("csa2_new1: ");
    exit(1);
  }

  /* Character statistics. */
  for (c = 0; c < SIGMA; c++) {
    cum[c] = 0;
  }
  for (pos = 0; pos < n; pos++) {
    cum[s[pos]] += 1;
  }
  nsyms = 0;
  next_rank = 1;
  for (c = 0; c < SIGMA; c++) {
    if (cum[c] > 0) {
      nsyms++;
      sym[nsyms] = c;
      start[nsyms] = next_rank;
      next_rank += cum[c];
    }
  }
  start[nsyms+1] = next_rank;
  running = 0;
  for (c = 0; c < SIGMA; c++) {
    running += cum[c];
    cum[c] = running;
  }

  psi_bytes = 0;
  dir_bytes = 0;

  /* Directory header: six parameters. */
  writeint(n,dir_fp);              /* text length */
  writeint(rankb_w2,dir_fp);       /* sampling interval of the coded stream */
  writeint(rankb_w,dir_fp);        /* sampling interval of p[] */
  writeint((rankb_w*16),dir_fp);   /* sampling interval of the inverse of p[] */
  writeint(SIGMA,dir_fp);          /* alphabet size */
  writeint(nsyms,dir_fp);          /* number of distinct characters */
  dir_bytes += 6*sizeof(int);

  /* Character tables. */
  for (c = 0; c < SIGMA; c++) {
    writeint(cum[c],dir_fp);
  }
  dir_bytes += SIGMA*sizeof(int);
  for (c = 1; c <= nsyms+1; c++) {
    writeint(start[c],dir_fp);
  }
  dir_bytes += (nsyms+1)*sizeof(int);
  for (c = 1; c <= nsyms; c++) {
    writeint(sym[c],dir_fp);
  }
  dir_bytes += nsyms*sizeof(int);

  seq = (int *)malloc((n+2) * sizeof(*seq));
  if (seq == NULL) {
    fprintf(stderr, "psi_new2 malloc I failed\n");
    exit(1);
  }

  /* presumably fills seq[1..n]; psisort2 is defined elsewhere */
  psisort2(p,seq,s-1,n);

  writeint(-1,dir_fp); /* R[0] */
  writeint(0,dir_fp);  /* P[0] */
  dir_bytes += 2*sizeof(int);

  /*
   * Encode seq[1..n] as differences from the previous value.  When the
   * sequence decreases, an escape difference of (n+65536)-prev is emitted,
   * prev is reset to -1, and the same element is encoded again.
   */
  prev = -1;
  total_bits = 0;
  pending_bits = 0;
  pos = 1;
  while (pos <= n) {
    const int decreased = (seq[pos] < prev);

    if (decreased) {
      gap = (n+65536) - prev;
    } else {
      gap = seq[pos] - prev;
    }
    code_len = ENCODENUM(Btmp,pending_bits,gap);
    total_bits += code_len;
    pending_bits += code_len;

    if (pending_bits >= 16) {
      /* Flush the complete 16-bit words, keep the partial one at the front. */
      fwrite(Btmp,pending_bits / 16,sizeof(short),psi_fp);
      psi_bytes += (pending_bits/16)*sizeof(short);
      Btmp[0] = Btmp[pending_bits / 16];
      pending_bits = pending_bits % 16;
    }

    if (decreased) {
      prev = -1;           /* retry the same position from a fresh base */
      continue;
    }

    prev = seq[pos];
    if (pos % rankb_w2 == 0) {
      writeint(seq[pos],dir_fp);     /* R[i / L] */
      writeint(total_bits,dir_fp);   /* P[i / L] */
      dir_bytes += 2*sizeof(int);
    }
    pos++;
  }
  if (pending_bits > 0) {
    fwrite(Btmp,(pending_bits+15) / 16,sizeof(short),psi_fp);
    psi_bytes += ((pending_bits+15)/16)*sizeof(short);
  }

  /* Samples of p[] at every rankb_w-th index, preceded by n+1. */
  writeint(n+1,dir_fp); /* SA[0] */
  dir_bytes += sizeof(int);
  for (pos = rankb_w; pos <= n; pos += rankb_w) {
    writeint(p[pos],dir_fp);
    dir_bytes += sizeof(int);
  }

  /* Inverse samples: the index whose p[] value is 1 + a multiple of inv_step. */
  inv_count = (n-1)/inv_step + 1;
  inv = (int *)malloc(inv_count*sizeof(*inv));
  if (inv == NULL) {
    perror("csa2_new\n");
    exit(1);
  }
  for (pos = 1; pos <= n; pos++) {
    const int rank0 = p[pos]-1;
    if (rank0 % inv_step == 0) {
      inv[rank0 / inv_step] = pos;
    }
  }
  for (pos = 0; pos < inv_count; pos++) {
    writeint(inv[pos],dir_fp);
    dir_bytes += sizeof(int);
  }

  fclose(psi_fp);
  fclose(dir_fp);
  free(seq);
  free(inv);
}