/*
 * Decode the value stored at position i of the psi1 structure attached to
 * csa, for the stream format that alternates run codes and gap codes.
 *
 * The block index is i / L and the position inside the block is i % L.  The
 * block's starting value and the offset of its code stream (counted in
 * unsigned shorts from ps->B) come from the two sparse arrays when
 * ID_COMPPTR is set in ps->id, and from the packed array ps->R otherwise.
 *
 * Stream layout as consumed below: the first bit says whether the stream
 * opens with a run code.  A run code d stands for d-1 consecutive "+1"
 * steps; a gap code d stands for one step of d+1.  Once the first gap code
 * has been read, every following gap code is preceded by a run code.
 *
 * Returns the decoded value.  It is reduced modulo n+1 each time a gap is
 * applied and when the target position falls inside a run.
 */
static i64 psi12_psi(CSA *csa, i64 i)
{
  psi1 *ps = (psi1 *)csa->psi_struc;
  i64 L = ps->L;
  i64 blk = i / L;          /* index of the block that holds position i */
  i64 target = i % L;       /* number of steps to take inside the block */
  i64 n, x, sp;
  i64 bit, d, cnt;
  unsigned short *B;
  int in_run;

  if (ps->id & ID_COMPPTR) {
    x = SPARSEARRAY_select(ps->sx, blk+1) % (csa->n+1);
    sp = SPARSEARRAY_select(ps->sb, blk+1);
  } else {
    x = getuint(ps->R,blk*2,ps->k);
    sp = getuint(ps->R,blk*2+1,ps->k);
  }

  n = ps->n;
  B = ps->B;

  /* The leading bit selects whether the first code is a run code. */
  bit = 0;
  d = getbit(B+sp,bit+1);
  in_run = (d == 1);
  bit++;

  for (cnt = 1; cnt <= target; ) {
    if (in_run) {
      bit += DECODENUM(B+sp,bit,&d);
      if (cnt+d-1 > target) {
        /* The target lies inside this run: take only the remaining steps. */
        x += target-cnt+1;
        x %= (n+1);
        break;
      }
      x += d-1;
      cnt += d-1;
    }
    if (cnt > target) break;

    bit += DECODENUM(B+sp,bit,&d);
    x += d+1;
    x %= (n+1);
    cnt++;
    in_run = 1;
  }
  return x;
}
/*
 * Decode the whole text represented by SA into the buffer p.
 *
 * First the complete code stream SA->B is decoded into a temporary table I
 * (I[1..n]); a decoded running value larger than n is treated as a
 * separator: the running value restarts at -1 and no table slot is
 * consumed.  Then, starting from csa_inverse(SA,1), the table is followed
 * n times and csa_T(SA,pos) is written to p for every position visited.
 *
 * p must have room for n bytes.  On allocation failure the function reports
 * the error with perror and terminates the process.
 *
 * NOTE(review): an out-of-range position is only reported; the walk then
 * continues and indexes I with that position, as the original code did.
 */
void csa_decodeall(unsigned char *p,CSA *SA)
{
  int *I;
  int i,n,pos;
  int x,b,d;
  unsigned short *B;

  n = SA->n;
  /* Size computed in size_t so that n+1 cannot overflow int. */
  I = (int *)malloc(((size_t)n + 1) * sizeof(*I));
  if (I == NULL) {perror("decodeall"); exit(1);}

  B = SA->B;
  x = -1;  b = 0;
  for (i=1; i<=n; i++) {
    b += DECODENUM(B,b,&d);
    x += d;
    if (x > n) {
      /* separator: restart the running value and redo this slot */
      x = -1;  i--;
    } else {
      I[i] = x;
    }
  }

  pos = csa_inverse(SA,1);
  for (i=1; i<=n; i++) {
    if (pos < 1 || pos > n) {
      printf("i %d pos %d\n",i,pos);
    }
    *p++ = csa_T(SA,pos);
    pos = I[pos];
  }

  /* The table is private to this call; the original never released it. */
  free(I);
}
/*
 * Fill pi->buf with the decoded values of one page (block) and record the
 * page number in pi->page.
 *
 * The page holds L entries, or fewer when page*L + L would exceed pi->n.
 * buf[0] is the page's starting value read from ps->R; the remaining
 * entries are produced by decoding the stream at ps->B + sp and
 * accumulating modulo n.
 *
 * Encodings handled (ps->id compared exactly, without masking):
 *   ID_DIFF_GAMMA     every code d is a plain difference.
 *   ID_DIFF_GAMMA_RL  d <= 2*L and even : a run of d/2 "+1" steps
 *                     d <= 2*L and odd  : one step of (d+3)/2
 *                     d  > 2*L          : one step of d-L+1
 * Any other id prints a diagnostic for each decoded code.
 */
static void psi1_iterator_readpage(psi1_iterator *pi, i64 page)
{
  psi1 *ps = pi->ps;
  i64 n = pi->n;
  i64 L = ps->L;
  i64 id = ps->id;
  i64 maxrun = L;
  unsigned short *B = ps->B;
  i64 count, x, sp, bit, d;
  i64 k, t;

  count = L;
  if (page*L + count > n) count = n - page*L;

  x = getuint(ps->R,page*2,ps->k);
  sp = getuint(ps->R,page*2+1,ps->k);
  pi->buf[0] = x;

  bit = 0;
  k = 1;
  while (k < count) {
    bit += DECODENUM(B+sp,bit,&d);
    switch (id) {
    case ID_DIFF_GAMMA:
      x += d;  x %= n;
      pi->buf[k] = x;
      break;

    case ID_DIFF_GAMMA_RL:
      if (d > maxrun*2) {
        x += d-maxrun+1;  x %= n;
        pi->buf[k] = x;
      } else if (d % 2 != 0) {
        x += (d+3)/2;  x %= n;
        pi->buf[k] = x;
      } else {
        /* run: emit d/2 consecutive values, then skip the slots used */
        for (t=0; t<d/2; t++) {
          x += 1;  x %= n;
          if (k+t >= L) {
            printf("readpage: error k=%ld i=%ld l=%ld\n",k,t,L);
          }
          pi->buf[k+t] = x;
        }
        k += (d/2)-1;
      }
      break;

    default:
      printf("??? id = %ld\n",id);
      break;
    }
    k++;
  }

  pi->page = page;
}
/*
 * Build the lookup tables R5n / R5b / R5x used for table-driven decoding.
 *
 * For every pattern value in 0 .. TBLSIZE-1 the pattern is stored in the
 * first word of a scratch buffer (all other words are 0xffff) and codes are
 * decoded from bit 0 for as long as they end within the first DD bits.
 * For that pattern:
 *   R5n = number of codes decoded,
 *   R5b = number of bits those codes occupy,
 *   R5x = sum of the decoded values.
 */
static void mkdecodetable(void)
{
  unsigned short B[256];
  i64 w, pattern;
  i64 ncodes, used, width, d, total;

  for (w = 0; w < 256; w++) B[w] = 0xffff;

  for (pattern = 0; pattern < TBLSIZE; pattern++) {
    B[0] = pattern;
    used = 0;
    ncodes = 0;
    total = 0;

    /* Accept codes one at a time until the next one would cross DD bits. */
    for (width = DECODENUM(B,used,&d);
         used + width <= DD;
         width = DECODENUM(B,used,&d)) {
      used += width;
      total += d;
      ncodes++;
    }

    R5n[pattern] = ncodes;
    R5b[pattern] = used;
    R5x[pattern] = total;
  }
}
/*
 * Search the index range [l, r] for the first position whose decoded value
 * is >= pl.
 *
 * A binary search over the per-block starting values in ps->R picks the
 * block at which to start; the block's code stream is then decoded
 * sequentially.  The scan stops at the end of that block or just past r
 * (capped at n+1), whichever comes first.
 *
 * Returns the first position m >= l whose value is >= pl, or the scan limit
 * if no such position was met, or -1 when (ps->id & 0x3f) is neither
 * ID_DIFF_GAMMA nor ID_DIFF_GAMMA_RL.
 *
 * The bit offset into the stream is kept in an i64.  It used to be an
 * unsigned short, which silently wraps once more than 65535 bits of a block
 * have been consumed.
 */
static i64 psi1_pred(CSA *csa, i64 pl, i64 l, i64 r)
{
  i64 m,ll,rr,j;
  i64 x;
  i64 sp,L,d,n;
  i64 offset;
  uchar *R;
  unsigned short *buf;
  i64 maxrun;
  int k;
  psi1 *ps;

  ps = (psi1 *)csa->psi_struc;
  R = ps->R;
  L = ps->L;
  k = ps->k;
  n = ps->n;
  maxrun = L;

  /* Binary search on block starting values: last block whose start < pl. */
  ll = (l+L-1) / L;
  rr = r / L;
  while (ll <= rr) {
    m = (ll + rr) / 2;
    x = getuint(R,m*2,k);
    if (x >= pl) rr = m-1; else ll = m+1;
  }

  m = rr*L;
  x = getuint(R,rr*2,k);
  sp = getuint(R,rr*2+1,k);

  /* Scan limit: end of the chosen block or one past r, at most n+1. */
  if ((rr+1)*L <= r) r = (rr+1)*L; else r = r+1;
  if (r > n) r = n+1;

  buf = ps->B + sp;
  offset = 0;

  switch (ps->id & 0x3f) {
  case ID_DIFF_GAMMA:
    while (m < r) {
      if (m >= l && x >= pl) break;
      offset += DECODENUM(buf,offset,&d);
      x += d;  x %= (n+1);
      m++;
    }
    return m;

  case ID_DIFF_GAMMA_RL:
    while (m < r) {
      if (m >= l && x >= pl) break;
      offset += DECODENUM(buf,offset,&d);
      if (d <= maxrun*2) {
        if (d % 2 == 0) {
          /* run of d/2 "+1" steps; test every position inside the run */
          for (j=0; j<d/2 && m<r; j++) {
            if (m >= l && x >= pl) return m;
            x += 1;  x %= (n+1);
            m++;
          }
        } else {
          x += (d+3)/2;  x %= (n+1);
          m++;
        }
      } else {
        x += d-maxrun+1;  x %= (n+1);
        m++;
      }
    }
    return m;
  }
  return -1;
}
/*
 * Decode the value stored at position i of the psi1 structure attached to
 * csa.
 *
 * The block index is i / L and j = i % L codes have to be consumed inside
 * the block.  The block's starting value and stream offset come from the
 * sparse arrays when ID_COMPPTR is set in ps->id, from ps->R otherwise.
 *
 * Encodings, selected by (ps->id & 0x3f):
 *   ID_DIFF_GAMMA, ID_DIFF_GAMMA_SPARSE
 *       plain differences.  The tables R5n/R5b/R5x are indexed by the bit
 *       window returned by getbitD and allow several codes to be skipped at
 *       once; a zero R5n entry falls back to decoding a single code.
 *   ID_DIFF_GAMMA_RL, ID_DIFF_GAMMA_RL_SPARSE
 *       d <= 2*L and even : a run of d/2 "+1" steps
 *       d <= 2*L and odd  : one step of (d+3)/2
 *       d  > 2*L          : one step of d-L+1
 * For any other id the block's starting value is returned unchanged.
 *
 * With DEBUG defined, an out-of-range i terminates the process and an
 * out-of-range result is reported.  The diagnostics use %lld with explicit
 * casts; the original %u did not match the i64 arguments.
 */
static i64 psi1_psi(CSA *csa, i64 i)
{
  i64 j,k;
  i64 x;
  i64 k2,p,n;
  i64 L;
  i64 b,d,sp;
  i64 maxrun;
  unsigned short *B;
  psi1 *ps;

  ps = (psi1 *)csa->psi_struc;

#ifdef DEBUG
  if (i > csa->n || i < 1) {
    printf("error csa2_psi i=%lld n=%lld\n",(long long)i,(long long)csa->n);
    exit(1);
  }
#endif

  L = ps->L;
  if (ps->id & ID_COMPPTR) {
    x = SPARSEARRAY_select(ps->sx, (i/L)+1) % (csa->n+1);
    sp = SPARSEARRAY_select(ps->sb, (i/L)+1);
  } else {
    x = getuint(ps->R,(i / L)*2,ps->k);
    sp = getuint(ps->R,(i / L)*2+1,ps->k);
  }

  maxrun = L;
  b = 0;
  j = i % L;
  n = ps->n;
  B = ps->B;

  switch (ps->id & 0x3f) {
  case ID_DIFF_GAMMA:
  case ID_DIFF_GAMMA_SPARSE:
    k = 0;
    while (k < j) {
      p = getbitD(B+sp,1+b);
      k2 = R5n[p];
      if (k2 == 0) {
        /* no table entry for this window: decode one code directly */
        b += DECODENUM(B+sp,b,&d);
        x += d;  x %= (n+1);
        k++;
      } else {
        /* the table would overshoot j: finish one code at a time below */
        if (k+k2 > j) break;
        k += k2;
        b += R5b[p];
        x += R5x[p];  x %= (n+1);
      }
    }
    for (; k<j; k++) {
      b += DECODENUM(B+sp,b,&d);
      x += d;  x %= (n+1);
    }
    break;

  case ID_DIFF_GAMMA_RL:
  case ID_DIFF_GAMMA_RL_SPARSE:
    b = 0;
    for (k=1; k<=j; k++) {
      b += DECODENUM(B+sp,b,&d);
      if (d <= maxrun*2) {
        if (d % 2 == 0) {
          if (k+d/2-1 >= j) {
            /* target lies inside this run: take only the remaining steps */
            x += j-k+1;  x %= (n+1);
            break;
          }
          x += d/2;
          k += (d/2)-1;
        } else {
          x += (d+3)/2;  x %= (n+1);
        }
      } else {
        x += d-maxrun+1;  x %= (n+1);
      }
    }
    break;

  default:
    break;
  }

#ifdef DEBUG
  if (x < 0 || x > csa->n) {
    printf("error csa2_psi(%lld) %lld\n",(long long)i,(long long)x);
  }
#endif
  return x;
}
/* backward search */
/*
 * Match key[0..keylen-1] from its last character towards its first,
 * narrowing an index range [l, r] at every step.
 *
 * For each character the candidate range is taken from SA->C; it is then
 * shrunk so that the decoded values inside it fall within the range [pl, pr]
 * obtained for the previously matched suffix.  Each end is located with a
 * binary search over the per-block starting values in SA->R followed by
 * sequential decoding of SA->B.  While advancing to l, a decoded value
 * larger than n is treated as a separator (value restarts at -1, position
 * not advanced).
 *
 * On return *li and *ri hold the last range computed (empty when
 * *li > *ri) and the result is the number of trailing key characters that
 * were matched.
 *
 * NOTE(review): key[keylen-1] is read unconditionally, so keylen is
 * presumably required to be at least 1 - confirm against callers.
 */
int csa_bsearch(unsigned char *key,int keylen,CSA *SA,int *li,int *ri)
{
  int ch, h, l, r, pl, pr;
  int lo, hi, mid, pos;
  int psi, bit, delta;
  int blk, n, *R;
  unsigned short *B;
  int matched;

  ch = key[keylen-1];
  r = SA->C[ch];
  if (ch > 0) l = SA->C[ch-1]+1; else l = 1;

  matched = 0;
  if (l <= r) {
    matched = 1;

    R = SA->R;
    B = SA->B;
    blk = SA->l;
    n = SA->n;

    for (h = keylen-2; h >= 0; h--) {
      pl = l;
      pr = r;
      ch = key[h];
      r = SA->C[ch];
      if (ch > 0) l = SA->C[ch-1]+1; else l = 1;
      if (l > r) break;

      /* ---- right end: last position whose value is <= pr ---- */
      lo = l / blk + 1;
      hi = r / blk;
      while (lo <= hi) {
        mid = (lo + hi) / 2;
        if (R[mid*2] <= pr) lo = mid+1; else hi = mid-1;
      }
      pos = (lo-1)*blk;
      psi = R[(lo-1)*2];
      bit = R[(lo-1)*2+1];
      while (pos < l) {
        bit += DECODENUM(B,bit,&delta);
        psi += delta;
        if (psi > n) psi = -1; else pos++;
      }
      while (psi <= pr && pos <= r) {
        bit += DECODENUM(B,bit,&delta);
        psi += delta;
        pos++;
      }
      r = pos-1;

      /* ---- left end: first position whose value is >= pl ---- */
      lo = l / blk;
      hi = r / blk;
      while (lo <= hi) {
        mid = (lo + hi) / 2;
        if (R[mid*2] >= pl) hi = mid-1; else lo = mid+1;
      }
      pos = hi*blk;
      psi = R[hi*2];
      bit = R[hi*2+1];
      while (pos < l) {
        bit += DECODENUM(B,bit,&delta);
        psi += delta;
        if (psi > n) psi = -1; else pos++;
      }
      while (psi < pl && pos <= r) {
        bit += DECODENUM(B,bit,&delta);
        psi += delta;
        pos++;
      }
      l = pos;

      if (l > r) break;
      matched++;
    }
  }

  *li = l;
  *ri = r;
  return matched;
}
/*
 * Resolve all positions in the index range [l, r] in one batch.
 *
 * Positions that are multiples of SA->two are read directly from the
 * sampled array SA->SA.  For the others the successor value is obtained by
 * decoding SA->B sequentially (first step) and then by csa_psi, for at most
 * len further rounds; whenever a position reaches a multiple of SA->two its
 * result is the sampled value minus the number of steps taken.  Positions
 * still unresolved after that are finished with csa_lookup.
 *
 * Returns an array allocated with malloc and not freed here: element 0
 * holds the count r-l+1 and elements 1..count hold the results sorted with
 * intcompare.
 *
 * Both allocations are checked; on failure the error is reported with
 * perror and the process terminates, as csa_decodeall does.
 */
int *csa_batchlookup3(CSA *SA,int l, int r,int len)
{
  int *I;      /* result array: I[0] = count, I[1..] = values */
  int *P;      /* positions that are not resolved yet */
  int v;       /* number of steps already applied to the entries of P */
  int m;       /* number of results stored in I so far */
  int q;       /* number of live entries in P */
  int i,j;
  int two;
  int *sa;
  int k,b,d,x,n,w;
  int count;
  unsigned short *B;

  n = SA->n;
  B = SA->B;
  two = SA->two;
  sa = SA->SA;
  w = SA->l;
  count = r-l+1;

  I = (int *)malloc((count+1)*sizeof(*I));
  P = (int *)malloc((count+1)*sizeof(*P));
  if (I == NULL || P == NULL) {
    perror("csa_batchlookup3");
    exit(1);
  }

  /* Position the decoder on index l inside its block. */
  x = SA->R[(l / w)*2];
  b = SA->R[(l / w)*2+1];
  j = l % w;
  for (k=0; k<j; k++) {
    b += DECODENUM(B,b,&d);
    x += d;
    if (x > n) {x = -1; k--;}   /* separator: restart, do not count it */
  }

  /* First step: x is the successor of i, obtained by sequential decoding. */
  for (m = 0, q = 0, i = l; i <= r; i++) {
    if (i % two == 0) {
      I[1+m] = sa[i / two];
      m++;
    } else {
      P[q++] = x;
    }
    b += DECODENUM(B,b,&d);
    x += d;
    if (x > n) {
      x = -1;
      b += DECODENUM(B,b,&d);
      x += d;
    }
  }
  v = 1;

  /* Further rounds: one csa_psi step per unresolved position. */
  while (q > 0 && v <= len) {
    for (k = 0, j = 0; j < q; j++) {
      i = P[j];
      if (i % two == 0) {
        I[1+m] = sa[i / two] - v;
        m++;
      } else {
        P[k++] = csa_psi(SA,i);
      }
    }
    q = k;
    v++;
  }

  /* Whatever is left is resolved individually. */
  for (j = 0; j < q; j++) {
    I[1+m] = csa_lookup(SA,P[j])-v;
    m++;
  }

  qsort(I+1, count, sizeof(int), intcompare);
  I[0] = count;
  free(P);
  return I;
}
/*
 * A plain prototype without `inline`.  Under C99/C11 rules a function whose
 * file-scope declarations are all `inline` (without `extern`) is only an
 * inline definition and produces no external symbol, so a call that is not
 * inlined fails to link.  This declaration makes the definition below an
 * external definition as well; under the older GNU89 rules it changes
 * nothing.
 */
extern int csa_psi(CSA *SA, int i);

/*
 * Decode the value stored at position i.
 *
 * The block index is i / SA->l; the block's starting value and starting bit
 * offset into SA->B are read from SA->R, and i % SA->l codes are then
 * consumed.  The tables R5n/R5b/R5x are indexed by the bit window returned
 * by getbitD and allow several codes to be skipped at once; a zero R5n entry
 * falls back to decoding a single code, and the tail is always decoded one
 * code at a time.  In the single-code paths a running value larger than n
 * is treated as a separator: the value restarts at -1 and the code is not
 * counted.
 *
 * With DEBUG defined, an out-of-range i terminates the process and an
 * out-of-range result is reported.
 */
inline int csa_psi(CSA *SA, int i)
{
  int j,k,b,d,x;
  int k2,p,n;
  int l;
  unsigned short *B;

#ifdef DEBUG
  if (i > SA->n || i < 1) {
    printf("error csa2_psi i=%d n=%d\n",i,SA->n);
    exit(1);
  }
#endif

  l = SA->l;
  x = SA->R[(i / l)*2];
  b = SA->R[(i / l)*2+1];
  j = i % l;

  n = SA->n;
  B = SA->B;

  k = 0;
  while (k < j) {
    p = getbitD(B,1+b);
    k2 = R5n[p];
    if (k2 == 0) {
      /* no table entry for this window: decode one code directly */
      b += DECODENUM(B,b,&d);
      x += d;
      k++;
      if (x > n) {
        x = -1;  k--;
      }
    } else {
      /* the table would overshoot j: finish one code at a time below */
      if (k+k2 > j) break;
      k += k2;
      b += R5b[p];
      x += R5x[p];
    }
  }

  for (; k<j; k++) {
    b += DECODENUM(B,b,&d);
    x += d;
    if (x > n) {
      x = -1;  k--;
    }
  }

#ifdef DEBUG
  if (x < 0 || x > SA->n) {
    printf("error csa2_psi(%d) %d\n",i,x);
  }
#endif
  return x;
}