Example #1
0
/*
 * Return the psi value stored at index i.
 *
 * The sequence is stored in blocks of ps->L entries.  For each block a
 * sampled value and the start of its code stream are fetched (through the
 * sparse arrays when ID_COMPPTR is set in ps->id, otherwise from ps->R).
 * The remaining i % L entries are reconstructed by walking the code stream:
 * the first bit of the block selects whether decoding starts with a run
 * length, and from then on run lengths and gaps alternate.
 */
static i64 psi12_psi(CSA *csa, i64 i)
{
  psi1 *ps = (psi1 *)csa->psi_struc;
  i64 blk_len = ps->L;
  i64 blk = i / blk_len;
  i64 val, start;

  /* Sampled value and code-stream start for the block containing i. */
  if (ps->id & ID_COMPPTR) {
    val = SPARSEARRAY_select(ps->sx, blk+1) % (csa->n+1);
    start = SPARSEARRAY_select(ps->sb, blk+1);
  } else {
    val = getuint(ps->R, blk*2, ps->k);
    start = getuint(ps->R, blk*2+1, ps->k);
  }

  i64 target = i % blk_len;          /* entries to advance inside the block */
  i64 modulus = ps->n + 1;
  unsigned short *code = ps->B + start;
  i64 d;
  i64 bitpos = 0;
  int in_run;

  /* Leading flag bit: 1 means the stream begins with a run length. */
  d = getbit(code, bitpos+1);
  in_run = (d == 1) ? 1 : 0;
  bitpos++;

  for (i64 pos = 1; pos <= target; ) {
    if (in_run == 1) {
      bitpos += DECODENUM(code, bitpos, &d);
      if (pos + d - 1 > target) {
        /* The target lies inside this run: step only as far as needed. */
        val += target - pos + 1;
        val %= modulus;
        break;
      }
      val += d - 1;
      pos += d - 1;
    }
    if (pos > target) break;

    /* Gap code: advance one entry by d+1. */
    bitpos += DECODENUM(code, bitpos, &d);
    val += d + 1;
    val %= modulus;
    pos++;
    in_run = 1;                      /* after a gap a run length follows */
  }

  return val;
}
Example #2
0
	/*
	 * Decode the whole text represented by SA into p.
	 *
	 * p must have room for SA->n bytes; exactly n bytes are written.
	 * All n psi values are first expanded from the delta-coded stream
	 * SA->B into a temporary array, then the text is emitted by starting
	 * at csa_inverse(SA,1) and following psi n times.
	 *
	 * On allocation failure the function reports via perror() and exits.
	 */
	void csa_decodeall(unsigned char *p,CSA *SA) {
		int *I;
		int i,n,pos;
		int x,b,d;
		unsigned short *B;
		n = SA->n;
		I =(int *)malloc((n+1)*sizeof(*I));
		if (I == NULL) {perror("decodeall");  exit(1);}

		B = SA->B;
		x = -1;  b = 0;
		for (i=1; i<=n; i++) {
			b += DECODENUM(B,b,&d);
			x += d;
			if (x > n) {
				/* A value past n restarts the running sum at -1 and
				   does not produce an entry, so retry index i. */
				x = -1;  i--;
			}
			else {
				I[i] = x;
			}
		}
		pos = csa_inverse(SA,1);
		for (i=1; i<=n; i++) {
			if (pos < 1 || pos > n) {
				/* NOTE(review): only a diagnostic; decoding continues
				   with the out-of-range position as before. */
				printf("i %d pos %d\n",i,pos);
			}
			*p++ = csa_T(SA,pos);
			pos = I[pos];
		}
		/* The expanded psi table is scratch space; release it (it was
		   previously leaked on every call). */
		free(I);
	}
Example #3
0
/*
 * Decode one page of psi values into pi->buf and record the page number
 * in pi->page.
 *
 * A page holds up to L = ps->L entries.  Entry 0 is the sampled value read
 * from ps->R; the following entries are reconstructed from the code stream
 * that starts at ps->B + sp, according to the coding scheme in ps->id.
 */
static void psi1_iterator_readpage(psi1_iterator *pi, i64 page)
{
  i64 L,i,j,k,id;
  i64 x,sp,n,b,d;
  unsigned short *B;
  i64 maxrun;
  psi1 *ps;

  n = pi->n;
  ps = pi->ps;
  L = ps->L;
  id = ps->id;
  maxrun = L;

  B = ps->B;
  /* j = number of entries on this page; the last page may be short. */
  j = L;
  if (page*L + j > n) j = n - page*L;
  /* Sampled value and code-stream offset for this page. */
  x = getuint(ps->R,page*2,ps->k);
  sp = getuint(ps->R,page*2+1,ps->k);

  pi->buf[0] = x;

  b = 0;
  for (k=1; k<j; k++) {
    b += DECODENUM(B+sp,b,&d);
    /* NOTE(review): id is compared unmasked and values are reduced modulo n
       here, whereas psi1_psi masks id with 0x3f and reduces modulo n+1 --
       confirm that pi->n and ps->id make these equivalent. */
    if (id == ID_DIFF_GAMMA) {
      /* Plain delta: d is the gap to the next value. */
      x += d;
      x %= n;
      pi->buf[k] = x;
    } else if (id == ID_DIFF_GAMMA_RL) {
      if (d <= maxrun*2) {
        if (d % 2 == 0) {
          /* Even code: a run of d/2 entries, each one larger than the last. */
          for (i=0; i<d/2; i++) {
            x += 1;
            x %= n;
            if (k+i >= L) {
              /* NOTE(review): only a diagnostic -- the store below still
                 happens; presumably buf holds L entries, verify. */
              printf("readpage: error k=%ld i=%ld l=%ld\n",k,i,L);
            }
            pi->buf[k+i] = x;
          }
          /* The run produced d/2 entries; the loop's k++ accounts for one. */
          k += (d/2)-1;
        } else {
          /* Odd code: a single gap of (d+3)/2. */
          x += (d+3)/2;
          x %= n;
          pi->buf[k] = x;
        }
      } else {
        /* Codes above 2*maxrun: a single gap of d-maxrun+1. */
        x += d-maxrun+1;
        x %= n;
        pi->buf[k] = x;
      }
    } else {
      /* Unknown coding scheme: report it (once per decoded code word). */
      printf("??? id = %ld\n",id);
    }
  }
  pi->page = page;
}
Example #4
0
/*
 * Fill the lookup tables R5n, R5b and R5x.
 *
 * For every bit pattern below TBLSIZE, the pattern is placed in the first
 * word of a scratch stream (all following words are 0xffff) and as many
 * complete codes as fit within DD bits are decoded.  The tables record,
 * per pattern, the number of codes decoded (R5n), the bits they occupy
 * (R5b) and the sum of their values (R5x).
 */
static void mkdecodetable(void)
{
  unsigned short window[256];
  i64 pattern, w;

  for (w = 0; w < 256; w++) window[w] = 0xffff;

  for (pattern = 0; pattern < TBLSIZE; pattern++) {
    i64 used = 0;    /* bits consumed by the codes accepted so far */
    i64 count = 0;   /* number of codes accepted */
    i64 sum = 0;     /* sum of the accepted code values */
    i64 len, val;

    window[0] = pattern;
    for (;;) {
      len = DECODENUM(window, used, &val);
      if (used + len > DD) break;   /* next code would cross the DD-bit window */
      used += len;
      sum += val;
      count++;
    }

    R5n[pattern] = count;
    R5b[pattern] = used;
    R5x[pattern] = sum;
  }
}
Example #5
0
/*
 * Search the index range starting at l for the first position whose psi
 * value is >= pl.
 *
 * A binary search over the per-block samples in ps->R picks the block to
 * scan; the block's code stream is then decoded sequentially starting from
 * the sample.  The scan stops at the first index m >= l whose value is
 * >= pl, or at the scan limit: the end of the chosen block or r+1,
 * whichever comes first, capped at n+1.
 *
 * Returns the index where the scan stopped, or -1 when ps->id & 0x3f is
 * neither ID_DIFF_GAMMA nor ID_DIFF_GAMMA_RL.
 */
static i64 psi1_pred(CSA *csa, i64 pl, i64 l, i64 r)
{
  i64 m,ll,rr,j;
  i64 x;
  i64 sp,L,d,n;
  uchar *R;
  unsigned short *B, *buf;
  /* Bit offset into the block's code stream.  This used to be declared
     unsigned short alongside B and buf, which truncated it to 16 bits;
     psi1_psi keeps the same quantity in an i64. */
  i64 offset;
  i64 maxrun;
  int k;
  psi1 *ps;

  ps = (psi1 *)csa->psi_struc;

  R = ps->R;  B = ps->B;  L = ps->L;  k = ps->k;
  n = ps->n;
  maxrun = L;

  /* Binary search over block samples: afterwards rr is the last block
     in the searched range whose sampled value is < pl. */
  ll = (l+L-1) / L;
  rr = r / L;
  while (ll <= rr) {
    m = (ll + rr) / 2;
    x = getuint(R,m*2,k);
    if (x >= pl) rr = m-1; else ll = m+1;
  }
  m = rr*L;
  x = getuint(R,(m / L)*2,k);
  sp = getuint(R,(m / L)*2+1,k);

  /* Scan limit: end of block rr, or one past the original r. */
  if ((rr+1)*L <= r) r = (rr+1)*L; else r = r+1;
  if (r > n) r = n+1;

  buf = B + sp;
  offset = 0;

  switch (ps->id & 0x3f) {
    case ID_DIFF_GAMMA:
      while (m < r) {
        if (m >= l && x >= pl) break;
        offset += DECODENUM(buf,offset,&d);
        x += d;
        x %= (n+1);
        m++;
      }
      return m;
    case ID_DIFF_GAMMA_RL:
      while (m < r) {
        if (m >= l && x >= pl) break;
        offset += DECODENUM(buf,offset,&d);
        if (d <= maxrun*2) {
          if (d % 2 == 0) {
            /* Even code: run of d/2 entries, each +1; test every entry. */
            for (j=0; j<d/2 && m<r; j++) {
              if (m >= l && x >= pl) goto end;
              x += 1;
              x %= (n+1);
              m++;
            }
          } else {
            /* Odd code: single gap of (d+3)/2. */
            x += (d+3)/2;
            x %= (n+1);
            m++;
          }
        } else {
          /* Codes above 2*maxrun: single gap of d-maxrun+1. */
          x += d-maxrun+1;
          x %= (n+1);
          m++;
        }
      }
end:  return m;
    default:
      break;
  }
  return -1;
}
Example #6
0
/*
 * Return the psi value stored at index i.
 *
 * The sequence is stored in blocks of ps->L entries.  The block's sampled
 * value and code-stream start come from the sparse arrays when ID_COMPPTR
 * is set in ps->id, otherwise from ps->R.  The remaining i % L entries are
 * reconstructed from the code stream according to ps->id & 0x3f:
 *
 *  - ID_DIFF_GAMMA / ID_DIFF_GAMMA_SPARSE: plain gaps, with the R5n/R5b/R5x
 *    tables used to consume several codes at once where possible;
 *  - ID_DIFF_GAMMA_RL / ID_DIFF_GAMMA_RL_SPARSE: gaps mixed with runs of
 *    consecutive +1 steps.
 *
 * For any other scheme the block's sampled value is returned unchanged.
 * With DEBUG defined, out-of-range arguments and results are reported.
 */
static i64 psi1_psi(CSA *csa, i64 i)
{
  i64 j,k;
  i64 x;
  i64 k2,p,n;
  i64 L;
  i64 b,d,sp;
  i64 maxrun;
  unsigned short *B;
  psi1 *ps;

  ps = (psi1 *)csa->psi_struc;

#ifdef DEBUG
  if (i > csa->n || i < 1) {
    /* i64 values were previously printed with %u (type mismatch). */
    printf("error csa2_psi i=%lld n=%lld\n",(long long)i,(long long)csa->n);
    exit(1);
  }
#endif

  L = ps->L;
  if (ps->id & ID_COMPPTR) {
    x = SPARSEARRAY_select(ps->sx, (i/L)+1) % (csa->n+1);
    sp = SPARSEARRAY_select(ps->sb, (i/L)+1);
  } else {
    x = getuint(ps->R,(i / L)*2,ps->k);
    sp = getuint(ps->R,(i / L)*2+1,ps->k);
  }
  maxrun = L;

  b = 0;
  j = i % L;          /* entries to advance inside the block */

  n = ps->n;
  B = ps->B;

  switch (ps->id & 0x3f) {
    case ID_DIFF_GAMMA:
    case ID_DIFF_GAMMA_SPARSE:
      k = 0;
      while (k < j) {
        /* Look up the next bits of the stream in the decode tables. */
        p = getbitD(B+sp,1+b);
        k2 = R5n[p];
        if (k2 == 0) {
          /* No complete code in the table window: decode one directly. */
          b += DECODENUM(B+sp,b,&d);
          x += d;
          x %= (n+1);
          k++;
        } else {
          /* The table covers k2 codes; use it only if that does not
             overshoot the target. */
          if (k+k2 > j) break;
          k += k2;
          b += R5b[p];
          x += R5x[p];
          x %= (n+1);
        }
      }

      /* Finish the remaining entries one code at a time. */
      for (; k<j; k++) {
        b += DECODENUM(B+sp,b,&d);
        x += d;
        x %= (n+1);
      }
      break;
    case ID_DIFF_GAMMA_RL:
    case ID_DIFF_GAMMA_RL_SPARSE:
      b = 0;
      for (k=1; k<=j; k++) {
        b += DECODENUM(B+sp,b,&d);
        if (d <= maxrun*2) {
          if (d % 2 == 0) {
            /* Even code: run of d/2 entries, each +1. */
            if (k+d/2-1 >= j) {
              /* The target lies inside the run: step only as far as needed. */
              x += j-k+1;
              x %= (n+1);
              break;
            }
            /* Whole run is before the target.  x is reduced modulo n+1 by
               the next iteration, which always exists here because
               k+d/2 <= j. */
            x += d/2;
            k += (d/2)-1;
          } else {
            /* Odd code: single gap of (d+3)/2. */
            x += (d+3)/2;
            x %= (n+1);
          }
        } else {
          /* Codes above 2*maxrun: single gap of d-maxrun+1. */
          x += d-maxrun+1;
          x %= (n+1);
        }
      }
      break;
    default:
      /* Unknown scheme: leave x at the block's sampled value. */
      break;
  }

#ifdef DEBUG
  if (x < 0 || x > csa->n) {
    printf("error csa2_psi(%lld) %lld\n",(long long)i,(long long)x);
  }
#endif
  return x;
}
Example #7
0
	/*
	 * Backward search for key[0..keylen-1].
	 *
	 * The key is processed from its last character towards its first.  The
	 * current range [l, r] starts as the range SA->C gives for the last
	 * character and is narrowed for every preceding character to the
	 * positions whose psi value falls inside the previous range [pl, pr].
	 *
	 * On return *li and *ri hold the last computed range and the return
	 * value is the number of trailing key characters for which a non-empty
	 * range was found (keylen when the whole key matched).
	 *
	 * NOTE(review): key[keylen-1] is read unconditionally, so keylen is
	 * presumably required to be >= 1 -- confirm against callers.
	 */
	int csa_bsearch(unsigned char *key,int keylen,CSA *SA,int *li,int *ri) {
		int c,h,l,r,m,ll,rr,pl,pr;
		int x,b,w,d,n,*R;
		unsigned short *B;
		int len;

		/* Initial range: all positions belonging to the last character. */
		c = key[keylen-1];
		r = SA->C[c];  if (c>0) l = SA->C[c-1]+1; else l = 1;
		len = 0;
		if (l > r) goto end;
		len++;
		for (h = keylen-2; h >= 0; h--) {
			/* Remember the previous range, then start from the full
			   range of the next character to the left. */
			pl = l;  pr = r;
			c = key[h];
			r = SA->C[c];  if (c>0) l = SA->C[c-1]+1; else l = 1;
			if (l > r) goto end;
			/* --- Upper bound.  Three alternatives; only the last one
			   (sample search + sequential decode) is compiled. --- */
			#if 0
			while (1) {			 // find maximum r such that Psi[r] <= pr
				j = csa_psi(SA,r);
				if (j <= pr) break;
				r--;
				//if (l > r) goto end;
			}
			#else
			#if 0
			ll = l;  rr = r;
			while (ll <= rr) {
				m = (ll + rr) / 2;
				if (csa_psi(SA,m) <= pr) ll = m+1; else rr = m-1;
			}
			r = ll-1;
			#else
			R = SA->R;  B = SA->B;  w = SA->l;  n = SA->n;
			/* Binary search over the per-block samples R[2*m] (one per w
			   positions) for the last block whose sample is <= pr. */
			ll = l / w + 1;
			rr = r / w;
			while (ll <= rr) {
				m = (ll + rr) / 2;
				if (R[m*2] <= pr) ll = m+1; else rr = m-1;
			}
			/* Start decoding at that block: x is its sampled value, b the
			   bit offset of its codes in B. */
			m = (ll-1)*w;
			x = R[(m / w)*2];
			b = R[(m / w)*2+1];

			#if 1
			/* Skip forward to position l. */
			while (m < l) {
				b += DECODENUM(B,b,&d);
				x += d;
				//if (x > n) printf("??? \n");
				/* A value past n restarts the running sum at -1 and is
				   not counted as a position. */
				if (x > n) {x = -1;  m--;}
				m++;
			}
			#endif
			/* Advance while the value stays <= pr; m ends one past the
			   last such position. */
			while (x <= pr && m <= r) {
				b += DECODENUM(B,b,&d);
				x += d;
				//if (x > n) printf("??? \n");
				m++;
			}
			r = m-1;
			#endif
			#endif
			/* --- Lower bound.  Same three alternatives; only the last
			   one is compiled. --- */
			#if 0
			while (1) {			 // find minimum l such that Psi[l] >= pl
				j = csa_psi(SA,l);
				if (j >= pl) break;
				l++;
				//if (l > r) goto end;
			}
			#else
			#if 0
			ll = l;  rr = r;
			while (ll <= rr) {
				m = (ll + rr) / 2;
				if (csa_psi(SA,m) >= pl) rr = m-1; else ll = m+1;
			}
			l = rr+1;
			#else
			//ll = l / w + 1;
			/* Binary search over the samples for the last block whose
			   sample is < pl.  R, B, w and n were set above. */
			ll = l / w;
			rr = r / w;
			while (ll <= rr) {
				m = (ll + rr) / 2;
				if (R[m*2] >= pl) rr = m-1; else ll = m+1;
			}
			/* NOTE(review): rr can end at l/w - 1 here; presumably the
			   sample of block l/w is always < pl so this cannot go
			   negative -- verify. */
			m = rr*w;
			x = R[(m / w)*2];
			b = R[(m / w)*2+1];

			/* Skip forward to position l. */
			while (m < l) {
				b += DECODENUM(B,b,&d);
				x += d;
				if (x > n) {x = -1;  m--;}
				m++;
			}
			/* Advance until the value reaches pl; m is then the new l. */
			while (x < pl && m <= r) {
				b += DECODENUM(B,b,&d);
				x += d;
				m++;
			}
			l = m;
			#endif
			#endif
			if (l > r) goto end;
			len++;
		}
		end:
		*li = l;  *ri = r;
		return len;
	}
Example #8
0
	/*
	 * Resolve all suffix-array positions l..r in one pass.
	 *
	 * Positions that are multiples of SA->two are read directly from the
	 * sample table SA->SA.  Every other position is advanced through psi
	 * (first by sequentially decoding SA->B, then via csa_psi) until it
	 * lands on a sampled position or len steps have been taken; anything
	 * still unresolved after that is finished with csa_lookup.  The number
	 * of psi steps taken is subtracted from each looked-up value.
	 *
	 * Returns a malloc'd array: element 0 holds the count r-l+1 and
	 * elements 1..count hold the values in ascending order.  The caller
	 * owns the array and must free() it.  Returns NULL if memory cannot be
	 * allocated (previously the NULL pointer was dereferenced).
	 */
	int *csa_batchlookup3(CSA *SA,int l, int r,int len) {
		int *I;					 /* result array */
		int *P;					 /* positions still to be resolved */
		int v;					 /* psi steps taken so far */
		int m;					 /* number of results stored in I */
		int q;					 /* number of entries in P */
		int i,j;
		int two;
		int *sa;
		int k,b,d,x,n,w;
		unsigned short *B;

		n = SA->n;
		B = SA->B;
		two = SA->two;
		sa = SA->SA;
		w = SA->l;

		I =(int *) malloc((r-l+1+1)*sizeof(*I));
		P =(int *) malloc((r-l+1+1)*sizeof(*P));
		if (I == NULL || P == NULL) {
			/* free(NULL) is a no-op, so both can be released blindly. */
			free(I);
			free(P);
			return NULL;
		}
		#if 1
		/* Decode from the block sample up to position l, so that x is
		   the psi value at l. */
		x = SA->R[(l / w)*2];
		b = SA->R[(l / w)*2+1];
		j = l % w;
		for (k=0; k<j; k++) {
			b += DECODENUM(B,b,&d);
			x += d;
			/* A value past n restarts the running sum at -1 and is not
			   counted as a position. */
			if (x > n) {x = -1;  k--;}
		}
		/* First psi step for the whole range, decoded sequentially. */
		for (m = 0, q = 0, i = l; i <= r; i++) {
			if (i % two == 0) {
				I[1+m] = sa[i / two];
				m++;
			}
			else {
				P[q++] = x;
			}
			b += DECODENUM(B,b,&d);
			x += d;
			if (x > n) {
				x = -1;
				b += DECODENUM(B,b,&d);
				x += d;
			}
		}
		v = 1;
		#else
		for (q = 0, i = l; i <= r; i++) {
			P[q++] = i;
		}
		v = 0;
		m = 0;
		#endif
		/* Keep stepping the unresolved positions; P is compacted in place
		   (k is the write index). */
		while (q > 0 && v <= len) {
			for (k = 0, j = 0; j < q; j++) {
				i = P[j];
				if (i % two == 0) {
					I[1+m] = sa[i / two] - v;
					m++;
				}
				else {
					P[k++] = csa_psi(SA,i);
				}
			}
			q = k;
			v++;
		}
		/* Whatever is left after len steps is resolved individually. */
		for (j = 0; j < q; j++) {
			I[1+m] = csa_lookup(SA,P[j])-v;
			m++;
		}
		qsort(I+1, r-l+1, sizeof(int), intcompare);
		I[0] = r-l+1;
		free(P);
		return I;
	}
Example #9
0
	/*
	 * Return the psi value stored at index i.
	 *
	 * SA->R holds, for every block of SA->l positions, a sampled value and
	 * the bit offset of the block's delta codes in SA->B.  Starting from
	 * the sample, i % l further entries are decoded.  Where the lookup
	 * tables R5n/R5b/R5x cover the upcoming bits, several codes are consumed
	 * at once; otherwise codes are decoded one by one.  A running value
	 * past n restarts the sum at -1 and is not counted as an entry.
	 */
	inline
	int csa_psi(CSA *SA, int i) {
		int want, done, bitpos, delta, val;
		int chunk, idx, n;
		int blk_len;
		unsigned short *B;
		#ifdef DEBUG
		if (i > SA->n || i < 1) {
			printf("error csa2_psi i=%d n=%d\n",i,SA->n);
			exit(1);
		}
		#endif

		blk_len = SA->l;
		val = SA->R[(i / blk_len)*2];
		bitpos = SA->R[(i / blk_len)*2+1];
		want = i % blk_len;

		n = SA->n;
		B = SA->B;

		/* Table-assisted phase. */
		for (done = 0; done < want; ) {
			idx = getbitD(B,1+bitpos);
			chunk = R5n[idx];
			if (chunk != 0) {
				/* The table covers chunk codes; stop using it once it
				   would overshoot the target. */
				if (done+chunk > want) break;
				done += chunk;
				bitpos += R5b[idx];
				val += R5x[idx];
				continue;
			}
			/* No complete code in the table window: decode one directly. */
			bitpos += DECODENUM(B,bitpos,&delta);
			val += delta;
			if (val > n) {
				val = -1;
			}
			else {
				done++;
			}
		}

		/* Finish the remaining entries one code at a time. */
		while (done < want) {
			bitpos += DECODENUM(B,bitpos,&delta);
			val += delta;
			if (val > n) {
				val = -1;
			}
			else {
				done++;
			}
		}
		#ifdef DEBUG
		if (val < 0 || val > SA->n) {
			printf("error csa2_psi(%d) %d\n",i,val);
		}
		#endif
		return val;
	}