示例#1
0
	/*
	 * Batch lookup of the suffix-array values for every index in [l, r].
	 *
	 * Every index j of the range is walked forward with csa_psi() until it
	 * reaches either a sampled index (a multiple of SA->two, answered directly
	 * from SA->SA) or another index of the same range.  In the second case the
	 * value of j is derived afterwards from the value of the index it reached,
	 * so shared parts of the walks are evaluated only once.
	 *
	 * Returns a malloc'ed array owned by the caller (release it with free());
	 * entry j-l belongs to index j and holds the looked-up value minus one
	 * (NOTE(review): presumably turning 1-based positions into 0-based ones).
	 * For an empty range (r < l) a valid, freeable array without meaningful
	 * entries is returned.  Returns NULL when memory cannot be allocated.
	 */
	unsigned long *csa_batchlookup2(CSA *SA,int l, int r) {
		unsigned long *I;		 /* results, indexed by j-l; 0 means "not resolved yet" */
		int *V;					 /* V[j-l]: psi steps from j to the in-range index it reached */
		int *J;					 /* J[i-l]: index whose walk ended at i, or -1 if none */
		int v;					 /* psi steps taken so far */
		int q;
		int i,j;
		int two;
		int *sa;
		int f,s;
		size_t n;				 /* number of indexes in [l, r] */

		two = SA->two;
		sa = SA->SA;

		n = (r >= l) ? (size_t)(r - l) + 1 : 0;

		/* Never request zero bytes: malloc(0) may legitimately return NULL. */
		I = (unsigned long *)malloc((n ? n : 1)*sizeof(*I));
		V = (int *)malloc((n+1)*sizeof(*V));
		J = (int *)malloc((n+1)*sizeof(*J));
		if (I == NULL || V == NULL || J == NULL) {
			free(I);  free(V);  free(J);
			return NULL;
		}

		for (j=l; j<=r; j++) {
			J[j-l] = -1;
			I[j-l] = 0;
		}

		/* Pass 1: walk each index until it is sampled or re-enters [l, r]. */
		for (j=l; j<=r; j++) {
			f = 0;
			i = j;  v = 0;
			while (i % two !=0) {
				i = csa_psi(SA,i);
				v++;
				if (l <= i && i <= r) {
					/* j depends on i: remember the distance and the back link */
					V[j-l] = v;
					J[i-l] = j;
					f = 1;
					break;
				}
			}
			if (f==0) {
				/* reached a sampled index: the value is known immediately */
				i = i / two;
				I[j-l] = sa[i]-v;
			}
		}

		/* Pass 2: propagate known values backwards along the recorded links. */
		for (j=l; j<=r; j++) {
			if (I[j-l] != 0) {
				q = j;
				while (J[q-l] != -1) {
					s = I[q-l];
					i = J[q-l];
					v = V[i-l];
					I[i-l] = s - v;
					J[q-l] = -1;	 /* link consumed */
					q = i;
				}
			}
		}

		for (j=l; j<=r; j++)
			I[j-l]--;

		free(V);  free(J);
		return I;
	}
/*////////////////////////////
//Accessing the indexed text//
////////////////////////////*/
/*
 * Finds every occurrence of pattern[0..length-1] in the indexed text and
 * extracts, for each one, the occurrence together with up to numc characters
 * of context on either side.
 *
 *   index            the CSA, passed as an opaque pointer
 *   numocc           receives the number of occurrences
 *   snippet_text     receives one malloc'ed buffer made of *numocc slots of
 *                    length+2*numc bytes each; snippet i starts at the
 *                    beginning of slot i
 *   snippet_lengths  receives a malloc'ed array with the number of valid
 *                    bytes in each slot
 *
 * The caller owns both buffers.  When the pattern does not occur, *numocc is
 * 0 and both pointers are NULL.
 *
 * Returns 0 on success and 1 when memory cannot be allocated; in the latter
 * case nothing is left allocated, *numocc is 0 and both pointers are NULL.
 */
int display(void *index, UCHAR *pattern, ULONG length, ULONG numc, ULONG *numocc, UCHAR **snippet_text, ULONG **snippet_lengths) {
  int l,r;
  int pos;
  ULONG *occ, i, j, from, to, len, x;
  ULONG count;
  UCHAR *text_aux;
  CSA *SA=(CSA *) index;

  *numocc = 0;
  *snippet_text = NULL;
  *snippet_lengths = NULL;

  csa_bsearch(pattern,length,SA,&l,&r);
  /* csa_bsearch leaves l > r when the pattern does not occur */
  if (l > r) return 0;
  count = r-l+1;

  occ = csa_batchlookup2(SA,l,r);
  if (!occ) return 1;

  *snippet_lengths = (ULONG *) malloc(count*sizeof(ULONG));
  if (!(*snippet_lengths)) {
    free(occ);
    return 1;
  }
  *snippet_text = (UCHAR *) malloc(count*(length+2*numc)*sizeof(UCHAR));
  if (!(*snippet_text)) {
    free(*snippet_lengths);
    *snippet_lengths = NULL;
    free(occ);
    return 1;
  }
  *numocc = count;
  text_aux=*snippet_text;

  for (i=0;i<count;i++) {
    x=occ[i];
    /* clamp the context window to the beginning and the end of the text */
    if (x>numc) from = x-numc;
    else from=0;
    to= ((int)(x+length+numc-1)<(int)(SA->n-1)?(x+length+numc-1):(SA->n-1));
    len =to-from+1;
    /* csa_inverse is given from+1 here, as in extract() */
    pos = csa_inverse(SA,from+1);
    for (j=0; (int)j<(int)len;j++) {
      text_aux[j] = csa_T(SA,pos);
      pos= csa_psi(SA,pos);
    }
    text_aux+=length+2*numc;
    (*snippet_lengths)[i] = len;
  }
  free(occ);
  return 0;
}
示例#3
0
	/*
	 * Writes to p, as a NUL-terminated string, the part of the text line
	 * around text position suf, limited to maxlen characters.
	 *
	 * A window of 2*maxlen characters starting at position max(suf-maxlen, 1)
	 * is decoded into a scratch buffer; the window is then cut at the newline
	 * (0x0a) characters found after and before the anchor position.
	 *
	 * p must have room for maxlen+1 bytes.  When maxlen <= 0 an empty string
	 * is produced.  The process is terminated with exit(1) if the scratch
	 * buffer cannot be allocated.
	 */
	void csa_decode1line(unsigned char *p,CSA *SA,int suf,int maxlen) {
		int i,k,m,pos;
		int anchor;				 /* offset of suf inside the scratch buffer */
		unsigned char *tmp;

		if (maxlen <= 0) {		 /* nothing can be copied; avoid decoding an empty window */
			*p = 0;
			return;
		}

		m = maxlen*2;
		tmp = (unsigned char *)malloc(m+1);
		if (tmp==NULL) {perror("csa_decode1line");  exit(1);}

		k = suf - maxlen;  if (k <= 0) k = 1;
		pos = csa_inverse(SA,k);

		for (i = 0; i < m; i++) {
			tmp[i] = csa_T(SA,pos);
			pos = csa_psi(SA,pos);
		}

		anchor = suf-k;
		if (anchor < 0) anchor = 0;	 /* never index in front of the buffer */

		/* Forward scan for the end of the line. */
		/* NOTE(review): on a hit the end is moved one character before the
		   newline and the copy below excludes index m itself, so the last
		   character of the line is dropped -- confirm that this is intended. */
		for (i = anchor;  i < m;  i++) {
			if (tmp[i] == 0x0a) {i--;  break;}
		}
		m = i;

		/* Backward scan for the start of the line. */
		for (i = anchor;  i >= 0;  i--) {
			if (tmp[i] == 0x0a) {i++;  break;}
		}
		/* No newline before the anchor: the loop ran off the front (i == -1),
		   so the line starts at the beginning of the window. */
		if (i < 0) i = 0;

		if (m-i > maxlen) i = m-maxlen;
		while (i < m) *p++ = tmp[i++];
		*p = 0;
		free(tmp);
	}
示例#4
0
	/*
	 * Decodes len characters into p, starting at suffix-array index pos:
	 * each character is read with csa_T() and the index is then advanced
	 * with csa_psi().  p must have room for len bytes; no terminator is
	 * written.  Nothing is written when len <= 0.
	 */
	void csa_decode2(unsigned char *p,CSA *SA,int pos,int len) {
		int k;
		for (k = 0; k < len; k++) {
			p[k] = csa_T(SA,pos);
			pos = csa_psi(SA,pos);
		}
	}
示例#5
0
	/*
	 * Looks up the suffix-array value of index i.
	 *
	 * Only indexes that are multiples of SA->two are stored (in SA->SA); any
	 * other index is advanced with csa_psi() until a stored one is reached,
	 * and the number of steps taken is subtracted from the stored value.
	 */
	int csa_lookup(CSA *SA, int i) {
		const int step = SA->two;
		int hops;
		for (hops = 0; i % step != 0; hops++) {
			i = csa_psi(SA,i);
		}
		return SA->SA[i / step] - hops;
	}
示例#6
0
	/*
	 * Decodes len characters of the text into p, beginning at text position
	 * suf.  The position is first mapped to a suffix-array index with
	 * csa_inverse(); characters are then produced with csa_T() while the
	 * index is advanced with csa_psi().  p must have room for len bytes; no
	 * terminator is written.
	 */
	void csa_decode(unsigned char *p,CSA *SA,int suf,int len) {
		int k;
		int cur = csa_inverse(SA,suf);
		for (k = 0; k < len; k++) {
			p[k] = csa_T(SA,cur);
			cur = csa_psi(SA,cur);
		}
	}
示例#7
0
	/*
	 * Maps text position suf to its suffix-array index.
	 *
	 * SA->ISA stores the index for one position out of every SA->two2
	 * (positions 1, two2+1, 2*two2+1, ...).  The lookup starts from the
	 * stored position at or before suf and applies csa_psi() once for each
	 * remaining position.
	 */
	int csa_inverse(CSA *SA, int suf) {
		const int step = SA->two2;
		const int block = (suf-1)/step;
		int cur = SA->ISA[block];
		int at;

		for (at = block*step+1; at < suf; at++) {
			cur = csa_psi(SA,cur);
		}
		return cur;
	}
/*
 * Extracts the text substring [from, to] (inclusive) from the index.
 *
 *   index           the CSA, passed as an opaque pointer
 *   from, to        first and last position wanted; to is clamped to the
 *                   last position of the text (SA->n - 1)
 *   snippet         receives a malloc'ed buffer owned by the caller (not
 *                   NUL-terminated), or NULL when the range is empty
 *   snippet_length  receives the number of bytes stored in *snippet
 *
 * Returns 0 on success (including an empty result) and 1 when memory cannot
 * be allocated, in which case *snippet is NULL and *snippet_length is 0.
 */
int extract(void *index, ULONG from, ULONG to, UCHAR **snippet, ULONG *snippet_length){
   CSA *SA=(CSA *) index;
   ULONG n = SA->n;
   ULONG j, len;
   int pos;

   *snippet = NULL;
   *snippet_length=0;

   /* An empty text has no valid range; n-1 would wrap around below. */
   if (n == 0) return 0;
   if (to >= n) to=n-1;
   if (from > to) return 0;

   len =to-from+1;
   *snippet = (UCHAR *) malloc((len)*sizeof(UCHAR));
   if (!(*snippet)) return 1;

   /* csa_inverse is given from+1 for text offset from */
   pos = csa_inverse(SA,from+1);
   for (j=0; j<len;j++) {
     (*snippet)[j]=csa_T(SA,pos);
     pos= csa_psi(SA,pos);
   }
   (*snippet_length)=len;
   return 0;
}
示例#9
0
	/*
	 * Extracts the text substring [from, to] (inclusive) from the index.
	 *
	 *   index           the CSA, passed as an opaque pointer
	 *   from, to        first and last position wanted; to is clamped to the
	 *                   last position of the text (SA->n - 1)
	 *   snippet         receives a malloc'ed buffer owned by the caller (not
	 *                   NUL-terminated), or NULL when the range is empty
	 *   snippet_length  receives the number of bytes stored in *snippet
	 *
	 * Returns 0 on success (including an empty result) and 1 when memory
	 * cannot be allocated, in which case *snippet is NULL and
	 * *snippet_length is 0.
	 */
	int extract(void *index, ulong from, ulong to, uchar **snippet, ulong *snippet_length) {
		CSA *SA=(CSA *) index;
		ulong n = SA->n;
		ulong j, len;
		int pos;

		*snippet = NULL;
		*snippet_length=0;

		/* An empty text has no valid range; n-1 would wrap around below. */
		if (n == 0) return 0;
		if (to >= n) to=n-1;
		if (from > to) return 0;

		len =to-from+1;
		*snippet = (uchar *) malloc((len)*sizeof(uchar));
		if (!(*snippet)) return 1;

		/* csa_inverse is given from+1 for text offset from */
		pos = csa_inverse(SA,from+1);
		for (j=0; j<len;j++) {
			(*snippet)[j]=csa_T(SA,pos);
			pos= csa_psi(SA,pos);
		}
		(*snippet_length)=len;
		return 0;
	}
示例#10
0
	/*
	 * Evaluates csa_psi() on the wrapped index for argument i + 1 and
	 * returns the result minus one.
	 * NOTE(review): presumably this converts between the 0-based indexes of
	 * this class and the 1-based indexes used by the CSA routines -- confirm.
	 */
	size_t TextIndexCSA::getPsi(size_t i) const{
		return static_cast<size_t>(csa_psi(csa, i + 1) - 1);
	}
示例#11
0
	/*
	 * Backward search for key[0..keylen-1].
	 *
	 * The key is processed from its last character to its first.  For each
	 * character c the candidate range starts as [SA->C[c-1]+1, SA->C[c]]
	 * (or [1, SA->C[0]] when c is 0) and is then narrowed against the range
	 * [pl, pr] obtained for the characters processed so far: r becomes the
	 * largest index whose Psi value is <= pr and l the smallest index whose
	 * Psi value is >= pl.
	 *
	 * The final range is stored in *li and *ri; it is empty (l > r) when the
	 * search stopped before the whole key was matched.  The return value is
	 * the number of key characters, counted from the end of the key, that
	 * were matched.
	 *
	 * Three interchangeable implementations of each narrowing step are kept
	 * under #if: a linear scan and a binary search that both call csa_psi()
	 * (disabled), and the active one, which works on SA->R and SA->B
	 * directly.
	 * NOTE(review): SA->R appears to hold, for every block of SA->l indexes,
	 * a pair (Psi value at the start of the block, offset of that block's
	 * encoded deltas in SA->B); confirm against the code that builds it.
	 * NOTE(review): keylen <= 0 makes the first statement read key[-1];
	 * callers must pass a non-empty key.
	 * NOTE(review): the disabled linear-scan variants use a variable j that
	 * is not declared in this function.
	 */
	int csa_bsearch(unsigned char *key,int keylen,CSA *SA,int *li,int *ri) {
		int c,h,l,r,m,ll,rr,pl,pr;
		int x,b,w,d,n,*R;
		unsigned short *B;
		int len;

		/* Initial range: all indexes belonging to the last key character. */
		c = key[keylen-1];
		r = SA->C[c];  if (c>0) l = SA->C[c-1]+1; else l = 1;
		len = 0;
		if (l > r) goto end;
		len++;
		for (h = keylen-2; h >= 0; h--) {
			/* [pl, pr] is the range matched so far; [l, r] restarts as the
			   full range of the next character to the left. */
			pl = l;  pr = r;
			c = key[h];
			r = SA->C[c];  if (c>0) l = SA->C[c-1]+1; else l = 1;
			if (l > r) goto end;
			#if 0
			while (1) {			 // find maximum r such that Psi[r] <= pr
				j = csa_psi(SA,r);
				if (j <= pr) break;
				r--;
				//if (l > r) goto end;
			}
			#else
			#if 0
			ll = l;  rr = r;
			while (ll <= rr) {
				m = (ll + rr) / 2;
				if (csa_psi(SA,m) <= pr) ll = m+1; else rr = m-1;
			}
			r = ll-1;
			#else
			/* Active variant for the upper bound: binary search over the
			   per-block entries R[m*2], then decode deltas inside the
			   selected block. */
			R = SA->R;  B = SA->B;  w = SA->l;  n = SA->n;
			ll = l / w + 1;
			rr = r / w;
			while (ll <= rr) {
				m = (ll + rr) / 2;
				if (R[m*2] <= pr) ll = m+1; else rr = m-1;
			}
			/* m: first index of the selected block; x: value stored for it;
			   b: current offset into B. */
			m = (ll-1)*w;
			x = R[(m / w)*2];
			b = R[(m / w)*2+1];

			#if 1
			/* Decode forward until index l is reached.  A decoded value
			   above n resets x to -1 and does not advance m. */
			while (m < l) {
				b += DECODENUM(B,b,&d);
				x += d;
				//if (x > n) printf("??? \n");
				if (x > n) {x = -1;  m--;}
				m++;
			}
			#endif
			/* Keep decoding while the value is still <= pr; the last index
			   that satisfied the condition becomes the new r. */
			while (x <= pr && m <= r) {
				b += DECODENUM(B,b,&d);
				x += d;
				//if (x > n) printf("??? \n");
				m++;
			}
			r = m-1;
			#endif
			#endif
			#if 0
			while (1) {			 // find minimum l such that Psi[l] >= pl
				j = csa_psi(SA,l);
				if (j >= pl) break;
				l++;
				//if (l > r) goto end;
			}
			#else
			#if 0
			ll = l;  rr = r;
			while (ll <= rr) {
				m = (ll + rr) / 2;
				if (csa_psi(SA,m) >= pl) rr = m-1; else ll = m+1;
			}
			l = rr+1;
			#else
			/* Active variant for the lower bound, mirroring the code above;
			   it reuses R, B, w and n set there. */
			//ll = l / w + 1;
			ll = l / w;
			rr = r / w;
			while (ll <= rr) {
				m = (ll + rr) / 2;
				if (R[m*2] >= pl) rr = m-1; else ll = m+1;
			}
			m = rr*w;
			x = R[(m / w)*2];
			b = R[(m / w)*2+1];

			/* Decode forward until index l is reached. */
			while (m < l) {
				b += DECODENUM(B,b,&d);
				x += d;
				if (x > n) {x = -1;  m--;}
				m++;
			}
			/* Skip indexes whose value is still below pl; the first index
			   that is not skipped becomes the new l. */
			while (x < pl && m <= r) {
				b += DECODENUM(B,b,&d);
				x += d;
				m++;
			}
			l = m;
			#endif
			#endif
			if (l > r) goto end;
			len++;
		}
		end:
		*li = l;  *ri = r;
		return len;
	}
示例#12
0
	/*
	 * Batch lookup of the suffix-array values for every index in [l, r],
	 * returned in sorted order.
	 *
	 * Indexes that are multiples of SA->two are answered directly from
	 * SA->SA.  The remaining ones are advanced together, one csa_psi() step
	 * per round, for at most len rounds; whatever is still unresolved after
	 * that is finished individually with csa_lookup().  The first step of
	 * every index is not computed with csa_psi() but decoded sequentially
	 * from SA->B, starting from the block entry in SA->R.
	 *
	 * Returns a malloc'ed array owned by the caller (release it with free()):
	 * element 0 holds the number of results and elements 1..count hold the
	 * values, ordered with qsort() and intcompare.  For an empty range
	 * (r < l) the array holds just a count of 0.  Returns NULL when memory
	 * cannot be allocated.
	 */
	int *csa_batchlookup3(CSA *SA,int l, int r,int len) {
		int *I;					 /* result: count followed by the values */
		int *P;					 /* indexes still being advanced */
		int v;					 /* psi steps applied so far to the entries of P */
		int m;					 /* number of values already stored in I */
		int q;					 /* number of live entries in P */
		int i,j;
		int two;
		int *sa;
		int k,b,d,x,n,w;
		int count;				 /* number of indexes in [l, r] */
		unsigned short *B;

		count = (r >= l) ? r-l+1 : 0;

		I =(int *) malloc((count+1)*sizeof(*I));
		P =(int *) malloc((count+1)*sizeof(*P));
		if (I == NULL || P == NULL) {
			free(I);  free(P);
			return NULL;
		}
		if (count == 0) {
			/* nothing to look up; do not touch the index or call qsort */
			I[0] = 0;
			free(P);
			return I;
		}

		n = SA->n;
		B = SA->B;
		two = SA->two;
		sa = SA->SA;
		w = SA->l;

		/* Position the decoder on index l: start from the entry of the block
		   containing l and decode l % w deltas.  A decoded value above n
		   resets x to -1 and is not counted as a step. */
		x = SA->R[(l / w)*2];
		b = SA->R[(l / w)*2+1];
		j = l % w;
		for (k=0; k<j; k++) {
			b += DECODENUM(B,b,&d);
			x += d;
			if (x > n) {x = -1;  k--;}
		}

		/* First round: x is the decoded value for index i. */
		for (m = 0, q = 0, i = l; i <= r; i++) {
			if (i % two == 0) {
				I[1+m] = sa[i / two];
				m++;
			}
			else {
				P[q++] = x;
			}
			b += DECODENUM(B,b,&d);
			x += d;
			if (x > n) {
				x = -1;
				b += DECODENUM(B,b,&d);
				x += d;
			}
		}
		v = 1;

		/* Further rounds: resolve entries that reached a sampled index and
		   advance the others, compacting P in place. */
		while (q > 0 && v <= len) {
			for (k = 0, j = 0; j < q; j++) {
				i = P[j];
				if (i % two == 0) {
					I[1+m] = sa[i / two] - v;
					m++;
				}
				else {
					P[k++] = csa_psi(SA,i);
				}
			}
			q = k;
			v++;
		}

		/* Round limit reached: finish the leftovers one by one. */
		for (j = 0; j < q; j++) {
			I[1+m] = csa_lookup(SA,P[j])-v;
			m++;
		}

		qsort(I+1, count, sizeof(int), intcompare);
		I[0] = count;
		free(P);
		return I;
	}