Example #1
0
// Find the suffix array SA of s[0..n-1], where every s[i] lies in {1..K}.
// The caller must provide s[n] = s[n+1] = s[n+2] = 0 (triples are read past
// the end of the text) and room for n entries in SA.
//
// Outline (skew / difference-cover scheme):
//   1. sort the suffixes starting at positions i with i % 3 != 0 by radix
//      sorting their leading triples, recursing on the triple names while
//      the names are not yet unique;
//   2. sort the i % 3 == 0 suffixes by first character, using the order
//      from step 1 to break ties;
//   3. merge both sorted sequences into SA.
//
// radixPass and leq are helpers defined elsewhere in the file; presumably a
// stable counting-sort pass keyed by the given array and a lexicographic
// "<=" on pairs/triples -- confirm against their definitions.
void suffixArray(int* s, int* SA, uint32_t n, uint32_t K) {
  // The merge step assumes n >= 2.  For n == 1 it would start with
  // t == n02, read the zero pad of SA12 as if it were a real suffix and
  // store SA[0] = 1.  Answer the trivial inputs directly instead.
  if (n < 2) {
    if (n == 1) SA[0] = 0;
    return;
  }

  uint32_t n0=(n+2)/3, n1=(n+1)/3, n2=n/3, n02=n0+n2;
  // three zero pad entries so that the recursive call may read past n02
  int* s12  = new int[n02 + 3];  s12[n02]= s12[n02+1]= s12[n02+2]=0;
  int* SA12 = new int[n02 + 3]; SA12[n02]=SA12[n02+1]=SA12[n02+2]=0;
  int* s0   = new int[n0];
  int* SA0  = new int[n0];

  // generate positions of mod 1 and mod  2 suffixes
  // the "+(n0-n1)" adds a dummy mod 1 suffix if n%3 == 1
  for (uint32_t i=0, j=0;  i < n+(n0-n1);  i++) if (i%3 != 0) s12[j++] = i;

  // lsb radix sort the mod 1 and mod 2 triples
  // (least significant character first; result ends up in SA12)
  radixPass(s12 , SA12, s+2, n02, K);
  radixPass(SA12, s12 , s+1, n02, K);
  radixPass(s12 , SA12, s  , n02, K);

  // find lexicographic names of triples: equal triples share a name,
  // names start at 1 and grow in sorted order
  int name = 0, c0 = -1, c1 = -1, c2 = -1;
  for (uint32_t i = 0;  i < n02;  i++) {
    if (s[SA12[i]] != c0 || s[SA12[i]+1] != c1 || s[SA12[i]+2] != c2) {
      name++;  c0 = s[SA12[i]];  c1 = s[SA12[i]+1];  c2 = s[SA12[i]+2];
    }
    if (SA12[i] % 3 == 1) { s12[SA12[i]/3]      = name; } // left half
    else                  { s12[SA12[i]/3 + n0] = name; } // right half
  }

  // recurse if names are not yet unique
  if (name < n02) {
    suffixArray(s12, SA12, n02, name);
    // store unique names in s12 using the suffix array
    for (uint32_t i = 0;  i < n02;  i++) s12[SA12[i]] = i + 1;
  } else // generate the suffix array of s12 directly
    for (uint32_t i = 0;  i < n02;  i++) SA12[s12[i] - 1] = i;

  // stably sort the mod 0 suffixes from SA12 by their first character
  for (uint32_t i=0, j=0;  i < n02;  i++) if (SA12[i] < n0) s0[j++] = 3*SA12[i];
  radixPass(s0, SA0, s, n0, K);

  // merge sorted SA0 suffixes and sorted SA12 suffixes
  // t starts at n0-n1 so that the dummy mod 1 suffix (if any) is skipped
  for (uint32_t p=0,  t=n0-n1,  k=0;  k < n;  k++) {
// text position of the sample suffix SA12[t]: left half -> mod 1, right half -> mod 2
#define GetI() (SA12[t] < n0 ? SA12[t] * 3 + 1 : (SA12[t] - n0) * 3 + 2)
    int i = GetI(); // pos of current offset 12 suffix
    int j = SA0[p]; // pos of current offset 0  suffix
    if (SA12[t] < n0 ?
        leq(s[i],       s12[SA12[t] + n0], s[j],       s12[j/3]) :
        leq(s[i],s[i+1],s12[SA12[t]-n0+1], s[j],s[j+1],s12[j/3+n0]))
    { // suffix from SA12 is smaller
      SA[k] = i;  t++;
      if (t == n02) { // done --- only SA0 suffixes left
        for (k++;  p < n0;  p++, k++) SA[k] = SA0[p];
      }
    } else {
      SA[k] = j;  p++;
      if (p == n0)  { // done --- only SA12 suffixes left
        for (k++;  t < n02;  t++, k++) SA[k] = GetI();
      }
    }
  }
// keep the helper macro local to this function
#undef GetI
  delete [] s12; delete [] SA12; delete [] SA0; delete [] s0;
}
	// Computes the suffix array of T[0..n-1] into SA[0..n-1].
	//
	// T      text; T[n], T[n+1] and T[n+2] are read as well and are expected
	//        to be 0, i.e. smaller than every real symbol.
	// SA     output; needs at least n entries.
	// n      number of symbols.
	// K      alphabet bound passed on to radixPass (symbols presumably in
	//        1..K -- confirm against radixPass).
	//
	// VI is presumably a vector<int> typedef; radixPass and leq are helpers
	// defined elsewhere.
	//
	// Steps: radix sort the triples starting at positions i % 3 != 0, name
	// them, recurse while names are not unique, sort the i % 3 == 0 suffixes
	// with the help of that order, then merge both sequences.
	void suffixArray(VI &T, VI &SA, int n, int K) {
		// The merge loop assumes n >= 2: for n == 1 it starts with t == n02
		// and would take the zero pad of SA12 for a real suffix, storing
		// SA[0] = 1.  Handle the trivial sizes here.
		if (n < 2) {
			if (n == 1)
				SA[0] = 0;
			return;
		}
		int n0 = (n + 2) / 3, n1 = (n + 1) / 3, n2 = n / 3, n02 = n0 + n2;
		// R and SA12 carry 3 extra entries because the recursive call reads
		// R[n02..n02+2].
		VI R(n02+3), SA12(n02+3), R0(n0), SA0(n0);
		// positions of the mod 1 and mod 2 suffixes; "+ (n0 - n1)" adds a
		// dummy mod 1 suffix when n % 3 == 1
		for (int i = 0, j = 0; i < n + (n0 - n1); i++)
			if (i % 3 != 0)
				R[j++] = i;
		// three passes, least significant character of the triple first
		radixPass(R, SA12, T.begin() + 2, n02, K);
		radixPass(SA12, R, T.begin() + 1, n02, K);
		radixPass(R, SA12, T.begin(), n02, K);
		// assign names: equal triples get the same name, names start at 1
		int name = 0, c0 = -1, c1 = -1, c2 = -1;
		for (int i = 0; i < n02; i++) {
			if (T[SA12[i]] != c0 || T[SA12[i] + 1] != c1 || T[SA12[i] + 2] != c2) {
				name++;
				c0 = T[SA12[i]];
				c1 = T[SA12[i] + 1];
				c2 = T[SA12[i] + 2];
			}
			if (SA12[i] % 3 == 1) {
				R[SA12[i] / 3] = name; // mod 1 suffixes: left half of R
			}
			else {
				R[SA12[i] / 3 + n0] = name; // mod 2 suffixes: right half of R
			}
		}
		if (name < n02) {
			// names are not unique yet: sort the reduced string recursively,
			// then replace the names by unique ranks
			suffixArray(R, SA12, n02, name);
			for (int i = 0; i < n02; i++)
				R[SA12[i]] = i + 1;
		} else
			// names are already unique ranks: invert them
			for (int i = 0; i < n02; i++)
				SA12[R[i] - 1] = i;
		// mod 0 suffixes in the order of their mod 1 successors, then one
		// pass by first character
		for (int i = 0, j = 0; i < n02; i++)
			if (SA12[i] < n0)
				R0[j++] = 3 * SA12[i];
		radixPass(R0, SA0, T.begin(), n0, K);
		// merge; t starts at n0 - n1 to skip the dummy suffix, if any
		for (int p = 0, t = n0 - n1, k = 0; k < n; k++) {
// text position of the sample suffix SA12[t]
#define GetI() (SA12[t] < n0 ? SA12[t] * 3 + 1 : (SA12[t] - n0) * 3 + 2)
			int i = GetI(); // pos of current offset 12 suffix
			int j = SA0[p]; // pos of current offset 0 suffix
			if (SA12[t] < n0 ? // different compares for mod 1 and mod 2 suffixes
				leq(T[i], R[SA12[t] + n0], T[j], R[j / 3]) :
				leq(T[i], T[i + 1], R[SA12[t] - n0 + 1], T[j], T[j + 1], R[j / 3 + n0])) { // suffix from SA12 is smaller
				SA[k] = i;
				t++;
				if (t == n02) // done --- only SA0 suffixes left
					for (k++; p < n0; p++, k++)
						SA[k] = SA0[p];
			} else { // suffix from SA0 is smaller
				SA[k] = j;
				p++;
				if (p == n0) // done --- only SA12 suffixes left
					for (k++; t < n02; t++, k++)
						SA[k] = GetI();
			}
		}
// keep the helper macro local to this method
#undef GetI
	}
Example #3
0
/*
 * Benchmark driver: suffix-sorts <num_gb> * 2^30 integer symbols.
 *
 * Usage: prog <num_gb> [input_file] ...
 *
 * The text is filled with the bytes of the given input files, in order, and
 * topped up with pseudo-random symbols when the files are too short.
 * Returns 0 on success and -1 on a usage, argument, allocation or file
 * error.
 */
int main(int argc, char** argv)
{
  const double one_gb = 1024.0*1024.0*1024.0;
  /* largest element count such that size+4 still fits in an int
     (~0u >> 1 equals INT_MAX on the usual two's complement targets) */
  const double max_elems = (double)(~0u >> 1) - 4.0;
  double num_gb;
  double want;
  int size;
  int* T;
  int* SA;
  FILE* f;
  int c, j;
  error_t err;

  if( argc == 1 ) {
    printf("Usage: %s <num_gb> [input_file] [input_file] ...\n", argv[0]);
    return -1;
  }
  if( 1 != sscanf(argv[1], "%lf", &num_gb) ) {
    fprintf(stderr, "Could not parse <num_gb> from '%s'\n", argv[1]);
    return -1;
  }
  printf("Will run test with %lf GB\n", num_gb);

  // Converting an out-of-range double to int is undefined, so validate
  // before converting; the negated comparison also rejects NaN.
  want = num_gb * one_gb;
  if( !(want >= 1.0) || want > max_elems ) {
    fprintf(stderr, "<num_gb> must give between 1 and %.0f elements\n",
            max_elems);
    return -1;
  }
  size = (int) want;

  printf("Allocating memory (size=%i)\n", size);
  // Do the size arithmetic in size_t so that size+4 and the byte count
  // cannot overflow int.
  if( (size_t) size + 4 > (size_t) -1 / sizeof *T ) {
    fprintf(stderr, "Requested size does not fit in memory\n");
    return -1;
  }
  T = malloc(((size_t) size + 4) * sizeof *T);
  if( ! T ) {
    fprintf(stderr, "Could not allocate the text buffer\n");
    return -1;
  }
  SA = malloc((size_t) size * sizeof *SA);
  if( ! SA ) {
    fprintf(stderr, "Could not allocate the suffix array\n");
    free(T);
    return -1;
  }

  // The 4 extra entries were allocated but never initialised; zero them so
  // that any read past the end of the text sees a defined value.
  // NOTE(review): presumably suffixArray expects zero padding here -- confirm.
  for( int i = 0; i < 4; i++ ) {
    T[size + i] = 0;
  }

  // read in some input files.
  // NOTE(review): file bytes are stored unshifted (0..255) while the random
  // fill below produces 1..256; confirm which range suffixArray expects.
  j = 0;
  for( int i = 2; i < argc && j < size; i++ ) {
    printf("Reading %s\n", argv[i]);
    // binary mode: the input is treated as raw bytes
    f = fopen(argv[i], "rb");
    if( ! f ) {
      fprintf(stderr, "Could not open %s\n", argv[i]);
      free(SA);
      free(T);
      return -1;
    }
    while( j < size &&
           EOF != (c = fgetc(f)) ) {
      T[j++] = c;
    }
    fclose(f);
  }
  if( j < size ) {
    // make some random data
    printf("Making random data\n");
    for( int i = j; i < size; i++ ) {
      T[i] = 1 + (rand() & 0xff);
    }
  }

  printf("Suffix sorting\n");
  // sort it.
  start_clock();
  err = suffixArray(T, SA, size, 256);
  die_if_err(err);
  stop_clock();
  print_timings("Suffix Sort Bytes", size);

  free(SA);
  free(T);
  return 0;
}
	void buildSA2(){
		if (N == 1) { this->SA[0] = 0, this->RA[0] = 0; return; }
		VI T(N+3), SA(N+3);
		for(int i = 0; i < A.size(); ++i)
			T[i] = A[i];
		suffixArray(T, SA, N, 256);
		for(int i = 0; i < N; ++i)
			RA[ SA[i] ] = i;
		for(int i = 0; i < N; ++i)
			this->SA[i] = SA[i];
	}
// Example: build the suffix array of "yabbadabbado".
int main() {
    // Substitution sigma = [a -> 1, b -> 2, d -> 3, o -> 4, y -> 5] in alphabetical order,
    // so yabbadabbado becomes 5 1 2 2 1 3 1 2 2 1 3 4.
    const int n = 12; // number of symbols in the text (it was 14, past the end of s)
    // The text followed by the three pad entries s[n], s[n+1], s[n+2] that
    // suffixArray reads; elements not listed in the initializer are zero.
    int s[n + 3] = {5, 1, 2, 2, 1, 3, 1, 2, 2, 1, 3, 4}; // yabbadabbado
    int b = 5; // alphabet size: symbols are 1..5
    int* SA = new int[n + 3];
    SA[n] = SA[n + 1] = SA[n + 2] = 0;
    suffixArray(s, SA, n, b);
    delete [] SA; // release the result buffer
}
Example #6
0
vector<size_t> const *SuffixArray<int>::genSuffixArray(
		vector<int> const &data, SortAlgorithm sortAlgorithm) const
{
	// Builds a newly allocated suffix array for `data`; the returned vector
	// is created with `new` and handed to the caller. STLSORT sorts the
	// suffix start indexes with d_compareFun; any other value uses the
	// suffix sort by McIlroy and McIlroy (ssort).
	if (sortAlgorithm != STLSORT)
	{
		// ssort works in place on a copy of the sequence, held by a shared
		// pointer so the temporary is released on return.
		QSharedPointer<vector<int> > work(new vector<int>(data));

		// The hash automaton returns [0..k-1] for k different words, but
		// ssort reserves 0 as the end-of-sequence marker and expects
		// [1..k]: shift every code up by one.
		for (vector<int>::iterator it = work->begin(); it != work->end(); ++it)
			*it = *it + 1;

		// Terminate the sequence with the 0 marker.
		work->push_back(0);

		// ssort overwrites the sequence with its suffix array.
		errormining::util::ssort(work.data());

		// Drop the extra entry added for the end marker.
		work->pop_back();

		// ssort needs a vector of ints (amongst others because it uses the
		// sign bit internally), while the suffix array class indexes the
		// data with size_t: convert while copying into the result.
		return new vector<size_t>(work->begin(), work->end());
	}

	// STL sort: start from the unsorted suffix array, in which element i
	// stands for the suffix beginning at index i, then sort it.
	vector<size_t> *indexes = new std::vector<size_t>(data.size());
	for (size_t pos = 0; pos < data.size(); ++pos)
		(*indexes)[pos] = pos;

	sort(indexes->begin(), indexes->end(), d_compareFun);

	return indexes;
}
Example #7
0
// Driver: for each test case reads a threshold t (0 ends the input) and two
// lines of text, joins them in str as <line1> 2 <line2> 1, builds the suffix
// array / LCP structures through suffixArray(), lcp() and init(), and prints
// the sum accumulated below.  str, len1, len2, n, i, j, stack, SA and RMQ
// are declared elsewhere in the file.
int main()
{
  int t,length,sum;
  // Require a successful conversion: comparing against EOF only would keep
  // looping with a stale t when the input is not a number.
  while(scanf("%d",&t)==1)
  {
    if(t==0)
      break;
    memset(str,0,sizeof(str));
    getchar(); // skip the character right after the number (normally '\n')

    // gets() has no length limit and was removed in C11; use fgets with an
    // explicit bound.  Two bytes are held back for the separator and for
    // the second string, so the later writes stay inside str.
    if(fgets(str,(int)sizeof(str)-2,stdin)==NULL)
      str[0]='\0';
    str[strcspn(str,"\n")]='\0'; // fgets keeps the newline, gets did not
    len1=strlen(str);
    length=len1;
    str[len1++]=2; // separator between the two strings

    if(fgets(str+len1,(int)sizeof(str)-len1,stdin)==NULL)
      str[len1]='\0';
    str[len1+strcspn(str+len1,"\n")]='\0';
    len2=strlen(str+len1);
    n=len1+len2;
    str[n++]=1; // end marker

    suffixArray();
    lcp();
    init();
    for(i=2;i<n;i++)
    {
      stack[i-1].index=SA[i]+1;
      // NOTE(review): 9 is a hard-coded threshold; it presumably marks the
      // boundary between the two input strings (cf. len1) -- confirm.
      if(stack[i-1].index>=9)
        stack[i-1].flag=true;
      else
        stack[i-1].flag=false;
    }
    length=2*length;sum=0;
    for(i=1;i<length;i++)
    {
      j=i+1;
      // NOTE(review): j has no upper bound here; the scan ends only when
      // RMQ(...) drops below t -- confirm this always happens in range.
      while(RMQ(stack[i].index,stack[j].index)>=t)
      {
        // only pairs whose flags differ contribute to the sum
        if(stack[i].flag!=stack[j].flag)
          sum+=RMQ(stack[i].index,stack[j].index)-t+1;
        j++;
      }
    }
    printf("%d\n",sum);
  }
  return 0;
}
Example #8
0
// Driver: reads N and a string of N capital letters into A, doubles the
// string, builds its suffix array and LCP array, prints both and finally the
// sum of the LCP values divided by (2N - 1).  N, A, S, SA and LCP are
// declared elsewhere in the file.
// NOTE(review): "%s" is unbounded; A must be large enough for 2N + 3 chars.
int main() 
{
#ifndef ONLINE_JUDGE
	freopen("input.txt", "rt", stdin);
#endif
	// "%s" takes a pointer to char, so pass A itself rather than &A.  Also
	// stop on malformed input: a missing or non-positive N would turn the
	// memcpy length below into a huge unsigned value.
	if (scanf("%d\n%s", &N, A) != 2 || N <= 0)
		return 1;
	memcpy(A + N, A, N); // append a second copy of the string
	N = 2 * N;

	A[N] = A[N + 1] = A[N + 2] = 0;
	int i;

	// suffixArray expects symbols in 1..K and uses 0 as padding, so map
	// 'A'..'Z' to 1..26; mapping 'A' to 0 would make it indistinguishable
	// from the pad.
	for (i = 0; i < N; i++)
		S[i] = A[i] - 'A' + 1;
	// the pad must be present in the array that is actually sorted
	S[N] = S[N + 1] = S[N + 2] = 0;

	suffixArray(S, SA, N, 26);

	for (i = 0; i < N; i++) 
		printf("%d ", SA[i]);
	printf("\n");
	LCP = lcp(SA, A, N);

	double answer = 0;
	if (LCP)
	{
		for (i = 0; i < N; i++)
		{
			printf("%d ", LCP[i]);
			answer += LCP[i];
		}
		printf("\n");

		answer /= N - 1;
		printf("%.3lf\n", answer);
	}
	
	return 0;
}
// Find the suffix array SA of T[0..n-1], where every T[i] lies in {1..K}.
// Requires T[n] = T[n+1] = T[n+2] = 0 and room for n entries in SA.
// Inputs with n < 2 are answered directly (see the guard below).
// radixPass and leq are helpers defined elsewhere in the file; presumably a
// stable counting-sort pass and a lexicographic "<=" on pairs/triples --
// confirm against their definitions.
void suffixArray(int* T, int* SA, int n, int K) {
    // The merge in step 3 assumes n >= 2: for n == 1 it starts with
    // t == n02, reads the zero pad of SA12 as if it were a real suffix and
    // stores SA[0] = 1.  Handle the trivial sizes up front.
    if (n < 2) {
        if (n == 1) SA[0] = 0;
        return;
    }

    int n0 = (n + 2) / 3, n1 = (n + 1) / 3, n2 = n / 3, n02 = n0 + n2;
    // R and SA12 get three zero pad entries because the recursive call
    // reads R[n02..n02+2]
    int* R = new int[n02 + 3];
    R[n02] = R[n02 + 1] = R[n02 + 2] = 0;
    int* SA12 = new int[n02 + 3]; SA12[n02] = SA12[n02 + 1] = SA12[n02 + 2] = 0;
    int* R0 = new int[n0];
    int* SA0 = new int[n0];

    //******* Step 0: Construct sample ********
    // generate positions of mod 1 and mod 2 suffixes
    // the "+(n0-n1)" adds a dummy mod 1 suffix if n%3 == 1
    for (int i = 0, j = 0; i < n + (n0 - n1); i++) if (i%3 != 0) R[j++] = i;

    //******* Step 1: Sort sample suffixes ********
    // lsb radix sort the mod 1 and mod 2 triples
    // (least significant character first; the result ends up in SA12)
    radixPass(R, SA12, T + 2, n02, K);
    radixPass(SA12, R, T + 1, n02, K);
    radixPass(R, SA12, T, n02, K);

    // find lexicographic names of triples and
    // write them to correct places in R
    // (equal triples share a name; names start at 1)
    int name = 0, c0 = -1, c1 = -1, c2 = -1;
    for (int i = 0; i < n02; i++) {
        if (T[SA12[i]] != c0 || T[SA12[i] + 1] != c1 || T[SA12[i] + 2] != c2) {
            name++;
            c0 = T[SA12[i]];
            c1 = T[SA12[i] + 1];
            c2 = T[SA12[i] + 2];
        }
        if (SA12[i] % 3 == 1) {
            R[SA12[i] / 3] = name; // write to R1
        } else {
            R[SA12[i] / 3 + n0] = name; // write to R2
        }
    }

    // recurse if names are not yet unique
    if (name < n02) {
        suffixArray(R, SA12, n02, name);
        // store unique names in R using the suffix array
        for (int i = 0; i < n02; i++) R[SA12[i]] = i + 1;
    } else // generate the suffix array of R directly
        for (int i = 0; i < n02; i++) SA12[R[i] - 1] = i;

    //******* Step 2: Sort nonsample suffixes ********
    // stably sort the mod 0 suffixes from SA12 by their first character
    for (int i = 0, j = 0; i < n02; i++) if (SA12[i] < n0) R0[j++] = 3 * SA12[i];
    radixPass(R0, SA0, T, n0, K);

    //******* Step 3: Merge ********
    // merge sorted SA0 suffixes and sorted SA12 suffixes
    // t starts at n0-n1 so that the dummy mod 1 suffix (if any) is skipped
    for (int p = 0, t = n0 - n1, k = 0; k < n; k++) {
        // text position of the sample suffix SA12[t]
        #define GetI() (SA12[t] < n0 ? SA12[t] * 3 + 1 : (SA12[t] - n0) * 3 + 2)
        int i = GetI(); // pos of current offset 12 suffix
        int j = SA0[p]; // pos of current offset 0 suffix
        if (SA12[t] < n0 ? // different compares for mod 1 and mod 2 suffixes
        leq(T[i], R[SA12[t] + n0], T[j], R[j / 3]) :
        leq(T[i], T[i+1], R[SA12[t] - n0 + 1], T[j], T[j + 1], R[j / 3 + n0])) {
            // suffix from SA12 is smaller
            SA[k] = i;
            t++;
            if (t == n02) // done --- only SA0 suffixes left
                for (k++; p < n0; p++, k++) SA[k] = SA0[p];
        } else { // suffix from SA0 is smaller
            SA[k] = j;
            p++;
            if (p == n0) // done --- only SA12 suffixes left
                for (k++; t < n02; t++, k++) SA[k] = GetI();
        }
    }
    // keep the helper macro local to this function
    #undef GetI
    delete [] R; delete [] SA12; delete [] SA0; delete [] R0;
}