/* Builds the suffix array of S by repeated doubling.
   Works entirely on variables declared outside this function:
   S (input string), N (its length), sa[] (suffix start positions,
   reordered by every sort), pos[] (current rank of the m-gram that
   starts at each position), tmp[] (scratch ranks indexed by sorted
   order) and gap (read by sufCmp, which is defined elsewhere).
   On return sa[] holds the order produced by the last sort and
   pos[] the final, pairwise distinct ranks.
   An empty S leaves sa[], pos[] and tmp[] untouched. */
void buildSA() {
    N = S.length();

    /* An empty string has no suffixes. Without this guard the
       termination test at the bottom of the loop would read
       tmp[N - 1], i.e. tmp[-1], which is out of bounds and gives
       the otherwise endless loop no defined way to stop. */
    if (N == 0) return;

    /* Initialize sa[] and pos[]. For sa[] we assume the order the
       suffixes have in the given string. For pos[] we use the
       characters themselves as the lexicographic rank of each
       1-gram. */
    rep(i, N) sa[i] = i, pos[i] = S[i];

    /* Gap is the length of the m-gram in each step, divided by 2.
       We start with 2-grams, so gap is 1 initially. It then increases
       to 2, 4, 8 and so on. */
    for (gap = 1;; gap *= 2) {
        /* We sort by (gap*2)-grams: */
        sort(sa, sa + N, sufCmp);

        /* The first suffix in sorted order always gets rank 0. Set it
           explicitly instead of relying on tmp[0] still holding
           whatever value it had before this call. */
        tmp[0] = 0;

        /* Compute the lexicographic renaming (rank) of each m-gram
           sorted above by comparing every entry with its successor
           in sorted order. If they are identical the comparison
           yields 0 and the rank does not increase; otherwise it
           yields 1 and the rank increases by 1. */
        rep(i, N - 1) tmp[i + 1] = tmp[i] + sufCmp(sa[i], sa[i + 1]);

        /* tmp holds the rank by sorted index. Map it into pos so the
           next step can look the rank up by text position. */
        rep(i, N) pos[sa[i]] = tmp[i];

        /* If the largest rank generated is N - 1, all m-grams are
           different and the order is final. */
        if (tmp[N - 1] == N - 1) break;
    }
}
Beispiel #2
0
 /* Builds the suffix array of the C string S by repeated doubling.
    Works on variables declared outside this function: S, N, sa[],
    pos[], tmp[] and gap (read by sufCmp, which is defined elsewhere).
    On return sa[] holds the order produced by the last sort and
    pos[] the rank assigned to each starting position.
    An empty S leaves sa[], pos[] and tmp[] untouched. */
 void buildSA()
 {
     N = strlen(S);

     /* No suffixes to order. Without this guard the exit test below
        would read tmp[N - 1], i.e. tmp[-1], which is out of bounds,
        and the loop has no other way to terminate. */
     if (N == 0) return;

     /* Start from text order and use the characters as initial ranks. */
     REP(i, N) sa[i] = i, pos[i] = S[i];

     /* gap doubles every round: 1, 2, 4, ... */
     for (gap = 1;; gap *= 2)
     {
         sort(sa, sa + N, sufCmp);

         /* The first entry in sorted order has rank 0. Written
            explicitly so the result does not depend on the previous
            contents of tmp[0]. */
         tmp[0] = 0;

         /* The rank grows by sufCmp's result for each pair of
            neighbours in sorted order. */
         REP(i, N - 1) tmp[i + 1] = tmp[i] + sufCmp(sa[i], sa[i + 1]);

         /* Re-index the ranks from sorted index to text position. */
         REP(i, N) pos[sa[i]] = tmp[i];

         /* The largest rank reached N - 1, so all ranks are distinct
            and the order is final. */
         if (tmp[N - 1] == N - 1) break;
     }
 }