// Construct the suffix array of the integer string S[0..N-1] and store it
// into SA, using the DC3 / skew scheme: sort the suffixes that start at
// positions i with i % 3 != 0 (the "sample") by recursion, sort the
// remaining suffixes with the help of the sample ranks, then merge.
//
//   N  - number of symbols in S.
//   S  - input symbols; S[N] and S[N+1] must be set to zero by the caller.
//        NOTE(review): N is passed to radixsort_pass as the last argument
//        for the symbol-keyed passes, presumably as the key bound, which
//        would require every symbol to be <= N -- confirm against
//        radixsort_pass.
//   SA - output, N+1 entries. SA[0] is always N (the empty suffix); the
//        start positions of the N non-empty suffixes follow.
//
// Inputs with N < 2 are answered directly. The process is aborted if a
// scratch buffer cannot be allocated.
void suffix_array(int N, int *S, int *SA)
{
    // With fewer than two symbols the sample is empty (N12 == 0); the
    // general path would then read tmp[0] before anything was written to
    // it and use that value as an index. These suffix arrays are trivial.
    if (N < 2) {
        if (N >= 0) {
            SA[0] = N;
        }
        if (N == 1) {
            SA[1] = 0;
        }
        return;
    }

    int N0 = (N+2)/3, N1 = (N+1)/3, N2 = N/3, N12 = N1+N2;

    int *smpl = (int *)malloc((size_t)(N12+2) * sizeof *smpl);
    int *tmp  = (int *)malloc((size_t)(N12+2) * sizeof *tmp);
    int *SA12 = (int *)malloc((size_t)(N12+2) * sizeof *SA12);
    int *rank = (int *)malloc((size_t)(N+3) * sizeof *rank);
    int *S0   = (int *)malloc((size_t)N0 * sizeof *S0);

    // All sizes are non-zero here (N >= 2), so NULL means failure. The
    // void signature cannot report it, and returning with SA unfilled
    // would let an enclosing recursive call index with garbage, so stop.
    if (!smpl || !tmp || !SA12 || !rank || !S0) {
        abort();
    }

    // Create sample (positions 1,4,7,... then 2,5,8,...) and sort the
    // triples starting there, least significant symbol first.
    int t = 0;
    for (int i = 1; i < N; i += 3) {
        smpl[t++] = i;
    }
    for (int i = 2; i < N; i += 3) {
        smpl[t++] = i;
    }
    radixsort_pass(smpl, tmp, S+2, N12, N);
    radixsort_pass(tmp, smpl, S+1, N12, N);
    radixsort_pass(smpl, tmp, S, N12, N);

    // Rename triples: equal triples share a name, names start at 1.
    for (int i = 0; i < N12 + 2; i++) {
        smpl[i] = 0;
    }
    int rename = 1;
    smpl[global_to_sample(N, tmp[0])] = 1;
    for (int i = 1; i < N12; i++) {
        if (!eq(S, tmp[i], tmp[i-1])) {
            rename++;
        }
        smpl[global_to_sample(N, tmp[i])] = rename;
    }
    // Zero padding required by the recursive call.
    smpl[N12] = smpl[N12+1] = 0;

    // Create suffix array of sample (SA12)
    if (rename < N12) {
        // Some names repeat: the order is not yet decided, recurse.
        suffix_array(N12, smpl, SA12);
    } else {
        // All names are distinct, so each name is already the rank.
        SA12[0] = N12;
        for (int i = 1; i <= N12; i++) {
            SA12[smpl[i-1]] = i-1;
        }
    }

    for (int i = 0; i <= N12; i++) {
        rank[sample_to_global(N, SA12[i])] = i;
    }
    rank[N+1] = rank[N+2] = 0;

    t = 0;
    for (int i = 0; i < N; i += 3) {
        S0[t++] = i;
    }

    // Sort nonsample suffixes
    // i <= j <=> (S[i], rank[i+1]) <= (S[j], rank[j+1])
    radixsort_pass(S0, tmp, rank+1, N0, N+2);
    radixsort_pass(tmp, S0, S, N0, N+2);

    // Merge sample and non-sample suffixes. SA12[0] is the empty suffix,
    // which has already been emitted as SA[0], so p2 starts at 1.
    int p1 = 0, p2 = 1, p = 1;
    SA[0] = N;
    while (p1 < N0 && p2 <= N12) {
        int j = S0[p1];
        int i = sample_to_global(N, SA12[p2]);
        int sample_first;

        // Do the comparison
        // i % 3 == 1:
        //   i <= j <=> (S[i],rank[i+1]) <= (S[j],rank[j+1])
        // i % 3 == 2:
        //   i <= j <=> (S[i],S[i+1],rank[i+2]) <= (S[j],S[j+1],rank[j+2])
        if (i % 3 == 1) {
            sample_first = leq2(S[i], rank[i+1], S[j], rank[j+1]);
        } else {
            sample_first = leq3(S[i], S[i+1], rank[i+2],
                                S[j], S[j+1], rank[j+2]);
        }

        if (sample_first) {
            SA[p++] = i;
            p2++;
        } else {
            SA[p++] = S0[p1++];
        }
    }

    // Drain whichever side still has entries.
    while (p1 < N0) {
        SA[p++] = S0[p1++];
    }
    while (p2 <= N12) {
        SA[p++] = sample_to_global(N, SA12[p2++]);
    }

    free(smpl);
    free(tmp);
    free(SA12);
    free(rank);
    free(S0);
}
/*
 * Build the suffix array of s[0..length-1] using the DC3 / skew scheme and
 * write the `length` suffix start positions to suffix_array.
 *
 *   s            - input symbols. Entries up to s[length + 2] are read.
 *                  NOTE(review): the algorithm presumably expects those
 *                  three trailing entries to be zero and real symbols to
 *                  be non-zero, as the recursive call arranges for its
 *                  own input -- confirm with callers.
 *   suffix_array - output buffer with room for `length` entries.
 *   length       - number of symbols in s.
 *   max_val      - forwarded to radix_pass as its last argument,
 *                  presumably the largest symbol value -- confirm against
 *                  radix_pass.
 *
 * Inputs with length < 2 are answered directly. The process is aborted if
 * a scratch buffer cannot be allocated.
 */
void compute_suffix_array(const size_t *const s, size_t *const suffix_array,
                          const size_t length, const unsigned int max_val)
{
    const size_t L0 = (length + 2) / 3,
                 L1 = (length + 1) / 3,
                 L2 = length / 3,
                 L02 = L0 + L2;
    size_t i, j, label, p, t, k, c0, c1, c2;
    size_t *s12, *suffix_array12, *s0, *suffix_array0;

    /*
     * The merge below is only valid for length >= 2. For length == 1 it
     * starts at t == L02, compares against the zero padding and would
     * store position 1 instead of 0. Handle the trivial inputs here.
     */
    if (length < 2) {
        if (length == 1) {
            suffix_array[0] = 0;
        }
        return;
    }

    s12 = (size_t *)malloc((L02 + 3) * sizeof *s12);
    suffix_array12 = (size_t *)malloc((L02 + 3) * sizeof *suffix_array12);
    s0 = (size_t *)malloc(L0 * sizeof *s0);
    suffix_array0 = (size_t *)malloc(L0 * sizeof *suffix_array0);

    /*
     * All sizes are non-zero here (length >= 2), so NULL means failure.
     * The void signature cannot report it, and returning early would let
     * an enclosing recursive call index with garbage, so stop.
     */
    if (!s12 || !suffix_array12 || !s0 || !suffix_array0) {
        abort();
    }

    s12[L02] = s12[L02 + 1] = s12[L02 + 2] = 0; /* padding with 0s */
    suffix_array12[L02] = suffix_array12[L02 + 1] = suffix_array12[L02 + 2] = 0;

    /*
     * Collect the positions i with i % 3 != 0. When length % 3 == 1,
     * L0 - L1 is 1 and the extra position `length` is included.
     */
    for (i = 0, j = 0; i < length + L0 - L1; ++i) {
        if (i % 3) {
            s12[j++] = i;
        }
    }

    /* Sort the triples at those positions, least significant symbol first. */
    radix_pass(s12, suffix_array12, s + 2, L02, max_val);
    radix_pass(suffix_array12, s12, s + 1, L02, max_val);
    radix_pass(s12, suffix_array12, s, L02, max_val);

    /* Label the sorted triples: equal triples share a label, labels start at 1. */
    label = 0;
    c0 = c1 = c2 = UINT_MAX;
    for (i = 0; i < L02; ++i) {
        const size_t pos = suffix_array12[i];

        if (s[pos] != c0 || s[pos + 1] != c1 || s[pos + 2] != c2) {
            ++label;
            c0 = s[pos];
            c1 = s[pos + 1];
            c2 = s[pos + 2];
        }
        if (1 == pos % 3) {
            s12[pos / 3] = label;      /* first half: positions with pos % 3 == 1 */
        } else {
            s12[pos / 3 + L0] = label; /* second half: positions with pos % 3 == 2 */
        }
    }

    if (label < L02) {
        /* Some labels repeat: recurse on the label string, then store ranks. */
        compute_suffix_array(s12, suffix_array12, L02, (unsigned int)label);
        for (i = 0; i < L02; ++i) {
            s12[suffix_array12[i]] = i + 1;
        }
    } else {
        /* All labels are distinct: the labels are already the ranks. */
        for (i = 0; i < L02; ++i) {
            suffix_array12[s12[i] - 1] = i;
        }
    }

    /*
     * Positions with i % 3 == 0, listed in the order of the suffix that
     * follows them, then sorted by their first symbol.
     */
    for (i = 0, j = 0; i < L02; ++i) {
        if (suffix_array12[i] < L0) {
            s0[j++] = 3 * suffix_array12[i];
        }
    }
    radix_pass(s0, suffix_array0, s, L0, max_val);

    /*
     * Merge the two sorted sets. t starts at L0 - L1 so that the extra
     * position added for length % 3 == 1 is skipped.
     */
    for (p = 0, t = L0 - L1, k = 0; k < length; ++k) {
        /* Text position of the current entry of the 1/2 set. */
        i = (suffix_array12[t] < L0 ? suffix_array12[t] * 3 + 1
                                    : (suffix_array12[t] - L0) * 3 + 2);
        /* Text position of the current entry of the 0 set. */
        j = suffix_array0[p];

        if (suffix_array12[t] < L0
                ? leq2(s[i], s12[suffix_array12[t] + L0],
                       s[j], s12[j / 3])
                : leq3(s[i], s[i + 1], s12[suffix_array12[t] - L0 + 1],
                       s[j], s[j + 1], s12[j / 3 + L0])) {
            suffix_array[k] = i;
            ++t;
            if (t == L02) {
                /* 1/2 set exhausted: copy the rest of the 0 set. */
                for (++k; p < L0; ++p, ++k) {
                    suffix_array[k] = suffix_array0[p];
                }
            }
        } else {
            suffix_array[k] = j;
            ++p;
            if (p == L0) {
                /* 0 set exhausted: copy the rest of the 1/2 set. */
                for (++k; t < L02; ++t, ++k) {
                    suffix_array[k] =
                        (suffix_array12[t] < L0 ? suffix_array12[t] * 3 + 1
                                                : (suffix_array12[t] - L0) * 3 + 2);
                }
            }
        }
    }

    free(s12);
    free(suffix_array12);
    free(suffix_array0);
    free(s0);
}