/**
 * Builds a suffix array over `_data` from precomputed ranks.
 *
 * The text positions 0..bytes-1 are paired with their ranks, sorted, and the
 * resulting position order is stored as the suffix array.
 *
 * @param _data    Input text; must be non-null and its last byte must be '\0'.
 * @param ra       One rank per text position; must be non-null.
 * @param bytes    Number of bytes in `_data` (and entries in `ra`); must be > 0.
 * @param threads  Thread count handed to OpenMP when MULTITHREAD_SUPPORT is
 *                 defined; 0 is treated as 1.
 *
 * On invalid input an error is written to std::cerr and the object is left
 * with ok == false and no suffix array allocated.
 */
SuffixArray::SuffixArray(uchar* _data, usint* ra, uint bytes, uint threads) :
  ok(false),
  data(_data), sa(0), original_sa(0), ranks(ra),
  data_size(bytes), sequences(0)
{
  if(_data == 0 || ra == 0 || bytes == 0)
  {
    std::cerr << "Error: No input data given for suffix array construction!" << std::endl;
    return;
  }

  // Each '\0' byte in the input counts as one sequence.
  for(uint i = 0; i < this->data_size; i++)
  {
    if(this->data[i] == '\0') { this->sequences++; }
  }
  if(this->data[this->data_size - 1] != '\0')
  {
    std::cerr << "Error: Input data must end with \\0!" << std::endl;
    return;
  }

  // Pair every text position with its rank and sort the pairs.
  pair_type* pairs = new pair_type[this->data_size];
  for(usint i = 0; i < this->data_size; i++)
  {
    pairs[i] = pair_type(this->ranks[i], i);
  }

#ifdef MULTITHREAD_SUPPORT
  // omp_set_num_threads() expects a positive value, so never pass 0 through.
  omp_set_num_threads(threads == 0 ? 1 : threads);
#endif
  parallelSort(pairs, pairs + this->data_size);

  // Keep only the text positions, now in sorted order.
  this->sa = new uint[this->data_size];
  for(uint i = 0; i < this->data_size; i++)
  {
    this->sa[i] = pairs[i].second;
  }
  delete[] pairs;

  // original_sa remembers the start of the allocation; sa itself is advanced
  // past the first `sequences` entries.
  this->original_sa = this->sa;
  this->sa += this->sequences;
  this->ok = true;
}
/**
 * Entry point for suffix sorting of an integer sequence.
 *
 * Sorts the positions of `sequence` by their first symbol, assigns initial
 * ranks with setRanks(), and then hands the work over to prefixDoubling() or
 * prefixTripling() depending on the relative sizes of usint and uint.
 *
 * @param sequence  Input symbols; must be non-null.
 * @param n         Number of symbols; must be > 0.
 * @param threads   Requested thread count; values below 1 are raised to 1.
 * @return          The result of prefixDoubling()/prefixTripling(), or 0 when
 *                  there is no input.
 */
short_pair*
simpleSuffixSort(const usint* sequence, uint n, uint threads)
{
  if(!sequence || !n) { return 0; }

  threads = std::max(threads, (uint)1);
#ifdef MULTITHREAD_SUPPORT
  omp_set_num_threads(threads);
#endif

  // The buffer is allocated in short_pair units but sized to hold n skew_pairs.
  skew_pair* pairs = (skew_pair*)new short_pair[n * sizeof(skew_pair) / sizeof(short_pair) + 1];
  uint* keys = new uint[n];  // In text order.
  std::vector<ss_range> pending;

  // Fill in (position, symbol) for every text position.
  #pragma omp parallel for schedule(static)
  for(uint pos = 0; pos < n; pos++)
  {
    pairs[pos].first = pos;
    pairs[pos].second = sequence[pos];
  }

  // Order by first character; initially the whole array is one unsorted range.
  parallelSort(pairs, pairs + n, skew_comparator);
  pending.push_back(ss_range(0, n - 1));
  uint unsorted_total = setRanks(pairs, keys, n, pending, threads, 1);

  // usint at least twice as wide as uint: tripling; otherwise pack the pairs
  // and use doubling.
  if(sizeof(usint) >= 2 * sizeof(uint))
  {
    return prefixTripling(pairs, keys, pending, n, threads, unsorted_total, 1);
  }
  return prefixDoubling(packPairs(pairs, n), keys, pending, n, threads, unsorted_total, 1);
}
uint initialSort(short_pair* pairs, uint* keys, std::vector<ss_range>& unsorted, uint n, uint threads, uint h) { // Sort according to first h characters. std::cout << "initialSort(short_pair* pairs, uint* keys, std::vector<ss_range>& unsorted, uint n, uint threads, uint h)" << std::endl; parallelSort(pairs, pairs + n, key_comparator); unsorted.push_back(ss_range(0, n - 1)); uint total = setRanks(pairs, keys, n, unsorted, threads, 1); // std::cout << "Sorted with h = " << h << ", unsorted total = " << total << " (" << unsorted.size() << " ranges)" << std::endl; std::cout << "done initialSort(short_pair* pairs, uint* keys, std::vector<ss_range>& unsorted, uint n, uint threads, uint h) -- Sorted with h = " << h << ", unsorted total = " << total << " (" << unsorted.size() << " ranges)" << std::endl; return total; }
void mergeRanges(std::vector<pair_type>* vec, bool parallel) { if(vec == 0 || vec->size() <= 1) { return; } if(parallel) { parallelSort(vec->begin(), vec->end()); } else { sequentialSort(vec->begin(), vec->end()); } std::vector<pair_type>::iterator prev = vec->begin(); for(std::vector<pair_type>::iterator curr = prev + 1; curr != vec->end(); ++curr) { if(prev->second + 1 >= curr->first) { prev->second = std::max(curr->second, prev->second); } else { ++prev; *prev = *curr; } } vec->resize((prev - vec->begin()) + 1); }
void BurstSort::sortAndPrint(bool serial,std::ofstream& file){ if(serial){ for(int i = 0; i < NODE_SIZE; i++){ quickSort(i,0,nodes[i].used -1); } for(int i = 0; i < NODE_SIZE; i++){ for(int j = 0; j < nodes[i].used; j++){ file << nodes[i].entries[j]; } } }else{ parallelSort(file); } }