/**
 * Builds a suffix array from precomputed ranks.
 *
 * @param _data   Input bytes; must be non-null and the last byte must be '\0'.
 * @param ra      Ranks, read at indices 0 .. bytes-1; must be non-null.
 *                Positions are ordered by sorting (rank, position) pairs.
 * @param bytes   Number of entries in _data and ra; must be non-zero.
 * @param threads Thread count passed to OpenMP (only used when
 *                MULTITHREAD_SUPPORT is defined). A value of 0 is treated as 1.
 *
 * On a validation failure a message is written to std::cerr and the object is
 * left with ok == false and sa == 0. On success ok is set to true.
 */
SuffixArray::SuffixArray(uchar* _data, usint* ra, uint bytes, uint threads) :
  ok(false),
  data(_data), sa(0), original_sa(0), ranks(ra), data_size(bytes),
  sequences(0)
{
  if(_data == 0 || ra == 0 || bytes == 0)
  {
    std::cerr << "Error: No input data given for suffix array construction!" << std::endl;
    return;
  }

  // Count the '\0' bytes; each one is counted as one sequence.
  for(uint i = 0; i < this->data_size; i++) { if(this->data[i] == '\0') { this->sequences++; } }
  if(this->data[this->data_size - 1] != '\0')
  {
    std::cerr << "Error: Input data must end with \\0!" << std::endl;
    return;
  }

  // Sort (rank, position) pairs; the positions in sorted order form the array.
  pair_type* pairs = new pair_type[this->data_size];
  for(usint i = 0; i < this->data_size; i++) { pairs[i] = pair_type(this->ranks[i], i); }
  #ifdef MULTITHREAD_SUPPORT
  // omp_set_num_threads() requires a positive argument, so clamp 0 to 1.
  omp_set_num_threads(threads > 0 ? threads : (uint)1);
  #endif
  parallelSort(pairs, pairs + this->data_size);
  this->sa = new uint[this->data_size];
  for(uint i = 0; i < this->data_size; i++) { this->sa[i] = pairs[i].second; }
  delete[] pairs;

  // original_sa keeps the start of the allocation, while sa is advanced past
  // the first `sequences` entries.
  // NOTE(review): presumably those entries are the suffixes that begin at the
  // '\0' terminators -- confirm against the rank construction.
  this->original_sa = this->sa; this->sa += this->sequences;
  this->ok = true;
}
Пример #2
0
/*
  Sorts the positions of the given sequence by their first value and then hands
  the partially sorted state over to prefixDoubling() or prefixTripling().

  sequence  input values, read at indices 0 .. n-1
  n         number of input values
  threads   requested thread count; values below 1 are raised to 1

  Returns 0 when sequence is null or n is 0. Otherwise returns whatever
  prefixDoubling() / prefixTripling() returns.
*/
short_pair*
simpleSuffixSort(const usint* sequence, uint n, uint threads)
{
  if(n == 0 || sequence == 0) { return 0; }

  // The buffer is allocated as short_pairs but is large enough to hold n
  // skew_pairs, which is how it is used below.
  skew_pair* suffixes = (skew_pair*)new short_pair[n * sizeof(skew_pair) / sizeof(short_pair) + 1];
  uint* text_keys = new uint[n];          // In text order.
  std::vector<ss_range> pending;
  if(threads < 1) { threads = 1; }
  #ifdef MULTITHREAD_SUPPORT
  omp_set_num_threads(threads);
  #endif

  // Pair every position with the value found at that position.
  #pragma omp parallel for schedule(static)
  for(uint pos = 0; pos < n; pos++)
  {
    suffixes[pos].first = pos;
    suffixes[pos].second = sequence[pos];
  }

  // First pass: order by skew_comparator, mark the whole range as unsorted
  // and let setRanks() assign the initial keys.
  parallelSort(suffixes, suffixes + n, skew_comparator);
  pending.push_back(ss_range(0, n - 1));
  uint remaining = setRanks(suffixes, text_keys, n, pending, threads, 1);

  // Tripling is used only when a usint is at least as wide as two uints;
  // otherwise the pairs are packed and doubling is used.
  if(sizeof(usint) >= 2 * sizeof(uint))
  {
    return prefixTripling(suffixes, text_keys, pending, n, threads, remaining, 1);
  }
  return prefixDoubling(packPairs(suffixes, n), text_keys, pending, n, threads, remaining, 1);
}
Пример #3
0
/*
  Runs the initial sorting pass over pairs[0 .. n-1].

  The pairs are ordered with key_comparator, the range (0, n - 1) is appended
  to unsorted, and setRanks() is called to fill in keys. Progress messages are
  written to std::cout before and after the work.

  Returns the total reported by setRanks().

  NOTE(review): h appears only in the progress message; setRanks() is called
  with the literal 1 rather than h. Confirm that this is intended.
*/
uint
initialSort(short_pair* pairs, uint* keys, std::vector<ss_range>& unsorted, uint n, uint threads, uint h)
{
  std::cout << "initialSort(short_pair* pairs, uint* keys, std::vector<ss_range>& unsorted, uint n, uint threads, uint h)" << std::endl;

  // Sort, mark everything as one unsorted range, then assign ranks.
  short_pair* pairs_end = pairs + n;
  parallelSort(pairs, pairs_end, key_comparator);
  unsorted.push_back(ss_range(0, n - 1));
  uint unsorted_total = setRanks(pairs, keys, n, unsorted, threads, 1);

  std::cout << "done initialSort(short_pair* pairs, uint* keys, std::vector<ss_range>& unsorted, uint n, uint threads, uint h) -- Sorted with h = " << h << ", unsorted total = " << unsorted_total << " (" << unsorted.size() << " ranges)" << std::endl;
  return unsorted_total;
}
Пример #4
0
/*
  Sorts the ranges in *vec and merges, in place, every run of ranges that
  overlap or touch.

  Each element is treated as a range running from .first to .second. Two
  consecutive ranges (after sorting) are merged when the later one starts no
  further than one past the end of the earlier one. The vector is shrunk to the
  merged ranges.

  vec       ranges to merge; a null pointer or fewer than two elements is a no-op
  parallel  use parallelSort() when true, sequentialSort() otherwise
*/
void
mergeRanges(std::vector<pair_type>* vec, bool parallel)
{
  if(vec == 0 || vec->size() <= 1) { return; }
  if(parallel) { parallelSort(vec->begin(), vec->end()); }
  else         { sequentialSort(vec->begin(), vec->end()); }

  // prev is the last merged range written so far; curr scans the sorted input.
  std::vector<pair_type>::iterator prev = vec->begin();
  for(std::vector<pair_type>::iterator curr = prev + 1; curr != vec->end(); ++curr)
  {
    // Same test as "prev->second + 1 >= curr->first", but written so that it
    // cannot overflow when prev->second holds the largest representable value.
    // The subtraction is evaluated only when curr->first > prev->second, so
    // curr->first - 1 cannot wrap either.
    if(curr->first <= prev->second || curr->first - 1 == prev->second)
    {
      prev->second = std::max(curr->second, prev->second);
    }
    else { ++prev; *prev = *curr; }
  }
  vec->resize((prev - vec->begin()) + 1);
}
Пример #5
0
void BurstSort::sortAndPrint(bool serial,std::ofstream& file){
	if(serial){
		for(int i = 0; i < NODE_SIZE; i++){
			quickSort(i,0,nodes[i].used -1);

		}
		for(int i = 0; i < NODE_SIZE; i++){
			for(int j = 0; j < nodes[i].used; j++){
				file << nodes[i].entries[j];
			}
		}
	}else{ 
		parallelSort(file);
	}
}