/*!
 * Fills the lookup tables by enumerating every n-bit word, grouped by the
 * number of set bits.
 *
 * For each class i (= number of set bits, 0..n):
 *  - m_C[i]            index in m_nr_to_bin of the first word of class i
 *  - m_nr_to_bin[...]  the words of the class, in the order produced by
 *                      next_permutation on the bit pattern
 *  - m_bin_to_nr[x]    rank of word x inside its own class
 *  - m_space_for_bt[i] 0 if the class has a single word, otherwise
 *                      bits::hi(class size)+1
 * For n == 15 the per-class sizes are additionally combined into
 * m_space_for_bt_pair, indexed by a byte holding two 4-bit class numbers.
 */
impl()
{
    m_nr_to_bin.resize(1<<n);
    m_bin_to_nr.resize(1<<n);

    int global_rank = 0; // running index over all words of all classes
    for (int ones = 0; ones <= n; ++ones) {
        m_C[ones] = global_rank;
        int rank_in_class = 0;

        // Start with the lexicographically smallest pattern of this class:
        // all set bits at the highest indices of the vector.
        std::vector<bool> pattern(n, 0);
        for (int set = 0; set < ones; ++set) {
            pattern[n-set-1] = 1;
        }

        do {
            // Bit p of the word mirrors pattern[p].
            uint32_t word = 0;
            for (int pos = 0; pos < n; ++pos) {
                word |= ((uint32_t)pattern[pos]) << pos;
            }
            m_nr_to_bin[global_rank] = word;
            m_bin_to_nr[word] = rank_in_class;
            ++global_rank;
            ++rank_in_class;
        } while (next_permutation(pattern.begin(), pattern.end()));

        // After the loop rank_in_class equals the number of words in the class.
        m_space_for_bt[ones] = (rank_in_class == 1) ? 0 : bits::hi(rank_in_class)+1;
    }

    if (n == 15) {
        // High nibble and low nibble each name a class; store the summed width.
        for (int pair = 0; pair < 256; ++pair) {
            const int high_class = pair >> 4;
            const int low_class  = pair & 0x0F;
            m_space_for_bt_pair[pair] = m_space_for_bt[high_class] + m_space_for_bt[low_class];
        }
    }
}
void calculate_sa(const unsigned char* c, typename int_vector<fixedIntWidth>::size_type len, int_vector<fixedIntWidth>& sa) { typedef typename int_vector<fixedIntWidth>::size_type size_type; if (len <= 1) { // handle special case sa = int_vector<fixedIntWidth>(len,0); return; } bool small_file = (sizeof(len) <= 4 or len < 0x7FFFFFFFULL); if (small_file) { uint8_t oldIntWidth = sa.width(); if (32 == fixedIntWidth or (0==fixedIntWidth and 32 >= oldIntWidth)) { sa.width(32); sa.resize(len); divsufsort(c, (int32_t*)sa.m_data, len); // copy integers back to the right positions if (oldIntWidth!=32) { for (size_type i=0; i<len; ++i) { sa.set_int(i*oldIntWidth, sa.get_int(i<<5, 32), oldIntWidth); } sa.width(oldIntWidth); sa.resize(len); } } else { if (sa.width() < bits::hi(len)+1) { throw std::logic_error("width of int_vector is to small for the text!!!"); } int_vector<> sufarray(len,0,32); divsufsort(c, (int32_t*)sufarray.m_data, len); for (size_type i=0; i<len; ++i) { sa[i] = sufarray[i]; } } } else { uint8_t oldIntWidth = sa.width(); sa.width(64); sa.resize(len); divsufsort64(c, (int64_t*)sa.m_data, len); // copy integers back to the right positions if (oldIntWidth!=64) { for (size_type i=0; i<len; ++i) { sa.set_int(i*oldIntWidth, sa.get_int(i<<6, 64), oldIntWidth); } sa.width(oldIntWidth); sa.resize(len); } } }
/*!
 * Writes the suffix array values of the rows [occ_begin, occ_end) into occ.
 *
 * \param[in]  occ_begin First row of the interval.
 * \param[in]  occ_end   One past the last row of the interval.
 * \param[out] occ       Resized to occ_end-occ_begin; entry k receives the
 *                       value computed for row occ_begin+k.
 *
 * A row i is sampled when (size()-1-i) is a multiple of SampleDens. For an
 * unsampled row the LF mapping is applied until a sampled row is reached;
 * the number of steps taken is added to the stored sample (mod size()).
 */
void index_bidirectional_waveletindex<WaveletTree, SampleDens>::extract_sa(size_t occ_begin, size_t occ_end, int_vector<> &occ)
{
    const size_t num_occ = occ_end - occ_begin;
    occ.resize(num_occ);

    for (size_t k = 0; k < num_occ; ++k) {
        size_t row = occ_begin + k;
        size_t lf_steps = 0;

        // As long as SA[row] is not sampled, move to the row holding
        // SA[row]-1 (row := LF(row)) and remember one more step.
        for (; (size()-1-row) % SampleDens != 0; ++lf_steps) {
            uint64_t symbol = backward_index.extract(row);
            row = m_C[symbol] + backward_index.occ(symbol, row);
        }

        occ[k] = (m_sa_sample[row / SampleDens] + lf_steps) % size();
    }
}
/*! * Constructor for building the Index * \param[in] str C-string of the text */ index_bidirectional_waveletindex(const unsigned char* str) : index() { size_t n = strlen((const char*)str); int_vector<> sa(n+1, 0, bit_magic::l1BP(n+1)+1); setText(str, n+1); unsigned char *bwt = new unsigned char[n+1]; algorithm::calculate_sa(str, n+1, sa); // calculate the suffix array sa of str { /* Calculate Burrows-Wheeler-Transform */ size_t i = 0; for(int_vector<>::const_iterator it = sa.begin(), end = sa.end(); it != end; ++it, ++i){ bwt[i] = m_char2comp[str[(*it+n)%(n+1)]]; } } backward_index = WaveletTree(bwt, n+1, m_sigma); /* Construct the SA-Samples */ m_sa_sample.setIntWidth( bit_magic::l1BP(sa.size())+1 ); m_sa_sample.resize( (sa.size()+SampleDens-1)/SampleDens ); size_t idx=0; size_t i=(sa.size()-1-SampleDens*(m_sa_sample.size()-1)); for(int_vector<>::const_iterator it = sa.begin()+(ptrdiff_t)i; i < sa.size(); it += (ptrdiff_t)SampleDens, i += SampleDens, ++idx){ m_sa_sample[idx] = *it; } unsigned char* reverse = new unsigned char[n+1]; for (size_t i=0; i<n; i++) reverse[i] = str[n-1-i]; reverse[n] = '\0'; algorithm::calculate_sa(reverse, n+1, sa); // calculate the suffix array sa of reverse string str { /* Calculate Burrows-Wheeler-Transform */ size_t i = 0; for(int_vector<>::const_iterator it = sa.begin(), end = sa.end(); it != end; ++it, ++i){ bwt[i] = m_char2comp[reverse[(*it+n)%(n+1)]]; } } forward_index = WaveletTree(bwt, n+1, m_sigma); delete [] bwt; delete [] reverse; }
/*! * Constructor for building the Index * \param[in] str C-string of the text */ index_csa_psi_text(const unsigned char *str) : index() { size_t n = strlen((const char*)str); int_vector<> sa(n+1, 0, bit_magic::l1BP(n+1)+1); algorithm::calculate_sa(str, n+1, sa); // calculate the suffix array sa of str int_vector<> m_psi; sdsl::algorithm::sa2psi(sa, m_psi); psi = EncVector(m_psi); setText(str, n+1); text = int_vector<>(sa.size(), 0, bit_magic::l1BP(sigma)+1); for (size_t i=0; i<sa.size(); i++) text[i] = char2comp[str[i]]; /* Construct the SA-Samples */ m_sa_sample.setIntWidth( bit_magic::l1BP(sa.size())+1 ); m_sa_sample.resize( (sa.size()+SampleDens-1)/SampleDens ); size_t i=0, idx=0; for(int_vector<>::const_iterator it = sa.begin(); i < sa.size(); it += (ptrdiff_t)SampleDens, i += SampleDens, ++idx) { m_sa_sample[idx] = *it; } }
/*!
 * Copies the suffix array entries of the rows [occ_begin, occ_end) into occs.
 *
 * \param[in]  occ_begin First row of the interval.
 * \param[in]  occ_end   One past the last row of the interval.
 * \param[out] occs      Resized to occ_end-occ_begin; entry k receives
 *                       sa[occ_begin+k].
 */
void index_sa_text_occ<Occ>::extract_sa(size_t occ_begin, size_t occ_end, int_vector<> &occs)
{
    const size_t num_occ = occ_end - occ_begin;
    occs.resize(num_occ);
    for (size_t offset = 0; offset < num_occ; ++offset) {
        occs[offset] = sa[occ_begin + offset];
    }
}
/*!
 * Appends one element with value 0 to the end of text.
 *
 * \param[in,out] text Vector that is grown by exactly one element.
 */
void append_zero_symbol(int_vector& text)
{
    // Grow by one slot ...
    text.resize(1 + text.size());
    // ... and set that new last slot to zero.
    text[text.size() - 1] = 0;
}
/*!
 * Writes the values returned by getSAValue for the rows
 * [occ_begin, occ_end) into occ.
 *
 * \param[in]  occ_begin First row of the interval.
 * \param[in]  occ_end   One past the last row of the interval.
 * \param[out] occ       Resized to occ_end-occ_begin; entry k receives
 *                       getSAValue(occ_begin+k).
 */
void index_csa_psi_text<EncVector, SampleDens>::extract_sa(size_t occ_begin, size_t occ_end, int_vector<> &occ)
{
    const size_t num_occ = occ_end - occ_begin;
    occ.resize(num_occ);
    for (size_t offset = 0; offset < num_occ; ++offset) {
        const size_t row = occ_begin + offset;
        occ[offset] = getSAValue(row);
    }
}
/*!
 * Copies the suffix array entries of the rows [occ_begin, occ_end) into occ.
 *
 * \param[in]  occ_begin First row of the interval.
 * \param[in]  occ_end   One past the last row of the interval.
 * \param[out] occ       Resized to occ_end-occ_begin; entry k receives
 *                       sa[occ_begin+k].
 */
void index_sa_text_psi::extract_sa(size_t occ_begin, size_t occ_end, int_vector<> &occ)
{
    const size_t num_occ = occ_end - occ_begin;
    occ.resize(num_occ);

    size_t offset = 0;
    while (offset < num_occ) {
        occ[offset] = sa[occ_begin + offset];
        ++offset;
    }
}