Beispiel #1
0
 /*!
  * Fills the lookup tables for all n-bit words, grouped by their number of
  * set bits (the "class" of a word).
  *
  * For every class the words are enumerated in the order produced by
  * next_permutation, starting from the arrangement that has all set bits at
  * the highest positions of the bool vector.
  *  - m_C[c]            : global number of the first word of class c
  *  - m_nr_to_bin[nr]   : word that has global number nr
  *  - m_bin_to_nr[word] : number of the word inside its own class
  *  - m_space_for_bt[c] : 0 if class c has a single word, otherwise
  *                        bits::hi(class size)+1
  */
 impl()
 {
     m_nr_to_bin.resize(1<<n);
     m_bin_to_nr.resize(1<<n);
     int next_nr = 0; // running global number over all classes
     for (int ones=0; ones<=n; ++ones) {
         m_C[ones] = next_nr;
         // Start pattern: the last `ones` entries are set, all others are clear.
         std::vector<bool> pattern(n,0);
         for (int pos=n-ones; pos<n; ++pos) pattern[pos] = 1;
         int nr_in_class = 0;
         do {
             // Pack the pattern into an integer: entry `bit` becomes bit `bit`.
             uint32_t word=0;
             for (int bit=0; bit<n; ++bit)
                 word |= ((uint32_t)pattern[bit])<<bit;
             m_nr_to_bin[next_nr] = word;
             m_bin_to_nr[word] = nr_in_class;
             ++next_nr;
             ++nr_in_class;
         } while (next_permutation(pattern.begin(), pattern.end()));
         // A class with exactly one member needs no bits to identify the member.
         if (nr_in_class != 1)
             m_space_for_bt[ones] = bits::hi(nr_in_class)+1;
         else
             m_space_for_bt[ones] = 0;
     }
     if (n == 15) {
         // One byte holds two 4-bit class values; precompute the summed space
         // of the high and the low nibble for all 256 byte values.
         for (int pair=0; pair<256; ++pair) {
             const int high_class = pair>>4;
             const int low_class  = pair&0x0F;
             m_space_for_bt_pair[pair] = m_space_for_bt[high_class] + m_space_for_bt[low_class];
         }
     }
 }
/*!
 * Computes the suffix array of the text c[0..len-1] and stores it in sa.
 *
 * Texts shorter than 0x7FFFFFFF symbols (or any text on platforms where
 * size_type has at most 4 bytes) are sorted with the 32-bit divsufsort,
 * everything else with divsufsort64. Where possible the sort runs directly
 * inside the memory of sa and the result is afterwards repacked to the width
 * sa had on entry; otherwise a temporary 32-bit vector is used.
 *
 * \param[in]  c   Text to be indexed.
 * \param[in]  len Number of symbols of c that are processed.
 * \param[out] sa  Receives the len suffix array entries. For len <= 1 it is
 *                 replaced by a freshly constructed vector of len entries.
 *
 * \throws std::logic_error if the temporary-buffer path is taken and
 *         sa.width() is smaller than bits::hi(len)+1.
 *
 * NOTE(review): the in-place paths repack to the original width without a
 * comparable width check - confirm callers always pass a wide enough vector.
 */
void calculate_sa(const unsigned char* c, typename int_vector<fixedIntWidth>::size_type len, int_vector<fixedIntWidth>& sa)
{
    typedef typename int_vector<fixedIntWidth>::size_type size_type;
    if (len <= 1) { // handle special case
        sa = int_vector<fixedIntWidth>(len,0);
        return;
    }
    bool small_file = (sizeof(len) <= 4 or len < 0x7FFFFFFFULL);
    if (small_file) {
        uint8_t oldIntWidth = sa.width();
        if (32 == fixedIntWidth or (0==fixedIntWidth and 32 >= oldIntWidth)) {
            // Sort in place: temporarily widen sa to 32 bits so that its raw
            // memory can serve as the int32_t output buffer of divsufsort.
            sa.width(32);
            sa.resize(len);
            divsufsort(c, (int32_t*)sa.m_data, len);
            // copy integers back to the right positions
            // (target bit offset i*oldIntWidth never exceeds source offset
            // i*32, so a forward pass does not overwrite unread entries)
            if (oldIntWidth!=32) {
                for (size_type i=0; i<len; ++i) {
                    sa.set_int(i*oldIntWidth, sa.get_int(i<<5, 32), oldIntWidth);
                }
                sa.width(oldIntWidth);
                sa.resize(len);
            }
        } else {
            if (sa.width() < bits::hi(len)+1) {
                throw std::logic_error("width of int_vector is to small for the text!!!");
            }
            int_vector<> sufarray(len,0,32);
            divsufsort(c, (int32_t*)sufarray.m_data, len);
            // sa keeps its width here, but it must hold exactly len entries
            // before they are written; the other paths resize as well.
            sa.resize(len);
            for (size_type i=0; i<len; ++i) {
                sa[i] = sufarray[i];
            }
        }
    } else {
        // Large text: same in-place scheme with 64-bit entries.
        uint8_t oldIntWidth = sa.width();
        sa.width(64);
        sa.resize(len);
        divsufsort64(c, (int64_t*)sa.m_data, len);
        // copy integers back to the right positions
        if (oldIntWidth!=64) {
            for (size_type i=0; i<len; ++i) {
                sa.set_int(i*oldIntWidth, sa.get_int(i<<6, 64), oldIntWidth);
            }
            sa.width(oldIntWidth);
            sa.resize(len);
        }
    }
}
/*!
 * Writes the suffix array values SA[occ_begin], ..., SA[occ_end-1] to occ.
 *
 * Only the rows i with (size()-1-i) % SampleDens == 0 have a stored sample.
 * For any other row the LF-mapping is followed until a sampled row is
 * reached; each step moves to the row of the preceding text position, so the
 * number of steps is added back to the sample (modulo size()).
 *
 * \param[in]  occ_begin First suffix array index of the requested range.
 * \param[in]  occ_end   One past the last suffix array index of the range.
 * \param[out] occ       Resized to occ_end-occ_begin and filled with the values.
 */
void index_bidirectional_waveletindex<WaveletTree, SampleDens>::extract_sa(size_t occ_begin, size_t occ_end, int_vector<> &occ) {
	const size_t num_occ = occ_end - occ_begin;
	occ.resize(num_occ);
	for (size_t out = 0; out < num_occ; ++out) {
		size_t row = occ_begin + out;
		size_t steps = 0; // number of LF steps taken so far
		for (; (size()-1-row) % SampleDens != 0; ++steps) {
			// row := LF(row)
			uint64_t symbol = backward_index.extract(row);
			row = m_C[symbol] + backward_index.occ(symbol, row);
		}
		occ[out] = (m_sa_sample[row / SampleDens] + steps) % size();
	}
}
		/*!
		 * Constructor for building the Index
		 *
		 * Builds the wavelet tree over the BWT of the text (backward_index),
		 * the suffix array samples of the text, and the wavelet tree over the
		 * BWT of the reversed text (forward_index).
		 *
		 * The temporary BWT and reversed-text buffers are held in std::vector
		 * so that they are released even if one of the construction steps
		 * throws.
		 *
		 * \param[in] str C-string of the text
		 */
		index_bidirectional_waveletindex(const unsigned char* str) : index() {

			const size_t n = strlen((const char*)str);
			int_vector<> sa(n+1, 0, bit_magic::l1BP(n+1)+1);
			setText(str, n+1);

			// n+1 >= 1, so taking &bwt[0] below is always valid.
			std::vector<unsigned char> bwt(n+1);

			algorithm::calculate_sa(str, n+1, sa);   // calculate the suffix array sa of str

			{ /* Calculate Burrows-Wheeler-Transform */
				size_t i = 0;
				for(int_vector<>::const_iterator it = sa.begin(), end = sa.end(); it != end; ++it, ++i){
					// (*it+n)%(n+1) is the cyclic predecessor of text position *it
					bwt[i] = m_char2comp[str[(*it+n)%(n+1)]];
				}
			}

			backward_index = WaveletTree(&bwt[0], n+1, m_sigma);

			/* Construct the SA-Samples */
			m_sa_sample.setIntWidth( bit_magic::l1BP(sa.size())+1 );
			m_sa_sample.resize( (sa.size()+SampleDens-1)/SampleDens );
			size_t idx=0;
			// First sampled row is chosen so that the last row, sa.size()-1, is sampled.
			size_t i=(sa.size()-1-SampleDens*(m_sa_sample.size()-1));
			for(int_vector<>::const_iterator it = sa.begin()+(ptrdiff_t)i; i < sa.size(); it += (ptrdiff_t)SampleDens, i += SampleDens, ++idx){
				m_sa_sample[idx] = *it;
			} 

			// Reversed text, including the terminating zero symbol.
			std::vector<unsigned char> reverse(n+1);
			for (size_t i=0; i<n; i++) reverse[i] = str[n-1-i];
			reverse[n] = '\0';

			algorithm::calculate_sa(&reverse[0], n+1, sa);   // calculate the suffix array sa of reverse string str

			{ /* Calculate Burrows-Wheeler-Transform */
				size_t i = 0;
				for(int_vector<>::const_iterator it = sa.begin(), end = sa.end(); it != end; ++it, ++i){
					bwt[i] = m_char2comp[reverse[(*it+n)%(n+1)]];
				}
			}

			forward_index = WaveletTree(&bwt[0], n+1, m_sigma);

		}
    /*!
     * Constructor for building the Index
     * \param[in] str C-string of the text
     */
    index_csa_psi_text(const unsigned char *str) : index() {

        size_t n = strlen((const char*)str);
        int_vector<> sa(n+1, 0, bit_magic::l1BP(n+1)+1);
        algorithm::calculate_sa(str, n+1, sa);   // calculate the suffix array sa of str
        int_vector<> m_psi;
        sdsl::algorithm::sa2psi(sa, m_psi);
        psi = EncVector(m_psi);
        setText(str, n+1);

        text = int_vector<>(sa.size(), 0, bit_magic::l1BP(sigma)+1);
        for (size_t i=0; i<sa.size(); i++) text[i] = char2comp[str[i]];

        /* Construct the SA-Samples */
        m_sa_sample.setIntWidth( bit_magic::l1BP(sa.size())+1 );
        m_sa_sample.resize( (sa.size()+SampleDens-1)/SampleDens );
        size_t i=0, idx=0;
        for(int_vector<>::const_iterator it = sa.begin(); i < sa.size(); it += (ptrdiff_t)SampleDens, i += SampleDens, ++idx) {
            m_sa_sample[idx] = *it;
        }
    }
/*!
 * Copies the suffix array entries sa[occ_begin], ..., sa[occ_end-1] to occs.
 *
 * \param[in]  occ_begin First suffix array index of the requested range.
 * \param[in]  occ_end   One past the last suffix array index of the range.
 * \param[out] occs      Resized to occ_end-occ_begin and filled with the entries.
 */
void index_sa_text_occ<Occ>::extract_sa(size_t occ_begin, size_t occ_end, int_vector<> &occs) {
	const size_t num_occ = occ_end - occ_begin;
	occs.resize(num_occ);
	size_t src = occ_begin;
	for (size_t dst = 0; dst < num_occ; ++dst, ++src) {
		occs[dst] = sa[src];
	}
}
Beispiel #7
0
/*!
 * Appends one element with value 0 to the end of text.
 *
 * \param[in,out] text Vector that is grown by exactly one element.
 */
void append_zero_symbol(int_vector& text)
{
    const auto grown_size = text.size() + 1;
    text.resize(grown_size);
    text[grown_size - 1] = 0; // the newly added last element
}
/*!
 * Writes the suffix array values of the rows occ_begin, ..., occ_end-1 to
 * occ, obtaining each value through getSAValue().
 *
 * \param[in]  occ_begin First suffix array index of the requested range.
 * \param[in]  occ_end   One past the last suffix array index of the range.
 * \param[out] occ       Resized to occ_end-occ_begin and filled with the values.
 */
void index_csa_psi_text<EncVector, SampleDens>::extract_sa(size_t occ_begin, size_t occ_end, int_vector<> &occ) {
    const size_t num_occ = occ_end - occ_begin;
    occ.resize(num_occ);
    size_t k = 0;
    while (k < num_occ) {
        occ[k] = getSAValue(occ_begin + k);
        ++k;
    }
}
/*!
 * Copies the suffix array entries sa[occ_begin], ..., sa[occ_end-1] to occ.
 *
 * \param[in]  occ_begin First suffix array index of the requested range.
 * \param[in]  occ_end   One past the last suffix array index of the range.
 * \param[out] occ       Resized to occ_end-occ_begin and filled with the entries.
 */
void index_sa_text_psi::extract_sa(size_t occ_begin, size_t occ_end, int_vector<> &occ) {
	const size_t range_len = occ_end - occ_begin;
	occ.resize(range_len);
	for (size_t offset = 0; offset != range_len; ++offset) {
		const size_t row = occ_begin + offset;
		occ[offset] = sa[row];
	}
}