//! Construct the alphabet data from the first len symbols of a byte text.
/*!
 *  \param text_buf Byte stream.
 *  \param len      Length of the byte stream.
 *
 *  With len == 0 or an empty buffer, only m_sigma is set (to 0) and the
 *  constructor returns.
 */
succinct_byte_alphabet(int_vector_buffer<8>& text_buf, int_vector_size_type len):
    char2comp(this), comp2char(this), C(m_C), sigma(m_sigma)
{
    m_sigma = 0;
    if (0 == len or 0 == text_buf.size()) {
        return;
    }
    assert(len <= text_buf.size());

    // occ[c]: how often byte value c occurs among the first len symbols.
    int_vector<64> occ(257, 0);
    // present[c] is set iff byte value c occurs at least once.
    bit_vector present(256, 0);

    size_type pos = 0;
    while (pos < len) {
        ++occ[text_buf[pos]];
        ++pos;
    }
    assert(1 == occ[0]); // null-byte should occur exactly once

    // Mark every occurring byte and pack its count to the front of occ.
    // m_sigma never exceeds c, so no count is overwritten before it is read.
    for (int c = 0; c < 256; ++c) {
        if (occ[c]) {
            present[c] = 1;
            occ[m_sigma] = occ[c];
            ++m_sigma;
        }
    }

    // m_C gets sigma+1 entries, since CSAs also need the sum of all elements:
    // m_C[k] is the sum of the first k packed counts.
    m_C = C_type(m_sigma+1, 0, bits::hi(len)+1);
    m_C[0] = 0;
    size_type running = 0;
    for (int k = 1; k <= (int)m_sigma; ++k) {
        running += occ[k-1];
        m_C[k] = running;
    }
    assert(m_C[sigma]==len);

    m_char = present;
    util::init_support(m_char_rank, &m_char);
    util::init_support(m_char_select, &m_char);
}
// Reads one line via read_line(string&) and stores its characters in `out`.
//
// out: receives the characters of the line; any previous content is discarded.
//      It is emptied before the read, so it stays empty if the underlying
//      read_line(string&) exits by throwing.
//
// The line buffer is a function-local static, so its capacity is reused from
// call to call; as a consequence this function is not reentrant and must not
// be called concurrently from several threads.
void vcf_file::read_line(vector<char> &out)
{
    static string tmp;
    tmp.clear();
    out.clear();
    read_line(tmp);
    // Copy straight into `out` instead of building a temporary vector and
    // copy-assigning it, which cost an extra allocation and copy per line.
    out.assign(tmp.begin(), tmp.end());
}
//! Construct the alphabet data from the first len symbols of an integer text.
/*!
 *  \param text_buf Integer stream, read block by block via load_next_block().
 *  \param len      Number of symbols of the stream to take into account.
 *
 *  With len == 0 or an empty buffer, only m_sigma is set (to 0) and the
 *  constructor returns.
 */
int_alphabet_strategy(int_vector_file_buffer<0> &text_buf, int_vector_size_type len):
    char2comp(this), comp2char(this), C(m_C), sigma(m_sigma)
{
    m_sigma = 0;
    text_buf.reset();
    if (0 == len or 0 == text_buf.int_vector_size)
        return;
    assert( len <= text_buf.int_vector_size );

    // D maps each occurring symbol to its number of occurrences.
    std::map<size_type, size_type> D;
    // Count occurrences of each symbol. r is the size of the block currently
    // loaded and r_sum the number of symbols in all previous blocks.
    for (size_type i=0, r_sum=0, r = text_buf.load_next_block(); i < len;) {
        // Stop at len even if the loaded block extends past it; otherwise
        // symbols beyond len would be counted and the sums stored in m_C
        // would exceed the bit width chosen from len below.
        for (; i < r_sum+r and i < len; ++i) {
            D[text_buf[i-r_sum]]++;
        }
        r_sum += r;
        r = text_buf.load_next_block();
    }
    m_sigma = D.size();

    if ( is_continuous_alphabet(D) ) {
        // do not initialize m_char, m_char_rank and m_char_select since we can map directly
    } else {
        // note: the alphabet has at least size 1, so the following is safe:
        size_type largest_symbol = (--D.end())->first;
        // One bit per value in [0, largest_symbol]; set iff the value occurs.
        bit_vector tmp_char(largest_symbol+1, 0);
        for (std::map<size_type, size_type>::const_iterator it = D.begin(), end=D.end(); it != end; ++it) {
            tmp_char[it->first] = 1;
        }
        util::assign(m_char, tmp_char);
        util::init_support(m_char_rank, &m_char);
        util::init_support(m_char_select, &m_char);
    }
    assert(D.find(0) != D.end() and 1 == D[0]); // symbol 0 should occur exactly once

    // resize to sigma+1, since CSAs also need the sum of all elements
    util::assign(m_C, C_type(m_sigma+1, 0, bit_magic::l1BP(len)+1) );
    // m_C[k] = number of counted symbols smaller than the k-th smallest one
    // (std::map iterates its keys in ascending order).
    size_type sum = 0, idx=0;
    for (std::map<size_type, size_type>::const_iterator it = D.begin(), end=D.end(); it != end; ++it) {
        m_C[idx++] = sum;
        sum += it->second;
    }
    m_C[idx] = sum; // insert sum of all elements
}
//! Construct the alphabet data from the first len symbols of a byte text.
/*!
 *  \param text_buf Byte stream, read block by block via load_next_block().
 *  \param len      Length of the byte stream.
 *
 *  With len == 0 or an empty buffer, only m_sigma is set (to 0) and the
 *  constructor returns.
 */
succinct_byte_alphabet_strategy(int_vector_file_buffer<8> &text_buf, int_vector_size_type len):
    char2comp(this), comp2char(this), C(m_C), sigma(m_sigma)
{
    m_sigma = 0;
    text_buf.reset();
    if (0 == len or 0 == text_buf.int_vector_size)
        return;
    assert( len <= text_buf.int_vector_size );

    // initialize vectors
    int_vector<64> D(257, 0);      // D[c]: number of occurrences of byte value c
    bit_vector tmp_char(256, 0);   // tmp_char[c] is set iff byte value c occurs

    // Count occurrences of each symbol. r is the size of the block currently
    // loaded and r_sum the number of symbols in all previous blocks.
    for (size_type i=0, r_sum=0, r = text_buf.load_next_block(); i < len;) {
        // Stop at len even if the loaded block extends past it; otherwise
        // symbols beyond len would be counted and the final sum in m_C could
        // not equal len, as asserted below.
        for (; i < r_sum+r and i < len; ++i) {
            ++D[text_buf[i-r_sum]];
        }
        r_sum += r;
        r = text_buf.load_next_block();
    }
    assert(1 == D[0]); // null-byte should occur exactly once

    // Mark every occurring byte and pack its count to the front of D.
    // m_sigma never exceeds i, so no count is overwritten before it is read.
    for (int i=0; i<256; ++i) {
        if (D[i]) {
            tmp_char[i] = 1;      // mark occurring character
            D[m_sigma] = D[i];    // compactify m_C
            ++m_sigma;
        }
    }

    // resize to sigma+1, since CSAs also need the sum of all elements
    util::assign(m_C, C_type(m_sigma+1, 0, bit_magic::l1BP(len)+1));
    // m_C[i] = sum of the first i packed counts
    for (int i=(int)m_sigma; i > 0; --i)
        m_C[i] = D[i-1];
    m_C[0] = 0;
    for (int i=1; i <= (int)m_sigma; ++i)
        m_C[i] = m_C[i] + m_C[i-1];
    assert(m_C[sigma]==len);

    util::assign(m_char, tmp_char);
    util::init_support(m_char_rank, &m_char);
    util::init_support(m_char_select, &m_char);
}