/*!
         *  \brief Builds the alphabet from the first len bytes of text_buf.
         *
         *  Counts how often each byte value occurs, marks the occurring values
         *  in m_char, and stores the cumulative occurrence counts in m_C
         *  (sigma+1 entries; the last entry is the total, i.e. len).
         *
         *  \param text_buf Byte stream.
         *  \param len      Number of leading bytes of the stream to process.
         */
        succinct_byte_alphabet(int_vector_buffer<8>& text_buf, int_vector_size_type len):
            char2comp(this), comp2char(this), C(m_C), sigma(m_sigma)
        {
            m_sigma = 0;
            if (0 == len or 0 == text_buf.size()) {
                return; // nothing to read: the alphabet stays empty
            }
            assert(len <= text_buf.size());

            // histogram: D[b] = number of occurrences of byte value b
            int_vector<64> D(257, 0);
            for (size_type pos = 0; pos < len; ++pos) {
                ++D[text_buf[pos]];
            }
            assert(1 == D[0]); // null-byte should occur exactly once

            // Mark each occurring byte value and move its count to the front
            // of D, so that D[0..sigma-1] holds the counts in symbol order.
            bit_vector tmp_char(256, 0);
            for (int sym = 0; sym < 256; ++sym) {
                if (0 == D[sym])
                    continue;
                tmp_char[sym] = 1;
                D[m_sigma] = D[sym];
                ++m_sigma;
            }

            // sigma+1 entries, since CSAs also need the sum of all elements
            m_C = C_type(m_sigma+1, 0, bits::hi(len)+1);

            // m_C[k] = number of symbols belonging to the first k alphabet entries
            size_type running_total = 0;
            m_C[0] = 0;
            for (int k = 1; k <= (int)m_sigma; ++k) {
                running_total += D[k-1];
                m_C[k] = running_total;
            }
            assert(m_C[sigma]==len);

            m_char = tmp_char;
            util::init_support(m_char_rank, &m_char);
            util::init_support(m_char_select, &m_char);
        }
Beispiel #2
0
/*!
 *  Reads one line through the string overload of read_line and stores its
 *  characters in out, replacing whatever out held before.
 *
 *  \param out Receives the characters of the line that was read.
 */
void vcf_file::read_line(vector<char> &out)
{
	// Function-local static: the same string object is reused by every call,
	// so this function must not be called concurrently from several threads.
	static string tmp;
	tmp="";
	// Clear out before reading so it is empty if the string overload throws.
	out.clear();
	read_line(tmp);
	// Copy the characters straight into out; no intermediate vector is needed.
	out.assign(tmp.begin(), tmp.end());
}
		/*!
		 *  \brief Builds the alphabet from the first len symbols of text_buf.
		 *
		 *  Counts the occurrences of every symbol, sets m_sigma to the number of
		 *  distinct symbols and stores the cumulative counts in m_C (sigma+1
		 *  entries; the last entry is the total number of counted symbols).
		 *  m_char and its rank/select supports are only built when
		 *  is_continuous_alphabet(D) reports false.
		 *
		 *  \param text_buf	Symbol stream; it is reset and then read block by block.
		 *  \param len		Number of leading symbols of the stream to process.
		 */
		int_alphabet_strategy(int_vector_file_buffer<0> &text_buf, int_vector_size_type len): 
							           char2comp(this), comp2char(this), C(m_C), sigma(m_sigma)
		{
			m_sigma = 0;
			text_buf.reset();
			if (0 == len or 0 == text_buf.int_vector_size)
				return;
			assert( len <= text_buf.int_vector_size );
			// symbol -> number of occurrences, ordered by symbol value
			std::map<size_type, size_type> D;
			// count occurrences of each symbol among the first len symbols
			for (size_type i=0, r_sum=0, r = text_buf.load_next_block(); i < len;) {
				// A loaded block may reach past len; stop at len so that the
				// counts (and the total stored in m_C) cover exactly len symbols.
				const size_type block_end = (r_sum+r < len) ? r_sum+r : len;
				for (; i < block_end; ++i) {
					D[text_buf[i-r_sum]]++;
				}
				r_sum += r; r = text_buf.load_next_block();
			}
			m_sigma = D.size();
			if ( is_continuous_alphabet(D) ){
				// do not initialize m_char, m_char_rank and m_char_select since we can map directly 
			}else{
				// note: the alphabet has at least size 1, so the following is safe:
				size_type largest_symbol = (--D.end())->first;
				// one bit per possible symbol value; set bits mark occurring symbols
				bit_vector tmp_char(largest_symbol+1, 0);
				for (std::map<size_type, size_type>::const_iterator it = D.begin(), end=D.end(); it != end; ++it){
					tmp_char[it->first] = 1;
				}
				util::assign(m_char, tmp_char);
				util::init_support(m_char_rank, &m_char);	
				util::init_support(m_char_select, &m_char);
			}
			assert(D.find(0) != D.end() and 1 == D[0]); // null-byte should occur exactly once			

			// resize to sigma+1, since CSAs also need the sum of all elements
			util::assign(m_C, C_type(m_sigma+1, 0, bit_magic::l1BP(len)+1)	);
			// m_C[k] = number of counted symbols smaller than the k-th alphabet symbol
			size_type sum = 0, idx=0;
			for (std::map<size_type, size_type>::const_iterator it = D.begin(), end=D.end(); it != end; ++it){
				m_C[idx++] = sum;
				sum += it->second;
			}				
			m_C[idx] = sum;  // insert sum of all elements
		}
		/*!
		 *  \brief Builds the alphabet from the first len bytes of text_buf.
		 *
		 *  Counts how often each byte value occurs, marks the occurring values
		 *  in m_char, and stores the cumulative occurrence counts in m_C
		 *  (sigma+1 entries; the last entry is the total, i.e. len).
		 *
		 *  \param text_buf	Byte stream; it is reset and then read block by block.
		 *  \param len		Number of leading bytes of the stream to process.
		 */
		succinct_byte_alphabet_strategy(int_vector_file_buffer<8> &text_buf, int_vector_size_type len): 
							           char2comp(this), comp2char(this), C(m_C), sigma(m_sigma)
		{
			m_sigma = 0;
			text_buf.reset();
			if (0 == len or 0 == text_buf.int_vector_size)
				return;
			assert( len <= text_buf.int_vector_size );
			// initialize vectors 
			int_vector<64> D(257, 0);
			bit_vector tmp_char(256, 0);
			// count occurrences of each symbol among the first len bytes
			for (size_type i=0, r_sum=0, r = text_buf.load_next_block(); i < len;) {
				// A loaded block may reach past len; stop at len so that the
				// counts sum to exactly len (see the assert on m_C[sigma] below).
				const size_type block_end = (r_sum+r < len) ? r_sum+r : len;
				for (; i < block_end; ++i) {
					++D[text_buf[i-r_sum]];
				}
				r_sum += r; r = text_buf.load_next_block();
			}
			assert(1 == D[0]); // null-byte should occur exactly once
			m_sigma = 0;
			for (int i=0; i<256; ++i)
				if (D[i]) {
					tmp_char[i] = 1;	// mark occurring character
					D[m_sigma] = D[i];  // compactify m_C
					++m_sigma;
				}
			// resize to sigma+1, since CSAs also need the sum of all elements
			util::assign(m_C, C_type(m_sigma+1, 0, bit_magic::l1BP(len)+1));
			
			// copy the compacted counts shifted by one, then turn them into prefix sums
			for (int i=(int)m_sigma; i > 0; --i) m_C[i] = D[i-1]; 
			m_C[0] = 0;
			for (int i=1; i <= (int)m_sigma; ++i) m_C[i] = m_C[i] + m_C[i-1];
			assert(m_C[sigma]==len);
			util::assign(m_char, tmp_char);
			util::init_support(m_char_rank, &m_char);	
			util::init_support(m_char_select, &m_char);	
		}