/*
 * Constructor. The code of the constructor is imported from Simon Gog's SDSL library.
 */
libmaus::util::NearestNeighbourDictionary::NearestNeighbourDictionary(::libmaus::bitio::BitVector const & v)
{
    // Builds three structures over the set bits of v:
    //  - m_abs_samples: position of every nndblocksize-th set bit,
    //  - m_differences: for every other set bit the distance to the previous set bit
    //    (for the very first set bit: its position),
    //  - m_contains_abs_sample: one flag per nndblocksize positions of v, set if an
    //    absolute sample lies in that stretch.
    assert ( nndblocksize );

    // Pass 1: count the set bits and find the widest gap, so that the
    // difference array can be allocated with a sufficient bit width.
    // (This scan could be sped up by broadword computing.)
    uint64_t ones = 0;
    uint64_t widest_gap = 0;
    // position of the previously seen set bit plus one (0 while none was seen)
    uint64_t prev_one_end = 0;
    for ( uint64_t i = 0; i < v.size(); ++i )
    {
        if ( ! (v)[i] )
            continue;

        uint64_t const gap = i+1-prev_one_end;
        if ( gap > widest_gap )
            widest_gap = gap;
        prev_one_end = i+1;
        ++ones;
    }

    m_ones = ones;
    m_size = v.size();

    // Absolute samples. Slot 0 is never written below; it is intended to hold 0,
    // which presumably relies on the initial content of CompactArray -- TODO confirm.
    ::libmaus::bitio::CompactArray::unique_ptr_type abs_samples_holder(
        new ::libmaus::bitio::CompactArray(
            m_ones/nndblocksize + 1,
            ::libmaus::math::numbits(v.size()-1)
        )
    );
    m_abs_samples = UNIQUE_PTR_MOVE(abs_samples_holder);

    // Differences: one entry for each set bit which is not an absolute sample.
    ::libmaus::bitio::CompactArray::unique_ptr_type differences_holder(
        new ::libmaus::bitio::CompactArray(
            m_ones - m_ones/nndblocksize,
            ::libmaus::math::numbits(widest_gap)
        )
    );
    m_differences = UNIQUE_PTR_MOVE(differences_holder);

    // Marker vector: one bit per nndblocksize positions of v (rounded up).
    ::libmaus::bitio::IndexedBitVector::unique_ptr_type contains_holder(
        new ::libmaus::bitio::IndexedBitVector(
            (v.size()+nndblocksize-1)/nndblocksize
        )
    );
    m_contains_abs_sample = UNIQUE_PTR_MOVE(contains_holder);

    // Pass 2: fill the arrays allocated above.
    ones = 0;
    uint64_t prev_one_pos = 0;
    for ( uint64_t i = 0; i < v.size(); ++i )
    {
        if ( ! (v)[i] )
            continue;

        ++ones;
        if ( (ones % nndblocksize) != 0 )
        {
            // regular set bit: store the distance to the previous set bit
            assert ( ones - ones/nndblocksize - 1 < m_differences->size() );
            (*m_differences)[ones - ones/nndblocksize - 1] = i - prev_one_pos;
        }
        else
        {
            // every nndblocksize-th set bit: store the absolute position and
            // flag the stretch of v containing it
            assert ( ones/nndblocksize < m_abs_samples->size() );
            (*m_abs_samples)[ones/nndblocksize] = i;
            assert ( i/nndblocksize < m_contains_abs_sample->size() );
            (*m_contains_abs_sample)[i/nndblocksize] = 1;
        }
        prev_one_pos = i;
    }

    m_contains_abs_sample->setupIndex();
}
Ejemplo n.º 2
0
	/*! \param bp The balanced parentheses sequence for which the pioneers should be calculated.
	 *  \param block_size Size of the blocks for which the pioneers should be calculated.
	 *  \return Indexed bit vector of bp.size() bits in which the pioneers are marked.
	 *  \par Time complexity
	 *       \f$ \Order{n} \f$, where \f$ n=\f$bp.size()  
	 *  \par Space complexity
	 *       \f$ \Order{2n + n} \f$ bits: \f$n\f$ bits for input, \f$n\f$ bits for output, and \f$n\f$ bits for a succinct stack.
	 *  \pre The parentheses sequence represented by bp has to be balanced.
	 */
	static ::libmaus::bitio::IndexedBitVector::unique_ptr_type calculatePioneerBitVector(::libmaus::bitio::BitVector const & bp, uint64_t const block_size)
	{
		// Computes a bit vector of bp.size() bits in which the pioneer parentheses of bp
		// (set bit = opening, unset bit = closing) are marked with respect to blocks of
		// block_size positions. The index of the result is set up before it is returned.
		//
		// Preconditions (not checked here):
		//  - bp is balanced: the code reads bp[j+1] after an opening parenthesis and
		//    takes the top of the stack on every closing one without checking for emptiness.
		//  - block_size > 0 if bp is not empty: block_size is used as a divisor.
		::libmaus::bitio::IndexedBitVector::unique_ptr_type Ppioneer_bitmap(new ::libmaus::bitio::IndexedBitVector(bp.size()));
		::libmaus::bitio::IndexedBitVector & pioneer_bitmap = *Ppioneer_bitmap;

		// positions of the opening parentheses which have not been matched yet
		IncreasingStack opening_parenthesis(bp.size());

		// block of the opening parenthesis of the most recently set pioneer pair;
		// reset to the index of the current block whenever a new block starts
		uint64_t cur_pioneer_block = 0;
		// positions of the most recently set pioneer pair
		uint64_t last_start = 0;
		uint64_t last_j = 0;
		// first position of the block containing j
		uint64_t first_index_in_block = 0;

		// calculate positions of findclose and findopen pioneers;
		// new_block counts down the positions remaining in the current block
		for(uint64_t j=0, new_block=block_size; j < bp.size(); ++j, --new_block)
		{
			// j is the first position of a new block
			if( !(new_block) )
			{
				cur_pioneer_block = j/block_size;
				first_index_in_block = j;
				new_block = block_size;
			}

			// opening parenthesis
			if( bp[j] )
			{
				/* no bound check for j+1 is necessary, as the last parenthesis
				 * of a balanced sequence is always a closing one */
				/* if a closing parenthesis immediately follows inside the same block,
				 * skip both and carry on */
				if( new_block>1 && !bp[j+1] )
				{
					++j;
					--new_block;
				}
				/* otherwise push the opening parenthesis */
				else
				{
					opening_parenthesis.push(j);
				}
			}
			// closing parenthesis
			else
			{
				uint64_t const start = opening_parenthesis.top();
				opening_parenthesis.pop();
				// matching opening parenthesis is not in this block (i.e. far parenthesis)
				if( start < first_index_in_block )
				{
					// opening parenthesis lies in the same block as the one of the previous pioneer
					if( (start/block_size)==cur_pioneer_block )
					{
						// erase previous pioneer, it is superseded by the pair (start,j)
						pioneer_bitmap[last_start] = false;
						pioneer_bitmap[last_j] = false;
					}
					// set this pioneer
					pioneer_bitmap[start] = true;
					pioneer_bitmap[j] = true;
					cur_pioneer_block = start/block_size;
					last_start = start;
					last_j = j;
				}
			}
		}

		pioneer_bitmap.setupIndex();

		return UNIQUE_PTR_MOVE(Ppioneer_bitmap);
	}