Beispiel #1
0
        //! Constructor
        /*! Builds the structure from the LCP file named by
         *  cache_file_name(conf::KEY_LCP, config).
         *  Values below 255 are stored directly in m_small_lcp. Every other
         *  position gets the marker 255 in m_small_lcp, and its value and
         *  position are stored in m_big_lcp / m_big_lcp_idx in increasing
         *  position order.
         *  \param config Cache configuration used to locate the LCP file.
         */
        lcp_byte(cache_config& config) {
            const std::string lcp_path = cache_file_name(conf::KEY_LCP, config);
            int_vector_buffer<> lcp_buf(lcp_path);
            m_small_lcp = int_vector<8>(lcp_buf.size());
            size_type largest_value = 0;
            size_type last_big_pos  = 0;
            size_type big_count     = 0;

            // Pass 1: fill the byte array and collect statistics about the
            // values that do not fit below the marker 255.
            for (size_type pos = 0; pos < m_small_lcp.size(); ++pos) {
                const size_type value = lcp_buf[pos];
                if (value >= 255) {
                    m_small_lcp[pos] = 255;
                    if (largest_value < value) {
                        largest_value = value;
                    }
                    last_big_pos = pos;
                    ++big_count;
                } else {
                    m_small_lcp[pos] = value;
                }
            }

            // Size both side arrays with just enough bits for the largest
            // value resp. the largest position they have to hold.
            m_big_lcp     = int_vector<>(big_count, 0, bits::hi(largest_value)+1);
            m_big_lcp_idx = int_vector<>(big_count, 0, bits::hi(last_big_pos)+1);

            // Pass 2: record every large value together with its position.
            size_type slot = 0;
            for (size_type pos = 0; pos < m_small_lcp.size(); ++pos) {
                const size_type value = lcp_buf[pos];
                if (value < 255) {
                    continue;
                }
                m_big_lcp[slot]     = value;
                m_big_lcp_idx[slot] = pos;
                ++slot;
            }
        }
Beispiel #2
0
 //! Load from a stream.
 /*! Reads the members from \p in in a fixed order: m_data, m_overflow,
  *  m_overflow_rank (which is also handed the address of m_overflow),
  *  m_level_pointer_and_rank and finally m_max_level via read_member.
  *  \param in Input stream to read from.
  *  NOTE(review): the order presumably has to mirror the matching
  *  serialize() -- confirm there before reordering anything.
  */
 void load(std::istream& in) {
     m_data.load(in);
     m_overflow.load(in);
     m_overflow_rank.load(in, &m_overflow);
     m_level_pointer_and_rank.load(in);
     read_member(m_max_level, in);
 }
Beispiel #3
0
        //! Swap operator
        /*! Exchanges the complete state of this object with \p wt.
         *  After the rank/select supports have been swapped, each of them is
         *  re-pointed at the bit vector member of the object it now lives in.
         *  Swapping an object with itself is a no-op.
         *  \param wt Object to exchange contents with.
         */
        void swap(wt_int_rlmn& wt) {
            if (this == &wt) {
                return;
            }

            // Plain data members.
            std::swap(m_size, wt.m_size);
            m_bl.swap(wt.m_bl);
            m_bf.swap(wt.m_bf);
            m_wt.swap(wt.m_wt);

            // Support structures on m_bl / m_bf.
            m_bl_rank.swap(wt.m_bl_rank);
            m_bf_rank.swap(wt.m_bf_rank);
            m_bl_select.swap(wt.m_bl_select);
            m_bf_select.swap(wt.m_bf_select);

            // Re-attach every support to the vector of its own object.
            m_bl_rank.set_vector(&m_bl);
            m_bl_select.set_vector(&m_bl);
            m_bf_rank.set_vector(&m_bf);
            m_bf_select.set_vector(&m_bf);
            wt.m_bl_rank.set_vector(&(wt.m_bl));
            wt.m_bl_select.set_vector(&(wt.m_bl));
            wt.m_bf_rank.set_vector(&(wt.m_bf));
            wt.m_bf_select.set_vector(&(wt.m_bf));

            m_C.swap(wt.m_C);
            m_C_bf_rank.swap(wt.m_C_bf_rank);
        }
Beispiel #4
0
 //! Constructor: fills the lookup tables for all n-bit patterns.
 /*! The patterns are enumerated class by class, where class i holds the
  *  patterns with exactly i set bits:
  *   - m_C[i]           running count of patterns emitted before class i
  *   - m_nr_to_bin[k]   the k-th pattern in this global enumeration order
  *   - m_bin_to_nr[x]   index of pattern x inside its own class
  *   - m_space_for_bt[i] 0 if class i has a single pattern, otherwise
  *                       bits::hi(class size)+1
  *  For n == 15 an additional table m_space_for_bt_pair is filled for all
  *  byte values x, holding the sum of the m_space_for_bt entries of the high
  *  and the low nibble of x.
  */
 impl()
 {
     m_nr_to_bin.resize(1<<n);
     m_bin_to_nr.resize(1<<n);
     for (int i=0, cnt=0, class_cnt=0; i<=n; ++i) {
         m_C[i] = cnt;
         class_cnt = 0;
         // Start with the i ones at the highest indices of b; this is the
         // lexicographically smallest arrangement, so next_permutation
         // visits every arrangement before it returns false.
         std::vector<bool> b(n,0);
         for (int j=0; j<i; ++j) b[n-j-1] = 1;
         do {
             // Pack b into an integer: bit j of x equals b[j].
             uint32_t x=0;
             for (int k=0; k<n; ++k)
                 x |= ((uint32_t)b[n-k-1])<<(n-1-k);
             m_nr_to_bin[cnt] = x;
             m_bin_to_nr[x] = class_cnt;
             ++cnt;
             ++class_cnt;
         } while (next_permutation(b.begin(), b.end()));
         // A class with a single member is given 0 as its space entry.
         if (class_cnt == 1)
             m_space_for_bt[i] = 0;
         else
             m_space_for_bt[i] = bits::hi(class_cnt)+1;
     }
     if (n == 15) {
         // Per-byte table: entry for the high nibble plus entry for the low nibble.
         for (int x=0; x<256; ++x) {
             m_space_for_bt_pair[x] = m_space_for_bt[x>>4] + m_space_for_bt[x&0x0F];
         }
     }
 }
Beispiel #5
0
// Query by keywords.
//
// Combines the image-id filters of all given keywords (AND when kwJoinType is
// non-zero, OR otherwise) and then either
//   - id == 0: picks random images that pass the combined filter, or
//   - id != 0: forwards to queryImgIDFiltered() with the combined filter.
//
// Parameters:
//   dbId       database space id; must pass validate_dbid()
//   id         image id to query for, or 0 to request random images
//   numres     number of results requested
//   kwJoinType non-zero: AND the keyword filters, zero: OR them
//   keywords   keyword ids; at least one is required
//   colorOnly  forwarded unchanged to queryImgIDFiltered()
//
// Returns an empty vector (after printing an error to cerr) when validation
// fails. On the random path the result is a flat list of pairs
// (image id, 0).
std::vector<double> queryImgIDKeywords(const int dbId, long int id, int numres, int kwJoinType, int_vector keywords, bool colorOnly){
	if (!validate_dbid(dbId)) { cerr << "ERROR: database space not found (" << dbId << ")" << endl; return std::vector<double>();}

	if ((id != 0) && !validate_imgid(dbId, id)) { // not search random and image doesnt exist
		cerr << "ERROR: image id (" << id << ") not found on given dbid (" << dbId << ") or dbid not existant" << endl ;
		return std::vector<double>();
	}

	if (keywords.size() < 1) {
		cerr << "ERROR: At least one keyword must be supplied" << endl ;
		return std::vector<double>();
	}

	// populate filter
	intVectorIterator it = keywords.begin();
	bloom_filter* bf = 0;

	// OR or AND each kwd postings filter to get final filter
	// start with a copy of the first one
	bf = new bloom_filter(*(getKwdPostings(*it)->imgIdsFilter));
	it++;
	for (; it != keywords.end(); it++) { // iterate the rest
		if (kwJoinType) { // and'd
			(*bf) &= *(getKwdPostings(*it)->imgIdsFilter);
		} else { // or'd
			(*bf) |= *(getKwdPostings(*it)->imgIdsFilter);
		}
	}

	if (id == 0) { // random images with these kwds

		vector<double> V; // select all images with the desired keywords
		for (sigIterator sit = dbSpace[dbId]->sigs.begin(); sit != dbSpace[dbId]->sigs.end(); sit++) {
			if (V.size() > 20*numres) break; // cap the candidate pool

			if ((bf == 0) || (bf->contains((*sit).first))) { // image has desired keyword or we're querying random
				V.insert(V.end(), (*sit).first);
				V.insert(V.end(), 0);
			}
		}

		// The combined filter was only needed to select the candidates and is
		// not handed to anyone else on this path, so release it here.
		// Previously it was leaked on every random query.
		delete bf;
		bf = 0;

		vector<double> Vres;

		// NOTE(review): var is advanced on every attempt and once more on
		// every hit, so fewer than numres results can be returned. The
		// unconditional increment also guarantees termination. Left unchanged;
		// confirm the intended result count.
		for (int var = 0; var < min(V.size()/2, numres); ) { // var goes from 0 to numres
			int rint = rand()%(V.size()/2);
			if (V[rint*2] > 0) { // havent added this random result yet
				Vres.insert(Vres.end(), V[rint*2] );
				Vres.insert(Vres.end(), 0 );
				V[rint*2] = 0; // mark the candidate as used
				++var;
			}
			++var;
		}

		return Vres;
	}
	return queryImgIDFiltered(dbId, id, numres, bf, colorOnly);

}
Beispiel #6
0
 //! Loads the data structure from the given istream.
 /*! Reads m_size via read_member and then loads m_bt, m_btnr, m_btnrp,
  *  m_rank and m_invert, in that order.
  *  \param in Input stream to read from.
  *  NOTE(review): the order presumably has to match the corresponding
  *  serialize() -- confirm before reordering.
  */
 void load(std::istream& in) {
     read_member(m_size, in);
     m_bt.load(in);
     m_btnr.load(in);
     m_btnrp.load(in);
     m_rank.load(in);
     m_invert.load(in);
 }
 /*! Loads the data structure from the given stream.
  *  Reads m_abs_samples, m_differences, m_ones, m_size and
  *  m_contains_abs_sample in that order, then loads
  *  m_rank_contains_abs_sample and hands it the address of
  *  m_contains_abs_sample.
  *  \param in In-Stream to load the data from.
  */
 void load(std::istream& in) {
     m_abs_samples.load(in);
     m_differences.load(in);
     read_member(m_ones, in);
     read_member(m_size, in);
     m_contains_abs_sample.load(in);
     m_rank_contains_abs_sample.load(in, &m_contains_abs_sample);
 }
Beispiel #8
0
        //! Swap method for lcp_dac
        /*! Exchanges all members with \p lcp_c. The rank support on
         *  m_overflow is exchanged through util::swap_support, which is
         *  given the addresses of both objects' m_overflow vectors.
         *  \param lcp_c Object to exchange contents with.
         */
        void swap(lcp_dac& lcp_c) {
            // Scalar and level bookkeeping.
            std::swap(m_max_level, lcp_c.m_max_level);
            m_level_pointer_and_rank.swap(lcp_c.m_level_pointer_and_rank);

            // Payload vectors.
            m_data.swap(lcp_c.m_data);
            m_overflow.swap(lcp_c.m_overflow);

            // Rank support together with the vectors it refers to.
            util::swap_support(m_overflow_rank,
                               lcp_c.m_overflow_rank,
                               &m_overflow,
                               &(lcp_c.m_overflow));
        }
// Start of ternary::encode(v, z); the remainder of the function is not part
// of this excerpt. The visible part prepares z for the result: it takes over
// the integer width of v, resizes z to the summed encoding_length() of all
// elements of v, and clears the word at index z_bit_size/64 when that size is
// not a multiple of 64.
bool ternary::encode(const int_vector &v, int_vector &z){
	z.setIntWidth( v.getIntWidth() );
	size_t z_bit_size = 0;
	// Accumulate the number of bits required for all elements of v.
	for(typename int_vector::const_iterator it = v.begin(), end = v.end(); it != end; ++it){
		z_bit_size += encoding_length(*it);
	}
	z.bit_resize( z_bit_size ); // Initial size of z
	if( z_bit_size & 0x3F ){ // if z_bit_size % 64 != 0
		*(z.m_data + (z_bit_size>>6)) = 0; // initialize last word
	}
Beispiel #10
0
 //! Swap method
 /*! Exchanges m_size, m_bt, m_btnr, m_btnrp, m_rank and m_invert with the
  *  corresponding members of \p rrr. Swapping an object with itself does
  *  nothing.
  *  \param rrr Object to exchange contents with.
  */
 void swap(rrr_vector& rrr) {
     if (this == &rrr) {
         return;
     }
     std::swap(m_size, rrr.m_size);
     m_bt.swap(rrr.m_bt);
     m_btnr.swap(rrr.m_btnr);
     m_btnrp.swap(rrr.m_btnrp);
     m_rank.swap(rrr.m_rank);
     m_invert.swap(rrr.m_invert);
 }
/*! Serializes the index to \p out.
 *  Writes the tag character '1', calls superserialize(out) and then
 *  serializes sa, text and psi, in that order.
 *  \param out Stream to write to.
 *  \return sizeof(char) plus the byte counts reported by sa, text and psi.
 *  NOTE(review): whatever superserialize(out) writes is not added to the
 *  returned total -- confirm whether that is intended.
 */
size_t index_sa_text_psi::serialize(std::ostream &out) const {
	size_t written_bytes = 0;
	out.put('1'); // tag that load() checks for
	written_bytes += sizeof(char);
	superserialize(out);
	written_bytes += sa.serialize(out);
	written_bytes += text.serialize(out);
	written_bytes += psi.serialize(out);
	return written_bytes;
}
Beispiel #12
0
 /*! Returns the value stored at position \p i.
  *  \param i Index of the value. \f$ i \in [0..size()-1]\f$.
  *  Time complexity: O(1) for small and O(log n) for large values
  *
  *  A byte different from 255 in m_small_lcp is the value itself. The
  *  marker 255 means the value is stored in m_big_lcp; its slot is found by
  *  a lower_bound search for \p i in m_big_lcp_idx.
  */
 inline value_type operator[](size_type i)const {
     if (m_small_lcp[i] == 255) {
         const size_type slot = lower_bound(m_big_lcp_idx.begin(),
                                            m_big_lcp_idx.end(), i)
                                - m_big_lcp_idx.begin();
         return m_big_lcp[slot];
     }
     return m_small_lcp[i];
 }
/*! Loads the index from \p in.
 *  The first character read must be the tag '1'. If it is not, the message
 *  "wrong index!!" is printed to std::cerr and a string literal
 *  (const char*) is thrown. After the tag check superload(in) is called and
 *  sa, text and psi are loaded, in that order.
 *  \param in Stream to read from.
 */
void index_sa_text_psi::load(std::istream &in) {
	if (in.get()!='1') {
		std::cerr << "wrong index!!";
		throw("wrong index!!");
	}
	superload(in);
	sa.load(in);
	text.load(in);
	psi.load(in);
}
 /*! Exchanges all members with \p nnd. The rank support on
  *  m_contains_abs_sample is exchanged through util::swap_support, which is
  *  given the addresses of both objects' m_contains_abs_sample vectors.
  *  \param nnd Object to exchange contents with.
  */
 void swap(nearest_neighbour_dictionary& nnd) {
     // scalar members
     std::swap(m_size, nnd.m_size);
     std::swap(m_ones, nnd.m_ones);

     // sample data
     m_abs_samples.swap(nnd.m_abs_samples);
     m_differences.swap(nnd.m_differences);

     // marker bit vector and the rank support built on it
     m_contains_abs_sample.swap(nnd.m_contains_abs_sample);
     util::swap_support(m_rank_contains_abs_sample,
                        nnd.m_rank_contains_abs_sample,
                        &m_contains_abs_sample,
                        &(nnd.m_contains_abs_sample));
 }
Beispiel #15
0
 //! Serialize to a stream.
 /*! Writes m_small_lcp, m_big_lcp and m_big_lcp_idx to \p out, in that
  *  order, under the labels "small_lcp", "large_lcp" and "large_lcp_idx".
  *  \param out  Stream to write to.
  *  \param v    Parent node of the structure tree; a child node named
  *              \p name is added below it via structure_tree::add_child.
  *  \param name Name used for the child node.
  *  \return Sum of the byte counts returned by the three member
  *          serialize calls; the same number is passed to
  *          structure_tree::add_size for the child node.
  */
 size_type serialize(std::ostream& out, structure_tree_node* v=nullptr,
                     std::string name="")const {
     structure_tree_node* child = structure_tree::add_child(v, name,
                                  util::class_name(*this));
     size_type written_bytes = 0;
     written_bytes += m_small_lcp.serialize(out, child, "small_lcp");
     written_bytes += m_big_lcp.serialize(out, child, "large_lcp");
     written_bytes += m_big_lcp_idx.serialize(out, child, "large_lcp_idx");
     structure_tree::add_size(child, written_bytes);
     return written_bytes;
 }
Beispiel #16
0
std::vector<int> const build_vector()
{
    typedef std::vector<int> int_vector;
    static int_vector data = init_vector();
    int_vector::size_type const size = data.size();
    int_vector::iterator it = data.begin();
    int_vector::iterator const end = data.end();
    for (; it != end; ++it)
        *it += size;
    return data;
}
		/*!
		 * Constructor for building the Index
		 * \param[in] str C-string of the text
		 *
		 * Builds the suffix array of str (including its terminating zero
		 * byte, hence n+1 entries), derives psi from it via sa2psi, and
		 * stores the text mapped through char2comp.
		 */
		index_sa_text_psi(const unsigned char* str) : index() {

			size_t n = strlen((const char*)str);
			// n+1 entries, each bit_magic::l1BP(n+1)+1 bits wide
			sa = int_vector<>(n+1, 0, bit_magic::l1BP(n+1)+1);
			algorithm::calculate_sa(str, n+1, sa);   // calculate the suffix array sa of str
			sdsl::algorithm::sa2psi(sa, psi);
			// NOTE(review): setText presumably initialises sigma and char2comp
			// used below -- confirm in the base class.
			setText(str, n+1);

			// Store the text symbol by symbol, mapped through char2comp.
			text = int_vector<>(sa.size(), 0, bit_magic::l1BP(sigma)+1);
			for (size_t i=0; i<sa.size(); i++) text[i] = char2comp[str[i]];
		}
Beispiel #18
0
 //! Loads the data structure from the given istream.
 /*! Reads m_size, then m_bl, m_bf and m_wt, then the rank and select
  *  supports (each one is handed the address of the bit vector it belongs
  *  to: m_bl or m_bf), and finally m_C and m_C_bf_rank.
  *  \param in Input stream to read from.
  */
 void load(std::istream& in) {
     read_member(m_size, in);
     m_bl.load(in);
     m_bf.load(in);
     m_wt.load(in);
     m_bl_rank.load(in, &m_bl);
     m_bf_rank.load(in, &m_bf);
     m_bl_select.load(in, &m_bl);
     m_bf_select.load(in, &m_bf);
     m_C.load(in);
     m_C_bf_rank.load(in);
 }
 /*! Serializes the data structure to the given stream.
  *  Writes m_abs_samples, m_differences, m_ones, m_size,
  *  m_contains_abs_sample and m_rank_contains_abs_sample, in that order.
  *  \param out  Out-Stream to serialize the data to.
  *  \param v    Parent node of the structure tree; a child node named
  *              \p name is added below it via structure_tree::add_child.
  *  \param name Name used for the child node.
  *  \return Sum of the byte counts returned by the individual writes; the
  *          same number is passed to structure_tree::add_size.
 */
 size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const {
     size_type written_bytes = 0;
     structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
     written_bytes += m_abs_samples.serialize(out, child, "absolute_samples");
     written_bytes += m_differences.serialize(out, child, "differences");
     written_bytes += write_member(m_ones, out, child, "ones");
     written_bytes += write_member(m_size,out,  child, "size");
     written_bytes += m_contains_abs_sample.serialize(out, child, "contains_abs_sample");
     written_bytes += m_rank_contains_abs_sample.serialize(out, child, "rank_contains_abs_sample");
     structure_tree::add_size(child, written_bytes);
     return written_bytes;
 }
std::unordered_set<int> const build_unordered_set()
{
    typedef std::unordered_set<int> int_set;
    typedef std::vector<int> int_vector;

    int_set result;
    int_vector const data = build_vector();
    int_vector::const_iterator it = data.begin();
    int_vector::const_iterator const end = data.end();
    result.insert(it, end);
    return result;
}
Beispiel #21
0
 //! Serializes the data structure into the given ostream
 /*! Writes m_size, m_bt, m_btnr, m_btnrp, m_rank and m_invert, in that
  *  order.
  *  \param out  Stream to write to.
  *  \param v    Parent node of the structure tree; a child node named
  *              \p name is added below it via structure_tree::add_child.
  *  \param name Name used for the child node.
  *  \return Sum of the byte counts returned by the individual writes; the
  *          same number is passed to structure_tree::add_size.
  */
 size_type serialize(std::ostream& out, structure_tree_node* v=nullptr, std::string name="")const {
     structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
     size_type written_bytes = 0;
     written_bytes += write_member(m_size, out, child, "size");
     written_bytes += m_bt.serialize(out, child, "bt");
     written_bytes += m_btnr.serialize(out, child, "btnr");
     written_bytes += m_btnrp.serialize(out, child, "btnrp");
     written_bytes += m_rank.serialize(out, child, "rank_samples");
     written_bytes += m_invert.serialize(out, child, "invert");
     structure_tree::add_size(child, written_bytes);
     return written_bytes;
 }
Beispiel #22
0
bool addKeywordsImg(const int dbId, const int id, int_vector hashes){
	if (!validate_imgid(dbId, id)) { cerr << "ERROR: image id (" << id << ") not found on given dbid (" << dbId << ") or dbid not existant" << endl ; return false;};

	// populate keyword postings
	for (intVectorIterator it = hashes.begin(); it != hashes.end(); it++) {
		getKwdPostings(*it)->imgIdsFilter->insert(id);
	}

	// populate image kwds
	int_hashset& imgKwds = dbSpace[dbId]->sigs[id]->keywords;
	imgKwds.insert(hashes.begin(),hashes.end());
	return true;
}
std::map<int, int> const build_map()
{
    typedef std::map<int, int> int_map;
    typedef std::vector<int> int_vector;

    int_map result;
    int_vector const data = build_vector();
    int_vector::const_iterator it = data.begin();
    int_vector::const_iterator const end = data.end();
    for (; it != end; ++it) {
	    int const value = *it;
	    result[value] = 100 * value;
    }
    return result;
}
Beispiel #24
0
 //! Serializes the data structure into the given ostream
 /*! Writes m_size, the vectors m_bl, m_bf and m_wt, the rank and select
  *  supports on m_bl / m_bf, and finally m_C and m_C_bf_rank, in that order.
  *  \param out  Stream to write to.
  *  \param v    Parent node of the structure tree; a child node named
  *              \p name is added below it via structure_tree::add_child.
  *  \param name Name used for the child node.
  *  \return Sum of the byte counts returned by the individual writes; the
  *          same number is passed to structure_tree::add_size.
  */
 size_type serialize(std::ostream& out, structure_tree_node* v=nullptr, std::string name="")const {
     structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
     size_type written_bytes = 0;
     written_bytes += write_member(m_size, out, child, "size");
     written_bytes += m_bl.serialize(out, child, "bl");
     written_bytes += m_bf.serialize(out, child, "bf");
     written_bytes += m_wt.serialize(out, child, "wt");
     written_bytes += m_bl_rank.serialize(out, child, "bl_rank");
     written_bytes += m_bf_rank.serialize(out, child, "bf_rank");
     written_bytes += m_bl_select.serialize(out, child, "bl_select");
     written_bytes += m_bf_select.serialize(out, child, "bf_select");
     written_bytes += m_C.serialize(out, child, "C");
     written_bytes += m_C_bf_rank.serialize(out, child, "C_bf_rank");
     structure_tree::add_size(child, written_bytes);
     return written_bytes;
 }
		/*!
		 * Constructor for building the Index
		 * \param[in] str C-string of the text
		 *
		 * Builds two wavelet trees: backward_index from the BWT of str and
		 * forward_index from the BWT of the reversed str. Suffix array
		 * samples (m_sa_sample) are taken from the suffix array of str only.
		 *
		 * NOTE(review): bwt and reverse are raw new[] buffers that are only
		 * released at the very end; an exception thrown in between would
		 * leak them.
		 */
		index_bidirectional_waveletindex(const unsigned char* str) : index() {

			size_t n = strlen((const char*)str);
			// n+1 entries: the terminating zero byte of str is included
			int_vector<> sa(n+1, 0, bit_magic::l1BP(n+1)+1);
			setText(str, n+1);

			// Scratch buffer, reused for both BWTs below.
			unsigned char *bwt = new unsigned char[n+1];

			algorithm::calculate_sa(str, n+1, sa);   // calculate the suffix array sa of str

			{ /* Calculate Burrows-Wheeler-Transform */
				size_t i = 0;
				for(int_vector<>::const_iterator it = sa.begin(), end = sa.end(); it != end; ++it, ++i){
					// (*it+n)%(n+1) is the cyclic predecessor of position *it
					bwt[i] = m_char2comp[str[(*it+n)%(n+1)]];
				}
			}

			backward_index = WaveletTree(bwt, n+1, m_sigma);

			/* Construct the SA-Samples */
			m_sa_sample.setIntWidth( bit_magic::l1BP(sa.size())+1 );
			m_sa_sample.resize( (sa.size()+SampleDens-1)/SampleDens );
			size_t idx=0;
			// Start offset chosen so that stepping by SampleDens makes the
			// final sample land exactly on the last entry sa[sa.size()-1].
			size_t i=(sa.size()-1-SampleDens*(m_sa_sample.size()-1));
			for(int_vector<>::const_iterator it = sa.begin()+(ptrdiff_t)i; i < sa.size(); it += (ptrdiff_t)SampleDens, i += SampleDens, ++idx){
				m_sa_sample[idx] = *it;
			} 

			// Reversed copy of str (without the terminator), zero-terminated again.
			unsigned char* reverse = new unsigned char[n+1];
			for (size_t i=0; i<n; i++) reverse[i] = str[n-1-i];
			reverse[n] = '\0';

			algorithm::calculate_sa(reverse, n+1, sa);   // calculate the suffix array sa of reverse string str

			{ /* Calculate Burrows-Wheeler-Transform */
				size_t i = 0;
				for(int_vector<>::const_iterator it = sa.begin(), end = sa.end(); it != end; ++it, ++i){
					bwt[i] = m_char2comp[reverse[(*it+n)%(n+1)]];
				}
			}

			forward_index = WaveletTree(bwt, n+1, m_sigma);

			delete [] bwt;
			delete [] reverse;

		}
Beispiel #26
0
 //! Serializes the data structure into the given ostream
 /*! Writes m_size followed by m_position.
  *  \param out  Stream to write to.
  *  \param v    Parent node of the structure tree; a child node named
  *              \p name is added below it via structure_tree::add_child.
  *  \param name Name used for the child node.
  *  \return Sum of the byte counts returned by the two writes; the same
  *          number is passed to structure_tree::add_size.
  */
 size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const {
     size_type written_bytes = 0;
     structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
     written_bytes += util::write_member(m_size, out, child, "size");
     written_bytes += m_position.serialize(out, child, "positions");
     structure_tree::add_size(child, written_bytes);
     return written_bytes;
 }
 //! Constructor taking a cache_config
 /*! Opens the LCP file named by cache_file_name(conf::KEY_LCP, config) and
  *  copies it element by element into m_lcp, which is created with the same
  *  number of elements and the same width as the file buffer.
  *  \param config Cache configuration used to locate the LCP file.
  */
 lcp_bitcompressed(cache_config& config) {
     const std::string lcp_path = cache_file_name(conf::KEY_LCP, config);
     int_vector_buffer<> lcp_buf(lcp_path);

     m_lcp = int_vector<t_width>(lcp_buf.size(), 0, lcp_buf.width());

     size_type pos = 0;
     while (pos < m_lcp.size()) {
         m_lcp[pos] = lcp_buf[pos];
         ++pos;
     }
 }
Beispiel #28
0
/*! Checks that \p text contains no symbol equal to zero.
 *  \param text Sequence to scan.
 *  \param file File name; only used in the error message.
 *  \return true when no element of \p text is zero. The function never
 *          returns false: a zero symbol is reported by exception instead.
 *  \throws std::logic_error when a zero symbol is found.
 */
bool contains_no_zero_symbol(const int_vector& text, const std::string& file)
{
    for (int_vector_size_type i=0; i < text.size(); ++i) {
        if ((uint64_t)0 == text[i]) {
            throw std::logic_error(std::string("Error: File \"")+file+"\" contains zero symbol.");
        }
    }
    return true;
}
		/*!
		 * Constructor for building the Index
		 * \param[in] str C-string of the text
		 *
		 * Builds the suffix array of str (including its terminating zero
		 * byte), stores the text mapped through char2comp, and constructs
		 * occ from the BWT of str.
		 */
		index_sa_text_occ(const unsigned char* str) : index() {

			size_t n = strlen((const char*)str);
			// n+1 entries, each bit_magic::l1BP(n+1)+1 bits wide
			sa = int_vector<>(n+1, 0, bit_magic::l1BP(n+1)+1);
			algorithm::calculate_sa(str, n+1, sa);   // calculate the suffix array sa of str
			setText(str, n+1);

			// Store the text symbol by symbol, mapped through char2comp.
			text = int_vector<>(sa.size(), 0, bit_magic::l1BP(sigma)+1);
			for (size_t i=0; i<sa.size(); i++) text[i] = char2comp[str[i]];

			// Temporary buffer for the BWT; released after occ has been built.
			unsigned char *bwt = new unsigned char[n+1];
			{ /* Calculate Burrows-Wheeler-Transform */
				size_t i = 0;
				for(int_vector<>::const_iterator it = sa.begin(), end = sa.end(); it != end; ++it, ++i){
					// (*it+n)%(n+1) is the cyclic predecessor of position *it
					bwt[i] = m_char2comp[str[(*it+n)%(n+1)]];
				}
			}
			occ = Occ(bwt, n+1, m_sigma);
			delete[] bwt;
		}
    /*!
     * Constructor for building the Index
     * \param[in] str C-string of the text
     *
     * Builds the suffix array of str (including its terminating zero byte),
     * derives psi from it and stores it as an EncVector, stores the text
     * mapped through char2comp, and keeps every SampleDens-th suffix array
     * entry (starting with entry 0) in m_sa_sample.
     */
    index_csa_psi_text(const unsigned char *str) : index() {

        size_t n = strlen((const char*)str);
        // The full suffix array is only a local; just its samples are kept.
        int_vector<> sa(n+1, 0, bit_magic::l1BP(n+1)+1);
        algorithm::calculate_sa(str, n+1, sa);   // calculate the suffix array sa of str
        // Uncompressed psi, used only to construct the EncVector member psi.
        int_vector<> m_psi;
        sdsl::algorithm::sa2psi(sa, m_psi);
        psi = EncVector(m_psi);
        setText(str, n+1);

        // Store the text symbol by symbol, mapped through char2comp.
        text = int_vector<>(sa.size(), 0, bit_magic::l1BP(sigma)+1);
        for (size_t i=0; i<sa.size(); i++) text[i] = char2comp[str[i]];

        /* Construct the SA-Samples */
        m_sa_sample.setIntWidth( bit_magic::l1BP(sa.size())+1 );
        m_sa_sample.resize( (sa.size()+SampleDens-1)/SampleDens );
        size_t i=0, idx=0;
        // The loop is bounded by the index i, not by the iterator.
        for(int_vector<>::const_iterator it = sa.begin(); i < sa.size(); it += (ptrdiff_t)SampleDens, i += SampleDens, ++idx) {
            m_sa_sample[idx] = *it;
        }
    }