//! Constructor lcp_byte(cache_config& config) { std::string lcp_file = cache_file_name(conf::KEY_LCP, config); int_vector_buffer<> lcp_buf(lcp_file); m_small_lcp = int_vector<8>(lcp_buf.size()); size_type l=0, max_l=0, max_big_idx=0, big_sum=0; for (size_type i=0; i < m_small_lcp.size(); ++i) { if ((l=lcp_buf[i]) < 255) { m_small_lcp[i] = l; } else { m_small_lcp[i] = 255; if (l > max_l) max_l = l; max_big_idx = i; ++big_sum; } } m_big_lcp = int_vector<>(big_sum, 0, bits::hi(max_l)+1); m_big_lcp_idx = int_vector<>(big_sum, 0, bits::hi(max_big_idx)+1); for (size_type i=0,ii=0; i<m_small_lcp.size(); ++i) { if ((l=lcp_buf[i]) >= 255) { m_big_lcp[ii] = l; m_big_lcp_idx[ii] = i; ++ii; } } }
//! Load from a stream. void load(std::istream& in) { m_data.load(in); m_overflow.load(in); m_overflow_rank.load(in, &m_overflow); m_level_pointer_and_rank.load(in); read_member(m_max_level, in); }
//! Swap operator void swap(wt_int_rlmn& wt) { if (this != &wt) { std::swap(m_size, wt.m_size); m_bl.swap(wt.m_bl); m_bf.swap(wt.m_bf); m_wt.swap(wt.m_wt); m_bl_rank.swap(wt.m_bl_rank); m_bl_rank.set_vector(&m_bl); wt.m_bl_rank.set_vector(&(wt.m_bl)); m_bf_rank.swap(wt.m_bf_rank); m_bf_rank.set_vector(&m_bf); wt.m_bf_rank.set_vector(&(wt.m_bf)); m_bl_select.swap(wt.m_bl_select); m_bl_select.set_vector(&m_bl); wt.m_bl_select.set_vector(&(wt.m_bl)); m_bf_select.swap(wt.m_bf_select); m_bf_select.set_vector(&m_bf); wt.m_bf_select.set_vector(&(wt.m_bf)); m_C.swap(wt.m_C); m_C_bf_rank.swap(wt.m_C_bf_rank); } }
//! Builds the lookup tables for all n-bit blocks, grouped by their number of set bits.
/*!
 *  For every class `ones` in [0..n]:
 *   - m_C[ones] receives the global number of the first block of that class,
 *   - m_nr_to_bin maps a global number to the bit pattern,
 *   - m_bin_to_nr maps a bit pattern to its number inside its class,
 *   - m_space_for_bt[ones] receives the bit width reserved for a class-local
 *     number (0 when the class holds a single pattern).
 *  For n == 15 a further table indexed by a byte adds up the widths of the
 *  classes given by the byte's two nibbles.
 */
impl()
{
    m_nr_to_bin.resize(1<<n);
    m_bin_to_nr.resize(1<<n);

    int next_nr = 0;
    for (int ones = 0; ones <= n; ++ones) {
        m_C[ones] = next_nr;
        int in_class = 0;

        // Start from the sorted arrangement (zeros first, ones last) so that
        // next_permutation walks through every pattern of this class.
        std::vector<bool> pattern(n, 0);
        for (int j = 0; j < ones; ++j) {
            pattern[n-j-1] = 1;
        }

        do {
            uint32_t word = 0;
            for (int k = 0; k < n; ++k) {
                word |= ((uint32_t)pattern[n-k-1]) << (n-1-k);
            }
            m_nr_to_bin[next_nr] = word;
            m_bin_to_nr[word]    = in_class;
            ++next_nr;
            ++in_class;
        } while (next_permutation(pattern.begin(), pattern.end()));

        if (in_class != 1) {
            m_space_for_bt[ones] = bits::hi(in_class)+1;
        } else {
            m_space_for_bt[ones] = 0;
        }
    }

    if (n == 15) {
        for (int byte = 0; byte < 256; ++byte) {
            const int high_nibble = byte >> 4;
            const int low_nibble  = byte & 0x0F;
            m_space_for_bt_pair[byte] = m_space_for_bt[high_nibble] + m_space_for_bt[low_nibble];
        }
    }
}
// query by keywords std::vector<double> queryImgIDKeywords(const int dbId, long int id, int numres, int kwJoinType, int_vector keywords, bool colorOnly){ if (!validate_dbid(dbId)) { cerr << "ERROR: database space not found (" << dbId << ")" << endl; return std::vector<double>();} if ((id != 0) && !validate_imgid(dbId, id)) { // not search random and image doesnt exist cerr << "ERROR: image id (" << id << ") not found on given dbid (" << dbId << ") or dbid not existant" << endl ; return std::vector<double>(); } if (keywords.size() < 1) { cerr << "ERROR: At least one keyword must be supplied" << endl ; return std::vector<double>(); } // populate filter intVectorIterator it = keywords.begin(); bloom_filter* bf = 0; // OR or AND each kwd postings filter to get final filter // start with the first one bf = new bloom_filter(*(getKwdPostings(*it)->imgIdsFilter)); it++; for (; it != keywords.end(); it++) { // iterate the rest if (kwJoinType) { // and'd (*bf) &= *(getKwdPostings(*it)->imgIdsFilter); } else { // or'd (*bf) |= *(getKwdPostings(*it)->imgIdsFilter); } } if (id == 0) { // random images with these kwds vector<double> V; // select all images with the desired keywords for (sigIterator sit = dbSpace[dbId]->sigs.begin(); sit != dbSpace[dbId]->sigs.end(); sit++) { if (V.size() > 20*numres) break; if ((bf == 0) || (bf->contains((*sit).first))) { // image has desired keyword or we're querying random V.insert(V.end(), (*sit).first); V.insert(V.end(), 0); } } vector<double> Vres; for (int var = 0; var < min(V.size()/2, numres); ) { // var goes from 0 to numres int rint = rand()%(V.size()/2); if (V[rint*2] > 0) { // havent added this random result yet Vres.insert(Vres.end(), V[rint*2] ); Vres.insert(Vres.end(), 0 ); V[rint*2] = 0; ++var; } ++var; } return Vres; } return queryImgIDFiltered(dbId, id, numres, bf, colorOnly); }
//! Loads the data structure from the given istream. void load(std::istream& in) { read_member(m_size, in); m_bt.load(in); m_btnr.load(in); m_btnrp.load(in); m_rank.load(in); m_invert.load(in); }
//! Loads the dictionary from a stream.
/*! \param in In-Stream to load the data from.
 *
 *  Reads the absolute samples, the differences, the two counters and the
 *  marker bit vector, then the rank support for that bit vector.
 */
void load(std::istream& in)
{
    m_abs_samples.load(in);
    m_differences.load(in);

    read_member(m_ones, in);
    read_member(m_size, in);

    m_contains_abs_sample.load(in);
    // The rank support is handed the address of the bit vector loaded just above.
    m_rank_contains_abs_sample.load(in, &m_contains_abs_sample);
}
//! Swap method for lcp_dac void swap(lcp_dac& lcp_c) { m_data.swap(lcp_c.m_data); m_overflow.swap(lcp_c.m_overflow); util::swap_support(m_overflow_rank, lcp_c.m_overflow_rank, &m_overflow, &(lcp_c.m_overflow)); m_level_pointer_and_rank.swap(lcp_c.m_level_pointer_and_rank); std::swap(m_max_level, lcp_c.m_max_level); }
bool ternary::encode(const int_vector &v, int_vector &z){ z.setIntWidth( v.getIntWidth() ); size_t z_bit_size = 0; for(typename int_vector::const_iterator it = v.begin(), end = v.end(); it != end; ++it){ z_bit_size += encoding_length(*it); } z.bit_resize( z_bit_size ); // Initial size of z if( z_bit_size & 0x3F ){ // if z_bit_size % 64 != 0 *(z.m_data + (z_bit_size>>6)) = 0; // initialize last word }
//! Swap method void swap(rrr_vector& rrr) { if (this != &rrr) { std::swap(m_size, rrr.m_size); m_bt.swap(rrr.m_bt); m_btnr.swap(rrr.m_btnr); m_btnrp.swap(rrr.m_btnrp); m_rank.swap(rrr.m_rank); m_invert.swap(rrr.m_invert); } }
/*!
 * Writes the index to a stream.
 * \param[in,out] out Stream to write to.
 * \return Number of bytes accounted for: the tag byte plus whatever the
 *         serialize() calls of sa, text and psi report. Anything written by
 *         superserialize() is not added to this count.
 */
size_t index_sa_text_psi::serialize(std::ostream &out) const {
    // Leading tag character identifying this index type.
    out.put('1');
    size_t total = sizeof(char);

    superserialize(out);

    total += sa.serialize(out);
    total += text.serialize(out);
    total += psi.serialize(out);
    return total;
}
//! Access the i-th LCP value.
/*! \param i Index of the value. \f$ i \in [0..size()-1]\f$.
 *  \return The value stored directly in the byte vector, or, when the byte is
 *          the marker 255, the value found in the side vector of large values.
 *
 *  Large values are located by a binary search for i in the vector of their
 *  positions.
 */
inline value_type operator[](size_type i)const
{
    if (m_small_lcp[i] == 255) {
        size_type slot = lower_bound(m_big_lcp_idx.begin(),
                                     m_big_lcp_idx.end(),
                                     i) - m_big_lcp_idx.begin();
        return m_big_lcp[slot];
    }
    return m_small_lcp[i];
}
/*!
 * Reads the index from a stream.
 * \param[in,out] in Stream to read from. Its first character must be '1'.
 *
 * If the first character differs, a message is printed to std::cerr and a
 * string literal (const char*) is thrown.
 */
void index_sa_text_psi::load(std::istream &in) {
    const bool tag_matches = (in.get() == '1');
    if (!tag_matches) {
        std::cerr << "wrong index!!";
        throw("wrong index!!");
    }

    superload(in);
    sa.load(in);
    text.load(in);
    psi.load(in);
}
//! Exchanges the contents of this dictionary with another one.
/*! \param nnd The dictionary to swap with.
 *
 *  The rank support is exchanged through util::swap_support, which receives
 *  the addresses of both marker bit vectors.
 */
void swap(nearest_neighbour_dictionary& nnd)
{
    // Sample vectors.
    m_abs_samples.swap(nnd.m_abs_samples);
    m_differences.swap(nnd.m_differences);

    // Plain counters.
    std::swap(m_ones, nnd.m_ones);
    std::swap(m_size, nnd.m_size);

    // Marker bit vector and its rank support.
    m_contains_abs_sample.swap(nnd.m_contains_abs_sample);
    util::swap_support(m_rank_contains_abs_sample,
                       nnd.m_rank_contains_abs_sample,
                       &m_contains_abs_sample,
                       &nnd.m_contains_abs_sample);
}
//! Serialize to a stream. size_type serialize(std::ostream& out, structure_tree_node* v=nullptr, std::string name="")const { structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); size_type written_bytes = 0; written_bytes += m_small_lcp.serialize(out, child, "small_lcp"); written_bytes += m_big_lcp.serialize(out, child, "large_lcp"); written_bytes += m_big_lcp_idx.serialize(out, child, "large_lcp_idx"); structure_tree::add_size(child, written_bytes); return written_bytes; }
// Returns a copy of a function-local static vector after adding the vector's
// size to each of its elements.
//
// The static vector is initialised once from init_vector(). Because the
// update is applied to the static itself, every call shifts the stored values
// again, so successive calls return different contents.
std::vector<int> const build_vector()
{
    typedef std::vector<int> int_vector;

    static int_vector data = init_vector();

    int_vector::size_type const size = data.size();
    for (int& element : data) {
        element += size;
    }
    return data;
}
/*! * Constructor for building the Index * \param[in] str C-string of the text */ index_sa_text_psi(const unsigned char* str) : index() { size_t n = strlen((const char*)str); sa = int_vector<>(n+1, 0, bit_magic::l1BP(n+1)+1); algorithm::calculate_sa(str, n+1, sa); // calculate the suffix array sa of str sdsl::algorithm::sa2psi(sa, psi); setText(str, n+1); text = int_vector<>(sa.size(), 0, bit_magic::l1BP(sigma)+1); for (size_t i=0; i<sa.size(); i++) text[i] = char2comp[str[i]]; }
//! Loads the data structure from the given istream. void load(std::istream& in) { read_member(m_size, in); m_bl.load(in); m_bf.load(in); m_wt.load(in); m_bl_rank.load(in, &m_bl); m_bf_rank.load(in, &m_bf); m_bl_select.load(in, &m_bl); m_bf_select.load(in, &m_bf); m_C.load(in); m_C_bf_rank.load(in); }
//! Serializes the dictionary to a stream.
/*! \param out  Out-Stream to serialize the data to.
 *  \param v    Parent node in the structure tree (may be NULL).
 *  \param name Name under which this object is registered in the structure tree.
 *  \return Sum of the byte counts reported for all written members.
 */
size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const
{
    structure_tree_node* node = structure_tree::add_child(v, name, util::class_name(*this));

    size_type total = 0;
    total += m_abs_samples.serialize(out, node, "absolute_samples");
    total += m_differences.serialize(out, node, "differences");
    total += write_member(m_ones, out, node, "ones");
    total += write_member(m_size,out, node, "size");
    total += m_contains_abs_sample.serialize(out, node, "contains_abs_sample");
    total += m_rank_contains_abs_sample.serialize(out, node, "rank_contains_abs_sample");

    structure_tree::add_size(node, total);
    return total;
}
// Returns an unordered set holding every value produced by one call to
// build_vector().
std::unordered_set<int> const build_unordered_set()
{
    typedef std::vector<int> int_vector;

    int_vector const data = build_vector();

    std::unordered_set<int> result;
    result.insert(data.begin(), data.end());
    return result;
}
//! Answers select queries //! Serializes the data structure into the given ostream size_type serialize(std::ostream& out, structure_tree_node* v=nullptr, std::string name="")const { structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); size_type written_bytes = 0; written_bytes += write_member(m_size, out, child, "size"); written_bytes += m_bt.serialize(out, child, "bt"); written_bytes += m_btnr.serialize(out, child, "btnr"); written_bytes += m_btnrp.serialize(out, child, "btnrp"); written_bytes += m_rank.serialize(out, child, "rank_samples"); written_bytes += m_invert.serialize(out, child, "invert"); structure_tree::add_size(child, written_bytes); return written_bytes; }
bool addKeywordsImg(const int dbId, const int id, int_vector hashes){ if (!validate_imgid(dbId, id)) { cerr << "ERROR: image id (" << id << ") not found on given dbid (" << dbId << ") or dbid not existant" << endl ; return false;}; // populate keyword postings for (intVectorIterator it = hashes.begin(); it != hashes.end(); it++) { getKwdPostings(*it)->imgIdsFilter->insert(id); } // populate image kwds int_hashset& imgKwds = dbSpace[dbId]->sigs[id]->keywords; imgKwds.insert(hashes.begin(),hashes.end()); return true; }
// Returns a map that associates every value produced by one call to
// build_vector() with 100 times that value.
std::map<int, int> const build_map()
{
    typedef std::vector<int> int_vector;

    int_vector const data = build_vector();

    std::map<int, int> result;
    for (int const value : data) {
        result[value] = 100 * value;
    }
    return result;
}
//! Serializes the data structure into the given ostream size_type serialize(std::ostream& out, structure_tree_node* v=nullptr, std::string name="")const { structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); size_type written_bytes = 0; written_bytes += write_member(m_size, out, child, "size"); written_bytes += m_bl.serialize(out, child, "bl"); written_bytes += m_bf.serialize(out, child, "bf"); written_bytes += m_wt.serialize(out, child, "wt"); written_bytes += m_bl_rank.serialize(out, child, "bl_rank"); written_bytes += m_bf_rank.serialize(out, child, "bf_rank"); written_bytes += m_bl_select.serialize(out, child, "bl_select"); written_bytes += m_bf_select.serialize(out, child, "bf_select"); written_bytes += m_C.serialize(out, child, "C"); written_bytes += m_C_bf_rank.serialize(out, child, "C_bf_rank"); structure_tree::add_size(child, written_bytes); return written_bytes; }
/*! * Constructor for building the Index * \param[in] str C-string of the text */ index_bidirectional_waveletindex(const unsigned char* str) : index() { size_t n = strlen((const char*)str); int_vector<> sa(n+1, 0, bit_magic::l1BP(n+1)+1); setText(str, n+1); unsigned char *bwt = new unsigned char[n+1]; algorithm::calculate_sa(str, n+1, sa); // calculate the suffix array sa of str { /* Calculate Burrows-Wheeler-Transform */ size_t i = 0; for(int_vector<>::const_iterator it = sa.begin(), end = sa.end(); it != end; ++it, ++i){ bwt[i] = m_char2comp[str[(*it+n)%(n+1)]]; } } backward_index = WaveletTree(bwt, n+1, m_sigma); /* Construct the SA-Samples */ m_sa_sample.setIntWidth( bit_magic::l1BP(sa.size())+1 ); m_sa_sample.resize( (sa.size()+SampleDens-1)/SampleDens ); size_t idx=0; size_t i=(sa.size()-1-SampleDens*(m_sa_sample.size()-1)); for(int_vector<>::const_iterator it = sa.begin()+(ptrdiff_t)i; i < sa.size(); it += (ptrdiff_t)SampleDens, i += SampleDens, ++idx){ m_sa_sample[idx] = *it; } unsigned char* reverse = new unsigned char[n+1]; for (size_t i=0; i<n; i++) reverse[i] = str[n-1-i]; reverse[n] = '\0'; algorithm::calculate_sa(reverse, n+1, sa); // calculate the suffix array sa of reverse string str { /* Calculate Burrows-Wheeler-Transform */ size_t i = 0; for(int_vector<>::const_iterator it = sa.begin(), end = sa.end(); it != end; ++it, ++i){ bwt[i] = m_char2comp[reverse[(*it+n)%(n+1)]]; } } forward_index = WaveletTree(bwt, n+1, m_sigma); delete [] bwt; delete [] reverse; }
//! Serializes the data structure into the given ostream size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const { size_type written_bytes = 0; structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); written_bytes += util::write_member(m_size, out, child, "size"); written_bytes += m_position.serialize(out, child, "positions"); structure_tree::add_size(child, written_bytes); return written_bytes; }
//! Constructor taking a cache_config lcp_bitcompressed(cache_config& config) { std::string lcp_file = cache_file_name(conf::KEY_LCP, config); int_vector_buffer<> lcp_buf(lcp_file); m_lcp = int_vector<t_width>(lcp_buf.size(), 0, lcp_buf.width()); for (size_type i=0; i < m_lcp.size(); ++i) { m_lcp[i] = lcp_buf[i]; } }
//! Checks that a text contains no zero symbol.
/*! \param text The integer sequence to scan.
 *  \param file File name used only in the error message.
 *  \return true when no element of text equals zero. The function never
 *          returns false.
 *  \throws std::logic_error at the first element that equals zero.
 */
bool contains_no_zero_symbol(const int_vector& text, const std::string& file)
{
    for (int_vector_size_type i=0; i < text.size(); ++i) {
        if ((uint64_t)0 == text[i]) {
            // The former `return false` after this throw was unreachable and has been removed.
            throw std::logic_error(std::string("Error: File \"")+file+"\" contains zero symbol.");
        }
    }
    return true;
}
/*! * Constructor for building the Index * \param[in] str C-string of the text */ index_sa_text_occ(const unsigned char* str) : index() { size_t n = strlen((const char*)str); sa = int_vector<>(n+1, 0, bit_magic::l1BP(n+1)+1); algorithm::calculate_sa(str, n+1, sa); // calculate the suffix array sa of str setText(str, n+1); text = int_vector<>(sa.size(), 0, bit_magic::l1BP(sigma)+1); for (size_t i=0; i<sa.size(); i++) text[i] = char2comp[str[i]]; unsigned char *bwt = new unsigned char[n+1]; { /* Calculate Burrows-Wheeler-Transform */ size_t i = 0; for(int_vector<>::const_iterator it = sa.begin(), end = sa.end(); it != end; ++it, ++i){ bwt[i] = m_char2comp[str[(*it+n)%(n+1)]]; } } occ = Occ(bwt, n+1, m_sigma); delete[] bwt; }
/*! * Constructor for building the Index * \param[in] str C-string of the text */ index_csa_psi_text(const unsigned char *str) : index() { size_t n = strlen((const char*)str); int_vector<> sa(n+1, 0, bit_magic::l1BP(n+1)+1); algorithm::calculate_sa(str, n+1, sa); // calculate the suffix array sa of str int_vector<> m_psi; sdsl::algorithm::sa2psi(sa, m_psi); psi = EncVector(m_psi); setText(str, n+1); text = int_vector<>(sa.size(), 0, bit_magic::l1BP(sigma)+1); for (size_t i=0; i<sa.size(); i++) text[i] = char2comp[str[i]]; /* Construct the SA-Samples */ m_sa_sample.setIntWidth( bit_magic::l1BP(sa.size())+1 ); m_sa_sample.resize( (sa.size()+SampleDens-1)/SampleDens ); size_t i=0, idx=0; for(int_vector<>::const_iterator it = sa.begin(); i < sa.size(); it += (ptrdiff_t)SampleDens, i += SampleDens, ++idx) { m_sa_sample[idx] = *it; } }