void euclid_lsh::neighbor_row_from_hash( const bit_vector& bv, float norm, vector<pair<string, float> >& ids, uint64_t ret_num) const { jubatus::util::lang::shared_ptr<const column_table> table = get_const_table(); jubatus::core::storage::fixed_size_heap<pair<float, size_t> > heap(ret_num); { const_bit_vector_column& bv_col = lsh_column(); const_float_column& norm_col = norm_column(); const float denom = bv.bit_num(); for (size_t i = 0; i < table->size(); ++i) { const size_t hamm_dist = bv.calc_hamming_distance(bv_col[i]); const float theta = hamm_dist * M_PI / denom; const float score = norm_col[i] * (norm_col[i] - 2 * norm * cos(theta)); heap.push(make_pair(score, i)); } } vector<pair<float, size_t> > sorted; heap.get_sorted(sorted); ids.clear(); const float squared_norm = norm * norm; for (size_t i = 0; i < sorted.size(); ++i) { ids.push_back(make_pair(table->get_key(sorted[i].second), sqrt(squared_norm + sorted[i].first))); } }
/// Runs the chooser side of the protocol and evaluates the garbled circuit.
///
/// Reads the garbled circuit and the other party's input keys from `in`,
/// obtains this party's input keys through the Pinkas-Naor oblivious-transfer
/// chooser, assembles the key vector in the order given by the format file and
/// evaluates the circuit.
///
/// @param cc      circuit description; only the number of inputs is used here
/// @param fmt     format file giving, per input, which party ("A" or "B") owns it
/// @param inputs  this party's input bits (used as the OT choice bits)
/// @return the output bits produced by the garbled-circuit evaluator
/// @throws ProtocolException* (thrown by pointer) when the OT size announced by
///         the peer differs from inputs.size()
bit_vector YaoChooser::go(Circuit_p cc, FmtFile &fmt, const bit_vector &inputs)
{
    FmtFile::VarDesc vars = fmt.getVarDesc();

    // Receive the garbled circuit and the peer's already-selected input keys.
    GarbledCircuit_p gcc = GarbledCircuit::readCircuit(in);
    vector<SFEKey_p> yourinpsecs;
    readVector(in, yourinpsecs);

    uint ot_size = in->readInt();
    if (ot_size != inputs.size())
        // The arguments are cast so that they match the "%d" conversions;
        // passing a size_t for "%d" is a type mismatch in a printf-style call.
        throw new ProtocolException(cstr_printf(
            "ot_size %d != inputs.size %d",
            static_cast<int>(ot_size),
            static_cast<int>(inputs.size())));

    // Fetch our own input keys via oblivious transfer.
    pinkasnaor::OT ot;
    bit_vector inputs_copy(inputs);
    pinkasnaor::Chooser chooser(inputs_copy, &ot);
    chooser.setStreams(in, out);
    chooser.precalc();
    BigInt_Vect myinpsecs = chooser.online();

    GCircuitEval geval;
    vector<SecretKey_p> gcirc_input(cc->inputs.size());

    // Interleave both parties' keys according to the ownership recorded in the
    // format file.  Inputs owned by neither "A" nor "B" keep their
    // default-constructed entry.
    int ja=0;
    int jb=0;
    for (uint i=0; i<gcirc_input.size(); ++i) {
        if (vars.who.at(i) == "A") {
            gcirc_input[i] = yourinpsecs.at(ja++);
        } else if (vars.who.at(i) == "B") {
            gcirc_input[i] = SFEKey_p(new SFEKey(
                new byte_buf(BigInt::fromPaddedBigInt(myinpsecs.at(jb++))), true));
        }
    }

    bit_vector circ_out = geval.eval(*gcc, gcirc_input);
    return circ_out;
}
//! Builds the balanced-parentheses sequence of the super-cartesian tree of
//! lcp_buf and, alongside it, a bit vector marking "first child" closings.
/*!
 * \param lcp_buf  Buffer holding the input values.
 * \param bp       Output: resized to 2*n bits; a 1 is an opening and a 0 a
 *                 closing parenthesis.
 * \param bp_fc    Output: resized to n bits; one bit per closing parenthesis,
 *                 set whenever the stack's pop() returns true.
 * \param minimum  true: a value closes all strictly larger stack entries;
 *                 false: it closes all strictly smaller ones.
 * \return Number of bits set in bp_fc.
 */
bit_vector::size_type construct_supercartesian_tree_bp_succinct_and_first_child(
    int_vector_buffer<t_width>& lcp_buf,
    bit_vector& bp,
    bit_vector& bp_fc,
    const bool minimum = true)
{
    typedef bit_vector::size_type size_type;

    const size_type n = lcp_buf.size();
    bp.resize(2 * n);
    bp_fc.resize(n);
    if (n == 0) {
        return 0; // nothing to build
    }

    // Both vectors start out all-zero, so closing parentheses only require
    // advancing the write position.
    util::set_to_value(bp, 0);
    util::set_to_value(bp_fc, 0);

    sorted_multi_stack_support vec_stack(n);
    size_type fc_cnt = 0;   // number of first-child marks written
    size_type k      = 0;   // write position in bp
    size_type k_fc   = 0;   // write position in bp_fc

    // no "lazy stack" trick used here
    for (size_type i = 0; i < n; ++i) {
        const size_type x = lcp_buf[i];
        while (!vec_stack.empty() &&
               (minimum ? x < vec_stack.top() : x > vec_stack.top())) {
            if (vec_stack.pop()) {
                bp_fc[k_fc] = 1;
                ++fc_cnt;
            }
            ++k;      // closing parenthesis (bit is already 0)
            ++k_fc;   // one first-child bit per closing parenthesis
        }
        vec_stack.push(x);
        bp[k++] = 1;  // opening parenthesis
    }

    // Close everything that is still open.
    while (!vec_stack.empty()) {
        if (vec_stack.pop()) {
            bp_fc[k_fc] = 1;
            ++fc_cnt;
        }
        ++k;
        ++k_fc;
    }
    return fc_cnt;
}
//! Specialisation for plain bit_vector: no conversion is needed, so the
//! contents are simply exchanged between the source and destination vectors.
/*!
 * \param k_t_  Exchanged with k_t.
 * \param k_l_  Exchanged with k_l.
 * \param k_t   Exchanged with k_t_.
 * \param k_l   Exchanged with k_l_.
 */
template<>
void build_template_vector<bit_vector>(bit_vector& k_t_, bit_vector& k_l_,
                                       bit_vector& k_t, bit_vector& k_l)
{
    k_l.swap(k_l_);
    k_t.swap(k_t_);
}
/// Writes a bit vector as text: first the number of bits, then one bit per
/// line, each line terminated by "\n".
///
/// @param out  destination stream
/// @param v    bit vector to write
void serialize_bit_vector(std::ostream &out, const bit_vector &v)
{
    const size_t bit_count = v.size();
    out << bit_count << "\n";

    size_t pos = 0;
    while (pos < bit_count) {
        out << v[pos] << "\n";
        ++pos;
    }
}
//! Loads the data structure from the given istream. void load(std::istream& in) { read_member(m_size, in); m_bt.load(in); m_btnr.load(in); m_btnrp.load(in); m_rank.load(in); m_invert.load(in); }
/// Bitwise AND of two bit vectors of possibly different lengths.
///
/// The result has as many bits as the longer operand; a result bit is set only
/// where both operands have a set bit at that index, so every position beyond
/// the end of the shorter operand is cleared.
bit_vector vert::operator&( const bit_vector &rhs, const bit_vector &lhs )
{
  const std::size_t rhsSize = rhs.size();
  const std::size_t lhsSize = lhs.size();
  const std::size_t maxSize = std::max( rhsSize, lhsSize );

  bit_vector result;
  for( std::size_t pos = 0; pos < maxSize; ++pos )
  {
    // Only index an operand after checking that pos lies inside it.
    const bool insideBoth = pos < rhsSize && pos < lhsSize;
    const bool bit = insideBoth && rhs[pos] && lhs[pos];
    result.append( bit );
  }
  return result;
}
//! Swap method void swap(rrr_vector& rrr) { if (this != &rrr) { std::swap(m_size, rrr.m_size); m_bt.swap(rrr.m_bt); m_btnr.swap(rrr.m_btnr); m_btnrp.swap(rrr.m_btnrp); m_rank.swap(rrr.m_rank); m_invert.swap(rrr.m_invert); } }
//! Answers select queries //! Serializes the data structure into the given ostream size_type serialize(std::ostream& out, structure_tree_node* v=nullptr, std::string name="")const { structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); size_type written_bytes = 0; written_bytes += write_member(m_size, out, child, "size"); written_bytes += m_bt.serialize(out, child, "bt"); written_bytes += m_btnr.serialize(out, child, "btnr"); written_bytes += m_btnrp.serialize(out, child, "btnrp"); written_bytes += m_rank.serialize(out, child, "rank_samples"); written_bytes += m_invert.serialize(out, child, "invert"); structure_tree::add_size(child, written_bytes); return written_bytes; }
float calc_euclidean_distance( const lsh_entry& entry, const bit_vector& bv, float norm) { const uint64_t hamm = bv.calc_hamming_similarity(entry.simhash_bv); if (hamm == bv.bit_num()) { // Avoid NaN caused by arithmetic error return std::fabs(norm - entry.norm); } const float angle = (1 - static_cast<float>(hamm) / bv.bit_num()) * M_PI; const float dot = entry.norm * norm * std::cos(angle); return std::sqrt(norm * norm + entry.norm * entry.norm - 2 * dot); }
void bit_vector::and_op(const bit_vector& v) { uint sz = m_size; const uchar* vbuf = v.buf(); if (v.size() < sz) sz = v.size(); for (uint o=0; o<sz; o++) { m_buf[o] &= vbuf[o]; } // shorten our size if v is smaller than us if (sz < m_size) m_size=sz; }
void euclid_lsh::neighbor_row_from_hash( const bit_vector& bv, float norm, vector<pair<string, float> >& ids, uint64_t ret_num) const { // This function is not thread safe. // Take lock out of this function. jubatus::util::lang::shared_ptr<const column_table> table = get_const_table(); const_bit_vector_column& bv_col = lsh_column(); const_float_column& norm_col = norm_column(); const float denom = bv.bit_num(); heap_t heap(ret_num); jubatus::util::lang::function<heap_t(size_t, size_t)> f = jubatus::util::lang::bind( &ranking_hamming_bit_vectors_worker, &bv, &bv_col, &norm_col, denom, norm, ret_num, jubatus::util::lang::_1, jubatus::util::lang::_2); ranking_hamming_bit_vectors_internal( f, table->size_nolock(), threads_, heap); vector<pair<float, size_t> > sorted; heap.get_sorted(sorted); ids.clear(); for (size_t i = 0; i < sorted.size(); ++i) { ids.push_back(make_pair( table->get_key_nolock(sorted[i].second), sorted[i].first)); } }
//! Builds the balanced-parentheses sequence of the super-cartesian tree of vec
//! using a stack of indices.
/*!
 * \param vec      Random access container holding the input values.
 * \param bp       Output: resized to 2*vec.size() bits; a 1 is an opening and
 *                 a 0 a closing parenthesis.
 * \param minimum  true: an element closes all strictly larger elements before
 *                 it; false: it closes all strictly smaller ones.
 *
 * bp is zero-initialised first, so a closing parenthesis is "written" simply by
 * advancing the write position k.
 */
void construct_supercartesian_tree_bp_succinct(const RandomAccessContainer& vec, bit_vector& bp, const bool minimum=true)
{
    typedef typename RandomAccessContainer::size_type size_type;
    bp.resize(2*vec.size()); // resize bit vector for balanced parentheses to 2 n bits
    if (vec.size() > 0) {
        util::set_to_value(bp, 0);
        sorted_stack_support vec_stack(vec.size()); // <- this is a problem for int_vector_file_buffer

        size_type k=0; // write position in bp
        if (minimum) {
            bp[k++] = 1; // opening parenthesis of element 0
            for (size_type i=1; i < vec.size(); ++i) {
                if (vec[i] < vec[i-1]) {
                    // Element i-1 was never pushed (see the else branch), so
                    // its closing parenthesis is emitted directly here.
                    ++k;
                    while (vec_stack.size() > 0 and vec[i] < vec[vec_stack.top()]) {
                        vec_stack.pop(); ++k; // writing a closing parenthesis, bp is already initialized to zero
                    }
                } else {
                    // Element i-1 is pushed only once it is known that the
                    // next element does not close it immediately.
                    vec_stack.push(i-1); // "lazy stack" trick: speed-up ca. 25%
                }
                bp[k++] = 1; // writing an opening parenthesis
            }
            /* Disabled alternative implementation, kept for reference:
            vec_stack.push(0);
            bp[k++] = 1;
            for(size_type i=1,j, start_run=1; i < vec.size(); ++i){
                if( vec[i] < vec[i-1] ){
                    j = i;
                    while( --j >= start_run and vec[i] < vec[j]) ++k;
                    while(start_run <= j){ // push onto the stack
                        vec_stack.push(start_run++);
                    }
                    while( vec_stack.size() > 0 and vec[i] < vec[vec_stack.top()] ){
                        vec_stack.pop();
                        ++k;
                    }
                    start_run = i;
                }
                bp[k++] = 1;
            }
            */
        } else {
            // no "lazy stack" trick here yet
            for (size_type i=0; i < vec.size(); ++i) {
                while (vec_stack.size() > 0 and vec[i] > vec[vec_stack.top()]) {
                    vec_stack.pop(); ++k; /*bp[k++] = 0; bp is already initialized to zero*/ // writing a closing parenthesis
                }
                vec_stack.push(i);
                bp[k++] = 1; // writing an opening parenthesis
            }
        }
#ifdef SDSL_DEBUG
        // not necessary as bp is already initialized to zero
        while (!vec_stack.empty()) {
            vec_stack.pop();
            bp[k++] = 0; // writing a closing parenthesis
        }
        assert(k == 2*vec.size());
#endif
    }
}
void construct_supercartesian_tree_bp_succinct2(const RandomAccessContainer& vec, bit_vector& bp, const bool minimum=true) { typedef typename RandomAccessContainer::size_type size_type; bp.resize(2*vec.size()); // resize bit vector for balanced parentheses to 2 n bits util::set_to_value(bp, 0); sorted_stack_support vec_stack(vec.size()); // <- das ist ein Problem fuer int_vector_file_buffer size_type k=0; // uint64_t wbuf=0; for (size_type i=0/*, cnt64=0*/; i < vec.size(); ++i) { while (vec_stack.size() > 0 and vec[i] < vec[vec_stack.top()]) { vec_stack.pop(); ++k; /*bp[k++] = 0; bp is already initialized to zero*/ // writing a closing parenthesis } vec_stack.push(i); bp[k++] = 1; // writing an opening parenthesis while (i+1 < vec.size() and vec[i+1] >= vec[i]) { vec_stack.push(++i); bp[k++]; } } #ifdef SDSL_DEBUG // not neccessary as bp is already initialized to zero while (vec_stack.size() > 0) { vec_stack.pop(); bp[k++] = 0; // writing a closing parenthesis } assert(k == 2*vec.size()); #endif }
/// Returns the position in `bv` of the idx-th indexed bit (0-based).
///
/// Lookup proceeds in three stages:
///  - a negative block-inventory entry redirects to the explicit table of
///    overflow positions;
///  - otherwise the block and sub-block inventories give a start position;
///  - from there the remaining bits are skipped word by word using popcount,
///    and the final bit is located inside its word.
///
/// @param bv   bit vector the index was built on
/// @param idx  rank of the requested bit; must be < num_positions()
inline uint64_t select(bit_vector const& bv, uint64_t idx) const
{
    assert(idx < num_positions());

    const uint64_t block = idx / block_size;
    const int64_t block_pos = m_block_inventory[block];
    if (block_pos < 0) {
        // Positions of this block are stored explicitly.
        const uint64_t overflow_base = uint64_t(-block_pos - 1);
        return m_overflow_positions[overflow_base + (idx % block_size)];
    }

    const uint64_t subblock = idx / subblock_size;
    const uint64_t start_pos = uint64_t(block_pos) + m_subblock_inventory[subblock];
    uint64_t remaining = idx % subblock_size;
    mapper::mappable_vector<uint64_t> const& data = bv.data();

    if (remaining == 0) {
        return start_pos;
    }

    uint64_t word_idx = start_pos / 64;
    const uint64_t word_shift = start_pos % 64;
    // Mask away the bits below start_pos in the first word.
    uint64_t word = WordGetter()(data, word_idx) & (uint64_t(-1) << word_shift);

    for (;;) {
        const uint64_t popcnt = broadword::popcount(word);
        if (remaining < popcnt) {
            break;
        }
        remaining -= popcnt;
        word = WordGetter()(data, ++word_idx);
    }
    return 64 * word_idx + broadword::select_in_word(word, remaining);
}
gap_vector(const bit_vector& bv) { m_size = bv.size(); if (m_size == 0) return; size_type ones = util::get_one_bits(bv); m_position = int_vector<>(ones, 0, bit_magic::l1BP(m_size)+1); const uint64_t* bvp = bv.data(); for (size_type i=0, one_cnt=0; i < (bv.size()+63)/64; ++i, ++bvp) { if (*bvp) { // if there is a one in the word for (size_type j=0; j<64 and 64*i+j < bv.size(); ++j) // check each bit of the word if (bv[64*i+j]) { m_position[one_cnt++] = 64*i+j; } } } }
void construct_supercartesian_tree_bp(const t_rac& vec, bit_vector& bp, const bool minimum = true) { typedef typename t_rac::size_type size_type; bp.resize(2 * vec.size()); // resize bit vector for balanaced parantheses to 2 n bits util::set_to_value(bp, 0); std::stack<typename t_rac::value_type> vec_stack; size_type k = 0; for (size_type i = 0; i < vec.size(); ++i) { typename t_rac::value_type l = vec[i]; if (minimum) { while (vec_stack.size() > 0 and l < vec_stack.top()) { vec_stack.pop(); ++k; /*bp[k++] = 0; bp is already initialized to zero*/ // writing a closing parenthesis } } else { while (vec_stack.size() > 0 and l > vec_stack.top()) { vec_stack.pop(); ++k; /*bp[k++] = 0; bp is already initialized to zero*/ // writing a closing parenthesis } } vec_stack.push(l); bp[k++] = 1; // writing an opening parenthesis } while (vec_stack.size() > 0) { vec_stack.pop(); bp[k++] = 0; // writing a closing parenthesis } assert(k == 2 * vec.size()); }
static void similar_row_one( const bit_vector& x, const pair<string, bit_vector>& y, heap_type& heap) { uint64_t match_num = x.calc_hamming_similarity(y.second); heap.push(make_pair(match_num, y.first)); }
void bit_index_storage::similar_row( const bit_vector& bv, vector<pair<string, float> >& ids, uint64_t ret_num) const { ids.clear(); uint64_t bit_num = bv.bit_num(); if (bit_num == 0) { return; } heap_type heap(ret_num); for (bit_table_t::const_iterator it = bitvals_diff_.begin(); it != bitvals_diff_.end(); ++it) { similar_row_one(bv, *it, heap); } for (bit_table_t::const_iterator it = bitvals_.begin(); it != bitvals_.end(); ++it) { if (bitvals_diff_.find(it->first) != bitvals_diff_.end()) { continue; } similar_row_one(bv, *it, heap); } vector<pair<uint64_t, string> > scores; heap.get_sorted(scores); for (size_t i = 0; i < scores.size() && i < ret_num; ++i) { ids.push_back(make_pair(scores[i].second, static_cast<float>(scores[i].first) / bit_num)); } }
//! Load from a stream. void load(std::istream& in) { m_data.load(in); m_overflow.load(in); m_overflow_rank.load(in, &m_overflow); m_level_pointer_and_rank.load(in); read_member(m_max_level, in); }
bool doc_manager::merge(doc& d, unsigned idx, subset_ints const& equalities, bit_vector const& discard_cols) { unsigned root = equalities.find(idx); idx = root; unsigned num_x = 0; unsigned root1 = root; tbit value = BIT_x; do { switch (d[idx]) { case BIT_0: if (value == BIT_1) return false; value = BIT_0; break; case BIT_1: if (value == BIT_0) return false; value = BIT_1; break; case BIT_x: ++num_x; if (!discard_cols.get(idx)) { root1 = idx; } break; default: UNREACHABLE(); break; } idx = equalities.next(idx); } while (idx != root); TRACE("doc", tout << "num_x: " << num_x << " value: " << value << "\n";);
//! Loads the data structure from a stream.
/*!
 * Members are read in a fixed order: m_abs_samples, m_differences, m_ones,
 * m_size, m_contains_abs_sample, m_rank_contains_abs_sample.  The order
 * defines the on-disk layout.
 *
 * \param in In-Stream to load the rank_support data from.
 */
void load(std::istream& in)
{
    m_abs_samples.load(in);
    m_differences.load(in);
    read_member(m_ones, in);
    read_member(m_size, in);
    m_contains_abs_sample.load(in);
    // The rank structure is handed the address of m_contains_abs_sample while
    // loading (presumably the vector it answers queries on).
    m_rank_contains_abs_sample.load(in, &m_contains_abs_sample);
}
//! Serializes the data structure into the given ostream.
/*!
 * Writes m_sct_bp followed by m_sct_bp_support and registers both under a
 * child node of the structure tree.
 *
 * \param out   Stream to write to.
 * \param v     Parent node in the structure tree (may be nullptr).
 * \param name  Name under which this object is registered in the tree.
 * \return Total number of bytes written, as reported by the individual writes.
 */
size_type serialize(std::ostream& out, structure_tree_node* v=nullptr, std::string name="")const
{
    structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
    size_type written_bytes = 0;
    written_bytes += m_sct_bp.serialize(out, child, "sct_bp");
    written_bytes += m_sct_bp_support.serialize(out, child, "sct_bp_support");
    // Record the accumulated size on this object's node.
    structure_tree::add_size(child, written_bytes);
    return written_bytes;
}
//! Swap method for lcp_dac void swap(lcp_dac& lcp_c) { m_data.swap(lcp_c.m_data); m_overflow.swap(lcp_c.m_overflow); util::swap_support(m_overflow_rank, lcp_c.m_overflow_rank, &m_overflow, &(lcp_c.m_overflow)); m_level_pointer_and_rank.swap(lcp_c.m_level_pointer_and_rank); std::swap(m_max_level, lcp_c.m_max_level); }
//! Swap method for nearest_neighbour_dictionary
/*!
 * Exchanges every member with the corresponding member of nnd.  The rank
 * structure is exchanged through util::swap_support, which is given the
 * addresses of both objects' m_contains_abs_sample vectors.
 *
 * \param nnd  The dictionary to exchange contents with.
 */
void swap(nearest_neighbour_dictionary& nnd)
{
    // plain counters
    std::swap(m_ones, nnd.m_ones);
    std::swap(m_size, nnd.m_size);

    // sample arrays
    m_abs_samples.swap(nnd.m_abs_samples);
    m_differences.swap(nnd.m_differences);

    // marker vector together with its rank structure
    m_contains_abs_sample.swap(nnd.m_contains_abs_sample);
    util::swap_support(m_rank_contains_abs_sample, nnd.m_rank_contains_abs_sample,
                       &m_contains_abs_sample, &(nnd.m_contains_abs_sample));
}
//! Constructs the dictionary from a bit_vector.
/*!
 * Every sample_dens-th set bit is stored as an absolute position in
 * m_abs_samples; every other set bit is stored in m_differences as the
 * distance to the previous set bit.  m_contains_abs_sample has one bit per
 * chunk of sample_dens positions of v and marks the chunks that hold an
 * absolute sample.
 *
 * \param v The supported bit_vector.
 * \throws std::logic_error if sample_dens is 0.
 */
nearest_neighbour_dictionary(const bit_vector& v):m_ones(0), m_size(0)
{
    if (sample_dens==0) { // first logical error check
        throw std::logic_error(util::demangle(typeid(this).name())+": sample_dens should not be equal 0!");
    }
    size_type max_distance_between_two_ones = 0;
    size_type ones = 0; // counter for the ones in v

    // First pass: count the ones and get the maximal distance between two
    // ones in the bit vector (the first one is measured from position -1,
    // hence the "+1" bookkeeping).
    // TODO: speed this up by broadword computing
    for (size_type i=0, last_one_pos_plus_1=0; i < v.size(); ++i) {
        if (v[i]) {
            if (i+1-last_one_pos_plus_1 > max_distance_between_two_ones)
                max_distance_between_two_ones = i+1-last_one_pos_plus_1;
            last_one_pos_plus_1 = i+1;
            ++ones;
        }
    }
    m_ones = ones;
    m_size = v.size();
    // std::cerr<<ones<<std::endl;
    // initialize absolute samples; m_abs_samples[0]=0
    // (entries are wide enough to hold any position of v)
    m_abs_samples = int_vector<>(m_ones/sample_dens + 1, 0, bits::hi(v.size())+1);
    // initialize difference values: one entry per set bit that is not an
    // absolute sample, wide enough for the largest gap found above
    m_differences = int_vector<>(m_ones - m_ones/sample_dens, 0, bits::hi(max_distance_between_two_ones)+1);
    // initialize m_contains_abs_sample: one bit per chunk of sample_dens positions
    m_contains_abs_sample = bit_vector((v.size()+sample_dens-1)/sample_dens, 0);

    // Second pass: fill in the samples and differences.
    ones = 0;
    for (size_type i=0, last_one_pos=0; i < v.size(); ++i) {
        if (v[i]) {
            ++ones;
            if ((ones % sample_dens) == 0) { // insert absolute samples
                m_abs_samples[ones/sample_dens] = i;
                m_contains_abs_sample[i/sample_dens] = 1;
            } else {
                // ones/sample_dens absolute samples precede this one, so they
                // are subtracted to obtain the index into m_differences.
                m_differences[ones - ones/sample_dens - 1] = i - last_one_pos;
            }
            last_one_pos = i;
        }
    }
    util::init_support(m_rank_contains_abs_sample, &m_contains_abs_sample);
}
void ranking_hamming_bit_vectors( const bit_vector& query, const const_bit_vector_column& bvs, vector<pair<uint64_t, float> >& ret, uint64_t ret_num) { storage::fixed_size_heap<pair<uint32_t, uint64_t> > heap(ret_num); for (uint64_t i = 0; i< bvs.size(); ++i) { const size_t dist = query.calc_hamming_distance(bvs[i]); heap.push(make_pair(dist, i)); } vector<pair<uint32_t, uint64_t> > sorted; heap.get_sorted(sorted); ret.clear(); const float denom = query.bit_num(); for (size_t i = 0; i < sorted.size(); ++i) { ret.push_back(make_pair(sorted[i].second, sorted[i].first / denom)); } }
/// Reads a bit vector in the text format "<bit count> <bit> <bit> ...".
///
/// @param in  source stream
/// @param v   output; resized to the bit count read from the stream.  If the
///            bit count cannot be read, v is resized to 0 and the function
///            returns with the stream's fail state left set for the caller.
///            Bits that cannot be read are stored as 0.
void deserialize_bit_vector(std::istream &in, bit_vector &v)
{
    // Initialised on purpose: when the stream is already at EOF or in a failed
    // state, operator>> returns without storing anything, and resizing with an
    // indeterminate value would be undefined behaviour.
    size_t size = 0;
    if (!(in >> size)) {
        v.resize(0);
        return;
    }

    v.resize(size);
    for (size_t i = 0; i < size; ++i) {
        bool b = false; // stays false if the extraction below does nothing
        in >> b;
        v[i] = b;
    }
}
//! Serializes the data structure into the given ostream.
/*!
 * Writes m_abs_samples, m_differences, m_ones, m_size, m_contains_abs_sample
 * and m_rank_contains_abs_sample, in that order, and registers each of them
 * under a child node of the structure tree.
 *
 * \param out   Out-Stream to serialize the data to.
 * \param v     Parent node in the structure tree (may be nullptr).
 * \param name  Name under which this object is registered in the tree.
 * \return Total number of bytes written, as reported by the individual writes.
 */
size_type serialize(std::ostream& out, structure_tree_node* v=nullptr, std::string name="")const
{
    size_type written_bytes = 0;
    structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
    written_bytes += m_abs_samples.serialize(out, child, "absolute_samples");
    written_bytes += m_differences.serialize(out, child, "differences");
    written_bytes += write_member(m_ones, out, child, "ones");
    written_bytes += write_member(m_size,out, child, "size");
    written_bytes += m_contains_abs_sample.serialize(out, child, "contains_abs_sample");
    written_bytes += m_rank_contains_abs_sample.serialize(out, child, "rank_contains_abs_sample");
    // Record the accumulated size on this object's node.
    structure_tree::add_size(child, written_bytes);
    return written_bytes;
}
/// /// @brief shift a bit vector to left or right @a num times /// void shift_bit_vector(bit_vector &v, int num, bool to_left) { #ifdef DEBUG cout<<"before shifting to "<<(to_left? "left" : "right")<<" : "; for (int i = v.size() - 1; i >= 0; --i) { cout<<(v[i]? 1 : 0); } cout<<endl; #endif if (to_left) { for (int i = v.size() - 1; i >= num; --i) { v[i] = v[i - num]; } for (int i = 0; i < num; ++i) { v[i] = 0; } } else { for (int i = 0; i < v.size() - num; ++i) { v[i] = v[i + num]; } for (int i = v.size() - num; i < v.size(); ++i) { v[i] = 0; } } #ifdef DEBUG cout<<"after shifting to "<<(to_left? "left" : "right")<<" : "; for (int i = v.size() - 1; i >= 0; --i) { cout<<(v[i]? 1 : 0); } cout<<endl; #endif }