void euclid_lsh::neighbor_row_from_hash( const bit_vector& bv, float norm, vector<pair<string, float> >& ids, uint64_t ret_num) const { // This function is not thread safe. // Take lock out of this function. jubatus::util::lang::shared_ptr<const column_table> table = get_const_table(); const_bit_vector_column& bv_col = lsh_column(); const_float_column& norm_col = norm_column(); const float denom = bv.bit_num(); heap_t heap(ret_num); jubatus::util::lang::function<heap_t(size_t, size_t)> f = jubatus::util::lang::bind( &ranking_hamming_bit_vectors_worker, &bv, &bv_col, &norm_col, denom, norm, ret_num, jubatus::util::lang::_1, jubatus::util::lang::_2); ranking_hamming_bit_vectors_internal( f, table->size_nolock(), threads_, heap); vector<pair<float, size_t> > sorted; heap.get_sorted(sorted); ids.clear(); for (size_t i = 0; i < sorted.size(); ++i) { ids.push_back(make_pair( table->get_key_nolock(sorted[i].second), sorted[i].first)); } }
void euclid_lsh::neighbor_row_from_hash( const bit_vector& bv, float norm, vector<pair<string, float> >& ids, uint64_t ret_num) const { jubatus::util::lang::shared_ptr<const column_table> table = get_const_table(); jubatus::core::storage::fixed_size_heap<pair<float, size_t> > heap(ret_num); { const_bit_vector_column& bv_col = lsh_column(); const_float_column& norm_col = norm_column(); const float denom = bv.bit_num(); for (size_t i = 0; i < table->size(); ++i) { const size_t hamm_dist = bv.calc_hamming_distance(bv_col[i]); const float theta = hamm_dist * M_PI / denom; const float score = norm_col[i] * (norm_col[i] - 2 * norm * cos(theta)); heap.push(make_pair(score, i)); } } vector<pair<float, size_t> > sorted; heap.get_sorted(sorted); ids.clear(); const float squared_norm = norm * norm; for (size_t i = 0; i < sorted.size(); ++i) { ids.push_back(make_pair(table->get_key(sorted[i].second), sqrt(squared_norm + sorted[i].first))); } }
// Writes the key of every row in the table, in index order, to `ids`.
// The keys are gathered in a temporary vector and swapped into `ids` at the
// end, so `ids` is only touched once all keys have been collected.
void nearest_neighbor_base::get_all_row_ids(vector<string>& ids) const {
  shared_ptr<const table::column_table> tbl = get_const_table();

  vector<string> keys;
  keys.reserve(tbl->size());
  for (size_t row = 0; row < tbl->size(); ++row) {
    keys.push_back(tbl->get_key(row));
  }

  ids.swap(keys);
}
void euclid_lsh::neighbor_row( const std::string& query_id, vector<pair<string, float> >& ids, uint64_t ret_num) const { util::concurrent::scoped_rlock lk(get_const_table()->get_mutex()); /* table lock acquired; all subsequent table operations must be nolock */ const pair<bool, uint64_t> maybe_index = get_const_table()->exact_match_nolock(query_id); if (!maybe_index.first) { ids.clear(); return; } const bit_vector bv = lsh_column()[maybe_index.second]; const float norm = norm_column()[maybe_index.second]; neighbor_row_from_hash(bv, norm, ids, ret_num); }
void bit_vector_nearest_neighbor_base::neighbor_row_from_hash( const bit_vector& query, vector<pair<string, float> >& ids, uint64_t ret_num) const { vector<pair<uint64_t, float> > scores; ranking_hamming_bit_vectors(query, bit_vector_column(), scores, ret_num); jubatus::util::lang::shared_ptr<const column_table> table = get_const_table(); ids.clear(); for (size_t i = 0; i < scores.size(); ++i) { ids.push_back(make_pair(table->get_key(scores[i].first), scores[i].second)); } }
void bit_vector_nearest_neighbor_base::neighbor_row( const string& query_id, vector<pair<string, float> >& ids, uint64_t ret_num) const { const table::column_table& table = *get_const_table(); const pair<bool, uint64_t> maybe_index = table.exact_match(query_id); if (!maybe_index.first) { ids.clear(); return; } const_bit_vector_column& col = bit_vector_column(); neighbor_row_from_hash(col[maybe_index.second], ids, ret_num); }
// Finds the neighbors of the feature vector `query`.
// The query is hashed with cosine_lsh() and its L2 norm computed with
// l2norm(); both are handed to neighbor_row_from_hash(), which fills `ids`.
void euclid_lsh::neighbor_row(
    const common::sfv_t& query,
    vector<pair<string, float> >& ids,
    uint64_t ret_num) const {
  // The table's read lock is held until this function returns; all
  // subsequent table operations must therefore be the nolock variants.
  util::concurrent::scoped_rlock lk(get_const_table()->get_mutex());

  const float query_norm = l2norm(query);
  neighbor_row_from_hash(
      cosine_lsh(query, hash_num_, threads_, cache_),
      query_norm,
      ids,
      ret_num);
}
void euclid_lsh::neighbor_row( const std::string& query_id, vector<pair<string, float> >& ids, uint64_t ret_num) const { const pair<bool, uint64_t> maybe_index = get_const_table()->exact_match(query_id); if (!maybe_index.first) { ids.clear(); return; } const bit_vector bv = lsh_column()[maybe_index.second]; const float norm = norm_column()[maybe_index.second]; neighbor_row_from_hash(bv, norm, ids, ret_num); }
// Serializes this object by forwarding `packer` to the underlying table's
// own pack(); nothing else is written here.
void nearest_neighbor_base::pack(framework::packer& packer) const {
  get_const_table()->pack(packer);
}
// Returns the table's bit-vector column identified by
// bit_vector_column_id_. The reference points into the table returned by
// get_const_table().
const_bit_vector_column&
bit_vector_nearest_neighbor_base::bit_vector_column() const {
  return get_const_table()->get_bit_vector_column(bit_vector_column_id_);
}
// Returns the table's float column holding the per-row norms. It is looked
// up at column id first_column_id_ + 1. The reference points into the table
// returned by get_const_table().
const_float_column& euclid_lsh::norm_column() const {
  return get_const_table()->get_float_column(first_column_id_ + 1);
}
// Returns the table's bit-vector column holding the per-row LSH hashes. It
// is looked up at column id first_column_id_. The reference points into the
// table returned by get_const_table().
const_bit_vector_column& euclid_lsh::lsh_column() const {
  return get_const_table()->get_bit_vector_column(first_column_id_);
}