Beispiel #1
0
void euclid_lsh::neighbor_row_from_hash(
    const bit_vector& bv,
    float norm,
    vector<pair<string, float> >& ids,
    uint64_t ret_num) const {
  // This function is not thread safe.
  // Take lock out of this function.
  jubatus::util::lang::shared_ptr<const column_table> table =
    get_const_table();
  const_bit_vector_column& bv_col = lsh_column();
  const_float_column& norm_col = norm_column();
  const float denom = bv.bit_num();
  heap_t heap(ret_num);
  jubatus::util::lang::function<heap_t(size_t, size_t)> f =
    jubatus::util::lang::bind(
      &ranking_hamming_bit_vectors_worker, &bv, &bv_col, &norm_col,
      denom, norm, ret_num,
      jubatus::util::lang::_1, jubatus::util::lang::_2);
  ranking_hamming_bit_vectors_internal(
      f, table->size_nolock(), threads_, heap);

  vector<pair<float, size_t> > sorted;
  heap.get_sorted(sorted);

  ids.clear();
  for (size_t i = 0; i < sorted.size(); ++i) {
    ids.push_back(make_pair(
      table->get_key_nolock(sorted[i].second), sorted[i].first));
  }
}
Beispiel #2
0
// Scores every row of the table against a query given as an LSH bit vector
// plus its norm, and reports the rows kept by the heap as (row key,
// distance) pairs.
//
// For each row the Hamming distance to `bv` is converted to an angle
//   theta = hamming * pi / bv.bit_num()
// and the row is scored with
//   |x|^2 - 2 * |x| * |q| * cos(theta)
// where |x| is the stored row norm and |q| is `norm`.  Adding |q|^2 to that
// score gives the law-of-cosines squared distance; its square root is the
// value stored in `ids`.
//
//   bv       LSH bit vector of the query.
//   norm     norm of the query.
//   ids      output; cleared, then filled in the order heap get_sorted()
//            yields.
//   ret_num  size given to the fixed-size heap (presumably the maximum
//            number of neighbors reported -- confirm against
//            fixed_size_heap).
void euclid_lsh::neighbor_row_from_hash(
    const bit_vector& bv,
    float norm,
    vector<pair<string, float> >& ids,
    uint64_t ret_num) const {
  jubatus::util::lang::shared_ptr<const column_table> table = get_const_table();

  jubatus::core::storage::fixed_size_heap<pair<float, size_t> > heap(ret_num);
  {
    const_bit_vector_column& bv_col = lsh_column();
    const_float_column& norm_col = norm_column();

    const float denom = bv.bit_num();
    for (size_t i = 0; i < table->size(); ++i) {
      const size_t hamm_dist = bv.calc_hamming_distance(bv_col[i]);
      const float theta = hamm_dist * M_PI / denom;
      // The query's own |q|^2 term is the same for every row, so it is left
      // out of the ranking score and added back once below.
      const float score = norm_col[i] * (norm_col[i] - 2 * norm * cos(theta));
      heap.push(make_pair(score, i));
    }
  }

  vector<pair<float, size_t> > sorted;
  heap.get_sorted(sorted);

  ids.clear();
  ids.reserve(sorted.size());
  const float squared_norm = norm * norm;
  for (size_t i = 0; i < sorted.size(); ++i) {
    // When a row (almost) coincides with the query the two terms cancel and
    // floating-point rounding can leave a tiny negative value; sqrt() of
    // that would report NaN instead of a zero distance.  Only negative
    // values are clamped, so a NaN score still propagates unchanged.
    const float squared_dist = squared_norm + sorted[i].first;
    const float dist = squared_dist < 0.0f ? 0.0f : sqrt(squared_dist);
    ids.push_back(make_pair(table->get_key(sorted[i].second), dist));
  }
}
// Replaces the contents of `ids` with the key of every row in the table,
// in row-index order.
//
// The keys are gathered in a local vector and swapped into `ids` at the
// very end, so `ids` is not modified until the whole list has been built.
void nearest_neighbor_base::get_all_row_ids(vector<string>& ids) const {
  shared_ptr<const table::column_table> tbl = get_const_table();

  vector<string> keys;
  keys.reserve(tbl->size());

  size_t row = 0;
  while (row < tbl->size()) {
    keys.push_back(tbl->get_key(row));
    ++row;
  }

  ids.swap(keys);
}
Beispiel #4
0
void euclid_lsh::neighbor_row(
    const std::string& query_id,
    vector<pair<string, float> >& ids,
    uint64_t ret_num) const {
  util::concurrent::scoped_rlock lk(get_const_table()->get_mutex());

  /* table lock acquired; all subsequent table operations must be nolock */

  const pair<bool, uint64_t> maybe_index =
      get_const_table()->exact_match_nolock(query_id);
  if (!maybe_index.first) {
    ids.clear();
    return;
  }

  const bit_vector bv = lsh_column()[maybe_index.second];
  const float norm = norm_column()[maybe_index.second];
  neighbor_row_from_hash(bv, norm, ids, ret_num);
}
void bit_vector_nearest_neighbor_base::neighbor_row_from_hash(
    const bit_vector& query,
    vector<pair<string, float> >& ids,
    uint64_t ret_num) const {
  vector<pair<uint64_t, float> > scores;
  ranking_hamming_bit_vectors(query, bit_vector_column(), scores, ret_num);

  jubatus::util::lang::shared_ptr<const column_table> table = get_const_table();
  ids.clear();
  for (size_t i = 0; i < scores.size(); ++i) {
    ids.push_back(make_pair(table->get_key(scores[i].first), scores[i].second));
  }
}
void bit_vector_nearest_neighbor_base::neighbor_row(
    const string& query_id,
    vector<pair<string, float> >& ids,
    uint64_t ret_num) const {
  const table::column_table& table = *get_const_table();
  const pair<bool, uint64_t> maybe_index = table.exact_match(query_id);
  if (!maybe_index.first) {
    ids.clear();
    return;
  }

  const_bit_vector_column& col = bit_vector_column();
  neighbor_row_from_hash(col[maybe_index.second], ids, ret_num);
}
Beispiel #7
0
// Finds the neighbors of an arbitrary feature vector.
//
// The query is reduced to the two values neighbor_row_from_hash() needs:
// its hash from cosine_lsh() and its norm from l2norm().  The table's read
// lock is held for the whole call, so every table operation made from here
// on must be a *_nolock one.
//
//   query    feature vector to search for.
//   ids      output; filled by neighbor_row_from_hash().
//   ret_num  forwarded unchanged to neighbor_row_from_hash().
void euclid_lsh::neighbor_row(
    const common::sfv_t& query,
    vector<pair<string, float> >& ids,
    uint64_t ret_num) const {
  util::concurrent::scoped_rlock lk(get_const_table()->get_mutex());

  /* read lock held from here to the end of the function */

  const float query_norm = l2norm(query);
  neighbor_row_from_hash(
      cosine_lsh(query, hash_num_, threads_, cache_),
      query_norm,
      ids,
      ret_num);
}
Beispiel #8
0
void euclid_lsh::neighbor_row(
    const std::string& query_id,
    vector<pair<string, float> >& ids,
    uint64_t ret_num) const {
  const pair<bool, uint64_t> maybe_index =
      get_const_table()->exact_match(query_id);
  if (!maybe_index.first) {
    ids.clear();
    return;
  }

  const bit_vector bv = lsh_column()[maybe_index.second];
  const float norm = norm_column()[maybe_index.second];
  neighbor_row_from_hash(bv, norm, ids, ret_num);
}
// Serializes this object by delegating to the underlying table's pack().
//
//   packer  destination handed straight through to the table.
void nearest_neighbor_base::pack(framework::packer& packer) const {
  (*get_const_table()).pack(packer);
}
// Returns the table's bit-vector column identified by
// bit_vector_column_id_.
const_bit_vector_column& bit_vector_nearest_neighbor_base::bit_vector_column()
    const {
  const_bit_vector_column& column =
      get_const_table()->get_bit_vector_column(bit_vector_column_id_);
  return column;
}
Beispiel #11
0
// Returns the float column holding the row norms.  It is the column
// immediately after first_column_id_ (the LSH column, see lsh_column()).
const_float_column& euclid_lsh::norm_column() const {
  const_float_column& norms =
      get_const_table()->get_float_column(first_column_id_ + 1);
  return norms;
}
Beispiel #12
0
// Returns the bit-vector column holding the LSH hashes, i.e. the table
// column at first_column_id_.
const_bit_vector_column& euclid_lsh::lsh_column() const {
  const_bit_vector_column& hashes =
      get_const_table()->get_bit_vector_column(first_column_id_);
  return hashes;
}