Esempio n. 1
0
void recommender_base::complete_row(const sfv_t& query, sfv_t& ret) const {
  ret.clear();
  vector<pair<string, float> > ids;
  similar_row(query, ids, complete_row_similar_num_);
  if (ids.size() == 0) {
    return;
  }

  size_t exist_row_num = 0;
  for (size_t i = 0; i < ids.size(); ++i) {
    sfv_t row;
    orig_.get_row(ids[i].first, row);
    if (row.size() == 0) {
      continue;
    } else {
      ++exist_row_num;
    }
    float ratio = ids[i].second;
    for (size_t j = 0; j < row.size(); ++j) {
      ret.push_back(make_pair(row[j].first, row[j].second * ratio));
    }
  }

  if (exist_row_num == 0) {
    return;
  }
  sort_and_merge(ret);
  for (size_t i = 0; i < ret.size(); ++i) {
    ret[i].second /= exist_row_num;
  }
}
float inverted_index_storage::calc_l2norm(const sfv_t& sfv){
  float ret = 0.f;
  for (size_t i = 0; i < sfv.size(); ++i){
    ret +=  sfv[i].second * sfv[i].second;
  }
  return sqrt(ret);
}
Esempio n. 3
0
static float calc_norm(const sfv_t& fv) {
  float norm = 0;
  for (size_t i = 0; i < fv.size(); ++i) {
    norm += fv[i].second * fv[i].second;
  }
  return norm;
}
Esempio n. 4
0
float recommender_base::calc_l2norm(const sfv_t& query) {
  float ret = 0.f;
  for (size_t i = 0; i < query.size(); ++i) {
    ret += query[i].second * query[i].second;
  }
  return sqrt(ret);
}
void inverted_index_storage::calc_scores(const sfv_t& query, 
                                         vector<pair<string, float> >& scores,
                                         size_t ret_num) const {
  float query_norm = calc_l2norm(query);
  if (query_norm == 0.f){
    return;
  }
  pfi::data::unordered_map<uint64_t, float> i_scores;
  for (size_t i = 0; i < query.size(); ++i){
    const string& fid = query[i].first;
    float val = query[i].second;
    add_inp_scores(fid, val, i_scores);
  }

  vector<pair<float, uint64_t> > sorted_scores;
  for (pfi::data::unordered_map<uint64_t, float>::const_iterator it = i_scores.begin(); it != i_scores.end(); ++it){
    float norm = calc_columnl2norm(it->first);
    float normed_score = (norm != 0.f) ? it->second / norm / query_norm : 0.f;
    sorted_scores.push_back(make_pair(normed_score, it->first));
  }
  sort(sorted_scores.rbegin(), sorted_scores.rend());
  for (size_t i = 0; i < sorted_scores.size() && i < ret_num; ++i){
    scores.push_back(make_pair(column2id_.get_key(sorted_scores[i].second), sorted_scores[i].first));
  }
}
Esempio n. 6
0
void AROW::update(const sfv_t& sfv, float alpha, float beta, 
		  const std::string& pos_label, const std::string& neg_label){
 for (sfv_t::const_iterator it = sfv.begin(); it != sfv.end(); ++it){
    const string& feature = it->first;
    float val = it->second;
    storage::feature_val2_t ret;
    storage_->get2(feature, ret);
    
    storage::val2_t pos_val(0.f, 1.f);
    storage::val2_t neg_val(0.f, 1.f);
    ClassifierUtil::get_two(ret, pos_label, neg_label, pos_val, neg_val);

    storage_->set2(feature, pos_label, storage::val2_t(pos_val.v1 + alpha * pos_val.v2 * val, pos_val.v2 - beta * pos_val.v2 * pos_val.v2 * val * val));
    if (neg_label != "")
      storage_->set2(feature, neg_label, storage::val2_t(neg_val.v1 - alpha * neg_val.v2 * val, neg_val.v2 - beta * neg_val.v2 * neg_val.v2 * val * val));
  }

}
Esempio n. 7
0
void local_storage_mixture::bulk_update(const sfv_t& sfv, float step_width, const string& inc_class, const string& dec_class){
  uint64_t inc_id = class2id_.get_id(inc_class);
  if (dec_class != ""){
    uint64_t dec_id = class2id_.get_id(dec_class);
    for (sfv_t::const_iterator it = sfv.begin(); it != sfv.end(); ++it){
      float val = it->second * step_width;
      id_feature_val3_t& feature_row = tbl_diff_[it->first];
      feature_row[inc_id].v1 += val;
      feature_row[dec_id].v1 -= val;
    }
  } else {
    for (sfv_t::const_iterator it = sfv.begin(); it != sfv.end(); ++it){
      float val = it->second * step_width;
      id_feature_val3_t& feature_row = tbl_diff_[it->first];
      feature_row[inc_id].v1 += val;
    }
  }
}
Esempio n. 8
0
// Sorts `sfv` and collapses entries with equal keys into a single entry whose
// value is the sum of the merged values. The result replaces the contents of
// `sfv`; an empty input is left untouched.
void sort_and_merge(sfv_t& sfv){
  if (sfv.empty()) return;
  sort(sfv.begin(), sfv.end());

  sfv_t merged;
  // `run` points at the first entry of the current run of equal keys;
  // `total` is the sum of the values seen in that run so far.
  sfv_t::iterator run = sfv.begin();
  float total = run->second;
  for (sfv_t::iterator it = run + 1; it != sfv.end(); ++it){
    if (it->first == run->first){
      total += it->second;
      continue;
    }
    merged.push_back(make_pair(run->first, total));
    run = it;
    total = it->second;
  }
  // Flush the final run.
  merged.push_back(make_pair(run->first, total));

  sfv.swap(merged);
}
Esempio n. 9
0
void local_storage_mixture::inp(const sfv_t& sfv, map_feature_val1_t& ret) {
  ret.clear();
  
  std::vector<float> ret_id(class2id_.size());
  for (sfv_t::const_iterator it = sfv.begin(); it != sfv.end(); ++it){
    const string& feature = it->first;
    const float val = it->second;
    id_feature_val3_t m;
    get_internal(feature, m);
    for (id_feature_val3_t::const_iterator it3 = m.begin(); it3 != m.end(); ++it3){
      ret_id[it3->first] += it3->second.v1 * val;
    }
  }
  
  for (size_t i = 0; i < ret_id.size(); ++i){
    if (ret_id[i] == 0.f) continue;
    ret[class2id_.get_key(i)] = ret_id[i];
  }
}
Esempio n. 10
0
void revert_feature(const sfv_t& fv,
                    fv_converter::datum& data) {
  for (size_t i = 0; i < fv.size(); ++i) {
    pair<string, float> num_value;
    pair<string, string> string_value;
    if (revert_num_value(fv[i], num_value)) {
      data.num_values_.push_back(num_value);
    } else if (revert_string_value(fv[i], string_value)) {
      data.string_values_.push_back(string_value);
    }
  }
}
Esempio n. 11
0
// Returns the dot product of `q1` and `q2` over their shared keys, divided by
// both L2 norms. Returns 0 when either norm is 0.
//
// Side effect: unless the function returns early because of a zero norm, both
// `q1` and `q2` are sorted in place.
float recommender_base::calc_similality(sfv_t& q1, sfv_t& q2) {
  const float norm1 = calc_l2norm(q1);
  const float norm2 = calc_l2norm(q2);
  if (norm1 == 0.f || norm2 == 0.f) {
    return 0.f;
  }

  sort(q1.begin(), q1.end());
  sort(q2.begin(), q2.end());

  // Walk both sorted vectors in lock-step, multiplying values of equal keys.
  float dot = 0.f;
  sfv_t::const_iterator a = q1.begin();
  sfv_t::const_iterator b = q2.begin();
  const sfv_t::const_iterator a_end = q1.end();
  const sfv_t::const_iterator b_end = q2.end();
  while (a != a_end && b != b_end) {
    const int order = a->first.compare(b->first);
    if (order < 0) {
      ++a;
    } else if (order > 0) {
      ++b;
    } else {
      dot += a->second * b->second;
      ++a;
      ++b;
    }
  }

  return dot / norm1 / norm2;
}
Esempio n. 12
0
// Multiplies every value in `fv` by get_global_weight() of its key, then
// removes all entries matched by the is_zero predicate (presumably entries
// whose value became 0 -- confirm against is_zero).
void weight_manager::get_weight(sfv_t& fv) const {
  for (size_t i = 0; i < fv.size(); ++i) {
    const double global_weight = get_global_weight(fv[i].first);
    fv[i].second *= global_weight;
  }

  const sfv_t::iterator kept_end = remove_if(fv.begin(), fv.end(), is_zero());
  fv.erase(kept_end, fv.end());
}
Esempio n. 13
0
void NHERD::update(const sfv_t& sfv, float margin, float variance, 
		   const string& pos_label, const string& neg_label){
  for (sfv_t::const_iterator it = sfv.begin(); it != sfv.end(); ++it){
    const string& feature = it->first;
    float val = it->second;
    storage::feature_val2_t ret;
    storage_->get2(feature, ret);

    storage::val2_t pos_val(0.f, 1.f);
    storage::val2_t neg_val(0.f, 1.f);
    ClassifierUtil::get_two(ret, pos_label, neg_label, pos_val, neg_val);

    float val_covariance_pos = val * pos_val.v2;
    float val_covariance_neg = val * neg_val.v2;

    storage_->set2(feature, pos_label, 
                   storage::val2_t(pos_val.v1 + (1.f - margin) * val_covariance_pos / (val_covariance_pos * val + 1.f / C_),
                                   1.f / ((1.f / pos_val.v2) + (2 * C_ + C_ * C_ *  variance) * val * val)));
    if (neg_label != "")
      storage_->set2(feature, neg_label, 
                     storage::val2_t(neg_val.v1 - (1.f - margin) * val_covariance_neg / (val_covariance_neg * val + 1.f / C_),
                                     1.f / ((1.f / neg_val.v2) + (2 * C_ + C_ * C_ *  variance) * val * val)));
  }
}
Esempio n. 14
0
void CW::update(const sfv_t& sfv, float step_width, const string& pos_label, const string& neg_label){
  for (sfv_t::const_iterator it = sfv.begin(); it != sfv.end(); ++it){
    const string& feature = it->first;
    float val = it->second;
    storage::feature_val2_t val2;
    storage_->get2(feature, val2);

    storage::val2_t pos_val(0.f, 1.f);
    storage::val2_t neg_val(0.f, 1.f);
    ClassifierUtil::get_two(val2, pos_label, neg_label, pos_val, neg_val);

    const float C = config.C;
    float covar_pos_step = 2.f * step_width * pos_val.v2 * val * val * C;
    float covar_neg_step = 2.f * step_width * neg_val.v2 * val * val * C;

   storage_->set2(feature, pos_label, 
                  storage::val2_t(pos_val.v1 + step_width * pos_val.v2 * val,
                                  1.f / (1.f / pos_val.v2 + covar_pos_step)));
   if (neg_label != "")
     storage_->set2(feature, neg_label, 
                    storage::val2_t(neg_val.v1 - step_width * neg_val.v2 * val,
                                    1.f / (1.f / neg_val.v2 + covar_neg_step)));
  }
}
Esempio n. 15
0
void minhash::calc_minhash_values(const sfv_t& sfv, bit_vector& bv) const{
  vector<float> min_values_buffer(hash_num_, FLT_MAX);
  vector<uint64_t> hash_buffer(hash_num_);
  for (size_t i = 0; i < sfv.size(); ++i){
    uint64_t key_hash = hash_util::calc_string_hash(sfv[i].first);
    float val = sfv[i].second;
    for (uint64_t j = 0; j < hash_num_; ++j){
      float hashval = calc_hash(key_hash, j, val);
      if (hashval < min_values_buffer[j]){
        min_values_buffer[j] = hashval;
        hash_buffer[j] = key_hash;
      }
    }
  }

  bv.resize_and_clear(hash_num_);
  for (size_t i = 0; i < hash_buffer.size(); ++i){
    if ((hash_buffer[i] & 1LLU) == 1){
      bv.set_bit(i);
    }
  }
}
Esempio n. 16
0
// Sorts `sfv` and merges, in place, entries with equal keys into one entry
// holding the sum of their values. Vectors with fewer than two entries are
// returned unchanged.
void sort_and_merge(sfv_t& sfv) {
    if (sfv.size() < 2) {
        return;
    }
    sort(sfv.begin(), sfv.end());

    // sfv[0..last] is the merged prefix; `last` is its final entry.
    size_t last = 0;
    for (size_t i = 1; i < sfv.size(); ++i) {
        if (sfv[i].first == sfv[last].first) {
            sfv[last].second += sfv[i].second;
        } else {
            ++last;
            sfv[last] = sfv[i];
        }
    }

    // Drop the leftover tail behind the merged prefix.
    sfv.erase(sfv.begin() + last + 1, sfv.end());
}
Esempio n. 17
0
// Fetches the row stored under `id` in orig_ into `ret`.
// `ret` is cleared before the lookup, so its previous contents are discarded.
void recommender_base::decode_row(const std::string& id, sfv_t& ret) const {
  ret.clear();
  orig_.get_row(id, ret);
}
Esempio n. 18
0
// Replaces every key in `fv` with the decimal string of
// calc_string_hash(key) % max_size_. Values are left untouched and entries
// that end up with the same key are not merged here.
// NOTE(review): assumes max_size_ is non-zero -- confirm where it is set.
void feature_hasher::hash_feature_keys(sfv_t& fv) const {
  for (sfv_t::iterator it = fv.begin(); it != fv.end(); ++it) {
    uint64_t bucket = hash_util::calc_string_hash(it->first) % max_size_;
    it->first = pfi::lang::lexical_cast<string>(bucket);
  }
}
Esempio n. 19
0
 // Appends one feature to `ret_fv` whose name is "<key>$<value>" (value
 // formatted with the stream's default formatting) and whose weight is 1.0.
 void add_feature(const std::string& key, double value, sfv_t& ret_fv) const {
   std::ostringstream name_builder;
   name_builder << key;
   name_builder << "$";
   name_builder << value;
   const std::string feature_name = name_builder.str();
   ret_fv.push_back(make_pair(feature_name, 1.0));
 }
Esempio n. 20
0
void keyword_weights::update_document_frequency(const sfv_t& fv) {
  ++document_count_;
  for (sfv_t::const_iterator it = fv.begin(); it != fv.end(); ++it) {
    ++document_frequencies_[it->first];
  }
}
Esempio n. 21
0
// Overload taking a row id: reads the row stored under `id` from orig_ and
// forwards it to the sfv_t overload of complete_row(), which writes its result
// into `ret`.
void recommender_base::complete_row(const std::string& id, sfv_t& ret) const {
  ret.clear();  // discard previous contents before the row lookup
  sfv_t sfv;
  orig_.get_row(id, sfv);
  complete_row(sfv, ret);
}
Esempio n. 22
0
void lsh::generate_column_bases(const sfv_t& sfv){
  for (size_t i = 0; i < sfv.size(); ++i){
    generate_column_base(sfv[i].first);
  }
}
Esempio n. 23
0
 // Appends (key, log(max(1.0, value))) to `ret_fv`. Values below 1.0 are
 // clamped to 1.0 first, so the appended weight is never negative.
 void add_feature(const std::string& key, double value, sfv_t& ret_fv) const {
   const double clamped = std::max(1.0, value);
   const double log_weight = std::log(clamped);
   ret_fv.push_back(make_pair(key, log_weight));
 }