void IndexEncoding::BestNextWords(const double* p_residual, const SMatrix<double>& matDictionary, int idx_start, int idx_end, SMatrix<double> &mat_diff, Vector<short>& best_idx) const { int m_nDimension = matDictionary.Cols(); SMatrix<double> mat_each_diff(idx_end - idx_start, m_nDimension); Heap<PAIR<double> > pq; pq.Reserve(m_nNumBestCan); for (int i = idx_start; i < idx_end; i++) { const double* p_dic = matDictionary[i]; double e = 0; for (int j = 0; j < m_nDimension; j++) { double d = p_residual[j] - p_dic[j]; mat_each_diff[i - idx_start][j] = d; e += d * d; } if (pq.size() >= m_nNumBestCan) { const PAIR<double> &p = pq.Top(); if (p.distance > e) { pq.popMin(); pq.insert(PAIR<double>(i - idx_start, e)); } } else { pq.insert(PAIR<double>(i - idx_start, e)); } } mat_diff.AllocateSpace(m_nNumBestCan, m_nDimension); best_idx.AllocateSpace(m_nNumBestCan); for (int i = m_nNumBestCan - 1; i >= 0; i--) { PAIR<double> p; pq.popMin(p); best_idx[i] = p.index; memcpy(mat_diff[i], mat_each_diff[p.index], sizeof(double) * m_nDimension); } }
void IndexEncoding::BestNextWordsSMart( const Vector<double> &vec_x_map, const SMatrix<double> &matInnerProduct, const short* prepresentation, int idx, short next_idx[], double next_errors[]) const { Heap<PAIR<double> > pq; pq.Reserve(m_nNumBestCan); int sub_dic_size = matInnerProduct.Rows() / m_nNumberDictionaryEachPartition; int idx_start = idx * sub_dic_size; int idx_end = idx_start + sub_dic_size; for (int i = idx_start; i < idx_end; i++) { // compoute the relative error double e = -vec_x_map[i]; const double* p_inner = matInnerProduct[i]; for (int j = 0; j < idx; j++) { e += p_inner[prepresentation[j]]; } e += 0.5 * p_inner[i]; if (pq.size() >= m_nNumBestCan) { const PAIR<double> &p = pq.Top(); if (p.distance > e) { pq.popMin(); pq.insert(PAIR<double>(i, e)); } } else { pq.insert(PAIR<double>(i, e)); } } for (int i = m_nNumBestCan - 1; i >= 0; i--) { PAIR<double> p; pq.popMin(p); next_idx[i] = p.index; next_errors[i] = p.distance; } }
int IndexEncoding::SolveAdditiveQuantization(const Vector<double> &vec_x_map, const SMatrix<double> &matPrecomputed, int num_dic_each_partition, short* prepresentation) const { int num = 0; bool is_changed; int num_sub_centers = matPrecomputed.Rows() / num_dic_each_partition; SMatrix<short>* p_curr; SMatrix<short>* p_aux; p_curr = new SMatrix<short>(m_nNumBestCan, num_dic_each_partition); p_aux = new SMatrix<short>(m_nNumBestCan, num_dic_each_partition); p_curr->SetValue(-1); { // find the best m_aq_candidate number of points. int idx_center = 0; Heap<GreatPair<double> > heap; heap.Reserve(m_nNumBestCan); for (int idx_sub_cluster = 0; idx_sub_cluster < num_dic_each_partition; idx_sub_cluster++) { for (int i = 0; i < num_sub_centers; i++) { double s = residual_distance(vec_x_map, idx_center, idx_sub_cluster, (*p_curr)[0], num_dic_each_partition, matPrecomputed); if (heap.size() < m_nNumBestCan) { heap.insert(GreatPair<double>(idx_center, s)); } else { const GreatPair<double> &top = heap.Top(); if (top.distance > s) { heap.popMin(); heap.insert(GreatPair<double>(idx_center, s)); } } idx_center++; } } SMART_ASSERT(heap.size() == m_nNumBestCan).Exit(); for (int i = 0; i < m_nNumBestCan; i++) { GreatPair<double> v; heap.popMin(v); //PRINT << v.index << "\t" << v.distance << "\n"; (*p_curr)[i][v.index / num_sub_centers] = v.index; } //PRINT << *p_curr << "\n"; } int idx_start_center = 0; for (int idx_sub_cluster1 = 1; idx_sub_cluster1 < num_dic_each_partition; idx_sub_cluster1++) { priority_queue<Triplet<int, int, double>, vector<Triplet<int, int, double> >, LessTripletThird<int, int, double> > heap; for (int idx_can = 0; idx_can < m_nNumBestCan; idx_can++) { short* curr_presentation = (*p_curr)[idx_can]; for (int idx_sub_cluster2 = 0; idx_sub_cluster2 < num_dic_each_partition; idx_sub_cluster2++) { if (curr_presentation[idx_sub_cluster2] == -1) { int idx_start_center = idx_sub_cluster2 * num_sub_centers; int idx_end_center = idx_start_center + num_sub_centers; for (int idx_center = idx_start_center; idx_center < idx_end_center; idx_center++) { double s = residual_distance(vec_x_map, idx_center, idx_sub_cluster2, curr_presentation, num_dic_each_partition, matPrecomputed); if (heap.size() < m_nNumBestCan) { heap.push(Triplet<int, int, double>(idx_can, idx_center, s)); } else { const Triplet<int, int, double> &top = heap.top(); if (top.third > s) { heap.pop(); heap.push(Triplet<int, int, double>(idx_can, idx_center, s)); } } } } } } SMART_ASSERT(m_nNumBestCan == heap.size())(heap.size()).Exit(); for (int idx_can = 0; idx_can < m_nNumBestCan; idx_can++) { const Triplet<int, int, double> &top = heap.top(); int idx_origin_can = top.first; const short* p_origin = p_curr->operator[](idx_origin_can); short* p_now = p_aux->operator[](idx_can); //PRINT << top.first << "\t" << top.second << "\t" << top.third << "\n"; memcpy(p_now, p_origin, sizeof(short) * num_dic_each_partition); p_now[top.second / num_sub_centers] = top.second; heap.pop(); } swap(p_curr, p_aux); //PRINT << *p_curr; } memcpy(prepresentation, p_curr->operator[](m_nNumBestCan - 1), sizeof(short) * num_dic_each_partition); //PRINT << *p_curr; delete p_curr; delete p_aux; return false; }