/**
 * Hand the locally stored rows owned by one worker thread to simd::addTo,
 * pairing each with the matching row of dest.
 *
 * A local row belongs to thread `tid` when its global row id satisfies
 * `id % numThreads == tid`. For every such row, the destination row
 * `dest.rowBuf(id)`, the local row buffer and the row width are passed to
 * simd::addTo, and the id is appended to `ids`.
 *
 * @param dest        CPU matrix (checked) holding the same total number of
 *                    elements as this matrix (checked).
 * @param ids         Output list; receives the global id of every row that
 *                    was processed by this call, in local-index order.
 * @param tid         Index of the calling thread's partition.
 * @param numThreads  Number of partitions the row ids are split into.
 */
void SparseRowCpuMatrix::addTo(BaseMatrix& dest,
                               std::vector<uint32_t>& ids,
                               size_t tid,
                               size_t numThreads) {
  CHECK(!dest.useGpu_);
  CHECK_EQ(dest.height_ * dest.width_, this->height_ * this->width_);

  std::vector<unsigned int>& ownedRows = indexDictHandle_->localIndices;

  // The size is re-read on every iteration on purpose: it keeps the loop
  // well-defined even if the list changes while we walk it.
  for (size_t slot = 0; slot < ownedRows.size(); ++slot) {
    const uint32_t rowId = ownedRows[slot];

    // Rows assigned to other threads are skipped.
    if (rowId % numThreads != tid) {
      continue;
    }

    simd::addTo(dest.rowBuf(rowId), getLocalRow(slot), this->width_);
    ids.push_back(rowId);
  }
}
void SparseRowCpuMatrix::sgdUpdate(BaseMatrix& value, IVector& t0, real learningRate, int currentTime, real decayRate, bool useL1, bool fini) { std::vector<unsigned int>& localIndices = indexDictHandle_->localIndices; // t0 and value are vectors CHECK_EQ(t0.getSize(), this->height_); CHECK_EQ(value.width_, this->height_ * this->width_); if (decayRate == 0.0f) { if (fini) { return; } for (size_t i = 0; i < localIndices.size(); ++i) { real* g = getLocalRow(i); real* v = value.rowBuf(localIndices[i]); for (size_t j = 0; j < this->width_; ++j) { v[j] -= learningRate * g[j]; } } return; } // else if (useL1) { // L1 decay if (fini) { for (size_t i = 0; i < this->height_; ++i) { real* v = value.rowBuf(i); int* t = t0.getData() + i; if (t[0] < currentTime) { // W(t0) -> W(t+1) int tDiff = currentTime - t[0]; real delta = tDiff * learningRate * decayRate; simd::decayL1(v, v, delta, this->width_); } } return; } // else for (size_t i = 0; i < localIndices.size(); ++i) { real* g = getLocalRow(i); real* v = value.rowBuf(localIndices[i]); int* t = t0.getData() + localIndices[i]; if (t[0] < currentTime) { // W(t0) -> W(t) int tDiff = currentTime - t[0]; real delta = tDiff * learningRate * decayRate; simd::decayL1(v, v, delta, this->width_); } // W(t) -> W(t+1) for (size_t j = 0; j < this->width_; ++j) { v[j] -= learningRate * g[j]; } simd::decayL1(v, v, learningRate * decayRate, this->width_); // state update to t+1 t[0] = currentTime + 1; } } else { // L2 decay if (fini) { for (size_t i = 0; i < this->height_; ++i) { real* v = value.rowBuf(i); int* t = t0.getData() + i; if (t[0] < currentTime) { // W(t0) -> W(t+1) int tDiff = currentTime - t[0]; real recip = 1.0f / (1.0f + tDiff * learningRate * decayRate); for (size_t j = 0; j < this->width_; ++j) { v[j] *= recip; } } } return; } // else real recipDecay = 1.0f / (1.0f + learningRate * decayRate); for (size_t i = 0; i < localIndices.size(); ++i) { real* g = getLocalRow(i); real* v = value.rowBuf(localIndices[i]); int* t = 
t0.getData() + localIndices[i]; if (t[0] < currentTime) { // W(t0) -> W(t) int tDiff = currentTime - t[0]; real recip = 1.0f / (1.0f + tDiff * learningRate * decayRate); for (size_t j = 0; j < this->width_; ++j) { v[j] *= recip; } } // W(t) -> W(t+1) for (size_t j = 0; j < this->width_; ++j) { v[j] = recipDecay * (v[j] - learningRate * g[j]); } // state update to t+1 t[0] = currentTime + 1; } } }