void SequenceToBatch::sequence2BatchCopy(Matrix &batch, Matrix &sequence, IVector &seq2BatchIdx, bool seq2batch) { int seqWidth = sequence.getWidth(); int batchCount = batch.getHeight(); real *batchData = batch.getData(); real *seqData = sequence.getData(); int *idxData = seq2BatchIdx.getData(); if (useGpu_) { hl_sequence2batch_copy( batchData, seqData, idxData, seqWidth, batchCount, seq2batch); } else { if (seq2batch) { #ifdef PADDLE_USE_MKLML const int blockMemSize = 8 * 1024; const int blockSize = blockMemSize / sizeof(real); #pragma omp parallel for collapse(2) for (int i = 0; i < batchCount; ++i) { for (int j = 0; j < seqWidth; j += blockSize) { memcpy(batch.rowBuf(i) + j, sequence.rowBuf(idxData[i]) + j, (j + blockSize > seqWidth) ? (seqWidth - j) * sizeof(real) : blockMemSize); } } #else for (int i = 0; i < batchCount; ++i) { memcpy(batch.rowBuf(i), sequence.rowBuf(idxData[i]), seqWidth * sizeof(real)); } #endif } else { #ifdef PADDLE_USE_MKLML #pragma omp parallel for #endif for (int i = 0; i < batchCount; ++i) { memcpy(sequence.rowBuf(idxData[i]), batch.rowBuf(i), seqWidth * sizeof(real)); } } } }
/**
 * Accumulate rows between a batch-ordered matrix and a sequence-ordered
 * matrix.
 *
 * For every i in [0, batch.getHeight()), row i of `batch` is paired with row
 * seq2BatchIdx[i] of `sequence`.
 *
 * @param batch        batch-ordered matrix; its height is the number of rows
 *                     that are processed.
 * @param sequence     sequence-ordered matrix.
 * @param seq2BatchIdx for batch row i, the index of the paired sequence row.
 * @param seq2batch    true:  batch[i] += sequence[seq2BatchIdx[i]];
 *                     false: sequence[seq2BatchIdx[i]] += batch[i].
 */
void SequenceToBatch::sequence2BatchAdd(Matrix &batch,
                                        Matrix &sequence,
                                        IVector &seq2BatchIdx,
                                        bool seq2batch) {
  int rowLength = sequence.getWidth();
  int rowCount = batch.getHeight();
  real *batchBuf = batch.getData();
  real *sequenceBuf = sequence.getData();
  int *rowMap = seq2BatchIdx.getData();

  if (useGpu_) {
    // The device path is delegated entirely to the hl_ routine.
    hl_sequence2batch_add(
        batchBuf, sequenceBuf, rowMap, rowLength, rowCount, seq2batch);
    return;
  }

  // The direction does not change while iterating, so it is chosen once and
  // each direction gets its own loop over single-row sub-matrices.
  if (seq2batch) {
    for (int row = 0; row < rowCount; ++row) {
      batch.subMatrix(row, 1)->add(*sequence.subMatrix(rowMap[row], 1));
    }
  } else {
    for (int row = 0; row < rowCount; ++row) {
      sequence.subMatrix(rowMap[row], 1)->add(*batch.subMatrix(row, 1));
    }
  }
}
void SparseRowCpuMatrix::sgdUpdate(BaseMatrix& value, IVector& t0, real learningRate, int currentTime, real decayRate, bool useL1, bool fini) { std::vector<unsigned int>& localIndices = indexDictHandle_->localIndices; // t0 and value are vectors CHECK_EQ(t0.getSize(), this->height_); CHECK_EQ(value.width_, this->height_ * this->width_); if (decayRate == 0.0f) { if (fini) { return; } for (size_t i = 0; i < localIndices.size(); ++i) { real* g = getLocalRow(i); real* v = value.rowBuf(localIndices[i]); for (size_t j = 0; j < this->width_; ++j) { v[j] -= learningRate * g[j]; } } return; } // else if (useL1) { // L1 decay if (fini) { for (size_t i = 0; i < this->height_; ++i) { real* v = value.rowBuf(i); int* t = t0.getData() + i; if (t[0] < currentTime) { // W(t0) -> W(t+1) int tDiff = currentTime - t[0]; real delta = tDiff * learningRate * decayRate; simd::decayL1(v, v, delta, this->width_); } } return; } // else for (size_t i = 0; i < localIndices.size(); ++i) { real* g = getLocalRow(i); real* v = value.rowBuf(localIndices[i]); int* t = t0.getData() + localIndices[i]; if (t[0] < currentTime) { // W(t0) -> W(t) int tDiff = currentTime - t[0]; real delta = tDiff * learningRate * decayRate; simd::decayL1(v, v, delta, this->width_); } // W(t) -> W(t+1) for (size_t j = 0; j < this->width_; ++j) { v[j] -= learningRate * g[j]; } simd::decayL1(v, v, learningRate * decayRate, this->width_); // state update to t+1 t[0] = currentTime + 1; } } else { // L2 decay if (fini) { for (size_t i = 0; i < this->height_; ++i) { real* v = value.rowBuf(i); int* t = t0.getData() + i; if (t[0] < currentTime) { // W(t0) -> W(t+1) int tDiff = currentTime - t[0]; real recip = 1.0f / (1.0f + tDiff * learningRate * decayRate); for (size_t j = 0; j < this->width_; ++j) { v[j] *= recip; } } } return; } // else real recipDecay = 1.0f / (1.0f + learningRate * decayRate); for (size_t i = 0; i < localIndices.size(); ++i) { real* g = getLocalRow(i); real* v = value.rowBuf(localIndices[i]); int* t = 
t0.getData() + localIndices[i]; if (t[0] < currentTime) { // W(t0) -> W(t) int tDiff = currentTime - t[0]; real recip = 1.0f / (1.0f + tDiff * learningRate * decayRate); for (size_t j = 0; j < this->width_; ++j) { v[j] *= recip; } } // W(t) -> W(t+1) for (size_t j = 0; j < this->width_; ++j) { v[j] = recipDecay * (v[j] - learningRate * g[j]); } // state update to t+1 t[0] = currentTime + 1; } } }