Exemple #1
0
// Copies rows between `sequence` and `batch`, pairing batch row i with
// sequence row seq2BatchIdx[i].
//
//   seq2batch == true  : batch[i]                 <- sequence[seq2BatchIdx[i]]
//   seq2batch == false : sequence[seq2BatchIdx[i]] <- batch[i]
//
// Each row copy moves sequence.getWidth() elements; batch.getHeight() rows
// are processed. When useGpu_ is set the whole job is handed to
// hl_sequence2batch_copy; otherwise rows are copied with memcpy.
void SequenceToBatch::sequence2BatchCopy(Matrix &batch,
                                         Matrix &sequence,
                                         IVector &seq2BatchIdx,
                                         bool seq2batch) {
  const int rowWidth = sequence.getWidth();
  const int numRows = batch.getHeight();
  real *batchData = batch.getData();
  real *seqData = sequence.getData();
  int *rowMap = seq2BatchIdx.getData();

  // Device path: a single call does all the work.
  if (useGpu_) {
    hl_sequence2batch_copy(
        batchData, seqData, rowMap, rowWidth, numRows, seq2batch);
    return;
  }

  // Size in bytes of one full row.
  const size_t rowBytes = rowWidth * sizeof(real);

  if (!seq2batch) {
    // batch -> sequence: scatter every batch row to its mapped sequence row.
#ifdef PADDLE_USE_MKLML
#pragma omp parallel for
#endif
    for (int row = 0; row < numRows; ++row) {
      memcpy(sequence.rowBuf(rowMap[row]), batch.rowBuf(row), rowBytes);
    }
    return;
  }

  // sequence -> batch: gather the mapped sequence row into every batch row.
#ifdef PADDLE_USE_MKLML
  // Rows are split into 8 KiB pieces so that the (row, piece) pairs can be
  // distributed over the OpenMP threads by the collapsed loop below.
  const int blockMemSize = 8 * 1024;
  const int blockSize = blockMemSize / sizeof(real);
#pragma omp parallel for collapse(2)
  for (int row = 0; row < numRows; ++row) {
    for (int col = 0; col < rowWidth; col += blockSize) {
      // The last piece of a row may be shorter than a full block.
      size_t pieceBytes;
      if (col + blockSize > rowWidth) {
        pieceBytes = (rowWidth - col) * sizeof(real);
      } else {
        pieceBytes = blockMemSize;
      }
      memcpy(batch.rowBuf(row) + col,
             sequence.rowBuf(rowMap[row]) + col,
             pieceBytes);
    }
  }
#else
  for (int row = 0; row < numRows; ++row) {
    memcpy(batch.rowBuf(row), sequence.rowBuf(rowMap[row]), rowBytes);
  }
#endif
}
Exemple #2
0
// Accumulates rows between `sequence` and `batch`, pairing batch row i with
// sequence row seq2BatchIdx[i].
//
//   seq2batch == true  : batch[i]                 += sequence[seq2BatchIdx[i]]
//   seq2batch == false : sequence[seq2BatchIdx[i]] += batch[i]
//
// batch.getHeight() rows are processed. When useGpu_ is set the work is
// delegated to hl_sequence2batch_add; otherwise each row pair is combined
// through one-row sub-matrix views and Matrix::add.
void SequenceToBatch::sequence2BatchAdd(Matrix &batch,
                                        Matrix &sequence,
                                        IVector &seq2BatchIdx,
                                        bool seq2batch) {
  const int rowWidth = sequence.getWidth();
  const int numRows = batch.getHeight();
  real *batchData = batch.getData();
  real *seqData = sequence.getData();
  int *rowMap = seq2BatchIdx.getData();

  // Device path: a single call does all the work.
  if (useGpu_) {
    hl_sequence2batch_add(
        batchData, seqData, rowMap, rowWidth, numRows, seq2batch);
    return;
  }

  for (int row = 0; row < numRows; ++row) {
    // One-row views of the two rows paired by the index map.
    auto batchRow = batch.subMatrix(row, 1);
    auto seqRow = sequence.subMatrix(rowMap[row], 1);

    if (seq2batch) {
      batchRow->add(*seqRow);
    } else {
      seqRow->add(*batchRow);
    }
  }
}
// SGD step for a sparse-row gradient with lazily applied weight decay.
//
// This matrix supplies the gradient rows (getLocalRow(i) is the gradient for
// parameter row localIndices[i]); `value` is the parameter being updated and
// `t0` keeps, per parameter row, the time step up to which decay has already
// been applied.
//
//   learningRate : step size used for both the gradient step and the decay.
//   currentTime  : current time step; rows with t0[row] < currentTime first
//                  receive the decay they missed.
//   decayRate    : decay coefficient; exactly 0 means plain SGD, with no decay
//                  and no t0 bookkeeping.
//   useL1        : true -> L1 decay (simd::decayL1), false -> L2 decay
//                  (multiplicative shrink).
//   fini         : true -> no gradient is applied; every row of `value` whose
//                  t0 is behind currentTime just gets its pending decay.
//                  t0 is not modified in this mode.
void SparseRowCpuMatrix::sgdUpdate(BaseMatrix& value,
                                   IVector& t0,
                                   real learningRate,
                                   int currentTime,
                                   real decayRate,
                                   bool useL1,
                                   bool fini) {
  std::vector<unsigned int>& localIndices = indexDictHandle_->localIndices;

  // t0 and value are vectors
  CHECK_EQ(t0.getSize(), this->height_);
  CHECK_EQ(value.width_, this->height_ * this->width_);

  // weights -= learningRate * grad, element by element over one row.
  auto gradientStep = [&](real* weights, const real* grad) {
    for (size_t col = 0; col < this->width_; ++col) {
      weights[col] -= learningRate * grad[col];
    }
  };

  // ---- No decay: plain SGD on the locally present rows. ----
  if (decayRate == 0.0f) {
    if (!fini) {
      for (size_t k = 0; k < localIndices.size(); ++k) {
        real* grad = getLocalRow(k);
        real* weights = value.rowBuf(localIndices[k]);
        gradientStep(weights, grad);
      }
    }
    return;
  }

  // Applies, in one shot, the decay a row missed over `elapsed` time steps.
  auto catchUpDecay = [&](real* weights, int elapsed) {
    if (useL1) {
      real delta = elapsed * learningRate * decayRate;
      simd::decayL1(weights, weights, delta, this->width_);
    } else {
      real recip = 1.0f / (1.0f + elapsed * learningRate * decayRate);
      for (size_t col = 0; col < this->width_; ++col) {
        weights[col] *= recip;
      }
    }
  };

  int* lastDecayed = t0.getData();

  // ---- Finalization: bring every stale row up to date, W(t0) -> W(t+1). ----
  if (fini) {
    for (size_t row = 0; row < this->height_; ++row) {
      real* weights = value.rowBuf(row);
      const int stamp = lastDecayed[row];
      if (stamp < currentTime) {
        catchUpDecay(weights, currentTime - stamp);
      }
    }
    return;
  }

  // ---- Regular step on the locally present rows. ----
  if (useL1) {
    for (size_t k = 0; k < localIndices.size(); ++k) {
      real* grad = getLocalRow(k);
      real* weights = value.rowBuf(localIndices[k]);
      int& stamp = lastDecayed[localIndices[k]];

      // W(t0) -> W(t)
      if (stamp < currentTime) {
        catchUpDecay(weights, currentTime - stamp);
      }

      // W(t) -> W(t+1): gradient step followed by one step of L1 decay.
      gradientStep(weights, grad);
      simd::decayL1(weights, weights, learningRate * decayRate, this->width_);

      // state update to t+1
      stamp = currentTime + 1;
    }
    return;
  }

  // L2: shrink factor for a single time step.
  real recipDecay = 1.0f / (1.0f + learningRate * decayRate);

  for (size_t k = 0; k < localIndices.size(); ++k) {
    real* grad = getLocalRow(k);
    real* weights = value.rowBuf(localIndices[k]);
    int& stamp = lastDecayed[localIndices[k]];

    // W(t0) -> W(t)
    if (stamp < currentTime) {
      catchUpDecay(weights, currentTime - stamp);
    }

    // W(t) -> W(t+1): gradient step and L2 shrink fused in one pass.
    for (size_t col = 0; col < this->width_; ++col) {
      weights[col] = recipDecay * (weights[col] - learningRate * grad[col]);
    }

    // state update to t+1
    stamp = currentTime + 1;
  }
}