Пример #1
0
 // Loads the lower band of `bm` into this symmetric band matrix.
 //
 // Assigning a matrix to itself is a no-op. `bm` must have the same dimensions
 // as this matrix (asserted). Only the entries (j + i, j) for sub-diagonals
 // i = 0 .. lower bandwidth are read from `bm`.
 //
 // When bm.Search(*this) is non-zero (presumably: `bm` refers to this matrix's
 // storage -- confirm against BaseMatrix::Search), the values are first
 // gathered into a temporary of the same order and bandwidth, which is then
 // swapped into place.
 void SymmetricBandMatrix<TYPE>::operator<<(const BaseMatrix<TYPE>& bm)
 {
     if (&bm == this)
     {
         return;
     }
     assert(bm.Nrows() == GeneralMatrix<TYPE>::nrows && bm.Ncols() == GeneralMatrix<TYPE>::ncols);

     int order = GeneralMatrix<TYPE>::nrows;
     int lower = this->BandWidth().Lower();

     if (bm.Search(*this) != 0)
     {
         // Build the result off to the side, then take over its contents.
         SymmetricBandMatrix<TYPE> scratch(order, lower);
         scratch << bm;
         this->Swap(scratch);
         return;
     }

     // Walk the main diagonal (diag == 0) and each sub-diagonal in turn.
     for (int diag = 0; diag <= lower; ++diag)
     {
         for (int col = 1; col <= order - diag; ++col)
         {
             operator()(col + diag, col) = bm(col + diag, col);
         }
     }
 }
Пример #2
0
// Copies `src` into this MIC HYB matrix.
//
// `src` must report the same matrix format as this object (asserted).
//  - MICAcceleratorMatrixHYB source: if this matrix currently reports zero
//    non-zeros it is allocated with the source's ELL/COO sizes; nnz, nrow and
//    ncol are then asserted equal and the ELL (val, col) and COO
//    (row, col, val) arrays are copied with copy_mic_mic().
//  - HostMatrix source: forwarded to CopyFromHost().
//  - anything else: logs an error, prints info() for both matrices and
//    invokes FATAL_ERROR.
void MICAcceleratorMatrixHYB<ValueType>::CopyFrom(const BaseMatrix<ValueType> &src) {

  // copy only in the same format
  assert(this->get_mat_format() == src.get_mat_format());

  if (const MICAcceleratorMatrixHYB<ValueType> *mic_src =
          dynamic_cast<const MICAcceleratorMatrixHYB<ValueType>*> (&src)) {

    // MIC to MIC copy; an empty destination is sized from the source first
    if (this->get_nnz() == 0) {
      this->AllocateHYB(mic_src->get_ell_nnz(), mic_src->get_coo_nnz(), mic_src->get_ell_max_row(),
                        mic_src->get_nrow(), mic_src->get_ncol());
    }

    assert((this->get_nnz()  == src.get_nnz())  &&
           (this->get_nrow() == src.get_nrow()) &&
           (this->get_ncol() == src.get_ncol()) );

    // ELL part
    if (this->get_ell_nnz() > 0) {
      copy_mic_mic(this->local_backend_.MIC_dev,
                   mic_src->mat_.ELL.val, this->mat_.ELL.val, this->get_ell_nnz());
      copy_mic_mic(this->local_backend_.MIC_dev,
                   mic_src->mat_.ELL.col, this->mat_.ELL.col, this->get_ell_nnz());
    }

    // COO part
    if (this->get_coo_nnz() > 0) {
      copy_mic_mic(this->local_backend_.MIC_dev,
                   mic_src->mat_.COO.row, this->mat_.COO.row, this->get_coo_nnz());
      copy_mic_mic(this->local_backend_.MIC_dev,
                   mic_src->mat_.COO.col, this->mat_.COO.col, this->get_coo_nnz());
      copy_mic_mic(this->local_backend_.MIC_dev,
                   mic_src->mat_.COO.val, this->mat_.COO.val, this->get_coo_nnz());
    }

  } else if (const HostMatrix<ValueType> *host_src =
                 dynamic_cast<const HostMatrix<ValueType>*> (&src)) {

    // CPU to MIC
    this->CopyFromHost(*host_src);

  } else {

    LOG_INFO("Error unsupported MIC matrix type");
    this->info();
    src.info();
    FATAL_ERROR(__FILE__, __LINE__);

  }

}
Пример #3
0
    // Solves the system for right-hand side `in`, writing the result to `out`.
    //
    // `in` must have as many rows as this matrix, and `out` must have the same
    // shape as `in` (both asserted). The work is delegated to the solver object
    // returned by MakeSolver().
    void IdentityMatrix<TYPE>::Solve(const BaseMatrix<TYPE>& in, BaseMatrix<TYPE>& out) const
    {
        int order = GeneralMatrix<TYPE>::nrows;
        assert(order == in.Nrows());
        assert(in.Ncols() == out.Ncols() && in.Nrows() == out.Nrows());

        std::shared_ptr<LinearEquationSolver<TYPE> > delegate = this->MakeSolver();
        delegate->Solve(in, out);
    }
Пример #4
0
 // Loads this matrix from `bm`.
 //
 // Self-assignment is a no-op. Otherwise `bm` must match this matrix's
 // dimensions (asserted) and only element (1, 1) of `bm` is copied.
 void IdentityMatrix<TYPE>::operator<<(const BaseMatrix<TYPE> &bm)
 {
     if (&bm != this)
     {
         assert(bm.Nrows() == GeneralMatrix<TYPE>::nrows && bm.Ncols() == GeneralMatrix<TYPE>::ncols);
         operator()(1, 1) = bm(1, 1);
     }
 }
Пример #5
0
// Returns the matrix product (*this) * rhs.
//
// Throws InvalidVectorMath when the column count of this matrix differs from
// the row count of `rhs`. The result has numRows() rows and rhs.numCols()
// columns.
//
// NOTE(review): the accumulation uses `+=` on the freshly constructed result,
// so it assumes Matrix<T>(rows, cols) starts zero-filled -- confirm.
Matrix<T> SymMatrix<T>::operator*(const BaseMatrix<T>& rhs) const
{
  if(Matrix<T>::numCols() != rhs.numRows())
    throw InvalidVectorMath("Trying to multiply two incompatable matrices");

  // Shared inner dimension: columns of *this == rows of rhs (checked above).
  // The summation index must run over this dimension, not over rhs.numCols();
  // using rhs.numCols() gives wrong sums (or out-of-range access) whenever
  // rhs is not square.
  const unsigned long inner = Matrix<T>::numCols();

  Matrix<T> retval(Matrix<T>::numRows(), rhs.numCols());
  for(unsigned long i = 0; i < Matrix<T>::numRows(); i++)
    for(unsigned long j = 0; j < rhs.numCols(); j++)
      for(unsigned long k = 0; k < inner; k++)
        retval.at(i,j) += (*this)(i,k) * rhs(k, j);
  return retval;
}
Пример #6
0
// Copies `src` into this MIC DIA matrix.
//
// `src` must report the same matrix format as this object (asserted).
//  - MICAcceleratorMatrixDIA source: if this matrix currently reports zero
//    non-zeros it is allocated from the source's nnz/nrow/ncol/ndiag; the
//    sizes are then asserted equal and the value and offset arrays are copied
//    with copy_mic_mic().
//  - HostMatrix source: forwarded to CopyFromHost().
//  - anything else: logs an error, prints info() for both matrices and
//    invokes FATAL_ERROR.
//
// NOTE(review): copy_mic_mic() is called here with three arguments (src, dst,
// count) and no device id -- confirm this matches the helper's signature.
void MICAcceleratorMatrixDIA<ValueType>::CopyFrom(const BaseMatrix<ValueType> &src) {

  // copy only in the same format
  assert(this->get_mat_format() == src.get_mat_format());

  if (const MICAcceleratorMatrixDIA<ValueType> *mic_src =
          dynamic_cast<const MICAcceleratorMatrixDIA<ValueType>*> (&src)) {

    // MIC to MIC copy; an empty destination is sized from the source first
    if (this->get_nnz() == 0) {
      this->AllocateDIA(mic_src->get_nnz(), mic_src->get_nrow(), mic_src->get_ncol(), mic_src->get_ndiag());
    }

    assert((this->get_nnz()  == src.get_nnz())  &&
           (this->get_nrow() == src.get_nrow()) &&
           (this->get_ncol() == src.get_ncol()) );

    if (this->get_nnz() > 0) {

      // values first, then the per-diagonal offsets
      copy_mic_mic(mic_src->mat_.val, this->mat_.val, this->get_nnz());
      copy_mic_mic(mic_src->mat_.offset, this->mat_.offset, this->mat_.num_diag);

      // TODO (kept from the original, disabled): element-wise fallback
      //   for (int j=0; j<this->get_nnz(); ++j)
      //     this->mat_.val[j] = mic_src->mat_.val[j];
      //   for (int j=0; j<this->mat_.num_diag; ++j)
      //     this->mat_.offset[j] = mic_src->mat_.offset[j];

    }

  } else if (const HostMatrix<ValueType> *host_src =
                 dynamic_cast<const HostMatrix<ValueType>*> (&src)) {

    // CPU to MIC
    this->CopyFromHost(*host_src);

  } else {

    LOG_INFO("Error unsupported MIC matrix type");
    this->info();
    src.info();
    FATAL_ERROR(__FILE__, __LINE__);

  }

}
Пример #7
0
// Copies `src` into this OpenCL COO matrix.
//
// `src` must report the same matrix format as this object (asserted).
//  - OCLAcceleratorMatrixCOO source: if this matrix currently reports zero
//    non-zeros it is allocated from the source's nnz/nrow/ncol; the sizes are
//    then asserted equal and the row, col and val buffers are copied with
//    ocl_dev2dev() on this object's command queue.
//  - HostMatrix source: forwarded to CopyFromHost().
//  - anything else: logs an error, prints info() for both matrices and
//    invokes FATAL_ERROR.
void OCLAcceleratorMatrixCOO<ValueType>::CopyFrom(const BaseMatrix<ValueType> &src) {

  // copy only in the same format
  assert(this->get_mat_format() == src.get_mat_format());

  if (const OCLAcceleratorMatrixCOO<ValueType> *ocl_src =
          dynamic_cast<const OCLAcceleratorMatrixCOO<ValueType>*> (&src)) {

    // OCL to OCL copy; an empty destination is sized from the source first
    if (this->get_nnz() == 0) {
      this->AllocateCOO(src.get_nnz(), src.get_nrow(), src.get_ncol() );
    }

    assert((this->get_nnz()  == src.get_nnz())  &&
           (this->get_nrow() == src.get_nrow()) &&
           (this->get_ncol() == src.get_ncol()) );

    if (this->get_nnz() > 0) {

      // row indices
      ocl_dev2dev<int>(this->get_nnz(),
                       ocl_src->mat_.row,
                       this->mat_.row,
                       OCL_HANDLE(this->local_backend_.OCL_handle)->OCL_cmdQueue );

      // column indices
      ocl_dev2dev<int>(this->get_nnz(),
                       ocl_src->mat_.col,
                       this->mat_.col,
                       OCL_HANDLE(this->local_backend_.OCL_handle)->OCL_cmdQueue );

      // values
      ocl_dev2dev<ValueType>(this->get_nnz(),
                             ocl_src->mat_.val,
                             this->mat_.val,
                             OCL_HANDLE(this->local_backend_.OCL_handle)->OCL_cmdQueue );

    }

  } else if (const HostMatrix<ValueType> *host_src =
                 dynamic_cast<const HostMatrix<ValueType>*> (&src)) {

    // CPU to OCL
    this->CopyFromHost(*host_src);

  } else {

    LOG_INFO("Error unsupported OCL matrix type");
    this->info();
    src.info();
    FATAL_ERROR(__FILE__, __LINE__);

  }

}
    // Constructs the solver for `bm` with tolerance `e` and factorises at once.
    //
    // The base solver is given bm's lower bandwidth and an upper bandwidth of
    // min(lower + upper, Nrows - 1). `lm` and `um` are sized from bm.Nrows()
    // (um additionally from the base class's `ubw`), both are loaded from `bm`,
    // the diagonal of `lm` is overwritten with ones, and BandLUdecomposion()
    // is run.
    BandLUsolverPartialPivot<TYPE>::BandLUsolverPartialPivot(const BaseMatrix<TYPE> &bm, const TYPE &e) :
        BandLUsolver<TYPE>(bm,
                           bm.BandWidth().Lower(),
                           std::min(bm.BandWidth().Lower() + bm.BandWidth().Upper(), bm.Nrows() - 1),
                           e),
        lm(bm.Nrows()),
        um(bm.Nrows(), BandLUsolver<TYPE>::ubw),
        combine(lm, um)
    {
        // Seed both factors with the contents of bm.
        lm << bm;
        um << bm;

        // Put ones on the diagonal of the lower factor.
        static const TYPE unit(1);
        for (int diag = 1; diag <= bm.Nrows(); ++diag)
        {
            lm(diag, diag) = unit;
        }

        BandLUdecomposion();
    }
Пример #9
0
// Replaces the contents of this COO host matrix with a conversion of `mat`.
//
// This matrix is cleared first. Returns true when `mat` has no non-zeros,
// when `mat` is a HostMatrixCOO (copied via CopyFrom), or when `mat` is a
// HostMatrixCSR (converted via csr_to_coo). Returns false for any other
// source type, leaving this matrix cleared.
bool HostMatrixCOO<ValueType>::ConvertFrom(const BaseMatrix<ValueType> &mat) {

  this->Clear();

  // empty matrix is empty matrix
  if (mat.get_nnz() == 0)
    return true;

  // Same format: plain copy.
  const HostMatrixCOO<ValueType> *coo_src = dynamic_cast<const HostMatrixCOO<ValueType>*> (&mat);
  if (coo_src) {
    this->CopyFrom(*coo_src);
    return true;
  }

  // Only CSR is supported as a foreign source format.
  const HostMatrixCSR<ValueType> *csr_src = dynamic_cast<const HostMatrixCSR<ValueType>*> (&mat);
  if (!csr_src)
    return false;

  this->Clear();
  csr_to_coo(this->local_backend_.OpenMP_threads,
             csr_src->get_nnz(), csr_src->get_nrow(), csr_src->get_ncol(),
             csr_src->mat_, &this->mat_);

  // Adopt the source dimensions after the conversion.
  this->nrow_ = csr_src->get_nrow();
  this->ncol_ = csr_src->get_ncol();
  this->nnz_  = csr_src->get_nnz();

  return true;

}
// Copies `mat` into this COO host matrix.
//
// `mat` must report the same matrix format as this object (asserted). When
// `mat` is not a HostMatrixCOO the copy is dispatched to mat.CopyTo(this).
// Otherwise an empty destination is allocated from the source sizes, the
// sizes are asserted equal, and the row, col and val arrays are copied
// element by element in OpenMP-parallel loops.
void HostMatrixCOO<ValueType>::CopyFrom(const BaseMatrix<ValueType> &mat) {

    // copy only in the same format
    assert(this->get_mat_format() == mat.get_mat_format());

    const HostMatrixCOO<ValueType> *coo_src = dynamic_cast<const HostMatrixCOO<ValueType>*> (&mat);

    if (!coo_src) {
        // Host matrix knows only host matrices
        // -> dispatching
        mat.CopyTo(this);
        return;
    }

    if (this->nnz_ == 0) {
        this->AllocateCOO(coo_src->nnz_, coo_src->nrow_, coo_src->ncol_ );
    }

    assert((this->nnz_  == coo_src->nnz_)  &&
           (this->nrow_ == coo_src->nrow_) &&
           (this->ncol_ == coo_src->ncol_) );

    // Nothing to copy for an empty matrix.
    if (this->nnz_ <= 0) {
        return;
    }

    _set_omp_backend_threads(this->local_backend_, this->nnz_);

#pragma omp parallel for
    for (int k=0; k<this->nnz_; ++k)
        this->mat_.row[k] = coo_src->mat_.row[k];

#pragma omp parallel for
    for (int k=0; k<this->nnz_; ++k)
        this->mat_.col[k] = coo_src->mat_.col[k];

#pragma omp parallel for
    for (int k=0; k<this->nnz_; ++k)
        this->mat_.val[k] = coo_src->mat_.val[k];

}
Пример #11
0
    // Fills `lm` with the Cholesky factor of `bm`, column by column.
    //
    // `bm` must be square (asserted); the loop bound is lm.Nrows(). For each
    // column the diagonal candidate is bm(i, i), reduced (for i > 1) by the
    // (1, 1) entry of c_sub(lm, i, i, 1, i - 1) times its t(...) counterpart.
    // If the candidate is <= SimpleSolver::epsilon, the solver's `fail` flag
    // is set and the routine returns immediately, leaving `lm` partially
    // filled. Otherwise lm(i, i) becomes its square root and the entries below
    // the diagonal in that column are computed.
    //
    // NOTE(review): c_sub(...) presumably selects a row/column range of lm and
    // t(...) its transpose -- confirm against their definitions.
    void CholeskySolver<TYPE>::CholeskyDecomposition(const BaseMatrix<TYPE> &bm)
    {
        assert(bm.Nrows() == bm.Ncols());

        const TYPE &tol = SimpleSolver<TYPE>::epsilon;
        int order = lm.Nrows();
        TYPE pivot;
        for (int col = 1; col <= order; ++col)
        {
            // Diagonal candidate for this column.
            if (col > 1)
            {
                pivot = bm(col, col) - (c_sub(lm, col, col, 1, col - 1) * t(c_sub(lm, col, col, 1, col - 1)))(1, 1);
            }
            else
            {
                pivot = bm(col, col);
            }

            // Not positive enough: flag the failure and stop.
            if (pivot <= tol)
            {
                LinearEquationSolver<TYPE>::fail = true;
                return;
            }

            lm(col, col) = std::sqrt(pivot);

            // Entries below the diagonal in this column.
            for (int row = col + 1; row <= order; ++row)
            {
                if (col > 1)
                {
                    lm(row, col) = (bm(row, col) - (c_sub(lm, col, col, 1, col - 1) * t(c_sub(lm, row, row, 1, col - 1)))(1, 1)) / lm(col, col);
                }
                else
                {
                    lm(row, col) = bm(row, col) / lm(col, col);
                }
            }
        }
    }
Пример #12
0
    // Solves the system for right-hand side `in`, writing the result to `out`.
    //
    // Throws SingularException (after recording a tracer message) when the
    // solver is in the failed state. The stored matrix must be 1x1, `in` must
    // have one row, and `out` must have the same shape as `in` (all asserted).
    // Each written entry is in(j, i) divided by the matching diagonal entry of
    // the stored matrix.
    void ConstantSolver<TYPE>::Solve(const BaseMatrix<TYPE> &in, BaseMatrix<TYPE> &out) const
    {
        if (LinearEquationSolver<TYPE>::IsFailed())
        {
            Singleton<Tracer>::Instance()->AddMessage("ConstantSolver::Solve");
            throw SingularException(SimpleSolver<TYPE>::mat);
        }

        int r = SimpleSolver<TYPE>::mat.Nrows();
        int c = SimpleSolver<TYPE>::mat.Ncols();

        assert(r == 1 && c == 1 && c == in.Nrows());
        assert(in.Ncols() == out.Ncols() && in.Nrows() == out.Nrows());

        const BaseMatrix<TYPE> &coeff = SimpleSolver<TYPE>::mat;
        for (int col = 1; col <= c; ++col)
        {
            // Rows are visited from the last one down to the first.
            for (int row = r; row >= 1; --row)
            {
                out(row, col) = in(row, col) / coeff(row, row);
            }
        }
    }
Пример #13
0
// Adds this thread's share of the locally stored rows into `dest`.
//
// `dest` must not be a GPU matrix and must hold as many elements as this
// matrix (both checked). A local row whose global id satisfies
// id % numThreads == tid is added to dest's row `id` via simd::addTo, and
// that id is appended to `ids`.
void SparseRowCpuMatrix::addTo(BaseMatrix& dest,
                               std::vector<uint32_t>& ids,
                               size_t tid,
                               size_t numThreads) {
  CHECK(!dest.useGpu_);
  CHECK_EQ(dest.height_ * dest.width_, this->height_ * this->width_);

  std::vector<unsigned int>& rowIds = indexDictHandle_->localIndices;
  for (size_t local = 0; local < rowIds.size(); ++local) {
    uint32_t globalId = rowIds[local];
    if (globalId % numThreads != tid) {
      continue;  // this row belongs to another thread
    }
    simd::addTo(dest.rowBuf(globalId), getLocalRow(local), this->width_);
    ids.push_back(globalId);
  }
}
// Copies `src` into this OpenCL BCSR matrix.
//
// `src` must report the same matrix format as this object (asserted).
//  - OCLAcceleratorMatrixBCSR source: the destination is allocated if it
//    reports zero non-zeros and the sizes are asserted equal, but the path
//    then unconditionally invokes FATAL_ERROR -- no data is copied.
//  - HostMatrix source: forwarded to CopyFromHost().
//  - anything else: logs an error, prints info() for both matrices and
//    invokes FATAL_ERROR.
void OCLAcceleratorMatrixBCSR<ValueType>::CopyFrom(const BaseMatrix<ValueType> &src) {

  // copy only in the same format
  assert(this->get_mat_format() == src.get_mat_format());

  if (const OCLAcceleratorMatrixBCSR<ValueType> *ocl_src =
          dynamic_cast<const OCLAcceleratorMatrixBCSR<ValueType>*> (&src)) {

    // OCL to OCL copy
    if (this->get_nnz() == 0) {
      this->AllocateBCSR(src.get_nnz(), src.get_nrow(), src.get_ncol() );
    }

    assert((this->get_nnz()  == src.get_nnz())  &&
           (this->get_nrow() == src.get_nrow()) &&
           (this->get_ncol() == src.get_ncol()) );

    // Result is discarded; the call only touches the cast pointer.
    ocl_src->get_nnz();

    FATAL_ERROR(__FILE__, __LINE__);

  } else if (const HostMatrix<ValueType> *host_src =
                 dynamic_cast<const HostMatrix<ValueType>*> (&src)) {

    // CPU to OCL
    this->CopyFromHost(*host_src);

  } else {

    LOG_INFO("Error unsupported OCL matrix type");
    this->info();
    src.info();
    FATAL_ERROR(__FILE__, __LINE__);

  }

}
Пример #15
0
 // Solves the system for right-hand side `in`, writing the result to `out`.
 //
 // `in` must have as many rows as `lm` has columns and `out` must have the
 // same shape as `in` (both asserted). Throws NPDException (after recording a
 // tracer message) when the solver's `fail` flag is set. The solution is
 // obtained in two steps: lm.Solve into a temporary, then t(lm).Solve from
 // that temporary into `out`.
 void CholeskySolver<TYPE>::Solve(const BaseMatrix<TYPE> &in, BaseMatrix<TYPE> &out) const
 {
     assert(in.Nrows() == lm.Ncols());
     assert(in.Nrows() == out.Nrows() && in.Ncols() == out.Ncols());

     if (LinearEquationSolver<TYPE>::fail)
     {
         Singleton<Tracer>::Instance()->AddMessage("CholeskySolver::Solve(in, out)");
         throw NPDException(SimpleSolver<TYPE>::mat);
     }

     // First stage result, shaped like the output.
     Matrix<TYPE> stage(out.Nrows(), out.Ncols());
     lm.Solve(in, stage);

     // Second stage reads the first-stage result and fills `out`.
     t(lm).Solve(stage, out);
 }
Пример #16
0
 // Solves the system for right-hand side `in`, writing the result to `out`.
 //
 // `in` must have as many rows as `combine` has columns and `out` must have
 // the same shape as `in` (both asserted). Throws LogicError (after recording
 // a tracer message) when the solver is in the failed state. The solution is
 // obtained by solving with `lm` into a temporary and then with `um` from that
 // temporary into `out`.
 void LUsolverNoPivot<TYPE>::Solve(const BaseMatrix<TYPE> &in, BaseMatrix<TYPE> &out) const
 {
     assert(in.Nrows() == combine.Ncols());
     assert(in.Nrows() == out.Nrows() && in.Ncols() == out.Ncols());

     if (LinearEquationSolver<TYPE>::IsFailed())
     {
         Singleton<Tracer>::Instance()->AddMessage("LUsolverNoPivot::Solve");
         throw LogicError("LUsolverNoPivot: LU decomposition is failed");
     }

     // Intermediate result of the lower-factor solve, shaped like the input.
     Matrix<TYPE> stage(in.Nrows(), in.Ncols());
     LUsolver<TYPE>::lm.Solve(in, stage);
     LUsolver<TYPE>::um.Solve(stage, out);
 }
Пример #17
0
 // Solves the system for right-hand side `in`, writing the result to `out`.
 //
 // `in` must have as many rows as `combine` has columns and `out` must have
 // the same shape as `in` (both asserted). Throws SingularException (after
 // recording a tracer message) when the solver is in the failed state. The
 // input is passed through c_perm() with the base class's `left` permutation
 // before the `lm` solve; the `um` solve then fills `out`.
 void BandLUsolverPartialPivot<TYPE>::Solve(const BaseMatrix<TYPE> &in, BaseMatrix<TYPE> &out) const
 {
     assert(in.Nrows() == combine.Ncols());
     assert(in.Nrows() == out.Nrows() && in.Ncols() == out.Ncols());

     if (LinearEquationSolver<TYPE>::IsFailed())
     {
         Singleton<Tracer>::Instance()->AddMessage("BandLUsolverPartialPivot::Solve");
         throw SingularException(BandLUsolver<TYPE>::mat);
     }

     const PermuteMatrix<TYPE> &rowPerm = BandLUsolver<TYPE>::left;

     // Intermediate result of the lower-factor solve, shaped like the input.
     Matrix<TYPE> stage(in.Nrows(), in.Ncols());
     lm.Solve(c_perm(rowPerm, in), stage);
     um.Solve(stage, out);
 }
Пример #18
0
// Replaces the contents of this MIC HYB matrix with `mat`.
//
// This matrix is cleared first. Returns true when `mat` has no non-zeros or
// when `mat` is itself a MICAcceleratorMatrixHYB (copied via CopyFrom).
// Returns false for any other source type, leaving this matrix cleared.
bool MICAcceleratorMatrixHYB<ValueType>::ConvertFrom(const BaseMatrix<ValueType> &mat) {

  this->Clear();

  // empty matrix is empty matrix
  if (mat.get_nnz() == 0)
    return true;

  const MICAcceleratorMatrixHYB<ValueType> *hyb_src =
      dynamic_cast<const MICAcceleratorMatrixHYB<ValueType>*> (&mat);

  // No conversion from other formats is implemented here.
  if (!hyb_src)
    return false;

  this->CopyFrom(*hyb_src);
  return true;

}
// Replaces the contents of this OpenCL BCSR matrix with `mat`.
//
// This matrix is cleared first. Returns true when `mat` has no non-zeros or
// when `mat` is itself an OCLAcceleratorMatrixBCSR (copied via CopyFrom).
// Returns false for any other source type, leaving this matrix cleared.
bool OCLAcceleratorMatrixBCSR<ValueType>::ConvertFrom(const BaseMatrix<ValueType> &mat) {

  this->Clear();

  // empty matrix is empty matrix
  if (mat.get_nnz() == 0)
    return true;

  const OCLAcceleratorMatrixBCSR<ValueType> *bcsr_src =
      dynamic_cast<const OCLAcceleratorMatrixBCSR<ValueType>*> (&mat);

  if (bcsr_src) {
    this->CopyFrom(*bcsr_src);
    return true;
  }

  // Disabled CSR -> BCSR path, kept from the original for reference:
  //
  //   const OCLAcceleratorMatrixCSR<ValueType>  *cast_mat_csr;
  //   if ((cast_mat_csr = dynamic_cast<const OCLAcceleratorMatrixCSR<ValueType>*> (&mat)) != NULL) {
  //
  //     this->Clear();
  //
  //     FATAL_ERROR(__FILE__, __LINE__);
  //
  //     this->nrow_ = cast_mat_csr->get_nrow();
  //     this->ncol_ = cast_mat_csr->get_ncol();
  //     this->nnz_  = cast_mat_csr->get_nnz();
  //
  //     return 0;
  //
  //   }

  return false;

}
Пример #20
0
    // Solves the system for right-hand side `in`, writing the result to `out`.
    //
    // Throws SingularException (after recording a tracer message) when the
    // solver is in the failed state. `in` must have as many rows as the stored
    // matrix and `out` must have the same shape as `in` (both asserted). Every
    // entry of `in` is divided by element (1, 1) of the stored matrix.
    void IdentitySolver<TYPE>::Solve(const BaseMatrix<TYPE> &in, BaseMatrix<TYPE> &out) const
    {
        if (LinearEquationSolver<TYPE>::IsFailed())
        {
            Singleton<Tracer>::Instance()->AddMessage("IdentitySolver::Solve");
            throw SingularException(SimpleSolver<TYPE>::mat);
        }

        int order = SimpleSolver<TYPE>::mat.Nrows();

        assert(order == in.Nrows());
        assert(in.Ncols() == out.Ncols() && in.Nrows() == out.Nrows());

        // The single divisor applied to every entry.
        TYPE divisor = SimpleSolver<TYPE>::mat(1, 1);
        int columns = in.Ncols();
        for (int col = 1; col <= columns; ++col)
        {
            // Rows are visited from the last one down to the first.
            for (int row = order; row >= 1; --row)
            {
                out(row, col) = in(row, col) / divisor;
            }
        }

    }
Пример #21
0
// Copies `src` into this OpenCL HYB matrix.
//
// `src` must report the same matrix format as this object (asserted).
//  - OCLAcceleratorMatrixHYB source: if this matrix currently reports zero
//    non-zeros it is allocated with the source's ELL/COO sizes; nnz, nrow and
//    ncol are then asserted equal and the ELL (col, val) and COO
//    (row, col, val) buffers are copied with ocl_dev2dev() on this object's
//    command queue.
//  - HostMatrix source: forwarded to CopyFromHost().
//  - anything else: logs an error, prints info() for both matrices and
//    invokes FATAL_ERROR.
void OCLAcceleratorMatrixHYB<ValueType>::CopyFrom(const BaseMatrix<ValueType> &src) {

  // copy only in the same format
  assert(this->get_mat_format() == src.get_mat_format());

  if (const OCLAcceleratorMatrixHYB<ValueType> *ocl_src =
          dynamic_cast<const OCLAcceleratorMatrixHYB<ValueType>*> (&src)) {

    // OCL to OCL copy; an empty destination is sized from the source first
    if (this->get_nnz() == 0) {
      this->AllocateHYB(ocl_src->get_ell_nnz(), ocl_src->get_coo_nnz(), ocl_src->get_ell_max_row(),
                        ocl_src->get_nrow(), ocl_src->get_ncol());
    }

    assert((this->get_nnz()  == src.get_nnz())  &&
           (this->get_nrow() == src.get_nrow()) &&
           (this->get_ncol() == src.get_ncol()) );

    // ELL part (must be within same opencl context)
    if (this->get_ell_nnz() > 0) {

      ocl_dev2dev<int>(this->get_ell_nnz(),
                       ocl_src->mat_.ELL.col,
                       this->mat_.ELL.col,
                       OCL_HANDLE(this->local_backend_.OCL_handle)->OCL_cmdQueue );

      ocl_dev2dev<ValueType>(this->get_ell_nnz(),
                             ocl_src->mat_.ELL.val,
                             this->mat_.ELL.val,
                             OCL_HANDLE(this->local_backend_.OCL_handle)->OCL_cmdQueue );

    }

    // COO part (must be within same opencl context)
    if (this->get_coo_nnz() > 0) {

      ocl_dev2dev<int>(this->get_coo_nnz(),
                       ocl_src->mat_.COO.row,
                       this->mat_.COO.row,
                       OCL_HANDLE(this->local_backend_.OCL_handle)->OCL_cmdQueue );

      ocl_dev2dev<int>(this->get_coo_nnz(),
                       ocl_src->mat_.COO.col,
                       this->mat_.COO.col,
                       OCL_HANDLE(this->local_backend_.OCL_handle)->OCL_cmdQueue );

      ocl_dev2dev<ValueType>(this->get_coo_nnz(),
                             ocl_src->mat_.COO.val,
                             this->mat_.COO.val,
                             OCL_HANDLE(this->local_backend_.OCL_handle)->OCL_cmdQueue );

    }

  } else if (const HostMatrix<ValueType> *host_src =
                 dynamic_cast<const HostMatrix<ValueType>*> (&src)) {

    // CPU to OCL
    this->CopyFromHost(*host_src);

  } else {

    LOG_INFO("Error unsupported OCL matrix type");
    this->info();
    src.info();
    FATAL_ERROR(__FILE__, __LINE__);

  }

}
Пример #22
0
// Applies an SGD step, with optional lazily applied L1/L2 weight decay, from
// the locally stored gradient rows to `value`.
//
// t0 must have height_ entries and value.width_ must equal height_ * width_
// (both checked).
//
//  - decayRate == 0: unless `fini` is set, each local gradient row is
//    subtracted (scaled by learningRate) from the matching row of `value`.
//    With `fini` set nothing is done.
//  - decayRate != 0 and `fini`: every row whose t0 entry is below currentTime
//    receives the decay for the elapsed (currentTime - t0) steps; gradients
//    are not applied and t0 is left unchanged.
//  - decayRate != 0 and not `fini`: each local row first receives the
//    outstanding decay (if its t0 entry is below currentTime), then the
//    gradient step combined with one step of decay, and its t0 entry is set
//    to currentTime + 1.
//
// `useL1` selects simd::decayL1 for the decay; otherwise rows are scaled by
// 1 / (1 + steps * learningRate * decayRate).
void SparseRowCpuMatrix::sgdUpdate(BaseMatrix& value,
                                   IVector& t0,
                                   real learningRate,
                                   int currentTime,
                                   real decayRate,
                                   bool useL1,
                                   bool fini) {
  std::vector<unsigned int>& rowIds = indexDictHandle_->localIndices;

  // t0 and value are vectors
  CHECK_EQ(t0.getSize(), this->height_);
  CHECK_EQ(value.width_, this->height_ * this->width_);

  // ---- No decay: plain gradient step (nothing to finalise). ----
  if (decayRate == 0.0f) {
    if (!fini) {
      for (size_t k = 0; k < rowIds.size(); ++k) {
        real* grad = getLocalRow(k);
        real* weight = value.rowBuf(rowIds[k]);
        for (size_t c = 0; c < this->width_; ++c) {
          weight[c] -= learningRate * grad[c];
        }
      }
    }
    return;
  }

  // ---- Finalisation: catch up the decay on every row. ----
  if (fini) {
    for (size_t row = 0; row < this->height_; ++row) {
      real* weight = value.rowBuf(row);
      int* stamp = t0.getData() + row;
      if (stamp[0] >= currentTime) {
        continue;  // already up to date
      }
      // W(t0) -> W(t+1)
      int elapsed = currentTime - stamp[0];
      if (useL1) {
        real delta = elapsed * learningRate * decayRate;
        simd::decayL1(weight, weight, delta, this->width_);
      } else {
        real recip = 1.0f / (1.0f + elapsed * learningRate * decayRate);
        for (size_t c = 0; c < this->width_; ++c) {
          weight[c] *= recip;
        }
      }
    }
    return;
  }

  // ---- Regular step with L1 decay. ----
  if (useL1) {
    for (size_t k = 0; k < rowIds.size(); ++k) {
      real* grad = getLocalRow(k);
      real* weight = value.rowBuf(rowIds[k]);
      int* stamp = t0.getData() + rowIds[k];
      if (stamp[0] < currentTime) {
        // W(t0) -> W(t)
        int elapsed = currentTime - stamp[0];
        real delta = elapsed * learningRate * decayRate;
        simd::decayL1(weight, weight, delta, this->width_);
      }

      // W(t) -> W(t+1)
      for (size_t c = 0; c < this->width_; ++c) {
        weight[c] -= learningRate * grad[c];
      }
      simd::decayL1(weight, weight, learningRate * decayRate, this->width_);

      // state update to t+1
      stamp[0] = currentTime + 1;
    }
    return;
  }

  // ---- Regular step with L2 decay. ----
  real recipDecay = 1.0f / (1.0f + learningRate * decayRate);

  for (size_t k = 0; k < rowIds.size(); ++k) {
    real* grad = getLocalRow(k);
    real* weight = value.rowBuf(rowIds[k]);
    int* stamp = t0.getData() + rowIds[k];
    if (stamp[0] < currentTime) {
      // W(t0) -> W(t)
      int elapsed = currentTime - stamp[0];
      real recip = 1.0f / (1.0f + elapsed * learningRate * decayRate);
      for (size_t c = 0; c < this->width_; ++c) {
        weight[c] *= recip;
      }
    }

    // W(t) -> W(t+1)
    for (size_t c = 0; c < this->width_; ++c) {
      weight[c] = recipDecay * (weight[c] - learningRate * grad[c]);
    }

    // state update to t+1
    stamp[0] = currentTime + 1;
  }
}