// Cross-entropy cost with L2 weight decay:
// J(W) = -(1/N) * sum(Y .* log(H)) + (lambda / 2) * sum(W.^2),
// where H = hypothesis_ (the softmax output), Y = ground_truth (one-hot
// labels, one column per sample) and N = the number of training samples.
template<typename T>
double softmax<T>::compute_cost(const Eigen::Ref<const EigenMat> &train,
                                const Eigen::Ref<const EigenMat> &weight,
                                const Eigen::Ref<const EigenMat> &ground_truth)
{
    // refresh hypothesis_ so the cost reflects the current weights
    compute_hypothesis(train, weight);
    double const NSamples = static_cast<double>(train.cols());
    return -1.0 * (hypothesis_.array().log() *
                   ground_truth.array()).sum() / NSamples +
           weight.array().pow(2.0).sum() * params_.lambda_ / 2.0;
}
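
// compute_hypothesis is called above but not defined in this section. As a
// rough sketch of what it is assumed to compute -- the column-wise softmax
// of (weight * train), shifted by each column's max so exp() cannot
// overflow -- it could look like the following. This is an illustration,
// not the actual member definition.
template<typename T>
void softmax<T>::compute_hypothesis(const Eigen::Ref<const EigenMat> &train,
                                    const Eigen::Ref<const EigenMat> &weight)
{
    using RowVec = Eigen::Matrix<T, 1, Eigen::Dynamic>;

    hypothesis_.noalias() = weight * train;  // class scores, NClass x NSamples
    // evaluate the reductions into temporaries first to avoid aliasing
    RowVec const col_max = hypothesis_.colwise().maxCoeff();
    hypothesis_.rowwise() -= col_max;        // softmax is shift invariant
    hypothesis_.array() = hypothesis_.array().exp();
    RowVec const col_sum = hypothesis_.colwise().sum();
    hypothesis_.array().rowwise() /= col_sum.array(); // each column sums to 1
}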
// Gradient of the cost above with respect to the weights:
// grad = -(1/N) * (Y - H) * X^T + lambda * W.
// Assumes compute_hypothesis (via compute_cost) has already been called,
// so hypothesis_ is in sync with the weights passed in.
template<typename T>
void softmax<T>::compute_gradient(const Eigen::Ref<const EigenMat> &train,
                                  const Eigen::Ref<const EigenMat> &weight,
                                  const Eigen::Ref<const EigenMat> &ground_truth)
{
    // (Y - H) * X^T; noalias() is safe because grad_ is not on the right side
    grad_.noalias() =
            (ground_truth.array() - hypothesis_.array())
            .matrix() * train.transpose();
    auto const NSamples = static_cast<double>(train.cols());
    // average over the batch, flip the sign, and add the L2 penalty term
    grad_.array() = grad_.array() / -NSamples +
            params_.lambda_ * weight.array();
}
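
// For context, a minimal sketch of how the two members above might be driven
// by a plain batch gradient-descent loop. The names gradient_descent, lr_
// and epochs_ are hypothetical, chosen for illustration only; the real
// training entry point is not part of this section.
template<typename T>
void softmax<T>::gradient_descent(const Eigen::Ref<const EigenMat> &train,
                                  const Eigen::Ref<const EigenMat> &ground_truth,
                                  EigenMat &weight)
{
    for (size_t epoch = 0; epoch != params_.epochs_; ++epoch) {
        // compute_cost refreshes hypothesis_, which compute_gradient reuses
        double const cost = compute_cost(train, weight, ground_truth);
        compute_gradient(train, weight, ground_truth);
        weight -= params_.lr_ * grad_;  // step against the gradient
        (void)cost;                     // available for logging / convergence tests
    }
}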