ArrayXXd CMT::BlobNonlinearity::gradient(const ArrayXXd& inputs) const {
	if(inputs.rows() != 1)
		throw Exception("Data has to be stored in one row.");

	ArrayXXd diff = ArrayXXd::Zero(mNumComponents, inputs.cols());
	diff.rowwise() += inputs.row(0);
	diff.colwise() -= mMeans;

	ArrayXXd diffSq = diff.square();

	ArrayXd precisions = mLogPrecisions.exp();
	ArrayXd weights = mLogWeights.exp();

	ArrayXXd negEnergy = diffSq.colwise() * (-precisions / 2.);
	ArrayXXd negEnergyExp = negEnergy.exp();

	ArrayXXd gradient(3 * mNumComponents, inputs.cols());

	// gradient of means
	gradient.topRows(mNumComponents) = (diff * negEnergyExp).colwise() * (weights * precisions);

	// gradient of log-precisions
	gradient.middleRows(mNumComponents, mNumComponents) = (diffSq / 2. * negEnergyExp).colwise() * (-weights * precisions);

	// gradient of log-weights
	gradient.bottomRows(mNumComponents) = negEnergyExp.colwise() * weights;

	return gradient;
}
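// MCBM::samplePrior below draws one component label per input column: it combines the
// quadratic feature energies with the input-bias, predictor, prior and output-bias terms
// into unnormalized log-weights over the mNumComponents components, normalizes them with
// logSumExp, and then picks a label for each column by inverse-CDF sampling on the
// resulting per-column distribution.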
Array<int, 1, Dynamic> CMT::MCBM::samplePrior(const MatrixXd& input) const {
	if(input.rows() != dimIn())
		throw Exception("Inputs have wrong dimensionality.");

	ArrayXXd featureEnergy = mWeights * (mFeatures.transpose() * input).array().square().matrix();
	ArrayXXd biasEnergy = mInputBias.transpose() * input;
	ArrayXXd predictorEnergy = mPredictors * input;

	ArrayXXd tmp0 = (featureEnergy + biasEnergy).colwise() + mPriors.array();
	ArrayXXd tmp1 = (tmp0 + predictorEnergy).colwise() + mOutputBias.array();

	ArrayXXd logPrior = tmp0 + tmp1;
	logPrior.rowwise() -= logSumExp(logPrior);

	ArrayXXd prior = logPrior.exp();

	Array<int, 1, Dynamic> labels(input.cols());

	#pragma omp parallel for
	for(int j = 0; j < input.cols(); ++j) {
		int i = 0;
		double urand = static_cast<double>(rand()) / (static_cast<long>(RAND_MAX) + 1l);
		double cdf;

		// compute index
		for(cdf = prior(0, j); cdf < urand; cdf += prior(i, j))
			++i;

		labels[j] = i;
	}

	return labels;
}
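// The blob nonlinearity evaluated by operator() below is a sum of unnormalized Gaussian
// bumps plus a small offset,
//
//     f(x) = epsilon + sum_k w_k * exp(-p_k / 2 * (x - m_k)^2),
//
// where w_k = exp(mLogWeights[k]), p_k = exp(mLogPrecisions[k]) and m_k = mMeans[k].
// For each input column, gradient() above stacks the derivatives of f with respect to
// the means, the log-precisions and the log-weights into 3 * mNumComponents rows, in
// that order.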
ArrayXXd CMT::BlobNonlinearity::operator()(const ArrayXXd& inputs) const {
	if(inputs.rows() != 1)
		throw Exception("Data has to be stored in one row.");

	ArrayXXd diff = ArrayXXd::Zero(mNumComponents, inputs.cols());
	diff.rowwise() += inputs.row(0);
	diff.colwise() -= mMeans;

	ArrayXXd negEnergy = diff.square().colwise() * (-mLogPrecisions.exp() / 2.);

	return (mLogWeights.exp().transpose().matrix() * negEnergy.exp().matrix()).array() + mEpsilon;
}
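// MLR::predict below computes the softmax distribution over the mDimOut output classes
// for every input column, exp(W * x + b) normalized per column, and returns a one-hot
// matrix that marks the most likely class of each column.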
MatrixXd CMT::MLR::predict(const MatrixXd& input) const {
	if(input.rows() != mDimIn)
		throw Exception("Inputs have wrong dimensionality.");

	MatrixXd output = MatrixXd::Zero(mDimOut, input.cols());

	// distribution over outputs
	ArrayXXd prob = (mWeights * input).colwise() + mBiases;
	prob.rowwise() -= logSumExp(prob);
	prob = prob.exp();

	// mark the most likely output of each column
	for(int j = 0; j < input.cols(); ++j) {
		int i;

		// index of the largest probability
		prob.col(j).maxCoeff(&i);

		output(i, j) = 1.;
	}

	return output;
}
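// The derivative of the blob nonlinearity with respect to its scalar input is
//
//     f'(x) = sum_k w_k * p_k * (m_k - x) * exp(-p_k / 2 * (x - m_k)^2),
//
// which derivative() below evaluates for all input columns at once.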
ArrayXXd CMT::BlobNonlinearity::derivative(const ArrayXXd& inputs) const {
	if(inputs.rows() != 1)
		throw Exception("Data has to be stored in one row.");

	ArrayXXd diff = ArrayXXd::Zero(mNumComponents, inputs.cols());
	diff.rowwise() -= inputs.row(0);
	diff.colwise() += mMeans;

	ArrayXd precisions = mLogPrecisions.exp();

	ArrayXXd negEnergy = diff.square().colwise() * (-precisions / 2.);

	return (mLogWeights.exp() * precisions).transpose().matrix() * (diff * negEnergy.exp()).matrix();
}
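// MLR::parameterGradient below evaluates the regularized negative average log-likelihood
// in bits of the multinomial logistic regression and, if g is non-null, writes the
// gradient with respect to the trainable parameters into the L-BFGS parameter array.
// The first row of the weight matrix and the first bias are excluded from x and stay
// fixed; since the softmax is invariant under adding a constant to every row, this
// removes a redundant degree of freedom.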
double CMT::MLR::parameterGradient(
	const MatrixXd& input,
	const MatrixXd& output,
	const lbfgsfloatval_t* x,
	lbfgsfloatval_t* g,
	const Trainable::Parameters& params_) const
{
	const Parameters& params = dynamic_cast<const Parameters&>(params_);

	MatrixXd weights = mWeights;
	VectorXd biases = mBiases;

	// copy parameters (first row of weights and first bias are kept fixed)
	int k = 0;

	if(params.trainWeights)
		for(int i = 1; i < weights.rows(); ++i)
			for(int j = 0; j < weights.cols(); ++j, ++k)
				weights(i, j) = x[k];

	if(params.trainBiases)
		for(int i = 1; i < mBiases.rows(); ++i, ++k)
			biases[i] = x[k];

	// compute distribution over outputs
	ArrayXXd logProb = (weights * input).colwise() + biases;
	logProb.rowwise() -= logSumExp(logProb);

	// difference between prediction and actual output
	MatrixXd diff = (logProb.exp().matrix() - output);

	// compute gradients
	double normConst = output.cols() * log(2.);

	if(g) {
		int offset = 0;

		if(params.trainWeights) {
			Map<Matrix<double, Dynamic, Dynamic, RowMajor> > weightsGrad(g, mDimOut - 1, mDimIn);
			weightsGrad = (diff * input.transpose() / normConst).bottomRows(mDimOut - 1);

			offset += weightsGrad.size();

			weightsGrad += params.regularizeWeights.gradient(
				weights.bottomRows(mDimOut - 1).transpose()).transpose();
		}

		if(params.trainBiases) {
			VectorLBFGS biasesGrad(g + offset, mDimOut - 1);
			biasesGrad = diff.rowwise().sum().bottomRows(mDimOut - 1) / normConst;
			biasesGrad += params.regularizeBiases.gradient(biases.bottomRows(mDimOut - 1));
		}
	}

	// return negative average log-likelihood in bits
	double value = -(logProb * output.array()).sum() / normConst;

	if(params.trainWeights)
		value += params.regularizeWeights.evaluate(weights.bottomRows(mDimOut - 1).transpose());
	if(params.trainBiases)
		value += params.regularizeBiases.evaluate(biases.bottomRows(mDimOut - 1));

	return value;
}
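// The exponential nonlinearity is f(x) = exp(x) + epsilon with derivative f'(x) = exp(x);
// the offset epsilon bounds the output away from zero.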
ArrayXXd CMT::ExponentialFunction::derivative(const ArrayXXd& data) const {
	return data.exp();
}
ArrayXXd CMT::ExponentialFunction::operator()(const ArrayXXd& data) const {
	return data.exp() + mEpsilon;
}
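// A minimal sketch of a finite-difference check of BlobNonlinearity::derivative()
// against operator(). The constructor call BlobNonlinearity(3) and the helper name
// blobDerivativeCheck are illustrative assumptions; the check itself relies only on
// the two member functions defined above.
static double blobDerivativeCheck() {
	CMT::BlobNonlinearity blob(3); // assumed constructor: number of components

	ArrayXXd x(1, 5);
	x << -2., -1., 0., 1., 2.;

	// central differences of the nonlinearity at the test points
	const double h = 1e-6;
	ArrayXXd numerical = (blob(x + h) - blob(x - h)) / (2. * h);

	// analytic derivative
	ArrayXXd analytic = blob.derivative(x);

	// should be close to zero if derivative() matches operator()
	return (numerical - analytic).abs().maxCoeff();
}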