void CMT::HistogramNonlinearity::initialize(
	const ArrayXXd& inputs,
	const ArrayXXd& outputs)
{
	if(inputs.rows() != outputs.rows() || inputs.cols() != outputs.cols())
		throw Exception("Inputs and outputs have to have same size.");

	mHistogram = vector<double>(mBinEdges.size() - 1);
	vector<int> counter(mBinEdges.size() - 1);

	for(int k = 0; k < mHistogram.size(); ++k) {
		mHistogram[k] = 0.;
		counter[k] = 0;
	}

	for(int i = 0; i < inputs.rows(); ++i)
		for(int j = 0; j < inputs.cols(); ++j) {
			// find bin
			int k = bin(inputs(i, j));

			// update histogram
			counter[k] += 1;
			mHistogram[k] += outputs(i, j);
		}

	for(int k = 0; k < mHistogram.size(); ++k)
		// guard on the counter to avoid dividing by zero for empty bins
		if(counter[k] > 0)
			// average output observed in bin k
			mHistogram[k] /= counter[k];
}
void CMT::WhiteningTransform::initialize(const ArrayXXd& input, int dimOut) {
	if(input.cols() < input.rows())
		throw Exception("Too few inputs to compute whitening transform.");

	mMeanIn = input.rowwise().mean();

	// compute covariances
	MatrixXd covXX = covariance(input);

	// input whitening
	SelfAdjointEigenSolver<MatrixXd> eigenSolver;
	eigenSolver.compute(covXX);

	Array<double, 1, Dynamic> eigenvalues = eigenSolver.eigenvalues();
	MatrixXd eigenvectors = eigenSolver.eigenvectors();

	// don't whiten directions with near-zero variance
	for(int i = 0; i < eigenvalues.size(); ++i)
		if(eigenvalues[i] < 1e-7)
			eigenvalues[i] = 1.;

	mPreIn = (eigenvectors.array().rowwise() * eigenvalues.sqrt().cwiseInverse()).matrix()
		* eigenvectors.transpose();
	mPreInInv = (eigenvectors.array().rowwise() * eigenvalues.sqrt()).matrix()
		* eigenvectors.transpose();

	mMeanOut = VectorXd::Zero(dimOut);
	mPreOut = MatrixXd::Identity(dimOut, dimOut);
	mPreOutInv = MatrixXd::Identity(dimOut, dimOut);
	mPredictor = MatrixXd::Zero(dimOut, input.rows());
	mGradTransform = MatrixXd::Zero(dimOut, input.rows());
	mLogJacobian = 1.;
}
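// The transform above is symmetric (ZCA) whitening: with the eigendecomposition
// C = V * diag(lambda) * V^T of the input covariance, mPreIn = V * diag(1/sqrt(lambda)) * V^T,
// so centered inputs mapped through it have roughly unit covariance in every direction
// whose variance is not near zero. Below is a minimal standalone sketch of the same
// construction (not CMT code: toy random data, no use of CMT's covariance() helper),
// which prints a covariance matrix close to the identity.

#include <iostream>
#include <Eigen/Dense>

using namespace Eigen;

int main() {
	// toy data: 3 dimensions, 1000 samples (columns are samples, as in CMT)
	MatrixXd input = MatrixXd::Random(3, 1000);

	// center the data and compute the sample covariance
	VectorXd mean = input.rowwise().mean();
	MatrixXd centered = input.colwise() - mean;
	MatrixXd cov = centered * centered.transpose() / (input.cols() - 1.);

	SelfAdjointEigenSolver<MatrixXd> solver(cov);
	ArrayXd eigenvalues = solver.eigenvalues();
	MatrixXd V = solver.eigenvectors();

	// leave directions with near-zero variance unscaled, as in the transform above
	for(int i = 0; i < eigenvalues.size(); ++i)
		if(eigenvalues[i] < 1e-7)
			eigenvalues[i] = 1.;

	// symmetric (ZCA) whitening matrix: W = V * diag(1 / sqrt(lambda)) * V^T
	MatrixXd W = V * eigenvalues.rsqrt().matrix().asDiagonal() * V.transpose();

	// whitened data should have approximately identity covariance
	MatrixXd white = W * centered;
	std::cout << white * white.transpose() / (white.cols() - 1.) << std::endl;

	return 0;
}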
ArrayXXd CMT::HistogramNonlinearity::operator()(const ArrayXXd& inputs) const {
	ArrayXXd outputs(inputs.rows(), inputs.cols());

	for(int i = 0; i < inputs.rows(); ++i)
		for(int j = 0; j < inputs.cols(); ++j)
			outputs(i, j) = mHistogram[bin(inputs(i, j))] + mEpsilon;

	return outputs;
}
ArrayXXd CMT::HistogramNonlinearity::gradient(const ArrayXXd& inputs) const {
	if(inputs.rows() != 1)
		throw Exception("Data has to be stored in one row.");

	ArrayXXd gradient = ArrayXXd::Zero(mHistogram.size(), inputs.cols());

	for(int i = 0; i < inputs.rows(); ++i)
		// iterate over columns (data points), not rows
		for(int j = 0; j < inputs.cols(); ++j)
			gradient(bin(inputs(i, j)), j) = 1.;

	return gradient;
}
void NestedSampler::setPosteriorSample(ArrayXXd newPosteriorSample) {
	Ndimensions = newPosteriorSample.rows();
	int Nsamples = newPosteriorSample.cols();

	posteriorSample.resize(Ndimensions, Nsamples);
	posteriorSample = newPosteriorSample;
}
ArrayXXd CMT::BlobNonlinearity::gradient(const ArrayXXd& inputs) const {
	if(inputs.rows() != 1)
		throw Exception("Data has to be stored in one row.");

	ArrayXXd diff = ArrayXXd::Zero(mNumComponents, inputs.cols());
	diff.rowwise() += inputs.row(0);
	diff.colwise() -= mMeans;

	ArrayXXd diffSq = diff.square();

	ArrayXd precisions = mLogPrecisions.exp();
	ArrayXd weights = mLogWeights.exp();

	ArrayXXd negEnergy = diffSq.colwise() * (-precisions / 2.);
	ArrayXXd negEnergyExp = negEnergy.exp();

	ArrayXXd gradient(3 * mNumComponents, inputs.cols());

	// gradient of mean
	gradient.topRows(mNumComponents) =
		(diff * negEnergyExp).colwise() * (weights * precisions);

	// gradient of log-precisions
	gradient.middleRows(mNumComponents, mNumComponents) =
		(diffSq / 2. * negEnergyExp).colwise() * (-weights * precisions);

	// gradient of log-weights
	gradient.bottomRows(mNumComponents) = negEnergyExp.colwise() * weights;

	return gradient;
}
ArrayXXd CMT::tanh(const ArrayXXd& arr) {
	ArrayXXd result(arr.rows(), arr.cols());

	#pragma omp parallel for
	for(int i = 0; i < arr.size(); ++i)
		result(i) = std::tanh(arr(i));

	return result;
}
ArrayXXd CMT::BlobNonlinearity::operator()(const ArrayXXd& inputs) const {
	if(inputs.rows() != 1)
		throw Exception("Data has to be stored in one row.");

	ArrayXXd diff = ArrayXXd::Zero(mNumComponents, inputs.cols());
	diff.rowwise() += inputs.row(0);
	diff.colwise() -= mMeans;

	ArrayXXd negEnergy = diff.square().colwise() * (-mLogPrecisions.exp() / 2.);

	return (mLogWeights.exp().transpose().matrix() * negEnergy.exp().matrix()).array() + mEpsilon;
}
ArrayXXd CMT::BlobNonlinearity::derivative(const ArrayXXd& inputs) const {
	if(inputs.rows() != 1)
		throw Exception("Data has to be stored in one row.");

	ArrayXXd diff = ArrayXXd::Zero(mNumComponents, inputs.cols());
	diff.rowwise() -= inputs.row(0);
	diff.colwise() += mMeans;

	ArrayXd precisions = mLogPrecisions.exp();

	ArrayXXd negEnergy = diff.square().colwise() * (-precisions / 2.);

	return (mLogWeights.exp() * precisions).transpose().matrix() * (diff * negEnergy.exp()).matrix();
}
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
	int N = mxGetScalar(prhs[0]);
	double d = mxGetScalar(prhs[1]);
	double h = mxGetScalar(prhs[2]);
	int Njacv = mxGetScalar(prhs[3]);
	double b = mxGetScalar(prhs[4]);
	double c = mxGetScalar(prhs[5]);
	double dr = mxGetScalar(prhs[6]);
	double di = mxGetScalar(prhs[7]);
	int threadNum = mxGetScalar(prhs[8]);
	double *a0 = mxGetPr(prhs[9]);
	double *v = mxGetPr(prhs[10]);
	double th = mxGetScalar(prhs[11]);
	double phi = mxGetScalar(prhs[12]);
	int nstp = mxGetScalar(prhs[13]);
	// mwSize isJ = mxGetScalar(prhs[14]);

	ArrayXXd av = gintgv(N, d, h, Njacv, b, c, dr, di, threadNum, a0, v, th, phi, nstp);

	plhs[0] = mxCreateDoubleMatrix(av.rows(), av.cols(), mxREAL);
	memcpy(mxGetPr(plhs[0]), av.data(), av.cols()*av.rows()*sizeof(double));
}
ArrayXXi CMT::sampleBinomial(const ArrayXXi& n, const ArrayXXd& p) {
	if(n.rows() != p.rows() || n.cols() != p.cols())
		throw Exception("n and p must be of the same size.");

	ArrayXXi samples = ArrayXXi::Zero(n.rows(), n.cols());

	#pragma omp parallel for
	for(int i = 0; i < samples.size(); ++i) {
		// very naive algorithm for generating binomial samples
		for(int k = 0; k < n(i); ++k)
			if(rand() / static_cast<double>(RAND_MAX) < p(i))
				samples(i) += 1;
	}

	return samples;
}
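// A short usage sketch for the function above: both arrays must have the same shape,
// and each entry of the result is drawn as samples(i, j) ~ Binomial(n(i, j), p(i, j)).
// The values are made up, and the include of the CMT header declaring sampleBinomial
// is assumed to match your build setup.

#include <iostream>
#include <Eigen/Dense>
// #include "utils.h"  // hypothetical path to the header declaring CMT::sampleBinomial

using Eigen::ArrayXXi;
using Eigen::ArrayXXd;

int main() {
	ArrayXXi n(2, 2);
	n << 10, 20,
	     30, 40;

	ArrayXXd p(2, 2);
	p << 0.1, 0.5,
	     0.5, 0.9;

	ArrayXXi samples = CMT::sampleBinomial(n, p);
	std::cout << samples << std::endl;

	return 0;
}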
/**
 * Algorithm due to Knuth, 1969.
 */
ArrayXXi CMT::samplePoisson(const ArrayXXd& lambda) {
	ArrayXXi samples(lambda.rows(), lambda.cols());
	ArrayXXd threshold = (-lambda).exp();

	#pragma omp parallel for
	for(int i = 0; i < samples.size(); ++i) {
		double p = rand() / static_cast<double>(RAND_MAX);
		int k = 0;

		while(p > threshold(i)) {
			k += 1;
			p *= rand() / static_cast<double>(RAND_MAX);
		}

		samples(i) = k;
	}

	return samples;
}
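// Why Knuth's method works: multiplying k uniform variates and comparing the product
// against exp(-lambda) is equivalent to summing k exponential(1) inter-arrival times
// and checking whether they fit within "time" lambda, so the loop count is Poisson(lambda)
// distributed. The expected number of iterations grows with lambda, which is why this
// simple scheme is best suited to small rates. Below is a minimal scalar sketch of the
// same idea (illustrative only, separate from the CMT function above; samplePoissonScalar
// is a hypothetical name).

#include <cmath>
#include <cstdlib>
#include <iostream>

int samplePoissonScalar(double lambda) {
	const double threshold = std::exp(-lambda);
	double p = std::rand() / static_cast<double>(RAND_MAX);
	int k = 0;

	// keep multiplying uniforms until the product drops below exp(-lambda)
	while(p > threshold) {
		k += 1;
		p *= std::rand() / static_cast<double>(RAND_MAX);
	}

	return k;
}

int main() {
	// the empirical mean over many draws should be close to lambda
	const double lambda = 3.5;
	double sum = 0.;

	for(int i = 0; i < 100000; ++i)
		sum += samplePoissonScalar(lambda);

	std::cout << "empirical mean: " << sum / 100000. << std::endl;

	return 0;
}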
CMT::WhiteningTransform::WhiteningTransform(const ArrayXXd& input, const ArrayXXd& output) {
	initialize(input, output.rows());
}
pair<pair<ArrayXXd, ArrayXXd>, Array<double, 1, Dynamic> > CMT::STM::computeDataGradient(
	const MatrixXd& input,
	const MatrixXd& output) const
{
	// make sure nonlinearity is differentiable
	DifferentiableNonlinearity* nonlinearity =
		dynamic_cast<DifferentiableNonlinearity*>(mNonlinearity);

	if(!nonlinearity)
		throw Exception("Nonlinearity has to be differentiable.");

	if(input.rows() != dimIn())
		throw Exception("Input has wrong dimensionality.");
	if(output.rows() != 1)
		throw Exception("Output has wrong dimensionality.");
	if(input.cols() != output.cols())
		throw Exception("Number of inputs and outputs should be the same.");

	if(dimInNonlinear() && !dimInLinear()) {
		Array<double, 1, Dynamic> responses;
		ArrayXXd jointEnergy;

		if(numFeatures() > 0)
			jointEnergy = mWeights * (mFeatures.transpose() * input).array().square().matrix()
				+ mPredictors * input;
		else
			jointEnergy = mPredictors * input;

		jointEnergy.colwise() += mBiases.array();
		jointEnergy *= mSharpness;

		responses = logSumExp(jointEnergy);

		// posterior over components for each input
		MatrixXd posterior = (jointEnergy.rowwise() - responses).array().exp();

		responses /= mSharpness;

		Array<double, 1, Dynamic> tmp0 = (*mNonlinearity)(responses);
		Array<double, 1, Dynamic> tmp1 = -mDistribution->gradient(output, tmp0);
		Array<double, 1, Dynamic> tmp2 = nonlinearity->derivative(responses);

		ArrayXXd avgPredictor = mPredictors.transpose() * posterior;

		ArrayXXd tmp3;
		if(numFeatures() > 0) {
			ArrayXXd avgWeights = (2. * mWeights).transpose() * posterior;
			tmp3 = mFeatures * (avgWeights * (mFeatures.transpose() * input).array()).matrix();
		} else {
			tmp3 = ArrayXXd::Zero(avgPredictor.rows(), avgPredictor.cols());
		}

		return make_pair(
			make_pair(
				(tmp3 + avgPredictor).rowwise() * (tmp1 * tmp2),
				ArrayXXd::Zero(output.rows(), output.cols())),
			mDistribution->logLikelihood(output, tmp0));

	} else if(dimInNonlinear() && dimInLinear()) {
		// split inputs into linear and nonlinear components
		MatrixXd inputNonlinear = input.topRows(dimInNonlinear());
		MatrixXd inputLinear = input.bottomRows(dimInLinear());

		Array<double, 1, Dynamic> responses;
		ArrayXXd jointEnergy;

		if(numFeatures() > 0)
			jointEnergy = mWeights * (mFeatures.transpose() * inputNonlinear).array().square().matrix()
				+ mPredictors * input;
		else
			jointEnergy = mPredictors * inputNonlinear;

		jointEnergy.colwise() += mBiases.array();
		jointEnergy *= mSharpness;

		responses = logSumExp(jointEnergy);

		// posterior over components for each input
		MatrixXd posterior = (jointEnergy.rowwise() - responses).array().exp();

		responses /= mSharpness;
		responses += (mLinearPredictor.transpose() * inputLinear).array();

		Array<double, 1, Dynamic> tmp0 = (*mNonlinearity)(responses);
		Array<double, 1, Dynamic> tmp1 = -mDistribution->gradient(output, tmp0);
		Array<double, 1, Dynamic> tmp2 = nonlinearity->derivative(responses);

		ArrayXXd avgPredictor = mPredictors.transpose() * posterior;

		ArrayXXd tmp3;
		if(numFeatures() > 0) {
			ArrayXXd avgWeights = (2. * mWeights).transpose() * posterior;
			tmp3 = mFeatures * (avgWeights * (mFeatures.transpose() * inputNonlinear).array()).matrix();
		} else {
			tmp3 = ArrayXXd::Zero(avgPredictor.rows(), avgPredictor.cols());
		}

		// concatenate gradients of nonlinear and linear component
		ArrayXXd inputGradient(dimIn(), input.cols());
		inputGradient << (tmp3 + avgPredictor).rowwise() * (tmp1 * tmp2),
			mLinearPredictor * (tmp1 * tmp2).matrix();

		return make_pair(
			make_pair(
				inputGradient,
				ArrayXXd::Zero(output.rows(), output.cols())),
			mDistribution->logLikelihood(output, tmp0));

	} else if(dimInLinear()) {
		double avgBias = logSumExp(mSharpness * mBiases)(0, 0) / mSharpness;

		Array<double, 1, Dynamic> responses = (mLinearPredictor.transpose() * input).array() + avgBias;

		Array<double, 1, Dynamic> tmp0 = (*mNonlinearity)(responses);
		Array<double, 1, Dynamic> tmp1 = -mDistribution->gradient(output, tmp0);
		Array<double, 1, Dynamic> tmp2 = nonlinearity->derivative(responses);

		return make_pair(
			make_pair(
				mLinearPredictor * (tmp1 * tmp2).matrix(),
				ArrayXXd::Zero(output.rows(), output.cols())),
			mDistribution->logLikelihood(output, tmp0));
	}

	return make_pair(
		make_pair(
			ArrayXXd::Zero(input.rows(), input.cols()),
			ArrayXXd::Zero(output.rows(), output.cols())),
		logLikelihood(input, output));
}
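// The core quantity in the function above is a sharpness-scaled log-sum-exp over the
// per-component energies, together with a softmax posterior over components that weights
// the per-component predictors in the gradient. The sketch below isolates just that step
// with toy numbers; the local logSumExp helper is a hypothetical stand-in for CMT's own
// utility (column-wise, stabilized by the column maximum), not CMT's implementation.

#include <iostream>
#include <Eigen/Dense>

using namespace Eigen;

// column-wise log(sum(exp(.))), stabilized by subtracting the column maximum
Array<double, 1, Dynamic> logSumExp(const ArrayXXd& energy) {
	Array<double, 1, Dynamic> maxima = energy.colwise().maxCoeff();
	Array<double, 1, Dynamic> sums = (energy.rowwise() - maxima).exp().colwise().sum();
	return maxima + sums.log();
}

int main() {
	// toy joint energies: 3 components (rows) x 4 data points (columns)
	ArrayXXd jointEnergy = ArrayXXd::Random(3, 4);
	double sharpness = 2.;

	jointEnergy *= sharpness;

	// soft maximum over components; larger sharpness values approximate
	// the hard maximum over components more closely
	Array<double, 1, Dynamic> responses = logSumExp(jointEnergy);

	// softmax posterior over components for each data point (columns sum to one)
	ArrayXXd posterior = (jointEnergy.rowwise() - responses).exp();

	responses /= sharpness;

	std::cout << "responses:\n" << responses << "\n\n";
	std::cout << "posterior column sums:\n" << posterior.colwise().sum() << std::endl;

	return 0;
}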