/**
 * \brief Calculate the log-likelihood of a linear regression contained
 * in a linear_reg object.
 *
 * The log-likelihood is
 *   loglik = -sum(residuals^2) / (2 * sigma2) - nids * log(sqrt(sigma2)),
 * evaluated with vectorised Eigen expressions.
 *
 * @param X The design matrix.
 */
void linear_reg::logLikelihood(const mematrix<double>& X)
{
    /* Original scalar implementation, kept for reference:
    loglik = 0.;
    double ss = 0;
    for (int i = 0; i < rdata.nids; i++) {
        double resid = rdata.Y[i] - beta.get(0, 0); // intercept
        for (int j = 1; j < beta.nrow; j++)
            resid -= beta.get(j, 0) * X.get(i, j);
        // residuals[i] = resid;
        ss += resid * resid;
    }
    sigma2 = ss / N;
    */

    loglik = 0.;
    double halfrecsig2 = .5 / sigma2;

    // Residuals start as Y minus the intercept ...
    double intercept = beta.get(0, 0);
    residuals.data = reg_data.Y.data.array() - intercept;

    // ... then subtract the covariate contributions: scale each
    // non-intercept column of X by its beta and sum row-wise.
    ArrayXXd betacol =
        beta.data.block(1, 0, beta.data.rows() - 1, 1).array().transpose();
    ArrayXXd resid_sub =
        (X.data.block(0, 1, X.data.rows(), X.data.cols() - 1)
         * betacol.matrix().asDiagonal()).rowwise().sum();
    residuals.data -= resid_sub.matrix();

    loglik -= (residuals.data.array().square() * halfrecsig2).sum();
    loglik -= static_cast<double>(reg_data.nids) * log(sqrt(sigma2));
}
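// A minimal standalone sketch of the same Gaussian log-likelihood, assuming
// plain Eigen types instead of ProbABEL's mematrix wrapper (the function name
// and signature here are hypothetical, for illustration only):
//
//   loglik = -sum(r_i^2) / (2 * sigma2) - n * log(sqrt(sigma2)),  r = Y - X*beta
#include <Eigen/Dense>
#include <cmath>

double gaussianLogLik(const Eigen::MatrixXd& X,  // design matrix, intercept column included
                      const Eigen::VectorXd& Y,
                      const Eigen::VectorXd& beta,
                      double sigma2)
{
    // One matrix-vector product covers the intercept and all covariates.
    Eigen::VectorXd residuals = Y - X * beta;
    double n = static_cast<double>(Y.size());
    return -residuals.squaredNorm() / (2. * sigma2) - n * std::log(std::sqrt(sigma2));
}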
/* Multiply each row of u element-wise by the row array temp,
 * where n is the number of rows to scale. */
MatrixXd arrayMultiplierRowWise(const MatrixXd& u, const ArrayXXd& temp, int n)
{
    ArrayXXd uArray = u.array();
    for (int i = 0; i < n; i++) {
        uArray.row(i) *= temp; // element-wise product with row i
    }
    return uArray.matrix();
}
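// Note: the loop above can be collapsed into a single broadcasting expression.
// A minimal sketch, assuming temp holds one row of u.cols() factors (demo
// names are hypothetical):
#include <Eigen/Dense>
#include <iostream>

void rowwiseBroadcastDemo()
{
    Eigen::MatrixXd u = Eigen::MatrixXd::Ones(3, 4);
    Eigen::Array<double, 1, Eigen::Dynamic> temp(4);
    temp << 1., 2., 3., 4.;
    // Equivalent to arrayMultiplierRowWise(u, temp, 3):
    Eigen::MatrixXd scaled = (u.array().rowwise() * temp).matrix();
    std::cout << scaled << std::endl;
}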
MatrixXd cube(MatrixXd xin)
{
    ArrayXXd x = xin.array(); // convert to Array for element-wise operations
    x *= (x * x);             // element-wise x^3
    return x.matrix();
}
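// Note: Eigen's Array class already provides a coefficient-wise cube(), so the
// helper above can be written as a one-liner (sketch, hypothetical name):
#include <Eigen/Dense>

Eigen::MatrixXd cubeBuiltin(const Eigen::MatrixXd& xin)
{
    return xin.array().cube().matrix(); // element-wise x^3
}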
double CMT::MCBM::parameterGradient(
    const MatrixXd& inputCompl,
    const MatrixXd& outputCompl,
    const lbfgsfloatval_t* x,
    lbfgsfloatval_t* g,
    const Trainable::Parameters& params_) const
{
    const Parameters& params = dynamic_cast<const Parameters&>(params_);

    // average log-likelihood
    double logLik = 0.;

    // interpret memory for parameters and gradients
    lbfgsfloatval_t* y = const_cast<lbfgsfloatval_t*>(x);

    int offset = 0;

    VectorLBFGS priors(params.trainPriors ? y : const_cast<double*>(mPriors.data()), mNumComponents);
    VectorLBFGS priorsGrad(g, mNumComponents);
    if(params.trainPriors)
        offset += priors.size();

    MatrixLBFGS weights(params.trainWeights ? y + offset : const_cast<double*>(mWeights.data()), mNumComponents, mNumFeatures);
    MatrixLBFGS weightsGrad(g + offset, mNumComponents, mNumFeatures);
    if(params.trainWeights)
        offset += weights.size();

    MatrixLBFGS features(params.trainFeatures ? y + offset : const_cast<double*>(mFeatures.data()), mDimIn, mNumFeatures);
    MatrixLBFGS featuresGrad(g + offset, mDimIn, mNumFeatures);
    if(params.trainFeatures)
        offset += features.size();

    MatrixLBFGS predictors(params.trainPredictors ? y + offset : const_cast<double*>(mPredictors.data()), mNumComponents, mDimIn);
    MatrixLBFGS predictorsGrad(g + offset, mNumComponents, mDimIn);
    if(params.trainPredictors)
        offset += predictors.size();

    MatrixLBFGS inputBias(params.trainInputBias ? y + offset : const_cast<double*>(mInputBias.data()), mDimIn, mNumComponents);
    MatrixLBFGS inputBiasGrad(g + offset, mDimIn, mNumComponents);
    if(params.trainInputBias)
        offset += inputBias.size();

    VectorLBFGS outputBias(params.trainOutputBias ? y + offset : const_cast<double*>(mOutputBias.data()), mNumComponents);
    VectorLBFGS outputBiasGrad(g + offset, mNumComponents);
    if(params.trainOutputBias)
        offset += outputBias.size();

    if(g) {
        // initialize gradients
        if(params.trainPriors)
            priorsGrad.setZero();
        if(params.trainWeights)
            weightsGrad.setZero();
        if(params.trainFeatures)
            featuresGrad.setZero();
        if(params.trainPredictors)
            predictorsGrad.setZero();
        if(params.trainInputBias)
            inputBiasGrad.setZero();
        if(params.trainOutputBias)
            outputBiasGrad.setZero();
    }

    // split data into batches for better performance
    int numData = static_cast<int>(inputCompl.cols());
    int batchSize = min(max(params.batchSize, 10), numData);

    #pragma omp parallel for
    for(int b = 0; b < inputCompl.cols(); b += batchSize) {
        const MatrixXd& input = inputCompl.middleCols(b, min(batchSize, numData - b));
        const MatrixXd& output = outputCompl.middleCols(b, min(batchSize, numData - b));

        ArrayXXd featureOutput = features.transpose() * input;
        MatrixXd featureOutputSq = featureOutput.square();
        MatrixXd weightsOutput = weights * featureOutputSq;
        ArrayXXd predictorOutput = predictors * input;

        // unnormalized posteriors over components for both possible outputs
        ArrayXXd logPost0 = (weightsOutput + inputBias.transpose() * input).colwise() + priors;
        ArrayXXd logPost1 = (logPost0 + predictorOutput).colwise() + outputBias.array();

        // sum over components to get unnormalized probabilities of outputs
        Array<double, 1, Dynamic> logProb0 = logSumExp(logPost0);
        Array<double, 1, Dynamic> logProb1 = logSumExp(logPost1);

        // normalize posteriors over components
        logPost0.rowwise() -= logProb0;
        logPost1.rowwise() -= logProb1;

        // stack row vectors
        ArrayXXd logProb01(2, input.cols());
        logProb01 << logProb0, logProb1;

        // normalize log-probabilities
        Array<double, 1, Dynamic> logNorm = logSumExp(logProb01);
        logProb1 -= logNorm;
        logProb0 -= logNorm;

        double logLikBatch = (output.array() * logProb1 + (1. - output.array()) * logProb0).sum();

        #pragma omp critical
        logLik += logLikBatch;

        if(!g)
            // don't compute gradients
            continue;

        Array<double, 1, Dynamic> tmp = output.array() * logProb0.exp()
            - (1. - output.array()) * logProb1.exp();

        ArrayXXd post0Tmp = logPost0.exp().rowwise() * tmp;
        ArrayXXd post1Tmp = logPost1.exp().rowwise() * tmp;
        ArrayXXd postDiffTmp = post1Tmp - post0Tmp;

        // update gradients
        if(params.trainPriors)
            #pragma omp critical
            priorsGrad -= postDiffTmp.rowwise().sum().matrix();

        if(params.trainWeights)
            #pragma omp critical
            weightsGrad -= postDiffTmp.matrix() * featureOutputSq.transpose();

        if(params.trainFeatures) {
            ArrayXXd tmp2 = weights.transpose() * postDiffTmp.matrix() * 2.;
            MatrixXd tmp3 = featureOutput * tmp2;

            #pragma omp critical
            featuresGrad -= input * tmp3.transpose();
        }

        if(params.trainPredictors)
            #pragma omp critical
            predictorsGrad -= post1Tmp.matrix() * input.transpose();

        if(params.trainInputBias)
            #pragma omp critical
            inputBiasGrad -= input * postDiffTmp.matrix().transpose();

        if(params.trainOutputBias)
            #pragma omp critical
            outputBiasGrad -= post1Tmp.rowwise().sum().matrix();
    }

    double normConst = inputCompl.cols() * log(2.) * dimOut();

    if(g) {
        for(int i = 0; i < offset; ++i)
            g[i] /= normConst;

        if(params.trainFeatures)
            featuresGrad += params.regularizeFeatures.gradient(features);

        if(params.trainPredictors)
            predictorsGrad += params.regularizePredictors.gradient(predictors.transpose()).transpose();

        if(params.trainWeights)
            weightsGrad += params.regularizeWeights.gradient(weights);
    }

    double value = -logLik / normConst;

    if(params.trainFeatures)
        value += params.regularizeFeatures.evaluate(features);
    if(params.trainPredictors)
        value += params.regularizePredictors.evaluate(predictors.transpose());
    if(params.trainWeights)
        value += params.regularizeWeights.evaluate(weights);

    return value;
}
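// The gradient code above relies on a logSumExp helper from CMT. A minimal
// numerically stable column-wise sketch of what such a helper typically does
// (an assumption for illustration, not CMT's actual implementation):
#include <Eigen/Dense>

Eigen::Array<double, 1, Eigen::Dynamic> logSumExpSketch(const Eigen::ArrayXXd& a)
{
    // Subtract each column's maximum before exponentiating to avoid overflow.
    Eigen::Array<double, 1, Eigen::Dynamic> colMax = a.colwise().maxCoeff();
    return colMax + (a.rowwise() - colMax).exp().colwise().sum().log();
}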
int main()
{
    // The Eigen approach.
    // Assumed from the surrounding file (not shown in this snippet): N, x0, x1
    // and k are compile-time constants, and the usual Eigen/std headers and
    // using-declarations are in scope.
    ArrayXd n = ArrayXd::LinSpaced(N + 1, 0, N);
    double multiplier = M_PI / N;
    Array<double, 1, N + 1> nT = n.transpose();

    // Chebyshev points on [-1, 1] and their image on [x0, x1]
    ArrayXd x = -cos(multiplier * n);
    ArrayXd xsub = x.middleRows(1, N - 1); // interior points
    ArrayXd ysub = (x1 - x0) / 2 * xsub + (x1 + x0) / 2;

    // Chebyshev polynomials and their derivatives at the interior points,
    // via T_n(x) = cos(n * acos(x))
    ArrayXXd T = cos((acos(x).matrix() * nT.matrix()).array());
    ArrayXXd Tsub = cos((acos(xsub).matrix() * nT.matrix()).array());
    ArrayXd sqinx = 1 / sqrt(1 - xsub * xsub);
    MatrixXd inv1x2 = sqinx.matrix().asDiagonal();
    // Can't use the following to test elements of inv1x2
    // std::cout << inv1x2(0,0) << "\n";
    MatrixXd Usub = inv1x2 * sin(((acos(xsub).matrix()) * nT.matrix()).array()).matrix();
    MatrixXd dTsub = Usub * (n.matrix().asDiagonal());
    MatrixXd d2Tsub = ((sqinx * sqinx).matrix().asDiagonal())
        * ((xsub.matrix().asDiagonal()) * dTsub
           - Tsub.matrix() * ((n * n).matrix().asDiagonal()));

    // Boundary rows for the second-derivative matrix
    MatrixXd d2T(N + 1, N + 1);
    RowVectorXd a = (pow(-1.0, nT)) * (nT * nT + 1) * (nT * nT) / 3;
    RowVectorXd b = (nT * nT + 1) * (nT * nT) / 3;
    d2T.middleRows(1, N - 1) = d2Tsub;
    d2T.row(0) = a;
    d2T.row(N) = b;

    // Second-order differentiation matrix and its interior block
    MatrixXd D2 = d2T * T.matrix().inverse();
    MatrixXd E2 = D2.middleRows(1, N - 1).middleCols(1, N - 1);
    MatrixXd Y = ysub.matrix().asDiagonal();
    MatrixXd H = -(4 / ((x1 - x0) * (x1 - x0))) * E2 + k * Y;

    Eigen::EigenSolver<Eigen::MatrixXd> HE(H);
    VectorXcd D = HE.eigenvalues();
    MatrixXcd V = HE.eigenvectors();
    std::cout << HE.info() << std::endl; // 0 == Eigen::Success

    // Write the eigenvalues, the eigenvectors, and column 544 of V to disk
    ofstream Dfile;
    Dfile.open("D-output.txt");
    ofstream Vfile;
    Vfile.open("V-output.txt");
    ofstream V544file;
    V544file.open("V544-output.txt");

    Dfile.precision(15);
    Dfile << D.real() << "\n";
    Vfile.precision(15);
    Vfile << V.real() << "\n";
    V544file.precision(15);
    for (int i = 1; i < N - 1; i++) {
        V544file << ysub[i - 1];
        V544file << " " << V.col(544).row(i - 1).real() << "\n";
    }

    Dfile.close();
    Vfile.close();
    V544file.close();

    system("gnuplot -p plot.gp");
    system("rsvg-convert -w 2000 -o V544-plot.png V544-plot.svg");
}
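// A small sanity check one might add (not part of the original program):
// verify an eigenpair returned by EigenSolver via the residual
// ||H*v - lambda*v||. Names here are hypothetical.
#include <Eigen/Dense>
#include <complex>
#include <iostream>

void checkEigenpair(const Eigen::MatrixXd& H,
                    const Eigen::VectorXcd& D,
                    const Eigen::MatrixXcd& V,
                    int j)
{
    // The residual should be near machine precision for a converged pair.
    double resid = (H.cast<std::complex<double> >() * V.col(j) - D(j) * V.col(j)).norm();
    std::cout << "eigenpair " << j << " residual: " << resid << std::endl;
}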