int main(int argc, char** argv) { int nIterations = 10000; if (argc > 1) { nIterations = atoi(argv[1]); } Lattice lattice(4, 8, 5.5, 1.0, 1.0, 1.0, 0, 10, 0, 1, 4, -1); vector<complex<double> > boundaryConditions(4, complex<double>(1.0, 0.0)); DWF linop(0.4, 1.8, 4, pyQCD::wilson, boundaryConditions, &lattice); VectorXcd psi = VectorXcd::Zero(4 * 12 * 4 * 4 * 4 * 8); psi(0) = 1.0; std::cout << "Performing " << nIterations << " matrix-vector products." << std::endl; boost::timer::cpu_timer timer; for (int i = 0; i < nIterations; ++i) { VectorXcd eta = linop.apply(psi); } boost::timer::cpu_times const elapsedTimes(timer.elapsed()); boost::timer::nanosecond_type const elapsed(elapsedTimes.system + elapsedTimes.user); boost::timer::nanosecond_type const walltime(elapsedTimes.wall); std::cout << "Total CPU time = " << elapsed / 1.0e9 << " s" << endl; std::cout << "CPU time per iteration = " << elapsed / 1.0e9 / nIterations << " s" << endl; std::cout << "Walltime = " << walltime / 1.0e9 << " s" << endl; std::cout << "Walltime per iteration = " << walltime / 1.0e9 / nIterations << " s" << endl; std::cout << "Performance: " << linop.getNumFlops() << " floating point operations; " << (double) linop.getNumFlops() / elapsed * 1000.0 << " MFlops / thread" << endl; return 0; }
/** * Conjugate Gradient Descent with history saving and pre-allocated data. * The history and cgdata should be preallocated. * * Preallocating the data is useful if the conjgrad is called within * an iteration loop (i.e. admm). */ float conjgrad_hist_prealloc(struct iter_history_s* history, unsigned int maxiter, float l2lambda, float epsilon, long N, void* data, const struct cg_data_s* cgdata, const struct vec_iter_s* vops, void (*linop)(void* data, float* dst, const float* src), float* x, const float* b, const float* x_truth, void* obj_eval_data, float (*obj_eval)(const void*, const float*)) { float* r = cgdata->r; float* p = cgdata->p; float* Ap = cgdata->Ap; float* x_err = NULL; if (NULL != x_truth) x_err = vops->allocate(N); // The first calculation of the residual might not // be necessary in some cases... linop(data, r, x); // r = A x vops->axpy(N, r, l2lambda, x); vops->xpay(N, -1., r, b); // r = b - r = b - A x vops->copy(N, p, r); // p = r float rsnot = (float)pow(vops->norm(N, r), 2.); float rsold = rsnot; float rsnew = rsnot; float eps_squared = pow(epsilon, 2.); for (unsigned int i = 0; i < maxiter; i++) { history->numiter = i + 1; if (NULL != x_truth) { vops->sub(N, x_err, x, x_truth); float relMSE = vops->norm(N, x_err) / vops->norm(N, x_truth); history->relMSE[i] = relMSE; debug_printf(DP_DEBUG3, "relMSE = %f\n", relMSE); } if ((NULL != obj_eval) && (NULL != obj_eval_data)) { float objval = obj_eval(obj_eval_data, x); history->objective[i] = objval; debug_printf(DP_DEBUG3, "#CG%d OBJVAL= %f\n", i, objval); } //debug_printf(DP_DEBUG3, "#%d: %f\n", i, (double)sqrtf(rsnew)); linop(data, Ap, p); // Ap = A p vops->axpy(N, Ap, l2lambda, p); float alpha = rsold / (float)vops->dot(N, p, Ap); vops->axpy(N, x, +alpha, p); vops->axpy(N, r, -alpha, Ap); rsnew = (float)pow(vops->norm(N, r), 2.); float beta = rsnew / rsold; rsold = rsnew; if (rsnew <= eps_squared) { //debug_printf(DP_DEBUG3, "%d ", i); break; } vops->xpay(N, beta, p, r); // p = beta * p + r history->resid[i] = sqrtf(rsnew); } if (NULL != x_truth) vops->del(x_err); return sqrtf(rsnew); }
/** * Iterative Soft Thresholding * * @param maxiter maximum number of iterations * @param epsilon stop criterion * @param tau (step size) weighting on the residual term, A^H (b - Ax) * @param lambda_start initial regularization weighting * @param lambda_end final regularization weighting (for continuation) * @param N size of input, x * @param data structure, e.g. sense_data * @param vops vector ops definition * @param op linear operator, e.g. A * @param thresh threshold function, e.g. complex soft threshold * @param x initial estimate * @param b observations */ void ist(unsigned int maxiter, float epsilon, float tau, float continuation, bool hogwild, long N, void* data, const struct vec_iter_s* vops, void (*op)(void* data, float* dst, const float* src), void (*thresh)(void* data, float lambda, float* dst, const float* src), void* tdata, float* x, const float* b, const float* x_truth, void* obj_eval_data, float (*obj_eval)(const void*, const float*)) { struct iter_data itrdata = { .rsnew = 1., .rsnot = 1., .iter = 0, .maxiter = maxiter, }; float* r = vops->allocate(N); float* x_err = NULL; if (NULL != x_truth) x_err = vops->allocate(N); itrdata.rsnot = vops->norm(N, b); float ls_old = 1.; float lambda_scale = 1.; int hogwild_k = 0; int hogwild_K = 10; for (itrdata.iter = 0; itrdata.iter < maxiter; itrdata.iter++) { if (NULL != x_truth) { vops->sub(N, x_err, x, x_truth); debug_printf(DP_DEBUG3, "relMSE = %f\n", vops->norm(N, x_err) / vops->norm(N, x_truth)); } if (NULL != obj_eval) { float objval = obj_eval(obj_eval_data, x); debug_printf(DP_DEBUG3, "#%d OBJVAL= %f\n", itrdata.iter, objval); } ls_old = lambda_scale; lambda_scale = ist_continuation(&itrdata, continuation); if (lambda_scale != ls_old) debug_printf(DP_DEBUG3, "##lambda_scale = %f\n", lambda_scale); thresh(tdata, tau, x, x); op(data, r, x); // r = A x vops->xpay(N, -1., r, b); // r = b - r = b - A x itrdata.rsnew = vops->norm(N, r); debug_printf(DP_DEBUG3, "#It %03d: %f \n", itrdata.iter, itrdata.rsnew / itrdata.rsnot); if (itrdata.rsnew < epsilon) break; vops->axpy(N, x, tau * lambda_scale, r); if (hogwild) hogwild_k++; if (hogwild_k == hogwild_K) { hogwild_K *= 2; hogwild_k = 0; tau /= 2; } } debug_printf(DP_DEBUG3, "\n"); if (NULL != x_truth) vops->del(x_err); vops->del(r); } /** * Iterative Soft Thresholding/FISTA to solve min || b - Ax ||_2 + lambda || T x ||_1 * * @param maxiter maximum number of iterations * @param epsilon stop criterion * @param tau (step size) weighting on the residual term, A^H (b - Ax) * @param lambda_start initial regularization weighting * @param lambda_end final regularization weighting (for continuation) * @param N size of input, x * @param data structure, e.g. sense_data * @param vops vector ops definition * @param op linear operator, e.g. A * @param thresh threshold function, e.g. complex soft threshold * @param x initial estimate * @param b observations */ void fista(unsigned int maxiter, float epsilon, float tau, float continuation, bool hogwild, long N, void* data, const struct vec_iter_s* vops, void (*op)(void* data, float* dst, const float* src), void (*thresh)(void* data, float lambda, float* dst, const float* src), void* tdata, float* x, const float* b, const float* x_truth, void* obj_eval_data, float (*obj_eval)(const void*, const float*)) { struct iter_data itrdata = { .rsnew = 1., .rsnot = 1., .iter = 0, .maxiter = maxiter, }; float* r = vops->allocate(N); float* o = vops->allocate(N); float* x_err = NULL; if (NULL != x_truth) x_err = vops->allocate(N); float ra = 1.; vops->copy(N, o, x); itrdata.rsnot = vops->norm(N, b); float ls_old = 1.; float lambda_scale = 1.; int hogwild_k = 0; int hogwild_K = 10; for (itrdata.iter = 0; itrdata.iter < maxiter; itrdata.iter++) { if (NULL != x_truth) { vops->sub(N, x_err, x, x_truth); debug_printf(DP_DEBUG3, "relMSE = %f\n", vops->norm(N, x_err) / vops->norm(N, x_truth)); } if (NULL != obj_eval) { float objval = obj_eval(obj_eval_data, x); debug_printf(DP_DEBUG3, "#%d OBJVAL= %f\n", itrdata.iter, objval); } ls_old = lambda_scale; lambda_scale = ist_continuation(&itrdata, continuation); if (lambda_scale != ls_old) debug_printf(DP_DEBUG3, "##lambda_scale = %f\n", lambda_scale); thresh(tdata, lambda_scale * tau, x, x); ravine(vops, N, &ra, x, o); // FISTA op(data, r, x); // r = A x vops->xpay(N, -1., r, b); // r = b - r = b - A x itrdata.rsnew = vops->norm(N, r); debug_printf(DP_DEBUG3, "#It %03d: %f \n", itrdata.iter, itrdata.rsnew / itrdata.rsnot); if (itrdata.rsnew < epsilon) break; vops->axpy(N, x, tau, r); if (hogwild) hogwild_k++; if (hogwild_k == hogwild_K) { hogwild_K *= 2; hogwild_k = 0; tau /= 2; } } debug_printf(DP_DEBUG3, "\n"); vops->del(o); vops->del(r); if (NULL != x_truth) vops->del(x_err); } /** * Landweber L. An iteration formula for Fredholm integral equations of the * first kind. Amer. J. Math. 1951; 73, 615-624. */ void landweber(unsigned int maxiter, float epsilon, float alpha, long N, long M, void* data, const struct vec_iter_s* vops, void (*op)(void* data, float* dst, const float* src), void (*adj)(void* data, float* dst, const float* src), float* x, const float* b, float (*obj_eval)(const void*, const float*)) { float* r = vops->allocate(M); float* p = vops->allocate(N); double rsnot = vops->norm(M, b); UNUSED(obj_eval); for (unsigned int i = 0; i < maxiter; i++) { op(data, r, x); // r = A x vops->xpay(M, -1., r, b); // r = b - r = b - A x double rsnew = vops->norm(M, r); debug_printf(DP_DEBUG3, "#%d: %f\n", i, rsnew / rsnot); if (rsnew < epsilon) break; adj(data, p, r); vops->axpy(N, x, alpha, p); } vops->del(r); vops->del(p); } /** * Conjugate Gradient Descent to solve Ax = b for symmetric A * * @param maxiter maximum number of iterations * @param regularization parameter * @param epsilon stop criterion * @param N size of input, x * @param data structure, e.g. sense_data * @param vops vector ops definition * @param linop linear operator, i.e. A * @param x initial estimate * @param b observations */ float conjgrad(unsigned int maxiter, float l2lambda, float epsilon, long N, void* data, const struct vec_iter_s* vops, void (*linop)(void* data, float* dst, const float* src), float* x, const float* b, const float* x_truth, void* obj_eval_data, float (*obj_eval)(const void*, const float*)) { float* r = vops->allocate(N); float* p = vops->allocate(N); float* Ap = vops->allocate(N); float* x_err = NULL; if (NULL != x_truth) x_err = vops->allocate(N); // The first calculation of the residual might not // be necessary in some cases... linop(data, r, x); // r = A x vops->axpy(N, r, l2lambda, x); vops->xpay(N, -1., r, b); // r = b - r = b - A x vops->copy(N, p, r); // p = r float rsnot = (float)pow(vops->norm(N, r), 2.); float rsold = rsnot; float rsnew = rsnot; float eps_squared = pow(epsilon, 2.); if (0. == rsold) { debug_printf(DP_DEBUG3, "CG: early out\n"); return 0.; } for (unsigned int i = 0; i < maxiter; i++) { if (NULL != x_truth) { vops->sub(N, x_err, x, x_truth); debug_printf(DP_DEBUG3, "relMSE = %f\n", vops->norm(N, x_err) / vops->norm(N, x_truth)); } if ((NULL != obj_eval) && (NULL != obj_eval_data)) { float objval = obj_eval(obj_eval_data, x); debug_printf(DP_DEBUG3, "#CG%d OBJVAL= %f\n", i, objval); } debug_printf(DP_DEBUG3, "#%d: %f\n", i, (double)sqrtf(rsnew)); linop(data, Ap, p); // Ap = A p vops->axpy(N, Ap, l2lambda, p); float pAp = (float)vops->dot(N, p, Ap); if (0. == pAp) break; float alpha = rsold / pAp; vops->axpy(N, x, +alpha, p); vops->axpy(N, r, -alpha, Ap); rsnew = (float)pow(vops->norm(N, r), 2.); float beta = rsnew / rsold; rsold = rsnew; if (rsnew <= eps_squared) { //debug_printf(DP_DEBUG3, "%d ", i); break; } vops->xpay(N, beta, p, r); // p = beta * p + r } vops->del(Ap); vops->del(p); vops->del(r); if (NULL != x_truth) vops->del(x_err); return sqrtf(rsnew); }