ok_status dense_operator_mul_t(void * data, vector * input, vector * output) { OK_CHECK_PTR(data); return blas_gemv(((dense_operator_data *) data)->dense_handle, CblasTrans, kOne, ((dense_operator_data *) data)->A, input, kZero, output); }
static void update(struct bfgs *opt, double f, const double *grad) { size_t i, n = opt->dim; double *H = opt->inv_hess; double *s = opt->step; double *y = opt->dg; /* update step */ memcpy(s, opt->search, n * sizeof(s[0])); blas_dscal(n, linesearch_step(&opt->ls), s, 1); /* update df */ opt->df = f - opt->f0; /* update dg */ memcpy(y, grad, n * sizeof(y[0])); blas_daxpy(n, -1.0, opt->grad0, 1, y, 1); double s_y = blas_ddot(n, s, 1, y, 1); /* NOTE: could use damped update instead (Nocedal and Wright, p. 537) */ assert(s_y > 0); /* initialize inv hessian on first step (Nocedal and Wright, p. 143) */ if (opt->first_step) { /* */ double y_y = blas_ddot(n, y, 1, y, 1); assert(y_y > 0); double scale = s_y / y_y; memset(H, 0, n * n * sizeof(H[0])); for (i = 0; i < n; i++) { H[i * n + i] = scale; } opt->first_step = 0; } /* compute H_y */ double *H_y = opt->H_dg; blas_gemv(n, n, BLAS_NOTRANS, 1.0, H, n, y, 1, 0.0, H_y, 1); double y_H_y = blas_ddot(n, H_y, 1, y, 1); double scale1 = (1.0 + (y_H_y / s_y)) / s_y; double rho = 1.0 / s_y; /* update inverse hessian */ blas_dger(n, n, scale1, s, 1, s, 1, H, n); blas_dger(n, n, -rho, H_y, 1, s, 1, H, n); blas_dger(n, n, -rho, s, 1, H_y, 1, H, n); /* update search direction */ blas_dgemv(BLAS_NOTRANS, n, n, -1.0, opt->inv_hess, n, grad, 1, 0.0, opt->search, 1); assert(isfinite(blas_dnrm2(n, opt->search, 1))); /* update initial position, value, and grad */ memcpy(opt->x0, opt->x, n * sizeof(opt->x0[0])); opt->f0 = f; memcpy(opt->grad0, grad, n * sizeof(opt->grad0[0])); }
ok_status dense_operator_mul_t_fused(void * data, ok_float alpha, vector * input, ok_float beta, vector * output) { OK_CHECK_PTR(data); return blas_gemv(((dense_operator_data *) data)->dense_handle, CblasTrans, alpha, ((dense_operator_data *) data)->A, input, beta, output); }
int ParpackSolver::Solve(int nev) { /* Get MPI info */ int nprocs, me; MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MPI_Comm_rank(MPI_COMM_WORLD, &me); MPI_Fint fcomm = MPI_Comm_c2f(MPI_COMM_WORLD); /* Select number of working Ritz vectors */ if(ncv == -1) ncv = 2*nev; ncv = std::min(ncv, n-1); /* Initialize matrix descriptors */ xdesc = pcontext->new_descriptor(n, 1, divup(n,nprocs), 1); Bdesc = pcontext->new_descriptor(n, ncv, divup(n,nprocs), ncv); assert(nloc == Bdesc->num_local_rows() && nloc == xdesc->num_local_rows()); assert(ncv == Bdesc->num_local_cols() && 1 == xdesc->num_local_cols()); /* Allocate local memory for eigenvector matrix $B$ */ Bvalues = (real*) opsec_malloc(Bdesc->local_size() * sizeof(real)); real sigma; int iparam[11], ipntr[11]; /* Set PARPACK parameters */ char bmat[] = "I"; char which[] = "LA"; char howmny[] = "All"; iparam[0] = 1; // ishfts iparam[2] = maxitr; // maxitr iparam[6] = 1; // mode /* Allocate working memory */ int lworkl = ncv*(ncv + 8); real* workl = (real*) opsec_calloc(lworkl, sizeof(real)); real* workd = (real*) opsec_calloc(3*nloc, sizeof(real)); real* resid = (real*) opsec_calloc(nloc, sizeof(real)); int* select = (int*) opsec_calloc(ncv, sizeof(int)); /* Begin reverse communication loop */ int itr = 0; int info = 0; int ido = 0; while(ido != 99) { parpack_psaupd(&fcomm, &ido, bmat, &nloc, which, &nev, &tol, resid, &ncv, Bvalues, &nloc, iparam, ipntr, workd, workl, &lworkl, &info); if(ido == 1 || ido == -1) { /* Compute y = A*x (don't forget Fortran indexing conventions!) */ slp::Matrix<real> A(Adesc, Avalues); slp::Matrix<real> x(xdesc, &workd[ipntr[0] - 1]); slp::Matrix<real> y(xdesc, &workd[ipntr[1] - 1]); slp::multiply(A, x, y); } } if(me == 0) { opsec_info("Number of Implicit Arnoldi update iterations taken is %d\n", iparam[2]); opsec_info(" info = %d\n", info); opsec_info(" nconv = %d, nev = %d\n", iparam[4], nev); time_t t = time(NULL); opsec_info("Time: %s\n", ctime(&t)); opsec_info("Post-processing Ritz values and vectors\n"); } /* Check return code */ if(info < 0) { /* Error encountered. Abort. */ if(me == 0) opsec_error("parpack_psaupd returned error: info = %d\n", info); return info; } else { /* Save number of successfully computed eigenvalues */ nconv = iparam[4]; evals.resize(nconv); /* Retrieve eigenvalues and eigenvectors */ int rvec = 1; int ierr; parpack_pseupd(&fcomm, &rvec, howmny, select, &evals[0], Bvalues, &nloc, &sigma, bmat, &nloc, which, &nev, &tol, resid, &ncv, Bvalues, &nloc, iparam, ipntr, workd, workl, &lworkl, &ierr); if(ierr != 0) { if(me == 0) opsec_error("parpack_pseupd returned error: ierr = %d\n", ierr); } } if(me == 0) { time_t t = time(NULL); opsec_info("Time: %s\n", ctime(&t)); } #if 0 { int i; /* Debugging: check residuals || A*x - lambda*x || */ y = (real*) opsec_calloc(nloc, sizeof(real)); for(i = iparam[4]-1; i >= 0; i--) { static char trans = 'T'; static int incx = 1; static int incy = 1; static real alpha = 1.0; static real beta = 0.0; real a = -evals[i]; ierr = MPI_Allgatherv(&evecs[i*nloc], nloc, REAL_MPI_TYPE, xfull, locsizes, locdisps, REAL_MPI_TYPE, MPI_COMM_WORLD); blas_gemv(&trans, &n, &nloc, &alpha, A, &n, xfull, &incx, &beta, y, &incy); blas_axpy(&nloc, &a, &evecs[i*nloc], &incx, y, &incy); real d = parpack_pnorm2(&fcomm, &nloc, y, &incy); if(myid == 0) printf("Eigenvalue %d: lambda = %16.16f, |A*x - lambda*x| = %16.16f\n", iparam[4]-i, evals[i], d); ierr = MPI_Allgatherv(y, nloc, REAL_MPI_TYPE, xfull, locsizes, locdisps, REAL_MPI_TYPE, MPI_COMM_WORLD); } free(y); } #endif #if 0 /* Sort from largest to smallest eigenvalue */ for(int j = 0; j < nconv/2; j++) { std::swap(evals[j], evals[nconv-j-1]); memcpy(workd, &B(0,j), nloc*sizeof(real)); memcpy(&B(0,j), &B(0,nconv-j-1), nloc*sizeof(real)); memcpy(&B(0,nconv-j-1), workd, nloc*sizeof(real)); } #endif /* Clean up */ free(workl); free(workd); free(resid); free(select); return nconv; }
ok_status regularized_sinkhorn_knopp(void * linalg_handle, ok_float * A_in, matrix * A_out, vector * d, vector * e, enum CBLAS_ORDER ord) { OK_CHECK_PTR(A_in); OK_CHECK_MATRIX(A_out); OK_CHECK_VECTOR(d); OK_CHECK_VECTOR(e); ok_status err = OPTKIT_SUCCESS; const ok_float kSinkhornConst = (ok_float) 1e-4; const ok_float kEps = (ok_float) 1e-2; const size_t kMaxIter = 300; ok_float norm_d, norm_e; size_t i; vector a, d_diff, e_diff; a.data = OK_NULL; d_diff.data = OK_NULL; e_diff.data = OK_NULL; if (A_out->size1 != d->size || A_out->size2 != e->size) return OK_SCAN_ERR( OPTKIT_ERROR_DIMENSION_MISMATCH ); vector_calloc(&d_diff, A_out->size1); vector_calloc(&e_diff, A_out->size2); norm_d = norm_e = 1; OK_CHECK_ERR( err, matrix_memcpy_ma(A_out, A_in, ord) ); OK_CHECK_ERR( err, matrix_abs(A_out) ); OK_CHECK_ERR( err, vector_set_all(d, kOne) ); OK_CHECK_ERR( err, vector_scale(e, kZero) ); /* optional argument ok_float pnorm? */ /* if (pnorm != 1) { matrix_pow(A, pnorm) } */ for (i = 0; i < kMaxIter && !err; ++i){ blas_gemv(linalg_handle, CblasTrans, kOne, A_out, d, kZero, e); vector_add_constant(e, kSinkhornConst / (ok_float) e->size); vector_recip(e); vector_scale(e, (ok_float) d->size); blas_gemv(linalg_handle, CblasNoTrans, kOne, A_out, e, kZero, d); vector_add_constant(d, kSinkhornConst / (ok_float) d->size); vector_recip(d); vector_scale(d, (ok_float) e->size); blas_axpy(linalg_handle, -kOne, d, &d_diff); blas_axpy(linalg_handle, -kOne, e, &e_diff); blas_nrm2(linalg_handle, &d_diff, &norm_d); blas_nrm2(linalg_handle, &e_diff, &norm_e); if ((norm_d < kEps) && (norm_e < kEps)) break; vector_memcpy_vv(&d_diff, d); vector_memcpy_vv(&e_diff, e); } /* optional argument ok_float pnorm? */ /* if (pnorm != 1) { vector_pow(d, kOne / pnorm) vector_pow(e, kOne / pnorm) } */ OK_CHECK_ERR( err, matrix_memcpy_ma(A_out, A_in, ord) ); if (!err) { for (i = 0; i < A_out->size1; ++i) { matrix_row(&a, A_out, i); vector_mul(&a, e); } for (i = 0; i < A_out->size2; ++i) { matrix_column(&a, A_out, i); vector_mul(&a, d); } } vector_free(&d_diff); vector_free(&e_diff); return err; }