Esempio n. 1
0
ok_status dense_operator_mul_t(void * data, vector * input, vector * output)
{
	OK_CHECK_PTR(data);
	return blas_gemv(((dense_operator_data *) data)->dense_handle,
		CblasTrans, kOne, ((dense_operator_data *) data)->A, input,
		kZero, output);
}
Esempio n. 2
0
static void update(struct bfgs *opt, double f, const double *grad)
{
	size_t i, n = opt->dim;
	double *H = opt->inv_hess;
	double *s = opt->step;
	double *y = opt->dg;

	/* update step */
	memcpy(s, opt->search, n * sizeof(s[0]));
	blas_dscal(n, linesearch_step(&opt->ls), s, 1);

	/* update df */
	opt->df = f - opt->f0;

	/* update dg */
	memcpy(y, grad, n * sizeof(y[0]));
	blas_daxpy(n, -1.0, opt->grad0, 1, y, 1);

	double s_y = blas_ddot(n, s, 1, y, 1);

	/* NOTE: could use damped update instead (Nocedal and Wright, p. 537) */
	assert(s_y > 0);

	/* initialize inv hessian on first step (Nocedal and Wright, p. 143) */
	if (opt->first_step) {	/*  */
		double y_y = blas_ddot(n, y, 1, y, 1);
		assert(y_y > 0);
		double scale = s_y / y_y;

		memset(H, 0, n * n * sizeof(H[0]));
		for (i = 0; i < n; i++) {
			H[i * n + i] = scale;
		}
		opt->first_step = 0;
	}

	/* compute H_y */
	double *H_y = opt->H_dg;
	blas_gemv(n, n, BLAS_NOTRANS, 1.0, H, n, y, 1, 0.0, H_y, 1);

	double y_H_y = blas_ddot(n, H_y, 1, y, 1);
	double scale1 = (1.0 + (y_H_y / s_y)) / s_y;
	double rho = 1.0 / s_y;

	/* update inverse hessian */
	blas_dger(n, n, scale1, s, 1, s, 1, H, n);
	blas_dger(n, n, -rho, H_y, 1, s, 1, H, n);
	blas_dger(n, n, -rho, s, 1, H_y, 1, H, n);

	/* update search direction */
	blas_dgemv(BLAS_NOTRANS, n, n, -1.0, opt->inv_hess, n, grad, 1,
		   0.0, opt->search, 1);
	assert(isfinite(blas_dnrm2(n, opt->search, 1)));

	/* update initial position, value, and grad */
	memcpy(opt->x0, opt->x, n * sizeof(opt->x0[0]));
	opt->f0 = f;
	memcpy(opt->grad0, grad, n * sizeof(opt->grad0[0]));
}
Esempio n. 3
0
ok_status dense_operator_mul_t_fused(void * data, ok_float alpha,
	vector * input, ok_float beta, vector * output)
{
	OK_CHECK_PTR(data);
	return blas_gemv(((dense_operator_data *) data)->dense_handle,
		CblasTrans, alpha, ((dense_operator_data *) data)->A, input,
		beta, output);
}
Esempio n. 4
0
int ParpackSolver::Solve(int nev) {
    /* Get MPI info */
    int nprocs, me;
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
    MPI_Fint fcomm = MPI_Comm_c2f(MPI_COMM_WORLD);

    /* Select number of working Ritz vectors */
    if(ncv == -1)
        ncv = 2*nev;
    ncv = std::min(ncv, n-1);

    /* Initialize matrix descriptors */
    xdesc = pcontext->new_descriptor(n, 1, divup(n,nprocs), 1);
    Bdesc = pcontext->new_descriptor(n, ncv, divup(n,nprocs), ncv);
    assert(nloc == Bdesc->num_local_rows() && nloc == xdesc->num_local_rows());
    assert(ncv == Bdesc->num_local_cols() && 1 == xdesc->num_local_cols());

    /* Allocate local memory for eigenvector matrix $B$ */
    Bvalues = (real*) opsec_malloc(Bdesc->local_size() * sizeof(real));

    real sigma;
    int iparam[11], ipntr[11];

    /* Set PARPACK parameters */
    char bmat[] = "I";
    char which[] = "LA";
    char howmny[] = "All";
    iparam[0] = 1;      // ishfts
    iparam[2] = maxitr; // maxitr
    iparam[6] = 1;      // mode

    /* Allocate working memory */
    int lworkl = ncv*(ncv + 8);
    real* workl = (real*) opsec_calloc(lworkl, sizeof(real));
    real* workd = (real*) opsec_calloc(3*nloc, sizeof(real));
    real* resid = (real*) opsec_calloc(nloc, sizeof(real));
    int* select = (int*) opsec_calloc(ncv, sizeof(int));

    /* Begin reverse communication loop */
    int itr = 0;
    int info = 0;
    int ido = 0;
    while(ido != 99) {
        parpack_psaupd(&fcomm, &ido, bmat, &nloc, which, &nev,
                       &tol, resid, &ncv, Bvalues, &nloc, iparam, ipntr,
                       workd, workl, &lworkl, &info);

        if(ido == 1 || ido == -1) {
            /* Compute y = A*x (don't forget Fortran indexing conventions!) */
            slp::Matrix<real> A(Adesc, Avalues);
            slp::Matrix<real> x(xdesc, &workd[ipntr[0] - 1]);
            slp::Matrix<real> y(xdesc, &workd[ipntr[1] - 1]);
            slp::multiply(A, x, y);
        }
    }

    if(me == 0) {
        opsec_info("Number of Implicit Arnoldi update iterations taken is %d\n", iparam[2]);
        opsec_info("  info = %d\n", info);
        opsec_info("  nconv = %d, nev = %d\n", iparam[4], nev);

        time_t t = time(NULL);
        opsec_info("Time: %s\n", ctime(&t));
        opsec_info("Post-processing Ritz values and vectors\n");
    }

    /* Check return code */
    if(info < 0) {
        /* Error encountered.  Abort. */
        if(me == 0)
            opsec_error("parpack_psaupd returned error: info = %d\n", info);
        return info;
    }
    else {
        /* Save number of successfully computed eigenvalues */
        nconv = iparam[4];
        evals.resize(nconv);

        /* Retrieve eigenvalues and eigenvectors */
        int rvec = 1;
        int ierr;
        parpack_pseupd(&fcomm, &rvec, howmny, select, &evals[0], Bvalues, &nloc, &sigma,
                       bmat, &nloc, which, &nev, &tol, resid, &ncv, Bvalues, &nloc,
                       iparam, ipntr, workd, workl, &lworkl, &ierr);

        if(ierr != 0) {
            if(me == 0)
                opsec_error("parpack_pseupd returned error: ierr = %d\n", ierr);
        }
    }

    if(me == 0) {
        time_t t = time(NULL);
        opsec_info("Time: %s\n", ctime(&t));
    }

#if 0
    {
        int i;
        /* Debugging: check residuals  || A*x - lambda*x || */
        y = (real*) opsec_calloc(nloc, sizeof(real));
        for(i = iparam[4]-1; i >= 0; i--) { 
            static char trans = 'T';
            static int incx = 1;
            static int incy = 1;
            static real alpha = 1.0;
            static real beta = 0.0;
            real a = -evals[i];
            ierr = MPI_Allgatherv(&evecs[i*nloc], nloc, REAL_MPI_TYPE, xfull, locsizes, locdisps, REAL_MPI_TYPE, MPI_COMM_WORLD);
            blas_gemv(&trans, &n, &nloc, &alpha, A, &n, xfull, &incx, &beta, y, &incy);
            blas_axpy(&nloc, &a, &evecs[i*nloc], &incx, y, &incy);
            real d = parpack_pnorm2(&fcomm, &nloc, y, &incy);
            if(myid == 0)
                printf("Eigenvalue %d: lambda = %16.16f, |A*x - lambda*x| = %16.16f\n", iparam[4]-i, evals[i], d);
            ierr = MPI_Allgatherv(y, nloc, REAL_MPI_TYPE, xfull, locsizes, locdisps, REAL_MPI_TYPE, MPI_COMM_WORLD);
        }
        free(y);
    }
#endif

#if 0
    /* Sort from largest to smallest eigenvalue */
    for(int j = 0; j < nconv/2; j++) {
        std::swap(evals[j], evals[nconv-j-1]);
        memcpy(workd, &B(0,j), nloc*sizeof(real));
        memcpy(&B(0,j), &B(0,nconv-j-1), nloc*sizeof(real));
        memcpy(&B(0,nconv-j-1), workd, nloc*sizeof(real));
    }
#endif

    /* Clean up */
    free(workl);
    free(workd);
    free(resid);
    free(select);

    return nconv;
}
Esempio n. 5
0
ok_status regularized_sinkhorn_knopp(void * linalg_handle, ok_float * A_in,
	matrix * A_out, vector * d, vector * e, enum CBLAS_ORDER ord)
{
	OK_CHECK_PTR(A_in);
	OK_CHECK_MATRIX(A_out);
	OK_CHECK_VECTOR(d);
	OK_CHECK_VECTOR(e);

	ok_status err = OPTKIT_SUCCESS;
	const ok_float kSinkhornConst = (ok_float) 1e-4;
	const ok_float kEps = (ok_float) 1e-2;
	const size_t kMaxIter = 300;
	ok_float norm_d, norm_e;
	size_t i;

	vector a, d_diff, e_diff;
	a.data = OK_NULL;
	d_diff.data = OK_NULL;
	e_diff.data = OK_NULL;

	if (A_out->size1 != d->size || A_out->size2 != e->size)
		return OK_SCAN_ERR( OPTKIT_ERROR_DIMENSION_MISMATCH );

	vector_calloc(&d_diff, A_out->size1);
	vector_calloc(&e_diff, A_out->size2);

	norm_d = norm_e = 1;

	OK_CHECK_ERR( err, matrix_memcpy_ma(A_out, A_in, ord) );
	OK_CHECK_ERR( err, matrix_abs(A_out) );
	OK_CHECK_ERR( err, vector_set_all(d, kOne) );
	OK_CHECK_ERR( err, vector_scale(e, kZero) );

	/* optional argument ok_float pnorm? */
	/*
	if (pnorm != 1) {
		matrix_pow(A, pnorm)
	}
	*/

	for (i = 0; i < kMaxIter && !err; ++i){
		blas_gemv(linalg_handle, CblasTrans, kOne, A_out, d, kZero, e);
		vector_add_constant(e, kSinkhornConst / (ok_float) e->size);
		vector_recip(e);
		vector_scale(e, (ok_float) d->size);

		blas_gemv(linalg_handle, CblasNoTrans, kOne, A_out, e, kZero,
			d);
		vector_add_constant(d, kSinkhornConst / (ok_float) d->size);
		vector_recip(d);
		vector_scale(d, (ok_float) e->size);

		blas_axpy(linalg_handle, -kOne, d, &d_diff);
		blas_axpy(linalg_handle, -kOne, e, &e_diff);

		blas_nrm2(linalg_handle, &d_diff, &norm_d);
		blas_nrm2(linalg_handle, &e_diff, &norm_e);

		if ((norm_d < kEps) && (norm_e < kEps))
			break;

		vector_memcpy_vv(&d_diff, d);
		vector_memcpy_vv(&e_diff, e);
	}

	/* optional argument ok_float pnorm? */
	/*
	if (pnorm != 1) {
		vector_pow(d, kOne / pnorm)
		vector_pow(e, kOne / pnorm)
	}
	*/

	OK_CHECK_ERR( err, matrix_memcpy_ma(A_out, A_in, ord) );
	if (!err) {
		for (i = 0; i < A_out->size1; ++i) {
			matrix_row(&a, A_out, i);
			vector_mul(&a, e);
		}
		for (i = 0; i < A_out->size2; ++i) {
			matrix_column(&a, A_out, i);
			vector_mul(&a, d);
		}
	}

	vector_free(&d_diff);
	vector_free(&e_diff);

	return err;
}