static std::shared_ptr<block_cyclic_mat_t> make_tridiagonal(std::shared_ptr<blacs_grid_t> grid, blas_idx_t n_global) { // First create a matrix with 2 on the diagonal auto a = block_cyclic_mat_t::diagonal(grid, n_global, n_global, 2.0); // Then set the off-diagonal entries to -1 // See: http://icl.cs.utk.edu/lapack-forum/archives/scalapack/msg00055.html char uplo = 'L'; blas_idx_t n = n_global - 1; blas_idx_t ia = 2; blas_idx_t ja = 1; double zero = 0.0; double minus_one = -1.0; pdlaset_(uplo, n, n, zero, minus_one, a -> local_data(), ia, ja, a -> descriptor()); uplo = 'U'; ia = 1; ja = 2; pdlaset_(uplo, n, n, zero, minus_one, a -> local_data(), ia, ja, a -> descriptor()); return a; }
PyObject* scalapack_set(PyObject *self, PyObject *args) { PyArrayObject* a; // matrix; PyArrayObject* desca; // descriptor Py_complex alpha; Py_complex beta; int m, n; int ia, ja; char uplo; if (!PyArg_ParseTuple(args, "OODDciiii", &a, &desca, &alpha, &beta, &uplo, &m, &n, &ia, &ja)) return NULL; if (a->descr->type_num == PyArray_DOUBLE) pdlaset_(&uplo, &m, &n, &(alpha.real), &(beta.real), DOUBLEP(a), &ia, &ja, INTP(desca)); else pzlaset_(&uplo, &m, &n, &alpha, &beta, (void*)COMPLEXP(a), &ia, &ja, INTP(desca)); Py_RETURN_NONE; }
int main(int argc, char *argv[]) { // Some constants int minusone = -1; int zero = 0; int one = 1; double dzero = 0.0; // ConText int ConTxt = minusone; // order char order = 'R'; char scope = 'A'; // root process int root = zero; // BLACS/SCALAPACK parameters // the size of the blocks the distributed matrix is split into // (applies to both rows and columns) int mb = 32; int nb = mb; // PDSYEVxxx constraint // the number of rows and columns in the processor grid // only square processor grids due to C vs. Fortran ordering int nprow = 2; int npcol = nprow; // only square processor grids, // starting row and column in grid, do not change int rsrc = zero; int csrc = zero; // dimensions of the matrix to diagonalize int m = 1000; int n = m; // only square matrices int info = zero; // Rest of code will only work for: // nprow = npcol // mb = nb; // m = n; // rsrc = crsc; // Paramteres for Trivial Matrix double alpha = 0.1; // off-diagonal double beta = 75.0; // diagonal // For timing: double tdiag0, tdiag, ttotal0, ttotal; // BLACS Communicator MPI_Comm blacs_comm; int nprocs; int iam; int myrow, mycol; MPI_Init(&argc, &argv); MPI_Barrier(MPI_COMM_WORLD); ttotal0 = MPI_Wtime(); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MPI_Comm_rank(MPI_COMM_WORLD, &iam); if (argc > one) { nprow = strtod(argv[1],NULL); m = strtod(argv[2],NULL); npcol = nprow; n = m; } if (iam == root) { printf("world size %d \n",nprocs); printf("n %d \n", n); printf("nprow %d \n", nprow); printf("npcol %d \n", npcol); } // We can do this on any subcommunicator. #ifdef CartComm int dim[2]; int pbc[2]; dim[0] = nprow; dim[1] = npcol; pbc[0] = 0; pbc[1] = 0; MPI_Cart_create(MPI_COMM_WORLD, 2, dim, pbc, 1, &blacs_comm); #else blacs_comm = MPI_COMM_WORLD; #endif // initialize the grid // The lines below are equivalent to the one call to: if (blacs_comm != MPI_COMM_NULL) { ConTxt = Csys2blacs_handle_(blacs_comm); Cblacs_gridinit_(&ConTxt, &order, nprow, npcol); // get information back about the grid Cblacs_gridinfo_(ConTxt, &nprow, &npcol, &myrow, &mycol); } if (ConTxt != minusone) { int desc[9]; // get the size of the distributed matrix int locM = numroc_(&m, &mb, &myrow, &rsrc, &nprow); int locN = numroc_(&n, &nb, &mycol, &csrc, &npcol); // printf ("locM = %d \n", locM); // printf ("locN = %d \n", locN); int lld = MAX(one,locM); // build the descriptor descinit_(desc, &m, &n, &mb, &nb, &rsrc, &csrc, &ConTxt, &lld, &info); // Allocate arrays // eigenvalues double* eigvals = malloc(n * sizeof(double)); // allocate the distributed matrices double* mata = malloc(locM*locN * sizeof(double)); // allocate the distributed matrix of eigenvectors double* z = malloc(locM*locN * sizeof(double)); // Eigensolver parameters int ibtype = one; char jobz = 'V'; // eigenvectors also char range = 'A'; // all eiganvalues char uplo = 'L'; // work with upper double vl, vu; int il, iu; char cmach = 'U'; double abstol = 2.0 * pdlamch_(&ConTxt, &cmach); int eigvalm, nz; double orfac = -1.0; //double orfac = 0.001; int* ifail; ifail = malloc(m * sizeof(int)); int* iclustr; iclustr = malloc(2*nprow*npcol * sizeof(int)); double* gap; gap = malloc(nprow*npcol * sizeof(double)); double* work; work = malloc(3 * sizeof(double)); int querylwork = minusone; int* iwork; iwork = malloc(1 * sizeof(int)); int queryliwork = minusone; // Build a trivial distributed matrix: Diagonal matrix pdlaset_(&uplo, &m, &n, &alpha, &beta, mata, &one, &one, desc); // First there is a workspace query // pdsyevx_(&jobz, &range, &uplo, &n, mata, &one, &one, desc, &vl, // &vu, &il, &iu, &abstol, &eigvalm, &nz, eigvals, &orfac, z, &one, // &one, desc, work, &querylwork, iwork, &queryliwork, ifail, iclustr, gap, &info); pdsyevd_(&jobz, &uplo, &n, mata, &one, &one, desc, eigvals, z, &one, &one, desc, work, &querylwork, iwork, &queryliwork, &info); //pdsyev_(&jobz, &uplo, &m, mata, &one, &one, desc, eigvals, // z, &one, &one, desc, work, &querylwork, &info); int lwork = (int)work[0]; //printf("lwork %d\n", lwork); free(work); int liwork = (int)iwork[0]; //printf("liwork %d\n", liwork); free(iwork); work = (double*)malloc(lwork * sizeof(double)); iwork = (int*)malloc(liwork * sizeof(int)); // This is actually diagonalizes the matrix // pdsyevx_(&jobz, &range, &uplo, &n, mata, &one, &one, desc, &vl, // &vu, &il, &iu, &abstol, &eigvalm, &nz, eigvals, &orfac, z, &one, // &one, desc, work, &lwork, iwork, &liwork, ifail, iclustr, gap, &info); Cblacs_barrier(ConTxt, &scope); tdiag0 = MPI_Wtime(); pdsyevd_(&jobz, &uplo, &n, mata, &one, &one, desc, eigvals, z, &one, &one, desc, work, &lwork, iwork, &liwork, &info); //pdsyev_(&jobz, &uplo, &m, mata, &one, &one, desc, eigvals, // z, &one, &one, desc, work, &lwork, &info); Cblacs_barrier(ConTxt, &scope); tdiag = MPI_Wtime() - tdiag0; free(work); free(iwork); free(gap); free(iclustr); free(ifail); free(z); free(mata); // Destroy BLACS grid Cblacs_gridexit_(ConTxt); // Check eigenvalues if (myrow == zero && mycol == zero) { for (int i = 0; i < n; i++) { if (fabs(eigvals[i] - beta) > 0.0001) printf("Problem: eigval %d != %f5.2 but %f\n", i, beta, eigvals[i]); } if (info != zero) { printf("info = %d \n", info); } printf("Time (s) diag: %f\n", tdiag); } free(eigvals); } MPI_Barrier(MPI_COMM_WORLD); ttotal = MPI_Wtime() - ttotal0; if (iam == 0) printf("Time (s) total: %f\n", ttotal); MPI_Finalize(); }