/*
 * Matrix-multiply wrapper around the BLAS gemm routine selected by the
 * F(gemm,GEMM) macro.
 *
 *   transa, transb  transpose flags for A and B, forwarded to gemm
 *   m, n            dimensions of the output: C is addressed below as
 *                   C[i*fdC + j] with 0 <= i < m and 0 <= j < n
 *   k               inner (summed) dimension
 *   a, b            real coefficients, converted to `scalar` before the call
 *   A, B            input matrices with leading dimensions fdA, fdB
 *   C               output matrix with leading dimension fdC; must not be
 *                   the same pointer as A or B (enforced with CHECK)
 *
 * Behaviour visible in this function:
 *   - if m or n is zero, nothing is done;
 *   - if k is zero, the m-by-n block of C is set to zero and gemm is not
 *     called.  NOTE(review): this ignores b, whereas reference BLAS would
 *     leave C = b*C for k == 0 -- confirm that callers rely on zeroing;
 *   - otherwise gemm is invoked with the operands swapped (B, transb and n
 *     first, then A, transa and m), presumably so that a column-major
 *     Fortran routine produces the row-major product -- TODO confirm.
 */
void blasglue_gemm(char transa, char transb, int m, int n, int k,
                   real a, scalar *A, int fdA, scalar *B, int fdB,
                   real b, scalar *C, int fdC)
{
     scalar alpha, beta;

     /* Empty output: nothing to compute.  The dimensions are tested one at
        a time because the product m*n can overflow `int` for large
        matrices (undefined behaviour, and on wrapping targets a product
        such as 65536*65536 would compare equal to zero and silently skip
        the multiplication). */
     if (m == 0 || n == 0)
          return;

     /* Empty inner dimension: clear the output block without calling BLAS. */
     if (k == 0) {
          int i, j;
          for (i = 0; i < m; ++i)
               for (j = 0; j < n; ++j)
                    ASSIGN_ZERO(C[i*fdC + j]);
          return;
     }

     CHECK(A != C && B != C, "gemm output array must be distinct");

     ASSIGN_REAL(alpha, a);
     ASSIGN_REAL(beta, b);

     /* Operands are deliberately passed in swapped order (see header). */
     F(gemm,GEMM) (&transb, &transa, &n, &m, &k,
                   &alpha, B, &fdB, A, &fdA, &beta, C, &fdC);
}
/*
 * In-place scaling of Y for a simplicial LDL' factor.
 *
 * For every column k of L (0 <= k < L->n) the real value stored at
 * Lx [Lp [k]] is loaded into d [0] -- for an LDL' factor this is presumably
 * the diagonal entry D(k,k); confirm against the factor layout -- and the
 * nrhs consecutive entries of Y starting at index k*nrhs are each divided
 * by it.  Y is overwritten with the result.
 *
 * Preconditions are the ASSERTs below: matching xtype, L->n == Y->ncol,
 * Y->nrow == Y->d, a numeric (non-pattern) L, and L neither supernodal
 * nor LL'.
 */
static void TEMPLATE (ldl_dsolve)
(
    cholmod_factor *L,
    cholmod_dense *Y        /* nr-by-n with leading dimension nr */
)
{
    double d [1] ;
    double *Lx, *Yx, *Yz ;
    Int *Lp ;
    Int ncols, nrhs, col, entry, stop ;

    ASSERT (L->xtype == Y->xtype) ; /* L and Y must have the same xtype */
    ASSERT (L->n == Y->ncol) ;      /* dimensions must match */
    ASSERT (Y->nrow == Y->d) ;      /* leading dimension of Y = # rows of Y */
    ASSERT (L->xtype != CHOLMOD_PATTERN) ;  /* L is not symbolic */
    ASSERT (!(L->is_super) && !(L->is_ll)) ;    /* L is simplicial LDL' */

    nrhs = Y->nrow ;
    ncols = L->n ;
    Lp = L->p ;
    Lx = L->x ;
    Yx = Y->x ;
    Yz = Y->z ;

    for (col = 0 ; col < ncols ; col++)
    {
        /* Y entries [col*nrhs, (col+1)*nrhs) belong to this column of L */
        entry = col * nrhs ;
        stop = entry + nrhs ;

        /* divisor for this column: the value at the start of column col */
        ASSIGN_REAL (d,0, Lx,Lp[col]) ;

        while (entry < stop)
        {
            DIV_REAL (Yx,Yz,entry, Yx,Yz,entry, d,0) ;
            entry++ ;
        }
    }
}
/*
 * Wrapper for the BLAS scal routine selected by F(scal,SCAL), taking a
 * real factor.
 *
 *   n     element count forwarded to scal
 *   a     real factor; converted to a `scalar` with ASSIGN_REAL first
 *   x     vector argument forwarded to scal
 *   incx  stride of x, forwarded to scal
 *
 * All integer arguments are passed by address, as the underlying routine
 * expects pointers.
 */
void blasglue_rscal(int n, real a, scalar *x, int incx)
{
     scalar factor;

     ASSIGN_REAL(factor, a);

     F(scal,SCAL) (&n, &factor, x, &incx);
}
/*
 * Wrapper for the BLAS axpy routine selected by F(axpy,AXPY), taking a
 * real coefficient.
 *
 *   n     element count forwarded to axpy
 *   a     real coefficient; converted to a `scalar` with ASSIGN_REAL first
 *   x     first vector argument, stride incx
 *   y     second vector argument, stride incy
 *
 * All integer arguments are passed by address, as the underlying routine
 * expects pointers.
 */
void blasglue_axpy(int n, real a, scalar *x, int incx, scalar *y, int incy)
{
     scalar coeff;

     ASSIGN_REAL(coeff, a);

     F(axpy,AXPY) (&n, &coeff, x, &incx, y, &incy);
}
int main(int argc, char **argv) { maxwell_data *mdata; maxwell_target_data *mtdata = NULL; int local_N, N_start, alloc_N; real R[3][3] = { {1,0,0}, {0,0.01,0}, {0,0,0.01} }; real G[3][3] = { {1,0,0}, {0,100,0}, {0,0,100} }; real kvector[3] = {KX,0,0}; evectmatrix H, Hstart, W[NWORK]; real *eigvals; int i, iters; int num_iters; int parity = NO_PARITY; int nx = NX, ny = NY, nz = NZ; int num_bands = NUM_BANDS; real target_freq = 0.0; int do_target = 0; evectoperator op; evectpreconditioner pre_op; void *op_data, *pre_op_data; real error_tol = ERROR_TOL; int mesh_size = MESH_SIZE, mesh[3]; epsilon_data ed; int stop1 = 0; int verbose = 0; int which_preconditioner = 2; double max_err = 1e20; srand(time(NULL)); #if defined(DEBUG) && defined(HAVE_FEENABLEEXCEPT) feenableexcept(FE_INVALID | FE_OVERFLOW); /* crash on NaN/overflow */ #endif ed.eps_high = EPS_HIGH; ed.eps_low = EPS_LOW; ed.eps_high_x = EPS_HIGH_X; #ifdef HAVE_GETOPT { extern char *optarg; extern int optind; int c; while ((c = getopt(argc, argv, "hs:k:b:n:f:x:y:z:emt:c:g:1pvE:")) != -1) switch (c) { case 'h': usage(); exit(EXIT_SUCCESS); break; case 's': srand(atoi(optarg)); break; case 'k': kvector[0] = atof(optarg); break; case 'b': num_bands = atoi(optarg); CHECK(num_bands > 0, "num_bands must be positive"); break; case 'n': ed.eps_high = atof(optarg); CHECK(ed.eps_high > 0.0, "index must be positive"); ed.eps_high = ed.eps_high * ed.eps_high; break; case 'f': ed.eps_high_x = atof(optarg); CHECK(ed.eps_high_x > 0.0, "fill must be positive"); break; case 'x': nx = atoi(optarg); CHECK(nx > 0, "x size must be positive"); break; case 'y': ny = atoi(optarg); CHECK(ny > 0, "y size must be positive"); break; case 'z': nz = atoi(optarg); CHECK(nz > 0, "z size must be positive"); break; case 'e': parity = EVEN_Z_PARITY; break; case 'm': parity = ODD_Z_PARITY; break; case 't': target_freq = fabs(atof(optarg)); do_target = 1; break; case 'E': max_err = fabs(atof(optarg)); CHECK(max_err > 0, "maximum error must be 
positive"); break; case 'c': error_tol = fabs(atof(optarg)); break; case 'g': mesh_size = atoi(optarg); CHECK(mesh_size > 0, "mesh size must be positive"); break; case '1': stop1 = 1; break; case 'p': which_preconditioner = 1; break; case 'v': verbose = 1; break; default: usage(); exit(EXIT_FAILURE); } if (argc != optind) { usage(); exit(EXIT_FAILURE); } } #endif #ifdef ENABLE_PROF stop1 = 1; #endif mesh[0] = mesh[1] = mesh[2] = mesh_size; printf("Creating Maxwell data...\n"); mdata = create_maxwell_data(nx, ny, nz, &local_N, &N_start, &alloc_N, num_bands, NUM_FFT_BANDS); CHECK(mdata, "NULL mdata"); set_maxwell_data_parity(mdata, parity); printf("Setting k vector to (%g, %g, %g)...\n", kvector[0], kvector[1], kvector[2]); update_maxwell_data_k(mdata, kvector, G[0], G[1], G[2]); printf("Initializing dielectric...\n"); /* set up dielectric function (a simple Bragg mirror) */ set_maxwell_dielectric(mdata, mesh, R, G, epsilon, 0, &ed); if (verbose && ny == 1 && nz == 1) { printf("dielectric function:\n"); for (i = 0; i < nx; ++i) { if (mdata->eps_inv[i].m00 == mdata->eps_inv[i].m11) printf(" eps(%g) = %g\n", i * 1.0 / nx, 1.0/mdata->eps_inv[i].m00); else printf(" eps(%g) = x: %g OR y: %g\n", i * 1.0 / nx, 1.0/mdata->eps_inv[i].m00, 1.0/mdata->eps_inv[i].m11); } printf("\n"); } printf("Allocating fields...\n"); H = create_evectmatrix(nx * ny * nz, 2, num_bands, local_N, N_start, alloc_N); Hstart = create_evectmatrix(nx * ny * nz, 2, num_bands, local_N, N_start, alloc_N); for (i = 0; i < NWORK; ++i) W[i] = create_evectmatrix(nx * ny * nz, 2, num_bands, local_N, N_start, alloc_N); CHK_MALLOC(eigvals, real, num_bands); for (iters = 0; iters < PROF_ITERS; ++iters) { printf("Initializing fields...\n"); for (i = 0; i < H.n * H.p; ++i) ASSIGN_REAL(Hstart.data[i], rand() * 1.0 / RAND_MAX); /*****************************************/ if (do_target) { printf("\nSolving for eigenvectors close to %f...\n", target_freq); mtdata = create_maxwell_target_data(mdata, target_freq); op = 
maxwell_target_operator; if (which_preconditioner == 1) pre_op = maxwell_target_preconditioner; else pre_op = maxwell_target_preconditioner2; op_data = (void *) mtdata; pre_op_data = (void *) mtdata; } else { op = maxwell_operator; if (which_preconditioner == 1) pre_op = maxwell_preconditioner; else pre_op = maxwell_preconditioner2; op_data = (void *) mdata; pre_op_data = (void *) mdata; } /*****************************************/ printf("\nSolving for eigenvectors with preconditioning...\n"); evectmatrix_copy(H, Hstart); eigensolver(H, eigvals, op, op_data, NULL,NULL, pre_op, pre_op_data, maxwell_parity_constraint, (void *) mdata, W, NWORK, error_tol, &num_iters, EIGS_DEFAULT_FLAGS); if (do_target) eigensolver_get_eigenvals(H, eigvals, maxwell_operator, mdata, W[0], W[1]); printf("Solved for eigenvectors after %d iterations.\n", num_iters); printf("%15s%15s%15s%15s\n","eigenval", "frequency", "exact freq.", "error"); for (i = 0; i < num_bands; ++i) { double err; real freq = sqrt(eigvals[i]); real exact_freq = bragg_omega(freq, kvector[0], sqrt(ed.eps_high), ed.eps_high_x, sqrt(ed.eps_low), 1.0 - ed.eps_high_x, 1.0e-7); printf("%15f%15f%15f%15e\n", eigvals[i], freq, exact_freq, err = fabs(freq - exact_freq) / exact_freq); CHECK(err <= max_err, "error exceeds tolerance"); } printf("\n"); for (i = 0; i < num_bands; ++i) { real kdom[3]; real k; maxwell_dominant_planewave(mdata, H, i + 1, kdom); if ((i + 1) % 2 == 1) k = kvector[0] + (i + 1) / 2; else k = kvector[0] - (i + 1) / 2; if (kvector[0] > 0 && kvector[0] < 0.5 && ed.eps_high == 1) { printf("Expected kdom: %15f%15f%15f\n", k, kvector[1], kvector[2]); printf("Got kdom: %15f%15f%15f\n", kdom[0], kdom[1], kdom[2]); CHECK(k == kdom[0] && kvector[1] == kdom[1] && kvector[2] == kdom[2], "unexpected result from maxwell_dominant_planewave"); } } } if (!stop1) { /*****************************************/ printf("\nSolving for eigenvectors without preconditioning...\n"); evectmatrix_copy(H, Hstart); eigensolver(H, 
eigvals, op, op_data, NULL,NULL, NULL, NULL, maxwell_parity_constraint, (void *) mdata, W, NWORK, error_tol, &num_iters, EIGS_DEFAULT_FLAGS); if (do_target) eigensolver_get_eigenvals(H, eigvals, maxwell_operator, mdata, W[0], W[1]); printf("Solved for eigenvectors after %d iterations.\n", num_iters); printf("%15s%15s%15s%15s\n","eigenval", "frequency", "exact freq.", "error"); for (i = 0; i < num_bands; ++i) { double err; real freq = sqrt(eigvals[i]); real exact_freq = bragg_omega(freq, kvector[0], sqrt(ed.eps_high), ed.eps_high_x, sqrt(ed.eps_low), 1.0 - ed.eps_high_x, 1.0e-7); printf("%15f%15f%15f%15e\n", eigvals[i], freq, exact_freq, err = fabs(freq - exact_freq) / exact_freq); CHECK(err <= max_err, "error exceeds tolerance"); } printf("\n"); /*****************************************/ printf("\nSolving for eigenvectors without conj. grad...\n"); evectmatrix_copy(H, Hstart); eigensolver(H, eigvals, op, op_data, NULL,NULL, pre_op, pre_op_data, maxwell_parity_constraint, (void *) mdata, W, NWORK - 1, error_tol, &num_iters, EIGS_DEFAULT_FLAGS); if (do_target) eigensolver_get_eigenvals(H, eigvals, maxwell_operator, mdata, W[0], W[1]); printf("Solved for eigenvectors after %d iterations.\n", num_iters); printf("%15s%15s%15s%15s\n","eigenval", "frequency", "exact freq.", "error"); for (i = 0; i < num_bands; ++i) { double err; real freq = sqrt(eigvals[i]); real exact_freq = bragg_omega(freq, kvector[0], sqrt(ed.eps_high), ed.eps_high_x, sqrt(ed.eps_low), 1.0 - ed.eps_high_x, 1.0e-7); printf("%15f%15f%15f%15e\n", eigvals[i], freq, exact_freq, err = fabs(freq - exact_freq) / exact_freq); CHECK(err <= max_err, "error exceeds tolerance"); } printf("\n"); /*****************************************/ printf("\nSolving for eigenvectors without precond. or conj. 
grad...\n"); evectmatrix_copy(H, Hstart); eigensolver(H, eigvals, op, op_data, NULL, NULL, NULL,NULL, maxwell_parity_constraint, (void *) mdata, W, NWORK - 1, error_tol, &num_iters, EIGS_DEFAULT_FLAGS); if (do_target) eigensolver_get_eigenvals(H, eigvals, maxwell_operator, mdata, W[0], W[1]); printf("Solved for eigenvectors after %d iterations.\n", num_iters); printf("%15s%15s%15s%15s\n","eigenval", "frequency", "exact freq.", "error"); for (i = 0; i < num_bands; ++i) { double err; real freq = sqrt(eigvals[i]); real exact_freq = bragg_omega(freq, kvector[0], sqrt(ed.eps_high), ed.eps_high_x, sqrt(ed.eps_low), 1.0 - ed.eps_high_x, 1.0e-7); printf("%15f%15f%15f%15e\n", eigvals[i], freq, exact_freq, err = fabs(freq - exact_freq) / exact_freq); CHECK(err <= max_err, "error exceeds tolerance"); } printf("\n"); /*****************************************/ } destroy_evectmatrix(H); destroy_evectmatrix(Hstart); for (i = 0; i < NWORK; ++i) destroy_evectmatrix(W[i]); destroy_maxwell_target_data(mtdata); destroy_maxwell_data(mdata); free(eigvals); debug_check_memory_leaks(); return EXIT_SUCCESS; }