SCM cscalar_field_make(SCM f0) { int i; field_smob *pf; field_smob *pf0 = assert_field_smob(f0); CHK_MALLOC(pf, field_smob, 1); *pf = *pf0; pf->type = CSCALAR_FIELD_SMOB; pf->type_char = 'C'; CHK_MALLOC(pf->f.cs, scalar_complex, pf->N); for (i = 0; i < pf->N; ++i) CASSIGN_ZERO(pf->f.cs[i]); return field2scm(pf); }
SCM rscalar_field_make(SCM f0) { int i; field_smob *pf; field_smob *pf0 = assert_field_smob(f0); CHK_MALLOC(pf, field_smob, 1); *pf = *pf0; pf->type = RSCALAR_FIELD_SMOB; pf->type_char = 'R'; CHK_MALLOC(pf->f.rs, real, pf->N); for (i = 0; i < pf->N; ++i) pf->f.rs[i] = 0.0; return field2scm(pf); }
SCM cvector_field_make(SCM f0) { int i; field_smob *pf; field_smob *pf0 = assert_field_smob(f0); CHECK(mdata, "init-params must be called before cvector-field-make"); CHK_MALLOC(pf, field_smob, 1); *pf = *pf0; pf->type = CVECTOR_FIELD_SMOB; pf->type_char = 'c'; CHK_MALLOC(pf->f.cv, scalar_complex, 3 * pf->N); for (i = 0; i < pf->N * 3; ++i) CASSIGN_ZERO(pf->f.cv[i]); return field2scm(pf); }
void lapackglue_geev(char jobvl, char jobvr, int n, scalar *A, int fdA, scalar_complex *w, scalar *VL, int fdVL, scalar *VR, int fdVR, scalar *work, int lwork, real *rwork) { int info; #ifdef SCALAR_COMPLEX F(geev,GEEV) (&jobvl, &jobvr, &n, A, &fdA, w, VL, &fdVL, VR, &fdVR, work, &lwork, rwork, &info); #else int i; real *wr, *wi; CHK_MALLOC(wr, real, 2*n); wi = wr + n; (void) rwork; /* unused */ FR(geev,GEEV) (&jobvl, &jobvr, &n, A, &fdA, wr, wi, VL, &fdVL, VR, &fdVR, work, &lwork, &info); for (i = 0; i < n; ++i) CASSIGN_SCALAR(w[i], wr[i], wi[i]); free(wr); #endif CHECK(info >= 0, "invalid argument in geev"); CHECK(info <= 0, "failure to converge in geev"); }
static material_type make_medium(double epsilon, double mu) { material_type m; m.which_subclass = MEDIUM; CHK_MALLOC(m.subclass.medium_data, medium, 1); m.subclass.medium_data->epsilon = epsilon; m.subclass.medium_data->mu = mu; return m; }
SCM sqmatrix2scm(sqmatrix m) { SCM obj; sqmatrix *mp; CHK_MALLOC(mp, sqmatrix, 1); *mp = create_sqmatrix(m.p); sqmatrix_copy(*mp, m); NEWCELL_SMOB(obj, sqmatrix, mp); return obj; }
/* return a Scheme object *copy* of m */ SCM evectmatrix2scm(evectmatrix m) { SCM obj; evectmatrix *mp; CHK_MALLOC(mp, evectmatrix, 1); *mp = create_evectmatrix(m.N, m.c, m.p, m.localN, m.Nstart, m.allocN); evectmatrix_copy(*mp, m); NEWCELL_SMOB(obj, evectmatrix, mp); return obj; }
/* return a Scheme object *copy* of the given columns of m */ SCM evectmatrix_slice2scm(evectmatrix m, int p_start, int p) { SCM obj; evectmatrix *mp; CHECK(p_start >= 0 && p_start + p <= m.p && p >= 0, "invalid arguments in evectmatrix_slice2scm"); CHK_MALLOC(mp, evectmatrix, 1); *mp = create_evectmatrix(m.N, m.c, p, m.localN, m.Nstart, m.allocN); evectmatrix_copy_slice(*mp, m, 0, p_start, p); NEWCELL_SMOB(obj, evectmatrix, mp); return obj; }
maxwell_target_data *create_maxwell_target_data(maxwell_data *md, real target_frequency) { maxwell_target_data *d; CHK_MALLOC(d, maxwell_target_data, 1); d->d = md; d->target_frequency = target_frequency; return d; }
evectconstraint_chain *evect_add_constraint(evectconstraint_chain *constraints, evectconstraint C, void *constraint_data) { evectconstraint_chain *new_constraints; CHK_MALLOC(new_constraints, evectconstraint_chain, 1); new_constraints->C = C; new_constraints->constraint_data = constraint_data; new_constraints->next = constraints; return new_constraints; }
maxwell_data *create_maxwell_data(int nx, int ny, int nz, int *local_N, int *N_start, int *alloc_N, int num_bands, int max_fft_bands) { int n[3], rank = (nz == 1) ? (ny == 1 ? 1 : 2) : 3; maxwell_data *d = 0; int fft_data_size; n[0] = nx; n[1] = ny; n[2] = nz; #if !defined(HAVE_FFTW) && !defined(HAVE_FFTW3) # error Non-FFTW FFTs are not currently supported. #endif #if defined(HAVE_FFTW) CHECK(sizeof(fftw_real) == sizeof(real), "floating-point type is inconsistent with FFTW!"); #endif CHK_MALLOC(d, maxwell_data, 1); d->nx = nx; d->ny = ny; d->nz = nz; d->max_fft_bands = MIN2(num_bands, max_fft_bands); maxwell_set_num_bands(d, num_bands); d->current_k[0] = d->current_k[1] = d->current_k[2] = 0.0; d->parity = NO_PARITY; d->last_dim_size = d->last_dim = n[rank - 1]; /* ----------------------------------------------------- */ d->nplans = 1; #ifndef HAVE_MPI d->local_nx = nx; d->local_ny = ny; d->local_x_start = d->local_y_start = 0; *local_N = *alloc_N = nx * ny * nz; *N_start = 0; d->other_dims = *local_N / d->last_dim; d->fft_data = 0; /* initialize it here for use in specific planner? */ # if defined(HAVE_FFTW3) d->nplans = 0; /* plans will be created as needed */ # ifdef SCALAR_COMPLEX d->fft_output_size = fft_data_size = nx * ny * nz; # else d->last_dim_size = 2 * (d->last_dim / 2 + 1); d->fft_output_size = (fft_data_size = d->other_dims * d->last_dim_size)/2; # endif # elif defined(HAVE_FFTW) # ifdef SCALAR_COMPLEX d->fft_output_size = fft_data_size = nx * ny * nz; d->plans[0] = fftwnd_create_plan_specific(rank, n, FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_IN_PLACE, (fftw_complex*) d->fft_data, 3 * d->num_fft_bands, (fftw_complex*) d->fft_data, 3 * d->num_fft_bands); d->iplans[0] = fftwnd_create_plan_specific(rank, n, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE, (fftw_complex*) d->fft_data, 3 * d->num_fft_bands, (fftw_complex*) d->fft_data, 3 * d->num_fft_bands); # else /* not SCALAR_COMPLEX */ d->last_dim_size = 2 * (d->last_dim / 2 + 1); d->fft_output_size = (fft_data_size = d->other_dims * d->last_dim_size)/2; d->plans[0] = rfftwnd_create_plan_specific(rank, n, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE, (fftw_real*) d->fft_data, 3 * d->num_fft_bands, (fftw_real*) d->fft_data, 3 * d->num_fft_bands); d->iplans[0] = rfftwnd_create_plan_specific(rank, n, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE, (fftw_real*) d->fft_data, 3 * d->num_fft_bands, (fftw_real*) d->fft_data, 3 * d->num_fft_bands); # endif /* not SCALAR_COMPLEX */ # endif /* HAVE_FFTW */ #else /* HAVE_MPI */ /* ----------------------------------------------------- */ # if defined(HAVE_FFTW3) { int i; ptrdiff_t np[3], local_nx, local_ny, local_x_start, local_y_start; CHECK(rank > 1, "rank < 2 MPI computations are not supported"); d->nplans = 0; /* plans will be created as needed */ for (i = 0; i < rank; ++i) np[i] = n[i]; # ifndef SCALAR_COMPLEX d->last_dim_size = 2 * (np[rank-1] = d->last_dim / 2 + 1); # endif fft_data_size = *alloc_N = FFTW(mpi_local_size_transposed)(rank, np, MPI_COMM_WORLD, &local_nx, &local_x_start, &local_ny, &local_y_start); # ifndef SCALAR_COMPLEX fft_data_size = (*alloc_N *= 2); // convert to # of real scalars # endif d->local_nx = local_nx; d->local_x_start = local_x_start; d->local_ny = local_ny; d->local_y_start = local_y_start; d->fft_output_size = nx * d->local_ny * (rank==3 ? np[2] : nz); *local_N = d->local_nx * ny * nz; *N_start = d->local_x_start * ny * nz; d->other_dims = *local_N / d->last_dim; } # elif defined(HAVE_FFTW) CHECK(rank > 1, "rank < 2 MPI computations are not supported"); # ifdef SCALAR_COMPLEX d->iplans[0] = fftwnd_mpi_create_plan(MPI_COMM_WORLD, rank, n, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE); { int nt[3]; /* transposed dimensions for reverse FFT */ nt[0] = n[1]; nt[1] = n[0]; nt[2] = n[2]; d->plans[0] = fftwnd_mpi_create_plan(MPI_COMM_WORLD, rank, nt, FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_IN_PLACE); } fftwnd_mpi_local_sizes(d->iplans[0], &d->local_nx, &d->local_x_start, &d->local_ny, &d->local_y_start, &fft_data_size); d->fft_output_size = nx * d->local_ny * nz; # else /* not SCALAR_COMPLEX */ CHECK(rank > 1, "rank < 2 MPI computations are not supported"); d->iplans[0] = rfftwnd_mpi_create_plan(MPI_COMM_WORLD, rank, n, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE); /* Unlike fftwnd_mpi, we do *not* pass transposed dimensions for the reverse transform here--we always pass the dimensions of the original real array, and rfftwnd_mpi assumes that if one transform is transposed, then the other is as well. */ d->plans[0] = rfftwnd_mpi_create_plan(MPI_COMM_WORLD, rank, n, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE); rfftwnd_mpi_local_sizes(d->iplans[0], &d->local_nx, &d->local_x_start, &d->local_ny, &d->local_y_start, &fft_data_size); d->last_dim_size = 2 * (d->last_dim / 2 + 1); if (rank == 2) d->fft_output_size = nx * d->local_ny * nz; else d->fft_output_size = nx * d->local_ny * (d->last_dim_size / 2); # endif /* not SCALAR_COMPLEX */ *local_N = d->local_nx * ny * nz; *N_start = d->local_x_start * ny * nz; *alloc_N = *local_N; d->other_dims = *local_N / d->last_dim; # endif /* HAVE_FFTW */ #endif /* HAVE_MPI */ /* ----------------------------------------------------- */ #ifdef HAVE_FFTW CHECK(d->plans[0] && d->iplans[0], "FFTW plan creation failed"); #endif CHK_MALLOC(d->eps_inv, symmetric_matrix, d->fft_output_size); /* A scratch output array is required because the "ordinary" arrays are not in a cartesian basis (or even a constant basis). */ fft_data_size *= d->max_fft_bands; #if defined(HAVE_FFTW3) d->fft_data = (scalar *) FFTW(malloc)(sizeof(scalar) * 3 * fft_data_size); CHECK(d->fft_data, "out of memory!"); d->fft_data2 = d->fft_data; /* works in-place */ #else CHK_MALLOC(d->fft_data, scalar, 3 * fft_data_size); d->fft_data2 = d->fft_data; /* works in-place */ #endif CHK_MALLOC(d->k_plus_G, k_data, *local_N); CHK_MALLOC(d->k_plus_G_normsqr, real, *local_N); d->eps_inv_mean = 1.0; d->local_N = *local_N; d->N_start = *N_start; d->alloc_N = *alloc_N; d->N = nx * ny * nz; return d; }
int main(int argc, char **argv) { maxwell_data *mdata; maxwell_target_data *mtdata = NULL; int local_N, N_start, alloc_N; real R[3][3] = { {1,0,0}, {0,0.01,0}, {0,0,0.01} }; real G[3][3] = { {1,0,0}, {0,100,0}, {0,0,100} }; real kvector[3] = {KX,0,0}; evectmatrix H, Hstart, W[NWORK]; real *eigvals; int i, iters; int num_iters; int parity = NO_PARITY; int nx = NX, ny = NY, nz = NZ; int num_bands = NUM_BANDS; real target_freq = 0.0; int do_target = 0; evectoperator op; evectpreconditioner pre_op; void *op_data, *pre_op_data; real error_tol = ERROR_TOL; int mesh_size = MESH_SIZE, mesh[3]; epsilon_data ed; int stop1 = 0; int verbose = 0; int which_preconditioner = 2; double max_err = 1e20; srand(time(NULL)); #if defined(DEBUG) && defined(HAVE_FEENABLEEXCEPT) feenableexcept(FE_INVALID | FE_OVERFLOW); /* crash on NaN/overflow */ #endif ed.eps_high = EPS_HIGH; ed.eps_low = EPS_LOW; ed.eps_high_x = EPS_HIGH_X; #ifdef HAVE_GETOPT { extern char *optarg; extern int optind; int c; while ((c = getopt(argc, argv, "hs:k:b:n:f:x:y:z:emt:c:g:1pvE:")) != -1) switch (c) { case 'h': usage(); exit(EXIT_SUCCESS); break; case 's': srand(atoi(optarg)); break; case 'k': kvector[0] = atof(optarg); break; case 'b': num_bands = atoi(optarg); CHECK(num_bands > 0, "num_bands must be positive"); break; case 'n': ed.eps_high = atof(optarg); CHECK(ed.eps_high > 0.0, "index must be positive"); ed.eps_high = ed.eps_high * ed.eps_high; break; case 'f': ed.eps_high_x = atof(optarg); CHECK(ed.eps_high_x > 0.0, "fill must be positive"); break; case 'x': nx = atoi(optarg); CHECK(nx > 0, "x size must be positive"); break; case 'y': ny = atoi(optarg); CHECK(ny > 0, "y size must be positive"); break; case 'z': nz = atoi(optarg); CHECK(nz > 0, "z size must be positive"); break; case 'e': parity = EVEN_Z_PARITY; break; case 'm': parity = ODD_Z_PARITY; break; case 't': target_freq = fabs(atof(optarg)); do_target = 1; break; case 'E': max_err = fabs(atof(optarg)); CHECK(max_err > 0, "maximum error must be positive"); break; case 'c': error_tol = fabs(atof(optarg)); break; case 'g': mesh_size = atoi(optarg); CHECK(mesh_size > 0, "mesh size must be positive"); break; case '1': stop1 = 1; break; case 'p': which_preconditioner = 1; break; case 'v': verbose = 1; break; default: usage(); exit(EXIT_FAILURE); } if (argc != optind) { usage(); exit(EXIT_FAILURE); } } #endif #ifdef ENABLE_PROF stop1 = 1; #endif mesh[0] = mesh[1] = mesh[2] = mesh_size; printf("Creating Maxwell data...\n"); mdata = create_maxwell_data(nx, ny, nz, &local_N, &N_start, &alloc_N, num_bands, NUM_FFT_BANDS); CHECK(mdata, "NULL mdata"); set_maxwell_data_parity(mdata, parity); printf("Setting k vector to (%g, %g, %g)...\n", kvector[0], kvector[1], kvector[2]); update_maxwell_data_k(mdata, kvector, G[0], G[1], G[2]); printf("Initializing dielectric...\n"); /* set up dielectric function (a simple Bragg mirror) */ set_maxwell_dielectric(mdata, mesh, R, G, epsilon, 0, &ed); if (verbose && ny == 1 && nz == 1) { printf("dielectric function:\n"); for (i = 0; i < nx; ++i) { if (mdata->eps_inv[i].m00 == mdata->eps_inv[i].m11) printf(" eps(%g) = %g\n", i * 1.0 / nx, 1.0/mdata->eps_inv[i].m00); else printf(" eps(%g) = x: %g OR y: %g\n", i * 1.0 / nx, 1.0/mdata->eps_inv[i].m00, 1.0/mdata->eps_inv[i].m11); } printf("\n"); } printf("Allocating fields...\n"); H = create_evectmatrix(nx * ny * nz, 2, num_bands, local_N, N_start, alloc_N); Hstart = create_evectmatrix(nx * ny * nz, 2, num_bands, local_N, N_start, alloc_N); for (i = 0; i < NWORK; ++i) W[i] = create_evectmatrix(nx * ny * nz, 2, num_bands, local_N, N_start, alloc_N); CHK_MALLOC(eigvals, real, num_bands); for (iters = 0; iters < PROF_ITERS; ++iters) { printf("Initializing fields...\n"); for (i = 0; i < H.n * H.p; ++i) ASSIGN_REAL(Hstart.data[i], rand() * 1.0 / RAND_MAX); /*****************************************/ if (do_target) { printf("\nSolving for eigenvectors close to %f...\n", target_freq); mtdata = create_maxwell_target_data(mdata, target_freq); op = maxwell_target_operator; if (which_preconditioner == 1) pre_op = maxwell_target_preconditioner; else pre_op = maxwell_target_preconditioner2; op_data = (void *) mtdata; pre_op_data = (void *) mtdata; } else { op = maxwell_operator; if (which_preconditioner == 1) pre_op = maxwell_preconditioner; else pre_op = maxwell_preconditioner2; op_data = (void *) mdata; pre_op_data = (void *) mdata; } /*****************************************/ printf("\nSolving for eigenvectors with preconditioning...\n"); evectmatrix_copy(H, Hstart); eigensolver(H, eigvals, op, op_data, NULL,NULL, pre_op, pre_op_data, maxwell_parity_constraint, (void *) mdata, W, NWORK, error_tol, &num_iters, EIGS_DEFAULT_FLAGS); if (do_target) eigensolver_get_eigenvals(H, eigvals, maxwell_operator, mdata, W[0], W[1]); printf("Solved for eigenvectors after %d iterations.\n", num_iters); printf("%15s%15s%15s%15s\n","eigenval", "frequency", "exact freq.", "error"); for (i = 0; i < num_bands; ++i) { double err; real freq = sqrt(eigvals[i]); real exact_freq = bragg_omega(freq, kvector[0], sqrt(ed.eps_high), ed.eps_high_x, sqrt(ed.eps_low), 1.0 - ed.eps_high_x, 1.0e-7); printf("%15f%15f%15f%15e\n", eigvals[i], freq, exact_freq, err = fabs(freq - exact_freq) / exact_freq); CHECK(err <= max_err, "error exceeds tolerance"); } printf("\n"); for (i = 0; i < num_bands; ++i) { real kdom[3]; real k; maxwell_dominant_planewave(mdata, H, i + 1, kdom); if ((i + 1) % 2 == 1) k = kvector[0] + (i + 1) / 2; else k = kvector[0] - (i + 1) / 2; if (kvector[0] > 0 && kvector[0] < 0.5 && ed.eps_high == 1) { printf("Expected kdom: %15f%15f%15f\n", k, kvector[1], kvector[2]); printf("Got kdom: %15f%15f%15f\n", kdom[0], kdom[1], kdom[2]); CHECK(k == kdom[0] && kvector[1] == kdom[1] && kvector[2] == kdom[2], "unexpected result from maxwell_dominant_planewave"); } } } if (!stop1) { /*****************************************/ printf("\nSolving for eigenvectors without preconditioning...\n"); evectmatrix_copy(H, Hstart); eigensolver(H, eigvals, op, op_data, NULL,NULL, NULL, NULL, maxwell_parity_constraint, (void *) mdata, W, NWORK, error_tol, &num_iters, EIGS_DEFAULT_FLAGS); if (do_target) eigensolver_get_eigenvals(H, eigvals, maxwell_operator, mdata, W[0], W[1]); printf("Solved for eigenvectors after %d iterations.\n", num_iters); printf("%15s%15s%15s%15s\n","eigenval", "frequency", "exact freq.", "error"); for (i = 0; i < num_bands; ++i) { double err; real freq = sqrt(eigvals[i]); real exact_freq = bragg_omega(freq, kvector[0], sqrt(ed.eps_high), ed.eps_high_x, sqrt(ed.eps_low), 1.0 - ed.eps_high_x, 1.0e-7); printf("%15f%15f%15f%15e\n", eigvals[i], freq, exact_freq, err = fabs(freq - exact_freq) / exact_freq); CHECK(err <= max_err, "error exceeds tolerance"); } printf("\n"); /*****************************************/ printf("\nSolving for eigenvectors without conj. grad...\n"); evectmatrix_copy(H, Hstart); eigensolver(H, eigvals, op, op_data, NULL,NULL, pre_op, pre_op_data, maxwell_parity_constraint, (void *) mdata, W, NWORK - 1, error_tol, &num_iters, EIGS_DEFAULT_FLAGS); if (do_target) eigensolver_get_eigenvals(H, eigvals, maxwell_operator, mdata, W[0], W[1]); printf("Solved for eigenvectors after %d iterations.\n", num_iters); printf("%15s%15s%15s%15s\n","eigenval", "frequency", "exact freq.", "error"); for (i = 0; i < num_bands; ++i) { double err; real freq = sqrt(eigvals[i]); real exact_freq = bragg_omega(freq, kvector[0], sqrt(ed.eps_high), ed.eps_high_x, sqrt(ed.eps_low), 1.0 - ed.eps_high_x, 1.0e-7); printf("%15f%15f%15f%15e\n", eigvals[i], freq, exact_freq, err = fabs(freq - exact_freq) / exact_freq); CHECK(err <= max_err, "error exceeds tolerance"); } printf("\n"); /*****************************************/ printf("\nSolving for eigenvectors without precond. or conj. grad...\n"); evectmatrix_copy(H, Hstart); eigensolver(H, eigvals, op, op_data, NULL, NULL, NULL,NULL, maxwell_parity_constraint, (void *) mdata, W, NWORK - 1, error_tol, &num_iters, EIGS_DEFAULT_FLAGS); if (do_target) eigensolver_get_eigenvals(H, eigvals, maxwell_operator, mdata, W[0], W[1]); printf("Solved for eigenvectors after %d iterations.\n", num_iters); printf("%15s%15s%15s%15s\n","eigenval", "frequency", "exact freq.", "error"); for (i = 0; i < num_bands; ++i) { double err; real freq = sqrt(eigvals[i]); real exact_freq = bragg_omega(freq, kvector[0], sqrt(ed.eps_high), ed.eps_high_x, sqrt(ed.eps_low), 1.0 - ed.eps_high_x, 1.0e-7); printf("%15f%15f%15f%15e\n", eigvals[i], freq, exact_freq, err = fabs(freq - exact_freq) / exact_freq); CHECK(err <= max_err, "error exceeds tolerance"); } printf("\n"); /*****************************************/ } destroy_evectmatrix(H); destroy_evectmatrix(Hstart); for (i = 0; i < NWORK; ++i) destroy_evectmatrix(W[i]); destroy_maxwell_target_data(mtdata); destroy_maxwell_data(mdata); free(eigvals); debug_check_memory_leaks(); return EXIT_SUCCESS; }