예제 #1
0
파일: blasglue.c 프로젝트: HomerReid/mpb
/*
 * Glue around the gemm routine selected by F(gemm,GEMM).
 *
 * The real coefficients a and b are converted to the library's scalar
 * type before the call.  C is indexed as C[i*fdC + j] for i < m, j < n,
 * i.e. m rows of n entries with row stride fdC.
 *
 * The operands and dimensions are handed to the underlying routine in
 * swapped order (B before A, n before m, transb before transa).
 * NOTE(review): presumably this lets row-major arrays be passed to a
 * column-major Fortran BLAS -- confirm against the BLAS gemm docs.
 *
 * Special cases handled here without calling the underlying routine:
 *   - m*n == 0: returns immediately, C untouched.
 *   - k == 0:   every C[i*fdC + j] is set to zero; note that b is not
 *               consulted in this branch.
 *
 * Otherwise C must not alias A or B (enforced with CHECK).
 */
void blasglue_gemm(char transa, char transb, int m, int n, int k,
		   real a, scalar *A, int fdA, scalar *B, int fdB,
		   real b, scalar *C, int fdC)
{
     scalar alpha, beta;
     int row, col;

     /* An empty output matrix leaves nothing to do. */
     if (m*n == 0)
	  return;

     /* No inner dimension: clear the m-by-n output and stop. */
     if (k == 0) {
	  for (row = 0; row < m; ++row) {
	       scalar *c_row = C + row*fdC;
	       for (col = 0; col < n; ++col)
		    ASSIGN_ZERO(c_row[col]);
	  }
	  return;
     }

     CHECK(A != C && B != C, "gemm output array must be distinct");

     /* Promote the real coefficients to the scalar type. */
     ASSIGN_REAL(beta,b);
     ASSIGN_REAL(alpha,a);

     F(gemm,GEMM) (&transb, &transa, &n, &m, &k,
		   &alpha, B, &fdB, A, &fdA, &beta, C, &fdC);
}
예제 #2
0
/* Divide Y, in place, by one value of L per column: for each k in 0..n-1,
 * the nrhs consecutive entries of Y starting at k*nrhs are divided by the
 * entry of L read through ASSIGN_REAL at position Lp[k].
 * NOTE(review): presumably Lp[k] addresses the diagonal entry of D for
 * column k of the LDL' factor -- confirm against the CHOLMOD factor layout.
 */
static void TEMPLATE (ldl_dsolve)
(
    cholmod_factor *L,
    cholmod_dense *Y		/* nr-by-n with leading dimension nr */
)
{
    double pivot [1] ;
    double *Lx, *Yx, *Yz ;
    Int *Lp ;
    Int ncols, nrhs, col, idx, stop ;

    ASSERT (L->xtype == Y->xtype) ; /* L and Y must have the same xtype */
    ASSERT (L->n == Y->ncol) ;	    /* dimensions must match */
    ASSERT (Y->nrow == Y->d) ;	    /* leading dimension of Y = # rows of Y */
    ASSERT (L->xtype != CHOLMOD_PATTERN) ;  /* L is not symbolic */
    ASSERT (!(L->is_super) && !(L->is_ll)) ;	/* L is simplicial LDL' */

    Lp = L->p ;
    Lx = L->x ;
    Yx = Y->x ;
    Yz = Y->z ;
    ncols = L->n ;
    nrhs = Y->nrow ;

    /* idx walks Y's storage once; each column owns the next nrhs entries */
    idx = 0 ;
    for (col = 0 ; col < ncols ; col++)
    {
	ASSIGN_REAL (pivot,0, Lx,Lp[col]) ;
	stop = idx + nrhs ;
	while (idx < stop)
	{
	    DIV_REAL (Yx,Yz,idx, Yx,Yz,idx, pivot,0) ;
	    idx++ ;
	}
    }
}
예제 #3
0
파일: blasglue.c 프로젝트: HomerReid/mpb
/*
 * Glue around the scal routine selected by F(scal,SCAL): the real factor
 * a is converted to the scalar type and passed, together with n, x and
 * incx, to the underlying routine.
 * NOTE(review): presumably scales n entries of x (stride incx) by a --
 * confirm against the BLAS scal documentation.
 */
void blasglue_rscal(int n, real a, scalar *x, int incx)
{
     scalar factor;

     /* the underlying routine takes its coefficient as a scalar */
     ASSIGN_REAL(factor, a);
     F(scal,SCAL) (&n, &factor, x, &incx);
}
예제 #4
0
파일: blasglue.c 프로젝트: HomerReid/mpb
/*
 * Glue around the axpy routine selected by F(axpy,AXPY): the real
 * coefficient a is converted to the scalar type and passed, together with
 * n, x/incx and y/incy, to the underlying routine.
 * NOTE(review): presumably computes y += a * x over n entries with the
 * given strides -- confirm against the BLAS axpy documentation.
 */
void blasglue_axpy(int n, real a, scalar *x, int incx, scalar *y, int incy)
{
     scalar coeff;

     /* the underlying routine takes its coefficient as a scalar */
     ASSIGN_REAL(coeff, a);
     F(axpy,AXPY) (&n, &coeff, x, &incx, y, &incy);
}
예제 #5
0
파일: maxwell_test.c 프로젝트: oskooi/mpb
/* Print the result table for one eigensolver run and check every band.
 *
 * For each of the num_bands eigenvalues, the frequency sqrt(eigvals[i]) is
 * compared with the reference value returned by bragg_omega() for wavevector
 * component kx and the dielectric parameters in *ed.  The relative error is
 * printed and CHECKed against max_err.
 */
static void report_band_frequencies(const real *eigvals, int num_bands,
                                    int num_iters, real kx,
                                    const epsilon_data *ed, double max_err)
{
     int i;

     printf("Solved for eigenvectors after %d iterations.\n", num_iters);
     printf("%15s%15s%15s%15s\n","eigenval", "frequency", "exact freq.",
            "error");
     for (i = 0; i < num_bands; ++i) {
          double err;
          real freq = sqrt(eigvals[i]);
          real exact_freq = bragg_omega(freq, kx, sqrt(ed->eps_high),
                                        ed->eps_high_x, sqrt(ed->eps_low),
                                        1.0 - ed->eps_high_x, 1.0e-7);
          err = fabs(freq - exact_freq) / exact_freq;
          printf("%15f%15f%15f%15e\n", eigvals[i], freq, exact_freq, err);
          CHECK(err <= max_err, "error exceeds tolerance");
     }
     printf("\n");
}

/* Test driver: builds the Maxwell data for a simple Bragg mirror, runs the
 * eigensolver in several configurations and checks the frequencies against
 * bragg_omega().
 *
 * Command-line options (only when HAVE_GETOPT is defined):
 *   -h        print usage and exit successfully
 *   -s seed   seed for srand()
 *   -k kx     x component of the k vector
 *   -b n      number of bands (> 0)
 *   -n index  index (> 0); its square is stored in eps_high
 *   -f fill   value for eps_high_x (> 0)
 *   -x/-y/-z  grid sizes (> 0)
 *   -e / -m   even / odd z parity
 *   -t freq   solve near |freq| using the target operator
 *   -E err    maximum allowed relative error (> 0)
 *   -c tol    eigensolver error tolerance
 *   -g size   mesh size (> 0), used for all three mesh dimensions
 *   -1        stop after the preconditioned solve
 *   -p        select preconditioner 1 instead of 2
 *   -v        verbose (prints the dielectric function for 1d grids)
 *
 * Returns EXIT_SUCCESS; failed CHECKs are handled by the CHECK macro.
 */
int main(int argc, char **argv)
{
     maxwell_data *mdata;
     maxwell_target_data *mtdata = NULL;
     int local_N, N_start, alloc_N;
     real R[3][3] = { {1,0,0}, {0,0.01,0}, {0,0,0.01} };
     real G[3][3] = { {1,0,0}, {0,100,0}, {0,0,100} };
     real kvector[3] = {KX,0,0};
     evectmatrix H, Hstart, W[NWORK];
     real *eigvals;
     int i, iters;
     int num_iters;
     int parity = NO_PARITY;
     int nx = NX, ny = NY, nz = NZ;
     int num_bands = NUM_BANDS;
     real target_freq = 0.0;
     int do_target = 0;
     evectoperator op;
     evectpreconditioner pre_op;
     void *op_data, *pre_op_data;
     real error_tol = ERROR_TOL;
     int mesh_size = MESH_SIZE, mesh[3];
     epsilon_data ed;
     int stop1 = 0;
     int verbose = 0;
     int which_preconditioner = 2;
     double max_err = 1e20;

     srand(time(NULL));

#if defined(DEBUG) && defined(HAVE_FEENABLEEXCEPT)
     feenableexcept(FE_INVALID | FE_OVERFLOW); /* crash on NaN/overflow */
#endif


     ed.eps_high = EPS_HIGH;
     ed.eps_low = EPS_LOW;
     ed.eps_high_x = EPS_HIGH_X;

#ifdef HAVE_GETOPT
     {
          extern char *optarg;
          extern int optind;
          int c;

          while ((c = getopt(argc, argv, "hs:k:b:n:f:x:y:z:emt:c:g:1pvE:"))
                 != -1)
               switch (c) {
                   case 'h':
                        usage();
                        exit(EXIT_SUCCESS);
                        break;
                   case 's':
                        srand(atoi(optarg));
                        break;
                   case 'k':
                        kvector[0] = atof(optarg);
                        break;
                   case 'b':
                        num_bands = atoi(optarg);
                        CHECK(num_bands > 0, "num_bands must be positive");
                        break;
                   case 'n':
                        ed.eps_high = atof(optarg);
                        CHECK(ed.eps_high > 0.0, "index must be positive");
                        /* the option gives an index; eps_high holds its square */
                        ed.eps_high = ed.eps_high * ed.eps_high;
                        break;
                   case 'f':
                        ed.eps_high_x = atof(optarg);
                        CHECK(ed.eps_high_x > 0.0, "fill must be positive");
                        break;
                   case 'x':
                        nx = atoi(optarg);
                        CHECK(nx > 0, "x size must be positive");
                        break;
                   case 'y':
                        ny = atoi(optarg);
                        CHECK(ny > 0, "y size must be positive");
                        break;
                   case 'z':
                        nz = atoi(optarg);
                        CHECK(nz > 0, "z size must be positive");
                        break;
                   case 'e':
                        parity = EVEN_Z_PARITY;
                        break;
                   case 'm':
                        parity = ODD_Z_PARITY;
                        break;
                   case 't':
                        target_freq = fabs(atof(optarg));
                        do_target = 1;
                        break;
                   case 'E':
                        max_err = fabs(atof(optarg));
                        CHECK(max_err > 0, "maximum error must be positive");
                        break;
                   case 'c':
                        error_tol = fabs(atof(optarg));
                        break;
                   case 'g':
                        mesh_size = atoi(optarg);
                        CHECK(mesh_size > 0, "mesh size must be positive");
                        break;
                   case '1':
                        stop1 = 1;
                        break;
                   case 'p':
                        which_preconditioner = 1;
                        break;
                   case 'v':
                        verbose = 1;
                        break;
                   default:
                        usage();
                        exit(EXIT_FAILURE);
               }

          /* no positional arguments are accepted */
          if (argc != optind) {
               usage();
               exit(EXIT_FAILURE);
          }
     }
#endif

#ifdef ENABLE_PROF
     stop1 = 1;
#endif

     mesh[0] = mesh[1] = mesh[2] = mesh_size;

     printf("Creating Maxwell data...\n");
     mdata = create_maxwell_data(nx, ny, nz, &local_N, &N_start, &alloc_N,
                                 num_bands, NUM_FFT_BANDS);
     CHECK(mdata, "NULL mdata");

     set_maxwell_data_parity(mdata, parity);

     printf("Setting k vector to (%g, %g, %g)...\n",
            kvector[0], kvector[1], kvector[2]);
     update_maxwell_data_k(mdata, kvector, G[0], G[1], G[2]);

     printf("Initializing dielectric...\n");
     /* set up dielectric function (a simple Bragg mirror) */
     set_maxwell_dielectric(mdata, mesh, R, G, epsilon, 0, &ed);

     if (verbose && ny == 1 && nz == 1) {
          printf("dielectric function:\n");
          for (i = 0; i < nx; ++i) {
               if (mdata->eps_inv[i].m00 == mdata->eps_inv[i].m11) {
                    printf("  eps(%g) = %g\n", i * 1.0 / nx,
                           1.0/mdata->eps_inv[i].m00);
               }
               else {
                    printf("  eps(%g) = x: %g OR y: %g\n", i * 1.0 / nx,
                           1.0/mdata->eps_inv[i].m00,
                           1.0/mdata->eps_inv[i].m11);
               }
          }
          printf("\n");
     }

     printf("Allocating fields...\n");
     H = create_evectmatrix(nx * ny * nz, 2, num_bands,
                            local_N, N_start, alloc_N);
     Hstart = create_evectmatrix(nx * ny * nz, 2, num_bands,
                                 local_N, N_start, alloc_N);
     for (i = 0; i < NWORK; ++i)
          W[i] = create_evectmatrix(nx * ny * nz, 2, num_bands,
                                    local_N, N_start, alloc_N);

     CHK_MALLOC(eigvals, real, num_bands);

     for (iters = 0; iters < PROF_ITERS; ++iters) {

          printf("Initializing fields...\n");
          for (i = 0; i < H.n * H.p; ++i)
               ASSIGN_REAL(Hstart.data[i], rand() * 1.0 / RAND_MAX);

          /*****************************************/
          if (do_target) {
               printf("\nSolving for eigenvectors close to %f...\n",
                      target_freq);
               /* Release the target data of a previous iteration so that
                  repeated iterations do not leak it. */
               if (mtdata)
                    destroy_maxwell_target_data(mtdata);
               mtdata = create_maxwell_target_data(mdata, target_freq);
               op = maxwell_target_operator;
               if (which_preconditioner == 1)
                    pre_op = maxwell_target_preconditioner;
               else
                    pre_op = maxwell_target_preconditioner2;
               op_data = (void *) mtdata;
               pre_op_data = (void *) mtdata;
          }
          else {
               op = maxwell_operator;
               if (which_preconditioner == 1)
                    pre_op = maxwell_preconditioner;
               else
                    pre_op = maxwell_preconditioner2;
               op_data = (void *) mdata;
               pre_op_data = (void *) mdata;
          }

          /*****************************************/
          printf("\nSolving for eigenvectors with preconditioning...\n");
          evectmatrix_copy(H, Hstart);
          eigensolver(H, eigvals,
                      op, op_data, NULL,NULL,
                      pre_op, pre_op_data,
                      maxwell_parity_constraint, (void *) mdata,
                      W, NWORK, error_tol, &num_iters, EIGS_DEFAULT_FLAGS);

          /* in target mode, recompute the eigenvalues with maxwell_operator */
          if (do_target)
               eigensolver_get_eigenvals(H, eigvals, maxwell_operator, mdata,
                                         W[0], W[1]);

          report_band_frequencies(eigvals, num_bands, num_iters, kvector[0],
                                  &ed, max_err);

          for (i = 0; i < num_bands; ++i) {
               real kdom[3];
               real k;
               maxwell_dominant_planewave(mdata, H, i + 1, kdom);
               /* (i + 1) / 2 is an integer division */
               if ((i + 1) % 2 == 1)
                    k = kvector[0] + (i + 1) / 2;
               else
                    k = kvector[0] - (i + 1) / 2;
               if (kvector[0] > 0 && kvector[0] < 0.5 && ed.eps_high == 1) {
                    printf("Expected kdom: %15f%15f%15f\n", k, kvector[1], kvector[2]);
                    printf("Got kdom:      %15f%15f%15f\n", kdom[0], kdom[1], kdom[2]);
                    CHECK(k == kdom[0] && kvector[1] == kdom[1] && kvector[2] == kdom[2],
                          "unexpected result from maxwell_dominant_planewave");
               }
          }
     }

     if (!stop1) {

          /*****************************************/

          printf("\nSolving for eigenvectors without preconditioning...\n");
          evectmatrix_copy(H, Hstart);
          eigensolver(H, eigvals,
                      op, op_data, NULL,NULL,
                      NULL, NULL,
                      maxwell_parity_constraint, (void *) mdata,
                      W, NWORK, error_tol, &num_iters, EIGS_DEFAULT_FLAGS);

          if (do_target)
               eigensolver_get_eigenvals(H, eigvals, maxwell_operator, mdata,
                                         W[0], W[1]);

          report_band_frequencies(eigvals, num_bands, num_iters, kvector[0],
                                  &ed, max_err);

          /*****************************************/

          /* one fewer work matrix is passed for this run */
          printf("\nSolving for eigenvectors without conj. grad...\n");
          evectmatrix_copy(H, Hstart);
          eigensolver(H, eigvals,
                      op, op_data, NULL,NULL,
                      pre_op, pre_op_data,
                      maxwell_parity_constraint, (void *) mdata,
                      W, NWORK - 1, error_tol, &num_iters, EIGS_DEFAULT_FLAGS);

          if (do_target)
               eigensolver_get_eigenvals(H, eigvals, maxwell_operator, mdata,
                                         W[0], W[1]);

          report_band_frequencies(eigvals, num_bands, num_iters, kvector[0],
                                  &ed, max_err);

          /*****************************************/
          printf("\nSolving for eigenvectors without precond. or conj. grad...\n");
          evectmatrix_copy(H, Hstart);
          eigensolver(H, eigvals,
                      op, op_data,
                      NULL, NULL, NULL,NULL,
                      maxwell_parity_constraint, (void *) mdata,
                      W, NWORK - 1, error_tol, &num_iters, EIGS_DEFAULT_FLAGS);

          if (do_target)
               eigensolver_get_eigenvals(H, eigvals, maxwell_operator, mdata,
                                         W[0], W[1]);

          report_band_frequencies(eigvals, num_bands, num_iters, kvector[0],
                                  &ed, max_err);

          /*****************************************/

     }

     destroy_evectmatrix(H);
     destroy_evectmatrix(Hstart);
     for (i = 0; i < NWORK; ++i)
          destroy_evectmatrix(W[i]);

     destroy_maxwell_target_data(mtdata);
     destroy_maxwell_data(mdata);

     free(eigvals);

     debug_check_memory_leaks();

     return EXIT_SUCCESS;
}