Esempio n. 1
0
/*
 * Build the single-precision host arrays stored on the mesh:
 *   f_Q / f_rhsQ / f_resQ : zero-initialised storage, K*p_Np*Nfields floats each
 *   f_LIFT                : float copy of mesh->LIFT, row-major, p_Np x (p_Nfp*p_Nfaces)
 *   f_Dr / f_Ds / f_Dt    : float copies of mesh->Dr/Ds/Dt, row-major, p_Np x p_Np
 *   vgeo                  : 12 floats per element holding the geometric factors
 *   surfinfo              : 7 floats per face node (idM, idP, Fscale, Bscale, nx, ny, nz)
 *
 * mesh    : mesh whose operators/connectivity are read and whose f_* / vgeo /
 *           surfinfo members are (re)assigned with freshly allocated buffers.
 * Nfields : number of fields per node; used for the Q storage size and to
 *           scale the node indices written into surfinfo.
 *
 * Returns the minimum of J/sJ over every face of every element (capped at
 * the initial value 1e6).  The original code computed this value but fell
 * off the end of a non-void function without returning it.
 */
double InitCPU3d(Mesh *mesh, int Nfields){

  printf("Np = %d, BSIZE = %d\n", p_Np, BSIZE);

  int n, m, f, k;

  /* Q: field, right-hand side and residual storage, all zeroed by calloc */
  mesh->f_Q    = (float*) calloc(mesh->K*p_Np*Nfields, sizeof(float));
  mesh->f_rhsQ = (float*) calloc(mesh->K*p_Np*Nfields, sizeof(float));
  mesh->f_resQ = (float*) calloc(mesh->K*p_Np*Nfields, sizeof(float));

  /* float LIFT: flatten mesh->LIFT row by row */
  int sz = p_Np*(p_Nfp)*(p_Nfaces)*sizeof(float);
  mesh->f_LIFT = (float*) malloc(sz);

  int sk = 0;
  for(n=0;n<p_Np;++n){
    for(m=0;m<p_Nfp*p_Nfaces;++m){
      mesh->f_LIFT[sk++] = mesh->LIFT[n][m];
    }
  }

  /* float Dr, Ds & Dt: flatten the three p_Np x p_Np matrices row by row */
  sz = p_Np*p_Np*sizeof(float);
  mesh->f_Dr = (float*) malloc(sz);
  mesh->f_Ds = (float*) malloc(sz);
  mesh->f_Dt = (float*) malloc(sz);

  sk = 0;
  for(n=0;n<p_Np;++n){
    for(m=0;m<p_Np;++m){
      mesh->f_Dr[sk] = mesh->Dr[n][m];
      mesh->f_Ds[sk] = mesh->Ds[n][m];
      mesh->f_Dt[sk] = mesh->Dt[n][m];
      ++sk;
    }
  }

  /* vgeo: per element, three groups of (d/dx, d/dy, d/dz) for r, s, t.
     Each group occupies 4 slots; slots 3, 7 and 11 are never written and
     keep the zero that calloc put there. */
  double drdx, dsdx, dtdx;
  double drdy, dsdy, dtdy;
  double drdz, dsdz, dtdz, J;
  mesh->vgeo = (float*) calloc(12*mesh->K, sizeof(float));

  for(k=0;k<mesh->K;++k){
    GeometricFactors3d(mesh, k,
                       &drdx, &dsdx, &dtdx,
                       &drdy, &dsdy, &dtdy,
                       &drdz, &dsdz, &dtdz, &J);

    float *vg = mesh->vgeo + 12*k;
    vg[0] = drdx; vg[1] = drdy; vg[2]  = drdz;
    vg[4] = dsdx; vg[5] = dsdy; vg[6]  = dsdz;
    vg[8] = dtdx; vg[9] = dtdy; vg[10] = dtdz;
  }

  /* surfinfo: 7 floats per face node (idM, idP, Fscale, Bscale, nx, ny, nz) */
  sz = mesh->K*p_Nfp*p_Nfaces*7*sizeof(float);
  mesh->surfinfo = (float*) malloc(sz);

  /* per-element scratch for face normals and surface Jacobians */
  /* NOTE(review): these four BuildVector allocations are never released in
     this function; the matching deallocator is not visible here -- confirm
     and free them before returning. */
  double *nxk = BuildVector(mesh->Nfaces);
  double *nyk = BuildVector(mesh->Nfaces);
  double *nzk = BuildVector(mesh->Nfaces);
  double *sJk = BuildVector(mesh->Nfaces);

  double dt = 1e6;

  sk = 0;
  for(k=0;k<mesh->K;++k){

    GeometricFactors3d(mesh, k,
                       &drdx, &dsdx, &dtdx,
                       &drdy, &dsdy, &dtdy,
                       &drdz, &dsdz, &dtdz, &J);

    Normals3d(mesh, k, nxk, nyk, nzk, sJk);

    for(f=0;f<mesh->Nfaces;++f){

      /* track the smallest volume-to-surface Jacobian ratio */
      dt = min(dt, J/sJk[f]);

      for(m=0;m<p_Nfp;++m){
        int id  = m + f*p_Nfp + p_Nfp*p_Nfaces*k;

        /* split the global node ids into (local node, element) ... */
        int idM = mesh->vmapM[id];
        int idP = mesh->vmapP[id];
        int  nM = idM%p_Np;
        int  nP = idP%p_Np;
        int  kM = (idM-nM)/p_Np;
        int  kP = (idP-nP)/p_Np;

        /* ... and rebuild them as offsets into the Nfields-interleaved storage */
        idM = Nfields*(nM + p_Np*kM);
        idP = Nfields*(nP + p_Np*kP);

        /* stub resolve some other way: negative vmapP entries pass through unchanged */
        if(mesh->vmapP[id]<0){
          idP = mesh->vmapP[id]; /* -ve numbers */
        }

        /* NOTE(review): the integer ids are stored as floats here, so very
           large indices would lose precision -- confirm mesh sizes stay small. */
        mesh->surfinfo[sk++] = idM;
        mesh->surfinfo[sk++] = idP;
        mesh->surfinfo[sk++] = sJk[f]/(2.*J);
        mesh->surfinfo[sk++] = (idM==idP)?-1.:1.;  /* -1 when both sides map to the same node */
        mesh->surfinfo[sk++] = nxk[f];
        mesh->surfinfo[sk++] = nyk[f];
        mesh->surfinfo[sk++] = nzk[f];
      }
    }
  }

  /* the function is declared double: hand the computed scale back to the caller */
  return dt;
}
Esempio n. 2
0
/*
 * Set up the OCCA device, upload the solver data to it and build the kernels.
 *
 * Device buffers created (all assigned to variables declared outside this
 * function): c_Q, c_rhsQ, c_resQ, c_tmp, c_partQ, c_LIFT, c_DrDsDt, c_vgeo,
 * c_mapinfo, c_surfinfo, c_parmapOUT.  Kernels built: volumeKernel,
 * surfaceKernel, rkKernel, partialGetKernel.
 *
 * mesh    : mesh providing K, LIFT, Dr/Ds/Dt, vmapM/vmapP, parNtotalout and
 *           parmapOUT.
 * Nfields : number of fields per node, used to scale the node indices stored
 *           in c_mapinfo.
 *           NOTE(review): buffer sizes use the compile-time p_Nfields while
 *           the index arithmetic uses this argument -- presumably callers
 *           always pass p_Nfields; confirm.
 *
 * Returns the minimum of J/sJ over every face of every element (capped at
 * the initial value 1e6).
 *
 * Every host staging buffer allocated here is released once it has been
 * handed to device.malloc, the same way the original already treated
 * h_DrDsDt, h_mapinfo and h_surfinfo.
 */
double InitOCCA3d(Mesh *mesh, int Nfields){

  device.setup("mode = OpenCL, platformID = 0, deviceID = 2");

  /* Q: zeroed host image used to initialise the three field buffers */
  int sz = mesh->K*(BSIZE)*p_Nfields*sizeof(float);

  float *f_Q = (float*) calloc(mesh->K*BSIZE*p_Nfields, sizeof(float));

  c_Q    = device.malloc(sz, f_Q);
  c_rhsQ = device.malloc(sz, f_Q);
  c_resQ = device.malloc(sz, f_Q);

  free(f_Q);

  printf("sz1= %d\n", sz);

  /* Outgoing-data buffers.  The original initialised these from f_Q, whose
     size is unrelated to parNtotalout, so the copy could read past the end
     of f_Q.  Use a zeroed buffer that is at least sz+1 bytes instead. */
  sz = mesh->parNtotalout*sizeof(float);
  float *f_part = (float*) calloc(mesh->parNtotalout+1, sizeof(float));

  c_tmp   = device.malloc(sz+1, f_part);
  c_partQ = device.malloc(sz+1, f_part);

  free(f_part);

  printf("sz2= %d\n", sz);

  /*  LIFT: reordered so that the face index varies fastest, then the
      node index n, then m.  mesh->LIFT[0] is indexed as one flat array. */
  sz = p_Np*(p_Nfp)*p_Nfaces*sizeof(float);

  float *f_LIFT = (float*) malloc(sz);
  int skL = 0;
  for(int m=0;m<p_Nfp;++m){
    for(int n=0;n<p_Np;++n){
      for(int f=0;f<p_Nfaces;++f){
        f_LIFT[skL++] = mesh->LIFT[0][p_Nfp*p_Nfaces*n+(f+p_Nfaces*m)];
      }
    }
  }

  c_LIFT = device.malloc(sz, f_LIFT);

  free(f_LIFT);

  /* DrDsDt: the three derivative matrices interleaved 4 floats per entry
     (Dr, Ds, Dt, unused zero) in a BSIZE x BSIZE padded layout */
  sz = BSIZE*BSIZE*4*sizeof(float);

  float* h_DrDsDt = (float*) calloc(BSIZE*BSIZE*4, sizeof(float));
  /* note transposed arrays to avoid "bank conflicts" */
  for(int n=0;n<p_Np;++n){
    for(int m=0;m<p_Np;++m){
      const int dst = 4*(m+n*BSIZE);
      const int src = n+m*p_Np;
      h_DrDsDt[dst+0] = mesh->Dr[0][src];
      h_DrDsDt[dst+1] = mesh->Ds[0][src];
      h_DrDsDt[dst+2] = mesh->Dt[0][src];
    }
  }

  c_DrDsDt = device.malloc(sz, h_DrDsDt);

  free(h_DrDsDt);

  /* vgeo: per element, three groups of (d/dx, d/dy, d/dz) for r, s, t.
     Each group occupies 4 slots; slots 3, 7 and 11 stay zero from calloc. */
  double drdx, dsdx, dtdx;
  double drdy, dsdy, dtdy;
  double drdz, dsdz, dtdz, J;
  float *vgeo = (float*) calloc(12*mesh->K, sizeof(float));

  for(int k=0;k<mesh->K;++k){
    GeometricFactors3d(mesh, k,
                       &drdx, &dsdx, &dtdx,
                       &drdy, &dsdy, &dtdy,
                       &drdz, &dsdz, &dtdz, &J);

    float *vg = vgeo + 12*k;
    vg[0] = drdx; vg[1] = drdy; vg[2]  = drdz;
    vg[4] = dsdx; vg[5] = dsdy; vg[6]  = dsdz;
    vg[8] = dtdx; vg[9] = dtdy; vg[10] = dtdz;
  }

  sz = mesh->K*12*sizeof(float);
  c_vgeo = device.malloc(sz, vgeo);

  free(vgeo);

  /* surfinfo: 5 floats per face node (Fscale, Bscale, nx, ny, nz) */
  int sz5 = mesh->K*p_Nfp*p_Nfaces*5*sizeof(float);
  float* h_surfinfo = (float*) malloc(sz5);

  /* mapinfo: 2 ints per face node (idM, idP) */
  int sz2 = mesh->K*p_Nfp*p_Nfaces*2*sizeof(int);
  int* h_mapinfo = (int*) malloc(sz2);

  /* per-element scratch for face normals and surface Jacobians */
  /* NOTE(review): these four BuildVector allocations are never released in
     this function; the matching deallocator is not visible here -- confirm
     and free them before returning. */
  double *nxk = BuildVector(mesh->Nfaces);
  double *nyk = BuildVector(mesh->Nfaces);
  double *nzk = BuildVector(mesh->Nfaces);
  double *sJk = BuildVector(mesh->Nfaces);

  double dt = 1e6;

  const int NfaceNodes = p_Nfp*p_Nfaces;  /* face nodes per element */

  for(int k=0;k<mesh->K;++k){

    GeometricFactors3d(mesh, k,
                       &drdx, &dsdx, &dtdx,
                       &drdy, &dsdy, &dtdy,
                       &drdz, &dsdz, &dtdz, &J);

    Normals3d(mesh, k, nxk, nyk, nzk, sJk);

    for(int f=0;f<mesh->Nfaces;++f){

      /* track the smallest volume-to-surface Jacobian ratio */
      dt = min(dt, J/sJk[f]);

      for(int m=0;m<p_Nfp;++m){
        int n = m + f*p_Nfp + NfaceNodes*k;

        /* split the global node ids into (local node, element) ... */
        int idM = mesh->vmapM[n];
        int idP = mesh->vmapP[n];
        int  nM = idM%p_Np;
        int  nP = idP%p_Np;
        int  kM = (idM-nM)/p_Np;
        int  kP = (idP-nP)/p_Np;

        /* ... and rebuild them as offsets into the BSIZE-padded element blocks */
        idM = nM + Nfields*BSIZE*kM;
        idP = nP + Nfields*BSIZE*kP;

        /* stub resolve some other way: negative vmapP entries pass through unchanged */
        if(mesh->vmapP[n]<0){
          idP = mesh->vmapP[n]; /* -ve numbers */
        }

        /* within one element each quantity occupies its own contiguous
           run of NfaceNodes entries */
        int sk = 2*NfaceNodes*k+m+f*p_Nfp;
        h_mapinfo[sk + 0*NfaceNodes] = idM;
        h_mapinfo[sk + 1*NfaceNodes] = idP;

        sk = 5*NfaceNodes*k+m+f*p_Nfp;
        h_surfinfo[sk + 0*NfaceNodes] = sJk[f]/(2.*J);
        h_surfinfo[sk + 1*NfaceNodes] = (idM==idP)?-1.:1.;  /* -1 when both sides map to the same node */
        h_surfinfo[sk + 2*NfaceNodes] = nxk[f];
        h_surfinfo[sk + 3*NfaceNodes] = nyk[f];
        h_surfinfo[sk + 4*NfaceNodes] = nzk[f];
      }
    }
  }

  c_mapinfo = device.malloc(sz2, h_mapinfo);
  c_surfinfo = device.malloc(sz5, h_surfinfo);

  free(h_mapinfo);
  free(h_surfinfo);

  /* parmapOUT: the device buffer is one byte larger than the map itself, so
     copy through a padded, zeroed host buffer rather than reading sz+1 bytes
     straight from mesh->parmapOUT.
     NOTE(review): assumes mesh->parmapOUT holds parNtotalout integer
     entries -- confirm against the Mesh definition. */
  printf("mesh->parNtotalout=%d\n", mesh->parNtotalout);
  sz = mesh->parNtotalout*sizeof(int);

  int *h_parmapOUT = (int*) calloc(mesh->parNtotalout+1, sizeof(int));
  for(int n=0;n<mesh->parNtotalout;++n){
    h_parmapOUT[n] = mesh->parmapOUT[n];
  }

  c_parmapOUT = device.malloc(sz+1, h_parmapOUT);

  free(h_parmapOUT);

  /* now build kernels */
  occa::kernelInfo dgInfo;

  dgInfo.addDefine("p_Np",      p_Np);
  dgInfo.addDefine("p_Nfp",     p_Nfp);
  dgInfo.addDefine("p_Nfaces",  p_Nfaces);
  dgInfo.addDefine("p_Nfields", p_Nfields);
  dgInfo.addDefine("BSIZE",     BSIZE);
  dgInfo.addDefine("p_max_NfpNfaces_Np", max(p_Nfp*p_Nfaces, p_Np));

  volumeKernel = device.buildKernelFromSource("src/MaxwellsVolumeKernel3D.okl",
                                              "MaxwellsVolumeKernel3D",
                                              dgInfo);

  surfaceKernel = device.buildKernelFromSource("src/MaxwellsSurfaceKernel3D.okl",
                                               "MaxwellsSurfaceKernel3D",
                                               dgInfo);

  rkKernel = device.buildKernelFromSource("src/MaxwellsRKKernel3D.okl",
                                          "MaxwellsRKKernel3D",
                                          dgInfo);

  partialGetKernel = device.buildKernelFromSource("src/MaxwellsPartialGetKernel3D.okl",
                                                  "MaxwellsPartialGetKernel3D",
                                                  dgInfo);

#if 0
  /* disabled debugging dumps; c_surfinfo holds 5 floats per face node */
  diagnose_array<float>("c_DrDsDt", c_DrDsDt, 4*BSIZE*BSIZE);
  diagnose_array<float>("c_LIFT", c_LIFT, p_Nfaces*p_Nfp*p_Np);
  diagnose_array<float>("c_vgeo", c_vgeo, mesh->K*12);
  diagnose_array<float>("c_surfinfo", c_surfinfo, p_Nfaces*p_Nfp*5*mesh->K);
  diagnose_array<int>  ("c_parmapOUT", c_parmapOUT, mesh->parNtotalout);
#endif

  return dt;
}