Exemplo n.º 1
0
// Entry point: reads command lines via BuildVector from standard input and
// applies each one to a MinHeap<int>.
//
// Every line is split on spaces; the first character of the first token is
// passed to RunOperation as the operation selector, together with the full
// token list, the heap and std::cout as the output stream.
//
// Always returns 0.
int main(int argc, char** argv)
{
	std::vector<int> heapVector;
	std::vector<std::string> commandVector;
	BuildVector(commandVector, std::cin);
	heapVector.reserve(500);
	MinHeap<int> heap(heapVector);

	// NOTE(review): "output.txt" is opened (and therefore created/truncated)
	// but nothing is ever written to it -- all output goes to std::cout.
	// The open/close pair is kept to preserve that side effect; confirm
	// whether RunOperation was meant to receive `file` instead.
	std::ofstream file;
	file.open("output.txt");
	for (std::string& line : commandVector)
	{
		// split on the space to get the operation and command
		auto command = SplitString(line, ' ');

		// A line that produces no tokens carries no operation, and
		// dereferencing begin() on it would be undefined behaviour.
		if (command.begin() == command.end())
		{
			continue;
		}

		auto operation = (*command.begin())[0];
		RunOperation(operation, command, heap, std::cout);
	}
	file.close();

	return 0;
}
Exemplo n.º 2
0
/*
 * LoadBalance3d - redistribute the mesh elements over all MPI ranks.
 *
 * Calls ParMETIS_V3_PartMeshKway to pick a destination rank for every locally
 * owned element, then ships each element's vertex ids (mesh->EToV) and vertex
 * coordinates (mesh->GX, mesh->GY, mesh->GZ) to that rank with nonblocking
 * point-to-point messages.
 *
 * On return mesh->EToV, mesh->GX, mesh->GY, mesh->GZ and mesh->K describe the
 * elements received by this rank; the previous arrays are destroyed.
 *
 * The function uses collectives on MPI_COMM_WORLD (Allgather, Alltoall,
 * Allreduce), so every rank has to call it.
 */
void LoadBalance3d(Mesh *mesh){

  int n,p,k,v;

  int nprocs = mesh->nprocs;
  int procid = mesh->procid;
  int **EToV = mesh->EToV;
  double **VX = mesh->GX;
  double **VY = mesh->GY;
  double **VZ = mesh->GZ;

  if(!procid) printf("Root: Entering LoadBalance\n");

  int Nverts = mesh->Nverts;
  
  int *Kprocs = BuildIntVector(nprocs);

  /* local number of elements */
  int Klocal = mesh->K;

  /* find number of elements on all processors */
  MPI_Allgather(&Klocal, 1, MPI_INT, Kprocs, 1, MPI_INT, MPI_COMM_WORLD);

  /* element distribution -- cumulative element count on processes */
  idxtype *elmdist = idxmalloc(nprocs+1, "elmdist");

  elmdist[0] = 0;
  for(p=0;p<nprocs;++p)
    elmdist[p+1] = elmdist[p] + Kprocs[p];

  /* list of element starts */
  idxtype *eptr = idxmalloc(Klocal+1, "eptr");

  eptr[0] = 0;
  for(k=0;k<Klocal;++k)
    eptr[k+1] = eptr[k] + Nverts;

  /* local element to vertex */
  idxtype *eind = idxmalloc(Nverts*Klocal, "eind");

  for(k=0;k<Klocal;++k)
    for(n=0;n<Nverts;++n)
      eind[k*Nverts+n] = EToV[k][n];

  /* weight per element */
  idxtype *elmwgt = idxmalloc(Klocal, "elmwgt");

  for(k=0;k<Klocal;++k)
    elmwgt[k] = 1.;
  
  /* weight flag */
  int wgtflag = 0;
  
  /* number flag (1=fortran, 0=c) */
  int numflag = 0;

  /* ncon = 1 */
  int ncon = 1;

  /* nodes on element face */
  int ncommonnodes = 3;
  
  /* number of partitions */
  int nparts = nprocs;

  /* tpwgts: target weight fraction per (constraint, partition) pair.
     ParMETIS reads ncon*nparts entries, so the array has to be sized by the
     partition count; sizing it by the local element count (as before) is an
     out-of-bounds read whenever a rank owns fewer than nparts elements. */
  float *tpwgts = (float*) calloc(ncon*nparts, sizeof(float));
 
  for(n=0;n<ncon*nparts;++n)
    tpwgts[n] = 1./(float)nprocs;

  float ubvec[MAXNCON];

  for (n=0; n<ncon; ++n)
    ubvec[n] = UNBALANCE_FRACTION;
  
  int options[10];
  
  options[0] = 1;
  options[PMV3_OPTION_DBGLVL] = 7;
  options[PMV3_OPTION_SEED] = 0;

  int edgecut;

  /* part[k] receives the destination rank of local element k */
  idxtype *part = idxmalloc(Klocal, "part");

  MPI_Comm comm;
  MPI_Comm_dup(MPI_COMM_WORLD, &comm);

  ParMETIS_V3_PartMeshKway
    (elmdist, 
     eptr, 
     eind, 
     elmwgt, 
     &wgtflag, 
     &numflag, 
     &ncon, 
     &ncommonnodes,
     &nparts, 
     tpwgts, 
     ubvec, 
     options, 
     &edgecut,
     part, 
     &comm);

  /* the duplicated communicator was only needed by the partitioner */
  MPI_Comm_free(&comm);

  int **outlist = (int**) calloc(nprocs, sizeof(int*));
  double **xoutlist = (double**) calloc(nprocs, sizeof(double*));
  double **youtlist = (double**) calloc(nprocs, sizeof(double*));
  double **zoutlist = (double**) calloc(nprocs, sizeof(double*));

  int *outK = (int*) calloc(nprocs, sizeof(int));
  
  int *inK = (int*) calloc(nprocs, sizeof(int));

  MPI_Request *inrequests = (MPI_Request*) calloc(nprocs, sizeof(MPI_Request));
  MPI_Request *outrequests = (MPI_Request*) calloc(nprocs, sizeof(MPI_Request));
  MPI_Request *xinrequests = (MPI_Request*) calloc(nprocs, sizeof(MPI_Request));
  MPI_Request *xoutrequests = (MPI_Request*) calloc(nprocs, sizeof(MPI_Request));
  MPI_Request *yinrequests = (MPI_Request*) calloc(nprocs, sizeof(MPI_Request));
  MPI_Request *youtrequests = (MPI_Request*) calloc(nprocs, sizeof(MPI_Request));
  MPI_Request *zinrequests = (MPI_Request*) calloc(nprocs, sizeof(MPI_Request));
  MPI_Request *zoutrequests = (MPI_Request*) calloc(nprocs, sizeof(MPI_Request));

  /* count the elements leaving for each rank */
  for(k=0;k<Klocal;++k)
    ++outK[part[k]];
  
  /* get count of incoming elements from each process */
  MPI_Alltoall(outK, 1, MPI_INT, 
               inK,  1, MPI_INT, 
               MPI_COMM_WORLD);

  /* count totals on each process */
  /* NOTE(review): newKprocs is not read afterwards in this function; the
     collective is kept so the communication pattern is unchanged. */
  int *  newKprocs = BuildIntVector(nprocs);
  MPI_Allreduce(outK, newKprocs, nprocs, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

  int totalinK = 0;
  for(p=0;p<nprocs;++p){
    totalinK += inK[p];
  }

  int **newEToV = BuildIntMatrix(totalinK, Nverts);
  double **newVX   = BuildMatrix(totalinK, Nverts);
  double **newVY   = BuildMatrix(totalinK, Nverts);
  double **newVZ   = BuildMatrix(totalinK, Nverts);
  
  /* post the receives: rank p's elements land in rows [cnt, cnt+inK[p]).
     Receiving Nverts*inK[p] values through row cnt assumes the matrix rows
     are stored contiguously -- TODO confirm against BuildMatrix.
     NOTE(review): when inK[p] is 0 for trailing ranks (or totalinK is 0)
     row index cnt equals totalinK, i.e. one past the last row pointer. */
  int cnt = 0;
  for(p=0;p<nprocs;++p){
    MPI_Irecv(newEToV[cnt], Nverts*inK[p], MPI_INT, p, 666+p, MPI_COMM_WORLD,
              inrequests+p);
    MPI_Irecv(newVX[cnt], Nverts*inK[p], MPI_DOUBLE, p, 1666+p, MPI_COMM_WORLD,
              xinrequests+p);
    MPI_Irecv(newVY[cnt], Nverts*inK[p], MPI_DOUBLE, p, 2666+p, MPI_COMM_WORLD,
              yinrequests+p);
    MPI_Irecv(newVZ[cnt], Nverts*inK[p], MPI_DOUBLE, p, 3666+p, MPI_COMM_WORLD,
              zinrequests+p);
    cnt = cnt + inK[p];
  }

  /* pack and send the elements assigned to each rank; the tag carries the
     sender's rank so it matches the receive posted above */
  for(p=0;p<nprocs;++p){
    int cnt = 0;
    outlist[p]  = BuildIntVector(Nverts*outK[p]);
    xoutlist[p]  = BuildVector(Nverts*outK[p]);
    youtlist[p]  = BuildVector(Nverts*outK[p]);
    zoutlist[p]  = BuildVector(Nverts*outK[p]);

    for(k=0;k<Klocal;++k)
      if(part[k]==p){
        for(v=0;v<Nverts;++v){
          outlist[p][cnt] = EToV[k][v]; 
          xoutlist[p][cnt] = VX[k][v];
          youtlist[p][cnt] = VY[k][v];
          zoutlist[p][cnt] = VZ[k][v];
          ++cnt;
        }
      }
    
    MPI_Isend(outlist[p], Nverts*outK[p], MPI_INT, p, 666+procid, MPI_COMM_WORLD, 
              outrequests+p);
    MPI_Isend(xoutlist[p], Nverts*outK[p], MPI_DOUBLE, p, 1666+procid, MPI_COMM_WORLD, 
              xoutrequests+p);
    MPI_Isend(youtlist[p], Nverts*outK[p], MPI_DOUBLE, p, 2666+procid, MPI_COMM_WORLD, 
              youtrequests+p);
    MPI_Isend(zoutlist[p], Nverts*outK[p], MPI_DOUBLE, p, 3666+procid, MPI_COMM_WORLD, 
              zoutrequests+p);
  }

  MPI_Status *instatus = (MPI_Status*) calloc(nprocs, sizeof(MPI_Status));
  MPI_Status *outstatus = (MPI_Status*) calloc(nprocs, sizeof(MPI_Status));

  MPI_Waitall(nprocs,  inrequests, instatus);
  MPI_Waitall(nprocs, xinrequests, instatus);
  MPI_Waitall(nprocs, yinrequests, instatus);
  MPI_Waitall(nprocs, zinrequests, instatus);

  MPI_Waitall(nprocs,  outrequests, outstatus);
  MPI_Waitall(nprocs, xoutrequests, outstatus);
  MPI_Waitall(nprocs, youtrequests, outstatus);
  MPI_Waitall(nprocs, zoutrequests, outstatus);

  /* all sends have completed, so the old mesh arrays can be released */
  if(mesh->GX!=NULL){
    DestroyMatrix(mesh->GX);
    DestroyMatrix(mesh->GY);
    DestroyMatrix(mesh->GZ);
    DestroyIntMatrix(mesh->EToV);
  }

  mesh->GX = newVX;
  mesh->GY = newVY;
  mesh->GZ = newVZ;
  mesh->EToV = newEToV;
  mesh->K =  totalinK;

  for(p=0;p<nprocs;++p){
    if(outlist[p]){
      free(outlist[p]);
      free(xoutlist[p]);
      free(youtlist[p]);
      free(zoutlist[p]);
    }
  }

  /* the pointer tables themselves were previously leaked */
  free(outlist);
  free(xoutlist);
  free(youtlist);
  free(zoutlist);

  free(outK);
  free(inK);
  
  free(inrequests);
  free(outrequests);

  free(xinrequests);
  free(xoutrequests);
  free(yinrequests);
  free(youtrequests);
  free(zinrequests);
  free(zoutrequests);
  free(instatus);
  free(outstatus);

  /* partitioner work arrays and rank counts (previously leaked).
     BuildIntVector results are released with free(), as is already done for
     outlist[p] above; the idxmalloc arrays are assumed to be malloc-backed
     -- TODO confirm against the METIS/ParMETIS version in use. */
  free(tpwgts);
  free(part);
  free(elmwgt);
  free(eind);
  free(eptr);
  free(elmdist);
  free(newKprocs);
  free(Kprocs);

}
Exemplo n.º 3
0
/*
 * InitCPU3d - build the single-precision host arrays stored on `mesh`.
 *
 * Allocates and fills:
 *   mesh->f_Q, f_rhsQ, f_resQ : K*p_Np*Nfields floats each, zero-initialised
 *   mesh->f_LIFT              : mesh->LIFT flattened row by row
 *   mesh->f_Dr, f_Ds, f_Dt    : mesh->Dr/Ds/Dt flattened row by row
 *   mesh->vgeo                : 12 floats per element holding the nine
 *                               geometric factors (slots 3, 7, 11 stay zero)
 *   mesh->surfinfo            : 7 floats per face node:
 *                               idM, idP, sJ/(2J), -1 or +1, nx, ny, nz
 *
 * Returns the smallest J/sJ ratio found over all elements and faces, starting
 * from an upper bound of 1e6.
 */
double InitCPU3d(Mesh *mesh, int Nfields){

  printf("Np = %d, BSIZE = %d\n", p_Np, BSIZE);

  /* Q  */
  mesh->f_Q    = (float*) calloc(mesh->K*p_Np*Nfields, sizeof(float));
  mesh->f_rhsQ = (float*) calloc(mesh->K*p_Np*Nfields, sizeof(float));
  mesh->f_resQ = (float*) calloc(mesh->K*p_Np*Nfields, sizeof(float));

  /*  float LIFT  */
  int sz = p_Np*(p_Nfp)*(p_Nfaces)*sizeof(float);
  mesh->f_LIFT = (float*) malloc(sz);
  int sk = 0, n, m, f, k;

  for(n=0;n<p_Np;++n){
    for(m=0;m<p_Nfp*p_Nfaces;++m){    
      mesh->f_LIFT[sk++] = mesh->LIFT[n][m];
    }
  }

  /*  float Dr & Ds & Dt */
  sz = p_Np*p_Np*sizeof(float);
  mesh->f_Dr = (float*) malloc(sz);
  mesh->f_Ds = (float*) malloc(sz);
  mesh->f_Dt = (float*) malloc(sz);

  sk = 0;
  for(n=0;n<p_Np;++n){
    for(m=0;m<p_Np;++m){    
      mesh->f_Dr[sk] = mesh->Dr[n][m];
      mesh->f_Ds[sk] = mesh->Ds[n][m];
      mesh->f_Dt[sk] = mesh->Dt[n][m];
      ++sk;
    }
  }

  /* vgeo */
  double drdx, dsdx, dtdx;
  double drdy, dsdy, dtdy;
  double drdz, dsdz, dtdz, J;
  mesh->vgeo = (float*) calloc(12*mesh->K, sizeof(float));
  
  for(k=0;k<mesh->K;++k){
    GeometricFactors3d(mesh, k, 
                       &drdx, &dsdx, &dtdx,
                       &drdy, &dsdy, &dtdy,
                       &drdz, &dsdz, &dtdz, &J);
    
    /* three factors per group of four floats; the fourth slot is unused */
    mesh->vgeo[k*12+0] = drdx; mesh->vgeo[k*12+1] = drdy; mesh->vgeo[k*12+2] = drdz;
    mesh->vgeo[k*12+4] = dsdx; mesh->vgeo[k*12+5] = dsdy; mesh->vgeo[k*12+6] = dsdz;
    mesh->vgeo[k*12+8] = dtdx; mesh->vgeo[k*12+9] = dtdy; mesh->vgeo[k*12+10] = dtdz;
  }
  
  /* surfinfo (idM, idP, Fscale, Bscale, nx, ny, nz) */
  sz = mesh->K*p_Nfp*p_Nfaces*7*sizeof(float); 
  
  mesh->surfinfo = (float*) malloc(sz); 
  
  /* local-local info */
  double *nxk = BuildVector(mesh->Nfaces);
  double *nyk = BuildVector(mesh->Nfaces);
  double *nzk = BuildVector(mesh->Nfaces);
  double *sJk = BuildVector(mesh->Nfaces);

  double dt = 1e6;

  sk = 0;
  for(k=0;k<mesh->K;++k){
    
    GeometricFactors3d(mesh, k, 
                       &drdx, &dsdx, &dtdx,
                       &drdy, &dsdy, &dtdy,
                       &drdz, &dsdz, &dtdz, &J);
    
    Normals3d(mesh, k, nxk, nyk, nzk, sJk);
    
    for(f=0;f<mesh->Nfaces;++f){

      dt = min(dt, J/sJk[f]);
      
      for(m=0;m<p_Nfp;++m){
        /* convert node ids from (node + p_Np*element) numbering to offsets
           into the field-interleaved Q array */
        int id = m + f*p_Nfp + p_Nfp*p_Nfaces*k;
        int idM = mesh->vmapM[id];
        int idP = mesh->vmapP[id];
        int  nM = idM%p_Np; 
        int  nP = idP%p_Np; 
        int  kM = (idM-nM)/p_Np;
        int  kP = (idP-nP)/p_Np;
        idM = Nfields*(nM + p_Np*kM);
        idP = Nfields*(nP + p_Np*kP);
        
        /* stub resolve some other way */
        if(mesh->vmapP[id]<0){
          idP = mesh->vmapP[id]; /* -ve numbers */
        }
        
        /* NOTE: the integer ids are stored as floats in surfinfo */
        mesh->surfinfo[sk++] = idM;
        mesh->surfinfo[sk++] = idP;
        mesh->surfinfo[sk++] = sJk[f]/(2.*J);
        mesh->surfinfo[sk++] = (idM==idP)?-1.:1.;
        mesh->surfinfo[sk++] = nxk[f];
        mesh->surfinfo[sk++] = nyk[f];
        mesh->surfinfo[sk++] = nzk[f];
      }
    }
  }

  /* per-face scratch arrays are no longer needed (previously leaked);
     presumably BuildVector memory is malloc-backed -- TODO confirm */
  free(nxk);
  free(nyk);
  free(nzk);
  free(sJk);

  /* the function is declared to return double; falling off the end without
     a return statement is undefined behaviour */
  return dt;
}
Exemplo n.º 4
0
/*
 * InitOCCA3d - set up the OCCA device, upload the mesh data and build the
 * Maxwell kernels.
 *
 * Fills the file-level device handles c_Q, c_rhsQ, c_resQ, c_tmp, c_partQ,
 * c_LIFT, c_DrDsDt, c_vgeo, c_mapinfo, c_surfinfo and c_parmapOUT, and builds
 * volumeKernel, surfaceKernel, rkKernel and partialGetKernel from the .okl
 * sources under src/.
 *
 * Host staging buffers are released right after each upload; this relies on
 * device.malloc(bytes, src) having finished copying when it returns, which
 * the original code already assumed for h_DrDsDt, h_mapinfo and h_surfinfo.
 *
 * Returns the smallest J/sJ ratio found over all elements and faces, starting
 * from an upper bound of 1e6.
 */
double InitOCCA3d(Mesh *mesh, int Nfields){
  
  device.setup("mode = OpenCL, platformID = 0, deviceID = 2");

  /* Q  */
  int sz = mesh->K*(BSIZE)*p_Nfields*sizeof(float); 

  float *f_Q = (float*) calloc(mesh->K*BSIZE*p_Nfields, sizeof(float));

  c_Q    = device.malloc(sz, f_Q);
  c_rhsQ = device.malloc(sz, f_Q);
  c_resQ = device.malloc(sz, f_Q);

  free(f_Q);

  printf("sz1= %d\n", sz);
  
  /* Communication buffers. They are requested with one extra byte and
     initialised from a zeroed host buffer of exactly that size; the previous
     code copied sz+1 bytes out of f_Q, whose size is unrelated to
     parNtotalout, which is an out-of-bounds read whenever f_Q is smaller. */
  sz = mesh->parNtotalout*sizeof(float);
  char *h_zero = (char*) calloc(sz+1, sizeof(char));
  c_tmp  = device.malloc(sz+1, h_zero);
  c_partQ = device.malloc(sz+1, h_zero);
  free(h_zero);

  printf("sz2= %d\n", sz);

  /*  LIFT  */
  sz = p_Np*(p_Nfp)*p_Nfaces*sizeof(float);

  /* reordered copy of LIFT; mesh->LIFT[0] is indexed as one flat array,
     which assumes the matrix rows are contiguous -- TODO confirm */
  float *f_LIFT = (float*) malloc(sz);
  int skL = 0;
  for(int m=0;m<p_Nfp;++m){
    for(int n=0;n<p_Np;++n){
      for(int f=0;f<p_Nfaces;++f){
        f_LIFT[skL++] = mesh->LIFT[0][p_Nfp*p_Nfaces*n+(f+p_Nfaces*m)];
      }
    }
  }

  c_LIFT = device.malloc(sz, f_LIFT);

  free(f_LIFT);
   
  /* DrDsDt */
  sz = BSIZE*BSIZE*4*sizeof(float);

  float* h_DrDsDt = (float*) calloc(BSIZE*BSIZE*4, sizeof(float));
  /* note transposed arrays to avoid "bank conflicts" */
  for(int n=0;n<p_Np;++n){
    for(int m=0;m<p_Np;++m){
      h_DrDsDt[4*(m+n*BSIZE)+0] = mesh->Dr[0][n+m*p_Np];
      h_DrDsDt[4*(m+n*BSIZE)+1] = mesh->Ds[0][n+m*p_Np];
      h_DrDsDt[4*(m+n*BSIZE)+2] = mesh->Dt[0][n+m*p_Np];
    }
  }
   
  c_DrDsDt = device.malloc(sz, h_DrDsDt);
   
  free(h_DrDsDt);

  /* vgeo */
  double drdx, dsdx, dtdx;
  double drdy, dsdy, dtdy;
  double drdz, dsdz, dtdz, J;
  float *vgeo = (float*) calloc(12*mesh->K, sizeof(float));

  for(int k=0;k<mesh->K;++k){
    GeometricFactors3d(mesh, k, 
                       &drdx, &dsdx, &dtdx,
                       &drdy, &dsdy, &dtdy,
                       &drdz, &dsdz, &dtdz, &J);

    /* three factors per group of four floats; the fourth slot stays zero */
    vgeo[k*12+0] = drdx; vgeo[k*12+1] = drdy; vgeo[k*12+2] = drdz;
    vgeo[k*12+4] = dsdx; vgeo[k*12+5] = dsdy; vgeo[k*12+6] = dsdz;
    vgeo[k*12+8] = dtdx; vgeo[k*12+9] = dtdy; vgeo[k*12+10] = dtdz;

  }

  sz = mesh->K*12*sizeof(float);
  c_vgeo = device.malloc(sz, vgeo);

  free(vgeo);
   
  /* surfinfo: (Fscale, Bscale, nx, ny, nz) per face node;
     mapinfo:  (idM, idP) per face node */
  int sz5 = mesh->K*p_Nfp*p_Nfaces*5*sizeof(float); 
  float* h_surfinfo = (float*) malloc(sz5); 

  int sz2 = mesh->K*p_Nfp*p_Nfaces*2*sizeof(int); 
  int* h_mapinfo = (int*) malloc(sz2); 
   
  /* local-local info */
  int sk;
  double *nxk = BuildVector(mesh->Nfaces);
  double *nyk = BuildVector(mesh->Nfaces);
  double *nzk = BuildVector(mesh->Nfaces);
  double *sJk = BuildVector(mesh->Nfaces);

  double dt = 1e6;

  for(int k=0;k<mesh->K;++k){

    GeometricFactors3d(mesh, k, 
                       &drdx, &dsdx, &dtdx,
                       &drdy, &dsdy, &dtdy,
                       &drdz, &dsdz, &dtdz, &J);

    Normals3d(mesh, k, nxk, nyk, nzk, sJk);
     
    for(int f=0;f<mesh->Nfaces;++f){

      dt = min(dt, J/sJk[f]);
       
      for(int m=0;m<p_Nfp;++m){
        /* convert node ids from (node + p_Np*element) numbering to offsets
           into the BSIZE-padded device Q array */
        int n = m + f*p_Nfp + p_Nfp*p_Nfaces*k;
        int idM = mesh->vmapM[n];
        int idP = mesh->vmapP[n];
        int  nM = idM%p_Np; 
        int  nP = idP%p_Np; 
        int  kM = (idM-nM)/p_Np;
        int  kP = (idP-nP)/p_Np;
        idM = nM + Nfields*BSIZE*kM;
        idP = nP + Nfields*BSIZE*kP;
         
        /* stub resolve some other way */
        if(mesh->vmapP[n]<0){
          idP = mesh->vmapP[n]; /* -ve numbers */
        }
 
        /* per element: all idM values first, then all idP values */
        sk = 2*p_Nfp*p_Nfaces*k+m+f*p_Nfp;
        h_mapinfo[sk + 0*p_Nfp*p_Nfaces] = idM;
        h_mapinfo[sk + 1*p_Nfp*p_Nfaces] = idP;

        /* per element: five consecutive planes of p_Nfp*p_Nfaces floats */
        sk = 5*p_Nfp*p_Nfaces*k+m+f*p_Nfp;
        h_surfinfo[sk + 0*p_Nfp*p_Nfaces] = sJk[f]/(2.*J);
        h_surfinfo[sk + 1*p_Nfp*p_Nfaces] = (idM==idP)?-1.:1.;
        h_surfinfo[sk + 2*p_Nfp*p_Nfaces] = nxk[f];
        h_surfinfo[sk + 3*p_Nfp*p_Nfaces] = nyk[f];
        h_surfinfo[sk + 4*p_Nfp*p_Nfaces] = nzk[f];
      }
    }
  }

  /* per-face scratch arrays (previously leaked); presumably BuildVector
     memory is malloc-backed -- TODO confirm */
  free(nxk);
  free(nyk);
  free(nzk);
  free(sJk);
   
  c_mapinfo = device.malloc(sz2, h_mapinfo);
  c_surfinfo = device.malloc(sz5, h_surfinfo);

  free(h_mapinfo);
  free(h_surfinfo);

  printf("mesh->parNtotalout=%d\n", mesh->parNtotalout);
  sz = mesh->parNtotalout*sizeof(int);

  /* The device buffer is requested with one extra byte, so stage the map in a
     host buffer that really is that large; copying sz+1 bytes straight from
     mesh->parmapOUT read one byte past its end. */
  int *h_parmapOUT = (int*) calloc(mesh->parNtotalout+1, sizeof(int));
  for(int n=0;n<mesh->parNtotalout;++n)
    h_parmapOUT[n] = mesh->parmapOUT[n];

  c_parmapOUT = device.malloc(sz+1, h_parmapOUT);

  free(h_parmapOUT);

  /* now build kernels */
  occa::kernelInfo dgInfo;
   
  dgInfo.addDefine("p_Np",      p_Np);
  dgInfo.addDefine("p_Nfp",     p_Nfp);
  dgInfo.addDefine("p_Nfaces",  p_Nfaces);
  dgInfo.addDefine("p_Nfields", p_Nfields);
  dgInfo.addDefine("BSIZE",     BSIZE);
  dgInfo.addDefine("p_max_NfpNfaces_Np", max(p_Nfp*p_Nfaces, p_Np));

  volumeKernel = device.buildKernelFromSource("src/MaxwellsVolumeKernel3D.okl", 
                                              "MaxwellsVolumeKernel3D",
                                              dgInfo);

  surfaceKernel = device.buildKernelFromSource("src/MaxwellsSurfaceKernel3D.okl", 
                                               "MaxwellsSurfaceKernel3D",
                                               dgInfo);
  
  rkKernel = device.buildKernelFromSource("src/MaxwellsRKKernel3D.okl", 
                                          "MaxwellsRKKernel3D",
                                          dgInfo);
  
  partialGetKernel = device.buildKernelFromSource("src/MaxwellsPartialGetKernel3D.okl",
                                                  "MaxwellsPartialGetKernel3D",
                                                  dgInfo);
  
#if 0
  diagnose_array<float>("c_DrDsDt", c_DrDsDt, 4*BSIZE*BSIZE);
  diagnose_array<float>("c_LIFT", c_LIFT, p_Nfaces*p_Nfp*p_Np);
  diagnose_array<float>("c_vgeo", c_vgeo, mesh->K*12);
  diagnose_array<float>("c_surfinfo", c_surfinfo, p_Nfaces*p_Nfp*7*mesh->K);
  diagnose_array<int>  ("c_parmapOUT", c_parmapOUT, mesh->parNtotalout);
#endif
  
  return dt;
}
Exemplo n.º 5
0
/*
 * BuildMaps2d - build the face-node connectivity maps of a 2D mesh.
 *
 * Allocates and fills on `mesh`:
 *   vmapM     : for every face node, the index of that node in the volume
 *               numbering (Fmask entry + element*p_Np)
 *   vmapP     : the matching node on the neighbouring element; on boundary
 *               faces the node itself; on faces owned by another rank a
 *               negative counter (-1, -2, ...) in traversal order
 *   parmapOUT : indices into the field array of the values other ranks need,
 *               parNtotalout entries
 *   f_outQ, f_inQ : zeroed communication buffers, parNtotalout+1 floats each
 *
 * Nodes are paired by comparing coordinates: two nodes match when their
 * squared distance, scaled by sJ^2 of the face, is below NODETOL.
 *
 * Face-node coordinates are exchanged with neighbouring ranks through
 * nonblocking messages on MPI_COMM_WORLD.
 */
void BuildMaps2d(Mesh *mesh){

  int nprocs = mesh->nprocs;
  int procid = mesh->procid;

  int K = mesh->K;
  int Nfaces = mesh->Nfaces;

  mesh->vmapM = BuildIntVector(p_Nfp*p_Nfaces*K);
  mesh->vmapP = BuildIntVector(p_Nfp*p_Nfaces*K);

  int m;
  int k1,f1,n1,id1, k2,f2,p2,n2;

  double x1, y1, x2, y2, d12;
  
  double *nxk = BuildVector(Nfaces);
  double *nyk = BuildVector(Nfaces);
  double *sJk = BuildVector(Nfaces);

  /* first build local */
  for(k1=0;k1<K;++k1){

    /* get some information about the face geometries */
    Normals2d(mesh, k1, nxk, nyk, sJk);

    for(f1=0;f1<Nfaces;++f1){

      /* volume -> face nodes */
      for(n1=0;n1<p_Nfp;++n1){
        id1 = n1+f1*p_Nfp+k1*p_Nfp*p_Nfaces;
        mesh->vmapM[id1] = mesh->Fmask[f1][n1] + k1*p_Np;
      }


      /* find neighbor */
      k2 = mesh->EToE[k1][f1]; 
      f2 = mesh->EToF[k1][f1];
      p2 = mesh->EToP[k1][f1];

      if(k1==k2 || procid!=p2 ){
        /* self-connected or off-rank face: treat as boundary for the
           moment, i.e. every node is its own neighbour */
        for(n1=0;n1<p_Nfp;++n1){
          id1 = n1+f1*p_Nfp+k1*p_Nfp*p_Nfaces;
          mesh->vmapP[id1] = k1*p_Np + mesh->Fmask[f1][n1];
        }
      }else{
        /* interior face on this rank: pair nodes by coordinates */
        for(n1=0;n1<p_Nfp;++n1){
          id1 = n1+f1*p_Nfp+k1*p_Nfp*p_Nfaces;

          x1 = mesh->x[k1][mesh->Fmask[f1][n1]];
          y1 = mesh->y[k1][mesh->Fmask[f1][n1]];
          for(n2=0;n2<p_Nfp;++n2){

            x2 = mesh->x[k2][mesh->Fmask[f2][n2]];
            y2 = mesh->y[k2][mesh->Fmask[f2][n2]];

            /* find normalized distance between these nodes */
            /* [ use sJk as a measure of edge length (ignore factor of 2) ] */
            d12 = ((x1-x2)*(x1-x2) + (y1-y2)*(y1-y2))/(sJk[f1]*sJk[f1]); 
            if(d12<NODETOL){
              mesh->vmapP[id1] = k2*p_Np + mesh->Fmask[f2][n2];
            }
          }
        }
      }
    }
  }

#if 0
  int n;
  for(n=0;n<p_Nfp*p_Nfaces*mesh->K;++n){
    x1 = mesh->x[0][mesh->vmapM[n]];
    y1 = mesh->y[0][mesh->vmapM[n]];
    x2 = mesh->x[0][mesh->vmapP[n]];
    y2 = mesh->y[0][mesh->vmapP[n]];
    d12 = ((x1-x2)*(x1-x2) + (y1-y2)*(y1-y2));
    printf("n:%d  %d -> %d  d=%lg\n", n, mesh->vmapM[n], mesh->vmapP[n], d12);
  }
#endif

  /* now build parallel maps */
  double **xsend = (double**) calloc(nprocs, sizeof(double*));
  double **ysend = (double**) calloc(nprocs, sizeof(double*));
  double **xrecv = (double**) calloc(nprocs, sizeof(double*));
  double **yrecv = (double**) calloc(nprocs, sizeof(double*));

  int **Esend = (int**) calloc(nprocs, sizeof(int*));
  int **Fsend = (int**) calloc(nprocs, sizeof(int*));
  int **Erecv = (int**) calloc(nprocs, sizeof(int*));
  int **Frecv = (int**) calloc(nprocs, sizeof(int*));

  /* one buffer per neighbouring rank; entries for other ranks stay NULL */
  for(p2=0;p2<nprocs;++p2){
    if(mesh->Npar[p2]){
    xsend[p2] = BuildVector(mesh->Npar[p2]*p_Nfp);
    ysend[p2] = BuildVector(mesh->Npar[p2]*p_Nfp);
    Esend[p2] = BuildIntVector(mesh->Npar[p2]*p_Nfp);
    Fsend[p2] = BuildIntVector(mesh->Npar[p2]*p_Nfp);

    xrecv[p2] = BuildVector(mesh->Npar[p2]*p_Nfp);
    yrecv[p2] = BuildVector(mesh->Npar[p2]*p_Nfp);
    Erecv[p2] = BuildIntVector(mesh->Npar[p2]*p_Nfp);
    Frecv[p2] = BuildIntVector(mesh->Npar[p2]*p_Nfp);
    }
  }

  /* skP[p] counts the face nodes packed for rank p */
  int *skP = BuildIntVector(nprocs);
  
  /* send coordinates in local order */
  int cnt;
  for(k1=0;k1<K;++k1){
    for(f1=0;f1<p_Nfaces;++f1){
      p2 = mesh->EToP[k1][f1];
      if(p2!=procid){
        for(n1=0;n1<p_Nfp;++n1){
          xsend[p2][skP[p2]] = mesh->x[k1][mesh->Fmask[f1][n1]];
          ysend[p2][skP[p2]] = mesh->y[k1][mesh->Fmask[f1][n1]];
          Esend[p2][skP[p2]] = mesh->EToE[k1][f1];
          Fsend[p2][skP[p2]] = mesh->EToF[k1][f1];
          ++(skP[p2]);
        }
      }
    }
  }

  MPI_Request *xsendrequests = (MPI_Request*) calloc(nprocs, sizeof(MPI_Request));
  MPI_Request *ysendrequests = (MPI_Request*) calloc(nprocs, sizeof(MPI_Request));
  MPI_Request *xrecvrequests = (MPI_Request*) calloc(nprocs, sizeof(MPI_Request));
  MPI_Request *yrecvrequests = (MPI_Request*) calloc(nprocs, sizeof(MPI_Request));
  MPI_Request *Esendrequests = (MPI_Request*) calloc(nprocs, sizeof(MPI_Request));
  MPI_Request *Fsendrequests = (MPI_Request*) calloc(nprocs, sizeof(MPI_Request));
  MPI_Request *Erecvrequests = (MPI_Request*) calloc(nprocs, sizeof(MPI_Request));
  MPI_Request *Frecvrequests = (MPI_Request*) calloc(nprocs, sizeof(MPI_Request));

  MPI_Status  *status = (MPI_Status*) calloc(nprocs, sizeof(MPI_Status));

  /* requests are packed densely: slot cnt belongs to the cnt-th neighbour.
     Tags carry the receiver's rank on both sides so sends and receives pair up. */
  cnt = 0;
  for(p2=0;p2<nprocs;++p2){
    if(p2!=procid && mesh->Npar[p2]!=0){
      int Nout = mesh->Npar[p2]*p_Nfp;
      
      MPI_Isend(xsend[p2], Nout, MPI_DOUBLE, p2,  666+p2, MPI_COMM_WORLD, xsendrequests+cnt);
      MPI_Isend(ysend[p2], Nout, MPI_DOUBLE, p2, 1666+p2, MPI_COMM_WORLD, ysendrequests+cnt);
      MPI_Isend(Esend[p2], Nout, MPI_INT,    p2, 2666+p2, MPI_COMM_WORLD, Esendrequests+cnt);
      MPI_Isend(Fsend[p2], Nout, MPI_INT,    p2, 3666+p2, MPI_COMM_WORLD, Fsendrequests+cnt);

      MPI_Irecv(xrecv[p2], Nout, MPI_DOUBLE, p2,  666+procid, MPI_COMM_WORLD, xrecvrequests+cnt);
      MPI_Irecv(yrecv[p2], Nout, MPI_DOUBLE, p2, 1666+procid, MPI_COMM_WORLD, yrecvrequests+cnt);
      MPI_Irecv(Erecv[p2], Nout, MPI_INT,    p2, 2666+procid, MPI_COMM_WORLD, Erecvrequests+cnt);
      MPI_Irecv(Frecv[p2], Nout, MPI_INT,    p2, 3666+procid, MPI_COMM_WORLD, Frecvrequests+cnt);
      ++cnt;
    }
  }

  MPI_Waitall(cnt, xsendrequests, status);
  MPI_Waitall(cnt, ysendrequests, status);
  MPI_Waitall(cnt, Esendrequests, status);
  MPI_Waitall(cnt, Fsendrequests, status);
  MPI_Waitall(cnt, xrecvrequests, status);
  MPI_Waitall(cnt, yrecvrequests, status);
  MPI_Waitall(cnt, Erecvrequests, status);
  MPI_Waitall(cnt, Frecvrequests, status);
  
  /* add up the total number of outgoing/ingoing nodes */
  mesh->parNtotalout = 0;
  for(p2=0;p2<nprocs;++p2)
    mesh->parNtotalout += skP[p2]*p_Nfields;

  mesh->parmapOUT = BuildIntVector(mesh->parNtotalout);

  /* now match up local nodes with the requested (recv'ed nodes) */
  /* NOTE(review): the loop bound skP[p2] is the number of nodes *sent* to
     p2; it is used here as the number received, which holds as long as both
     equal Npar[p2]*p_Nfp -- confirm against how Npar is built. */
  int sk = 0;
  for(p2=0;p2<nprocs;++p2){
    /* for each received face */
    for(m=0;m<skP[p2];++m){
      k1 = Erecv[p2][m];
      f1 = Frecv[p2][m];
      x2 = xrecv[p2][m];
      y2 = yrecv[p2][m];

      Normals2d(mesh, k1, nxk, nyk, sJk);
      
      for(n1=0;n1<p_Nfp;++n1){

        x1 = mesh->x[k1][mesh->Fmask[f1][n1]];
        y1 = mesh->y[k1][mesh->Fmask[f1][n1]];
        
        d12 = ((x1-x2)*(x1-x2) + (y1-y2)*(y1-y2))/(sJk[f1]*sJk[f1]); 
        
        if(d12<NODETOL){
          int fld;
          for(fld=0;fld<p_Nfields;++fld){
#ifdef CUDA
            mesh->parmapOUT[sk++] = k1*BSIZE*p_Nfields+mesh->Fmask[f1][n1] + BSIZE*fld;
#else
            mesh->parmapOUT[sk++] = p_Nfields*(k1*p_Np+mesh->Fmask[f1][n1]) + fld;
#endif
          }
        }
      }
    }
  }

  /* create incoming node map */
  int parcnt = -1;
  for(p2=0;p2<nprocs;++p2){
    for(k1=0;k1<K;++k1){
      for(f1=0;f1<p_Nfaces;++f1){
        if(mesh->EToP[k1][f1]==p2 && p2!=procid){
          for(n1=0;n1<p_Nfp;++n1){
            id1 = n1+f1*p_Nfp+k1*p_Nfp*p_Nfaces;
            mesh->vmapP[id1] = parcnt;
            --parcnt;
          }
        }
      }
    }
  }

  /* buffers for communication */
  mesh->f_outQ = (float*) calloc(mesh->parNtotalout+1, sizeof(float));
  mesh->f_inQ  = (float*) calloc(mesh->parNtotalout+1, sizeof(float));

  /* release all temporaries (previously leaked). Entries of the per-rank
     tables that were never allocated are NULL, which free() accepts.
     Presumably BuildVector/BuildIntVector memory is malloc-backed
     -- TODO confirm. */
  for(p2=0;p2<nprocs;++p2){
    free(xsend[p2]);
    free(ysend[p2]);
    free(Esend[p2]);
    free(Fsend[p2]);
    free(xrecv[p2]);
    free(yrecv[p2]);
    free(Erecv[p2]);
    free(Frecv[p2]);
  }

  free(xsend);
  free(ysend);
  free(xrecv);
  free(yrecv);
  free(Esend);
  free(Fsend);
  free(Erecv);
  free(Frecv);

  free(xsendrequests);
  free(ysendrequests);
  free(xrecvrequests);
  free(yrecvrequests);
  free(Esendrequests);
  free(Fsendrequests);
  free(Erecvrequests);
  free(Frecvrequests);
  free(status);

  free(skP);

  free(nxk);
  free(nyk);
  free(sJk);
  
}
Exemplo n.º 6
0
/*
 * ReadMesh3d - read a tetrahedral mesh file and keep this rank's share of it.
 *
 * The file is parsed line by line: six header lines are skipped, the next
 * line supplies the vertex and element counts, two more lines are skipped,
 * then one line per vertex ("id x y z") and, after two more skipped lines,
 * one line per element (three ignored integers followed by four 1-based
 * vertex ids).
 *
 * Every rank reads the whole file but stores only a contiguous block of
 * elements: each rank gets floor(K/nprocs) elements and the last rank also
 * takes the remainder.
 *
 * Returns a newly allocated Mesh with Nv, Nverts, Nedges, Nfaces, procid,
 * nprocs, K (local element count), EToV (0-based vertex ids) and GX/GY/GZ
 * (vertex coordinates per element) filled in. Returns NULL, after printing a
 * message to stderr, if the file cannot be opened.
 */
Mesh *ReadMesh3d(char *filename){

  int n;

  char buf[BUFSIZ];
  
  FILE *fp = fopen(filename, "r");

  /* a missing or unreadable file used to hand a NULL stream to fgets */
  if(fp==NULL){
    fprintf(stderr, "ReadMesh3d: could not open mesh file %s\n", filename);
    return NULL;
  }

  Mesh *mesh = (Mesh*) calloc(1, sizeof(Mesh));

  /* assume modified Gambit neutral format */
  for(n=0;n<6;++n)
    fgets(buf, BUFSIZ, fp);

  fgets(buf, BUFSIZ, fp);
  sscanf(buf, "%d %d \n", &(mesh->Nv), &(mesh->K));
  mesh->Nverts = 4; /* assume tets */
  mesh->Nedges = 6; /* assume tets */
  mesh->Nfaces = 4; /* assume tets */

  fgets(buf, BUFSIZ, fp);
  fgets(buf, BUFSIZ, fp);

  /* read vertex coordinates */
  double *VX = BuildVector(mesh->Nv);
  double *VY = BuildVector(mesh->Nv);
  double *VZ = BuildVector(mesh->Nv);
  for(n=0;n<mesh->Nv;++n){
    fgets(buf, BUFSIZ, fp);
    sscanf(buf, "%*d %lf %lf %lf", VX+n, VY+n, VZ+n);
  }

  /* decide on partition */
  int procid, nprocs;

  MPI_Comm_rank(MPI_COMM_WORLD, &procid);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  mesh->procid = procid;
  mesh->nprocs = nprocs;

  /* assume this proc owns a block of elements */

  int Klocal, Kstart;
  int *Kprocs = (int*) calloc(nprocs, sizeof(int));
  int p;
  
  Klocal = (int) ( (double)(mesh->K)/(double)nprocs );
  
  for(p=0;p<nprocs-1;++p){
    Kprocs[p] = Klocal;
  }
  /* p == nprocs-1 here: the last rank absorbs the remainder */
  Kprocs[p] = Klocal + mesh->K - nprocs*Klocal;
  
  
  /* index of the first element owned by this rank */
  Kstart= 0;
  for(p=0;p<procid;++p)
    Kstart += Kprocs[p];
  
  Klocal = Kprocs[procid];

  /* read element to vertex connectivity */
  fgets(buf, BUFSIZ, fp);
  fgets(buf, BUFSIZ, fp);
  mesh->EToV = BuildIntMatrix(Klocal, mesh->Nverts);
  mesh->GX = BuildMatrix(Klocal, mesh->Nverts);
  mesh->GY = BuildMatrix(Klocal, mesh->Nverts);
  mesh->GZ = BuildMatrix(Klocal, mesh->Nverts);

  int sk = 0, v;
  for(n=0;n<mesh->K;++n){
    /* every element line is consumed; only the local block is parsed */
    fgets(buf, BUFSIZ, fp);
    if(n>=Kstart && n<Kstart+Klocal){
      sscanf(buf, "%*d %*d %*d %d %d %d %d", 
             mesh->EToV[sk]+0, mesh->EToV[sk]+1,
             mesh->EToV[sk]+2, mesh->EToV[sk]+3);
      
      /* correct to 0-index */
      --(mesh->EToV[sk][0]);
      --(mesh->EToV[sk][1]);
      --(mesh->EToV[sk][2]);
      --(mesh->EToV[sk][3]);

      /* NOTE(review): vertex ids from the file are not range-checked
         against Nv before indexing the coordinate arrays */
      for(v=0;v<mesh->Nverts;++v){
        mesh->GX[sk][v] = VX[mesh->EToV[sk][v]];
        mesh->GY[sk][v] = VY[mesh->EToV[sk][v]];
        mesh->GZ[sk][v] = VZ[mesh->EToV[sk][v]];
      }

      ++sk;
    }
  }
  fgets(buf, BUFSIZ, fp);
  fgets(buf, BUFSIZ, fp);

  /* from here on K is the local element count */
  mesh->K = Klocal;

  fclose(fp);

  /* the global coordinate tables and rank counts were only needed while
     reading (previously leaked); presumably BuildVector memory is
     malloc-backed -- TODO confirm */
  free(VX);
  free(VY);
  free(VZ);
  free(Kprocs);

  return mesh;
  
}
/* TargetIntrinsicLower - To handle builtins, we want to expand the
 * invocation into normal LLVM code.  If the target can handle the builtin, this
 * function should emit the expanded code and return true.
 */
bool TreeToLLVM::TargetIntrinsicLower(tree exp,
                                      unsigned FnCode,
                                      Value *DestLoc,
                                      Value *&Result,
                                      const Type *ResultType,
                                      std::vector<Value*> &Ops,
                                      SmallVector<tree, 8> &Args,
                                      BasicBlock *CurBB,
                                      bool ResIsSigned,
                                      bool ExpIsSigned) {
  switch (FnCode) {
  default: break;
  case ALTIVEC_BUILTIN_VADDFP:
  case ALTIVEC_BUILTIN_VADDUBM:
  case ALTIVEC_BUILTIN_VADDUHM:
  case ALTIVEC_BUILTIN_VADDUWM:
    Result = BinaryOperator::createAdd(Ops[0], Ops[1], "tmp", CurBB);
    return true;
  case ALTIVEC_BUILTIN_VSUBFP:
  case ALTIVEC_BUILTIN_VSUBUBM:
  case ALTIVEC_BUILTIN_VSUBUHM:
  case ALTIVEC_BUILTIN_VSUBUWM:
    Result = BinaryOperator::createSub(Ops[0], Ops[1], "tmp", CurBB);
    return true;
  case ALTIVEC_BUILTIN_VAND:
    Result = BinaryOperator::createAnd(Ops[0], Ops[1], "tmp", CurBB);
    return true;
  case ALTIVEC_BUILTIN_VANDC:
    Ops[1] = BinaryOperator::createNot(Ops[1], "tmp", CurBB);
    Result = BinaryOperator::createAnd(Ops[0], Ops[1], "tmp", CurBB);
    return true;
  case ALTIVEC_BUILTIN_VOR:
    Result = BinaryOperator::createOr(Ops[0], Ops[1], "tmp", CurBB);
    return true;
  case ALTIVEC_BUILTIN_VNOR:
    Result = BinaryOperator::createOr(Ops[0], Ops[1], "tmp", CurBB);
    Result = BinaryOperator::createNot(Result, "tmp", CurBB);
    return true;
  case ALTIVEC_BUILTIN_VXOR:
    Result = BinaryOperator::createXor(Ops[0], Ops[1], "tmp", CurBB);
    return true;
  case ALTIVEC_BUILTIN_LVSL: {
      static Constant *Cache = NULL;
      MergeIntPtrOperand(this, Cache, 0, "llvm.ppc.altivec.lvsl",
                         ResultType, Ops, CurBB, Result);
    }
    return true;
  case ALTIVEC_BUILTIN_LVSR: {
      static Constant *Cache = NULL;
      MergeIntPtrOperand(this, Cache, 0, "llvm.ppc.altivec.lvsr",
                         ResultType, Ops, CurBB, Result);
    }
    return true;
  case ALTIVEC_BUILTIN_LVX: {
      static Constant *Cache = NULL;
      MergeIntPtrOperand(this, Cache, 0, "llvm.ppc.altivec.lvx",
                         ResultType, Ops, CurBB, Result);
    }
    return true;
  case ALTIVEC_BUILTIN_LVXL: {
      static Constant *Cache = NULL;
      MergeIntPtrOperand(this, Cache, 0, "llvm.ppc.altivec.lvxl",
                         ResultType, Ops, CurBB, Result);
    }
    return true;
  case ALTIVEC_BUILTIN_LVEBX: {
      static Constant *Cache = NULL;
      MergeIntPtrOperand(this, Cache, 0, "llvm.ppc.altivec.lvebx",
                         ResultType, Ops, CurBB, Result);
    }
    return true;
  case ALTIVEC_BUILTIN_LVEHX: {
      static Constant *Cache = NULL;
      MergeIntPtrOperand(this, Cache, 0, "llvm.ppc.altivec.lvehx",
                         ResultType, Ops, CurBB, Result);
    }
    return true;
  case ALTIVEC_BUILTIN_LVEWX: {
      static Constant *Cache = NULL;
      MergeIntPtrOperand(this, Cache, 0, "llvm.ppc.altivec.lvewx",
                         ResultType, Ops, CurBB, Result);
    }
    return true;
  case ALTIVEC_BUILTIN_STVX: {
      static Constant *Cache = NULL;
      MergeIntPtrOperand(this, Cache, 1, "llvm.ppc.altivec.stvx",
                         ResultType, Ops, CurBB, Result);
    }
    return true;
  case ALTIVEC_BUILTIN_STVEBX: {
      static Constant *Cache = NULL;
      MergeIntPtrOperand(this, Cache, 1, "llvm.ppc.altivec.stvebx",
                         ResultType, Ops, CurBB, Result);
    }
    return true;
  case ALTIVEC_BUILTIN_STVEHX: {
      static Constant *Cache = NULL;
      MergeIntPtrOperand(this, Cache, 1, "llvm.ppc.altivec.stvehx",
                         ResultType, Ops, CurBB, Result);
    }
    return true;
  case ALTIVEC_BUILTIN_STVEWX: {
      static Constant *Cache = NULL;
      MergeIntPtrOperand(this, Cache, 1, "llvm.ppc.altivec.stvewx",
                         ResultType, Ops, CurBB, Result);
    }
    return true;
  case ALTIVEC_BUILTIN_STVXL: {
      static Constant *Cache = NULL;
      MergeIntPtrOperand(this, Cache, 1, "llvm.ppc.altivec.stvxl",
                         ResultType, Ops, CurBB, Result);
    }
    return true;
  case ALTIVEC_BUILTIN_VSPLTISB:
    if (Constant *Elt = dyn_cast<ConstantInt>(Ops[0])) {
      Elt = ConstantExpr::getIntegerCast(Elt, Type::Int8Ty, true);
      Result = BuildVector(Elt, Elt, Elt, Elt,  Elt, Elt, Elt, Elt,
                           Elt, Elt, Elt, Elt,  Elt, Elt, Elt, Elt, NULL);
    } else {
      error("%Helement must be an immediate", &EXPR_LOCATION(exp));
      Result = UndefValue::get(VectorType::get(Type::Int8Ty, 16));
    }
    return true;
  case ALTIVEC_BUILTIN_VSPLTISH:
    if (Constant *Elt = dyn_cast<ConstantInt>(Ops[0])) {
      Elt = ConstantExpr::getIntegerCast(Elt, Type::Int16Ty, true);
      Result = BuildVector(Elt, Elt, Elt, Elt,  Elt, Elt, Elt, Elt, NULL);
    } else {
      error("%Helement must be an immediate", &EXPR_LOCATION(exp));
      Result = UndefValue::get(VectorType::get(Type::Int16Ty, 8));
    }
    return true;
  case ALTIVEC_BUILTIN_VSPLTISW:
    if (Constant *Elt = dyn_cast<ConstantInt>(Ops[0])) {
      Elt = ConstantExpr::getIntegerCast(Elt, Type::Int32Ty, true);
      Result = BuildVector(Elt, Elt, Elt, Elt, NULL);
    } else {
      error("%Hmask must be an immediate", &EXPR_LOCATION(exp));
      Result = UndefValue::get(VectorType::get(Type::Int32Ty, 4));
    }
    return true;
  case ALTIVEC_BUILTIN_VSPLTB:
    if (ConstantInt *Elt = dyn_cast<ConstantInt>(Ops[1])) {
      int EV = Elt->getZExtValue();
      Result = BuildVectorShuffle(Ops[0], Ops[0],
                                  EV, EV, EV, EV, EV, EV, EV, EV,
                                  EV, EV, EV, EV, EV, EV, EV, EV);
    } else {
      error("%Helement number must be an immediate", &EXPR_LOCATION(exp));
      Result = Ops[0];
    }
    return true;
  case ALTIVEC_BUILTIN_VSPLTH:
    if (ConstantInt *Elt = dyn_cast<ConstantInt>(Ops[1])) {
      int EV = Elt->getZExtValue();
      Result = BuildVectorShuffle(Ops[0], Ops[0],
                                  EV, EV, EV, EV, EV, EV, EV, EV);
    } else {
      error("%Helement number must be an immediate", &EXPR_LOCATION(exp));
      Result = Ops[0];
    }
    return true;
  case ALTIVEC_BUILTIN_VSPLTW:
    if (ConstantInt *Elt = dyn_cast<ConstantInt>(Ops[1])) {
      int EV = Elt->getZExtValue();
      Result = BuildVectorShuffle(Ops[0], Ops[0], EV, EV, EV, EV);
    } else {
      error("%Helement number must be an immediate", &EXPR_LOCATION(exp));
      Result = Ops[0];
    }
    return true;
  case ALTIVEC_BUILTIN_VSLDOI_16QI:
  case ALTIVEC_BUILTIN_VSLDOI_8HI:
  case ALTIVEC_BUILTIN_VSLDOI_4SI:
  case ALTIVEC_BUILTIN_VSLDOI_4SF:
    if (ConstantInt *Elt = dyn_cast<ConstantInt>(Ops[2])) {
      /* Map all of these to a shuffle. */
      unsigned Amt = Elt->getZExtValue() & 15;
      VectorType *v16i8 = VectorType::get(Type::Int8Ty, 16);
      Value *Op0 = Ops[0];
      Instruction::CastOps opc = CastInst::getCastOpcode(Op0,
        IntrinsicOpIsSigned(Args,0), ResultType, false);
      Ops[0] = CastToType(opc, Op0, v16i8);
      Value *Op1 = Ops[1];
      opc = CastInst::getCastOpcode(Op1,
        IntrinsicOpIsSigned(Args,1), ResultType, false);
      Ops[1] = CastToType(opc, Op1, v16i8);
      Result = BuildVectorShuffle(Ops[0], Ops[1],
                                  Amt, Amt+1, Amt+2, Amt+3,
                                  Amt+4, Amt+5, Amt+6, Amt+7,
                                  Amt+8, Amt+9, Amt+10, Amt+11,
                                  Amt+12, Amt+13, Amt+14, Amt+15);
    } else {
      error("%Hshift amount must be an immediate", &EXPR_LOCATION(exp));
      Result = Ops[0];
    }
    return true;
  case ALTIVEC_BUILTIN_VPKUHUM: {
    Value *Op0 = Ops[0];
    Instruction::CastOps opc = CastInst::getCastOpcode(Op0,
        IntrinsicOpIsSigned(Args,0), ResultType, ExpIsSigned);
    Ops[0] = CastInst::create(opc, Op0, ResultType, Op0->getName(), CurBB);
    Value *Op1 = Ops[1];
    opc = CastInst::getCastOpcode(Op1,
        IntrinsicOpIsSigned(Args,1), ResultType, ExpIsSigned);
    Ops[1] = CastInst::create(opc, Op1, ResultType, Op1->getName(), CurBB);
    Result = BuildVectorShuffle(Ops[0], Ops[1], 1, 3, 5, 7, 9, 11, 13, 15,
                                17, 19, 21, 23, 25, 27, 29, 31);
    return true;
  }
  case ALTIVEC_BUILTIN_VPKUWUM: {
    Value *Op0 = Ops[0];
    Instruction::CastOps opc = CastInst::getCastOpcode(Op0,
        IntrinsicOpIsSigned(Args,0), ResultType, ExpIsSigned);
    Ops[0] = CastInst::create(opc, Op0, ResultType, Op0->getName(), CurBB);
    Value *Op1 = Ops[1];
    opc = CastInst::getCastOpcode(Op1,
        IntrinsicOpIsSigned(Args,1), ResultType, ExpIsSigned);
    Ops[1] = CastInst::create(opc, Op1, ResultType, Op1->getName(), CurBB);
    Result = BuildVectorShuffle(Ops[0], Ops[1], 1, 3, 5, 7, 9, 11, 13, 15);
    return true;
  }
  case ALTIVEC_BUILTIN_VMRGHB:
    Result = BuildVectorShuffle(Ops[0], Ops[1],
                                0, 16, 1, 17, 2, 18, 3, 19,
                                4, 20, 5, 21, 6, 22, 7, 23);
    return true;
  case ALTIVEC_BUILTIN_VMRGHH:
    Result = BuildVectorShuffle(Ops[0], Ops[1], 0, 8, 1, 9, 2, 10, 3, 11);
    return true;
  case ALTIVEC_BUILTIN_VMRGHW:
    Result = BuildVectorShuffle(Ops[0], Ops[1], 0, 4, 1, 5);
    return true;
  case ALTIVEC_BUILTIN_VMRGLB:
    Result = BuildVectorShuffle(Ops[0], Ops[1],
                                 8, 24,  9, 25, 10, 26, 11, 27,
                                12, 28, 13, 29, 14, 30, 15, 31);
    return true;
  case ALTIVEC_BUILTIN_VMRGLH:
    Result = BuildVectorShuffle(Ops[0], Ops[1], 4, 12, 5, 13, 6, 14, 7, 15);
    return true;
  case ALTIVEC_BUILTIN_VMRGLW:
    Result = BuildVectorShuffle(Ops[0], Ops[1], 2, 6, 3, 7);
    return true;
  case ALTIVEC_BUILTIN_ABS_V4SF: {
    /* and out sign bits */
    VectorType *v4i32 = VectorType::get(Type::Int32Ty, 4);
    Ops[0] = new BitCastInst(Ops[0], v4i32, Ops[0]->getName(),CurBB);
    Constant *C = ConstantInt::get(Type::Int32Ty, 0x7FFFFFFF);
    C = ConstantVector::get(std::vector<Constant*>(4, C));
    Result = BinaryOperator::createAnd(Ops[0], C, "tmp", CurBB);
    TargetIntrinsicCastResult(Result, ResultType,
                              ResIsSigned, ExpIsSigned, CurBB);
    return true;
  }
  case ALTIVEC_BUILTIN_ABS_V4SI:
  case ALTIVEC_BUILTIN_ABS_V8HI:
  case ALTIVEC_BUILTIN_ABS_V16QI: { /* iabs(x) -> smax(x, 0-x) */
    Result = BinaryOperator::createNeg(Ops[0], "tmp", CurBB);
    /* get the right smax intrinsic. */
    static Constant *smax[3];
    const VectorType *PTy = cast<VectorType>(ResultType);
    unsigned N = GetAltivecTypeNumFromType(PTy->getElementType());
    if (smax[N] == 0) {
      Module *M = CurBB->getParent()->getParent();
      smax[N] = M->getOrInsertFunction(std::string("llvm.ppc.altivec.vmaxs")+
                         GetAltivecLetterFromType(PTy->getElementType()),
                                      ResultType, ResultType, ResultType, NULL);
    }
    Result = new CallInst(smax[N], Ops[0], Result, "tmp", CurBB);
    return true;
  }
  case ALTIVEC_BUILTIN_ABSS_V4SI:
  case ALTIVEC_BUILTIN_ABSS_V8HI:
  case ALTIVEC_BUILTIN_ABSS_V16QI: { /* iabss(x) -> smax(x, satsub(0,x)) */
    static Constant *sxs[3], *smax[3];
    /* get the right satsub intrinsic. */
    const VectorType *PTy = cast<VectorType>(ResultType);
    unsigned N = GetAltivecTypeNumFromType(PTy->getElementType());
    if (sxs[N] == 0) {
      Module *M = CurBB->getParent()->getParent();
      sxs[N] = M->getOrInsertFunction(std::string("llvm.ppc.altivec.vsubs")+
                     GetAltivecLetterFromType(PTy->getElementType())+"s",
                                      ResultType, ResultType, ResultType, NULL);
    }
    Result = Constant::getNullValue(ResultType);
    Result = new CallInst(sxs[N], Result, Ops[0], "tmp", CurBB);
    /* get the right smax intrinsic. */
    if (smax[N] == 0) {
      Module *M = CurBB->getParent()->getParent();
      smax[N] = M->getOrInsertFunction(std::string("llvm.ppc.altivec.vmaxs")+
                         GetAltivecLetterFromType(PTy->getElementType()),
                                      ResultType, ResultType, ResultType, NULL);
    }
    Result = new CallInst(smax[N], Ops[0], Result, "tmp", CurBB);
    return true;
  }
  }

  return false;
}
Exemplo n.º 8
0
// TargetIntrinsicLower - To handle builtins, we want to expand the
// invocation into normal LLVM code.  If the target can handle the builtin, this
// function should emit the expanded code and return true.
//
// Parameters:
//   exp        - the builtin call expression; used here only to attach a
//                source location to diagnostics.
//   FnCode     - the builtin being lowered (an ALTIVEC_BUILTIN_* code).
//   DestLoc    - not referenced by any case in this function.
//   Result     - receives the value produced by the expansion.
//   ResultType - LLVM type of the builtin's result; used as a bitcast target
//                and to select the element-size-specific intrinsic.
//   Ops        - the call operands.  Some cases overwrite entries in place
//                (e.g. with a bitcast or a negated copy).
//
// Returns true when FnCode is one of the builtins handled below, false
// otherwise.  Note that when a builtin requires an immediate operand and does
// not get one, an error is reported, Result is set to a placeholder (undef or
// Ops[0]) and the function still returns true.
//
bool TreeToLLVM::TargetIntrinsicLower(tree exp,
                                      unsigned FnCode,
                                      const MemRef *DestLoc,
                                      Value *&Result,
                                      const Type *ResultType,
                                      std::vector<Value*> &Ops) {
  switch (FnCode) {
  default: break;

  // Plain element-wise arithmetic and logic map directly onto LLVM binary
  // operators on the vector operands.
  case ALTIVEC_BUILTIN_VADDFP:
  case ALTIVEC_BUILTIN_VADDUBM:
  case ALTIVEC_BUILTIN_VADDUHM:
  case ALTIVEC_BUILTIN_VADDUWM:
    Result = Builder.CreateAdd(Ops[0], Ops[1], "tmp");
    return true;
  case ALTIVEC_BUILTIN_VSUBFP:
  case ALTIVEC_BUILTIN_VSUBUBM:
  case ALTIVEC_BUILTIN_VSUBUHM:
  case ALTIVEC_BUILTIN_VSUBUWM:
    Result = Builder.CreateSub(Ops[0], Ops[1], "tmp");
    return true;
  case ALTIVEC_BUILTIN_VAND:
    Result = Builder.CreateAnd(Ops[0], Ops[1], "tmp");
    return true;
  case ALTIVEC_BUILTIN_VANDC:
    // and-with-complement: Ops[0] & ~Ops[1].
    Ops[1] = Builder.CreateNot(Ops[1], "tmp");
    Result = Builder.CreateAnd(Ops[0], Ops[1], "tmp");
    return true;
  case ALTIVEC_BUILTIN_VOR:
    Result = Builder.CreateOr(Ops[0], Ops[1], "tmp");
    return true;
  case ALTIVEC_BUILTIN_VNOR:
    // nor: ~(Ops[0] | Ops[1]).
    Result = Builder.CreateOr(Ops[0], Ops[1], "tmp");
    Result = Builder.CreateNot(Result, "tmp");
    return true;
  case ALTIVEC_BUILTIN_VXOR:
    Result = Builder.CreateXor(Ops[0], Ops[1], "tmp");
    return true;

  // Loads and stores are forwarded to MergeIntPtrOperand with the matching
  // intrinsic.  The second argument is 0 for the load forms and 1 for the
  // store forms; presumably it is the index of the integer offset operand
  // that the helper merges with the pointer that follows it (see
  // MergeIntPtrOperand for the exact contract).
  case ALTIVEC_BUILTIN_LVSL:
    MergeIntPtrOperand(this, 0, Intrinsic::ppc_altivec_lvsl,
                       ResultType, Ops, Builder, Result);
    return true;
  case ALTIVEC_BUILTIN_LVSR:
    MergeIntPtrOperand(this, 0, Intrinsic::ppc_altivec_lvsr,
                       ResultType, Ops, Builder, Result);
    return true;
  case ALTIVEC_BUILTIN_LVX:
    MergeIntPtrOperand(this, 0, Intrinsic::ppc_altivec_lvx,
                       ResultType, Ops, Builder, Result);
    return true;
  case ALTIVEC_BUILTIN_LVXL:
    MergeIntPtrOperand(this, 0, Intrinsic::ppc_altivec_lvxl,
                       ResultType, Ops, Builder, Result);
    return true;
  case ALTIVEC_BUILTIN_LVEBX:
    MergeIntPtrOperand(this, 0, Intrinsic::ppc_altivec_lvebx,
                       ResultType, Ops, Builder, Result);
    return true;
  case ALTIVEC_BUILTIN_LVEHX:
    MergeIntPtrOperand(this, 0, Intrinsic::ppc_altivec_lvehx,
                       ResultType, Ops, Builder, Result);
    return true;
  case ALTIVEC_BUILTIN_LVEWX:
    MergeIntPtrOperand(this, 0, Intrinsic::ppc_altivec_lvewx,
                       ResultType, Ops, Builder, Result);
    return true;
  case ALTIVEC_BUILTIN_STVX:
    MergeIntPtrOperand(this, 1, Intrinsic::ppc_altivec_stvx,
                       ResultType, Ops, Builder, Result);
    return true;
  case ALTIVEC_BUILTIN_STVEBX:
    MergeIntPtrOperand(this, 1, Intrinsic::ppc_altivec_stvebx,
                       ResultType, Ops, Builder, Result);
    return true;
  case ALTIVEC_BUILTIN_STVEHX:
    MergeIntPtrOperand(this, 1, Intrinsic::ppc_altivec_stvehx,
                       ResultType, Ops, Builder, Result);
    return true;
  case ALTIVEC_BUILTIN_STVEWX:
    MergeIntPtrOperand(this, 1, Intrinsic::ppc_altivec_stvewx,
                       ResultType, Ops, Builder, Result);
    return true;
  case ALTIVEC_BUILTIN_STVXL:
    MergeIntPtrOperand(this, 1, Intrinsic::ppc_altivec_stvxl,
                       ResultType, Ops, Builder, Result);
    return true;

  // Splat-immediate: the single operand must be a constant integer.  It is
  // sign-extended/truncated to the element width and replicated into a
  // constant vector.  A non-constant operand is diagnosed and yields undef.
  case ALTIVEC_BUILTIN_VSPLTISB:
    if (Constant *Elt = dyn_cast<ConstantInt>(Ops[0])) {
      Elt = ConstantExpr::getIntegerCast(Elt, Type::Int8Ty, true);
      Result = BuildVector(Elt, Elt, Elt, Elt,  Elt, Elt, Elt, Elt,
                           Elt, Elt, Elt, Elt,  Elt, Elt, Elt, Elt, NULL);
    } else {
      error("%Helement must be an immediate", &EXPR_LOCATION(exp));
      Result = UndefValue::get(VectorType::get(Type::Int8Ty, 16));
    }
    return true;
  case ALTIVEC_BUILTIN_VSPLTISH:
    if (Constant *Elt = dyn_cast<ConstantInt>(Ops[0])) {
      Elt = ConstantExpr::getIntegerCast(Elt, Type::Int16Ty, true);
      Result = BuildVector(Elt, Elt, Elt, Elt,  Elt, Elt, Elt, Elt, NULL);
    } else {
      error("%Helement must be an immediate", &EXPR_LOCATION(exp));
      Result = UndefValue::get(VectorType::get(Type::Int16Ty, 8));
    }
    return true;
  case ALTIVEC_BUILTIN_VSPLTISW:
    if (Constant *Elt = dyn_cast<ConstantInt>(Ops[0])) {
      Elt = ConstantExpr::getIntegerCast(Elt, Type::Int32Ty, true);
      Result = BuildVector(Elt, Elt, Elt, Elt, NULL);
    } else {
      // The operand is the splat element (as in VSPLTISB/VSPLTISH), not a
      // mask, so use the same wording as the sibling cases.
      error("%Helement must be an immediate", &EXPR_LOCATION(exp));
      Result = UndefValue::get(VectorType::get(Type::Int32Ty, 4));
    }
    return true;

  // Splat-element: replicate element number Ops[1] (which must be a constant)
  // of Ops[0] across the whole vector using a shuffle of Ops[0] with itself.
  // A non-constant element number is diagnosed and Ops[0] is returned as is.
  case ALTIVEC_BUILTIN_VSPLTB:
    if (ConstantInt *Elt = dyn_cast<ConstantInt>(Ops[1])) {
      int EV = Elt->getZExtValue();
      // No explicit reduction here, unlike VSPLTH/VSPLTW below: both shuffle
      // inputs are Ops[0], so for a 16-element shuffle the indices 16..31
      // select the same bytes as 0..15.
      Result = BuildVectorShuffle(Ops[0], Ops[0],
                                  EV, EV, EV, EV, EV, EV, EV, EV,
                                  EV, EV, EV, EV, EV, EV, EV, EV);
    } else {
      error("%Helement number must be an immediate", &EXPR_LOCATION(exp));
      Result = Ops[0];
    }
    return true;
  case ALTIVEC_BUILTIN_VSPLTH:
    if (ConstantInt *Elt = dyn_cast<ConstantInt>(Ops[1])) {
      int EV = Elt->getZExtValue();
      // gcc accepts anything up to 31, and there is code that tests for it,
      // although it doesn't seem to make sense.  Hardware behaves as if mod 8.
      if (EV>7 && EV<=31)
        EV = EV%8;
      Result = BuildVectorShuffle(Ops[0], Ops[0],
                                  EV, EV, EV, EV, EV, EV, EV, EV);
    } else {
      error("%Helement number must be an immediate", &EXPR_LOCATION(exp));
      Result = Ops[0];
    }
    return true;
  case ALTIVEC_BUILTIN_VSPLTW:
    if (ConstantInt *Elt = dyn_cast<ConstantInt>(Ops[1])) {
      int EV = Elt->getZExtValue();
      // gcc accepts anything up to 31, and there is code that tests for it,
      // although it doesn't seem to make sense.  Hardware behaves as if mod 4.
      if (EV>3 && EV<=31)
        EV = EV%4;
      Result = BuildVectorShuffle(Ops[0], Ops[0], EV, EV, EV, EV);
    } else {
      error("%Helement number must be an immediate", &EXPR_LOCATION(exp));
      Result = Ops[0];
    }
    return true;

  // Shift-left-double-by-octet: all element types are handled identically by
  // viewing both inputs as 16 bytes and selecting 16 consecutive bytes,
  // starting at byte Amt, from the concatenation Ops[0]:Ops[1].
  case ALTIVEC_BUILTIN_VSLDOI_16QI:
  case ALTIVEC_BUILTIN_VSLDOI_8HI:
  case ALTIVEC_BUILTIN_VSLDOI_4SI:
  case ALTIVEC_BUILTIN_VSLDOI_4SF:
    if (ConstantInt *Elt = dyn_cast<ConstantInt>(Ops[2])) {
      /* Map all of these to a shuffle. */
      // Only the low four bits of the shift amount are used (0..15).
      unsigned Amt = Elt->getZExtValue() & 15;
      VectorType *v16i8 = VectorType::get(Type::Int8Ty, 16);
      Ops[0] = Builder.CreateBitCast(Ops[0], v16i8, "tmp");
      Ops[1] = Builder.CreateBitCast(Ops[1], v16i8, "tmp");
      // Result is left as <16 x i8>; it is not cast back to ResultType here.
      Result = BuildVectorShuffle(Ops[0], Ops[1],
                                  Amt, Amt+1, Amt+2, Amt+3,
                                  Amt+4, Amt+5, Amt+6, Amt+7,
                                  Amt+8, Amt+9, Amt+10, Amt+11,
                                  Amt+12, Amt+13, Amt+14, Amt+15);
    } else {
      error("%Hshift amount must be an immediate", &EXPR_LOCATION(exp));
      Result = Ops[0];
    }
    return true;

  // Pack (modulo): reinterpret both inputs in the (narrower-element) result
  // type and keep every odd-numbered element of the concatenation.
  case ALTIVEC_BUILTIN_VPKUHUM:
    Ops[0] = Builder.CreateBitCast(Ops[0], ResultType, "tmp");
    Ops[1] = Builder.CreateBitCast(Ops[1], ResultType, "tmp");
    Result = BuildVectorShuffle(Ops[0], Ops[1], 1, 3, 5, 7, 9, 11, 13, 15,
                                17, 19, 21, 23, 25, 27, 29, 31);
    return true;
  case ALTIVEC_BUILTIN_VPKUWUM:
    Ops[0] = Builder.CreateBitCast(Ops[0], ResultType, "tmp");
    Ops[1] = Builder.CreateBitCast(Ops[1], ResultType, "tmp");
    Result = BuildVectorShuffle(Ops[0], Ops[1], 1, 3, 5, 7, 9, 11, 13, 15);
    return true;

  // Merge-high: interleave the leading half of Ops[0] with the leading half
  // of Ops[1] (shuffle indices >= the element count refer to Ops[1]).
  case ALTIVEC_BUILTIN_VMRGHB:
    Result = BuildVectorShuffle(Ops[0], Ops[1],
                                0, 16, 1, 17, 2, 18, 3, 19,
                                4, 20, 5, 21, 6, 22, 7, 23);
    return true;
  case ALTIVEC_BUILTIN_VMRGHH:
    Result = BuildVectorShuffle(Ops[0], Ops[1], 0, 8, 1, 9, 2, 10, 3, 11);
    return true;
  case ALTIVEC_BUILTIN_VMRGHW:
    Result = BuildVectorShuffle(Ops[0], Ops[1], 0, 4, 1, 5);
    return true;

  // Merge-low: the same interleave, taken from the trailing half of each
  // operand.
  case ALTIVEC_BUILTIN_VMRGLB:
    Result = BuildVectorShuffle(Ops[0], Ops[1],
                                 8, 24,  9, 25, 10, 26, 11, 27,
                                12, 28, 13, 29, 14, 30, 15, 31);
    return true;
  case ALTIVEC_BUILTIN_VMRGLH:
    Result = BuildVectorShuffle(Ops[0], Ops[1], 4, 12, 5, 13, 6, 14, 7, 15);
    return true;
  case ALTIVEC_BUILTIN_VMRGLW:
    Result = BuildVectorShuffle(Ops[0], Ops[1], 2, 6, 3, 7);
    return true;

  case ALTIVEC_BUILTIN_ABS_V4SF: {
    // and out sign bits: view the floats as <4 x i32>, clear bit 31 of each
    // lane, then view the result as ResultType again.
    VectorType *v4i32 = VectorType::get(Type::Int32Ty, 4);
    Ops[0] = Builder.CreateBitCast(Ops[0], v4i32, "tmp");
    Constant *C = ConstantInt::get(Type::Int32Ty, 0x7FFFFFFF);
    C = ConstantVector::get(std::vector<Constant*>(4, C));
    Result = Builder.CreateAnd(Ops[0], C, "tmp");
    Result = Builder.CreateBitCast(Result, ResultType, "tmp");
    return true;
  }
  case ALTIVEC_BUILTIN_ABS_V4SI:
  case ALTIVEC_BUILTIN_ABS_V8HI:
  case ALTIVEC_BUILTIN_ABS_V16QI: { // iabs(x) -> smax(x, 0-x)
    Result = Builder.CreateNeg(Ops[0], "tmp");
    // get the right smax intrinsic.  The table is indexed by the value of
    // GetAltivecTypeNumFromType, so its order (word, halfword, byte) must
    // agree with that helper's numbering.
    static const Intrinsic::ID smax_iid[3] = {
      Intrinsic::ppc_altivec_vmaxsw,
      Intrinsic::ppc_altivec_vmaxsh,
      Intrinsic::ppc_altivec_vmaxsb
    };
    const VectorType *PTy = cast<VectorType>(ResultType);
    unsigned N = GetAltivecTypeNumFromType(PTy->getElementType());
    Function *smax = Intrinsic::getDeclaration(TheModule, smax_iid[N]);
    Value *ActualOps[] = { Ops[0], Result };
    Result = Builder.CreateCall(smax, ActualOps, ActualOps+2, "tmp");
    return true;
  }
  case ALTIVEC_BUILTIN_ABSS_V4SI:
  case ALTIVEC_BUILTIN_ABSS_V8HI:
  case ALTIVEC_BUILTIN_ABSS_V16QI: { // iabss(x) -> smax(x, satsub(0,x))
    // get the right smax/subs intrinsics.  Both tables are indexed by the
    // value of GetAltivecTypeNumFromType (word, halfword, byte order).
    static const Intrinsic::ID smax_iid[3] = {
      Intrinsic::ppc_altivec_vmaxsw,
      Intrinsic::ppc_altivec_vmaxsh,
      Intrinsic::ppc_altivec_vmaxsb
    };
    static const Intrinsic::ID subss_iid[3] = {
      Intrinsic::ppc_altivec_vsubsws,
      Intrinsic::ppc_altivec_vsubshs,
      Intrinsic::ppc_altivec_vsubsbs
    };

    // get the right satsub intrinsic.
    const VectorType *PTy = cast<VectorType>(ResultType);
    unsigned N = GetAltivecTypeNumFromType(PTy->getElementType());
    Function *smax = Intrinsic::getDeclaration(TheModule, smax_iid[N]);
    Function *subss = Intrinsic::getDeclaration(TheModule, subss_iid[N]);

    // First compute the saturating negation satsub(0, x) ...
    Value *ActualOps[] = { Constant::getNullValue(ResultType), Ops[0] };
    Result = Builder.CreateCall(subss, ActualOps, ActualOps+2, "tmp");
    // ... then reuse the operand array for smax(x, satsub(0, x)).
    ActualOps[0] = Ops[0];
    ActualOps[1] = Result;
    Result = Builder.CreateCall(smax, ActualOps, ActualOps+2, "tmp");
    return true;
  }
  case ALTIVEC_BUILTIN_VPERM_4SI:
  case ALTIVEC_BUILTIN_VPERM_4SF:
  case ALTIVEC_BUILTIN_VPERM_8HI:
  case ALTIVEC_BUILTIN_VPERM_16QI: {
    // Operation is identical on all types; we have a single intrinsic.
    // The two data operands are viewed as <4 x i32>; the third operand is
    // passed through unchanged.  The result is viewed as the original type
    // of Ops[0] again.
    const Type *VecTy = VectorType::get(Type::Int32Ty, 4);
    Value *Op0 = CastToType(Instruction::BitCast, Ops[0], VecTy);
    Value *Op1 = CastToType(Instruction::BitCast, Ops[1], VecTy);
    Value *ActualOps[] = { Op0, Op1, Ops[2]};
    Result = Builder.CreateCall(Intrinsic::getDeclaration(TheModule,
                                          Intrinsic::ppc_altivec_vperm),
                                ActualOps, ActualOps+3, "tmp");
    Result = CastToType(Instruction::BitCast, Result, Ops[0]->getType());
    return true;
  }
  case ALTIVEC_BUILTIN_VSEL_4SI:
  case ALTIVEC_BUILTIN_VSEL_4SF:
  case ALTIVEC_BUILTIN_VSEL_8HI:
  case ALTIVEC_BUILTIN_VSEL_16QI: {
    // Operation is identical on all types; we have a single intrinsic.
    // All three operands are viewed as <4 x i32>; the result is viewed as
    // the original type of Ops[0] again.
    const Type *VecTy = VectorType::get(Type::Int32Ty, 4);
    Value *Op0 = CastToType(Instruction::BitCast, Ops[0], VecTy);
    Value *Op1 = CastToType(Instruction::BitCast, Ops[1], VecTy);
    Value *Op2 = CastToType(Instruction::BitCast, Ops[2], VecTy);
    Value *ActualOps[] = { Op0, Op1, Op2 };
    Result = Builder.CreateCall(Intrinsic::getDeclaration(TheModule,
                                          Intrinsic::ppc_altivec_vsel),
                                ActualOps, ActualOps+3, "tmp");
    Result = CastToType(Instruction::BitCast, Result, Ops[0]->getType());
    return true;
  }
  }

  return false;
}