PotentialFieldSolver::PotentialFieldSolver()
{
	m_M_eval=0;
	m_N_mass=0;
	m_cell_h=0;
	m_gridx=m_gridy=m_gridz=0;
	m_hashx=m_hashy=m_hashz=0;
	m_initialized=false;
	m_origin=make_double4(0,0,0,0);
	m_L=0;
	m_K=0;
	m_center=make_double3(0,0,0);
	m_total_mass=0;



	m_evalPos = new GpuArrayf4;
	m_evalNormal = new GpuArrayd3;
	m_evalPos_Reorder = new GpuArrayf4;
	m_p_massPos = new GpuArrayf4;
	m_p_massPos_Reorder = new GpuArrayf4;
	m_particle_mass = new GpuArrayd;
	m_particle_mass_Reorder = new GpuArrayd;
	m_grid_density = new GpuArrayd;
	m_grid_Rhs = new GpuArrayd;
	m_grid_phi = new GpuArrayd;
	m_particle_gradPhi = new GpuArrayd3;
	m_particle_dphidn = new GpuArrayd;
	m_particle_gradPhi_deorder = new GpuArrayd3;
	m_grid_gradPhi = new GpuArrayd3;
	m_far_gradPhi = new GpuArrayd3;
	m_SLP_area = new GpuArrayd;
}
BiotSavartSolver::BiotSavartSolver()
{
	m_N_vort = 0;
	m_M_eval = 0;
	m_gridx = m_gridy = m_gridz = 0;
	m_hashx = m_hashy = m_hashz = 0;
	m_cell_h = 0.0;
	m_origin = make_double4(0,0,0,0);
	m_center = make_double3(0,0,0);
	m_L = 0.0;
	m_initialized = false;
	m_K = 0;
	m_evalPos = new gf_GpuArray<float4>;
	m_evalPos_Reorder = new gf_GpuArray<float4>;
	m_p_vortPos = new gf_GpuArray<float4>;
	m_p_vortPos_Reorder = new GpuArrayf4;
	m_isVIC = false;
	for (int i=0;i<NUM_COMPONENTS; i++)
	{
		m_total_vort[i]=0;
		m_grid_Rhs[i] = new GpuArrayd;
		m_particle_vort[i] = new GpuArrayd;
		m_particle_vort_Reorder[i] = new GpuArrayd;
		m_grid_vort[i] = new GpuArrayd;
		m_grid_Psi[i] = new GpuArrayd;
		m_particle_U[i] = new GpuArrayd;
		m_particle_U_deorder[i] = new GpuArrayd;
		m_grid_U[i] = new GpuArrayd;
		m_far_U[i] = new GpuArrayd;
	}


}
Ejemplo n.º 3
0
double4 ParticleListCPUSorted::subcycle_stats(PlasmaData* pdata)
{
	double scale = pdata->npiccard_outer;
	double mean = 0;
	double mean2 = 0;
	double mins = num_subcycles[0]/scale;
	double maxs = num_subcycles[0]/scale;
	int imax = 0;
	int imin = 0;
	double nsubcycles_total = 0;
	for(int i=0;i<nptcls;i++)
	{
		if(mins > num_subcycles[i]/scale)
		{
			mins = num_subcycles[i]/scale;
			imin = i;
		}
		if(maxs < num_subcycles[i]/scale)
		{
			maxs = num_subcycles[i]/scale;
			imax = i;
		}

		nsubcycles_total += num_subcycles[i];
		mean += num_subcycles[i]/((double)nptcls*scale);
		//mean2 += num_subcycles[i]*num_subcycles[i]/((double)nptcls*scale*scale*nptcls);
		mean2 = mean*mean;
	}

	double std_diff;

	std_diff = sqrt(fabs(mean*mean - mean2)/((double)nptcls*scale));
	printf("Particle Subcycle Stats:\n");
	printf("Avg Subcycles: %f +/- %f\n",mean,std_diff);
	printf("Min / Max: %f[%i] / %f[%i]\n",mins,imin,maxs,imax);
	printf("Total number of subcycles was %e\n",nsubcycles_total);

	return make_double4(mean,std_diff,mins,maxs);
}
Ejemplo n.º 4
0
double4 ParticleListCPUSorted::piccard_stats(PlasmaData* pdata)
{
	double scale = pdata->npiccard_outer;
	double mean = 0;
	double mean2 = 0;
	double mins = num_piccard[0]/num_subcycles[0];
	double maxs = num_piccard[0]/num_subcycles[0];
	int imax = 0;
	int imin = 0;

	for(int i=0;i<nptcls;i++)
	{
		if(mins > num_piccard[i]/num_subcycles[i])
		{
			mins = num_piccard[i]/num_subcycles[i];
			imin = i;
		}
		if(maxs < num_piccard[i]/num_subcycles[i])
		{
			maxs = num_piccard[i]/num_subcycles[i];
			imax = i;
		}

		mean += num_piccard[i]/((double)nptcls*num_subcycles[i]);
		mean2 += num_piccard[i]*num_piccard[i]/((double)nptcls*num_subcycles[i]*num_subcycles[i]);
	}

	double std_diff;

	std_diff = sqrt(fabs(mean*mean - mean2));
	printf("Particle Piccard Stats(CPU):\n");
	printf("Avg Piccard: %f +/- %f\n",mean,std_diff);
	printf("Min / Max: %f[%i] / %f[%i]\n",mins,imin,maxs,imax);

	return make_double4(mean,std_diff,mins,maxs);
}
Ejemplo n.º 5
0
static inline __host__ __device__ double4 _pixMake(Ncv64f x, Ncv64f y, Ncv64f z, Ncv64f w) {return make_double4(x,y,z,w);}
Ejemplo n.º 6
0
template<> inline __host__ __device__ double4 _pixMakeZero<double4>() {return make_double4(0.,0.,0.,0.);}
Ejemplo n.º 7
0
void sapporo::startGravCalc(int    nj,          int ni,
                            int    id[],        double xi[][3],
                            double vi[][3],     double a[][3],
                            double j6old[][3],  double phiold[3],
                            double eps2,        double h2[],
                            double eps2_i[]) {

  #ifdef DEBUG_PRINT
    cerr << "calc_firsthalf ni: " << ni << "\tnj: " << nj << "integrationOrder: "<< integrationOrder << endl;
  #endif

  if(ni == 0 || nj == 0)  return;

  //Prevent unused compiler warning
  j6old  = j6old;
  phiold = phiold;

  //Its not allowed to send more particles than n_pipes
  assert(ni <= get_n_pipes());
  
  EPS2     = eps2;  

  //Copy i-particles to device structures, first only to device 0
  //then from device 0 we use memcpy to get the data into the 
  //other devs buffers
  int toDevice = 0;
  
  for (int i = 0; i < ni; i++)
  {
    deviceList[toDevice]->pos_i[i] = make_double4(xi[i][0], xi[i][1], xi[i][2], h2[i]);

    if(integrationOrder > GRAPE5)
    {
      deviceList[toDevice]->id_i[i]  = id[i];        
      deviceList[toDevice]->vel_i[i] = make_double4(vi[i][0], vi[i][1], vi[i][2], eps2);
      
      if(eps2_i != NULL)  //Seperate softening for i-particles
        deviceList[toDevice]->vel_i[i].w = eps2_i[i];              
      
      if(integrationOrder > FOURTH)
      {
        deviceList[toDevice]->acc_i[i] = make_double4(a[i][0], a[i][1], a[i][2], 0);
      }      
    }


    #ifdef DEBUG_PRINT
      if(integrationOrder == GRAPE5)
      {
        fprintf(stderr, "Inpdevice= %d,\ti: %d\tindex: %d\teps2: %f\t%f\t%f\t%f \n",
              -1,i, 0, eps2, xi[i][0],xi[i][1],xi[i][2]);
      }
      else
      {
        fprintf(stderr, "Inpdevice= %d,\ti: %d\tindex: %d\teps2: %f\t%f\t%f\t%f\t%f\t%f\t%f",
              -1,i,id[i], eps2, xi[i][0],xi[i][1],xi[i][2],vi[i][0],vi[i][1] ,vi[i][2]);
        
        if(integrationOrder > FOURTH)
          fprintf(stderr, "\t%f %f %f\n", a[i][0], a[i][1], a[i][2]);
        else
          fprintf(stderr, "\n");    
      }
    #endif
  }//for i

  //Copy i particles from host buffer of device 0 to the devices host side buffers  
  for(int i = toDevice+1;  i < nCUDAdevices; i++)
  {
      memcpy(&deviceList[i]->pos_i[0], &deviceList[0]->pos_i[0], sizeof(double4) * ni);
      if(integrationOrder > GRAPE5)
      {
        memcpy(&deviceList[i]->vel_i[0], &deviceList[0]->vel_i[0], sizeof(double4) * ni);
        memcpy(&deviceList[i]->id_i[0],  &deviceList[0]->id_i[0],  sizeof(int)     * ni);  
        
        if(integrationOrder > FOURTH)
          memcpy(&deviceList[i]->acc_i[0], &deviceList[0]->acc_i[0], sizeof(double4) * ni);        
      }
  }
  


  #pragma omp parallel num_threads(numberOfGPUUsedBySapporo)
  {
    if (nj_updated) {
      //Get the number of particles set for this device
      int devCount = jCopyInformation[omp_get_thread_num()].count;
      if(devCount > 0)
      {
        send_j_particles_to_device(devCount);
      }
    }

    //ni is the number of particles in the pipes
    send_i_particles_to_device(ni);

    //nj is the total number of particles to which the i particles have to
    //be calculated. For direct N-body this is usually equal to the total
    //number of nj particles that have been set by the calling code


    //Calculate the number of nj particles that are used per device
    int nj_per_dev = nj / nCUDAdevices;
    if(omp_get_thread_num() < (nj  % nCUDAdevices))
      nj_per_dev++;

    evaluate_gravity(ni,  nj_per_dev);

    sapdevice->dev_ni = ni;
  }//end parallel section


  nj_modified   = -1;
  predict       = false;
  nj_updated    = false;

  //Clear the address to dev/location mapping
  mappingFromIndexToDevIndex.clear();
} //end calc_first
Ejemplo n.º 8
0
int sapporo::set_j_particle(int    address,
                            int    id,
                            double tj, double dtj,
                            double mass,
                            double k18[3],       double j6[3],
                            double a2[3],        double v[3],
                            double x[3],         double snp[3],
                            double crk[3],       double eps) {

  #ifdef DEBUG_PRINT
    cerr << "set_j_particle (Addr: " << address << "  Id: " << id << " )\n";
  #endif

  //Prevent unused compiler warning
  k18 = k18;
    
  predJOnHost  = false; //Reset the buffers on the device since they can be modified
  nj_updated   = true;  //There are particles that are updated

  //Check if the address does not fall outside the allocated memory range
  //if it falls outside that range increase j-memory by 10%
    
  if (address >= nj_max) {
    fprintf(stderr, "Increasing nj_max! Nj_max was: %d  to be stored address: %d \n",
            nj_max, address);
    increase_jMemory();

    //Extra check, if we are still outside nj_max, we quit since particles are not
    //nicely send in order
    if (address >= nj_max) {
      fprintf(stderr, "Increasing nj_max was not enough! Send particles in order to the library! Exit\n");
      exit(-1);
    }
  }

  //Memory has been allocated, now we can store the particles
  //First calculate on which device this particle has to be stored
  //and on which physical address on that device. Note that the particles
  //are distributed to the different devices in a round-robin way (based on the addres)
  int dev           = address % nCUDAdevices;
  int devAddr       = address / nCUDAdevices;
  int storeLoc      = jCopyInformation[dev].count;

  //Store this information, incase particles get overwritten
  map<int, int4>::iterator iterator = mappingFromIndexToDevIndex.find(address);
  map<int, int4>::iterator end      = mappingFromIndexToDevIndex.end();


  if(iterator != end)
  {
    //Particle with this address has been set before, retrieve previous
    //calculated indices and overwrite them with the new info
    int4 addrInfo = (*iterator).second;
    dev           = addrInfo.x;
    storeLoc      = addrInfo.y;
    devAddr       = addrInfo.z;
  }
  else
  {
    //New particle not set before, save address info and increase particles
    //on that specific device by one
    mappingFromIndexToDevIndex[address] = make_int4(dev, storeLoc, devAddr, -1);
    jCopyInformation[dev].count++;
  }


  deviceList[dev]->pos_j_temp[storeLoc] = make_double4(x[0], x[1], x[2], mass);
  deviceList[dev]->address_j[storeLoc]  = devAddr;
  
  if(integrationOrder > GRAPE5)
  {
    deviceList[dev]->t_j_temp[storeLoc]          = make_double2(tj, dtj);
    deviceList[dev]->vel_j_temp[storeLoc]        = make_double4(v[0], v[1], v[2], eps);
    deviceList[dev]->acc_j_temp[storeLoc]        = make_double4(a2[0], a2[1], a2[2], 0.0);
    deviceList[dev]->jrk_j_temp[storeLoc]        = make_double4(j6[0], j6[1], j6[2], 0.0);
    deviceList[dev]->id_j_temp[storeLoc]         = id;
    //For 6th and 8 order we need more parameters
    if(integrationOrder > FOURTH)
    {
      deviceList[dev]->snp_j_temp[storeLoc]        = make_double4(snp[0], snp[1], snp[2], 0.0);
      deviceList[dev]->crk_j_temp[storeLoc]        = make_double4(crk[0], crk[1], crk[2], 0.0);
    }
  }
  

  #ifdef CPU_SUPPORT
    //Put the new j particles directly in the correct location on the host side.
    deviceList[dev]->pos_j[devAddr] = make_double4(x[0], x[1], x[2], mass);
    
    if(integrationOrder > GRAPE5)
    {
      deviceList[dev]->t_j[devAddr]          = make_double2(tj, dtj);
      deviceList[dev]->vel_j[devAddr]        = make_double4(v[0], v[1], v[2], eps);
      deviceList[dev]->acc_j[devAddr]        = make_double4(a2[0], a2[1], a2[2], 0.0);
      deviceList[dev]->jrk_j[devAddr]        = make_double4(j6[0], j6[1], j6[2], 0.0);
      deviceList[dev]->id_j[devAddr]         = id;
      //For 6th and 8 order we need more parameters
      if(integrationOrder > FOURTH)
      {
        deviceList[dev]->snp_j[devAddr]        = make_double4(snp[0], snp[1], snp[2], 0.0);
        deviceList[dev]->crk_j[devAddr]        = make_double4(crk[0], crk[1], crk[2], 0.0);
      }
    }  
  #endif


  #ifdef DEBUG_PRINT
    if(integrationOrder == GRAPE5)
    {
      fprintf(stderr, "Setj ad: %d\tid: %d storeLoc: %d \tpos: %f %f %f m: %f \n", address, id, storeLoc, x[0],x[1],x[2], mass);
    }
    else
    {
      fprintf(stderr, "Setj ad: %d\tid: %d storeLoc: %d \tpos: %f %f %f\t mass: %f \tvel: %f %f %f", address, id, storeLoc, x[0],x[1],x[2],mass, v[0],v[1],v[2]);
      fprintf(stderr, "\tacc: %f %f %f \n", a2[0],a2[1],a2[2]);
      if(integrationOrder > FOURTH)
      {
        fprintf(stderr, "\tsnp: %f %f %f ", snp[0],snp[1],snp[2]);
        fprintf(stderr, "\tcrk: %f %f %f \n", crk[0],crk[1],crk[2]);
      }
    }
  #endif

  return 0;
};
Ejemplo n.º 9
0
void sapporo::evaluate_gravity_host(int ni_total, int nj)
{
  executedOnHost = true;
  #pragma omp for
  for(int i=0; i < ni_total; i++)
  {
    double4 pos_i = sapdevice->pos_i[i];
    double4 vel_i = sapdevice->vel_i[i];
    int      id_i = sapdevice->id_i[i];
    double4 acc_i = make_double4(0,0,0,0);
    double4 jrk_i = make_double4(0,0,0,0);    
    
    double ds_min = 10e10;
    int    nnb    = -1;
    
    for(int j=0; j < nj; j++)
    {
      double4 pos_j = sapdevice->pPos_j[j];
      double4 vel_j = sapdevice->pVel_j[j];
      int      id_j = sapdevice->id_j  [j];
      
      if(id_i == id_j)
        continue;       //Skip self-gravity
      
      //Compute the force
      const double4 dr = make_double4(pos_j.x - pos_i.x, pos_j.y - pos_i.y, pos_j.z - pos_i.z, 0);
      const double ds2 = ((dr.x*dr.x + (dr.y*dr.y)) + dr.z*dr.z);
      
      if(ds2 < ds_min) //keep track of nearest neighbour
      {
        ds_min = ds2;
        nnb    = id_j;
      }
      
      
      const double inv_ds = 1.0/sqrt(ds2+EPS2);

      const double mass   = pos_j.w;
      const double minvr1 = mass*inv_ds; 
      const double  invr2 = inv_ds*inv_ds; 
      const double minvr3 = minvr1*invr2;

      // Acceleration
      acc_i.x += minvr3 * dr.x;
      acc_i.y += minvr3 * dr.y;
      acc_i.z += minvr3 * dr.z;
      acc_i.w += (-1.0)*minvr1;

      //Jerk
      const double4 dv = make_double4(vel_j.x - vel_i.x, vel_j.y - vel_i.y, vel_j.z -  vel_i.z, 0);
      const double drdv = (-3.0) * (minvr3*invr2) * (dr.x*dv.x + dr.y*dv.y + dr.z*dv.z);

      jrk_i.x += minvr3 * dv.x + drdv * dr.x;  
      jrk_i.y += minvr3 * dv.y + drdv * dr.y;
      jrk_i.z += minvr3 * dv.z + drdv * dr.z;   
    }//for j
    
    
    sapdevice->ds_i[i].x =    nnb;
    sapdevice->ds_i[i].y = ds_min;
    
    sapdevice->iParticleResults[i         ] = acc_i;
    sapdevice->iParticleResults[i+ni_total] = jrk_i;
    
  }//for i
}
Ejemplo n.º 10
0
void sapporo::predictJParticles_host(int nj)
{

  if(integrationOrder == GRAPE5)
  {
    //GRAPE5 has no prediction
    memcpy(&sapdevice->pPos_j[0], &sapdevice->pos_j[0], sizeof(double4) * nj);
    return;
  }

  
  
  double4 snp = make_double4(0,0,0,0);
  double4 crk = make_double4(0,0,0,0);
  
  for(int i=0; i < nj; i++)
  {
    double dt  = t_i  - sapdevice->t_j[i].x;
    double dt2 = (1./2.)*dt;
    double dt3 = (1./3.)*dt;
    double dt4 = (1./4.)*dt;
    double dt5 = (1./5.)*dt;

    double4  pos         = sapdevice->pos_j[i];
    double4  vel         = sapdevice->vel_j[i];
    double4  acc         = sapdevice->acc_j[i];
    double4  jrk         = sapdevice->jrk_j[i];
    
    if(integrationOrder > FOURTH)
    {
      snp         = sapdevice->snp_j[i];
      crk         = sapdevice->crk_j[i];
    }

    //Positions
    pos.x += dt  * (vel.x +  dt2 * (acc.x + dt3 * (jrk.x + 
             dt4 * (snp.x +  dt5 * (crk.x)))));
    pos.y += dt  * (vel.y +  dt2 * (acc.y + dt3 * (jrk.y + 
             dt4 * (snp.y +  dt5 * (crk.y)))));
    pos.z += dt  * (vel.z +  dt2 * (acc.z + dt3 * (jrk.z + 
             dt4 * (snp.z +  dt5 * (crk.z)))));
    sapdevice->pPos_j[i] = pos;
    

    //Velocities
    vel.x += dt  * (acc.x + dt2  * (jrk.x + 
             dt3 * (snp.x +  dt4 * (crk.x))));
    vel.y += dt  * (acc.y + dt2  * (jrk.y + 
             dt3 * (snp.y +  dt4 * (crk.y))));
    vel.z += dt  * (acc.z + dt2  * (jrk.z + 
             dt3 * (snp.z +  dt4 * (crk.z))));
    sapdevice->pVel_j[i] = vel;


    if(integrationOrder > FOURTH)
    {
      //Accelerations
      acc.x += dt * (jrk.x + dt2 * (snp.x +  dt3 * (crk.x)));
      acc.y += dt * (jrk.y + dt2 * (snp.y +  dt3 * (crk.y)));
      acc.z += dt * (jrk.z + dt2 * (snp.z +  dt3 * (crk.z)));
      sapdevice->pAcc_j[i] = acc;
    }
  }//for i  
  
}
bool
PotentialFieldSolver::m_ParticleToMesh()
{
	m_SpatialHasher_mass.setSpatialHashGrid(m_gridx, m_L/(double)m_gridx,
		make_float3(m_origin.x,m_origin.y,m_origin.z),
		m_N_mass);
	m_SpatialHasher_mass.setHashParam();
	m_SpatialHasher_mass.doSpatialHash(m_p_massPos->getDevicePtr(),m_N_mass);

	m_p_massPos_Reorder->memset(make_float4(0,0,0,0));
	m_SpatialHasher_mass.reorderData(m_N_mass, (void*)(m_p_massPos->getDevicePtr()),
		(void*)(m_p_massPos_Reorder->getDevicePtr()), 4, 1);



	m_particle_mass_Reorder->memset(0);
	m_SpatialHasher_mass.reorderData(m_N_mass, (void*)(m_particle_mass->getDevicePtr()),
			(void*)(m_particle_mass_Reorder->getDevicePtr()), 1, 2);


	m_grid_density->memset(0);
	ParticleToMesh(m_SpatialHasher_mass.getStartTable(),
		m_SpatialHasher_mass.getEndTable(),
		m_p_massPos_Reorder->getDevicePtr(),
		m_particle_mass_Reorder->getDevicePtr(),
		m_SpatialHasher_mass.getCellSize().x,
		m_grid_density->getDevicePtr(),
		make_uint3(m_gridx,m_gridy,m_gridz),
		make_uint3(m_gridx,m_gridy,m_gridz),
		m_N_mass,
		m_origin);
	cudaMemcpy(m_grid_Rhs->getDevicePtr(),
		m_grid_density->getDevicePtr(),
		m_grid_Rhs->getSize()*m_grid_Rhs->typeSize(),
		cudaMemcpyDeviceToDevice);
	ComputeRHS(m_grid_Rhs->getDevicePtr(),
		m_SpatialHasher_mass.getCellSize().x*m_SpatialHasher_mass.getCellSize().x,
		-1.0,
		m_gridx*m_gridy*m_gridz);

	m_p_massPos_Reorder->copy(gf_GpuArray<float4>::DEVICE_TO_HOST);
	m_particle_mass_Reorder->copy(gf_GpuArray<double>::DEVICE_TO_HOST);

	double total_weight = 0;
	double total_mass = 0;
	for(int i=0; i<m_N_mass; i++)
	{
		double *host = m_particle_mass_Reorder->getHostPtr();
		total_weight += fabs(host[i]);
		total_mass += host[i];
	}
	double cx=0, cy=0, cz=0;
	for(int i=0; i<m_N_mass; i++)
	{
		float4 *hpos = m_p_massPos_Reorder->getHostPtr();
		double *hmass = m_particle_mass_Reorder->getHostPtr();
		cx+=hpos[i].x*fabs(hmass[i]);
		cy+=hpos[i].y*fabs(hmass[i]);
		cz+=hpos[i].z*fabs(hmass[i]);
		//printf("%f,%f,%f\n",cx,cy,cz);
	}
	cx=cx/total_weight;
	cy=cy/total_weight;
	cz=cz/total_weight;

	m_center.x = cx;
	m_center.y = cy;
	m_center.z = cz;
	m_total_mass = total_mass;

	applyDirichlet(m_grid_Rhs->getDevicePtr(), 
		make_double4(cx,cy,cz,0),
		m_total_mass,
		make_double4(m_origin.x,m_origin.y,m_origin.z,0),
		m_SpatialHasher_mass.getCellSize().x,
		m_gridx,
		m_gridy,
		m_gridz);

	return true;

}