/**
 * Fills m_particle_dphidn with one value per evaluation position.
 *
 * The buffer is zeroed, then handed first to
 * PotentialInterpolateFarFieldScalar (together with the m_grid_phi grid) and
 * afterwards to Potential_PPCorrMNScalar (together with the reordered mass
 * particles and the start/end tables of m_SpatialHasher_mass).
 * NOTE(review): presumably the first call writes a grid-interpolated
 * far-field term and the second adds a near-field particle-particle
 * correction on top of it -- confirm against the kernel implementations.
 */
void PotentialFieldSolver::evaluateScalarPotential()
{
	// Only the x component of the cell size is read; both it and its
	// reciprocal are forwarded to the correction pass below.
	const double cellSize = m_SpatialHasher_mass.getCellSize().x;
	const double invCellSize = 1.0/cellSize;

	// Start from an all-zero output buffer.
	m_particle_dphidn->memset(0);

	// Pass 1: evaluation positions + m_grid_phi -> m_particle_dphidn.
	PotentialInterpolateFarFieldScalar(
		m_evalPos->getDevicePtr(),
		m_grid_phi->getDevicePtr(),
		m_particle_dphidn->getDevicePtr(),
		m_SpatialHasher_mass.getCellSize().x,
		m_gridx, m_gridy, m_gridz,
		m_M_eval,
		m_origin);

	// Pass 2: same output buffer, now together with the hashed mass particles.
	// The grid dimensions and the (x*y, x) stride pair are each passed twice
	// with identical values.
	Potential_PPCorrMNScalar(
		m_SpatialHasher_mass.getStartTable(),
		m_SpatialHasher_mass.getEndTable(),
		m_evalPos->getDevicePtr(),
		m_p_massPos_Reorder->getDevicePtr(),
		m_particle_mass_Reorder->getDevicePtr(),
		m_particle_dphidn->getDevicePtr(),
		invCellSize,
		cellSize,
		1.0,
		1.0,
		make_uint3(m_gridx, m_gridy, m_gridz),
		make_uint3(m_gridx, m_gridy, m_gridz),
		make_uint2(m_gridx*m_gridy, m_gridx),
		make_uint2(m_gridx*m_gridy, m_gridx),
		m_K,
		m_M_eval,
		m_N_mass,
		m_origin);
}
bool PotentialFieldSolver::evaluateGradient( bool large_eval )
{
	double h = m_SpatialHasher_mass.getCellSize().x;
	m_particle_gradPhi->memset(make_double3(0,0,0));


	if(large_eval)
	{
		m_SpatialHasher_eval.endSpatialHash();
		m_SpatialHasher_eval.initSpatialHash(m_M_eval,m_gridx,m_gridy,m_gridz);


		m_SpatialHasher_eval.setSpatialHashGrid(m_gridx, h,
			m_SpatialHasher_mass.getWorldOrigin(),
			m_M_eval);
		m_SpatialHasher_eval.setHashParam();
		m_SpatialHasher_eval.doSpatialHash(m_evalPos->getDevicePtr(),m_M_eval);
		m_SpatialHasher_eval.reorderData(m_M_eval, m_evalPos->getDevicePtr(),m_evalPos_Reorder->getDevicePtr(),4,1);

		m_particle_gradPhi_deorder->memset(make_double3(0,0,0));

		PotentialInterpolateFarField(m_evalPos_Reorder->getDevicePtr(),
			m_far_gradPhi->getDevicePtr(),m_particle_gradPhi_deorder->getDevicePtr(),
			m_SpatialHasher_mass.getCellSize().x,
			m_gridx,m_gridy,m_gridz,m_M_eval,m_origin);

		Potential_PPCorrMN(m_SpatialHasher_mass.getStartTable(),
			m_SpatialHasher_mass.getEndTable(),
			m_evalPos_Reorder->getDevicePtr(),
			m_p_massPos_Reorder->getDevicePtr(),
			m_particle_mass_Reorder->getDevicePtr(),
			m_particle_gradPhi_deorder->getDevicePtr(),
			1.0/h,
			h,
			1.0,
			1.0,
			make_uint3(m_gridx,m_gridy,m_gridz),
			make_uint3(m_gridx,m_gridy,m_gridz),
			make_uint2(m_gridx*m_gridy,m_gridx),
			make_uint2(m_gridx*m_gridy,m_gridx),
			m_K,
			m_M_eval,
			m_N_mass,
			m_origin);
		m_SpatialHasher_eval.deorderData(m_M_eval,m_particle_gradPhi_deorder->getDevicePtr(),m_particle_gradPhi->getDevicePtr(),3,2);
	}
	else
	{
		PotentialInterpolateFarField(m_evalPos->getDevicePtr(),
			m_far_gradPhi->getDevicePtr(),m_particle_gradPhi->getDevicePtr(),
			m_SpatialHasher_mass.getCellSize().x,
			m_gridx,m_gridy,m_gridz,m_M_eval,m_origin);

		Potential_PPCorrMN(m_SpatialHasher_mass.getStartTable(),
			m_SpatialHasher_mass.getEndTable(),
			m_evalPos->getDevicePtr(),
			m_p_massPos_Reorder->getDevicePtr(),
			m_particle_mass_Reorder->getDevicePtr(),
			m_particle_gradPhi->getDevicePtr(),
			1.0/h,
			h,
			1.0,
			1.0,
			make_uint3(m_gridx,m_gridy,m_gridz),
			make_uint3(m_gridx,m_gridy,m_gridz),
			make_uint2(m_gridx*m_gridy,m_gridx),
			make_uint2(m_gridx*m_gridy,m_gridx),
			m_K,
			m_M_eval,
			m_N_mass,
			m_origin);
	}


	

	PotentialComputeGradForOutParticle(m_evalPos->getDevicePtr(),m_total_mass, m_center,
		m_SpatialHasher_mass.getWorldOrigin(), 
		make_float3(m_SpatialHasher_mass.getWorldOrigin().x+m_L,
		m_SpatialHasher_mass.getWorldOrigin().y+m_L,
		m_SpatialHasher_mass.getWorldOrigin().z+m_L),
		1.0,1.0,m_particle_gradPhi->getDevicePtr(),m_M_eval);


	return true;
}
// Example #3
// 0
void reference_calc_custom(const uchar4* const h_sourceImg,
                    const size_t numRowsSource, const size_t numColsSource,
                    const uchar4* const h_destImg,
                    uchar4* const h_blendedImg,
					const unsigned char* h_mask,
					const unsigned char* h_border,
					const unsigned char* h_interior){

  //we need to create a list of border pixels and interior pixels
  //this is a conceptually simple implementation, not a particularly efficient one...

  //first create mask
  size_t srcSize = numRowsSource * numColsSource;

  const unsigned char* mask = 
	  h_mask;
	  // new unsigned char[srcSize];

  /*
  for (int i = 0; i < srcSize; ++i) {
    mask[i] = (h_sourceImg[i].x + h_sourceImg[i].y + h_sourceImg[i].z < 3 * 255) ? 1 : 0;
  }
  */

  //next compute strictly interior pixels and border pixels
  const unsigned char *borderPixels =
	  h_border;
	  // new unsigned char[srcSize];
  const unsigned char *strictInteriorPixels =
	  h_interior;
	  // new unsigned char[srcSize];

  std::vector<uint2> interiorPixelList;

  //the source region in the homework isn't near an image boundary, so we can
  //simplify the conditionals a little...
  for (size_t r = 1; r < numRowsSource - 1; ++r) {
    for (size_t c = 1; c < numColsSource - 1; ++c) {
      if (mask[r * numColsSource + c]) {
        if (mask[(r -1) * numColsSource + c] && mask[(r + 1) * numColsSource + c] &&
            mask[r * numColsSource + c - 1] && mask[r * numColsSource + c + 1]) {
          // strictInteriorPixels[r * numColsSource + c] = 1;
          // borderPixels[r * numColsSource + c] = 0;
          interiorPixelList.push_back(make_uint2(r, c));
        }
        else {
          // strictInteriorPixels[r * numColsSource + c] = 0;
          // borderPixels[r * numColsSource + c] = 1;
        }
      }
      else {
          // strictInteriorPixels[r * numColsSource + c] = 0;
          // borderPixels[r * numColsSource + c] = 0;

      }
    }
  }

  //split the source and destination images into their respective
  //channels
  unsigned char* red_src   = new unsigned char[srcSize];
  unsigned char* blue_src  = new unsigned char[srcSize];
  unsigned char* green_src = new unsigned char[srcSize];

  for (int i = 0; i < srcSize; ++i) {
    red_src[i]   = h_sourceImg[i].x;
    blue_src[i]  = h_sourceImg[i].y;
    green_src[i] = h_sourceImg[i].z;
  }

  unsigned char* red_dst   = new unsigned char[srcSize];
  unsigned char* blue_dst  = new unsigned char[srcSize];
  unsigned char* green_dst = new unsigned char[srcSize];

  for (int i = 0; i < srcSize; ++i) {
    red_dst[i]   = h_destImg[i].x;
    blue_dst[i]  = h_destImg[i].y;
    green_dst[i] = h_destImg[i].z;
  }

  //next we'll precompute the g term - it never changes, no need to recompute every iteration
  float *g_red   = new float[srcSize];
  float *g_blue  = new float[srcSize];
  float *g_green = new float[srcSize];

  memset(g_red,   0, srcSize * sizeof(float));
  memset(g_blue,  0, srcSize * sizeof(float));
  memset(g_green, 0, srcSize * sizeof(float));

  computeG_custom(red_src,   g_red,   numColsSource, interiorPixelList);
  computeG_custom(blue_src,  g_blue,  numColsSource, interiorPixelList);
  computeG_custom(green_src, g_green, numColsSource, interiorPixelList);

  //for each color channel we'll need two buffers and we'll ping-pong between them
  float *blendedValsRed_1 = new float[srcSize];
  float *blendedValsRed_2 = new float[srcSize];

  float *blendedValsBlue_1 = new float[srcSize];
  float *blendedValsBlue_2 = new float[srcSize];

  float *blendedValsGreen_1 = new float[srcSize];
  float *blendedValsGreen_2 = new float[srcSize];

  //IC is the source image, copy over
  for (size_t i = 0; i < srcSize; ++i) {
    blendedValsRed_1[i] = red_src[i];
    blendedValsRed_2[i] = red_src[i];
    blendedValsBlue_1[i] = blue_src[i];
    blendedValsBlue_2[i] = blue_src[i];
    blendedValsGreen_1[i] = green_src[i];
    blendedValsGreen_2[i] = green_src[i];
  }

  //Perform the solve on each color channel
  const size_t numIterations = 800;
  for (size_t i = 0; i < numIterations; ++i) {
    computeIteration_custom(red_dst, strictInteriorPixels, borderPixels,
                     interiorPixelList, numColsSource, blendedValsRed_1, g_red,
                     blendedValsRed_2);

    std::swap(blendedValsRed_1, blendedValsRed_2);
  }

  for (size_t i = 0; i < numIterations; ++i) {
    computeIteration_custom(blue_dst, strictInteriorPixels, borderPixels,
                     interiorPixelList, numColsSource, blendedValsBlue_1, g_blue,
                     blendedValsBlue_2);

    std::swap(blendedValsBlue_1, blendedValsBlue_2);
  }

  for (size_t i = 0; i < numIterations; ++i) {
    computeIteration_custom(green_dst, strictInteriorPixels, borderPixels,
                     interiorPixelList, numColsSource, blendedValsGreen_1, g_green,
                     blendedValsGreen_2);

    std::swap(blendedValsGreen_1, blendedValsGreen_2);
  }
  std::swap(blendedValsRed_1,   blendedValsRed_2);   //put output into _2
  std::swap(blendedValsBlue_1,  blendedValsBlue_2);  //put output into _2
  std::swap(blendedValsGreen_1, blendedValsGreen_2); //put output into _2

  //copy the destination image to the output
  memcpy(h_blendedImg, h_destImg, sizeof(uchar4) * srcSize);

  //copy computed values for the interior into the output
  for (size_t i = 0; i < interiorPixelList.size(); ++i) {
    uint2 coord = interiorPixelList[i];

    unsigned int offset = coord.x * numColsSource + coord.y;

    h_blendedImg[offset].x = blendedValsRed_2[offset];
    h_blendedImg[offset].y = blendedValsBlue_2[offset];
    h_blendedImg[offset].z = blendedValsGreen_2[offset];
  }

  //wow, we allocated a lot of memory!
  // delete[] mask;
  delete[] blendedValsRed_1;
  delete[] blendedValsRed_2;
  delete[] blendedValsBlue_1;
  delete[] blendedValsBlue_2;
  delete[] blendedValsGreen_1;
  delete[] blendedValsGreen_2;
  delete[] g_red;
  delete[] g_blue;
  delete[] g_green;
  delete[] red_src;
  delete[] red_dst;
  delete[] blue_src;
  delete[] blue_dst;
  delete[] green_src;
  delete[] green_dst;
  // delete[] borderPixels;
  // delete[] strictInteriorPixels;
}