void PotentialFieldSolver::evaluateScalarPotential() { double h = m_SpatialHasher_mass.getCellSize().x; m_particle_dphidn->memset(0); PotentialInterpolateFarFieldScalar(m_evalPos->getDevicePtr(), m_grid_phi->getDevicePtr(),m_particle_dphidn->getDevicePtr(), m_SpatialHasher_mass.getCellSize().x, m_gridx,m_gridy,m_gridz,m_M_eval,m_origin); Potential_PPCorrMNScalar(m_SpatialHasher_mass.getStartTable(), m_SpatialHasher_mass.getEndTable(), m_evalPos->getDevicePtr(), m_p_massPos_Reorder->getDevicePtr(), m_particle_mass_Reorder->getDevicePtr(), m_particle_dphidn->getDevicePtr(), 1.0/h, h, 1.0, 1.0, make_uint3(m_gridx,m_gridy,m_gridz), make_uint3(m_gridx,m_gridy,m_gridz), make_uint2(m_gridx*m_gridy,m_gridx), make_uint2(m_gridx*m_gridy,m_gridx), m_K, m_M_eval, m_N_mass, m_origin); }
bool PotentialFieldSolver::evaluateGradient( bool large_eval ) { double h = m_SpatialHasher_mass.getCellSize().x; m_particle_gradPhi->memset(make_double3(0,0,0)); if(large_eval) { m_SpatialHasher_eval.endSpatialHash(); m_SpatialHasher_eval.initSpatialHash(m_M_eval,m_gridx,m_gridy,m_gridz); m_SpatialHasher_eval.setSpatialHashGrid(m_gridx, h, m_SpatialHasher_mass.getWorldOrigin(), m_M_eval); m_SpatialHasher_eval.setHashParam(); m_SpatialHasher_eval.doSpatialHash(m_evalPos->getDevicePtr(),m_M_eval); m_SpatialHasher_eval.reorderData(m_M_eval, m_evalPos->getDevicePtr(),m_evalPos_Reorder->getDevicePtr(),4,1); m_particle_gradPhi_deorder->memset(make_double3(0,0,0)); PotentialInterpolateFarField(m_evalPos_Reorder->getDevicePtr(), m_far_gradPhi->getDevicePtr(),m_particle_gradPhi_deorder->getDevicePtr(), m_SpatialHasher_mass.getCellSize().x, m_gridx,m_gridy,m_gridz,m_M_eval,m_origin); Potential_PPCorrMN(m_SpatialHasher_mass.getStartTable(), m_SpatialHasher_mass.getEndTable(), m_evalPos_Reorder->getDevicePtr(), m_p_massPos_Reorder->getDevicePtr(), m_particle_mass_Reorder->getDevicePtr(), m_particle_gradPhi_deorder->getDevicePtr(), 1.0/h, h, 1.0, 1.0, make_uint3(m_gridx,m_gridy,m_gridz), make_uint3(m_gridx,m_gridy,m_gridz), make_uint2(m_gridx*m_gridy,m_gridx), make_uint2(m_gridx*m_gridy,m_gridx), m_K, m_M_eval, m_N_mass, m_origin); m_SpatialHasher_eval.deorderData(m_M_eval,m_particle_gradPhi_deorder->getDevicePtr(),m_particle_gradPhi->getDevicePtr(),3,2); } else { PotentialInterpolateFarField(m_evalPos->getDevicePtr(), m_far_gradPhi->getDevicePtr(),m_particle_gradPhi->getDevicePtr(), m_SpatialHasher_mass.getCellSize().x, m_gridx,m_gridy,m_gridz,m_M_eval,m_origin); Potential_PPCorrMN(m_SpatialHasher_mass.getStartTable(), m_SpatialHasher_mass.getEndTable(), m_evalPos->getDevicePtr(), m_p_massPos_Reorder->getDevicePtr(), m_particle_mass_Reorder->getDevicePtr(), m_particle_gradPhi->getDevicePtr(), 1.0/h, h, 1.0, 1.0, make_uint3(m_gridx,m_gridy,m_gridz), make_uint3(m_gridx,m_gridy,m_gridz), make_uint2(m_gridx*m_gridy,m_gridx), make_uint2(m_gridx*m_gridy,m_gridx), m_K, m_M_eval, m_N_mass, m_origin); } PotentialComputeGradForOutParticle(m_evalPos->getDevicePtr(),m_total_mass, m_center, m_SpatialHasher_mass.getWorldOrigin(), make_float3(m_SpatialHasher_mass.getWorldOrigin().x+m_L, m_SpatialHasher_mass.getWorldOrigin().y+m_L, m_SpatialHasher_mass.getWorldOrigin().z+m_L), 1.0,1.0,m_particle_gradPhi->getDevicePtr(),m_M_eval); return true; }
void reference_calc_custom(const uchar4* const h_sourceImg, const size_t numRowsSource, const size_t numColsSource, const uchar4* const h_destImg, uchar4* const h_blendedImg, const unsigned char* h_mask, const unsigned char* h_border, const unsigned char* h_interior){ //we need to create a list of border pixels and interior pixels //this is a conceptually simple implementation, not a particularly efficient one... //first create mask size_t srcSize = numRowsSource * numColsSource; const unsigned char* mask = h_mask; // new unsigned char[srcSize]; /* for (int i = 0; i < srcSize; ++i) { mask[i] = (h_sourceImg[i].x + h_sourceImg[i].y + h_sourceImg[i].z < 3 * 255) ? 1 : 0; } */ //next compute strictly interior pixels and border pixels const unsigned char *borderPixels = h_border; // new unsigned char[srcSize]; const unsigned char *strictInteriorPixels = h_interior; // new unsigned char[srcSize]; std::vector<uint2> interiorPixelList; //the source region in the homework isn't near an image boundary, so we can //simplify the conditionals a little... for (size_t r = 1; r < numRowsSource - 1; ++r) { for (size_t c = 1; c < numColsSource - 1; ++c) { if (mask[r * numColsSource + c]) { if (mask[(r -1) * numColsSource + c] && mask[(r + 1) * numColsSource + c] && mask[r * numColsSource + c - 1] && mask[r * numColsSource + c + 1]) { // strictInteriorPixels[r * numColsSource + c] = 1; // borderPixels[r * numColsSource + c] = 0; interiorPixelList.push_back(make_uint2(r, c)); } else { // strictInteriorPixels[r * numColsSource + c] = 0; // borderPixels[r * numColsSource + c] = 1; } } else { // strictInteriorPixels[r * numColsSource + c] = 0; // borderPixels[r * numColsSource + c] = 0; } } } //split the source and destination images into their respective //channels unsigned char* red_src = new unsigned char[srcSize]; unsigned char* blue_src = new unsigned char[srcSize]; unsigned char* green_src = new unsigned char[srcSize]; for (int i = 0; i < srcSize; ++i) { red_src[i] = h_sourceImg[i].x; blue_src[i] = h_sourceImg[i].y; green_src[i] = h_sourceImg[i].z; } unsigned char* red_dst = new unsigned char[srcSize]; unsigned char* blue_dst = new unsigned char[srcSize]; unsigned char* green_dst = new unsigned char[srcSize]; for (int i = 0; i < srcSize; ++i) { red_dst[i] = h_destImg[i].x; blue_dst[i] = h_destImg[i].y; green_dst[i] = h_destImg[i].z; } //next we'll precompute the g term - it never changes, no need to recompute every iteration float *g_red = new float[srcSize]; float *g_blue = new float[srcSize]; float *g_green = new float[srcSize]; memset(g_red, 0, srcSize * sizeof(float)); memset(g_blue, 0, srcSize * sizeof(float)); memset(g_green, 0, srcSize * sizeof(float)); computeG_custom(red_src, g_red, numColsSource, interiorPixelList); computeG_custom(blue_src, g_blue, numColsSource, interiorPixelList); computeG_custom(green_src, g_green, numColsSource, interiorPixelList); //for each color channel we'll need two buffers and we'll ping-pong between them float *blendedValsRed_1 = new float[srcSize]; float *blendedValsRed_2 = new float[srcSize]; float *blendedValsBlue_1 = new float[srcSize]; float *blendedValsBlue_2 = new float[srcSize]; float *blendedValsGreen_1 = new float[srcSize]; float *blendedValsGreen_2 = new float[srcSize]; //IC is the source image, copy over for (size_t i = 0; i < srcSize; ++i) { blendedValsRed_1[i] = red_src[i]; blendedValsRed_2[i] = red_src[i]; blendedValsBlue_1[i] = blue_src[i]; blendedValsBlue_2[i] = blue_src[i]; blendedValsGreen_1[i] = green_src[i]; blendedValsGreen_2[i] = green_src[i]; } //Perform the solve on each color channel const size_t numIterations = 800; for (size_t i = 0; i < numIterations; ++i) { computeIteration_custom(red_dst, strictInteriorPixels, borderPixels, interiorPixelList, numColsSource, blendedValsRed_1, g_red, blendedValsRed_2); std::swap(blendedValsRed_1, blendedValsRed_2); } for (size_t i = 0; i < numIterations; ++i) { computeIteration_custom(blue_dst, strictInteriorPixels, borderPixels, interiorPixelList, numColsSource, blendedValsBlue_1, g_blue, blendedValsBlue_2); std::swap(blendedValsBlue_1, blendedValsBlue_2); } for (size_t i = 0; i < numIterations; ++i) { computeIteration_custom(green_dst, strictInteriorPixels, borderPixels, interiorPixelList, numColsSource, blendedValsGreen_1, g_green, blendedValsGreen_2); std::swap(blendedValsGreen_1, blendedValsGreen_2); } std::swap(blendedValsRed_1, blendedValsRed_2); //put output into _2 std::swap(blendedValsBlue_1, blendedValsBlue_2); //put output into _2 std::swap(blendedValsGreen_1, blendedValsGreen_2); //put output into _2 //copy the destination image to the output memcpy(h_blendedImg, h_destImg, sizeof(uchar4) * srcSize); //copy computed values for the interior into the output for (size_t i = 0; i < interiorPixelList.size(); ++i) { uint2 coord = interiorPixelList[i]; unsigned int offset = coord.x * numColsSource + coord.y; h_blendedImg[offset].x = blendedValsRed_2[offset]; h_blendedImg[offset].y = blendedValsBlue_2[offset]; h_blendedImg[offset].z = blendedValsGreen_2[offset]; } //wow, we allocated a lot of memory! // delete[] mask; delete[] blendedValsRed_1; delete[] blendedValsRed_2; delete[] blendedValsBlue_1; delete[] blendedValsBlue_2; delete[] blendedValsGreen_1; delete[] blendedValsGreen_2; delete[] g_red; delete[] g_blue; delete[] g_green; delete[] red_src; delete[] red_dst; delete[] blue_src; delete[] blue_dst; delete[] green_src; delete[] green_dst; // delete[] borderPixels; // delete[] strictInteriorPixels; }