// Creates the buffers, volume textures and the two compute programs used to
// seed a 3D fluid simulation: one program that generates velocity from
// vortices and one that generates the external sources.
//
//   factory        compiles the compute programs; its defines are pushed on
//                  entry and popped before returning
//   x/y/zSize      dimensions of the volume textures
//   numX/Y/ZThreads  passed to the shaders as NUM_X/Y/Z_THREADS; the group
//                  counts are size/threads using integer division
//   parameters     constant buffer attached to both programs as "Parameters"
//
// If a program fails to compile, its member is left null and no resources
// are attached to it.
Fluid3InitializeSource::Fluid3InitializeSource(ProgramFactory& factory,
    int xSize, int ySize, int zSize, int numXThreads, int numYThreads,
    int numZThreads, std::shared_ptr<ConstantBuffer> const& parameters)
    :
    mNumXGroups(xSize/numXThreads),
    mNumYGroups(ySize/numYThreads),
    mNumZGroups(zSize/numZThreads)
{
    // Every volume texture in this class has the same format and size and
    // is flagged as a shader output.
    auto makeVolume = [xSize, ySize, zSize]()
    {
        auto volume = std::make_shared<Texture3>(DF_R32G32B32A32_FLOAT,
            xSize, ySize, zSize);
        volume->SetUsage(Resource::SHADER_OUTPUT);
        return volume;
    };

    // Resources for generating velocity from vortices.
    mVortex = std::make_shared<ConstantBuffer>(sizeof(Vortex), true);
    mVelocity0 = makeVolume();
    mVelocity1 = makeVolume();

    // Resources for generating velocity from wind and gravity.
    mExternal = std::make_shared<ConstantBuffer>(sizeof(External), false);
    External& external = *mExternal->Get<External>();
    external.densityProducer = { 0.5f, 0.5f, 0.5f, 0.0f };
    external.densityPData = { 0.01f, 16.0f, 0.0f, 0.0f };
    external.densityConsumer = { 0.75f, 0.75f, 0.75f, 0.0f };
    external.densityCData = { 0.01f, 0.0f, 0.0f, 0.0f };
    external.gravity = { 0.0f, 0.0f, 0.0f, 0.0f };
    external.windData = { 0.001f, 0.0f, 0.0f, 0.0f };
    mSource = makeVolume();

    // Both programs are compiled with the same thread-count defines.
    int const api = factory.GetAPI();
    factory.PushDefines();
    factory.defines.Set("NUM_X_THREADS", numXThreads);
    factory.defines.Set("NUM_Y_THREADS", numYThreads);
    factory.defines.Set("NUM_Z_THREADS", numZThreads);

    // Program that generates velocity from vortices.
    mGenerateVortex = factory.CreateFromSource(*msGenerateSource[api]);
    if (mGenerateVortex)
    {
        std::shared_ptr<ComputeShader> const vortexShader =
            mGenerateVortex->GetCShader();
        vortexShader->Set("Parameters", parameters);
        vortexShader->Set("Vortex", mVortex);
        vortexShader->Set("inVelocity", mVelocity0);
        vortexShader->Set("outVelocity", mVelocity1);
    }

    // Program that generates the sources to the fluid simulation.
    mInitializeSource = factory.CreateFromSource(*msInitializeSource[api]);
    if (mInitializeSource)
    {
        std::shared_ptr<ComputeShader> const sourceShader =
            mInitializeSource->GetCShader();
        sourceShader->Set("Parameters", parameters);
        sourceShader->Set("External", mExternal);
        sourceShader->Set("source", mSource);
    }

    factory.PopDefines();
}
// Creates the divergence volume texture and the compute program that is
// bound to it.
//
//   factory        compiles the compute program; its defines are pushed on
//                  entry and popped before returning
//   x/y/zSize      dimensions of the divergence texture
//   numX/Y/ZThreads  passed to the shader as NUM_X/Y/Z_THREADS; the group
//                  counts are size/threads using integer division
//   parameters     constant buffer attached as "Parameters"
//
// If compilation fails, mComputeDivergence is left null.
Fluid3ComputeDivergence::Fluid3ComputeDivergence(ProgramFactory& factory,
    int xSize, int ySize, int zSize, int numXThreads, int numYThreads,
    int numZThreads, std::shared_ptr<ConstantBuffer> const& parameters)
    :
    mNumXGroups(xSize/numXThreads),
    mNumYGroups(ySize/numYThreads),
    mNumZGroups(zSize/numZThreads)
{
    // Single-channel float volume, flagged as a shader output.
    mDivergence = std::make_shared<Texture3>(DF_R32_FLOAT, xSize, ySize,
        zSize);
    mDivergence->SetUsage(Resource::SHADER_OUTPUT);

    int const api = factory.GetAPI();
    factory.PushDefines();
    factory.defines.Set("NUM_X_THREADS", numXThreads);
    factory.defines.Set("NUM_Y_THREADS", numYThreads);
    factory.defines.Set("NUM_Z_THREADS", numZThreads);

    mComputeDivergence = factory.CreateFromSource(*msSource[api]);
    if (mComputeDivergence)
    {
        auto const& cshader = mComputeDivergence->GetCShader();
        cshader->Set("Parameters", parameters);
        cshader->Set("divergence", mDivergence);
    }

    factory.PopDefines();
}
// Creates the initial density and velocity textures for a 2D fluid
// simulation, the two state textures, and the compute program that
// initializes the state from them.
//
//   factory        compiles the compute program; its defines are pushed
//                  before compilation and popped before returning
//   xSize, ySize   dimensions of all textures
//   numX/YThreads  passed to the shader as NUM_X/Y_THREADS; the group counts
//                  are size/threads using integer division
//
// If compilation fails, mInitializeState is left null.
Fluid2InitializeState::Fluid2InitializeState(ProgramFactory& factory,
    int xSize, int ySize, int numXThreads, int numYThreads)
    :
    mNumXGroups(xSize/numXThreads),
    mNumYGroups(ySize/numYThreads)
{
    // Random densities in [0,1) from a default-seeded Mersenne twister, so
    // the sequence is the same on every run.
    std::mt19937 engine;
    std::uniform_real_distribution<float> unitInterval(0.0f, 1.0f);

    mDensity = std::make_shared<Texture2>(DF_R32_FLOAT, xSize, ySize);
    float* const density = mDensity->Get<float>();
    auto const numDensities = mDensity->GetNumElements();
    for (unsigned int k = 0; k < numDensities; ++k)
    {
        density[k] = unitInterval(engine);
    }

    // The initial velocity field is identically zero.
    mVelocity = std::make_shared<Texture2>(DF_R32G32_FLOAT, xSize, ySize);
    memset(mVelocity->GetData(), 0, mVelocity->GetNumBytes());

    // The states at time 0 and time -dt are written by the compute shader.
    mStateTm1 = std::make_shared<Texture2>(DF_R32G32B32A32_FLOAT, xSize,
        ySize);
    mStateTm1->SetUsage(Resource::SHADER_OUTPUT);
    mStateT = std::make_shared<Texture2>(DF_R32G32B32A32_FLOAT, xSize,
        ySize);
    mStateT->SetUsage(Resource::SHADER_OUTPUT);

    // Compile the program that initializes velocity and density.
    int const api = factory.GetAPI();
    factory.PushDefines();
    factory.defines.Set("NUM_X_THREADS", numXThreads);
    factory.defines.Set("NUM_Y_THREADS", numYThreads);

    mInitializeState = factory.CreateFromSource(*msSource[api]);
    if (mInitializeState)
    {
        std::shared_ptr<ComputeShader> const initShader =
            mInitializeState->GetCShader();
        initShader->Set("density", mDensity);
        initShader->Set("velocity", mVelocity);
        initShader->Set("stateTm1", mStateTm1);
        initShader->Set("stateT", mStateT);
    }

    factory.PopDefines();
}
// Creates the two Poisson textures and the four compute programs used by
// the 2D Poisson solver: zeroing, solving, and writing the x- and y-edges.
//
//   factory        compiles the compute programs; its defines are pushed on
//                  entry and popped before returning
//   xSize, ySize   dimensions of the Poisson textures
//   numX/YThreads  passed to the shaders as NUM_X/Y_THREADS; the group
//                  counts are size/threads using integer division
//   parameters     constant buffer attached to the solver as "Parameters"
//   numIterations  stored in mNumIterations
//
// A program that fails to compile is left null.  The edge programs have no
// resources attached here.
Fluid2SolvePoisson::Fluid2SolvePoisson(ProgramFactory& factory, int xSize,
    int ySize, int numXThreads, int numYThreads,
    std::shared_ptr<ConstantBuffer> const& parameters, int numIterations)
    :
    mNumXGroups(xSize/numXThreads),
    mNumYGroups(ySize/numYThreads),
    mNumIterations(numIterations)
{
    // Both Poisson textures are single-channel float shader outputs.
    auto makePoisson = [xSize, ySize]()
    {
        auto poisson = std::make_shared<Texture2>(DF_R32_FLOAT, xSize,
            ySize);
        poisson->SetUsage(Resource::SHADER_OUTPUT);
        return poisson;
    };
    mPoisson0 = makePoisson();
    mPoisson1 = makePoisson();

    int const api = factory.GetAPI();
    factory.PushDefines();
    factory.defines.Set("NUM_X_THREADS", numXThreads);
    factory.defines.Set("NUM_Y_THREADS", numYThreads);

    // Program for zeroing mPoisson0 on the GPU.
    mZeroPoisson = factory.CreateFromSource(*msZeroSource[api]);
    if (mZeroPoisson)
    {
        mZeroPoisson->GetCShader()->Set("poisson", mPoisson0);
    }

    // Program for the Poisson solver itself; only the simulation parameters
    // are attached here.
    mSolvePoisson = factory.CreateFromSource(*msSolveSource[api]);
    if (mSolvePoisson)
    {
        mSolvePoisson->GetCShader()->Set("Parameters", parameters);
    }

    // The edge programs share one source and are selected by a define.  The
    // x-edge variant is compiled with only the y-thread count.
    factory.defines.Clear();
    factory.defines.Set("USE_ZERO_X_EDGE", 1);
    factory.defines.Set("NUM_Y_THREADS", numYThreads);
    mWriteXEdge = factory.CreateFromSource(*msEnforceSource[api]);

    // The y-edge variant is compiled with only the x-thread count.
    factory.defines.Clear();
    factory.defines.Set("USE_ZERO_Y_EDGE", 1);
    factory.defines.Set("NUM_X_THREADS", numXThreads);
    mWriteYEdge = factory.CreateFromSource(*msEnforceSource[api]);

    factory.PopDefines();
}
// Compiles the velocity-adjustment compute program for the 2D fluid
// simulation and attaches the simulation parameters to it.
//
//   factory        compiles the compute program; its defines are pushed on
//                  entry and popped before returning
//   xSize, ySize   used only to compute the group counts
//   numX/YThreads  passed to the shader as NUM_X/Y_THREADS; the group counts
//                  are size/threads using integer division
//   parameters     constant buffer attached as "Parameters"
//
// If compilation fails, mAdjustVelocity is left null.
Fluid2AdjustVelocity::Fluid2AdjustVelocity(ProgramFactory& factory,
    int xSize, int ySize, int numXThreads, int numYThreads,
    std::shared_ptr<ConstantBuffer> const& parameters)
    :
    mNumXGroups(xSize/numXThreads),
    mNumYGroups(ySize/numYThreads)
{
    int const api = factory.GetAPI();

    factory.PushDefines();
    factory.defines.Set("NUM_X_THREADS", numXThreads);
    factory.defines.Set("NUM_Y_THREADS", numYThreads);

    mAdjustVelocity = factory.CreateFromSource(*msSource[api]);
    if (mAdjustVelocity)
    {
        auto const& cshader = mAdjustVelocity->GetCShader();
        cshader->Set("Parameters", parameters);
    }

    factory.PopDefines();
}
// Compiles the eight Runge-Kutta compute programs from "RungeKutta.hlsl",
// creates the simulation constant buffer and the per-particle structured
// buffers, and attaches the resources to each program.
//
//   factory      compiles the compute programs; its defines are pushed
//                before compilation and popped on every exit path
//   numColumns, numRows, numSlices
//                grid dimensions; the number of particles is their product
//   step         time step, stored as 'delta' with derived half and sixth
//   viscosity    stored in the simulation parameters
//   environment  resolves the path of the shader file
//   created      set to false on entry and to true only after all eight
//                programs were created and all resources were attached
//
// When a program fails to compile the constructor returns early and none of
// the buffers are created.
//
// NOTE(review): factory.csEntry is overwritten with the RK4 entry-point
// names and is not restored here; callers that reuse the factory must set
// csEntry themselves.
GpuMassSpringVolume::GpuMassSpringVolume(ProgramFactory& factory,
    int numColumns, int numRows, int numSlices, float step, float viscosity,
    Environment& environment, bool& created)
    :
    mNumColumns(numColumns),
    mNumRows(numRows),
    mNumSlices(numSlices)
{
    created = false;

    // Create the shaders.  The entry points are RK4Step1a, RK4Step1b, ...,
    // RK4Step4a, RK4Step4b, stored at indices 0 through 7.
    std::string path = environment.GetPath("RungeKutta.hlsl");
    int const numThreads = 4;
    factory.PushDefines();
    factory.defines.Set("NUM_X_THREADS", numThreads);
    factory.defines.Set("NUM_Y_THREADS", numThreads);
    factory.defines.Set("NUM_Z_THREADS", numThreads);
    bool allCreated = true;
    for (int i = 0; i < 8; ++i)
    {
        factory.csEntry = "RK4Step";
        factory.csEntry += std::to_string(1 + i/2);
        factory.csEntry += ((i & 1) == 0 ? "a" : "b");
        mRK4Shader[i] = factory.CreateFromFile(path);
        if (!mRK4Shader[i])
        {
            allCreated = false;
            break;
        }
    }

    // The defines are needed only for compilation.  Pop them here so that
    // the factory is restored even when a program failed to compile; the
    // early return used to skip the pop and leave the stack unbalanced.
    factory.PopDefines();
    if (!allCreated)
    {
        return;
    }

    // The cbuffer is tightly packed.  Only time, halfTime, and fullTime vary.
    mParameters = std::make_shared<ConstantBuffer>(
        sizeof(SimulationParameters), true);
    SimulationParameters& p = *mParameters->Get<SimulationParameters>();
    p.dimensions[0] = numColumns;
    p.dimensions[1] = numRows;
    p.dimensions[2] = numSlices;
    p.dimensions[3] = numColumns * numRows;
    p.viscosity = viscosity;
    p.time = 0.0f;
    p.delta = step;
    p.halfDelta = p.delta / 2.0f;
    p.sixthDelta = p.delta / 6.0f;
    p.halfTime = p.time + p.halfDelta;
    p.fullTime = p.time + p.delta;

    unsigned int const numParticles = p.dimensions[2] * p.dimensions[3];
    size_t const vecsize = sizeof(Vector3<float>);

    // One float per particle.
    auto makeScalarBuffer = [numParticles]()
    {
        return std::make_shared<StructuredBuffer>(numParticles,
            sizeof(float));
    };

    // Scratch buffers written by the shaders and copyable back to the CPU.
    auto makeScratchBuffer = [numParticles](size_t elementSize)
    {
        auto buffer = std::make_shared<StructuredBuffer>(numParticles,
            elementSize, true);
        buffer->SetUsage(Resource::SHADER_OUTPUT);
        buffer->SetCopyType(Resource::COPY_STAGING_TO_CPU);
        return buffer;
    };

    mMass = makeScalarBuffer();
    mInvMass = makeScalarBuffer();
    mPosition = std::make_shared<StructuredBuffer>(numParticles, vecsize);
    mPosition->SetUsage(Resource::SHADER_OUTPUT);
    mPosition->SetCopyType(Resource::COPY_STAGING_TO_CPU);
    mVelocity = std::make_shared<StructuredBuffer>(numParticles, vecsize);
    mVelocity->SetUsage(Resource::SHADER_OUTPUT);
    mConstantC = makeScalarBuffer();
    mLengthC = makeScalarBuffer();
    mConstantR = makeScalarBuffer();
    mLengthR = makeScalarBuffer();
    mConstantS = makeScalarBuffer();
    mLengthS = makeScalarBuffer();
    mPTmp = makeScratchBuffer(vecsize);
    mPAllTmp = makeScratchBuffer(4 * vecsize);
    mVTmp = makeScratchBuffer(vecsize);
    mVAllTmp = makeScratchBuffer(4 * vecsize);

    // Integer division: any remainder of a dimension is dropped.
    mNumXGroups = p.dimensions[0] / numThreads;
    mNumYGroups = p.dimensions[1] / numThreads;
    mNumZGroups = p.dimensions[2] / numThreads;

    // Helpers for the resource groups shared by several programs.  Each
    // program below receives its resources in the same order as before.
    auto bindCommon = [this](std::shared_ptr<ComputeShader> const& shader)
    {
        shader->Set("SimulationParameters", mParameters);
        shader->Set("invMass", mInvMass);
    };
    auto bindSprings = [this](std::shared_ptr<ComputeShader> const& shader)
    {
        shader->Set("constantC", mConstantC);
        shader->Set("lengthC", mLengthC);
        shader->Set("constantR", mConstantR);
        shader->Set("lengthR", mLengthR);
        shader->Set("constantS", mConstantS);
        shader->Set("lengthS", mLengthS);
    };
    auto bindTmp = [this](std::shared_ptr<ComputeShader> const& shader)
    {
        shader->Set("pTmp", mPTmp);
        shader->Set("vTmp", mVTmp);
    };
    auto bindAllTmp = [this](std::shared_ptr<ComputeShader> const& shader)
    {
        shader->Set("pAllTmp", mPAllTmp);
        shader->Set("vAllTmp", mVAllTmp);
    };

    // RK4Step1a: the only "a" program that also binds position and that
    // does not bind pTmp/vTmp.
    std::shared_ptr<ComputeShader> cshader = mRK4Shader[0]->GetCShader();
    bindCommon(cshader);
    bindSprings(cshader);
    bindAllTmp(cshader);
    cshader->Set("position", mPosition);
    cshader->Set("velocity", mVelocity);

    for (int stage = 1; stage <= 3; ++stage)
    {
        // RK4Step<stage>b at odd indices 1, 3, 5.
        cshader = mRK4Shader[2 * stage - 1]->GetCShader();
        bindCommon(cshader);
        bindTmp(cshader);
        bindAllTmp(cshader);
        cshader->Set("position", mPosition);
        cshader->Set("velocity", mVelocity);

        // RK4Step<stage+1>a at even indices 2, 4, 6.
        cshader = mRK4Shader[2 * stage]->GetCShader();
        bindCommon(cshader);
        bindSprings(cshader);
        bindTmp(cshader);
        bindAllTmp(cshader);
        cshader->Set("velocity", mVelocity);
    }

    // RK4Step4b.
    cshader = mRK4Shader[7]->GetCShader();
    bindCommon(cshader);
    cshader->Set("position", mPosition);
    cshader->Set("velocity", mVelocity);
    bindAllTmp(cshader);

    created = true;
}
// Creates the distance and predecessor textures, the segment constant
// buffer, and all compute programs of the GPU shortest-path solver.
//
//   factory   compiles the compute programs; its defines are pushed before
//             compilation and popped on every exit path
//   weights   weight texture; its width determines mSize and it is attached
//             to the initialize and update programs
//             (assumes a square texture whose width is a power of two, since
//             mLogSize comes from Log2OfPowerOfTwo -- TODO confirm)
//   env       resolves the paths of the shader files
//   created   set to false on entry and to true only after every program
//             was created and all resources were attached
//
// When a program fails to compile the constructor returns early; programs
// created before the failure remain set.
GpuShortestPath::GpuShortestPath(ProgramFactory& factory,
    std::shared_ptr<Texture2> const& weights, Environment const& env,
    bool& created)
    :
    mSize(static_cast<int>(weights->GetWidth()))
{
    created = false;
    mLogSize = Log2OfPowerOfTwo(mSize);

    mDistance = std::make_shared<Texture2>(DF_R32_FLOAT, mSize, mSize);
    mDistance->SetUsage(Resource::SHADER_OUTPUT);
    memset(mDistance->GetData(), 0, mDistance->GetNumBytes());

    mPrevious = std::make_shared<Texture2>(DF_R32G32_SINT, mSize, mSize);
    mPrevious->SetUsage(Resource::SHADER_OUTPUT);
    mPrevious->SetCopyType(Resource::COPY_STAGING_TO_CPU);

    mSegment = std::make_shared<ConstantBuffer>(3 * sizeof(int), true);

    // Pairs PushDefines with PopDefines on every exit path.  The early
    // returns below used to skip the pop and leave the caller's factory
    // with an unbalanced defines stack.
    struct DefinesScope
    {
        explicit DefinesScope(ProgramFactory& inFactory)
            :
            factory(inFactory)
        {
            factory.PushDefines();
        }

        ~DefinesScope()
        {
            factory.PopDefines();
        }

        DefinesScope(DefinesScope const&) = delete;
        DefinesScope& operator=(DefinesScope const&) = delete;

        ProgramFactory& factory;
    };
    DefinesScope const definesScope(factory);

    factory.defines.Set("ISIZE", mSize);
    mInitializeDiagToRow = factory.CreateFromFile(
        env.GetPath("InitializeDiagToRow.hlsl"));
    if (!mInitializeDiagToRow)
    {
        return;
    }
    std::shared_ptr<ComputeShader> cshader =
        mInitializeDiagToRow->GetCShader();
    cshader->Set("weights", weights);
    cshader->Set("previous", mPrevious);
    cshader->Set("sum", mDistance);

    mInitializeDiagToCol = factory.CreateFromFile(
        env.GetPath("InitializeDiagToCol.hlsl"));
    if (!mInitializeDiagToCol)
    {
        return;
    }
    cshader = mInitializeDiagToCol->GetCShader();
    cshader->Set("weights", weights);
    cshader->Set("previous", mPrevious);
    cshader->Set("sum", mDistance);

    // One row program and one column program per pass, each compiled with
    // its own value of P in 1..mLogSize.
    mPartialSumDiagToRow.resize(mLogSize);
    mPartialSumDiagToCol.resize(mLogSize);
    for (int i = 0; i < mLogSize; ++i)
    {
        factory.defines.Set("LOGN", mLogSize);
        factory.defines.Set("P", i + 1);

        mPartialSumDiagToRow[i] = factory.CreateFromFile(
            env.GetPath("PartialSumsDiagToRow.hlsl"));
        if (!mPartialSumDiagToRow[i])
        {
            return;
        }
        mPartialSumDiagToRow[i]->GetCShader()->Set("sum", mDistance);

        mPartialSumDiagToCol[i] = factory.CreateFromFile(
            env.GetPath("PartialSumsDiagToCol.hlsl"));
        if (!mPartialSumDiagToCol[i])
        {
            return;
        }
        mPartialSumDiagToCol[i]->GetCShader()->Set("sum", mDistance);
    }

    // The update program is compiled with whatever defines are still set
    // from the loop above, as before.
    mUpdate = factory.CreateFromFile(env.GetPath("UpdateShader.hlsl"));
    if (!mUpdate)
    {
        return;
    }
    cshader = mUpdate->GetCShader();
    cshader->Set("Segment", mSegment);
    cshader->Set("weights", weights);
    cshader->Set("distance", mDistance);
    cshader->Set("previous", mPrevious);

    created = true;
}
// Creates the six boundary-face textures of a 3D fluid volume and the six
// compute programs (copy and write, for each of the x-, y- and z-faces)
// that operate on them.
//
//   factory        compiles the compute programs; its defines are pushed
//                  before compilation and popped before returning
//   x/y/zSize      dimensions of the volume; each face texture uses the two
//                  sizes of the axes it spans
//   numX/Y/ZThreads  thread counts; each program is compiled with only the
//                  two counts of the axes its face spans, and the group
//                  counts are size/threads using integer division
//
// All programs come from the same source and are selected by a USE_* define.
// A program that fails to compile is left null and has no resources
// attached.
Fluid3EnforceStateBoundary::Fluid3EnforceStateBoundary(
    ProgramFactory& factory, int xSize, int ySize, int zSize,
    int numXThreads, int numYThreads, int numZThreads)
    :
    mNumXGroups(xSize/numXThreads),
    mNumYGroups(ySize/numYThreads),
    mNumZGroups(zSize/numZThreads)
{
    // Two-channel float face texture, flagged as a shader output.
    auto makeFace = [](int width, int height)
    {
        auto face = std::make_shared<Texture2>(DF_R32G32_FLOAT, width,
            height);
        face->SetUsage(Resource::SHADER_OUTPUT);
        return face;
    };

    mXMin = makeFace(ySize, zSize);
    mXMax = makeFace(ySize, zSize);
    mYMin = makeFace(xSize, zSize);
    mYMax = makeFace(xSize, zSize);
    mZMin = makeFace(xSize, ySize);
    mZMax = makeFace(xSize, ySize);

    int const api = factory.GetAPI();

    // Sets the selector define and the two thread-count defines, then
    // compiles the shared source.  Clearing the previous defines is left to
    // the caller so that the first variant is compiled directly on top of
    // PushDefines.
    auto compileVariant = [&](char const* selector,
        char const* threadsName0, int numThreads0,
        char const* threadsName1, int numThreads1)
    {
        factory.defines.Set(selector, 1);
        factory.defines.Set(threadsName0, numThreads0);
        factory.defines.Set(threadsName1, numThreads1);
        return factory.CreateFromSource(*msSource[api]);
    };

    factory.PushDefines();

    // x-faces
    mCopyXFace = compileVariant("USE_COPY_X_FACE",
        "NUM_Y_THREADS", numYThreads, "NUM_Z_THREADS", numZThreads);
    if (mCopyXFace)
    {
        auto const& cshader = mCopyXFace->GetCShader();
        cshader->Set("xMin", mXMin);
        cshader->Set("xMax", mXMax);
    }

    factory.defines.Clear();
    mWriteXFace = compileVariant("USE_WRITE_X_FACE",
        "NUM_Y_THREADS", numYThreads, "NUM_Z_THREADS", numZThreads);
    if (mWriteXFace)
    {
        auto const& cshader = mWriteXFace->GetCShader();
        cshader->Set("xMin", mXMin);
        cshader->Set("xMax", mXMax);
    }

    // y-faces
    factory.defines.Clear();
    mCopyYFace = compileVariant("USE_COPY_Y_FACE",
        "NUM_X_THREADS", numXThreads, "NUM_Z_THREADS", numZThreads);
    if (mCopyYFace)
    {
        auto const& cshader = mCopyYFace->GetCShader();
        cshader->Set("yMin", mYMin);
        cshader->Set("yMax", mYMax);
    }

    factory.defines.Clear();
    mWriteYFace = compileVariant("USE_WRITE_Y_FACE",
        "NUM_X_THREADS", numXThreads, "NUM_Z_THREADS", numZThreads);
    if (mWriteYFace)
    {
        auto const& cshader = mWriteYFace->GetCShader();
        cshader->Set("yMin", mYMin);
        cshader->Set("yMax", mYMax);
    }

    // z-faces
    factory.defines.Clear();
    mCopyZFace = compileVariant("USE_COPY_Z_FACE",
        "NUM_X_THREADS", numXThreads, "NUM_Y_THREADS", numYThreads);
    if (mCopyZFace)
    {
        auto const& cshader = mCopyZFace->GetCShader();
        cshader->Set("zMin", mZMin);
        cshader->Set("zMax", mZMax);
    }

    factory.defines.Clear();
    mWriteZFace = compileVariant("USE_WRITE_Z_FACE",
        "NUM_X_THREADS", numXThreads, "NUM_Y_THREADS", numYThreads);
    if (mWriteZFace)
    {
        auto const& cshader = mWriteZFace->GetCShader();
        cshader->Set("zMin", mZMin);
        cshader->Set("zMax", mZMax);
    }

    factory.PopDefines();
}