void HairBcsdf::precomputeAzimuthalDistributions() { const int Resolution = PrecomputedAzimuthalLobe::AzimuthalResolution; std::unique_ptr<Vec3f[]> valuesR (new Vec3f[Resolution*Resolution]); std::unique_ptr<Vec3f[]> valuesTT (new Vec3f[Resolution*Resolution]); std::unique_ptr<Vec3f[]> valuesTRT(new Vec3f[Resolution*Resolution]); // Ideally we could simply make this a constexpr, but MSVC does not support that yet (boo!) #define NumPoints 140 GaussLegendre<NumPoints> integrator; const auto points = integrator.points(); const auto weights = integrator.weights(); // Cache the gammaI across all integration points std::array<float, NumPoints> gammaIs; for (int i = 0; i < NumPoints; ++i) gammaIs[i] = std::asin(points[i]); // Precompute the Gaussian detector and sample it into three 1D tables. // This is the only part of the precomputation that is actually approximate. // 2048 samples are enough to support the lowest roughness that the BCSDF // can reliably simulate const int NumGaussianSamples = 2048; std::unique_ptr<float[]> Ds[3]; for (int p = 0; p < 3; ++p) { Ds[p].reset(new float[NumGaussianSamples]); for (int i = 0; i < NumGaussianSamples; ++i) Ds[p][i] = D(_betaR, i/(NumGaussianSamples - 1.0f)*TWO_PI); } // Simple wrapped linear interpolation of the precomputed table auto approxD = [&](int p, float phi) { float u = std::abs(phi*(INV_TWO_PI*(NumGaussianSamples - 1))); int x0 = int(u); int x1 = x0 + 1; u -= x0; return Ds[p][x0 % NumGaussianSamples]*(1.0f - u) + Ds[p][x1 % NumGaussianSamples]*u; }; // Here follows the actual precomputation of the azimuthal scattering functions // The scattering functions are parametrized with the azimuthal angle phi, // and the cosine of the half angle, cos(thetaD). // This parametrization makes the azimuthal function relatively smooth and allows using // really low resolutions for the table (64x64 in this case) without any visual // deviation from ground truth, even at the lowest supported roughness setting for (int y = 0; y < Resolution; ++y) { float cosHalfAngle = y/(Resolution - 1.0f); // Precompute reflection Fresnel factor and reduced absorption coefficient float iorPrime = std::sqrt(Eta*Eta - (1.0f - cosHalfAngle*cosHalfAngle))/cosHalfAngle; float cosThetaT = std::sqrt(1.0f - (1.0f - cosHalfAngle*cosHalfAngle)*sqr(1.0f/Eta)); Vec3f sigmaAPrime = _sigmaA/cosThetaT; // Precompute gammaT, f_t and internal absorption across all integration points std::array<float, NumPoints> fresnelTerms, gammaTs; std::array<Vec3f, NumPoints> absorptions; for (int i = 0; i < NumPoints; ++i) { gammaTs[i] = std::asin(clamp(points[i]/iorPrime, -1.0f, 1.0f)); fresnelTerms[i] = Fresnel::dielectricReflectance(1.0f/Eta, cosHalfAngle*std::cos(gammaIs[i])); absorptions[i] = std::exp(-sigmaAPrime*2.0f*std::cos(gammaTs[i])); } for (int phiI = 0; phiI < Resolution; ++phiI) { float phi = TWO_PI*phiI/(Resolution - 1.0f); float integralR = 0.0f; Vec3f integralTT(0.0f); Vec3f integralTRT(0.0f); // Here follows the integration across the fiber width, h. // Since we were able to precompute most of the factors that // are constant w.r.t phi for a given h, // we don't have to do much work here. for (int i = 0; i < integrator.numSamples(); ++i) { float fR = fresnelTerms[i]; Vec3f T = absorptions[i]; float AR = fR; Vec3f ATT = (1.0f - fR)*(1.0f - fR)*T; Vec3f ATRT = ATT*fR*T; integralR += weights[i]*approxD(0, phi - Phi(gammaIs[i], gammaTs[i], 0))*AR; integralTT += weights[i]*approxD(1, phi - Phi(gammaIs[i], gammaTs[i], 1))*ATT; integralTRT += weights[i]*approxD(2, phi - Phi(gammaIs[i], gammaTs[i], 2))*ATRT; } valuesR [phiI + y*Resolution] = Vec3f(0.5f*integralR); valuesTT [phiI + y*Resolution] = 0.5f*integralTT; valuesTRT[phiI + y*Resolution] = 0.5f*integralTRT; } } // Hand the values off to the helper class to construct sampling CDFs and so forth _nR .reset(new PrecomputedAzimuthalLobe(std::move(valuesR))); _nTT .reset(new PrecomputedAzimuthalLobe(std::move(valuesTT))); _nTRT.reset(new PrecomputedAzimuthalLobe(std::move(valuesTRT))); }
int main() { // --added for timing code-- // timekeeper myTimer; unsigned precomputationRealTime, precomputationClockTime; unsigned sumOfInterpolationRealTimes = 0; unsigned sumOfInterpolationClockTimes = 0; int W=50; std::vector<Vector<2,double> > samplesPos(W*W); std::vector<double> values1(W*W, 0.); std::vector<double> values2(W*W, 0.); std::vector<double> valuesR(W*W, 0.); for (int i=0; i<W; i++) { for (int j=0; j<W; j++) { samplesPos[i*W+j] = Vector<2,double>(i/(double)W,j/(double)W); // the data lie on the unit grid values1[i*W+j] = 0.; values2[i*W+j] = 0.; // generate a square if (i>5 && i<45 && j>5 && j<45) values1[i*W+j] = 1; else values1[i*W+j] = 0; // generate a disk if ((i-25)*(i-25)+(j-25)*(j-25)<509) values2[i*W+j] = 1; else values2[i*W+j] = 0; } } Interpolator<2,double> interp(samplesPos, values1, // source distribution, in R^2 samplesPos, values2, // target distribution sqrDistLinear, rbfFuncLinear, interpolateBinsLinear, // how to represent and move the particles 2, // particle spread : distance to 2nd nearest neighbor at each sample point 1); // 1 frequency band (standard displacement interpolation) // --added for timing code-- // myTimer.start(); interp.precompute(); // --added for timing code-- // myTimer.stop(); precomputationRealTime = myTimer.getElapsedRealMS(); precomputationClockTime = myTimer.getElapsedClockMS(); myTimer.clear(); int N = 50; // we get 50 intermediate steps for (int p=0; p<N; p++) { double alpha = p/((double)N-1.); // --added for timing code-- // myTimer.start(); interp.interpolate(alpha, samplesPos, valuesR); // --added for timing code-- // myTimer.stop(); sumOfInterpolationRealTimes += myTimer.getElapsedRealMS(); sumOfInterpolationClockTimes += myTimer.getElapsedClockMS(); myTimer.clear(); saveFile(p, valuesR); } // --added for timing code-- // FILE* timingRecord = fopen("timing", "a"); fprintf(timingRecord, "entry start\n"); fprintf(timingRecord, "precomputation (ms): %u\n", precomputationRealTime); fprintf(timingRecord, "precomputation (clock ms): %u\n", precomputationClockTime); fprintf(timingRecord, "total interpolation (ms): %u\n", sumOfInterpolationRealTimes); fprintf(timingRecord, "total interpolation (clock ms): %u\n", sumOfInterpolationClockTimes); fprintf(timingRecord, "interpolation count: %d\n", N); fprintf(timingRecord, "\n"); return 0; }