/*
 * Find the real roots of the monic cubic
 *     x^3 + p.z * x^2 + p.y * x + p.x = 0.
 *
 * Returns a float3 of roots. When the cubic has a single real root it is
 * placed in .x and .y/.z are set to NaN; otherwise all three lanes hold
 * real roots (repeated roots appear more than once).
 *
 * Degenerate inputs that previously produced NaN in every lane are handled
 * explicitly: an all-zero coefficient vector and an exact triple root.
 */
float3 solve_monic(float3 p)
{
    p = p * (1.0f / 3.0f);

    // After the division p.z is one third of the quadratic coefficient, which
    // is the shift that removes the quadratic term; it is undone on return.
    float pz = p.z;

    // Compute a normalization value to scale the vector by.
    // The normalization factor is divided by 2^20.
    // This is supposed to make internal calculations unlikely
    // to overflow while also making underflows unlikely.
    // NOTE(review): p.y is divided by scal^2 below, so sqrt(|p.y|) would be the
    // dimensionally matching term; cy * cy is |p.y|^(2/3). Kept unchanged from
    // the original -- confirm the intent.
    float cx = static_cast<float>(cbrt(fabs(p.x)));
    float cy = static_cast<float>(cbrt(fabs(p.y)));
    float scal = fmax(fmax(fabsf(p.z), cx), cy * cy) * (1.0f / 1048576.0f);

    // A zero scale means every coefficient is zero (or so small that the scale
    // underflowed). Dividing by it would give inf and then 0 * inf = NaN, so
    // report the roots directly: they are all zero to within float precision.
    if (scal == 0.0f)
    {
        return float3(0.0f, 0.0f, 0.0f);
    }

    float rscal = 1.0f / scal;
    p = p * float3(rscal * rscal * rscal, rscal * rscal, rscal);

    float bb = p.z * p.z;                                  // div scal^2
    float nq = bb - p.y;                                   // div scal^2
    float r = 1.5f * (p.y * p.z - p.x) - p.z * bb;         // div scal^3
    float nq3 = nq * nq * nq;                              // div scal^6
    float r2 = r * r;                                      // div scal^6

    if (nq3 < r2)
    {
        // one real root
        float root = sqrt(r2 - nq3);                       // div scal^3
        float s = static_cast<float>(cbrt(r + root));      // div scal
        float t = static_cast<float>(cbrt(r - root));      // div scal
        return float3((s + t) * scal - pz, nan(0), nan(0));
    }

    // Exact triple root: the trigonometric path below would evaluate
    // inversesqrt(0) * 0 = NaN. All three roots sit at the shift point.
    // Exact comparison is intentional; NaN inputs fall through unchanged.
    if (nq3 == 0.0f && r2 == 0.0f)
    {
        return float3(-pz, -pz, -pz);
    }

    // three real roots
    float phi_r = inversesqrt(nq3);                                 // div scal ^ -3
    float phi_root = static_cast<float>(cbrt(phi_r * nq3));         // div scal

    // Rounding in inversesqrt can push the ratio marginally outside [-1, 1],
    // which would make acospi return NaN. Clamp with comparisons (not
    // fmin/fmax) so that a NaN ratio still propagates as NaN.
    float cos_arg = r * phi_r;
    if (cos_arg > 1.0f)
    {
        cos_arg = 1.0f;
    }
    else if (cos_arg < -1.0f)
    {
        cos_arg = -1.0f;
    }

    float theta = acospi(cos_arg);
    theta *= 1.0f / 3.0f;
    float ncprod = phi_root * cospi(theta);
    float dev = 1.73205080756887729353f * phi_root * sinpi(theta);  // sqrt(3) * ...
    return float3(2 * ncprod, -dev - ncprod, dev - ncprod) * scal - pz;
}
// from [http://www.thetenthplanet.de/archives/1180]
// Builds a 3x3 frame whose columns are a tangent, a bitangent and the
// supplied normal N, derived from the screen-space derivatives of the
// position p and the texture coordinate uv.
mat3 cotangent_frame( vec3 N, vec3 p, vec2 uv )
{
    // screen-space derivatives: the edge vectors of the pixel triangle
    vec3 dPosX = dFdx( p );
    vec3 dPosY = dFdy( p );
    vec2 dUvX = dFdx( uv );
    vec2 dUvY = dFdy( uv );

    // solve the linear system
    vec3 perpY = cross( dPosY, N );
    vec3 perpX = cross( N, dPosX );
    vec3 tangent = perpY * dUvX.x + perpX * dUvY.x;
    vec3 bitangent = perpY * dUvX.y + perpX * dUvY.y;

    // scale-invariant frame: divide both vectors by the length of the longer one
    float longestSq = max( dot( tangent, tangent ), dot( bitangent, bitangent ) );
    float invLen = inversesqrt( longestSq );
    return mat3( tangent * invLen, bitangent * invLen, N );
}
/*
 * Find the real roots of the monic quartic
 *     x^4 + p.w * x^3 + p.z * x^2 + p.y * x + p.x = 0.
 *
 * The quartic is depressed, a resolvent cubic supplies the splitting
 * parameter, and the two resulting monic quadratics yield up to four roots.
 *
 * This function is not overflow-safe. Use with care.
 */
float4 solve_monic(float4 p)
{
    // Step 1: depress the polynomial. The substitution shifts x by p.w / 4;
    // the shift is removed again from the roots at the end.
    float shift = p.w * 0.25f;

    // Horner evaluation in p.w; lanes are (constant, linear, quadratic)
    // coefficients of the depressed quartic.
    float3 hi = float3((-3.0f / 256.0f) * p.w * p.w,
                       (1.0f / 8.0f) * p.w,
                       (-3.0f / 8.0f));
    float3 lo = float3((1.0f / 16.0f) * p.z * p.w - (1.0f / 4.0f) * p.y,
                       (-1.0f / 2.0f) * p.z,
                       0.0f);
    float3 horner = hi * p.w + lo;
    float3 dep = horner * p.w + p.xyz;

    // Step 2: solve the resolvent cubic for the squared splitting parameter.
    float3 resolvent = float3(-dep.y * dep.y,
                              (dep.z * dep.z) - (4.0f * dep.x),
                              2.0f * dep.z);
    float4 cand = float4(solve_monic(resolvent), 1e-37f);

    // The cubic may have several solutions; at least one is numerically
    // nonnegative, though roundoff may have made it slightly negative.
    // Taking the maximum over the candidates and the 1e-37 floor picks that
    // value or, failing that, a very small positive one.
    float2 pairMax = fmax(cand.xy, cand.zw);
    float paramSq = fmax(pairMax.x, pairMax.y);   // p^2
    float paramInv = inversesqrt(paramSq);        // 1/p
    float param = paramSq * paramInv;             // p

    // Step 3: split into two monic quadratics; their roots are the four
    // candidate roots of the depressed quartic.
    float sum = dep.z + paramSq;
    float diff = dep.y * paramInv;
    float constPlus = 0.5f * (sum + diff);
    float constMinus = 0.5f * (sum - diff);
    float4 roots = float4(solve_monic(float2(constMinus, param)),
                          solve_monic(float2(constPlus, -param)));

    // Finally order the results and undo the shift: if the first lane is NaN
    // (x != x), swap the two root pairs so the other pair comes first.
    if (roots.x != roots.x)
    {
        return roots.zwxy - shift;
    }
    return roots - shift;
}