bool BBox::intersect(const Ray& ray, float *tnear, float *tfar) const { // you may already have those values hanging around somewhere const __m128 plus_inf = loadps(ps_cst_plus_inf), minus_inf = loadps(ps_cst_minus_inf); // use whatever's apropriate to load. const __m128 box_min = loadps(&min), box_max = loadps(&max), pos = loadps(&ray.o), inv_dir = loadps(&ray.inv_d); // use a div if inverted directions aren't available const __m128 l1 = mulps(subps(box_min, pos), inv_dir); const __m128 l2 = mulps(subps(box_max, pos), inv_dir); // the order we use for those min/max is vital to filter out // NaNs that happens when an inv_dir is +/- inf and // (box_min - pos) is 0. inf * 0 = NaN const __m128 filtered_l1a = minps(l1, plus_inf); const __m128 filtered_l2a = minps(l2, plus_inf); const __m128 filtered_l1b = maxps(l1, minus_inf); const __m128 filtered_l2b = maxps(l2, minus_inf); // now that we're back on our feet, test those slabs. __m128 lmax = maxps(filtered_l1a, filtered_l2a); __m128 lmin = minps(filtered_l1b, filtered_l2b); // unfold back. try to hide the latency of the shufps & co. const __m128 lmax0 = rotatelps(lmax); const __m128 lmin0 = rotatelps(lmin); lmax = minss(lmax, lmax0); lmin = maxss(lmin, lmin0); const __m128 lmax1 = muxhps(lmax,lmax); const __m128 lmin1 = muxhps(lmin,lmin); lmax = minss(lmax, lmax1); lmin = maxss(lmin, lmin1); const bool ret = _mm_comige_ss(lmax, _mm_setzero_ps()) & _mm_comige_ss(lmax,lmin); storess(lmin, tnear); storess(lmax, tfar); return ret; }
inline bool ray_box_intersect(const box_t & b, const ray_t & ray, rayseg_t & rs) { /* you may already have those values hanging around somewhere */ const __m128 plus_inf = loadps(ps_cst_plus_inf), minus_inf = loadps(ps_cst_minus_inf); /* use whatever's apropriate to load. */ const __m128 box_min = loadps(&b.min), box_max = loadps(&b.max), pos = loadps(&ray.pos), inv_dir = loadps(&ray.inv_dir); /* use a div if inverted directions aren't available */ const __m128 l1 = mulps(subps(box_min, pos), inv_dir); const __m128 l2 = mulps(subps(box_max, pos), inv_dir); /* the order we use for those min/max is vital to filter out */ /* NaNs that happens when an inv_dir is +/- inf and */ /* (box_min - pos) is 0. inf * 0 = NaN */ const __m128 filtered_l1a = minps(l1, plus_inf); const __m128 filtered_l2a = minps(l2, plus_inf); const __m128 filtered_l1b = maxps(l1, minus_inf); const __m128 filtered_l2b = maxps(l2, minus_inf); /* now that we're back on our feet, test those slabs. */ __m128 lmax = maxps(filtered_l1a, filtered_l2a); __m128 lmin = minps(filtered_l1b, filtered_l2b); /* unfold back. try to hide the latency of the shufps & co. */ const __m128 lmax0 = rotatelps(lmax); const __m128 lmin0 = rotatelps(lmin); lmax = minss(lmax, lmax0); lmin = maxss(lmin, lmin0); const __m128 lmax1 = muxhps(lmax, lmax); const __m128 lmin1 = muxhps(lmin, lmin); lmax = minss(lmax, lmax1); lmin = maxss(lmin, lmin1); const bool ret = _mm_comige_ss(lmax, _mm_setzero_ps()) & _mm_comige_ss(lmax, lmin); storess(lmin, &rs.t_near); storess(lmax, &rs.t_far); return ret; }
bool Sphere::hit(const Ray &ray, IntersectionInfo &ii) const { // Make a vector to avoid Point -> Vector casting below const Point3 localRayOrigin((worldToObject * ray.origin).get128()); const Vector3 localRayOriginAsVec(localRayOrigin); const Vector3 localRayDir ((worldToObject * ray.direction()).get128()); #ifdef SSE4 const __m128 a = dotps(localRayDir.get128(), localRayDir.get128()); const __m128 b = mulps(set1ps(2), dotps(localRayDir.get128(), localRayOriginAsVec.get128())); const __m128 rv = set1ps(r); const __m128 c = subps(dotps(localRayOriginAsVec.get128(), localRayOriginAsVec.get128()), mulps(rv, rv)); const float ar = a.m128_f32[0]; const float br = b.m128_f32[0]; const float cr = c.m128_f32[0]; // Solve quadratic const float d = (subps(mulps(b,b), mulps(set1ps(4), mulps(a, c)))).m128_f32[0]; #else const float a = dot(localRayDir, localRayDir); const float b = 2.f * dot(localRayDir, localRayOriginAsVec); const float c = dot(localRayOriginAsVec, localRayOriginAsVec) - r*r; const float ar = a; const float br = b; const float cr = c; // Solve quadratic const float d = b*b - 4.f * a*c; #endif if (d < 0) return false; const float sqrtD = sqrt(d); float q; if (br < 0) q = -0.5f * (br - sqrtD); else q = -0.5f * (br + sqrtD); float t0 = q / ar; float t1 = cr / q; if (t0 > t1) std::swap(t0, t1); if (t0 > ray.maxT || t1 < 0) return false; float hit = t0; if (t0 < 0) { hit = t1; if (hit > ray.maxT) return false; } ray.maxT = hit; // Now that we have a hit fill the intersection info structure const Point3 localHitP(localRayOrigin + ray.maxT * localRayDir); ii.P = Point3((objectToWorld * localHitP).get128()); ii.Ng = Vector3(localHitP) * invr; ii.Ng = Vector3((worldToObjectN * ii.Ng).get128()); ii.N = ii.Ng; ii.Cs = color; ii.Os = opacity; const float invPi = 1.f / 3.141592654f; ii.s = ::asinf(ii.Ng.getX()) * invPi + 0.5f; ii.t = ::asinf(ii.Ng.getY()) * invPi + 0.5f; return true; }