void bumps_t::initialize ( const bump_specifier_t & b0, const bump_specifier_t & b1) { // Precompute the coefficients of four cubic polynomials in t, giving // the two smoothstep regions of the each of the two bump functions. v4f b0t = load4f (& b0.t0); // b0.t0 b0.t1 b0.t2 b0.t2 v4f b1t = load4f (& b1.t0); // b1.t0 b1.t1 b1.t2 b1.t2 v4f b0v = _mm_movelh_ps (load4f (& b0.v0), _mm_setzero_ps ()); // b0.v0 b0.v1 v4f b1v = _mm_movelh_ps (load4f (& b1.v0), _mm_setzero_ps ()); // b1.v0 b1.v1 v4f S = SHUFPS (b0t, b1t, (0, 2, 0, 2)); // b0.t0 b0.t2 b1.t0 b1.t2 v4f T = SHUFPS (b0t, b1t, (1, 3, 1, 3)); // b0.t1 b0.t3 b1.t1 b1.t3 v4f U = SHUFPS (b0v, b1v, (0, 2, 0, 2)); // b0.v0 0 b1.v0 0 v4f V1 = SHUFPS (b0v, b1v, (1, 0, 1, 0)); // b0.v1 b0.v0 b1.v1 b1.v0 v4f V2 = SHUFPS (b0v, b1v, (2, 1, 2, 1)); // 0 b0.v1 0 b1.v1 v4f V = V1 - V2; v4f d = T - S; v4f a = T + S; v4f m = (V - U) / (d * d * d); store4f (c [0], U + m * S * S * (a + d + d)); store4f (c [1], _mm_set1_ps (-6.0f) * m * S * T); store4f (c [2], _mm_set1_ps (+3.0f) * m * a); store4f (c [3], _mm_set1_ps (-2.0f) * m); store4f (S0, S); store4f (T0, T); store4f (U0, U); store4f (V0, V); }
void step_t::initialize (float t0, float t1) { // Precompute the coefficents c of the cubic polynomial f // such that f(t0)=0, f(t1)=1, f'(t0)=0 and f'(t0)=1. float d = t1 - t0; float a = t1 + t0; c [0] = t0 * t0 * (a + d + d); c [1] = -6 * t0 * t1; c [2] = 3 * a; c [3] = -2; // Divide c [] by d^3. v4f dt = _mm_set1_ps (d); store4f (c, load4f (c) / (dt * dt * dt)); T [0] = t0; T [1] = t1; T [2] = t1; T [3] = std::numeric_limits <float>::infinity (); }
void BVH4mbBuilder::computePrimRefsTrianglesMB(const size_t threadID, const size_t numThreads) { DBG(PING); const size_t numGroups = scene->size(); const size_t startID = (threadID+0)*numPrimitives/numThreads; const size_t endID = (threadID+1)*numPrimitives/numThreads; PrimRef *__restrict__ const prims = this->prims; // === find first group containing startID === unsigned int g=0, numSkipped = 0; for (; g<numGroups; g++) { if (unlikely(scene->get(g) == NULL)) continue; if (unlikely(scene->get(g)->type != TRIANGLE_MESH)) continue; const TriangleMeshScene::TriangleMesh* __restrict__ const mesh = scene->getTriangleMesh(g); if (unlikely(!mesh->isEnabled())) continue; if (unlikely(mesh->numTimeSteps == 1)) continue; const size_t numTriangles = mesh->numTriangles; if (numSkipped + numTriangles > startID) break; numSkipped += numTriangles; } // === start with first group containing startID === mic_f bounds_scene_min((float)pos_inf); mic_f bounds_scene_max((float)neg_inf); mic_f bounds_centroid_min((float)pos_inf); mic_f bounds_centroid_max((float)neg_inf); unsigned int num = 0; unsigned int currentID = startID; unsigned int offset = startID - numSkipped; __align(64) PrimRef local_prims[2]; size_t numLocalPrims = 0; PrimRef *__restrict__ dest = &prims[currentID]; for (; g<numGroups; g++) { if (unlikely(scene->get(g) == NULL)) continue; if (unlikely(scene->get(g)->type != TRIANGLE_MESH)) continue; const TriangleMeshScene::TriangleMesh* __restrict__ const mesh = scene->getTriangleMesh(g); if (unlikely(!mesh->isEnabled())) continue; if (unlikely(mesh->numTimeSteps == 1)) continue; for (unsigned int i=offset; i<mesh->numTriangles && currentID < endID; i++, currentID++) { //DBG_PRINT(currentID); const TriangleMeshScene::TriangleMesh::Triangle& tri = mesh->triangle(i); prefetch<PFHINT_L2>(&tri + L2_PREFETCH_ITEMS); prefetch<PFHINT_L1>(&tri + L1_PREFETCH_ITEMS); const float *__restrict__ const vptr0 = (float*)&mesh->vertex(tri.v[0]); const float *__restrict__ const vptr1 = 
(float*)&mesh->vertex(tri.v[1]); const float *__restrict__ const vptr2 = (float*)&mesh->vertex(tri.v[2]); const mic_f v0 = broadcast4to16f(vptr0); const mic_f v1 = broadcast4to16f(vptr1); const mic_f v2 = broadcast4to16f(vptr2); const mic_f bmin = min(min(v0,v1),v2); const mic_f bmax = max(max(v0,v1),v2); bounds_scene_min = min(bounds_scene_min,bmin); bounds_scene_max = max(bounds_scene_max,bmax); const mic_f centroid2 = bmin+bmax; bounds_centroid_min = min(bounds_centroid_min,centroid2); bounds_centroid_max = max(bounds_centroid_max,centroid2); store4f(&local_prims[numLocalPrims].lower,bmin); store4f(&local_prims[numLocalPrims].upper,bmax); local_prims[numLocalPrims].lower.a = g; local_prims[numLocalPrims].upper.a = i; //DBG_PRINT( local_prims[numLocalPrims] ); numLocalPrims++; if (unlikely(((size_t)dest % 64) != 0) && numLocalPrims == 1) { *dest = local_prims[0]; dest++; numLocalPrims--; } else { const mic_f twoAABBs = load16f(local_prims); if (numLocalPrims == 2) { numLocalPrims = 0; store16f_ngo(dest,twoAABBs); dest+=2; } } } if (currentID == endID) break; offset = 0; } /* is there anything left in the local queue? */ if (numLocalPrims % 2 != 0) *dest = local_prims[0]; /* update global bounds */ Centroid_Scene_AABB bounds; store4f(&bounds.centroid2.lower,bounds_centroid_min); store4f(&bounds.centroid2.upper,bounds_centroid_max); store4f(&bounds.geometry.lower,bounds_scene_min); store4f(&bounds.geometry.upper,bounds_scene_max); global_bounds.extend_atomic(bounds); }
void BVH4HairBuilder::computePrimRefsBezierCurves(const size_t threadID, const size_t numThreads) { DBG(PING); const size_t numTotalGroups = scene->size(); /* count total number of virtual objects */ const size_t numBezierCurves = numPrimitives; const size_t startID = (threadID+0)*numBezierCurves/numThreads; const size_t endID = (threadID+1)*numBezierCurves/numThreads; Bezier1i *__restrict__ const bptr = (Bezier1i*)this->prims; // === find first group containing startID === unsigned int g=0, numSkipped = 0; for (; g<numTotalGroups; g++) { if (unlikely(scene->get(g) == NULL)) continue; if (unlikely((scene->get(g)->type != BEZIER_CURVES))) continue; if (unlikely(!scene->get(g)->isEnabled())) continue; BezierCurves* geom = (BezierCurves*) scene->getBezierCurves(g); const size_t numPrims = geom->numCurves; if (numSkipped + numPrims > startID) break; numSkipped += numPrims; } /* start with first group containing startID */ mic_f bounds_scene_min((float)pos_inf); mic_f bounds_scene_max((float)neg_inf); mic_f bounds_centroid_min((float)pos_inf); mic_f bounds_centroid_max((float)neg_inf); unsigned int num = 0; unsigned int currentID = startID; unsigned int offset = startID - numSkipped; for (; g<numTotalGroups; g++) { if (unlikely(scene->get(g) == NULL)) continue; if (unlikely((scene->get(g)->type != BEZIER_CURVES))) continue; if (unlikely(!scene->get(g)->isEnabled())) continue; BezierCurves* geom = (BezierCurves*) scene->getBezierCurves(g); size_t N = geom->numCurves; for (unsigned int i=offset; i<N && currentID < endID; i++, currentID++) { const mic2f b2 = geom->bounds_mic2f(i); const mic_f bmin = b2.x; const mic_f bmax = b2.y; bounds_scene_min = min(bounds_scene_min,bmin); bounds_scene_max = max(bounds_scene_max,bmax); const mic_f centroid2 = bmin+bmax; bounds_centroid_min = min(bounds_centroid_min,centroid2); bounds_centroid_max = max(bounds_centroid_max,centroid2); bptr[currentID].p = geom->fristVertexPtr(i); bptr[currentID].geomID = g; bptr[currentID].primID = i; } 
if (currentID == endID) break; offset = 0; } /* update global bounds */ Centroid_Scene_AABB bounds; store4f(&bounds.centroid2.lower,bounds_centroid_min); store4f(&bounds.centroid2.upper,bounds_centroid_max); store4f(&bounds.geometry.lower,bounds_scene_min); store4f(&bounds.geometry.upper,bounds_scene_max); global_bounds.extend_atomic(bounds); }
// Builds the node coordinates and per-triangle node indices of a system of
// N triangles, where N = 2pqr / (qr + rp + pq - pqr) with p fixed at 2
// (for example q = 3, r = 5 gives N = 60).
// NOTE(review): presumably the (2, q, r) triangle tiling of the sphere - confirm.
//
//   q, r     Orders used with p = 2 in the formula for N; q triangles share
//            each Q-node and the attachment loop closes R-nodes using r.
//   xyz_in   Coordinates of the P-, Q- and R-node (in that order) of triangle 0.
//   nodes    Output: node coordinates, four floats per node. The Q-nodes
//            occupy indices 0 .. N/q - 1; P- and R-nodes follow in the
//            order in which they are discovered.
//   indices  Output: six node indices for each of the N triangles.
//
// The scratch arrays hold 60 entries each, so the code assumes N <= 60.
// Throughout, "n [P]" is the index-first spelling of "P [n]".
void make_system (unsigned q, unsigned r, const float (& xyz_in) [3] [4], float (* nodes) [4], std::uint8_t (* indices) [6]) {
  std::uint8_t * P, * Q, * R; // Permutations taking triangles around nodes.
  std::uint8_t * Qi; // Inverse of the permutation Q.
  std::uint8_t * Px, * Rx; // Map a triangle to its P- or R-node.
  // Carve the six 60-entry tables out of a single stack buffer.
  std::uint8_t memory [360];
  std::uint8_t * memp = memory;
  P = memp; memp += 60;
  Q = memp; memp += 60;
  R = memp; memp += 60;
  Qi = memp; memp += 60;
  Px = memp; memp += 60;
  Rx = memp; // memp += 60;
  // Marker for "not yet known". All bits are set, so "x & undef == x";
  // the merge steps below rely on this.
  const std::uint8_t undef = 0xff;
  const unsigned p = 2, N = 2 * p * q * r / (q * r + r * p + p * q - p * q * r);
  for (unsigned n = 0; n != sizeof memory; ++ n) memory [n] = undef;
  // Q is known up front: it cycles through each run of q consecutive
  // triangles, which all share the Q-node with index n / q.
  for (unsigned n = 0; n != N; ++ n) {
    n [Q] = n - n % q + (n + 1) % q;
    n [Q] [Qi] = n;
  }
  // The first N / q node indices are reserved for the Q-nodes.
  unsigned next_node = N / q;
  // We are given the coordinates of the P-, Q- and R-nodes in triangle 0.
  store4f (nodes [Px [0] = next_node ++], load4f (xyz_in [0]));
  store4f (nodes [0], load4f (xyz_in [1]));
  store4f (nodes [Rx [0] = next_node ++], load4f (xyz_in [2]));
  float two_pi = 0x1.921fb6P+002f;
  float A = two_pi / ui2f (p); // Angle used for the rotation about a P-node.
  float B = two_pi / ui2f (q); // Angle used for the rotation about a Q-node.
  // n0: lowest triangle whose P entry is still unknown.
  // m0: first triangle of the group (around one Q-node) being added.
  unsigned n0 = 0, m0 = 0;
  // The loop condition is a lambda that is invoked immediately: it completes
  // the group of q triangles starting at m0, advances m0 to the next group,
  // and reports whether any group remains.
  while ([& m0, Px, Rx, q, r, nodes, & next_node, N, B] () -> bool {
    // Calculate coordinates of any unknown P- and R-nodes around Q-node m0 / q.
    // NOTE(review): rotor_t presumably rotates its first argument about the
    // given node by the given angle and writes the result to its second
    // argument - confirm against rotor_t.
    ALIGNED16 rotor_t Y_rotate (nodes [m0 / q], B);
    for (unsigned n = m0 + 1; n != m0 + q; ++ n) {
      if (Px [n] == undef) {
        Px [n] = next_node;
        Y_rotate (nodes [Px [n - 1]], nodes [next_node]);
        ++ next_node;
      }
      if (Rx [n] == undef) {
        Rx [n] = next_node;
        Y_rotate (nodes [Rx [n - 1]], nodes [next_node]);
        ++ next_node;
      }
    }
    m0 += q;
    return m0 != N;
  } ()) {
    while (n0 [P] != undef) ++ n0;
    // Attach triangle m0 to triangle n0's dangling P-node.
    // At this point we learn the coordinates of the next Q-node.
    Px [m0] = Px [n0];
    ALIGNED16 rotor_t X_rotate (nodes [Px [n0]], A);
    X_rotate (nodes [n0 / q], nodes [m0 / q]);
    // Work out the consequences of attaching the new triangle.
    // Invariant: n [P] = m if and only if m [Q] [R] = n, for all m, n < N.
    unsigned n = n0, m = m0;
    do {
      n [P] = m;
      m = m [Q];
      m [R] = n;
      // Merge the R-node labels: ANDing with undef (0xff) keeps whichever
      // index is already known.
      Rx [m] = Rx [n] = Rx [m] & Rx [n];
      // Count the triangles already chained together through R and P
      // from this attachment, stopping at r.
      unsigned d = 1;
      while (d != r && n [R] != undef) {
        n = n [R];
        ++ d;
      }
      while (d != r && m [P] != undef) {
        m = m [P] [Q];
        ++ d;
      }
      // With r - 1 triangles chained, the remaining link is forced:
      // record it and merge the P-node labels the same way.
      if (d == r - 1) {
        n [R] = m;
        n = n [Qi];
        m [P] = n;
        Px [m] = Px [n] = Px [m] & Px [n];
      }
      // If no new P link was left open on this side, continue with the
      // original pair, roles swapped.
      if (n [P] != undef) {
        n = m0;
        m = n0;
      }
    } while (n [P] == undef);
  }
  // Emit six node indices per triangle, taken from triangle i = n and its
  // neighbours j = i [R] and k = j [P].
  for (unsigned n = 0; n != N; ++ n) {
    unsigned i = n;
    unsigned j = i [R];
    unsigned k = j [P];
    indices [n] [0] = Px [j];
    indices [n] [1] = j / q;
    indices [n] [2] = Rx [i];
    indices [n] [3] = Px [i];
    indices [n] [4] = k / q;
    indices [n] [5] = Rx [k];
  }
}