/*
** Applies op->fnc[act](addr, attr) over part of [start, end) and returns
** the address at which it stopped.
**
** - Without a next level (op->nxt == 0) the walk begins at
**   __align(start, op->sz).
** - With a next level, and when start is op->sz aligned and start/end have
**   different pg_abs_idx() at op->shf, the handler is applied once at start
**   and the walk resumes at __align_next(start, op->sz).
** - Otherwise the next level is asked to handle the head of the range,
**   bounded by __align_next(start, op->sz), and the walk resumes where it
**   stopped.
**
** The trailing loop then steps by op->sz while the address stays below both
** __align(end, op->sz) and `upper`.
*/
static inline offset_t __npg_bk_op(offset_t start, offset_t end, offset_t upper,
                                   uint64_t attr, npg_op_t *op, uint8_t act)
{
   offset_t cur;

   if(!op->nxt)
      cur = __align(start, op->sz);
   else
   {
      offset_t next_boundary = __align_next(start, op->sz);
      bool_t   other_table   = (pg_abs_idx(start, op->shf) != pg_abs_idx(end, op->shf));

      if(__aligned(start, op->sz) && other_table)
      {
         /* start sits on a boundary of this level: handle it here */
         op->fnc[act](start, attr);
         cur = next_boundary;
      }
      else
         /* delegate the head of the range to the next level */
         cur = __npg_bk_op(start, end, next_boundary, attr, op->nxt, act);
   }

   for( ; cur < min(__align(end, op->sz), upper) ; cur += op->sz)
      op->fnc[act](cur, attr);

   return cur;
}
//aligned alloc bytes with header size static inline uint64_t __get_alloc_bytes(struct xheap *xheap, uint64_t bytes) { if (bytes < 1<<(xheap->alignment_unit + SMALL_LIMIT)) return __align(bytes + sizeof(struct xheap_header), xheap->alignment_unit); else if (bytes < 1<<(xheap->alignment_unit + MEDIUM_LIMIT)) return __align(bytes + sizeof(struct xheap_header), xheap->alignment_unit + MEDIUM_AL_UNIT); else return __align(bytes + sizeof(struct xheap_header), xheap->alignment_unit + LARGE_AL_UNIT); }
void BVH4BuilderTopLevel::recurseSAH(size_t depth, BuildRecord& task, const size_t mode, const size_t threadID, const size_t numThreads) { /* return leaf node */ assert(task.end-task.begin > 0); if (unlikely(task.end-task.begin == 1)) { *(NodeRef*)task.parentNode = refs[task.begin].node; return; } /* create leaf node */ if (unlikely(task.depth >= BVH4::maxBuildDepth)) { createLeaf(task,threadID,numThreads); return; } /*! initialize task list */ BuildRecord childTasks[4]; childTasks[0] = task; size_t numChildren = 1; /*! split until node is full */ do { /*! find best child to split */ float bestArea = inf; ssize_t bestChild = -1; for (size_t i=0; i<numChildren; i++) { float A = childTasks[i].sceneArea(); size_t items = childTasks[i].items(); if (items > 1 && A <= bestArea) { bestChild = i; bestArea = A; } } if (bestChild == -1) break; /*! split best child into left and right child */ __align(64) BuildRecord left, right; split(childTasks[bestChild],left,right,mode,threadID,numThreads); /* add new children left and right */ left.depth = right.depth = task.depth+1; childTasks[bestChild] = childTasks[numChildren-1]; childTasks[numChildren-1] = left; childTasks[numChildren+0] = right; numChildren++; } while (numChildren < 4); /* recurse */ BVH4::Node* node = bvh->allocNode(threadID); for (ssize_t i=numChildren-1; i>=0; i--) { childTasks[i].parentNode = (size_t)&node->child(i); recurse(depth+1,childTasks[i],mode,threadID,numThreads); node->set(i,childTasks[i].bounds.geometry); } *(NodeRef*)task.parentNode = bvh->encodeNode(node); }
/*
** Applies op->fnc[act](addr, attr) from `start` upward in steps of op->sz
** while addr is below __align(end, op->sz).
**
** If `end` is not op->sz aligned and a next level exists (op->nxt), the
** same walk continues with that level from the address where this one
** stopped.
*/
static inline void __npg_fw_op(offset_t start, offset_t end, uint64_t attr, npg_op_t *op, uint8_t act)
{
   offset_t cur = start;

   for(;;)
   {
      for( ; cur < __align(end, op->sz) ; cur += op->sz)
         op->fnc[act](cur, attr);

      /* done when there is no next level or `end` is aligned for this one */
      if(!op->nxt || __aligned(end, op->sz))
         return;

      op = op->nxt;
   }
}
void BVH4mbBuilder::computePrimRefsTrianglesMB(const size_t threadID, const size_t numThreads) { DBG(PING); const size_t numGroups = scene->size(); const size_t startID = (threadID+0)*numPrimitives/numThreads; const size_t endID = (threadID+1)*numPrimitives/numThreads; PrimRef *__restrict__ const prims = this->prims; // === find first group containing startID === unsigned int g=0, numSkipped = 0; for (; g<numGroups; g++) { if (unlikely(scene->get(g) == NULL)) continue; if (unlikely(scene->get(g)->type != TRIANGLE_MESH)) continue; const TriangleMeshScene::TriangleMesh* __restrict__ const mesh = scene->getTriangleMesh(g); if (unlikely(!mesh->isEnabled())) continue; if (unlikely(mesh->numTimeSteps == 1)) continue; const size_t numTriangles = mesh->numTriangles; if (numSkipped + numTriangles > startID) break; numSkipped += numTriangles; } // === start with first group containing startID === mic_f bounds_scene_min((float)pos_inf); mic_f bounds_scene_max((float)neg_inf); mic_f bounds_centroid_min((float)pos_inf); mic_f bounds_centroid_max((float)neg_inf); unsigned int num = 0; unsigned int currentID = startID; unsigned int offset = startID - numSkipped; __align(64) PrimRef local_prims[2]; size_t numLocalPrims = 0; PrimRef *__restrict__ dest = &prims[currentID]; for (; g<numGroups; g++) { if (unlikely(scene->get(g) == NULL)) continue; if (unlikely(scene->get(g)->type != TRIANGLE_MESH)) continue; const TriangleMeshScene::TriangleMesh* __restrict__ const mesh = scene->getTriangleMesh(g); if (unlikely(!mesh->isEnabled())) continue; if (unlikely(mesh->numTimeSteps == 1)) continue; for (unsigned int i=offset; i<mesh->numTriangles && currentID < endID; i++, currentID++) { //DBG_PRINT(currentID); const TriangleMeshScene::TriangleMesh::Triangle& tri = mesh->triangle(i); prefetch<PFHINT_L2>(&tri + L2_PREFETCH_ITEMS); prefetch<PFHINT_L1>(&tri + L1_PREFETCH_ITEMS); const float *__restrict__ const vptr0 = (float*)&mesh->vertex(tri.v[0]); const float *__restrict__ const vptr1 = 
(float*)&mesh->vertex(tri.v[1]); const float *__restrict__ const vptr2 = (float*)&mesh->vertex(tri.v[2]); const mic_f v0 = broadcast4to16f(vptr0); const mic_f v1 = broadcast4to16f(vptr1); const mic_f v2 = broadcast4to16f(vptr2); const mic_f bmin = min(min(v0,v1),v2); const mic_f bmax = max(max(v0,v1),v2); bounds_scene_min = min(bounds_scene_min,bmin); bounds_scene_max = max(bounds_scene_max,bmax); const mic_f centroid2 = bmin+bmax; bounds_centroid_min = min(bounds_centroid_min,centroid2); bounds_centroid_max = max(bounds_centroid_max,centroid2); store4f(&local_prims[numLocalPrims].lower,bmin); store4f(&local_prims[numLocalPrims].upper,bmax); local_prims[numLocalPrims].lower.a = g; local_prims[numLocalPrims].upper.a = i; //DBG_PRINT( local_prims[numLocalPrims] ); numLocalPrims++; if (unlikely(((size_t)dest % 64) != 0) && numLocalPrims == 1) { *dest = local_prims[0]; dest++; numLocalPrims--; } else { const mic_f twoAABBs = load16f(local_prims); if (numLocalPrims == 2) { numLocalPrims = 0; store16f_ngo(dest,twoAABBs); dest+=2; } } } if (currentID == endID) break; offset = 0; } /* is there anything left in the local queue? */ if (numLocalPrims % 2 != 0) *dest = local_prims[0]; /* update global bounds */ Centroid_Scene_AABB bounds; store4f(&bounds.centroid2.lower,bounds_centroid_min); store4f(&bounds.centroid2.upper,bounds_centroid_max); store4f(&bounds.geometry.lower,bounds_scene_min); store4f(&bounds.geometry.upper,bounds_scene_max); global_bounds.extend_atomic(bounds); }
/*
 * Calls __align() on fp + 4, then __libc_start_main() with the addresses of
 * globals->ptr8048410, globals->ptr804829C and globals->ptr8048690, the
 * incoming dwArg00 and edx, and two fp-relative values; finally __hlt().
 *
 * NOTE(review): this looks like decompiler output for a program entry stub
 * (the first __libc_start_main argument would then be main) -- confirm
 * against the binary.
 */
void fn08048300(code * * edx, word32 dwArg00)
{
	__align(fp + 0x00000004);
	__libc_start_main(&globals->ptr8048410, dwArg00, fp + 0x00000004, &globals->ptr804829C, &globals->ptr8048690, edx, fp - 0x00000004);
	/* reached only if __libc_start_main returns */
	__hlt();
}
/*! Occlusion query for a single ray.
 *
 *  Walks the tree with an explicit stack. On the first accepted triangle
 *  hit it sets ray.geomID to 0 and returns; if the stack runs empty the
 *  function returns without touching ray.geomID.
 *
 *  \param bvh  supplies the node array (nodePtr()), triangle array
 *              (triPtr()), the root reference and, with intersection
 *              filters enabled, the scene (geometry)
 *  \param ray  org, dir, tnear and tfar are read; geomID is written on a hit */
void BVH4iIntersector1::occluded(BVH4i* bvh, Ray& ray)
{
  /* near and node stack */
  __align(64) NodeRef stack_node[3*BVH4i::maxDepth+1];

  /* setup */
  const mic3f rdir16     = rcp_safe(mic3f(ray.dir.x,ray.dir.y,ray.dir.z));
  const mic_f inf        = mic_f(pos_inf);
  const mic_f zero       = mic_f::zero();

  const Node      * __restrict__ nodes = (Node     *)bvh->nodePtr();
  const Triangle1 * __restrict__ accel = (Triangle1*)bvh->triPtr();

  /* slot 0 is a sentinel: popping BVH4i::invalidNode ends the traversal */
  stack_node[0] = BVH4i::invalidNode;
  stack_node[1] = bvh->root;
  size_t sindex = 2;

  const mic_f org_xyz      = loadAOS4to16f(ray.org.x,ray.org.y,ray.org.z);
  const mic_f dir_xyz      = loadAOS4to16f(ray.dir.x,ray.dir.y,ray.dir.z);
  const mic_f rdir_xyz     = loadAOS4to16f(rdir16.x[0],rdir16.y[0],rdir16.z[0]);
  /* precomputed so that (bound - org) * rdir becomes bound * rdir - org_rdir */
  const mic_f org_rdir_xyz = org_xyz * rdir_xyz;
  const mic_f min_dist_xyz = broadcast1to16f(&ray.tnear);
  const mic_f max_dist_xyz = broadcast1to16f(&ray.tfar);

  const unsigned int leaf_mask = BVH4I_LEAF_MASK;

  while (1)
  {
    /* pop the next node */
    NodeRef curNode = stack_node[sindex-1];
    sindex--;

    while (1)
    {
      /* test if this is a leaf node */
      if (unlikely(curNode.isLeaf(leaf_mask))) break;

      const Node* __restrict__ const node = curNode.node(nodes);
      const float* __restrict const plower = (float*)node->lower;
      const float* __restrict const pupper = (float*)node->upper;

      prefetch<PFHINT_L1>((char*)node + 0);
      prefetch<PFHINT_L1>((char*)node + 64);

      /* intersect single ray with 4 bounding boxes */
      const mic_f tLowerXYZ = load16f(plower) * rdir_xyz - org_rdir_xyz;
      const mic_f tUpperXYZ = load16f(pupper) * rdir_xyz - org_rdir_xyz;
      /* mask 0x7777 selects lanes 0-2 of every group of four lanes */
      const mic_f tLower = mask_min(0x7777,min_dist_xyz,tLowerXYZ,tUpperXYZ);
      const mic_f tUpper = mask_max(0x7777,max_dist_xyz,tLowerXYZ,tUpperXYZ);

      /* early pop: fetch the next stack entry now so it can be prefetched
         while the box test finishes; undone by sindex++ if a child is hit */
      sindex--;
      curNode = stack_node[sindex];
      const Node* __restrict__ const next = curNode.node(nodes);
      prefetch<PFHINT_L2>((char*)next + 0);
      prefetch<PFHINT_L2>((char*)next + 64);

      const mic_f tNear = vreduce_max4(tLower);
      const mic_f tFar  = vreduce_min4(tUpper);
      /* mask 0x8888 selects lane 3 of every group: one result bit per child */
      const mic_m hitm = le(0x8888,tNear,tFar);
      const mic_f tNear_pos = select(hitm,tNear,inf);

      /* if no child is hit, continue with early popped child */
      if (unlikely(none(hitm))) continue;
      sindex++;

      const unsigned long hiti = toInt(hitm);
      const unsigned long pos_first = bitscan64(hiti);
      const unsigned long num_hitm = countbits(hiti);

      /* if a single child is hit, continue with that child */
      /* the child reference is read from the lower-bounds array at the lane
         index of the hit bit */
      curNode = ((unsigned int *)plower)[pos_first];
      if (likely(num_hitm == 1)) continue;

      /* if two children are hit, push in correct order */
      const unsigned long pos_second = bitscan64(pos_first,hiti);
      if (likely(num_hitm == 2))
      {
        /* NOTE(review): distances are compared as raw unsigned bit patterns;
           that orders like floats only for non-negative values -- confirm
           tNear cannot be negative here */
        const unsigned int dist_first  = ((unsigned int*)&tNear)[pos_first];
        const unsigned int dist_second = ((unsigned int*)&tNear)[pos_second];
        const unsigned int node_first  = curNode;
        const unsigned int node_second = ((unsigned int*)plower)[pos_second];

        if (dist_first <= dist_second)
        {
          /* first child is not farther: visit it now, push the second */
          stack_node[sindex] = node_second;
          sindex++;
          assert(sindex < 3*BVH4i::maxDepth+1);
          continue;
        }
        else
        {
          /* second child is closer: visit it now, push the first */
          stack_node[sindex] = curNode;
          curNode = node_second;
          sindex++;
          assert(sindex < 3*BVH4i::maxDepth+1);
          continue;
        }
      }

      /* continue with closest child and push all others */
      const mic_f min_dist = set_min_lanes(tNear_pos);
      const unsigned old_sindex = sindex;
      sindex += countbits(hiti) - 1;
      assert(sindex < 3*BVH4i::maxDepth+1);

      const mic_m closest_child = eq(hitm,min_dist,tNear);
      const unsigned long closest_child_pos = bitscan64(closest_child);
      /* closest_child & (closest_child - 1) clears its lowest set bit; the
         nested andn calls combine that with hitm to form the push mask */
      const mic_m m_pos = andn(hitm,andn(closest_child,(mic_m)((unsigned int)closest_child - 1)));
      const mic_i plower_node = load16i((int*)plower);
      curNode = ((unsigned int*)plower)[closest_child_pos];
      compactustore16i(m_pos,&stack_node[old_sindex],plower_node);
    }

    /* return if stack is empty */
    if (unlikely(curNode == BVH4i::invalidNode)) break;

    /* intersect one ray against four triangles */

    //////////////////////////////////////////////////////////////////////////////////////////////////

    const Triangle1* tptr = (Triangle1*) curNode.leaf(accel);

    prefetch<PFHINT_L1>(tptr + 3);
    prefetch<PFHINT_L1>(tptr + 2);
    prefetch<PFHINT_L1>(tptr + 1);
    prefetch<PFHINT_L1>(tptr + 0);

    /* gather the three vertices of tptr[0..3] */
    const mic_i and_mask = broadcast4to16i(zlc4);

    const mic_f v0 = gather_4f_zlc(and_mask,
                                   (float*)&tptr[0].v0,
                                   (float*)&tptr[1].v0,
                                   (float*)&tptr[2].v0,
                                   (float*)&tptr[3].v0);

    const mic_f v1 = gather_4f_zlc(and_mask,
                                   (float*)&tptr[0].v1,
                                   (float*)&tptr[1].v1,
                                   (float*)&tptr[2].v1,
                                   (float*)&tptr[3].v1);

    const mic_f v2 = gather_4f_zlc(and_mask,
                                   (float*)&tptr[0].v2,
                                   (float*)&tptr[1].v2,
                                   (float*)&tptr[2].v2,
                                   (float*)&tptr[3].v2);

    const mic_f e1 = v1 - v0;
    const mic_f e2 = v0 - v2;
    const mic_f normal = lcross_zxy(e1,e2);
    const mic_f org = v0 - org_xyz;
    const mic_f odzxy = msubr231(org * swizzle(dir_xyz,_MM_SWIZ_REG_DACB), dir_xyz, swizzle(org,_MM_SWIZ_REG_DACB));
    const mic_f den = ldot3_zxy(dir_xyz,normal);
    const mic_f rcp_den = rcp(den);
    const mic_f uu = ldot3_zxy(e2,odzxy);
    const mic_f vv = ldot3_zxy(e1,odzxy);
    const mic_f u = uu * rcp_den;
    const mic_f v = vv * rcp_den;

    /* mask 0x1111 selects lane 0 of every group: one bit per triangle */
#if defined(__BACKFACE_CULLING__)
    const mic_m m_init = (mic_m)0x1111 & (den > zero);
#else
    const mic_m m_init = 0x1111;
#endif

    /* keep triangles with u >= 0, v >= 0 and u + v <= 1 */
    const mic_m valid_u = ge(m_init,u,zero);
    const mic_m valid_v = ge(valid_u,v,zero);
    const mic_m m_aperture = le(valid_v,u+v,mic_f::one());

    const mic_f nom = ldot3_zxy(org,normal);
    const mic_f t = rcp_den*nom;

    if (unlikely(none(m_aperture))) continue;

    /* keep hits with tnear < t < tfar */
    mic_m m_final = lt(lt(m_aperture,min_dist_xyz,t),t,max_dist_xyz);

#if defined(__USE_RAY_MASK__)
    const mic_i rayMask(ray.mask);
    const mic_i triMask = swDDDD(gather16i_4i_align(&tptr[0].v2,&tptr[1].v2,&tptr[2].v2,&tptr[3].v2));
    const mic_m m_ray_mask = (rayMask & triMask) != mic_i::zero();
    m_final &= m_ray_mask;
#endif

#if defined(__INTERSECTION_FILTER__)

    /* did the ray hit one of the four triangles? */
    /* candidates are taken by increasing t; a candidate rejected by its
       occlusion filter is cleared from m_final and the next one is tried */
    while (any(m_final))
    {
      const mic_f temp_t = select(m_final,t,max_dist_xyz);
      const mic_f min_dist = vreduce_min(temp_t);
      const mic_m m_dist = eq(min_dist,temp_t);
      const size_t vecIndex = bitscan(toInt(m_dist));
      /* four lanes per triangle */
      const size_t triIndex = vecIndex >> 2;
      const Triangle1 *__restrict__ tri_ptr = tptr + triIndex;
      /* isolate the lowest set bit of m_dist */
      const mic_m m_tri = m_dist^(m_dist & (mic_m)((unsigned int)m_dist - 1));
      const mic_f gnormalx = mic_f(tri_ptr->Ng.x);
      const mic_f gnormaly = mic_f(tri_ptr->Ng.y);
      const mic_f gnormalz = mic_f(tri_ptr->Ng.z);
      const int geomID = tri_ptr->geomID();
      const int primID = tri_ptr->primID();

      Geometry* geom = ((Scene*)bvh->geometry)->get(geomID);

      /* no filter on this geometry: the hit stands */
      if (likely(!geom->hasOcclusionFilter1())) break;

      /* filter accepted the hit: it stands */
      if (runOcclusionFilter1(geom,ray,u,v,min_dist,gnormalx,gnormaly,gnormalz,m_tri,geomID,primID))
        break;

      m_final ^= m_tri; /* clear bit */
    }
#endif

    /* a surviving hit ends the query
       (geomID = 0 is presumably the "occluded" signal -- confirm with callers) */
    if (unlikely(any(m_final)))
    {
      ray.geomID = 0;
      return;
    }
    //////////////////////////////////////////////////////////////////////////////////////////////////
  }
}
void BVH4mbIntersector16Single::occluded(mic_i* valid_i, BVH4mb* bvh, Ray16& ray16) { /* near and node stack */ __align(64) NodeRef stack_node[3*BVH4i::maxDepth+1]; /* setup */ const mic_m m_valid = *(mic_i*)valid_i != mic_i(0); const mic3f rdir16 = rcp_safe(ray16.dir); unsigned int terminated = toInt(!m_valid); const mic_f inf = mic_f(pos_inf); const mic_f zero = mic_f::zero(); const Node * __restrict__ nodes = (Node *)bvh->nodePtr(); const BVH4mb::Triangle01 * __restrict__ accel = (BVH4mb::Triangle01 *)bvh->triPtr(); stack_node[0] = BVH4i::invalidNode; long rayIndex = -1; while((rayIndex = bitscan64(rayIndex,toInt(m_valid))) != BITSCAN_NO_BIT_SET_64) { stack_node[1] = bvh->root; size_t sindex = 2; const mic_f org_xyz = loadAOS4to16f(rayIndex,ray16.org.x,ray16.org.y,ray16.org.z); const mic_f dir_xyz = loadAOS4to16f(rayIndex,ray16.dir.x,ray16.dir.y,ray16.dir.z); const mic_f rdir_xyz = loadAOS4to16f(rayIndex,rdir16.x,rdir16.y,rdir16.z); const mic_f org_rdir_xyz = org_xyz * rdir_xyz; const mic_f min_dist_xyz = broadcast1to16f(&ray16.tnear[rayIndex]); const mic_f max_dist_xyz = broadcast1to16f(&ray16.tfar[rayIndex]); const mic_f time = broadcast1to16f(&ray16.time[rayIndex]); const unsigned int leaf_mask = BVH4I_LEAF_MASK; while (1) { NodeRef curNode = stack_node[sindex-1]; sindex--; const mic_f one_time = (mic_f::one() - time); while (1) { /* test if this is a leaf node */ if (unlikely(curNode.isLeaf(leaf_mask))) break; const Node* __restrict__ const node = curNode.node(nodes); const float* __restrict const plower = (float*)node->lower; const float* __restrict const pupper = (float*)node->upper; prefetch<PFHINT_L1>((char*)node + 0*64); prefetch<PFHINT_L1>((char*)node + 1*64); prefetch<PFHINT_L1>((char*)node + 2*64); prefetch<PFHINT_L1>((char*)node + 3*64); const BVH4mb::Node* __restrict__ const nodeMB = (BVH4mb::Node*)node; const mic_f lower = one_time * load16f((float*)nodeMB->lower) + time * load16f((float*)nodeMB->lower_t1); const mic_f upper = one_time * 
load16f((float*)nodeMB->upper) + time * load16f((float*)nodeMB->upper_t1); /* intersect single ray with 4 bounding boxes */ const mic_f tLowerXYZ = lower * rdir_xyz - org_rdir_xyz; const mic_f tUpperXYZ = upper * rdir_xyz - org_rdir_xyz; const mic_f tLower = mask_min(0x7777,min_dist_xyz,tLowerXYZ,tUpperXYZ); const mic_f tUpper = mask_max(0x7777,max_dist_xyz,tLowerXYZ,tUpperXYZ); const Node* __restrict__ const next = curNode.node(nodes); prefetch<PFHINT_L2>((char*)next + 0); prefetch<PFHINT_L2>((char*)next + 64); sindex--; const mic_f tNear = vreduce_max4(tLower); const mic_f tFar = vreduce_min4(tUpper); const mic_m hitm = le(0x8888,tNear,tFar); const mic_f tNear_pos = select(hitm,tNear,inf); curNode = stack_node[sindex]; // early pop of next node /* if no child is hit, continue with early popped child */ if (unlikely(none(hitm))) continue; sindex++; const unsigned long hiti = toInt(hitm); const unsigned long pos_first = bitscan64(hiti); const unsigned long num_hitm = countbits(hiti); /* if a single child is hit, continue with that child */ curNode = ((unsigned int *)plower)[pos_first]; if (likely(num_hitm == 1)) continue; /* if two children are hit, push in correct order */ const unsigned long pos_second = bitscan64(pos_first,hiti); if (likely(num_hitm == 2)) { const unsigned int dist_first = ((unsigned int*)&tNear)[pos_first]; const unsigned int dist_second = ((unsigned int*)&tNear)[pos_second]; const unsigned int node_first = curNode; const unsigned int node_second = ((unsigned int*)plower)[pos_second]; if (dist_first <= dist_second) { stack_node[sindex] = node_second; sindex++; assert(sindex < 3*BVH4i::maxDepth+1); continue; } else { stack_node[sindex] = curNode; curNode = node_second; sindex++; assert(sindex < 3*BVH4i::maxDepth+1); continue; } } /* continue with closest child and push all others */ const mic_f min_dist = set_min_lanes(tNear_pos); const unsigned int old_sindex = sindex; sindex += countbits(hiti) - 1; assert(sindex < 3*BVH4i::maxDepth+1); const 
mic_m closest_child = eq(hitm,min_dist,tNear); const unsigned long closest_child_pos = bitscan64(closest_child); const mic_m m_pos = andn(hitm,andn(closest_child,(mic_m)((unsigned int)closest_child - 1))); const mic_i plower_node = load16i((int*)plower); curNode = ((unsigned int*)plower)[closest_child_pos]; compactustore16i(m_pos,&stack_node[old_sindex],plower_node); } /* return if stack is empty */ if (unlikely(curNode == BVH4i::invalidNode)) break; /* intersect one ray against four triangles */ ////////////////////////////////////////////////////////////////////////////////////////////////// const BVH4mb::Triangle01* tptr = (BVH4mb::Triangle01*) curNode.leaf(accel); prefetch<PFHINT_L1>((mic_f*)tptr + 0); prefetch<PFHINT_L1>((mic_f*)tptr + 1); prefetch<PFHINT_L1>((mic_f*)tptr + 2); prefetch<PFHINT_L1>((mic_f*)tptr + 3); const mic_i and_mask = broadcast4to16i(zlc4); const mic_f v0_t0 = gather_4f_zlc(and_mask, (float*)&tptr[0].t0.v0, (float*)&tptr[1].t0.v0, (float*)&tptr[2].t0.v0, (float*)&tptr[3].t0.v0); const mic_f v1_t0 = gather_4f_zlc(and_mask, (float*)&tptr[0].t0.v1, (float*)&tptr[1].t0.v1, (float*)&tptr[2].t0.v1, (float*)&tptr[3].t0.v1); const mic_f v2_t0 = gather_4f_zlc(and_mask, (float*)&tptr[0].t0.v2, (float*)&tptr[1].t0.v2, (float*)&tptr[2].t0.v2, (float*)&tptr[3].t0.v2); prefetch<PFHINT_L2>((mic_f*)tptr + 4); prefetch<PFHINT_L2>((mic_f*)tptr + 5); prefetch<PFHINT_L2>((mic_f*)tptr + 6); prefetch<PFHINT_L2>((mic_f*)tptr + 7); const mic_f v0_t1 = gather_4f_zlc(and_mask, (float*)&tptr[0].t1.v0, (float*)&tptr[1].t1.v0, (float*)&tptr[2].t1.v0, (float*)&tptr[3].t1.v0); const mic_f v1_t1 = gather_4f_zlc(and_mask, (float*)&tptr[0].t1.v1, (float*)&tptr[1].t1.v1, (float*)&tptr[2].t1.v1, (float*)&tptr[3].t1.v1); const mic_f v2_t1 = gather_4f_zlc(and_mask, (float*)&tptr[0].t1.v2, (float*)&tptr[1].t1.v2, (float*)&tptr[2].t1.v2, (float*)&tptr[3].t1.v2); const mic_f v0 = v0_t0 * one_time + time * v0_t1; const mic_f v1 = v1_t0 * one_time + time * v1_t1; const mic_f v2 = 
v2_t0 * one_time + time * v2_t1; const mic_f e1 = v1 - v0; const mic_f e2 = v0 - v2; const mic_f normal = lcross_zxy(e1,e2); const mic_f org = v0 - org_xyz; const mic_f odzxy = msubr231(org * swizzle(dir_xyz,_MM_SWIZ_REG_DACB), dir_xyz, swizzle(org,_MM_SWIZ_REG_DACB)); const mic_f den = ldot3_zxy(dir_xyz,normal); const mic_f rcp_den = rcp(den); const mic_f uu = ldot3_zxy(e2,odzxy); const mic_f vv = ldot3_zxy(e1,odzxy); const mic_f u = uu * rcp_den; const mic_f v = vv * rcp_den; #if defined(__BACKFACE_CULLING__) const mic_m m_init = (mic_m)0x1111 & (den > zero); #else const mic_m m_init = 0x1111; #endif const mic_m valid_u = ge((mic_m)m_init,u,zero); const mic_m valid_v = ge(valid_u,v,zero); const mic_m m_aperture = le(valid_v,u+v,mic_f::one()); const mic_f nom = ldot3_zxy(org,normal); const mic_f t = rcp_den*nom; if (unlikely(none(m_aperture))) continue; mic_m m_final = lt(lt(m_aperture,min_dist_xyz,t),t,max_dist_xyz); #if defined(__USE_RAY_MASK__) const mic_i rayMask(ray16.mask[rayIndex]); const mic_i triMask = swDDDD(gather16i_4i_align(&tptr[0].t0.v2,&tptr[1].t0.v2,&tptr[2].t0.v2,&tptr[3].t0.v2)); const mic_m m_ray_mask = (rayMask & triMask) != mic_i::zero(); m_final &= m_ray_mask; #endif if (unlikely(any(m_final))) { terminated |= mic_m::shift1[rayIndex]; break; } ////////////////////////////////////////////////////////////////////////////////////////////////// } if (unlikely(all(toMask(terminated)))) break; } store16i(m_valid & toMask(terminated),&ray16.geomID,0); }
/*
 * Calls __align() on fp + 4, then __libc_start_main() with the addresses of
 * globals->ptr804835D, globals->ptr8048390 and globals->ptr80483C0, the
 * incoming dwArg00 and edx, and two fp-relative values; finally __hlt().
 *
 * NOTE(review): this looks like decompiler output for a program entry stub
 * (the first __libc_start_main argument would then be main) -- confirm
 * against the binary.
 */
void fn08048278(code * * edx, word32 dwArg00)
{
	__align(fp + 0x00000004);
	__libc_start_main(&globals->ptr804835D, dwArg00, fp + 0x00000004, &globals->ptr8048390, &globals->ptr80483C0, edx, fp - 0x00000004);
	/* reached only if __libc_start_main returns */
	__hlt();
}