C++ (Cpp) broadcast4to16f Beispiele

Beispiel #1

0

Datei anzeigen

Datei: bvh4mb_builder.cpp Projekt: jamesvecore/embree

  /*! Builds the PrimRefs for this thread's share of the motion-blur triangles.
   *
   *  The global primitive range [0,numPrimitives) is divided evenly between
   *  the threads; this thread handles [startID,endID). Only geometries that
   *  are non-NULL, of type TRIANGLE_MESH, enabled and have numTimeSteps != 1
   *  are counted and processed. For every triangle the bounding box of its
   *  three vertices (as returned by mesh->vertex(index), i.e. without an
   *  explicit time step) is written to prims[], tagged with the group index
   *  in lower.a and the triangle index in upper.a.
   *
   *  The thread-local scene and centroid bounds are merged into
   *  global_bounds at the end through extend_atomic().
   *
   *  \param threadID   index of the calling thread, in [0,numThreads)
   *  \param numThreads number of threads sharing the work
   */
  void BVH4mbBuilder::computePrimRefsTrianglesMB(const size_t threadID, const size_t numThreads) 
  {
    DBG(PING);
    const size_t numGroups = scene->size();
    const size_t startID = (threadID+0)*numPrimitives/numThreads;
    const size_t endID   = (threadID+1)*numPrimitives/numThreads;
    
    PrimRef *__restrict__ const prims     = this->prims;

    // === find first group containing startID ===
    // numSkipped counts the triangles of all eligible groups in front of it
    unsigned int g=0, numSkipped = 0;
    for (; g<numGroups; g++) {       
      if (unlikely(scene->get(g) == NULL)) continue;
      if (unlikely(scene->get(g)->type != TRIANGLE_MESH)) continue;
      const TriangleMeshScene::TriangleMesh* __restrict__ const mesh = scene->getTriangleMesh(g);
      if (unlikely(!mesh->isEnabled())) continue;
      if (unlikely(mesh->numTimeSteps == 1)) continue;

      const size_t numTriangles = mesh->numTriangles;
      if (numSkipped + numTriangles > startID) break;
      numSkipped += numTriangles;
    }

    // === start with first group containing startID ===
    mic_f bounds_scene_min((float)pos_inf);
    mic_f bounds_scene_max((float)neg_inf);
    mic_f bounds_centroid_min((float)pos_inf);
    mic_f bounds_centroid_max((float)neg_inf);

    unsigned int currentID = startID;
    unsigned int offset = startID - numSkipped; // first triangle to process inside group g

    // Two PrimRefs are staged in a 64-byte aligned buffer so that they can be
    // written out with a single 16-float store once 'dest' is 64-byte aligned.
    __align(64) PrimRef local_prims[2];
    size_t numLocalPrims = 0;
    PrimRef *__restrict__ dest = &prims[currentID];

    for (; g<numGroups; g++) 
    {
      // same eligibility filter as in the search loop above
      if (unlikely(scene->get(g) == NULL)) continue;
      if (unlikely(scene->get(g)->type != TRIANGLE_MESH)) continue;
      const TriangleMeshScene::TriangleMesh* __restrict__ const mesh = scene->getTriangleMesh(g);
      if (unlikely(!mesh->isEnabled())) continue;
      if (unlikely(mesh->numTimeSteps == 1)) continue;

      for (unsigned int i=offset; i<mesh->numTriangles && currentID < endID; i++, currentID++)
      {
        const TriangleMeshScene::TriangleMesh::Triangle& tri = mesh->triangle(i);
        prefetch<PFHINT_L2>(&tri + L2_PREFETCH_ITEMS);
        prefetch<PFHINT_L1>(&tri + L1_PREFETCH_ITEMS);

        const float *__restrict__ const vptr0 = (float*)&mesh->vertex(tri.v[0]);
        const float *__restrict__ const vptr1 = (float*)&mesh->vertex(tri.v[1]);
        const float *__restrict__ const vptr2 = (float*)&mesh->vertex(tri.v[2]);

        const mic_f v0 = broadcast4to16f(vptr0);
        const mic_f v1 = broadcast4to16f(vptr1);
        const mic_f v2 = broadcast4to16f(vptr2);

        const mic_f bmin = min(min(v0,v1),v2);
        const mic_f bmax = max(max(v0,v1),v2);
        bounds_scene_min = min(bounds_scene_min,bmin);
        bounds_scene_max = max(bounds_scene_max,bmax);
        // bmin+bmax is twice the box center; the factor 2 is kept ("centroid2")
        const mic_f centroid2 = bmin+bmax;
        bounds_centroid_min = min(bounds_centroid_min,centroid2);
        bounds_centroid_max = max(bounds_centroid_max,centroid2);

        store4f(&local_prims[numLocalPrims].lower,bmin);
        store4f(&local_prims[numLocalPrims].upper,bmax);
        local_prims[numLocalPrims].lower.a = g; // group (geometry) index
        local_prims[numLocalPrims].upper.a = i; // triangle index inside the group

        numLocalPrims++;
        if (unlikely(((size_t)dest % 64) != 0) && numLocalPrims == 1)
        {
          // destination not yet 64-byte aligned: emit a single PrimRef
          *dest = local_prims[0];
          dest++;
          numLocalPrims--;
        }
        else if (numLocalPrims == 2)
        {
          // Both staging slots are filled: flush them with one 16-float store.
          // The load is issued only here, so a half-filled staging buffer is
          // never read.
          const mic_f twoAABBs = load16f(local_prims);
          numLocalPrims = 0;
          store16f_ngo(dest,twoAABBs);
          dest+=2;
        }
      }
      if (currentID == endID) break;
      offset = 0; // all following groups are processed from their first triangle
    }

    /* is there anything left in the local queue? */
    if (numLocalPrims % 2 != 0)
      *dest = local_prims[0];

    /* update global bounds */
    Centroid_Scene_AABB bounds;
    
    store4f(&bounds.centroid2.lower,bounds_centroid_min);
    store4f(&bounds.centroid2.upper,bounds_centroid_max);
    store4f(&bounds.geometry.lower,bounds_scene_min);
    store4f(&bounds.geometry.upper,bounds_scene_max);

    global_bounds.extend_atomic(bounds);    
  }

Beispiel #2

0

Datei anzeigen

Datei: bvh4hair_builder.cpp Projekt: AranHase/embree

  void BVH4HairBuilder::parallelBinningGlobal(const size_t threadID, const size_t numThreads)
  {
    BuildRecord &current = global_sharedData.rec;

    const unsigned int items = current.items();
    const unsigned int startID = current.begin + ((threadID+0)*items/numThreads);
    const unsigned int endID   = current.begin + ((threadID+1)*items/numThreads);

    const mic_f centroidMin = broadcast4to16f(&current.bounds.centroid2.lower);
    const mic_f centroidMax = broadcast4to16f(&current.bounds.centroid2.upper);

    const mic_f centroidBoundsMin_2 = centroidMin;
    const mic_f centroidDiagonal_2  = centroidMax-centroidMin;
    const mic_f scale = select(centroidDiagonal_2 != 0.0f,rcp(centroidDiagonal_2) * mic_f(16.0f * 0.99f),mic_f::zero());

    Bezier1i  *__restrict__ const tmp_prims = (Bezier1i*)accel;

    fastbin_copy<Bezier1i,false>(prims,tmp_prims,startID,endID,centroidBoundsMin_2,scale,global_bin16[threadID]);    

    LockStepTaskScheduler::syncThreadsWithReduction( threadID, numThreads, reduceBinsParallel, global_bin16 );
    
    if (threadID == 0)
      {
	const float voxelArea = area(current.bounds.geometry);

	global_sharedData.split.cost = items * voxelArea * INTERSECTION_COST;;
	
	const Bin16 &bin16 = global_bin16[0];

	for (size_t dim=0;dim<3;dim++)
	  {
	    if (unlikely(centroidDiagonal_2[dim] == 0.0f)) continue;

	    const mic_f rArea = prefix_area_rl(bin16.min_x[dim],bin16.min_y[dim],bin16.min_z[dim],
					       bin16.max_x[dim],bin16.max_y[dim],bin16.max_z[dim]);
	    const mic_f lArea = prefix_area_lr(bin16.min_x[dim],bin16.min_y[dim],bin16.min_z[dim],
					       bin16.max_x[dim],bin16.max_y[dim],bin16.max_z[dim]);
	    const mic_i lnum  = prefix_count(bin16.count[dim]);

	    const mic_i rnum    = mic_i(items) - lnum;
	    const mic_i lblocks = (lnum + mic_i(3)) >> 2;
	    const mic_i rblocks = (rnum + mic_i(3)) >> 2;
	    const mic_m m_lnum  = lnum == 0;
	    const mic_m m_rnum  = rnum == 0;
	    const mic_f cost    = select(m_lnum|m_rnum,mic_f::inf(),lArea * mic_f(lblocks) + rArea * mic_f(rblocks) + voxelArea );

	    if (lt(cost,mic_f(global_sharedData.split.cost)))
	      {

		const mic_f min_cost    = vreduce_min(cost); 
		const mic_m m_pos       = min_cost == cost;
		const unsigned long pos = bitscan64(m_pos);	    
		
		assert(pos < 15);
		if (pos < 15)
		  {
		    global_sharedData.split.cost    = cost[pos];
		    global_sharedData.split.pos     = pos+1;
		    global_sharedData.split.dim     = dim;	    
		    global_sharedData.split.numLeft = lnum[pos];
		  }
	      }
	  }
      }
  }

Beispiel #3

0

Datei anzeigen

Datei: bvh4mb_builder.cpp Projekt: jamesvecore/embree

					     BVH4mb::Triangle01 * __restrict__ const acc)
  {
    const TriangleMeshScene::TriangleMesh* __restrict__ const mesh = scene->getTriangleMesh(geomID);
    const TriangleMeshScene::TriangleMesh::Triangle & tri = mesh->triangle(primID);

    const mic_i pID(primID);
    const mic_i gID(geomID);

    const float *__restrict__ const vptr0_t0 = (float*)&mesh->vertex(tri.v[0]);
    const float *__restrict__ const vptr1_t0 = (float*)&mesh->vertex(tri.v[1]);
    const float *__restrict__ const vptr2_t0 = (float*)&mesh->vertex(tri.v[2]);

    prefetch<PFHINT_L1>(vptr1_t0);
    prefetch<PFHINT_L1>(vptr2_t0);

    const mic_f v0_t0 = broadcast4to16f(vptr0_t0); 
    const mic_f v1_t0 = broadcast4to16f(vptr1_t0);
    const mic_f v2_t0 = broadcast4to16f(vptr2_t0);

    const mic_f tri_accel_t0 = initTriangle1(v0_t0,v1_t0,v2_t0,gID,pID,mic_i(mesh->mask));

    store16f_ngo(&acc->t0,tri_accel_t0);

    if ((int)mesh->numTimeSteps == 1)
      {
	store16f_ngo(&acc->t1,tri_accel_t0);
      }
    else
      {
	assert( (int)mesh->numTimeSteps == 2 );
	const float *__restrict__ const vptr0_t1 = (float*)&mesh->vertex(tri.v[0],1);

Beispiel #4

0

Datei anzeigen

Datei: bvh4hair_builder.cpp Projekt: AranHase/embree

	      }
	  }
      }
  }


  void BVH4HairBuilder::parallelPartitioning(BuildRecord& current,
					  Bezier1i * __restrict__ l_source,
					  Bezier1i * __restrict__ r_source,
					  Bezier1i * __restrict__ l_dest,
					  Bezier1i * __restrict__ r_dest,
					  const Split &split,
					  Centroid_Scene_AABB &local_left,
					  Centroid_Scene_AABB &local_right)
  {
    const mic_f centroidMin = broadcast4to16f(&current.bounds.centroid2.lower);
    const mic_f centroidMax = broadcast4to16f(&current.bounds.centroid2.upper);

    const mic_f centroidBoundsMin_2 = centroidMin;
    const mic_f centroidDiagonal_2  = centroidMax-centroidMin;
    const mic_f scale = select(centroidDiagonal_2 != 0.0f,rcp(centroidDiagonal_2) * mic_f(16.0f * 0.99f),mic_f::zero());
 
    const unsigned int bestSplitDim = split.dim;
    const unsigned int bestSplit    = split.pos;

    const mic_f c = mic_f(centroidBoundsMin_2[bestSplitDim]);
    const mic_f s = mic_f(scale[bestSplitDim]);

    mic_f leftSceneBoundsMin((float)pos_inf);
    mic_f leftSceneBoundsMax((float)neg_inf);
    mic_f leftCentroidBoundsMin((float)pos_inf);