Example #1
0
int main()
{
    assert( sizeof(float16) == 2 );
    assert( float16() == 0.f );

    for( int i = -2048; i <= 2048; ++i )
        check_precision_error( i );

    check_precision_error(  3.14159f );
    check_precision_error( -3.14159f );
    check_precision_error(  0.00001f );
    check_precision_error( -0.00001f );
    check_precision_error(  0.00000f );
    check_precision_error( -0.00000f );
    check_precision_error(  1.00000f );
    check_precision_error( -1.00000f );

    float16 f(3.1459f);
    check_precision_error( f += 1.f );
    check_precision_error( f  = 4.f );
    check_precision_error( f  = float16() );
    check_precision_error( f *= 0.0001f );

    std::cout << "All ok." << std::endl;

    return 0;
}
Example #2
0
/* Round-trips `value` through half precision and reports any loss.
   A "Precision warning" line is written to std::cout when the value coming
   back from half2float(float2half()) differs from the input. Afterwards the
   function asserts that converting through the float16 class gives the
   same result as the free-function path. */
void check_precision_error( float value )
{
    const float converted1 = half2float(float2half(value));
    const float error = value - converted1;

    if( std::abs(error) > 0 )
    {
        // positive errors get an explicit '+', negative ones already carry '-'
        const char *sign = error > 0 ? "+" : "";
        std::cout << "Precision warning: " << value << " (" << sign << error << " err)" << std::endl;
    }

    const float converted2 = float16(value);
    assert( converted1 == converted2 );
}
    /* Hybrid packet/single-ray traversal of a BVH4mb for a packet of 16 rays.
     *
     * valid_i : per-lane mask; lanes whose entry is non-zero are active.
     * bvh     : hierarchy to traverse (root, node array and triangle array are read from it).
     * ray16   : ray packet; org, dir, tnear, tfar and time are read here, and the
     *           packet is handed to LeafIntersector for the leaf tests.
     *
     * Nodes are kept on a stack of (node, per-lane distance) pairs with
     * BVH4i::invalidNode as bottom sentinel. Whenever the number of lanes that
     * still reach the popped node drops to BVH4i::hybridSIMDUtilSwitchThreshold
     * or below, those lanes are traversed one ray at a time; otherwise all 16
     * lanes are tested together against the time-interpolated child boxes.
     *
     * NOTE(review): presumably LeafIntersector records hits in ray16 (tfar is
     * re-read from it after every leaf test) -- confirm against LeafIntersector. */
    void BVH4mbIntersector16Hybrid<LeafIntersector>::intersect(int16* valid_i, BVH4mb* bvh, Ray16& ray16)
    {
      /* near and node stack */
      __aligned(64) float16   stack_dist[3*BVH4i::maxDepth+1];
      __aligned(64) NodeRef stack_node[3*BVH4i::maxDepth+1];
      __aligned(64) NodeRef stack_node_single[3*BVH4i::maxDepth+1]; 

      /* load ray */
      const bool16 valid0     = *(int16*)valid_i != int16(0);
      const Vec3f16 rdir16     = rcp_safe(ray16.dir);
      const Vec3f16 org_rdir16 = ray16.org * rdir16;
      /* inactive lanes get tnear = +inf and tfar = -inf */
      float16 ray_tnear        = select(valid0,ray16.tnear,pos_inf);
      float16 ray_tfar         = select(valid0,ray16.tfar ,neg_inf);
      const float16 inf        = float16(pos_inf);
      
      /* allocate stack and push root node */
      stack_node[0] = BVH4i::invalidNode;
      stack_dist[0] = inf;
      stack_node[1] = bvh->root;
      stack_dist[1] = ray_tnear; 
      NodeRef* __restrict__ sptr_node = stack_node + 2;
      float16*   __restrict__ sptr_dist = stack_dist + 2;
      
      const Node      * __restrict__ nodes = (Node     *)bvh->nodePtr();
      const BVH4mb::Triangle01 * __restrict__ accel = (BVH4mb::Triangle01 *)bvh->triPtr();

      /* 'pop' labels the loop body; it is the target of the goto in the SWITCH_ON_DOWN_TRAVERSAL section below */
      while (1) pop:
      {
        /* pop next node from stack */
        NodeRef curNode = *(sptr_node-1);
        float16 curDist   = *(sptr_dist-1);
        sptr_node--;
        sptr_dist--;
	const bool16 m_stackDist = ray_tfar > curDist;

	/* stack empty ? */
        if (unlikely(curNode == BVH4i::invalidNode))  break;
        
        /* cull node if behind closest hit point */
        if (unlikely(none(m_stackDist))) continue;
        
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////

	/* switch to single ray mode */
        if (unlikely(countbits(m_stackDist) <= BVH4i::hybridSIMDUtilSwitchThreshold)) 
	  {
	    /* the scalar distance stack aliases the packet distance stack starting at its current top;
	       store16f presumably seeds its first 16 floats with inf -- confirm */
	    float   *__restrict__ stack_dist_single = (float*)sptr_dist;
	    store16f(stack_dist_single,inf);

	    /* traverse single ray */	  	  
	    long rayIndex = -1;
	    /* loop ends once bitscan64 reports that no further lane is set in m_stackDist */
	    while((rayIndex = bitscan64(rayIndex,m_stackDist)) != BITSCAN_NO_BIT_SET_64) 
	      {	    
		/* per-ray stack: sentinel at the bottom, the popped packet node on top */
		stack_node_single[0] = BVH4i::invalidNode;
		stack_node_single[1] = curNode;
		size_t sindex = 2;

		const float16 org_xyz      = loadAOS4to16f(rayIndex,ray16.org.x,ray16.org.y,ray16.org.z);
		const float16 dir_xyz      = loadAOS4to16f(rayIndex,ray16.dir.x,ray16.dir.y,ray16.dir.z);
		const float16 rdir_xyz     = loadAOS4to16f(rayIndex,rdir16.x,rdir16.y,rdir16.z);
		const float16 org_rdir_xyz = org_xyz * rdir_xyz;
		const float16 min_dist_xyz = broadcast1to16f(&ray16.tnear[rayIndex]);
		float16       max_dist_xyz = broadcast1to16f(&ray16.tfar[rayIndex]);
		const float16 time         = broadcast1to16f(&ray16.time[rayIndex]);

		const unsigned int leaf_mask = BVH4I_LEAF_MASK;

		while (1) 
		  {
		    /* note: this curNode shadows the packet-level curNode declared above */
		    NodeRef curNode = stack_node_single[sindex-1];
		    sindex--;
            
		    traverse_single_intersect(curNode,
					      sindex,
					      rdir_xyz,
					      org_rdir_xyz,
					      min_dist_xyz,
					      max_dist_xyz,
					      time,
					      stack_node_single,
					      stack_dist_single,
					      nodes,
					      leaf_mask);	    

		    /* return if stack is empty */
		    if (unlikely(curNode == BVH4i::invalidNode)) break;


		    /* intersect one ray against four triangles */
		    const bool hit = LeafIntersector::intersect(curNode,
								rayIndex,
								dir_xyz,
								org_xyz,
								min_dist_xyz,
								max_dist_xyz,
								ray16,
								accel,
								(Scene*)bvh->geometry);
		    
		    /* after a hit the single-ray stack is compacted against the current max_dist_xyz */
		    if (hit)
		      compactStack(stack_node_single,stack_dist_single,sindex,max_dist_xyz);

		  }	  
	      }
	    /* refresh the packet far distances from ray16 after the single-ray pass */
	    ray_tfar = select(valid0,ray16.tfar ,neg_inf);
	    continue;
	  }

	///////////////////////////////////////////////////////////////////////////////////////////////////////////////
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////

	const unsigned int leaf_mask = BVH4I_LEAF_MASK;

	/* weights for blending the two sets of child bounds: (1-time) for lower/upper, time for lower_t1/upper_t1 */
	const float16 time     = ray16.time;
	const float16 one_time = (float16::one() - time);

        while (1)
        {
          /* test if this is a leaf node */
          if (unlikely(curNode.isLeaf(leaf_mask))) break;
          
          STAT3(normal.trav_nodes,1,popcnt(ray_tfar > curDist),16);
          const Node* __restrict__ const node = curNode.node(nodes);
          
          const BVH4mb::Node* __restrict__ const nodeMB = (BVH4mb::Node*)node;

          /* pop of next node */
          sptr_node--;
          sptr_dist--;
          curNode = *sptr_node; 
          curDist = *sptr_dist;
          
	  prefetch<PFHINT_L1>((char*)node + 0*64); 
	  prefetch<PFHINT_L1>((char*)node + 1*64); 
	  prefetch<PFHINT_L1>((char*)node + 2*64); 
	  prefetch<PFHINT_L1>((char*)node + 3*64); 

#pragma unroll(4)
          for (unsigned int i=0; i<4; i++)
          {
	    const NodeRef child = node->lower[i].child;

	    /* child bounds blended between the two stored boxes according to the ray time */
	    const float16 lower_x =  one_time * nodeMB->lower[i].x + time * nodeMB->lower_t1[i].x;
	    const float16 lower_y =  one_time * nodeMB->lower[i].y + time * nodeMB->lower_t1[i].y;
	    const float16 lower_z =  one_time * nodeMB->lower[i].z + time * nodeMB->lower_t1[i].z;
	    const float16 upper_x =  one_time * nodeMB->upper[i].x + time * nodeMB->upper_t1[i].x;
	    const float16 upper_y =  one_time * nodeMB->upper[i].y + time * nodeMB->upper_t1[i].y;
	    const float16 upper_z =  one_time * nodeMB->upper[i].z + time * nodeMB->upper_t1[i].z;

	    /* an invalid child ends the loop only for i >= 2; children 0 and 1 are always tested */
	    if (unlikely(i >=2 && child == BVH4i::invalidNode)) break;

            /* per-axis distances to the box planes (msub presumably computes a*b-c -- confirm) */
            const float16 lclipMinX = msub(lower_x,rdir16.x,org_rdir16.x);
            const float16 lclipMinY = msub(lower_y,rdir16.y,org_rdir16.y);
            const float16 lclipMinZ = msub(lower_z,rdir16.z,org_rdir16.z);
            const float16 lclipMaxX = msub(upper_x,rdir16.x,org_rdir16.x);
            const float16 lclipMaxY = msub(upper_y,rdir16.y,org_rdir16.y);
            const float16 lclipMaxZ = msub(upper_z,rdir16.z,org_rdir16.z);
	    
            const float16 lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
            const float16 lfarP  = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
            const bool16 lhit   = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar);   
	    /* lanes that miss the child get distance inf */
	    const float16 childDist = select(lhit,lnearP,inf);
            const bool16 m_child_dist = childDist < curDist;
            /* if we hit the child we choose to continue with that child if it 
               is closer than the current next child, or we push it onto the stack */
            if (likely(any(lhit)))
            {
              sptr_node++;
              sptr_dist++;
              
              /* push cur node onto stack and continue with hit child */
              if (any(m_child_dist))
              {
                *(sptr_node-1) = curNode;
                *(sptr_dist-1) = curDist; 
                curDist = childDist;
                curNode = child;
              }              
              /* push hit child onto stack*/
              else 
		{
		  *(sptr_node-1) = child;
		  *(sptr_dist-1) = childDist; 
		}
              /* NOTE(review): the stacks are sized 3*BVH4i::maxDepth+1 but this checks against BVH4i::maxDepth */
              assert(sptr_node - stack_node < BVH4i::maxDepth);
            }	      
          }
#if SWITCH_ON_DOWN_TRAVERSAL == 1
	  /* while descending: if too few lanes still reach curNode, push it back and restart at 'pop',
	     where the single-ray switch is evaluated */
	  const bool16 curUtil = ray_tfar > curDist;
	  if (unlikely(countbits(curUtil) <= BVH4i::hybridSIMDUtilSwitchThreshold))
	    {
	      *sptr_node++ = curNode;
	      *sptr_dist++ = curDist; 
	      goto pop;
	    }
#endif

        }
        
        /* return if stack is empty */
        if (unlikely(curNode == BVH4i::invalidNode)) break;
        

        /* intersect leaf */
        const bool16 m_valid_leaf = ray_tfar > curDist;
        STAT3(normal.trav_leaves,1,popcnt(m_valid_leaf),16);

	LeafIntersector::intersect16(curNode,
				     m_valid_leaf,
				     ray16.dir,
				     ray16.org,
				     ray16,
				     accel,
				     (Scene*)bvh->geometry);


        /* only lanes that took part in the leaf test pick up the far distance stored in ray16 */
        ray_tfar = select(m_valid_leaf,ray16.tfar,ray_tfar);
      }
      /* NOTE(review): the closing brace of this function is not present in this excerpt */
/* Sends this shader's uniform values to the GLSL program: the texture
   units of m_texture and (if set) _perlin, the current view's width and
   height, the style, and the inverse of the view's world-to-pixel
   projection matrix. Requires _patch to be set; always returns true. */
bool
HalftoneShaderEx::set_uniform_variables() const
{
   assert(_patch);
   _patch->update_dynamic_samples();

   // glUniform1f (_lod_loc, float(0));

   // texture unit of m_texture, expressed as an offset from GL_TEXTURE0
   glUniform1i(m_tex_loc, m_texture->get_tex_unit() - GL_TEXTURE0);

   // dimensions of the current view
   glUniform1i(m_width,  VIEW::peek()->width());
   glUniform1i(m_height, VIEW::peek()->height());

   // the perlin texture is optional
   if (_perlin) {
      glUniform1i(_perlin_loc, _perlin->get_tex_unit() - GL_TEXTURE0);
   }

   glUniform1i(_style_loc, _style);

   // Send the derivative of the transformation matrix
   // (under construction ... seems to be working now).
   Wtransf P_matrix = VIEW::peek()->wpt_to_pix_proj();

   // make sure that the matrix derivative is taken at the correct Z value

   //cerr << "Projection Matrix : " << endl << P_matrix << endl;

   // GLSL stores matrices in column-major order while jot's mlib stores
   // them in row-major order, so GL is asked to transpose on upload.
   glUniformMatrix4fv(
      _proj_der_loc,
      1,
      GL_TRUE /* transpose = true */,
      (const GLfloat*) float16(P_matrix.inverse()) /*temp*/);

   return true;
}
 /* Unary minus: converts this value to float, negates it, and wraps the
    result in a new float16. */
 float16 operator-() const
 {
   const float negated = -static_cast<float>(*this);
   return float16(negated);
 }
Example #6
0
bool
BasecoatShader::set_uniform_variables() const
{
   // send uniform variable values to the program

   assert(_tex);
   glUniform1i(_blend_normal_loc, _blend_normal);
   glUniform1f(_user_depth_loc, _user_depth);
   glUniform1f(_unit_len_loc, _unit_len);
   glUniform1f(_edge_len_scale_loc, _edge_len_scale);
   glUniform1f(_ratio_scale_loc, _ratio_scale);

   glUniform1i(_tex_loc, _tex->get_raw_unit());
   if(_tex_2d){
      glUniform1i(_is_tex_2d_loc, 1);
      glUniform1i(_tex_2d_loc, _tex_2d->get_raw_unit());
   }
   else{
      glUniform1i(_is_tex_2d_loc, 0);
      glUniform1i(_tex_2d_loc, 0);
   }

   glUniform1i(_is_enabled_loc, _layer[0]._is_enabled);
   glUniform1i(_remap_nl_loc  , _layer[0]._remap_nl);
   glUniform1i(_remap_loc     , _layer[0]._remap);
   glUniform1i(_backlight_loc , _layer[0]._backlight);
   glUniform1f(_e0_loc        , _layer[0]._e0);
   glUniform1f(_e1_loc        , _layer[0]._e1);
   glUniform1f(_s0_loc        , _layer[0]._s0);
   glUniform1f(_s1_loc        , _layer[0]._s1);
   if(_basecoat_mode == 0)
      glUniform3fv(_base_color_loc[0], 1, float3(VIEW::peek()->color()));
   else
      glUniform3fv(_base_color_loc[0], 1, float3(_base_color[0]));
   glUniform3fv(_base_color_loc[1], 1, float3(_base_color[1]));
   glUniform1f(_color_offset_loc, _color_offset);
   glUniform1f(_color_steepness_loc, _color_steepness);
   glUniform1i(_light_separation_loc, _light_separation);

   glUniform1f(_global_edge_len_loc, _global_edge_len);

   Wtransf P_matrix =  VIEW::peek()->eye_to_pix_proj();

   glUniformMatrix4fv(_proj_der_loc,1,GL_TRUE /* transpose = true */,(const GLfloat*) float16(P_matrix));

   return true;
}
Example #7
0
static void send_status_message() {
    INSCompDebugState attstate = inscomp_get_debug_state();
    VectorF<3> rpy = quat_to_rpy(attstate.quat);
    ControllerDebug controllerdebug = controller_get_debug();
    ControllerGains controllergains = controller_get_gains();

    msg.id = MSGID_STATUS;

    msg.roll = float16(rpy[0], 4);
    msg.pitch = float16(rpy[1], 4);
    msg.yaw = float16(rpy[2], 4);

    msg.roll_rate = float16(attstate.rate[0], 4);
    msg.pitch_rate = float16(attstate.rate[1], 4);
    msg.yaw_rate = float16(attstate.rate[2], 4);

    msg.roll_bias = float16(attstate.bias_gyro[0], 4);
    msg.pitch_bias = float16(attstate.bias_gyro[1], 4);
    msg.yaw_bias = float16(attstate.bias_gyro[2], 4);

    msg.roll_p = float16(controllerdebug.pout[0], 4);
    msg.pitch_p = float16(controllerdebug.pout[1], 4);
    msg.yaw_p = float16(controllerdebug.pout[2], 4);

    msg.roll_d = float16(controllerdebug.dout[0], 4);
    msg.pitch_d = float16(controllerdebug.dout[1], 4);
    msg.yaw_d = float16(controllerdebug.dout[2], 4);

    msg.gain_roll_p = float16(controllergains.p[0], 4);
    msg.gain_pitch_p = float16(controllergains.p[1], 4);
    msg.gain_yaw_p = float16(controllergains.p[2], 4);

    msg.gain_roll_d = float16(controllergains.d[0], 4);
    msg.gain_pitch_d = float16(controllergains.d[1], 4);
    msg.gain_yaw_d = float16(controllergains.d[2], 4);

    msg.mag_x_bias = float16(attstate.bias_mag[0], 4);
    msg.mag_y_bias = float16(attstate.bias_mag[1], 4);
    msg.mag_z_bias = float16(attstate.bias_mag[2], 4);

    msg.esc_fl = float16(controllerdebug.motors[0], 4);
    msg.esc_fr = float16(controllerdebug.motors[1], 4);
    msg.esc_rr = float16(controllerdebug.motors[2], 4);
    msg.esc_rl = float16(controllerdebug.motors[3], 4);

    msg.altitude = float16(altitude_get(), 3);
    msg.altitude_rate = float16(altitude_get_rate(), 3);

    msg.battery = float16(board_get_voltage(), 3);

    XBeeSendResponse resp = xbee_send(1, reinterpret_cast<const char *>(&msg), sizeof(msg));
    valid = (resp == XBeeSendResponse::SUCCESS);
}
Example #8
0
/* Sends the blur shader's uniform values to the GLSL program.
   Returns false without sending anything when _patch is not set;
   otherwise uploads the tone-map texture unit, the reciprocal view
   dimensions, the blur/detail parameters and the view's eye-to-pixel
   projection matrix, and returns true. */
bool
BlurShader::set_uniform_variables() const
{
   // nothing is sent without a patch
   if (!_patch)
      return false;

   // tone map variables
   glUniform1i(_tone_tex_loc, ColorRefImage::lookup_raw_tex_unit(0));
   glUniform1f(_width_loc,    1.0/VIEW::peek()->width());
   glUniform1f(_height_loc,   1.0/VIEW::peek()->height());
   glUniform1f(_blur_size_loc, _blur_size);

   // detail / scale parameters
   glUniform1i(_detail_func_loc,     _detail_func);
   glUniform1f(_unit_len_loc,        _unit_len);
   glUniform1f(_edge_len_scale_loc,  _edge_len_scale);
   glUniform1f(_ratio_scale_loc,     _ratio_scale);
   glUniform1f(_user_depth_loc,      _user_depth);
   glUniform1f(_global_edge_len_loc, _global_edge_len);

   // projection matrix, transposed by GL on upload
   Wtransf P_matrix = VIEW::peek()->eye_to_pix_proj();
   glUniformMatrix4fv(
      _proj_der_loc,
      1,
      GL_TRUE /* transpose = true */,
      (const GLfloat*) float16(P_matrix));

   return true;
}
    /* Packet traversal of a BVH8 for 16 rays at once.
     *
     * valid_i : per-lane mask of active rays.
     * bvh     : hierarchy to traverse (root and scene are read from it).
     * ray     : ray packet; org, dir, tnear and tfar are read here and the packet
     *           is handed to PrimitiveIntersector16 for the leaf tests.
     *
     * The whole body is compiled only when __AVX512__ is defined; otherwise the
     * function does nothing. Nodes are kept on a stack of (node, per-lane near
     * distance) pairs with BVH8::invalidNode as bottom sentinel.
     *
     * NOTE(review): presumably PrimitiveIntersector16 records hits in `ray`
     * (tfar is re-read from it after every leaf test) -- confirm. */
    void BVH8Intersector16Chunk<PrimitiveIntersector16>::intersect(bool16* valid_i, BVH8* bvh, Ray16& ray)
    {
#if defined(__AVX512__)
      
      /* load ray */
      const bool16 valid0 = *valid_i;
      const Vec3f16 rdir = rcp_safe(ray.dir);
      const Vec3f16 org_rdir = ray.org * rdir;
      /* inactive lanes get tnear = +inf and tfar = -inf */
      float16 ray_tnear = select(valid0,ray.tnear,pos_inf);
      float16 ray_tfar  = select(valid0,ray.tfar ,neg_inf);
      const float16 inf = float16(pos_inf);
      Precalculations pre(valid0,ray);
      
      /* allocate stack and push root node */
      float16    stack_near[3*BVH8::maxDepth+1];
      NodeRef stack_node[3*BVH8::maxDepth+1];
      stack_node[0] = BVH8::invalidNode;
      stack_near[0] = inf;
      stack_node[1] = bvh->root;
      stack_near[1] = ray_tnear; 
      NodeRef* __restrict__ sptr_node = stack_node + 2;
      float16*    __restrict__ sptr_near = stack_near + 2;
      
      while (1)
      {
        /* pop next node from stack */
        sptr_node--;
        sptr_near--;
        NodeRef cur = *sptr_node;
        if (unlikely(cur == BVH8::invalidNode)) 
          break;
        
        /* cull node if behind closest hit point */
        float16 curDist = *sptr_near;
        if (unlikely(none(ray_tfar > curDist))) 
          continue;
        
        /* descend until cur is a leaf (the bottom sentinel also ends this loop if isLeaf() accepts it -- see check below) */
        while (1)
        {
          /* test if this is a leaf node */
          if (unlikely(cur.isLeaf()))
            break;
          
          const bool16 valid_node = ray_tfar > curDist;
          STAT3(normal.trav_nodes,1,popcnt(valid_node),8);
          const Node* __restrict__ const node = (BVH8::Node*)cur.node();
          
          /* pop of next node */
          sptr_node--;
          sptr_near--;
          cur = *sptr_node; // FIXME: this trick creates issues with stack depth
          curDist = *sptr_near;
          
          for (unsigned i=0; i<BVH8::N; i++)
          {
            /* the first empty child ends the loop over this node's children */
            const NodeRef child = node->children[i];
            if (unlikely(child == BVH8::emptyNode)) break;
            
            /* per-axis distances to the box planes (msub presumably computes a*b-c -- confirm) */
            const float16 lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x);
            const float16 lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y);
            const float16 lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z);
            const float16 lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x);
            const float16 lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y);
            const float16 lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z);
            const float16 lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
            const float16 lfarP  = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
            const bool16 lhit   = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar);      
            
            /* if we hit the child we choose to continue with that child if it 
               is closer than the current next child, or we push it onto the stack */
            if (likely(any(lhit)))
            {
              /* lanes that miss the child get distance inf */
              const float16 childDist = select(lhit,lnearP,inf);
              /* note: shadows the outer `child`; both are read from node->children[i] */
              const NodeRef child = node->children[i];
              
              /* push cur node onto stack and continue with hit child */
              if (any(childDist < curDist))
              {
                *sptr_node = cur;
                *sptr_near = curDist; 
		sptr_node++;
		sptr_near++;

                curDist = childDist;
                cur = child;
              }
              
              /* push hit child onto stack*/
              else {
                *sptr_node = child;
                *sptr_near = childDist; 
		sptr_node++;
		sptr_near++;

              }
              /* NOTE(review): the stacks are sized 3*BVH8::maxDepth+1 but this checks against BVH8::maxDepth */
              assert(sptr_node - stack_node < BVH8::maxDepth);
            }	      
          }
        }
        
        /* return if stack is empty */
        if (unlikely(cur == BVH8::invalidNode)) 
          break;
        
        /* intersect leaf */
	assert(cur != BVH8::emptyNode);
        const bool16 valid_leaf = ray_tfar > curDist;
        STAT3(normal.trav_leaves,1,popcnt(valid_leaf),8);
        size_t items; const Triangle* tri  = (Triangle*) cur.leaf(items);
        PrimitiveIntersector16::intersect(valid_leaf,pre,ray,tri,items,bvh->scene);
        /* only lanes that took part in the leaf test pick up the far distance stored in ray */
        ray_tfar = select(valid_leaf,ray.tfar,ray_tfar);
      }
      AVX_ZERO_UPPER();
#endif       
    }
Example #10
0
 /* Builds a complex value from the real part __r and the imaginary part
    __i; both default to float16(0). */
 constexpr complex(float16 __r = float16(0), float16 __i = float16(0)) {
   this->_M_value.real = __r;
   this->_M_value.imag = __i;
 }