int main() { assert( sizeof(float16) == 2 ); assert( float16() == 0.f ); for( int i = -2048; i <= 2048; ++i ) check_precision_error( i ); check_precision_error( 3.14159f ); check_precision_error( -3.14159f ); check_precision_error( 0.00001f ); check_precision_error( -0.00001f ); check_precision_error( 0.00000f ); check_precision_error( -0.00000f ); check_precision_error( 1.00000f ); check_precision_error( -1.00000f ); float16 f(3.1459f); check_precision_error( f += 1.f ); check_precision_error( f = 4.f ); check_precision_error( f = float16() ); check_precision_error( f *= 0.0001f ); std::cout << "All ok." << std::endl; return 0; }
void check_precision_error( float value ) { float original = value; float converted1 = half2float(float2half(value)); if( std::abs(original-converted1) > 0 ) std::cout << "Precision warning: " << value << " (" << (original-converted1 > 0 ? "+" : "") << (original-converted1) << " err)" << std::endl; else ;// value ok; float converted2 = float16(value); assert( converted1 == converted2 ); }
/*! Traverses a BVH4mb with a packet of 16 rays and falls back to
 *  one-ray-at-a-time traversal when only a few lanes are still active.
 *
 *  \param valid_i  per-lane mask; a lane counts as active when its int16
 *                  entry differs from int16(0)
 *  \param bvh      hierarchy to traverse; root, nodePtr(), triPtr() and
 *                  geometry are read from it
 *  \param ray16    ray packet; it is passed to the LeafIntersector and its
 *                  tfar is re-read after leaf tests (presumably updated by
 *                  the leaf intersector - confirm)
 *
 *  Outline of the body:
 *   - Inactive lanes get tnear = pos_inf and tfar = neg_inf.
 *   - Stack slot 0 holds the sentinel BVH4i::invalidNode with distance inf,
 *     slot 1 holds the root; popping the sentinel ends the traversal.
 *   - A popped node is skipped when no lane satisfies ray_tfar > curDist.
 *   - When at most BVH4i::hybridSIMDUtilSwitchThreshold lanes satisfy it,
 *     each of those lanes is traversed on its own with
 *     traverse_single_intersect / LeafIntersector::intersect, using
 *     stack_node_single and the memory at sptr_dist (viewed as float*) as
 *     its stacks. Afterwards ray_tfar is reloaded from ray16.tfar.
 *   - Otherwise the node is traversed 16-wide: each child's bounds are
 *     blended as (1-time)*bounds + time*bounds_t1 and slab-tested. A hit
 *     child that is closer than the current next node in any lane becomes
 *     the node to continue with (the previous one is pushed); other hit
 *     children are pushed.
 *   - With SWITCH_ON_DOWN_TRAVERSAL == 1, low lane utilization during the
 *     descent pushes the current node back and jumps to the pop label.
 *   - Leaves go through LeafIntersector::intersect16, after which ray_tfar
 *     is refreshed from ray16.tfar for the lanes that took part.
 *
 *  NOTE(review): the depth assert compares against BVH4i::maxDepth while
 *  the stack arrays hold 3*BVH4i::maxDepth+1 entries - confirm intended.
 */
void BVH4mbIntersector16Hybrid<LeafIntersector>::intersect(int16* valid_i, BVH4mb* bvh, Ray16& ray16) { /* near and node stack */ __aligned(64) float16 stack_dist[3*BVH4i::maxDepth+1]; __aligned(64) NodeRef stack_node[3*BVH4i::maxDepth+1]; __aligned(64) NodeRef stack_node_single[3*BVH4i::maxDepth+1]; /* load ray */ const bool16 valid0 = *(int16*)valid_i != int16(0); const Vec3f16 rdir16 = rcp_safe(ray16.dir); const Vec3f16 org_rdir16 = ray16.org * rdir16; float16 ray_tnear = select(valid0,ray16.tnear,pos_inf); float16 ray_tfar = select(valid0,ray16.tfar ,neg_inf); const float16 inf = float16(pos_inf); /* allocate stack and push root node */ stack_node[0] = BVH4i::invalidNode; stack_dist[0] = inf; stack_node[1] = bvh->root; stack_dist[1] = ray_tnear; NodeRef* __restrict__ sptr_node = stack_node + 2; float16* __restrict__ sptr_dist = stack_dist + 2; const Node * __restrict__ nodes = (Node *)bvh->nodePtr(); const BVH4mb::Triangle01 * __restrict__ accel = (BVH4mb::Triangle01 *)bvh->triPtr(); while (1) pop: { /* pop next node from stack */ NodeRef curNode = *(sptr_node-1); float16 curDist = *(sptr_dist-1); sptr_node--; sptr_dist--; const bool16 m_stackDist = ray_tfar > curDist; /* stack empty ? 
*/ if (unlikely(curNode == BVH4i::invalidNode)) break; /* cull node if behind closest hit point */ if (unlikely(none(m_stackDist))) continue; /////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////// /* switch to single ray mode */ if (unlikely(countbits(m_stackDist) <= BVH4i::hybridSIMDUtilSwitchThreshold)) { float *__restrict__ stack_dist_single = (float*)sptr_dist; store16f(stack_dist_single,inf); /* traverse single ray */ long rayIndex = -1; while((rayIndex = bitscan64(rayIndex,m_stackDist)) != BITSCAN_NO_BIT_SET_64) { stack_node_single[0] = BVH4i::invalidNode; stack_node_single[1] = curNode; size_t sindex = 2; const float16 org_xyz = loadAOS4to16f(rayIndex,ray16.org.x,ray16.org.y,ray16.org.z); const float16 dir_xyz = loadAOS4to16f(rayIndex,ray16.dir.x,ray16.dir.y,ray16.dir.z); const float16 rdir_xyz = loadAOS4to16f(rayIndex,rdir16.x,rdir16.y,rdir16.z); const float16 org_rdir_xyz = org_xyz * rdir_xyz; const float16 min_dist_xyz = broadcast1to16f(&ray16.tnear[rayIndex]); float16 max_dist_xyz = broadcast1to16f(&ray16.tfar[rayIndex]); const float16 time = broadcast1to16f(&ray16.time[rayIndex]); const unsigned int leaf_mask = BVH4I_LEAF_MASK; while (1) { NodeRef curNode = stack_node_single[sindex-1]; sindex--; traverse_single_intersect(curNode, sindex, rdir_xyz, org_rdir_xyz, min_dist_xyz, max_dist_xyz, time, stack_node_single, stack_dist_single, nodes, leaf_mask); /* return if stack is empty */ if (unlikely(curNode == BVH4i::invalidNode)) break; /* intersect one ray against four triangles */ const bool hit = LeafIntersector::intersect(curNode, rayIndex, dir_xyz, org_xyz, min_dist_xyz, max_dist_xyz, ray16, accel, (Scene*)bvh->geometry); if (hit) 
compactStack(stack_node_single,stack_dist_single,sindex,max_dist_xyz); } } ray_tfar = select(valid0,ray16.tfar ,neg_inf); continue; } /////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////// const unsigned int leaf_mask = BVH4I_LEAF_MASK; const float16 time = ray16.time; const float16 one_time = (float16::one() - time); while (1) { /* test if this is a leaf node */ if (unlikely(curNode.isLeaf(leaf_mask))) break; STAT3(normal.trav_nodes,1,popcnt(ray_tfar > curDist),16); const Node* __restrict__ const node = curNode.node(nodes); const BVH4mb::Node* __restrict__ const nodeMB = (BVH4mb::Node*)node; /* pop of next node */ sptr_node--; sptr_dist--; curNode = *sptr_node; curDist = *sptr_dist; prefetch<PFHINT_L1>((char*)node + 0*64); prefetch<PFHINT_L1>((char*)node + 1*64); prefetch<PFHINT_L1>((char*)node + 2*64); prefetch<PFHINT_L1>((char*)node + 3*64); #pragma unroll(4) for (unsigned int i=0; i<4; i++) { const NodeRef child = node->lower[i].child; const float16 lower_x = one_time * nodeMB->lower[i].x + time * nodeMB->lower_t1[i].x; const float16 lower_y = one_time * nodeMB->lower[i].y + time * nodeMB->lower_t1[i].y; const float16 lower_z = one_time * nodeMB->lower[i].z + time * nodeMB->lower_t1[i].z; const float16 upper_x = one_time * nodeMB->upper[i].x + time * nodeMB->upper_t1[i].x; const float16 upper_y = one_time * nodeMB->upper[i].y + time * nodeMB->upper_t1[i].y; const float16 upper_z = one_time * nodeMB->upper[i].z + time * nodeMB->upper_t1[i].z; if (unlikely(i >=2 && child == BVH4i::invalidNode)) break; const float16 lclipMinX = msub(lower_x,rdir16.x,org_rdir16.x); const float16 lclipMinY = msub(lower_y,rdir16.y,org_rdir16.y); const float16 lclipMinZ = msub(lower_z,rdir16.z,org_rdir16.z); 
const float16 lclipMaxX = msub(upper_x,rdir16.x,org_rdir16.x); const float16 lclipMaxY = msub(upper_y,rdir16.y,org_rdir16.y); const float16 lclipMaxZ = msub(upper_z,rdir16.z,org_rdir16.z); const float16 lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ)); const float16 lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ)); const bool16 lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar); const float16 childDist = select(lhit,lnearP,inf); const bool16 m_child_dist = childDist < curDist; /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { sptr_node++; sptr_dist++; /* push cur node onto stack and continue with hit child */ if (any(m_child_dist)) { *(sptr_node-1) = curNode; *(sptr_dist-1) = curDist; curDist = childDist; curNode = child; } /* push hit child onto stack*/ else { *(sptr_node-1) = child; *(sptr_dist-1) = childDist; } assert(sptr_node - stack_node < BVH4i::maxDepth); } } #if SWITCH_ON_DOWN_TRAVERSAL == 1 const bool16 curUtil = ray_tfar > curDist; if (unlikely(countbits(curUtil) <= BVH4i::hybridSIMDUtilSwitchThreshold)) { *sptr_node++ = curNode; *sptr_dist++ = curDist; goto pop; } #endif } /* return if stack is empty */ if (unlikely(curNode == BVH4i::invalidNode)) break; /* intersect leaf */ const bool16 m_valid_leaf = ray_tfar > curDist; STAT3(normal.trav_leaves,1,popcnt(m_valid_leaf),16); LeafIntersector::intersect16(curNode, m_valid_leaf, ray16.dir, ray16.org, ray16, accel, (Scene*)bvh->geometry); ray_tfar = select(m_valid_leaf,ray16.tfar,ray_tfar); }
bool HalftoneShaderEx::set_uniform_variables() const { assert(_patch); _patch->update_dynamic_samples(); // glUniform1f (_lod_loc, float(0)); glUniform1i (m_tex_loc, m_texture->get_tex_unit() - GL_TEXTURE0); glUniform1i (m_width, VIEW::peek()->width()); glUniform1i (m_height, VIEW::peek()->height()); if(_perlin) glUniform1i(_perlin_loc, _perlin->get_tex_unit() - GL_TEXTURE0); glUniform1i(_style_loc, _style); //send the derivative of the transformation matrix //under construction ... seems to be working now Wtransf P_matrix = VIEW::peek()->wpt_to_pix_proj(); //make sure that the matrix derivative is taken at the correct Z value //cerr << "Projection Matrix : " << endl << P_matrix << endl; //GLSL stroes matrices in column major order //while jot's mlib stores matrices in row major order //therefore a transpose is needed glUniformMatrix4fv(_proj_der_loc,1,GL_TRUE /* transpose = true */,(const GLfloat*) float16(P_matrix.inverse())/*temp*/); return true; }
/// Unary minus: converts this value to float, negates it and converts the
/// result back to float16.
float16 operator-() const
{
    const float widened = static_cast<float>(*this);
    return float16(-widened);
}
bool BasecoatShader::set_uniform_variables() const { // send uniform variable values to the program assert(_tex); glUniform1i(_blend_normal_loc, _blend_normal); glUniform1f(_user_depth_loc, _user_depth); glUniform1f(_unit_len_loc, _unit_len); glUniform1f(_edge_len_scale_loc, _edge_len_scale); glUniform1f(_ratio_scale_loc, _ratio_scale); glUniform1i(_tex_loc, _tex->get_raw_unit()); if(_tex_2d){ glUniform1i(_is_tex_2d_loc, 1); glUniform1i(_tex_2d_loc, _tex_2d->get_raw_unit()); } else{ glUniform1i(_is_tex_2d_loc, 0); glUniform1i(_tex_2d_loc, 0); } glUniform1i(_is_enabled_loc, _layer[0]._is_enabled); glUniform1i(_remap_nl_loc , _layer[0]._remap_nl); glUniform1i(_remap_loc , _layer[0]._remap); glUniform1i(_backlight_loc , _layer[0]._backlight); glUniform1f(_e0_loc , _layer[0]._e0); glUniform1f(_e1_loc , _layer[0]._e1); glUniform1f(_s0_loc , _layer[0]._s0); glUniform1f(_s1_loc , _layer[0]._s1); if(_basecoat_mode == 0) glUniform3fv(_base_color_loc[0], 1, float3(VIEW::peek()->color())); else glUniform3fv(_base_color_loc[0], 1, float3(_base_color[0])); glUniform3fv(_base_color_loc[1], 1, float3(_base_color[1])); glUniform1f(_color_offset_loc, _color_offset); glUniform1f(_color_steepness_loc, _color_steepness); glUniform1i(_light_separation_loc, _light_separation); glUniform1f(_global_edge_len_loc, _global_edge_len); Wtransf P_matrix = VIEW::peek()->eye_to_pix_proj(); glUniformMatrix4fv(_proj_der_loc,1,GL_TRUE /* transpose = true */,(const GLfloat*) float16(P_matrix)); return true; }
static void send_status_message() { INSCompDebugState attstate = inscomp_get_debug_state(); VectorF<3> rpy = quat_to_rpy(attstate.quat); ControllerDebug controllerdebug = controller_get_debug(); ControllerGains controllergains = controller_get_gains(); msg.id = MSGID_STATUS; msg.roll = float16(rpy[0], 4); msg.pitch = float16(rpy[1], 4); msg.yaw = float16(rpy[2], 4); msg.roll_rate = float16(attstate.rate[0], 4); msg.pitch_rate = float16(attstate.rate[1], 4); msg.yaw_rate = float16(attstate.rate[2], 4); msg.roll_bias = float16(attstate.bias_gyro[0], 4); msg.pitch_bias = float16(attstate.bias_gyro[1], 4); msg.yaw_bias = float16(attstate.bias_gyro[2], 4); msg.roll_p = float16(controllerdebug.pout[0], 4); msg.pitch_p = float16(controllerdebug.pout[1], 4); msg.yaw_p = float16(controllerdebug.pout[2], 4); msg.roll_d = float16(controllerdebug.dout[0], 4); msg.pitch_d = float16(controllerdebug.dout[1], 4); msg.yaw_d = float16(controllerdebug.dout[2], 4); msg.gain_roll_p = float16(controllergains.p[0], 4); msg.gain_pitch_p = float16(controllergains.p[1], 4); msg.gain_yaw_p = float16(controllergains.p[2], 4); msg.gain_roll_d = float16(controllergains.d[0], 4); msg.gain_pitch_d = float16(controllergains.d[1], 4); msg.gain_yaw_d = float16(controllergains.d[2], 4); msg.mag_x_bias = float16(attstate.bias_mag[0], 4); msg.mag_y_bias = float16(attstate.bias_mag[1], 4); msg.mag_z_bias = float16(attstate.bias_mag[2], 4); msg.esc_fl = float16(controllerdebug.motors[0], 4); msg.esc_fr = float16(controllerdebug.motors[1], 4); msg.esc_rr = float16(controllerdebug.motors[2], 4); msg.esc_rl = float16(controllerdebug.motors[3], 4); msg.altitude = float16(altitude_get(), 3); msg.altitude_rate = float16(altitude_get_rate(), 3); msg.battery = float16(board_get_voltage(), 3); XBeeSendResponse resp = xbee_send(1, reinterpret_cast<const char *>(&msg), sizeof(msg)); valid = (resp == XBeeSendResponse::SUCCESS); }
bool BlurShader::set_uniform_variables() const { // send uniform variable values to the program if(_patch){ //tone map variables glUniform1i(_tone_tex_loc, ColorRefImage::lookup_raw_tex_unit(0)); glUniform1f(_width_loc, 1.0/VIEW::peek()->width()); glUniform1f(_height_loc, 1.0/VIEW::peek()->height()); glUniform1f(_blur_size_loc, _blur_size); glUniform1i(_detail_func_loc, _detail_func); glUniform1f(_unit_len_loc, _unit_len); glUniform1f(_edge_len_scale_loc, _edge_len_scale); glUniform1f(_ratio_scale_loc, _ratio_scale); glUniform1f(_user_depth_loc, _user_depth); glUniform1f(_global_edge_len_loc, _global_edge_len); Wtransf P_matrix = VIEW::peek()->eye_to_pix_proj(); glUniformMatrix4fv(_proj_der_loc,1,GL_TRUE /* transpose = true */,(const GLfloat*) float16(P_matrix)); return true; } return false; }
/*! Traverses a BVH8 with a packet of 16 rays and intersects the leaves
 *  that are reached. The whole body is compiled only when __AVX512__ is
 *  defined; otherwise the function does nothing.
 *
 *  \param valid_i  mask of the active lanes
 *  \param bvh      hierarchy to traverse; root and scene are read from it
 *  \param ray      ray packet; it is passed to PrimitiveIntersector16 and
 *                  its tfar is re-read after each leaf (presumably updated
 *                  by the primitive intersector - confirm)
 *
 *  Outline of the body:
 *   - Inactive lanes get tnear = pos_inf and tfar = neg_inf.
 *   - Stack slot 0 holds the sentinel BVH8::invalidNode with distance inf,
 *     slot 1 holds the root; popping the sentinel ends the traversal.
 *   - A popped node is skipped when no lane satisfies ray_tfar > curDist.
 *   - For an inner node the next stack entry is popped up front, then the
 *     children are slab-tested in order until the first BVH8::emptyNode.
 *     A hit child that is closer than the current next node in any lane
 *     becomes the node to continue with (the previous one is pushed);
 *     other hit children are pushed.
 *   - Leaves go through PrimitiveIntersector16::intersect, after which
 *     ray_tfar is refreshed from ray.tfar for the lanes that took part.
 *
 *  NOTE(review): the depth assert compares against BVH8::maxDepth while
 *  the stack arrays hold 3*BVH8::maxDepth+1 entries - confirm intended.
 */
void BVH8Intersector16Chunk<PrimitiveIntersector16>::intersect(bool16* valid_i, BVH8* bvh, Ray16& ray) { #if defined(__AVX512__) /* load ray */ const bool16 valid0 = *valid_i; const Vec3f16 rdir = rcp_safe(ray.dir); const Vec3f16 org_rdir = ray.org * rdir; float16 ray_tnear = select(valid0,ray.tnear,pos_inf); float16 ray_tfar = select(valid0,ray.tfar ,neg_inf); const float16 inf = float16(pos_inf); Precalculations pre(valid0,ray); /* allocate stack and push root node */ float16 stack_near[3*BVH8::maxDepth+1]; NodeRef stack_node[3*BVH8::maxDepth+1]; stack_node[0] = BVH8::invalidNode; stack_near[0] = inf; stack_node[1] = bvh->root; stack_near[1] = ray_tnear; NodeRef* __restrict__ sptr_node = stack_node + 2; float16* __restrict__ sptr_near = stack_near + 2; while (1) { /* pop next node from stack */ sptr_node--; sptr_near--; NodeRef cur = *sptr_node; if (unlikely(cur == BVH8::invalidNode)) break; /* cull node if behind closest hit point */ float16 curDist = *sptr_near; if (unlikely(none(ray_tfar > curDist))) continue; while (1) { /* test if this is a leaf node */ if (unlikely(cur.isLeaf())) break; const bool16 valid_node = ray_tfar > curDist; STAT3(normal.trav_nodes,1,popcnt(valid_node),8); const Node* __restrict__ const node = (BVH8::Node*)cur.node(); /* pop of next node */ sptr_node--; sptr_near--; cur = *sptr_node; // FIXME: this trick creates issues with stack depth curDist = *sptr_near; for (unsigned i=0; i<BVH8::N; i++) { const NodeRef child = node->children[i]; if (unlikely(child == BVH8::emptyNode)) break; const float16 lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x); const float16 lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y); const float16 lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z); const float16 lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x); const float16 lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y); const float16 lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z); const float16 lnearP = max(max(min(lclipMinX, 
lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ)); const float16 lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ)); const bool16 lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar); /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { const float16 childDist = select(lhit,lnearP,inf); const NodeRef child = node->children[i]; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *sptr_node = cur; *sptr_near = curDist; sptr_node++; sptr_near++; curDist = childDist; cur = child; } /* push hit child onto stack*/ else { *sptr_node = child; *sptr_near = childDist; sptr_node++; sptr_near++; } assert(sptr_node - stack_node < BVH8::maxDepth); } } } /* return if stack is empty */ if (unlikely(cur == BVH8::invalidNode)) break; /* intersect leaf */ assert(cur != BVH8::emptyNode); const bool16 valid_leaf = ray_tfar > curDist; STAT3(normal.trav_leaves,1,popcnt(valid_leaf),8); size_t items; const Triangle* tri = (Triangle*) cur.leaf(items); PrimitiveIntersector16::intersect(valid_leaf,pre,ray,tri,items,bvh->scene); ray_tfar = select(valid_leaf,ray.tfar,ray_tfar); } AVX_ZERO_UPPER(); #endif }
/// Constructs the value from a real part __r and an imaginary part __i,
/// storing them in _M_value.real and _M_value.imag. Both parameters
/// default to float16(0).
/// NOTE(review): the members are assigned in the body of a constexpr
/// constructor rather than in a member-initializer list; this presumably
/// needs C++14 or later - confirm against the targeted language standard.
constexpr complex(float16 __r = float16(0), float16 __i = float16(0)) { _M_value.real = __r; _M_value.imag = __i; }