/* Find texture coordinates for an object-space point. */
void CMappingDefinition::GetTextureCoordinates(
	const CMappingVectors &mvDefault, const FLOAT3D &vSpace, MEX2D &vTexture) const
{
	// vector from the mapping origin to the point
	const FLOAT3D vOffset = vSpace - mvDefault.mv_vO;
	// project onto the mapping axes
	const FLOAT s = mvDefault.mv_vU % vOffset;
	const FLOAT t = mvDefault.mv_vV % vOffset;
	// transform by the mapping's 2x2 gradient matrix
	const FLOAT u = s*md_fUoS + t*md_fUoT;
	const FLOAT v = s*md_fVoS + t*md_fVoT;
	// apply offsets and convert to fixed-point texture units
	vTexture(1) = FloatToInt((u + md_fUOffset)*1024.0f);
	vTexture(2) = FloatToInt((v + md_fVOffset)*1024.0f);
}
SingleColourFit::SingleColourFit( ColourSet const* colours, int flags )
	: ColourFit( colours, flags )
{
	// grab the single colour
	Vec3 const* values = m_colours->GetPoints();
	m_colour[0] = ( u8 )FloatToInt( 255.0f*values->X(), 255 );
	m_colour[1] = ( u8 )FloatToInt( 255.0f*values->Y(), 255 );
	m_colour[2] = ( u8 )FloatToInt( 255.0f*values->Z(), 255 );

	// initialise the best error
	m_besterror = INT_MAX;
}
void Material::BindTextures(asset::MaterialLoader *loader)
{
	gls.DisableAllMTSources();

	if (!loader)
		return;

	for (int i = 0; i < kMaterialTextureSource_MaxIndices; ++i) {
		pkg::Asset::Ref a = loader->Texture(i);
		if (!a)
			continue; // allows gaps here (may be a procedural texture)
		GLTextureAsset *tex = GLTextureAsset::Cast(a);
		RAD_ASSERT(tex);

		int index = 0;
		if (tex->numTextures > 1) {
			index = FloatToInt(time.get() * TextureFPS(i));
			if ((timingMode == kTimingMode_Relative) && ClampTextureFrames(i))
				index = std::min(index, tex->numTextures.get()-1);
			else
				index = index % tex->numTextures.get();
		}

		gls.SetMTSource(kMaterialTextureSource_Texture, i, tex->Texture(index));
	}
}
void acarFluid::advectLinear(Real *in, Real *out)
{
	int starti = 1;
	int startj = 1;
	int endi = width - 1;
	int endj = height - 1;
	Real maxX = width - 1.5;
	Real maxY = height - 1.5;
	int index;
	Real vx, vy;

	for (int i = starti; i < endi; ++i) {
		for (int j = startj; j < endj; ++j) {
			index = i*height + j;
			vx = velocityU[index];
			vy = velocityV[index];

			// Previous position: trace backwards along the velocity field,
			// clamped to stay inside the grid
			Real dx = dt*vx;
			Real dy = dt*vy;
			Real iPrev = i - dx;
			Real jPrev = j - dy;
			iPrev = std::min(std::max(iPrev, 0.5), maxX);
			jPrev = std::min(std::max(jPrev, 0.5), maxY);

			// Advected value: bilinear interpolation of the four cells
			// surrounding the backtraced point
			int x0 = FloatToInt(iPrev);
			int y0 = FloatToInt(jPrev);
			int index2 = x0*height + y0;
			Real a1 = iPrev - (Real)x0;
			Real b1 = jPrev - (Real)y0;
			Real a0 = 1.0 - a1;
			Real b0 = 1.0 - b1;
			out[index] = b0*( a0*in[index2]   + a1*in[index2+height] )
			           + b1*( a0*in[index2+1] + a1*in[index2+height+1] );
		}
	}
}
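/*
 * The loop above is a semi-Lagrangian step: trace each cell centre backwards
 * through the velocity field, then bilinearly interpolate the old field at
 * the landing point. A minimal standalone sketch of just the sampling step
 * (a hypothetical free function, not part of acarFluid; coordinates are
 * assumed pre-clamped to the grid interior as in the loop above):
 */
static double BilinearSample(const double *field, int height, double x, double y)
{
	int x0 = (int)x;                        // truncate to the containing cell
	int y0 = (int)y;
	double a1 = x - x0, b1 = y - y0;        // fractional offsets inside the cell
	double a0 = 1.0 - a1, b0 = 1.0 - b1;
	const double *p = field + x0*height + y0;   // same column-major layout as above
	return b0*(a0*p[0] + a1*p[height]) + b1*(a0*p[1] + a1*p[height+1]);
}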
void Float32ToNativeInt32( const float *src, int *dst, unsigned int numToConvert )
{
	const float *src0 = src;
	int *dst0 = dst;
	unsigned int count = numToConvert;

	if (count >= 4) {
		// vector -- requires 4+ samples
		ROUNDMODE_NEG_INF
		const __m128 vround = (const __m128) { 0.5f, 0.5f, 0.5f, 0.5f };
		const __m128 vmin = (const __m128) { -2147483648.0f, -2147483648.0f, -2147483648.0f, -2147483648.0f };
		const __m128 vmax = (const __m128) { kMaxFloat32, kMaxFloat32, kMaxFloat32, kMaxFloat32 };
		const __m128 vscale = (const __m128) { 2147483648.0f, 2147483648.0f, 2147483648.0f, 2147483648.0f };
		__m128 vf0;
		__m128i vi0;

#define F32TOLE32(x) \
	vf##x = _mm_mul_ps(vf##x, vscale); \
	vf##x = _mm_add_ps(vf##x, vround); \
	vf##x = _mm_max_ps(vf##x, vmin); \
	vf##x = _mm_min_ps(vf##x, vmax); \
	vi##x = _mm_cvtps_epi32(vf##x);

		int falign = (uintptr_t)src & 0xF;
		int ialign = (uintptr_t)dst & 0xF;
		if (falign != 0 || ialign != 0) {
			// do one unaligned conversion
			vf0 = _mm_loadu_ps(src);
			F32TOLE32(0)
			_mm_storeu_si128((__m128i *)dst, vi0);
			// and advance such that the destination ints are aligned
			unsigned int n = (16 - ialign) / 4;
			src += n;
			dst += n;
			count -= n;

			falign = (uintptr_t)src & 0xF;
			if (falign != 0) {
				// unaligned loads, aligned stores
				while (count >= 4) {
					vf0 = _mm_loadu_ps(src);
					F32TOLE32(0)
					_mm_store_si128((__m128i *)dst, vi0);
					src += 4;
					dst += 4;
					count -= 4;
				}
				goto VectorCleanup;
			}
		}

		// aligned loads, aligned stores
		while (count >= 4) {
			vf0 = _mm_load_ps(src);
			F32TOLE32(0)
			_mm_store_si128((__m128i *)dst, vi0);
			src += 4;
			dst += 4;
			count -= 4;
		}

VectorCleanup:
		if (count > 0) {
			// unaligned cleanup -- just do one unaligned vector at the end
			src = src0 + numToConvert - 4;
			dst = dst0 + numToConvert - 4;
			vf0 = _mm_loadu_ps(src);
			F32TOLE32(0)
			_mm_storeu_si128((__m128i *)dst, vi0);
		}
		RESTORE_ROUNDMODE
		return;
	}

	// scalar for small numbers of samples
	if (count > 0) {
		double scale = 2147483648.0, round = 0.5, max32 = 2147483648.0 - 1.0 - 0.5, min32 = 0.;
		ROUNDMODE_NEG_INF
		while (count-- > 0) {
			double f0 = *src++;
			f0 = f0 * scale + round;
			int i0 = FloatToInt(f0, min32, max32);
			*dst++ = i0;
		}
		RESTORE_ROUNDMODE
	}
}

void NativeInt32ToFloat32( const int *src, float *dst, unsigned int numToConvert )
{
	const int *src0 = src;
	float *dst0 = dst;
	unsigned int count = numToConvert;

	if (count >= 4) {
		// vector -- requires 4+ samples
#define LEI32TOF32(x) \
	vf##x = _mm_cvtepi32_ps(vi##x); \
	vf##x = _mm_mul_ps(vf##x, vscale);

		const __m128 vscale = (const __m128) { 1.0/2147483648.0f, 1.0/2147483648.0f, 1.0/2147483648.0f, 1.0/2147483648.0f };
		__m128 vf0;
		__m128i vi0;

		int ialign = (uintptr_t)src & 0xF;
		int falign = (uintptr_t)dst & 0xF;
		if (falign != 0 || ialign != 0) {
			// do one unaligned conversion
			vi0 = _mm_loadu_si128((__m128i const *)src);
			LEI32TOF32(0)
			_mm_storeu_ps(dst, vf0);
			// and advance such that the destination floats are aligned
			unsigned int n = (16 - falign) / 4;
			src += n;
			dst += n;
			count -= n;

			ialign = (uintptr_t)src & 0xF;
			if (ialign != 0) {
				// unaligned loads, aligned stores
				while (count >= 4) {
					vi0 = _mm_loadu_si128((__m128i const *)src);
					LEI32TOF32(0)
					_mm_store_ps(dst, vf0);
					src += 4;
					dst += 4;
					count -= 4;
				}
				goto VectorCleanup;
			}
		}

		// aligned loads, aligned stores
		while (count >= 4) {
			vi0 = _mm_load_si128((__m128i const *)src);
			LEI32TOF32(0)
			_mm_store_ps(dst, vf0);
			src += 4;
			dst += 4;
			count -= 4;
		}

VectorCleanup:
		if (count > 0) {
			// unaligned cleanup -- just do one unaligned vector at the end
			src = src0 + numToConvert - 4;
			dst = dst0 + numToConvert - 4;
			vi0 = _mm_loadu_si128((__m128i const *)src);
			LEI32TOF32(0)
			_mm_storeu_ps(dst, vf0);
		}
		return;
	}

	// scalar for small numbers of samples
	if (count > 0) {
		double scale = 1./2147483648.0f;
		while (count-- > 0) {
			int i = *src++;
			double f = (double)i * scale;
			*dst++ = f;
		}
	}
}

int alsa_set_hwparams(alsa_dev_t *dev, snd_pcm_t *handle,
                      snd_pcm_hw_params_t *params, snd_pcm_access_t access)
{
	unsigned int rrate;
	snd_pcm_uframes_t size;
	int err, dir;

	/* choose all parameters */
	err = snd_pcm_hw_params_any(handle, params);
	if (err < 0) {
		printf("Broken configuration for playback: no configurations available: %s\n", snd_strerror(err));
		return err;
	}
	/* set the interleaved read/write format */
	err = snd_pcm_hw_params_set_access(handle, params, access);
	if (err < 0) {
		printf("Access type not available for playback: %s\n", snd_strerror(err));
		return err;
	}
	/* set the sample format */
	err = snd_pcm_hw_params_set_format(handle, params, dev->format);
	if (err < 0) {
		printf("Sample format not available for playback: %s\n", snd_strerror(err));
		return err;
	}
	/* set the count of channels */
	err = snd_pcm_hw_params_set_channels(handle, params, dev->channels);
	if (err < 0) {
		printf("Channel count (%d) not available for playback: %s\n", dev->channels, snd_strerror(err));
		return err;
	}
	/* set the stream rate */
	rrate = dev->rate;
	err = snd_pcm_hw_params_set_rate_near(handle, params, &rrate, 0);
	if (err < 0) {
		printf("Rate %d Hz not available for playback: %s\n", dev->rate, snd_strerror(err));
		return err;
	}
	if (rrate != dev->rate) {
		printf("Rate doesn't match (requested %dHz, got %dHz)\n", dev->rate, rrate);
		return -EINVAL;
	}
	/* set the period size */
	err = snd_pcm_hw_params_set_period_size(handle, params, dev->period_size, 0);
	if (err < 0) {
		printf("Unable to set period size %d for playback: %s\n", (int)dev->period_size, snd_strerror(err));
		return err;
	}
	err = snd_pcm_hw_params_get_period_size(params, &size, &dir);
	if (err < 0) {
		printf("Unable to get period size for playback: %s\n", snd_strerror(err));
		return err;
	}
	if (dev->period_size != size) {
		printf("Period size doesn't match (requested %d, got %d)\n", (int)dev->period_size, (int)size);
		return -EINVAL;
	}
	/* set the buffer size */
	err = snd_pcm_hw_params_set_buffer_size(handle, params, dev->buffer_size);
	if (err < 0) {
		printf("Unable to set buffer size %d for playback: %s\n", (int)dev->buffer_size, snd_strerror(err));
		return err;
	}
	err = snd_pcm_hw_params_get_buffer_size(params, &size);
	if (err < 0) {
		printf("Unable to get buffer size for playback: %s\n", snd_strerror(err));
		return err;
	}
	if (size != (snd_pcm_uframes_t)dev->buffer_size) {
		printf("Buffer size doesn't match (requested %d, got %d)\n", (int)dev->buffer_size, (int)size);
		return -EINVAL;
	}
	/* write the parameters to device */
	err = snd_pcm_hw_params(handle, params);
	if (err < 0) {
		printf("Unable to set hw params for playback: %s\n", snd_strerror(err));
		return err;
	}
	return 0;
}

int alsa_set_swparams(alsa_dev_t *dev, snd_pcm_t *handle, snd_pcm_sw_params_t *swparams)
{
	int err;

	/* get the current swparams */
	err = snd_pcm_sw_params_current(handle, swparams);
	if (err < 0) {
		printf("Unable to determine current swparams for playback: %s\n", snd_strerror(err));
		return err;
	}
	/* allow the transfer when at least period_size samples can be processed */
	/* or disable this mechanism when period event is enabled (aka interrupt like style processing) */
	err = snd_pcm_sw_params_set_avail_min(handle, swparams, dev->period_size);
	if (err < 0) {
		printf("Unable to set avail min for playback: %s\n", snd_strerror(err));
		return err;
	}
	/* enable period events */
	err = snd_pcm_sw_params_set_period_event(handle, swparams, 1);
	if (err < 0) {
		printf("Unable to set period event: %s\n", snd_strerror(err));
		return err;
	}
	/* write the parameters to the playback device */
	err = snd_pcm_sw_params(handle, swparams);
	if (err < 0) {
		printf("Unable to set sw params for playback: %s\n", snd_strerror(err));
		return err;
	}
	return 0;
}
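/*
 * A minimal usage sketch for the two setup functions above (the function name
 * alsa_open_playback and the "default" device name are illustrative
 * assumptions; dev's fields are expected to be filled in by the caller).
 * The hw params must be installed before the sw params, as shown here.
 */
static int alsa_open_playback(alsa_dev_t *dev, snd_pcm_t **handle_out)
{
	snd_pcm_hw_params_t *hwparams;
	snd_pcm_sw_params_t *swparams;
	snd_pcm_t *handle;
	int err;

	snd_pcm_hw_params_alloca(&hwparams);
	snd_pcm_sw_params_alloca(&swparams);

	err = snd_pcm_open(&handle, "default", SND_PCM_STREAM_PLAYBACK, 0);
	if (err < 0)
		return err;
	err = alsa_set_hwparams(dev, handle, hwparams, SND_PCM_ACCESS_RW_INTERLEAVED);
	if (err < 0)
		return err;
	err = alsa_set_swparams(dev, handle, swparams);
	if (err < 0)
		return err;
	*handle_out = handle;
	return 0;
}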
int CFloatVariable::GetInteger(void) const
{
	return FloatToInt(m_Value);
}

void CIntegerVariable::SetFloat(const double Value)
{
	m_Value = FloatToInt(Value);
}
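/*
 * The snippets in this section each rely on some project-local FloatToInt().
 * A minimal sketch of a round-to-nearest version (an assumption -- the
 * projects above variously truncate, round, or saturate):
 */
#include <cmath>

static inline int FloatToIntNearest(double value)
{
	return (int)std::lrint(value); // rounds using the current FP rounding mode
}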
void Float32ToNativeInt16_X86( const Float32 *src, SInt16 *dst, unsigned int numToConvert )
{
	const float *src0 = src;
	int16_t *dst0 = dst;
	unsigned int count = numToConvert;

	if (count >= 8) {
		// vector -- requires 8+ samples
		ROUNDMODE_NEG_INF
		const __m128 vround = (const __m128) { 0.5f, 0.5f, 0.5f, 0.5f };
		const __m128 vmin = (const __m128) { -32768.0f, -32768.0f, -32768.0f, -32768.0f };
		const __m128 vmax = (const __m128) { 32767.0f, 32767.0f, 32767.0f, 32767.0f };
		const __m128 vscale = (const __m128) { 32768.0f, 32768.0f, 32768.0f, 32768.0f };
		__m128 vf0, vf1;
		__m128i vi0, vi1, vpack0;

#define F32TOLE16 \
	vf0 = _mm_mul_ps(vf0, vscale); \
	vf1 = _mm_mul_ps(vf1, vscale); \
	vf0 = _mm_add_ps(vf0, vround); \
	vf1 = _mm_add_ps(vf1, vround); \
	vf0 = _mm_max_ps(vf0, vmin); \
	vf1 = _mm_max_ps(vf1, vmin); \
	vf0 = _mm_min_ps(vf0, vmax); \
	vf1 = _mm_min_ps(vf1, vmax); \
	vi0 = _mm_cvtps_epi32(vf0); \
	vi1 = _mm_cvtps_epi32(vf1); \
	vpack0 = _mm_packs_epi32(vi0, vi1);

		int falign = (uintptr_t)src & 0xF;
		int ialign = (uintptr_t)dst & 0xF;
		if (falign != 0 || ialign != 0) {
			// do one unaligned conversion
			vf0 = _mm_loadu_ps(src);
			vf1 = _mm_loadu_ps(src+4);
			F32TOLE16
			_mm_storeu_si128((__m128i *)dst, vpack0);
			// advance such that the destination ints are aligned
			unsigned int n = (16 - ialign) / 2;
			src += n;
			dst += n;
			count -= n;

			falign = (uintptr_t)src & 0xF;
			if (falign != 0) {
				// unaligned loads, aligned stores
				while (count >= 8) {
					vf0 = _mm_loadu_ps(src);
					vf1 = _mm_loadu_ps(src+4);
					F32TOLE16
					_mm_store_si128((__m128i *)dst, vpack0);
					src += 8;
					dst += 8;
					count -= 8;
				}
				goto VectorCleanup;
			}
		}

		// aligned loads, aligned stores
		while (count >= 8) {
			vf0 = _mm_load_ps(src);
			vf1 = _mm_load_ps(src+4);
			F32TOLE16
			_mm_store_si128((__m128i *)dst, vpack0);
			src += 8;
			dst += 8;
			count -= 8;
		}

VectorCleanup:
		if (count > 0) {
			// unaligned cleanup -- just do one unaligned vector at the end
			src = src0 + numToConvert - 8;
			dst = dst0 + numToConvert - 8;
			vf0 = _mm_loadu_ps(src);
			vf1 = _mm_loadu_ps(src+4);
			F32TOLE16
			_mm_storeu_si128((__m128i *)dst, vpack0);
		}
		RESTORE_ROUNDMODE
		return;
	}

	// scalar for small numbers of samples
	if (count > 0) {
		double scale = 2147483648.0, round = 32768.0, max32 = 2147483648.0 - 1.0 - 32768.0, min32 = 0.;
		ROUNDMODE_NEG_INF
		while (count-- > 0) {
			double f0 = *src++;
			f0 = f0 * scale + round;
			SInt32 i0 = FloatToInt(f0, min32, max32);
			i0 >>= 16;
			*dst++ = i0;
		}
		RESTORE_ROUNDMODE
	}
}

// ===================================================================================================

void Float32ToSwapInt16_X86( const Float32 *src, SInt16 *dst, unsigned int numToConvert )
{
	const float *src0 = src;
	int16_t *dst0 = dst;
	unsigned int count = numToConvert;

	if (count >= 8) {
		// vector -- requires 8+ samples
		ROUNDMODE_NEG_INF
		const __m128 vround = (const __m128) { 0.5f, 0.5f, 0.5f, 0.5f };
		const __m128 vmin = (const __m128) { -32768.0f, -32768.0f, -32768.0f, -32768.0f };
		const __m128 vmax = (const __m128) { 32767.0f, 32767.0f, 32767.0f, 32767.0f };
		const __m128 vscale = (const __m128) { 32768.0f, 32768.0f, 32768.0f, 32768.0f };
		__m128 vf0, vf1;
		__m128i vi0, vi1, vpack0;

#define F32TOBE16 \
	vf0 = _mm_mul_ps(vf0, vscale); \
	vf1 = _mm_mul_ps(vf1, vscale); \
	vf0 = _mm_add_ps(vf0, vround); \
	vf1 = _mm_add_ps(vf1, vround); \
	vf0 = _mm_max_ps(vf0, vmin); \
	vf1 = _mm_max_ps(vf1, vmin); \
	vf0 = _mm_min_ps(vf0, vmax); \
	vf1 = _mm_min_ps(vf1, vmax); \
	vi0 = _mm_cvtps_epi32(vf0); \
	vi1 = _mm_cvtps_epi32(vf1); \
	vpack0 = _mm_packs_epi32(vi0, vi1); \
	vpack0 = byteswap16(vpack0);

		int falign = (uintptr_t)src & 0xF;
		int ialign = (uintptr_t)dst & 0xF;
		if (falign != 0 || ialign != 0) {
			// do one unaligned conversion
			vf0 = _mm_loadu_ps(src);
			vf1 = _mm_loadu_ps(src+4);
			F32TOBE16
			_mm_storeu_si128((__m128i *)dst, vpack0);
			// and advance such that the destination ints are aligned
			unsigned int n = (16 - ialign) / 2;
			src += n;
			dst += n;
			count -= n;

			falign = (uintptr_t)src & 0xF;
			if (falign != 0) {
				// unaligned loads, aligned stores
				while (count >= 8) {
					vf0 = _mm_loadu_ps(src);
					vf1 = _mm_loadu_ps(src+4);
					F32TOBE16
					_mm_store_si128((__m128i *)dst, vpack0);
					src += 8;
					dst += 8;
					count -= 8;
				}
				goto VectorCleanup;
			}
		}

		// aligned loads, aligned stores
		while (count >= 8) {
			vf0 = _mm_load_ps(src);
			vf1 = _mm_load_ps(src+4);
			F32TOBE16
			_mm_store_si128((__m128i *)dst, vpack0);
			src += 8;
			dst += 8;
			count -= 8;
		}

VectorCleanup:
		if (count > 0) {
			// unaligned cleanup -- just do one unaligned vector at the end
			src = src0 + numToConvert - 8;
			dst = dst0 + numToConvert - 8;
			vf0 = _mm_loadu_ps(src);
			vf1 = _mm_loadu_ps(src+4);
			F32TOBE16
			_mm_storeu_si128((__m128i *)dst, vpack0);
		}
		RESTORE_ROUNDMODE
		return;
	}

	// scalar for small numbers of samples
	if (count > 0) {
		double scale = 2147483648.0, round = 32768.0, max32 = 2147483648.0 - 1.0 - 32768.0, min32 = 0.;
		ROUNDMODE_NEG_INF
		while (count-- > 0) {
			double f0 = *src++;
			f0 = f0 * scale + round;
			SInt32 i0 = FloatToInt(f0, min32, max32);
			i0 >>= 16;
			*dst++ = OSSwapInt16(i0);
		}
		RESTORE_ROUNDMODE
	}
}

// ===================================================================================================

void Float32ToNativeInt32_X86( const Float32 *src, SInt32 *dst, unsigned int numToConvert )
{
	const float *src0 = src;
	SInt32 *dst0 = dst;
	unsigned int count = numToConvert;

	if (count >= 4) {
		// vector -- requires 4+ samples
		ROUNDMODE_NEG_INF
		const __m128 vround = (const __m128) { 0.5f, 0.5f, 0.5f, 0.5f };
		const __m128 vmin = (const __m128) { -2147483648.0f, -2147483648.0f, -2147483648.0f, -2147483648.0f };
		const __m128 vmax = (const __m128) { kMaxFloat32, kMaxFloat32, kMaxFloat32, kMaxFloat32 };
		const __m128 vscale = (const __m128) { 2147483648.0f, 2147483648.0f, 2147483648.0f, 2147483648.0f };
		__m128 vf0;
		__m128i vi0;

#define F32TOLE32(x) \
	vf##x = _mm_mul_ps(vf##x, vscale); \
	vf##x = _mm_add_ps(vf##x, vround); \
	vf##x = _mm_max_ps(vf##x, vmin); \
	vf##x = _mm_min_ps(vf##x, vmax); \
	vi##x = _mm_cvtps_epi32(vf##x);

		int falign = (uintptr_t)src & 0xF;
		int ialign = (uintptr_t)dst & 0xF;
		if (falign != 0 || ialign != 0) {
			// do one unaligned conversion
			vf0 = _mm_loadu_ps(src);
			F32TOLE32(0)
			_mm_storeu_si128((__m128i *)dst, vi0);
			// and advance such that the destination ints are aligned
			unsigned int n = (16 - ialign) / 4;
			src += n;
			dst += n;
			count -= n;

			falign = (uintptr_t)src & 0xF;
			if (falign != 0) {
				// unaligned loads, aligned stores
				while (count >= 4) {
					vf0 = _mm_loadu_ps(src);
					F32TOLE32(0)
					_mm_store_si128((__m128i *)dst, vi0);
					src += 4;
					dst += 4;
					count -= 4;
				}
				goto VectorCleanup;
			}
		}

		// aligned loads, aligned stores
		while (count >= 4) {
			vf0 = _mm_load_ps(src);
			F32TOLE32(0)
			_mm_store_si128((__m128i *)dst, vi0);
			src += 4;
			dst += 4;
			count -= 4;
		}

VectorCleanup:
		if (count > 0) {
			// unaligned cleanup -- just do one unaligned vector at the end
			src = src0 + numToConvert - 4;
			dst = dst0 + numToConvert - 4;
			vf0 = _mm_loadu_ps(src);
			F32TOLE32(0)
			_mm_storeu_si128((__m128i *)dst, vi0);
		}
		RESTORE_ROUNDMODE
		return;
	}

	// scalar for small numbers of samples
	if (count > 0) {
		double scale = 2147483648.0, round = 0.5, max32 = 2147483648.0 - 1.0 - 0.5, min32 = 0.;
		ROUNDMODE_NEG_INF
		while (count-- > 0) {
			double f0 = *src++;
			f0 = f0 * scale + round;
			SInt32 i0 = FloatToInt(f0, min32, max32);
			*dst++ = i0;
		}
		RESTORE_ROUNDMODE
	}
}

// ===================================================================================================

void Float32ToSwapInt32_X86( const Float32 *src, SInt32 *dst, unsigned int numToConvert )
{
	const float *src0 = src;
	SInt32 *dst0 = dst;
	unsigned int count = numToConvert;

	if (count >= 4) {
		// vector -- requires 4+ samples
		ROUNDMODE_NEG_INF
		const __m128 vround = (const __m128) { 0.5f, 0.5f, 0.5f, 0.5f };
		const __m128 vmin = (const __m128) { -2147483648.0f, -2147483648.0f, -2147483648.0f, -2147483648.0f };
		const __m128 vmax = (const __m128) { kMaxFloat32, kMaxFloat32, kMaxFloat32, kMaxFloat32 };
		const __m128 vscale = (const __m128) { 2147483648.0f, 2147483648.0f, 2147483648.0f, 2147483648.0f };
		__m128 vf0;
		__m128i vi0;

#define F32TOBE32(x) \
	vf##x = _mm_mul_ps(vf##x, vscale); \
	vf##x = _mm_add_ps(vf##x, vround); \
	vf##x = _mm_max_ps(vf##x, vmin); \
	vf##x = _mm_min_ps(vf##x, vmax); \
	vi##x = _mm_cvtps_epi32(vf##x); \
	vi##x = byteswap32(vi##x);

		int falign = (uintptr_t)src & 0xF;
		int ialign = (uintptr_t)dst & 0xF;
		if (falign != 0 || ialign != 0) {
			// do one unaligned conversion
			vf0 = _mm_loadu_ps(src);
			F32TOBE32(0)
			_mm_storeu_si128((__m128i *)dst, vi0);
			// and advance such that the destination ints are aligned
			unsigned int n = (16 - ialign) / 4;
			src += n;
			dst += n;
			count -= n;

			falign = (uintptr_t)src & 0xF;
			if (falign != 0) {
				// unaligned loads, aligned stores
				while (count >= 4) {
					vf0 = _mm_loadu_ps(src);
					F32TOBE32(0)
					_mm_store_si128((__m128i *)dst, vi0);
					src += 4;
					dst += 4;
					count -= 4;
				}
				goto VectorCleanup;
			}
		}

		// aligned loads, aligned stores
		while (count >= 4) {
			vf0 = _mm_load_ps(src);
			F32TOBE32(0)
			_mm_store_si128((__m128i *)dst, vi0);
			src += 4;
			dst += 4;
			count -= 4;
		}

VectorCleanup:
		if (count > 0) {
			// unaligned cleanup -- just do one unaligned vector at the end
			src = src0 + numToConvert - 4;
			dst = dst0 + numToConvert - 4;
			vf0 = _mm_loadu_ps(src);
			F32TOBE32(0)
			_mm_storeu_si128((__m128i *)dst, vi0);
		}
		RESTORE_ROUNDMODE
		return;
	}

	// scalar for small numbers of samples
	if (count > 0) {
		double scale = 2147483648.0, round = 0.5, max32 = 2147483648.0 - 1.0 - 0.5, min32 = 0.;
		ROUNDMODE_NEG_INF
		while (count-- > 0) {
			double f0 = *src++;
			f0 = f0 * scale + round;
			SInt32 i0 = FloatToInt(f0, min32, max32);
			*dst++ = OSSwapInt32(i0);
		}
		RESTORE_ROUNDMODE
	}
}

// ===================================================================================================

// ~14 instructions
static inline __m128i Pack32ToLE24(__m128i val, __m128i mask)
{
	__m128i store;
	val = _mm_srli_si128(val, 1);
	store = _mm_and_si128(val, mask);

	val = _mm_srli_si128(val, 1);
	mask = _mm_slli_si128(mask, 3);
	store = _mm_or_si128(store, _mm_and_si128(val, mask));

	val = _mm_srli_si128(val, 1);
	mask = _mm_slli_si128(mask, 3);
	store = _mm_or_si128(store, _mm_and_si128(val, mask));

	val = _mm_srli_si128(val, 1);
	mask = _mm_slli_si128(mask, 3);
	store = _mm_or_si128(store, _mm_and_si128(val, mask));
	return store;
}

// marginally faster than scalar
void Float32ToNativeInt24_X86( const Float32 *src, UInt8 *dst, unsigned int numToConvert )
{
	const Float32 *src0 = src;
	UInt8 *dst0 = dst;
	unsigned int count = numToConvert;

	if (count >= 6) {
		// vector -- requires 6+ samples
		ROUNDMODE_NEG_INF
		const __m128 vround = (const __m128) { 0.5f, 0.5f, 0.5f, 0.5f };
		const __m128 vmin = (const __m128) { -2147483648.0f, -2147483648.0f, -2147483648.0f, -2147483648.0f };
		const __m128 vmax = (const __m128) { kMaxFloat32, kMaxFloat32, kMaxFloat32, kMaxFloat32 };
		const __m128 vscale = (const __m128) { 2147483648.0f, 2147483648.0f, 2147483648.0f, 2147483648.0f };
		__m128i mask = _mm_setr_epi32(0x00FFFFFF, 0, 0, 0);
		// it is actually cheaper to copy and shift this mask on the fly than to have 4 of them
		__m128i store;
		union {
			UInt32 i[4];
			__m128i v;
		} u;
		__m128 vf0;
		__m128i vi0;

		int falign = (uintptr_t)src & 0xF;
		if (falign != 0) {
			// do one unaligned conversion
			vf0 = _mm_loadu_ps(src);
			F32TOLE32(0)
			store = Pack32ToLE24(vi0, mask);
			_mm_storeu_si128((__m128i *)dst, store);
			// and advance such that the source floats are aligned
			unsigned int n = (16 - falign) / 4;
			src += n;
			dst += 3*n;	// bytes
			count -= n;
		}

		while (count >= 6) {
			vf0 = _mm_load_ps(src);
			F32TOLE32(0)
			store = Pack32ToLE24(vi0, mask);
			_mm_storeu_si128((__m128i *)dst, store);	// destination always unaligned
			src += 4;
			dst += 12;	// bytes
			count -= 4;
		}

		if (count >= 4) {
			vf0 = _mm_load_ps(src);
			F32TOLE32(0)
			u.v = Pack32ToLE24(vi0, mask);
			((UInt32 *)dst)[0] = u.i[0];
			((UInt32 *)dst)[1] = u.i[1];
			((UInt32 *)dst)[2] = u.i[2];
			src += 4;
			dst += 12;	// bytes
			count -= 4;
		}

		if (count > 0) {
			// unaligned cleanup -- just do one unaligned vector at the end
			src = src0 + numToConvert - 4;
			dst = dst0 + 3*numToConvert - 12;
			vf0 = _mm_loadu_ps(src);
			F32TOLE32(0)
			u.v = Pack32ToLE24(vi0, mask);
			((UInt32 *)dst)[0] = u.i[0];
			((UInt32 *)dst)[1] = u.i[1];
			((UInt32 *)dst)[2] = u.i[2];
		}
		RESTORE_ROUNDMODE
		return;
	}

	// scalar for small numbers of samples
	if (count > 0) {
		double scale = 2147483648.0, round = 0.5, max32 = 2147483648.0 - 1.0 - 0.5, min32 = 0.;
		ROUNDMODE_NEG_INF
		while (count-- > 0) {
			double f0 = *src++;
			f0 = f0 * scale + round;
			UInt32 i0 = FloatToInt(f0, min32, max32);
			dst[0] = (UInt8)(i0 >> 8);
			dst[1] = (UInt8)(i0 >> 16);
			dst[2] = (UInt8)(i0 >> 24);
			dst += 3;
		}
		RESTORE_ROUNDMODE
	}
}

// ===================================================================================================
#pragma mark -
#pragma mark Int -> Float

void NativeInt16ToFloat32_X86( const SInt16 *src, Float32 *dst, unsigned int numToConvert )
{
	const SInt16 *src0 = src;
	Float32 *dst0 = dst;
	unsigned int count = numToConvert;

	if (count >= 8) {
		// vector -- requires 8+ samples
		// convert the 16-bit words to the high word of 32-bit values
#define LEI16TOF32(x, y) \
	vi##x = _mm_unpacklo_epi16(zero, vpack##x); \
	vi##y = _mm_unpackhi_epi16(zero, vpack##x); \
	vf##x = _mm_cvtepi32_ps(vi##x); \
	vf##y = _mm_cvtepi32_ps(vi##y); \
	vf##x = _mm_mul_ps(vf##x, vscale); \
	vf##y = _mm_mul_ps(vf##y, vscale);

		const __m128 vscale = (const __m128) { 1.0/2147483648.0f, 1.0/2147483648.0f, 1.0/2147483648.0f, 1.0/2147483648.0f };
		const __m128i zero = _mm_setzero_si128();
		__m128 vf0, vf1;
		__m128i vi0, vi1, vpack0;

		int ialign = (uintptr_t)src & 0xF;
		int falign = (uintptr_t)dst & 0xF;
		if (falign != 0 || ialign != 0) {
			// do one unaligned conversion
			vpack0 = _mm_loadu_si128((__m128i const *)src);
			LEI16TOF32(0, 1)
			_mm_storeu_ps(dst, vf0);
			_mm_storeu_ps(dst+4, vf1);
			// and advance such that the destination floats are aligned
			unsigned int n = (16 - falign) / 4;
			src += n;
			dst += n;
			count -= n;

			ialign = (uintptr_t)src & 0xF;
			if (ialign != 0) {
				// unaligned loads, aligned stores
				while (count >= 8) {
					vpack0 = _mm_loadu_si128((__m128i const *)src);
					LEI16TOF32(0, 1)
					_mm_store_ps(dst, vf0);
					_mm_store_ps(dst+4, vf1);
					src += 8;
					dst += 8;
					count -= 8;
				}
				goto VectorCleanup;
			}
		}

		// aligned loads, aligned stores
		while (count >= 8) {
			vpack0 = _mm_load_si128((__m128i const *)src);
			LEI16TOF32(0, 1)
			_mm_store_ps(dst, vf0);
			_mm_store_ps(dst+4, vf1);
			src += 8;
			dst += 8;
			count -= 8;
		}

VectorCleanup:
		if (count > 0) {
			// unaligned cleanup -- just do one unaligned vector at the end
			src = src0 + numToConvert - 8;
			dst = dst0 + numToConvert - 8;
			vpack0 = _mm_loadu_si128((__m128i const *)src);
			LEI16TOF32(0, 1)
			_mm_storeu_ps(dst, vf0);
			_mm_storeu_ps(dst+4, vf1);
		}
		return;
	}

	// scalar for small numbers of samples
	if (count > 0) {
		double scale = 1./32768.f;
		while (count-- > 0) {
			SInt16 i = *src++;
			double f = (double)i * scale;
			*dst++ = f;
		}
	}
}

// ===================================================================================================

void SwapInt16ToFloat32_X86( const SInt16 *src, Float32 *dst, unsigned int numToConvert )
{
	const SInt16 *src0 = src;
	Float32 *dst0 = dst;
	unsigned int count = numToConvert;

	if (count >= 8) {
		// vector -- requires 8+ samples
		// convert the 16-bit words to the high word of 32-bit values
#define BEI16TOF32 \
	vpack0 = byteswap16(vpack0); \
	vi0 = _mm_unpacklo_epi16(zero, vpack0); \
	vi1 = _mm_unpackhi_epi16(zero, vpack0); \
	vf0 = _mm_cvtepi32_ps(vi0); \
	vf1 = _mm_cvtepi32_ps(vi1); \
	vf0 = _mm_mul_ps(vf0, vscale); \
	vf1 = _mm_mul_ps(vf1, vscale);

		const __m128 vscale = (const __m128) { 1.0/2147483648.0f, 1.0/2147483648.0f, 1.0/2147483648.0f, 1.0/2147483648.0f };
		const __m128i zero = _mm_setzero_si128();
		__m128 vf0, vf1;
		__m128i vi0, vi1, vpack0;

		int ialign = (uintptr_t)src & 0xF;
		int falign = (uintptr_t)dst & 0xF;
		if (falign != 0 || ialign != 0) {
			// do one unaligned conversion
			vpack0 = _mm_loadu_si128((__m128i const *)src);
			BEI16TOF32
			_mm_storeu_ps(dst, vf0);
			_mm_storeu_ps(dst+4, vf1);
			// and advance such that the destination floats are aligned
			unsigned int n = (16 - falign) / 4;
			src += n;
			dst += n;
			count -= n;

			ialign = (uintptr_t)src & 0xF;
			if (ialign != 0) {
				// unaligned loads, aligned stores
				while (count >= 8) {
					vpack0 = _mm_loadu_si128((__m128i const *)src);
					BEI16TOF32
					_mm_store_ps(dst, vf0);
					_mm_store_ps(dst+4, vf1);
					src += 8;
					dst += 8;
					count -= 8;
				}
				goto VectorCleanup;
			}
		}

		// aligned loads, aligned stores
		while (count >= 8) {
			vpack0 = _mm_load_si128((__m128i const *)src);
			BEI16TOF32
			_mm_store_ps(dst, vf0);
			_mm_store_ps(dst+4, vf1);
			src += 8;
			dst += 8;
			count -= 8;
		}

VectorCleanup:
		if (count > 0) {
			// unaligned cleanup -- just do one unaligned vector at the end
			src = src0 + numToConvert - 8;
			dst = dst0 + numToConvert - 8;
			vpack0 = _mm_loadu_si128((__m128i const *)src);
			BEI16TOF32
			_mm_storeu_ps(dst, vf0);
			_mm_storeu_ps(dst+4, vf1);
		}
		return;
	}

	// scalar for small numbers of samples
	if (count > 0) {
		double scale = 1./32768.f;
		while (count-- > 0) {
			SInt16 i = *src++;
			i = OSSwapInt16(i);
			double f = (double)i * scale;
			*dst++ = f;
		}
	}
}

// ===================================================================================================

void NativeInt32ToFloat32_X86( const SInt32 *src, Float32 *dst, unsigned int numToConvert )
{
	const SInt32 *src0 = src;
	Float32 *dst0 = dst;
	unsigned int count = numToConvert;

	if (count >= 4) {
		// vector -- requires 4+ samples
#define LEI32TOF32(x) \
	vf##x = _mm_cvtepi32_ps(vi##x); \
	vf##x = _mm_mul_ps(vf##x, vscale);

		const __m128 vscale = (const __m128) { 1.0/2147483648.0f, 1.0/2147483648.0f, 1.0/2147483648.0f, 1.0/2147483648.0f };
		__m128 vf0;
		__m128i vi0;

		int ialign = (uintptr_t)src & 0xF;
		int falign = (uintptr_t)dst & 0xF;
		if (falign != 0 || ialign != 0) {
			// do one unaligned conversion
			vi0 = _mm_loadu_si128((__m128i const *)src);
			LEI32TOF32(0)
			_mm_storeu_ps(dst, vf0);
			// and advance such that the destination floats are aligned
			unsigned int n = (16 - falign) / 4;
			src += n;
			dst += n;
			count -= n;

			ialign = (uintptr_t)src & 0xF;
			if (ialign != 0) {
				// unaligned loads, aligned stores
				while (count >= 4) {
					vi0 = _mm_loadu_si128((__m128i const *)src);
					LEI32TOF32(0)
					_mm_store_ps(dst, vf0);
					src += 4;
					dst += 4;
					count -= 4;
				}
				goto VectorCleanup;
			}
		}

		// aligned loads, aligned stores
		while (count >= 4) {
			vi0 = _mm_load_si128((__m128i const *)src);
			LEI32TOF32(0)
			_mm_store_ps(dst, vf0);
			src += 4;
			dst += 4;
			count -= 4;
		}

VectorCleanup:
		if (count > 0) {
			// unaligned cleanup -- just do one unaligned vector at the end
			src = src0 + numToConvert - 4;
			dst = dst0 + numToConvert - 4;
			vi0 = _mm_loadu_si128((__m128i const *)src);
			LEI32TOF32(0)
			_mm_storeu_ps(dst, vf0);
		}
		return;
	}

	// scalar for small numbers of samples
	if (count > 0) {
		double scale = 1./2147483648.0f;
		while (count-- > 0) {
			SInt32 i = *src++;
			double f = (double)i * scale;
			*dst++ = f;
		}
	}
}

// ===================================================================================================

void SwapInt32ToFloat32_X86( const SInt32 *src, Float32 *dst, unsigned int numToConvert )
{
	const SInt32 *src0 = src;
	Float32 *dst0 = dst;
	unsigned int count = numToConvert;

	if (count >= 4) {
		// vector -- requires 4+ samples
#define BEI32TOF32(x) \
	vi##x = byteswap32(vi##x); \
	vf##x = _mm_cvtepi32_ps(vi##x); \
	vf##x = _mm_mul_ps(vf##x, vscale);

		const __m128 vscale = (const __m128) { 1.0/2147483648.0f, 1.0/2147483648.0f, 1.0/2147483648.0f, 1.0/2147483648.0f };
		__m128 vf0;
		__m128i vi0;

		int ialign = (uintptr_t)src & 0xF;
		int falign = (uintptr_t)dst & 0xF;
		if (falign != 0 || ialign != 0) {
			// do one unaligned conversion
			vi0 = _mm_loadu_si128((__m128i const *)src);
			BEI32TOF32(0)
			_mm_storeu_ps(dst, vf0);
			// and advance such that the destination floats are aligned
			unsigned int n = (16 - falign) / 4;
			src += n;
			dst += n;
			count -= n;

			ialign = (uintptr_t)src & 0xF;
			if (ialign != 0) {
				// unaligned loads, aligned stores
				while (count >= 4) {
					vi0 = _mm_loadu_si128((__m128i const *)src);
					BEI32TOF32(0)
					_mm_store_ps(dst, vf0);
					src += 4;
					dst += 4;
					count -= 4;
				}
				goto VectorCleanup;
			}
		}

		// aligned loads, aligned stores
		while (count >= 4) {
			vi0 = _mm_load_si128((__m128i const *)src);
			BEI32TOF32(0)
			_mm_store_ps(dst, vf0);
			src += 4;
			dst += 4;
			count -= 4;
		}

VectorCleanup:
		if (count > 0) {
			// unaligned cleanup -- just do one unaligned vector at the end
			src = src0 + numToConvert - 4;
			dst = dst0 + numToConvert - 4;
			vi0 = _mm_loadu_si128((__m128i const *)src);
			BEI32TOF32(0)
			_mm_storeu_ps(dst, vf0);
		}
		return;
	}

	// scalar for small numbers of samples
	if (count > 0) {
		double scale = 1./2147483648.0f;
		while (count-- > 0) {
			SInt32 i = *src++;
			i = OSSwapInt32(i);
			double f = (double)i * scale;
			*dst++ = f;
		}
	}
}
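/*
 * A minimal usage sketch for the converters above (ConvertBlockExample and
 * its buffers are illustrative assumptions; Float32 samples are assumed to be
 * in the canonical [-1, 1) range):
 */
void ConvertBlockExample(const Float32 *mix, SInt16 *wire, Float32 *check, unsigned int frames)
{
	Float32ToNativeInt16_X86(mix, wire, frames);   /* float -> 16-bit PCM */
	NativeInt16ToFloat32_X86(wire, check, frames); /* and back, for inspection */
}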
void GLDraw::DrawCircle(
	const Rect &r,
	float percent,
	const Rect *clip,
	const Vec3 &zRot, // (X, Y) is rotation center, Z is rotation in degrees
	r::Material &m,
	asset::MaterialLoader *l,
	bool sampleMaterialColor,
	const Vec4 &rgba
) {
	percent = math::Clamp(percent, -1.f, 1.f);

	int firstTri;
	int numTris;

	if (percent >= 0.f) {
		firstTri = 0;
		numTris = FloatToInt(percent * kNumCircleSteps) * kNumCircleStepTris;
	} else {
		firstTri = FloatToInt((1 + percent) * kNumCircleSteps) * kNumCircleStepTris;
		numTris = (kNumCircleSteps*kNumCircleStepTris) - firstTri;
	}

	if (numTris < 1)
		return;

	int flags = 0;

	if (clip) {
		flags |= kScissorTest_Enable;
		float x = ((clip->x - m_srcvp[0]) * m_todst[0]) + m_dstvp[0];
		float y = ((clip->y - m_srcvp[1]) * m_todst[1]) + m_dstvp[1];
		float w = clip->w * m_todst[0];
		float h = clip->h * m_todst[1];
		gls.Scissor(
			FloorFastInt(x),
			FloorFastInt(m_dstvp[3]-(y+h)),
			FloorFastInt(w),
			FloorFastInt(h)
		);
	} else {
		flags |= kScissorTest_Disable;
	}

	gl.MatrixMode(GL_MODELVIEW);
	gl.PushMatrix();
	gl.Translatef((float)r.x, (float)r.y, 0.f);

	if (zRot[2] != 0.f) {
		float cx = zRot[0]-r.x;
		float cy = zRot[1]-r.y;
		gl.Translatef(cx, cy, 0.f);
		gl.Rotatef(zRot[2], 0.f, 0.f, 1.f);
		gl.Translatef(-cx, -cy, 0.f);
	}

	gl.Scalef(r.w / (float)kBaseRectSize, r.h / (float)kBaseRectSize, 1.f);

	m.BindStates(flags);
	m.BindTextures(l);
	m.shader->Begin(r::Shader::kPass_Default, m);
	m_circle->BindAll(m.shader.get().get());

	Shader::Uniforms u(rgba);
	m.shader->BindStates(u, sampleMaterialColor);
	gls.Commit();
	m_circle->CompileArrayStates(*m.shader.get());
	m_circle->Draw(firstTri, numTris);
	CHECK_GL_ERRORS();
	m.shader->End();

	gl.PopMatrix();
}
// Draw function for this object type
void RedSquareDraw( REDSQUARE *self )
{
	WriteImageToScreen( "REDSQUARE",
		FloatToInt( self->rect_.center_.x_ + EPSILON ),
		FloatToInt( self->rect_.center_.y_ + EPSILON ) );
}
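/*
 * Why the EPSILON above: a centre coordinate that should be exactly 3.0 can
 * come out of float math as 2.9999997, and a truncating conversion then lands
 * on pixel 2. A tiny demonstration (assuming FloatToInt truncates and EPSILON
 * is a small positive constant such as 1e-4f):
 */
#include <assert.h>

static void EpsilonDemo(void)
{
	const float EPSILON = 1e-4f;
	float x = 2.9999997f;            /* "really" 3.0, off by float error   */
	assert((int)x == 2);             /* plain truncation loses a pixel     */
	assert((int)(x + EPSILON) == 3); /* epsilon nudges past the boundary   */
}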
////////////////////////////////////////////////////////////////////////
// Procedure DrawScene is called whenever the scene needs to be drawn.
void DrawScene(Scene& scene, int width, int height)
{
	// Choose OpenGL or student rendering here. The decision can be
	// based on useOpenGLRendering, useFastRendering, or both:
	// if (useOpenGLRendering || useFastRendering)
	if (scene.UseOpenGLRendering) {
		DrawSceneWithOpenGL(scene, width, height);
		return;
	}

	// ---------------------------------------------------------------------------
	// Student rendering code goes here
	glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
	glBegin(GL_POINTS);

	w = width;
	h = height;
	unsigned nLength = (unsigned)(width * height);
	zBuf = new float[nLength];
	for (unsigned i = 0; i < nLength; ++i)
		zBuf[i] = 1.f;

	// for each object
	size_t nObjects = scene.objects.size();
	for (size_t i = 0; i < nObjects; ++i) {
		Object &obj = scene.objects[i];

		// for each polygon
		size_t nPolys = obj.polygons.size();
		for (size_t j = 0; j < nPolys; ++j) {
			EdgeTable.clear();
			APolygon &poly = obj.polygons[j];
			std::vector<Vector3D> vVertices;

			// Set polygon color
			float rgba[4];
			Vector3D v3Light = bybyte_cast<Vector3D>(scene.lights[0].position);
			v3Light.normalize();
			SetColor(v3Light, poly[0].N, obj.Kd, rgba);

			// make the A object blue on both sides
			if (i == 2) {
				for (int c = 0; c < 3; ++c)
					rgba[c] = abs(rgba[c]);
			}

			// Get pixel coords for polygon & push edges
			size_t nVerts = poly.size();
			for (size_t k = 0; k < nVerts; ++k) {
				// current vertex: model -> view -> projected screen space
				Vector4D v4T = scene.viewing.Transform(poly[k].V);
				Vector3D v3S = bybyte_cast<Vector3D>(scene.projection.Transform(v4T).Hdiv());
				v3S[0] = (float)FloatToInt((v3S[0] + 1.f) * width / 2.f);
				v3S[1] = (float)FloatToInt((v3S[1] + 1.f) * height / 2.f);
				vVertices.push_back(v3S);
			}

			// put vertices in correct order
			for (size_t k = 0; k < nVerts; ++k) {
				unsigned nNext = poly[k].prevIndex;

				// skip horizontal edges
				if ((int)vVertices[k][1] == (int)vVertices[nNext][1])
					continue;

				if (vVertices[k][1] < vVertices[nNext][1]) {
					Edge e(vVertices[k], vVertices[nNext]);
					EdgeTable.push_back(e);
				} else {
					Edge e(vVertices[nNext], vVertices[k]);
					EdgeTable.push_back(e);
				}
			}

			ActiveEdgeList.clear();
			EdgeListIt ETIt, AELIt;
			size_t nEdges = EdgeTable.size();

			// Cull / clip edges
			ETIt = EdgeTable.begin();
			while (ETIt != EdgeTable.end()) {
				// y culling
				if (ETIt->v1[1] < 0.f || ETIt->v0[1] >= height)
					EdgeTable.erase(ETIt++);
				else {
					// y clipping
					if (ETIt->v0[1] < 0) {
						ETIt->x += (-ETIt->v0[1] * ETIt->dx);
						ETIt->z += (-ETIt->v0[1] * ETIt->dz);
					} else if (ETIt->v1[1] > height) {
						float fYMax = (float)(height - 1);
						float fYDif = ETIt->v1[1] - fYMax;
						ETIt->v1[1] = fYMax;
						ETIt->v1[0] -= (fYDif * ETIt->dx);
						ETIt->v1[2] -= (fYDif * ETIt->dz);
					}
					++ETIt;
				}
			}

			// Set values for scanline 0
			ETIt = EdgeTable.begin();
			while (ETIt != EdgeTable.end()) {
				if ((ETIt->v0[1] <= 0) && (ETIt->v0[1] != ETIt->v1[1]) && (ETIt->v1[1] != 0.f)) {
					ActiveEdgeList.push_back(*ETIt);
					EdgeTable.erase(ETIt++);
				} else
					++ETIt;
			}
			ActiveEdgeList.sort();

			// for each scanline
			for (int y = 0; y < height; ++y) {
				// draw by pair
				for (AELIt = ActiveEdgeList.begin(); AELIt != ActiveEdgeList.end(); ++AELIt) {
					EdgeListIt AELItPrev = AELIt++;
					float z = AELItPrev->z;
					float dzdx = (AELIt->z - AELItPrev->z) / (AELIt->x - AELItPrev->x);
					int x0 = FloatToInt(AELItPrev->x), x1 = FloatToInt(AELIt->x);
					SetBounds(x0, x1);
					for (int x = x0; x < x1; ++x) {
						if (z < ZBuf(x, y)) {
							ZBuf(x, y) = z;
							glColor4fv(rgba);
							glVertex2i(x, y);
						}
						z += dzdx;
					}
				}

				// insert edges into AEL
				bool bSort = false;
				ETIt = EdgeTable.begin();
				while (ETIt != EdgeTable.end()) {
					if (IsInRange((float)y, ETIt->v0[1], ETIt->v1[1])) {
						ActiveEdgeList.push_back(*ETIt);
						EdgeTable.erase(ETIt++);
						bSort = true;
					} else
						++ETIt;
				}

				// increment edges on AEL & remove passed edges
				AELIt = ActiveEdgeList.begin();
				while (AELIt != ActiveEdgeList.end()) {
					AELIt->Inc();
					if (!IsInRange((float)y, AELIt->v0[1], AELIt->v1[1]))
						ActiveEdgeList.erase(AELIt++);
					else
						++AELIt;
				}

				// sort the AEL
				if (bSort)
					ActiveEdgeList.sort();
			} // [END] for each scanline
		} // [END] for each polygon
	} // [END] for each object

	delete [] zBuf;
	glEnd();
}
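// The rasterizer above assumes an Edge type roughly like the following
// (a reconstruction for readability, not the original definition): v0/v1 are
// the lower and upper endpoints, x/z the current scanline intersection, and
// dx/dz the per-scanline increments.
struct Edge {
	Vector3D v0, v1;   // endpoints, with v0[1] <= v1[1]
	float x, z;        // current x and depth on this scanline
	float dx, dz;      // increments per scanline step

	Edge(const Vector3D &a, const Vector3D &b) : v0(a), v1(b) {
		float dy = b[1] - a[1];    // callers skip horizontal edges, so dy != 0
		dx = (b[0] - a[0]) / dy;
		dz = (b[2] - a[2]) / dy;
		x = a[0];
		z = a[2];
	}

	void Inc() { x += dx; z += dz; }                          // step one scanline
	bool operator<(const Edge &o) const { return x < o.x; }  // AEL sorts by x
};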