// Divides num by den on SPU quadword registers using a bit-at-a-time
// shift-and-subtract loop.
//
// Returns the accumulated quotient.  When rp is non-null, the value left
// over after all subtractions (the remainder) is stored through it.
UTItype
__udivmodti4 (UTItype num, UTItype den, UTItype * rp)
{
    // Distance between the leading set bits of den and num.
    qword align = si_from_uint (count_leading_zeros (den) - count_leading_zeros (num));
    qword remainder = si_from_UTItype (num);
    qword divisor = si_from_UTItype (den);
    // Quadword holding the value 1: the quotient bit currently being tried.
    qword quot_bit = si_andi (si_fsmbi (1), 1);
    qword quotient = si_il (0);
    // All-ones in the last word slot; used to seed the borrow chain below.
    qword low_word_ones = si_fsmbi (0x000f);

    // Move both the divisor and the trial quotient bit left by `align`
    // (bit-granular shift first, then the byte-granular part).
    divisor = si_shlqbybi (si_shlqbi (divisor, align), align);
    quot_bit = si_shlqbybi (si_shlqbi (quot_bit, align), align);

    for (;;) {
        qword trial_quotient = si_or (quotient, quot_bit);

        // trial_remainder = remainder - divisor in TImode.
        // The borrow is generated per word, moved one word over, and fed
        // back in; three rounds carry it across all four words before the
        // final extended subtract.
        qword trial_remainder = si_bg (divisor, remainder);
        trial_remainder = si_shlqbyi (trial_remainder, 4);
        trial_remainder = si_sf (low_word_ones, trial_remainder);
        trial_remainder = si_bgx (divisor, remainder, trial_remainder);
        trial_remainder = si_shlqbyi (trial_remainder, 4);
        trial_remainder = si_sf (low_word_ones, trial_remainder);
        trial_remainder = si_bgx (divisor, remainder, trial_remainder);
        trial_remainder = si_shlqbyi (trial_remainder, 4);
        trial_remainder = si_sf (low_word_ones, trial_remainder);
        trial_remainder = si_sfx (divisor, remainder, trial_remainder);

        // Accept the trial values only when the difference is > -1,
        // i.e. the subtraction did not go negative.
        qword accept = si_fsm (si_cgti (trial_remainder, -1));
        quotient = si_selb (quotient, trial_quotient, accept);
        remainder = si_selb (remainder, trial_remainder, accept);

        // Step down to the next lower quotient bit.
        quot_bit = si_rotqmbii (quot_bit, -1);
        divisor = si_rotqmbii (divisor, -1);

        // Finished once the trial bit has been shifted out entirely.
        if (!si_to_uint (si_orx (quot_bit)))
            break;
    }

    if (rp)
        *rp = si_to_UTItype (remainder);
    return si_to_UTItype (quotient);
}
// Builds a double-precision bit pattern from the unsigned 32-bit integer
// word(s) held in SI and returns it as a quadword.
//
// The result is assembled from an exponent word (1054 minus the leading
// zero count, forced to 0 when the input word is zero) and the input
// shifted so that its leading one is dropped; the two are interleaved with
// the shuffle pattern __sidf_pat (defined elsewhere; layout not visible
// here) and the whole quadword is then shifted left by 4 bits.
qword __float_unssidf (qword SI)
{
    qword lead_zeros = si_clz (SI);
    // 1054 == 1023 + 31 (presumably the IEEE double bias plus the index of
    // the top bit of a 32-bit word -- confirm).
    qword exp_base = si_il (1054);
    // Input with its leading one moved up to bit 31.
    qword normalized = si_shl (SI, lead_zeros);
    // All-ones where the word was zero (32 leading zeros).
    qword is_zero = si_ceqi (lead_zeros, 32);
    // exp_base - lead_zeros.
    qword exponent = si_sf (lead_zeros, exp_base);
    // Doubling shifts left once more, discarding the leading one.
    qword fraction = si_a (normalized, normalized);
    // Clear the exponent for a zero input.
    qword exponent_or_zero = si_andc (exponent, is_zero);
    qword packed = si_shufb (exponent_or_zero, fraction, *(const qword *) __sidf_pat);

    return si_shlqbii (packed, 4);
}
// Builds a double-precision result from the unsigned 64-bit integer held
// in DI and returns it as a quadword.
//
// Each 32-bit word is normalized on its own (leading-zero count, shift,
// exponent taken from __didf_scale minus the count, zero words given a
// zero exponent), the pieces are interleaved with the shuffle pattern
// __didf_pat, and the resulting two doubles are added together.
// __didf_scale and __didf_pat are defined elsewhere; presumably they hold
// the per-word exponent bases and the byte layout -- confirm.
qword __float_unsdidf (qword DI)
{
    qword lead_zeros = si_clz (DI);
    // Each word with its leading one moved up to bit 31.
    qword normalized = si_shl (DI, lead_zeros);
    // All-ones in every word that was zero (32 leading zeros).
    qword is_zero = si_ceqi (lead_zeros, 32);
    // scale - lead_zeros, per word.
    qword exponent = si_sf (lead_zeros, *(const qword *) __didf_scale);
    // Doubling shifts left once more, discarding the leading one.
    qword fraction = si_a (normalized, normalized);
    // Clear the exponent of zero words.
    qword exponent_or_zero = si_andc (exponent, is_zero);
    qword packed = si_shufb (exponent_or_zero, fraction, *(const qword *) __didf_pat);
    qword parts = si_shlqbii (packed, 4);
    // Bring the second double (8 bytes further along) into the first slot.
    qword second_part = si_shlqbyi (parts, 8);

    return si_dfa (parts, second_part);
}
void InitBasisEtc() { // Use a fixed initial step size for now; 128m for lod 0. // This yields an fft tile size of 32 x 128m = 4096m for lod 0 // and maximum dimensions of 8192m x 8192m step = g_R2OCon.m_step; vf32 step_vec = (vf32){step, 0, step, 0}; // get inverse-step using float magic (since taking the reciprocal of a power of 2 yields a 1-bit error) qword q_step = si_from_float(step); qword q_magic = si_ilhu(0x7F00); inv_step = si_to_float(si_sf(q_step, q_magic)); // set clip window clip_min = g_WaterObject.m_origin; clip_max = g_WaterObject.m_origin + g_WaterObject.m_dimensions; // set origin at gridpoint below clip min f32 magic_float = 1.5f * 8388608.0f * step; vf32 magic_vf32 = (vf32){magic_float, 0, magic_float, 0}; origin_world = (clip_min + magic_vf32) - magic_vf32; // compute gridpoint above clip max vf32 max_corner = (clip_max + magic_vf32) - magic_vf32; max_corner += step_vec; // offset both corners by the necessary amount of padding origin_world -= step_vec * spu_splats(8.0f); max_corner += step_vec * spu_splats(8.0f); // set num cols & num rows vf32 dims = max_corner - origin_world; nc = (i32)(spu_extract(dims,0) * inv_step) + 1; nr = (i32)(spu_extract(dims,2) * inv_step) + 1; // record true nc, nr true_nc = nc - 16; true_nr = nr - 16; // alignment requirements (ooh, that's a bit strict) nc = (nc + 7) & -8; nr = (nr + 7) & -8; // deal with large grids if (nc > 80) { nc = 80; true_nc = 64; dims = spu_insert((nc-1)*step, dims, 0); } if (nr > 80) { nr = 80; true_nr = 64; dims = spu_insert((nr-1)*step, dims, 2); } max_corner = origin_world + dims; even_step = step; even_inv_step = inv_step; even_basis_col = (vf32){1.0f, 0.0f, 0.0f, 0.0f}; even_basis_row = (vf32){0.0f, 0.0f, 1.0f, 0.0f}; const f32 r = 0.707106781187f; odd_step = even_step * r; odd_inv_step= even_inv_step * r * 2.0f; odd_basis_col = (vf32){ r, 0.0f, r, 0.0f}; odd_basis_row = (vf32){-r, 0.0f, r, 0.0f}; basis_col = even_basis_col; basis_row = even_basis_row; dvc_world = spu_splats(step) * 
basis_col; dvr_world = spu_splats(step) * basis_row; // set base lod origin g_RenderData.m_origins[0] = origin_world; g_RenderData.m_cols_rows[0] = nc<<8 | nr; c0_amb = 0; r0_amb = 0; SetBasisEtc(0,0); }