Beispiel #1
0
/* Divide NUM by DEN using SPU quadword intrinsics.

   Returns the value accumulated in r0 (the quotient bits that were
   accepted by the loop below).  If RP is non-null, the value left in n0
   after the loop (what remains of NUM after all accepted subtractions,
   i.e. the remainder) is stored through it.

   The loop has the shape of a bit-at-a-time shift-and-subtract division:
   DEN and a single marker bit are first shifted left by the difference in
   leading-zero counts, then on each iteration a trial subtraction
   n0 - d0 is computed and conditionally kept, and both DEN and the marker
   bit are moved one position to the right.

   NOTE(review): no explicit handling of DEN == 0, or of DEN having fewer
   leading zeros than NUM (negative shift), is visible here -- confirm the
   intended behaviour against the intrinsic definitions.  */
UTItype
__udivmodti4 (UTItype num, UTItype den, UTItype * rp)
{
  // Number of bit positions by which den must move left to line up its
  // leading bit with that of num.
  qword shift =
    si_from_uint (count_leading_zeros (den) - count_leading_zeros (num));
  qword n0 = si_from_UTItype (num);	// running dividend / remainder
  qword d0 = si_from_UTItype (den);	// divisor, shifted during the loop
  // Marker for the quotient bit currently being tried (presumably the
  // 128-bit value 1 before shifting -- TODO confirm against fsmbi/andi).
  qword bit = si_andi (si_fsmbi (1), 1);
  qword r0 = si_il (0);			// quotient accumulated so far
  // Constant fed to si_sf in the borrow chain below; presumably adjusts
  // the shifted borrow word before it is consumed -- TODO confirm.
  qword m1 = si_fsmbi (0x000f);
  qword mask, r1, n1;

  // Shift both the divisor and the marker bit left by `shift`; the count
  // is applied in two steps (si_shlqbi, then si_shlqbybi), presumably
  // bits first and then whole bytes.
  d0 = si_shlqbybi (si_shlqbi (d0, shift), shift);
  bit = si_shlqbybi (si_shlqbi (bit, shift), shift);

  do
    {
      // Candidate quotient with the current bit set.
      r1 = si_or (r0, bit);

      // n1 = n0 - d0 in TImode.
      // The subtraction is built from word-wise operations: a borrow is
      // generated (si_bg), moved up by 4 bytes (si_shlqbyi) and adjusted
      // with m1 (si_sf), then extended twice more (si_bgx) before the
      // final subtract-with-borrow (si_sfx).  Statement order matters.
      n1 = si_bg (d0, n0);
      n1 = si_shlqbyi (n1, 4);
      n1 = si_sf (m1, n1);
      n1 = si_bgx (d0, n0, n1);
      n1 = si_shlqbyi (n1, 4);
      n1 = si_sf (m1, n1);
      n1 = si_bgx (d0, n0, n1);
      n1 = si_shlqbyi (n1, 4);
      n1 = si_sf (m1, n1);
      n1 = si_sfx (d0, n0, n1);

      // Build a select mask from the test n1 > -1 (i.e. the trial
      // difference did not go negative) and use it to keep either the old
      // or the candidate quotient/remainder.
      mask = si_fsm (si_cgti (n1, -1));
      r0 = si_selb (r0, r1, mask);
      n0 = si_selb (n0, n1, mask);
      // Step the marker bit and the divisor by -1 (presumably one bit to
      // the right) for the next, less significant, quotient bit.
      bit = si_rotqmbii (bit, -1);
      d0 = si_rotqmbii (d0, -1);
    }
  // Continue until the marker bit has been shifted out entirely.
  while (si_to_uint (si_orx (bit)));
  if (rp)
    *rp = si_to_UTItype (n0);
  return si_to_UTItype (r0);
}
Beispiel #2
0
/* Build a result quadword from the input SI using only integer SPU
   intrinsics (presumably an unsigned 32-bit integer to double conversion,
   going by the name -- confirm against the caller).

   Steps, as visible in the code:
     - count the leading zeros of SI and shift SI left by that amount;
     - add the shifted value to itself (a doubling; presumably this drops
       the leading one bit -- TODO confirm);
     - subtract the leading-zero count from 1054 (= 1023 + 31) and clear
       that value wherever the count equals 32;
     - merge the two pieces with the byte pattern __sidf_pat and shift the
       quadword left by 4 bits.  */
qword
__float_unssidf (qword SI)
{
  qword lead_zeros = si_clz (SI);
  qword lz_is_32 = si_ceqi (lead_zeros, 32);

  /* Input shifted left by its leading-zero count, then added to itself.  */
  qword aligned = si_shl (SI, lead_zeros);
  qword aligned_x2 = si_a (aligned, aligned);

  /* 1054 minus the leading-zero count, forced to zero where lz_is_32.  */
  qword scale = si_andc (si_sf (lead_zeros, si_il (1054)), lz_is_32);

  qword merged = si_shufb (scale, aligned_x2, *(const qword *) __sidf_pat);

  return si_shlqbii (merged, 4);
}
Beispiel #3
0
/* Build a result quadword from the input DI using integer SPU intrinsics
   plus one double-precision add (presumably an unsigned 64-bit integer to
   double conversion, going by the name -- confirm against the caller).

   Steps, as visible in the code:
     - count the leading zeros of DI and shift DI left by that amount;
     - add the shifted value to itself;
     - subtract the leading-zero count from the constant __didf_scale and
       clear that value wherever the count equals 32;
     - merge the two pieces with the byte pattern __didf_pat and shift the
       quadword left by 4 bits;
     - add that quadword, as doubles, to a copy of itself shifted left by
       8 bytes (presumably summing the separately converted 32-bit
       halves -- TODO confirm).  */
qword
__float_unsdidf (qword DI)
{
  qword lead_zeros = si_clz (DI);
  qword lz_is_32 = si_ceqi (lead_zeros, 32);

  /* Input shifted left by its leading-zero count, then added to itself.  */
  qword aligned = si_shl (DI, lead_zeros);
  qword aligned_x2 = si_a (aligned, aligned);

  /* __didf_scale minus the leading-zero count, zeroed where lz_is_32.  */
  qword scale =
    si_andc (si_sf (lead_zeros, *(const qword *) __didf_scale), lz_is_32);

  qword merged = si_shufb (scale, aligned_x2, *(const qword *) __didf_pat);
  qword parts = si_shlqbii (merged, 4);
  qword parts_up8 = si_shlqbyi (parts, 8);

  return si_dfa (parts, parts_up8);
}
Beispiel #4
0
void InitBasisEtc()
{
  // Use a fixed initial step size for now; 128m for lod 0.
  // This yields an fft tile size of 32 x 128m = 4096m for lod 0
  // and maximum dimensions of 8192m x 8192m
  step = g_R2OCon.m_step;
  vf32 step_vec = (vf32){step, 0, step, 0};
  
  // get inverse-step using float magic (since taking the reciprocal of a power of 2 yields a 1-bit error)
  qword q_step  = si_from_float(step);
  qword q_magic = si_ilhu(0x7F00);
  inv_step = si_to_float(si_sf(q_step, q_magic));

  // set clip window
  clip_min = g_WaterObject.m_origin;
  clip_max = g_WaterObject.m_origin + g_WaterObject.m_dimensions;

  // set origin at gridpoint below clip min
  f32 magic_float = 1.5f * 8388608.0f * step;
  vf32 magic_vf32 = (vf32){magic_float, 0, magic_float, 0};
  origin_world = (clip_min + magic_vf32) - magic_vf32;

  // compute gridpoint above clip max
  vf32 max_corner = (clip_max + magic_vf32) - magic_vf32;
  max_corner += step_vec;

  // offset both corners by the necessary amount of padding
  origin_world -= step_vec * spu_splats(8.0f);
  max_corner   += step_vec * spu_splats(8.0f);

  // set num cols & num rows
  vf32 dims = max_corner - origin_world;
  nc = (i32)(spu_extract(dims,0) * inv_step) + 1;
  nr = (i32)(spu_extract(dims,2) * inv_step) + 1;

  // record true nc, nr
  true_nc = nc - 16;
  true_nr = nr - 16;

  // alignment requirements (ooh, that's a bit strict)
  nc = (nc + 7) & -8;
  nr = (nr + 7) & -8;

  // deal with large grids
  if (nc > 80)
  {
    nc = 80;
    true_nc = 64;
    dims = spu_insert((nc-1)*step, dims, 0);
  }
  if (nr > 80)
  {
    nr = 80;
    true_nr = 64;
    dims = spu_insert((nr-1)*step, dims, 2);
  }
  max_corner = origin_world + dims;


  even_step = step;
  even_inv_step = inv_step;
  even_basis_col = (vf32){1.0f, 0.0f, 0.0f, 0.0f};
  even_basis_row = (vf32){0.0f, 0.0f, 1.0f, 0.0f};

  const f32 r = 0.707106781187f;
  odd_step    = even_step * r;
  odd_inv_step= even_inv_step * r * 2.0f;
  odd_basis_col = (vf32){ r, 0.0f, r, 0.0f};
  odd_basis_row = (vf32){-r, 0.0f, r, 0.0f};

  basis_col = even_basis_col;
  basis_row = even_basis_row;
  dvc_world = spu_splats(step) * basis_col;
  dvr_world = spu_splats(step) * basis_row;

  // set base lod origin
  g_RenderData.m_origins[0]   = origin_world;
  g_RenderData.m_cols_rows[0] = nc<<8 | nr;
  c0_amb = 0;
  r0_amb = 0;

  SetBasisEtc(0,0);
}