Beispiel #1
0
bool BBox::intersect(const Ray& ray, float *tnear, float *tfar) const {

	// you may already have those values hanging around somewhere
	const __m128
		plus_inf	= loadps(ps_cst_plus_inf),
		minus_inf	= loadps(ps_cst_minus_inf);

	// use whatever's apropriate to load.
	const __m128
		box_min	= loadps(&min),
		box_max	= loadps(&max),
		pos	= loadps(&ray.o),
		inv_dir	= loadps(&ray.inv_d);

	// use a div if inverted directions aren't available
	const __m128 l1 = mulps(subps(box_min, pos), inv_dir);
	const __m128 l2 = mulps(subps(box_max, pos), inv_dir);

	// the order we use for those min/max is vital to filter out
	// NaNs that happens when an inv_dir is +/- inf and
	// (box_min - pos) is 0. inf * 0 = NaN
	const __m128 filtered_l1a = minps(l1, plus_inf);
	const __m128 filtered_l2a = minps(l2, plus_inf);

	const __m128 filtered_l1b = maxps(l1, minus_inf);
	const __m128 filtered_l2b = maxps(l2, minus_inf);

	// now that we're back on our feet, test those slabs.
	__m128 lmax = maxps(filtered_l1a, filtered_l2a);
	__m128 lmin = minps(filtered_l1b, filtered_l2b);

	// unfold back. try to hide the latency of the shufps & co.
	const __m128 lmax0 = rotatelps(lmax);
	const __m128 lmin0 = rotatelps(lmin);
	lmax = minss(lmax, lmax0);
	lmin = maxss(lmin, lmin0);

	const __m128 lmax1 = muxhps(lmax,lmax);
	const __m128 lmin1 = muxhps(lmin,lmin);
	lmax = minss(lmax, lmax1);
	lmin = maxss(lmin, lmin1);

	const bool ret = _mm_comige_ss(lmax, _mm_setzero_ps()) & _mm_comige_ss(lmax,lmin);

	storess(lmin, tnear);
	storess(lmax, tfar);

	return  ret;
}
Beispiel #2
0
/* Upper bound for \delta(vec): estimated loss of accuracy when evaluating
 * \tilde{v} on the entries vec[i].
 * Returns 3 + m + z_pval(nf_get_degree(nf), ell), where m is the largest
 * vtilde_prec_x(nf, vec[i], ell) over all entries, and never less than 0. */
static long
vtilde_prec(GEN nf, GEN vec, GEN ell)
{
  const long n = lg(vec);
  long worst = 0;
  long k;

  for (k = 1; k < n; k++)
  {
    const long cur = vtilde_prec_x(nf, gel(vec,k), ell);
    worst = maxss(worst, cur);
  }
  return 3 + worst + z_pval(nf_get_degree(nf), ell);
}
Beispiel #3
0
/* Per-element estimate used by vtilde_prec.
 * If x is not a t_MAT, this is just vnorm_x(nf, x, ell). Otherwise it is
 * the largest vnorm_x over the entries of the first component gel(x,1),
 * and never less than 0. (Presumably x is then a factorization matrix and
 * gel(x,1) its column of bases; confirm against callers.) */
static long
vtilde_prec_x(GEN nf, GEN x, GEN ell)
{
  GEN bases;
  long k, n, best = 0;

  if (typ(x) != t_MAT) return vnorm_x(nf,x,ell);

  bases = gel(x,1);
  n = lg(bases);
  for (k = 1; k < n; k++)
  {
    const long cur = vnorm_x(nf, gel(bases,k), ell);
    best = maxss(best, cur);
  }
  return best;
}
Beispiel #4
0
inline bool ray_box_intersect(const box_t & b, const ray_t & ray, rayseg_t & rs) {
    /* you may already have those values hanging around somewhere */
    const __m128
        plus_inf = loadps(ps_cst_plus_inf), minus_inf = loadps(ps_cst_minus_inf);

    /* use whatever's apropriate to load. */
    const __m128
        box_min = loadps(&b.min), box_max = loadps(&b.max), pos =
        loadps(&ray.pos), inv_dir = loadps(&ray.inv_dir);

    /* use a div if inverted directions aren't available */
    const __m128 l1 = mulps(subps(box_min, pos), inv_dir);
    const __m128 l2 = mulps(subps(box_max, pos), inv_dir);

    /* the order we use for those min/max is vital to filter out */
    /* NaNs that happens when an inv_dir is +/- inf and */
    /* (box_min - pos) is 0. inf * 0 = NaN */
    const __m128 filtered_l1a = minps(l1, plus_inf);
    const __m128 filtered_l2a = minps(l2, plus_inf);
    const __m128 filtered_l1b = maxps(l1, minus_inf);
    const __m128 filtered_l2b = maxps(l2, minus_inf);

    /* now that we're back on our feet, test those slabs. */
    __m128 lmax = maxps(filtered_l1a, filtered_l2a);
    __m128 lmin = minps(filtered_l1b, filtered_l2b);

    /* unfold back. try to hide the latency of the shufps & co. */
    const __m128 lmax0 = rotatelps(lmax);
    const __m128 lmin0 = rotatelps(lmin);
    lmax = minss(lmax, lmax0);
    lmin = maxss(lmin, lmin0);
    const __m128 lmax1 = muxhps(lmax, lmax);
    const __m128 lmin1 = muxhps(lmin, lmin);
    lmax = minss(lmax, lmax1);
    lmin = maxss(lmin, lmin1);
    const bool ret =
        _mm_comige_ss(lmax, _mm_setzero_ps()) & _mm_comige_ss(lmax, lmin);
    storess(lmin, &rs.t_near);
    storess(lmax, &rs.t_far);
    return ret;
}
Beispiel #5
0
/* Assemble one matrix from the blocks stored in v. Block dimensions come
 * from _matsize (s[1] is used as the height, s[2] as the width).
 *   - v empty (lg(v) == 1): returns an empty t_MAT;
 *   - t_VEC: blocks are placed side by side; the result has the largest
 *     block height and the summed widths;
 *   - t_COL: blocks are stacked; the result has the summed heights and
 *     the largest block width;
 *   - t_MAT: v is a grid of blocks; grid row i gets the largest height in
 *     that row and grid column j the largest width in that column;
 *   - anything else: pari_err_TYPE.
 * The result starts as zeromatcopy(H, L) and is filled in by matfill. */
GEN
shallowmatconcat(GEN v)
{
  const long n = lg(v);
  long r, c, rows = 0, cols = 0;
  GEN res;

  if (n == 1) return cgetg(1,t_MAT);
  switch(typ(v))
  {
    case t_VEC:
    {
      long xoff = 0;
      /* first pass: overall dimensions */
      for (c = 1; c < n; c++)
      {
        GEN blk = gel(v,c);
        GEN dim = _matsize(blk);
        rows = maxss(rows, dim[1]);
        cols += dim[2];
      }
      res = zeromatcopy(rows, cols);
      /* second pass: drop each block at its column offset */
      for (c = 1; c < n; c++)
      {
        GEN blk = gel(v,c);
        GEN dim = _matsize(blk);
        matfill(res, blk, 0, xoff, 1);
        xoff += dim[2];
      }
      return res;
    }

    case t_COL:
    {
      long yoff = 0;
      /* first pass: overall dimensions */
      for (r = 1; r < n; r++)
      {
        GEN blk = gel(v,r);
        GEN dim = _matsize(blk);
        rows += dim[1];
        cols = maxss(cols, dim[2]);
      }
      res = zeromatcopy(rows, cols);
      /* second pass: drop each block at its row offset */
      for (r = 1; r < n; r++)
      {
        GEN blk = gel(v,r);
        GEN dim = _matsize(blk);
        matfill(res, blk, yoff, 0, 1);
        yoff += dim[1];
      }
      return res;
    }

    case t_MAT:
    {
      const long m = lgcols(v);
      GEN rowh = zero_zv(m-1); /* height of each grid row */
      GEN colw = zero_zv(n-1); /* width of each grid column */
      long xoff = 0, yoff;

      for (c = 1; c < n; c++)
        for (r = 1; r < m; r++)
        {
          GEN blk = gcoeff(v,r,c);
          GEN dim = _matsize(blk);
          rowh[r] = maxss(rowh[r], dim[1]);
          colw[c] = maxss(colw[c], dim[2]);
        }
      for (r = 1; r < m; r++) rows += rowh[r];
      for (c = 1; c < n; c++) cols += colw[c];
      res = zeromatcopy(rows, cols);
      for (c = 1; c < n; c++)
      {
        yoff = 0;
        for (r = 1; r < m; r++)
        {
          GEN blk = gcoeff(v,r,c);
          matfill(res, blk, yoff, xoff, minss(rowh[r], colw[c]));
          yoff += rowh[r];
        }
        xoff += colw[c];
      }
      return res;
    }

    default:
      pari_err_TYPE("shallowmatconcat", v);
      return NULL;
  }
}
Beispiel #6
0
/* Internal worker; presumably the implementation behind bnflog (the
 * pari_err_BUG tags below use that name).
 * Input: bnf (validated by checkbnf) and ell.
 * Output: a 3-component vector [D, CLt, ellsylow(cycAp, ell)], where D is
 * the matsnf0 output for the relation matrix M with its first entry dropped
 * and CLt is the value returned by CL_tilde. If the exponent prec computed
 * below equals 1, three empty t_VECs are returned instead.
 * Raises pari_err_BUG if an internal consistency check fails. */
static GEN
bnflog_i(GEN bnf, GEN ell)
{
  long prec0, prec;
  GEN nf, US, vdegS, S, T, M, CLp, CLt, Ftilde, vtG, ellk;
  GEN D, Ap, cycAp, bnfS;
  long i, j, lS, lvAp;

  checkbnf(bnf);
  nf = checknf(bnf);
  S = idealprimedec(nf, ell);
  bnfS = bnfsunit0(bnf, S, nf_GENMAT, LOWDEFAULTPREC); /* S-units */
  US = leafcopy(gel(bnfS,1));
  /* starting working precision: at least 30, or what vtilde_prec asks for */
  prec0 = maxss(30, vtilde_prec(nf, US, ell));
  /* prepend bnf_get_fu(bnf) to the S-units and retag the result as a column */
  US = shallowconcat(bnf_get_fu(bnf), US);
  settyp(US, t_COL);
  T = padicfact(nf, S, prec0);
  /* one small-integer ftilde value per element of S */
  lS = lg(S); Ftilde = cgetg(lS, t_VECSMALL);
  for (j = 1; j < lS; j++) Ftilde[j] = ftilde(nf, gel(S,j), gel(T,j));
  CLp = CL_prime(bnf, ell, S);
  cycAp = gel(CLp,1);
  Ap = gel(CLp,2);
  /* retry CL_tilde, doubling prec0 and recomputing T each time, until it
   * returns a non-NULL result */
  for(;;)
  {
    CLt = CL_tilde(nf, US, ell, T, Ftilde, &vtG, prec0);
    if (CLt) break;
    prec0 <<= 1;
    T = padicfact(nf, S, prec0);
  }
  prec = ellexpo(cycAp, ell) + ellexpo(CLt,ell) + 1;
  /* both ellexpo terms sum to 0: return three empty vectors */
  if (prec == 1) return mkvec3(cgetg(1,t_VEC), cgetg(1,t_VEC), cgetg(1,t_VEC));

  vdegS = get_vdegS(Ftilde, ell, prec0);
  ellk = powiu(ell, prec); /* ell^prec, modulus for the relation matrix */
  lvAp = lg(Ap);
  if (lvAp > 1)
  {
    GEN Kcyc = bnf_get_cyc(bnf);
    /* C has one row per element of Ap and one column per element of S;
     * it starts out as zero */
    GEN C = zeromatcopy(lvAp-1, lS-1);
    GEN Rell = gel(CLp,3), Uell = gel(CLp,4), ordS = gel(CLp,5);
    for (i = 1; i < lvAp; i++)
    {
      GEN a, b, bi, A = gel(Ap,i), d = gel(cycAp,i);
      bi = isprincipal(bnf, A);
      a = vecmodii(ZC_Z_mul(bi,d), Kcyc);
      /* a in subgroup generated by S = Rell; hence b integral */
      b = hnf_invimage(Rell, a);
      b = vecmodii(ZM_ZC_mul(Uell, ZC_neg(b)), ordS);
      /* pair A with an empty matrix before taking the d-th power */
      A = mkvec2(A, cgetg(1,t_MAT));
      A = idealpowred(nf, A, d);
      /* find a principal representative of A_i^cycA_i up to elements of S */
      a = isprincipalfact(bnf,gel(A,1),S,b,nf_GENMAT|nf_FORCE);
      /* the first component is expected to be zero; anything else is a bug */
      if (!gequal0(gel(a,1))) pari_err_BUG("bnflog");
      a = famat_mul_shallow(gel(A,2), gel(a,2)); /* principal part */
      /* empty product: leave row i of C at zero */
      if (lg(a) == 1) continue;
      for (j = 1; j < lS; j++)
        gcoeff(C,i,j) = vtilde(nf, a, gel(T,j), gel(vdegS,j), ell, prec0);
    }
    C = gmod(gneg(C),ellk);
    C = shallowtrans(C);
    /* block matrix [ diag(cycAp), 0 ; C, vtG ], given column by column */
    M = mkmat2(mkcol2(diagonal_shallow(cycAp), C), mkcol2(gen_0, vtG));
    M = shallowmatconcat(M); /* relation matrix */
  }
  else
    M = vtG; /* Ap is empty: vtG alone is the relation matrix */
  M = ZM_hnfmodid(M, ellk);
  D = matsnf0(M, 4);
  /* D must be non-empty and its first entry must pass the dvdii test
   * against ellk; otherwise report an internal bug */
  if (lg(D) == 1 || !dvdii(gel(D,1), ellk))
    pari_err_BUG("bnflog [missing Z_l component]");
  /* drop that first entry from the returned invariants */
  D = vecslice(D,2,lg(D)-1);
  return mkvec3(D, CLt, ellsylow(cycAp, ell));
}