bool BBox::intersect(const Ray& ray, float *tnear, float *tfar) const { // you may already have those values hanging around somewhere const __m128 plus_inf = loadps(ps_cst_plus_inf), minus_inf = loadps(ps_cst_minus_inf); // use whatever's apropriate to load. const __m128 box_min = loadps(&min), box_max = loadps(&max), pos = loadps(&ray.o), inv_dir = loadps(&ray.inv_d); // use a div if inverted directions aren't available const __m128 l1 = mulps(subps(box_min, pos), inv_dir); const __m128 l2 = mulps(subps(box_max, pos), inv_dir); // the order we use for those min/max is vital to filter out // NaNs that happens when an inv_dir is +/- inf and // (box_min - pos) is 0. inf * 0 = NaN const __m128 filtered_l1a = minps(l1, plus_inf); const __m128 filtered_l2a = minps(l2, plus_inf); const __m128 filtered_l1b = maxps(l1, minus_inf); const __m128 filtered_l2b = maxps(l2, minus_inf); // now that we're back on our feet, test those slabs. __m128 lmax = maxps(filtered_l1a, filtered_l2a); __m128 lmin = minps(filtered_l1b, filtered_l2b); // unfold back. try to hide the latency of the shufps & co. const __m128 lmax0 = rotatelps(lmax); const __m128 lmin0 = rotatelps(lmin); lmax = minss(lmax, lmax0); lmin = maxss(lmin, lmin0); const __m128 lmax1 = muxhps(lmax,lmax); const __m128 lmin1 = muxhps(lmin,lmin); lmax = minss(lmax, lmax1); lmin = maxss(lmin, lmin1); const bool ret = _mm_comige_ss(lmax, _mm_setzero_ps()) & _mm_comige_ss(lmax,lmin); storess(lmin, tnear); storess(lmax, tfar); return ret; }
/* Upper bound for \delta(vec): estimate of the accuracy lost when evaluating
 * \tilde{v} on the entries vec[i].
 *
 * Takes the largest vtilde_prec_x() value over the entries of vec (never less
 * than 0), then adds z_pval(nf_get_degree(nf), ell) and a constant 3. */
static long
vtilde_prec(GEN nf, GEN vec, GEN ell)
{
  const long n = lg(vec);
  long k, worst = 0;

  for (k = 1; k < n; k++)
  {
    const long cur = vtilde_prec_x(nf, gel(vec,k), ell);
    worst = maxss(worst, cur);
  }
  return 3 + worst + z_pval(nf_get_degree(nf), ell);
}
/* Per-element contribution to vtilde_prec().
 *
 * If x is not a t_MAT, the result is vnorm_x(nf, x, ell).
 * If x is a t_MAT, the result is the largest vnorm_x() over the entries of
 * its first column, floored at 0 (presumably x is then a factorization
 * matrix whose first column lists the bases -- confirm with callers). */
static long
vtilde_prec_x(GEN nf, GEN x, GEN ell)
{
  GEN col;
  long k, n, best;

  if (typ(x) == t_MAT)
  {
    col = gel(x,1);
    n = lg(col);
    best = 0;
    for (k = 1; k < n; k++)
    {
      const long cur = vnorm_x(nf, gel(col,k), ell);
      best = maxss(best, cur);
    }
    return best;
  }
  return vnorm_x(nf, x, ell);
}
inline bool ray_box_intersect(const box_t & b, const ray_t & ray, rayseg_t & rs) { /* you may already have those values hanging around somewhere */ const __m128 plus_inf = loadps(ps_cst_plus_inf), minus_inf = loadps(ps_cst_minus_inf); /* use whatever's apropriate to load. */ const __m128 box_min = loadps(&b.min), box_max = loadps(&b.max), pos = loadps(&ray.pos), inv_dir = loadps(&ray.inv_dir); /* use a div if inverted directions aren't available */ const __m128 l1 = mulps(subps(box_min, pos), inv_dir); const __m128 l2 = mulps(subps(box_max, pos), inv_dir); /* the order we use for those min/max is vital to filter out */ /* NaNs that happens when an inv_dir is +/- inf and */ /* (box_min - pos) is 0. inf * 0 = NaN */ const __m128 filtered_l1a = minps(l1, plus_inf); const __m128 filtered_l2a = minps(l2, plus_inf); const __m128 filtered_l1b = maxps(l1, minus_inf); const __m128 filtered_l2b = maxps(l2, minus_inf); /* now that we're back on our feet, test those slabs. */ __m128 lmax = maxps(filtered_l1a, filtered_l2a); __m128 lmin = minps(filtered_l1b, filtered_l2b); /* unfold back. try to hide the latency of the shufps & co. */ const __m128 lmax0 = rotatelps(lmax); const __m128 lmin0 = rotatelps(lmin); lmax = minss(lmax, lmax0); lmin = maxss(lmin, lmin0); const __m128 lmax1 = muxhps(lmax, lmax); const __m128 lmin1 = muxhps(lmin, lmin); lmax = minss(lmax, lmax1); lmin = maxss(lmin, lmin1); const bool ret = _mm_comige_ss(lmax, _mm_setzero_ps()) & _mm_comige_ss(lmax, lmin); storess(lmin, &rs.t_near); storess(lmax, &rs.t_far); return ret; }
/* Assemble one matrix out of the blocks stored in v.
 *
 *  - empty v (lg == 1): returns an empty t_MAT, whatever the type of v;
 *  - t_VEC: blocks are laid out left to right; the result has the largest
 *    block height and the sum of the block widths;
 *  - t_COL: blocks are stacked top to bottom; the result has the sum of the
 *    block heights and the largest block width;
 *  - t_MAT: blocks form a grid; block row r gets the largest height found in
 *    that row, block column k the largest width found in that column;
 *  - anything else: pari_err_TYPE.
 *
 * Block dimensions come from _matsize(); the result starts out as
 * zeromatcopy(rows, cols) and each block is written with matfill() at its
 * offset.  NOTE(review): the last matfill() argument is presumably the size
 * used when a block is a scalar -- confirm against matfill(). */
GEN
shallowmatconcat(GEN v)
{
  const long n = lg(v);
  long r, k, nr, rows = 0, cols = 0;
  GEN res, rowh, colw;

  if (n == 1) return cgetg(1,t_MAT);
  switch(typ(v))
  {
    case t_VEC:
      /* pass 1: overall dimensions */
      for (k = 1; k < n; k++)
      {
        GEN dim = _matsize(gel(v,k));
        rows = maxss(rows, dim[1]);
        cols += dim[2];
      }
      res = zeromatcopy(rows, cols);
      /* pass 2: copy blocks, advancing the column offset */
      for (k = 1, cols = 0; k < n; k++)
      {
        GEN blk = gel(v,k);
        GEN dim = _matsize(blk);
        matfill(res, blk, 0, cols, 1);
        cols += dim[2];
      }
      return res;

    case t_COL:
      /* pass 1: overall dimensions */
      for (k = 1; k < n; k++)
      {
        GEN dim = _matsize(gel(v,k));
        rows += dim[1];
        cols = maxss(cols, dim[2]);
      }
      res = zeromatcopy(rows, cols);
      /* pass 2: copy blocks, advancing the row offset */
      for (k = 1, rows = 0; k < n; k++)
      {
        GEN blk = gel(v,k);
        GEN dim = _matsize(blk);
        matfill(res, blk, rows, 0, 1);
        rows += dim[1];
      }
      return res;

    case t_MAT:
      nr = lgcols(v);
      rowh = zero_zv(nr-1); /* height of each block row */
      colw = zero_zv(n-1);  /* width of each block column */
      for (k = 1; k < n; k++)
        for (r = 1; r < nr; r++)
        {
          GEN dim = _matsize(gcoeff(v,r,k));
          rowh[r] = maxss(rowh[r], dim[1]);
          colw[k] = maxss(colw[k], dim[2]);
        }
      rows = 0;
      for (r = 1; r < nr; r++) rows += rowh[r];
      cols = 0;
      for (k = 1; k < n; k++) cols += colw[k];
      res = zeromatcopy(rows, cols);
      cols = 0;
      for (k = 1; k < n; k++)
      {
        rows = 0;
        for (r = 1; r < nr; r++)
        {
          matfill(res, gcoeff(v,r,k), rows, cols, minss(rowh[r], colw[k]));
          rows += rowh[r];
        }
        cols += colw[k];
      }
      return res;

    default:
      pari_err_TYPE("shallowmatconcat", v);
      return NULL;
  }
}
/* Worker taking a bnf structure and a prime ell (presumably the engine
 * behind bnflog, as the error messages suggest -- confirm with callers).
 *
 * Returns a 3-component vector:
 *   - [ D, CLt, ellsylow(cycAp, ell) ] in the general case, where D is the
 *     list of elementary divisors of the relation matrix M with its first
 *     entry removed;
 *   - three empty t_VECs when the computed exponent `prec` equals 1.
 *
 * Raises pari_err_BUG if the principal-ideal test on a power of a class
 * generator leaves a nonzero class component, or if the first elementary
 * divisor is missing or not divisible by ell^prec. */
static GEN bnflog_i(GEN bnf, GEN ell) {
  long prec0, prec;
  GEN nf, US, vdegS, S, T, M, CLp, CLt, Ftilde, vtG, ellk;
  GEN D, Ap, cycAp, bnfS;
  long i, j, lS, lvAp;
  checkbnf(bnf);
  nf = checknf(bnf);
  /* S: the primes of nf above ell */
  S = idealprimedec(nf, ell);
  bnfS = bnfsunit0(bnf, S, nf_GENMAT, LOWDEFAULTPREC); /* S-units */
  US = leafcopy(gel(bnfS,1));
  /* working precision: at least 30, more if the S-units require it */
  prec0 = maxss(30, vtilde_prec(nf, US, ell));
  /* prepend the fundamental units and retype the concatenation as a column */
  US = shallowconcat(bnf_get_fu(bnf), US);
  settyp(US, t_COL);
  T = padicfact(nf, S, prec0);
  lS = lg(S);
  /* one small integer per prime of S, computed from the prime and its
   * matching entry of T */
  Ftilde = cgetg(lS, t_VECSMALL);
  for (j = 1; j < lS; j++) Ftilde[j] = ftilde(nf, gel(S,j), gel(T,j));
  CLp = CL_prime(bnf, ell, S);
  cycAp = gel(CLp,1);
  Ap = gel(CLp,2);
  /* retry with doubled precision (recomputing T each time) until CL_tilde
   * returns a non-NULL result; vtG is set by CL_tilde through its pointer
   * argument */
  for(;;) {
    CLt = CL_tilde(nf, US, ell, T, Ftilde, &vtG, prec0);
    if (CLt) break;
    prec0 <<= 1;
    T = padicfact(nf, S, prec0);
  }
  prec = ellexpo(cycAp, ell) + ellexpo(CLt,ell) + 1;
  /* prec == 1 means both ellexpo() values are 0: return empty components */
  if (prec == 1) return mkvec3(cgetg(1,t_VEC), cgetg(1,t_VEC), cgetg(1,t_VEC));
  vdegS = get_vdegS(Ftilde, ell, prec0);
  ellk = powiu(ell, prec); /* modulus ell^prec used for all reductions below */
  lvAp = lg(Ap);
  if (lvAp > 1) {
    GEN Kcyc = bnf_get_cyc(bnf);
    /* C: one row per entry of Ap, one column per prime of S, initially 0 */
    GEN C = zeromatcopy(lvAp-1, lS-1);
    GEN Rell = gel(CLp,3), Uell = gel(CLp,4), ordS = gel(CLp,5);
    for (i = 1; i < lvAp; i++) {
      GEN a, b, bi, A = gel(Ap,i), d = gel(cycAp,i);
      bi = isprincipal(bnf, A);
      /* class coordinates of A^d, reduced modulo the class group orders */
      a = vecmodii(ZC_Z_mul(bi,d), Kcyc);
      /* a in subgroup generated by S = Rell; hence b integral */
      b = hnf_invimage(Rell, a);
      b = vecmodii(ZM_ZC_mul(Uell, ZC_neg(b)), ordS);
      /* pair A with an empty matrix, then raise the pair to the power d;
       * the second component of the result is used below */
      A = mkvec2(A, cgetg(1,t_MAT));
      A = idealpowred(nf, A, d);
      /* find a principal representative of A_i^cycA_i up to elements of S */
      a = isprincipalfact(bnf,gel(A,1),S,b,nf_GENMAT|nf_FORCE);
      if (!gequal0(gel(a,1))) pari_err_BUG("bnflog");
      a = famat_mul_shallow(gel(A,2), gel(a,2)); /* principal part */
      /* empty product: row i of C keeps its zero entries */
      if (lg(a) == 1) continue;
      for (j = 1; j < lS; j++)
        gcoeff(C,i,j) = vtilde(nf, a, gel(T,j), gel(vdegS,j), ell, prec0);
    }
    /* negate, reduce modulo ell^prec, then transpose */
    C = gmod(gneg(C),ellk);
    C = shallowtrans(C);
    /* 2x2 block layout: [ diag(cycAp), 0 ; C, vtG ] */
    M = mkmat2(mkcol2(diagonal_shallow(cycAp), C), mkcol2(gen_0, vtG));
    M = shallowmatconcat(M); /* relation matrix */
  } else
    M = vtG; /* Ap is empty: vtG alone is the relation matrix */
  M = ZM_hnfmodid(M, ellk);
  /* NOTE(review): flag 4 presumably discards trivial elementary divisors --
   * confirm against the matsnf0 documentation */
  D = matsnf0(M, 4);
  if (lg(D) == 1 || !dvdii(gel(D,1), ellk))
    pari_err_BUG("bnflog [missing Z_l component]");
  /* drop the first entry (the one just checked against ell^prec) */
  D = vecslice(D,2,lg(D)-1);
  return mkvec3(D, CLt, ellsylow(cycAp, ell));
}