bool BBox::intersect(const Ray& ray, float *tnear, float *tfar) const { // you may already have those values hanging around somewhere const __m128 plus_inf = loadps(ps_cst_plus_inf), minus_inf = loadps(ps_cst_minus_inf); // use whatever's apropriate to load. const __m128 box_min = loadps(&min), box_max = loadps(&max), pos = loadps(&ray.o), inv_dir = loadps(&ray.inv_d); // use a div if inverted directions aren't available const __m128 l1 = mulps(subps(box_min, pos), inv_dir); const __m128 l2 = mulps(subps(box_max, pos), inv_dir); // the order we use for those min/max is vital to filter out // NaNs that happens when an inv_dir is +/- inf and // (box_min - pos) is 0. inf * 0 = NaN const __m128 filtered_l1a = minps(l1, plus_inf); const __m128 filtered_l2a = minps(l2, plus_inf); const __m128 filtered_l1b = maxps(l1, minus_inf); const __m128 filtered_l2b = maxps(l2, minus_inf); // now that we're back on our feet, test those slabs. __m128 lmax = maxps(filtered_l1a, filtered_l2a); __m128 lmin = minps(filtered_l1b, filtered_l2b); // unfold back. try to hide the latency of the shufps & co. const __m128 lmax0 = rotatelps(lmax); const __m128 lmin0 = rotatelps(lmin); lmax = minss(lmax, lmax0); lmin = maxss(lmin, lmin0); const __m128 lmax1 = muxhps(lmax,lmax); const __m128 lmin1 = muxhps(lmin,lmin); lmax = minss(lmax, lmax1); lmin = maxss(lmin, lmin1); const bool ret = _mm_comige_ss(lmax, _mm_setzero_ps()) & _mm_comige_ss(lmax,lmin); storess(lmin, tnear); storess(lmax, tfar); return ret; }
/* Return min(v, v(Log_p Norm_{F_\p/Q_p}(x))).
 * x is first brought to scalar-or-algebraic form relative to nf, its norm
 * modulo T is taken, and the norm is converted with cvtop(., p, v) unless it
 * is already a t_PADIC. The result is the smaller of v and the valp() of the
 * p-adic logarithm of that norm. */
static long
vlognorm(GEN nf, GEN T, GEN x, GEN p, long v)
{
  GEN nrm = RgXQ_norm(nf_to_scalar_or_alg(nf, x), T);
  long vlog;

  if (typ(nrm) != t_PADIC) nrm = cvtop(nrm, p, v);
  vlog = valp( Qp_log(nrm) );
  return minss(v, vlog);
}
inline bool ray_box_intersect(const box_t & b, const ray_t & ray, rayseg_t & rs) { /* you may already have those values hanging around somewhere */ const __m128 plus_inf = loadps(ps_cst_plus_inf), minus_inf = loadps(ps_cst_minus_inf); /* use whatever's apropriate to load. */ const __m128 box_min = loadps(&b.min), box_max = loadps(&b.max), pos = loadps(&ray.pos), inv_dir = loadps(&ray.inv_dir); /* use a div if inverted directions aren't available */ const __m128 l1 = mulps(subps(box_min, pos), inv_dir); const __m128 l2 = mulps(subps(box_max, pos), inv_dir); /* the order we use for those min/max is vital to filter out */ /* NaNs that happens when an inv_dir is +/- inf and */ /* (box_min - pos) is 0. inf * 0 = NaN */ const __m128 filtered_l1a = minps(l1, plus_inf); const __m128 filtered_l2a = minps(l2, plus_inf); const __m128 filtered_l1b = maxps(l1, minus_inf); const __m128 filtered_l2b = maxps(l2, minus_inf); /* now that we're back on our feet, test those slabs. */ __m128 lmax = maxps(filtered_l1a, filtered_l2a); __m128 lmin = minps(filtered_l1b, filtered_l2b); /* unfold back. try to hide the latency of the shufps & co. */ const __m128 lmax0 = rotatelps(lmax); const __m128 lmin0 = rotatelps(lmin); lmax = minss(lmax, lmax0); lmin = maxss(lmin, lmin0); const __m128 lmax1 = muxhps(lmax, lmax); const __m128 lmin1 = muxhps(lmin, lmin); lmax = minss(lmax, lmax1); lmin = maxss(lmin, lmin1); const bool ret = _mm_comige_ss(lmax, _mm_setzero_ps()) & _mm_comige_ss(lmax, lmin); storess(lmin, &rs.t_near); storess(lmax, &rs.t_far); return ret; }
/* Build one matrix out of the blocks stored in v.
 *  - lg(v) == 1: returns an empty t_MAT.
 *  - t_VEC: result has max block height and summed block widths; each block
 *    is passed to matfill at row offset 0 and a running column offset.
 *  - t_COL: result has summed block heights and max block width; each block
 *    is passed to matfill at a running row offset and column offset 0.
 *  - t_MAT: a grid of blocks; grid row i gets the max height found in that
 *    row, grid column j the max width found in that column, and block (i,j)
 *    is passed to matfill at the accumulated offsets.
 *  - any other type: pari_err_TYPE.
 * NOTE(review): block dimensions come from _matsize() (s[1] rows, s[2]
 * columns, judging by usage); the meaning of matfill's last argument is not
 * visible here. */
GEN
shallowmatconcat(GEN v)
{
  const long len = lg(v);
  long r, c;

  if (len == 1) return cgetg(1,t_MAT);
  switch(typ(v))
  {
    case t_VEC:
    {
      long height = 0, width = 0, coff = 0;
      GEN res;
      for (c = 1; c < len; c++)
      {
        GEN dim = _matsize(gel(v,c));
        height = maxss(height, dim[1]);
        width += dim[2];
      }
      res = zeromatcopy(height, width);
      for (c = 1; c < len; c++)
      {
        GEN blk = gel(v,c);
        GEN dim = _matsize(blk);
        matfill(res, blk, 0, coff, 1);
        coff += dim[2];
      }
      return res;
    }

    case t_COL:
    {
      long height = 0, width = 0, roff = 0;
      GEN res;
      for (r = 1; r < len; r++)
      {
        GEN dim = _matsize(gel(v,r));
        height += dim[1];
        width = maxss(width, dim[2]);
      }
      res = zeromatcopy(height, width);
      for (r = 1; r < len; r++)
      {
        GEN blk = gel(v,r);
        GEN dim = _matsize(blk);
        matfill(res, blk, roff, 0, 1);
        roff += dim[1];
      }
      return res;
    }

    case t_MAT:
    {
      const long nrow = lgcols(v);
      GEN rowh = zero_zv(nrow-1); /* tallest block in each grid row */
      GEN colw = zero_zv(len-1);  /* widest block in each grid column */
      GEN res;
      long height = 0, width = 0, roff, coff = 0;

      for (c = 1; c < len; c++)
        for (r = 1; r < nrow; r++)
        {
          GEN dim = _matsize(gcoeff(v,r,c));
          rowh[r] = maxss(rowh[r], dim[1]);
          colw[c] = maxss(colw[c], dim[2]);
        }
      for (r = 1; r < nrow; r++) height += rowh[r];
      for (c = 1; c < len; c++) width += colw[c];
      res = zeromatcopy(height, width);
      for (c = 1; c < len; c++)
      {
        roff = 0;
        for (r = 1; r < nrow; r++)
        {
          GEN blk = gcoeff(v,r,c);
          matfill(res, blk, roff, coff, minss(rowh[r], colw[c]));
          roff += rowh[r];
        }
        coff += colw[c];
      }
      return res;
    }

    default:
      pari_err_TYPE("shallowmatconcat", v);
      return NULL;
  }
}