/*
 * Select the per-vertex lighting routine for the current GL state.
 * With lighting disabled we install a no-op; otherwise the variant
 * depends on whether GL_COLOR_MATERIAL tracking is enabled.
 */
static inline void light_picker(ogles_context_t* c)
{
    if (ggl_likely(!c->lighting.enable)) {
        c->lighting.lightVertex = lightVertexNop;
        return;
    }
    c->lighting.lightVertex = c->lighting.colorMaterial.enable
            ? lightVertexMaterial
            : lightVertex;
}
/*
 * Normalize a 3-component 16.16 fixed-point vector: d = a / |a|.
 * The two branches exist only to avoid intermediate overflow when
 * squaring the components; d and a may alias (caller passes n.v, n.v).
 */
void vnorm3(GLfixed* d, const GLfixed* a)
{
    // we must take care of overflows when normalizing a vector
    GLfixed n;
    // absolute values of the components (sign doesn't affect the norm)
    int32_t x = a[0];   x = x>=0 ? x : -x;
    int32_t y = a[1];   y = y>=0 ? y : -y;
    int32_t z = a[2];   z = z>=0 ? z : -z;
    if (ggl_likely(x<=0x6800 && y<=0x6800 && z<= 0x6800)) {
        // in this case this will all fit on 32 bits:
        // 3 * 0x6800^2 = 0x7EC30000 < 2^31, so the raw products can
        // be summed directly without calling the wide-square helper.
        n = x*x + y*y + z*z;
        n = gglSqrtRecipx(n);
        // the raw products carry an extra 2^16 scale relative to 16.16,
        // making 1/sqrt(n) a factor 2^8 too small — compensate here.
        n <<= 8;
    } else {
        // here norm^2 is at least 0x7EC00000 (>>32 == 0.495117)
        n = vsquare3(x, y, z);
        n = gglSqrtRecipx(n);
    }
    // d = a * (1/|a|)
    vscale3(d, a, n);
}
/*
 * Recompute the rasterizer "needs" words from the current GGL state and,
 * when they actually changed, re-pick the texture, color-buffer and
 * scanline routines. Clears c->dirty on exit.
 *
 * BUGFIX: the GGL_TMU_STATE loop previously stored enabled units through
 * a separate compacting counter (new_needs.t[idx++]) while clearing
 * disabled units at new_needs.t[i]. With a disabled unit preceding an
 * enabled one, the enabled unit's needs landed in the wrong slot and its
 * own slot kept a stale value. Both paths now index by the unit number i.
 */
void ggl_pick(context_t* c)
{
    if (ggl_likely(!c->dirty))
        return;

    // compute needs, see if they changed...
    const uint32_t enables = c->state.enables;
    needs_t new_needs(c->state.needs);

    if (c->dirty & GGL_CB_STATE) {
        new_needs.n &= ~GGL_NEEDS_CB_FORMAT_MASK;
        new_needs.n |= GGL_BUILD_NEEDS(c->state.buffers.color.format, CB_FORMAT);
        // a color-buffer format change can alter the effective blend
        // factors (see DST_ALPHA folding below), so force a re-pick
        if (enables & GGL_ENABLE_BLENDING)
            c->dirty |= GGL_PIXEL_PIPELINE_STATE;
    }

    if (c->dirty & GGL_PIXEL_PIPELINE_STATE) {
        uint32_t n = GGL_BUILD_NEEDS(c->state.buffers.color.format, CB_FORMAT);
        uint32_t p = 0;
        if (enables & GGL_ENABLE_BLENDING) {
            uint32_t src       = c->state.blend.src;
            uint32_t dst       = c->state.blend.dst;
            uint32_t src_alpha = c->state.blend.src_alpha;
            uint32_t dst_alpha = c->state.blend.dst_alpha;
            const GGLFormat& cbf = c->formats[ c->state.buffers.color.format ];
            if (!cbf.c[GGLFormat::ALPHA].h) {
                // no alpha channel in the framebuffer: fold factors that
                // read destination alpha into GGL_ONE
                if ((src == GGL_ONE_MINUS_DST_ALPHA) ||
                    (src == GGL_DST_ALPHA)) {
                    src = GGL_ONE;
                }
                if ((src_alpha == GGL_ONE_MINUS_DST_ALPHA) ||
                    (src_alpha == GGL_DST_ALPHA)) {
                    src_alpha = GGL_ONE;
                }
                if ((dst == GGL_ONE_MINUS_DST_ALPHA) ||
                    (dst == GGL_DST_ALPHA)) {
                    dst = GGL_ONE;
                }
                if ((dst_alpha == GGL_ONE_MINUS_DST_ALPHA) ||
                    (dst_alpha == GGL_DST_ALPHA)) {
                    dst_alpha = GGL_ONE;
                }
            }
            src = ggl_blendfactor_to_needs(src);
            dst = ggl_blendfactor_to_needs(dst);
            src_alpha = ggl_blendfactor_to_needs(src_alpha);
            dst_alpha = ggl_blendfactor_to_needs(dst_alpha);
            n |= GGL_BUILD_NEEDS( src, BLEND_SRC );
            n |= GGL_BUILD_NEEDS( dst, BLEND_DST );
            if (c->state.blend.alpha_separate) {
                n |= GGL_BUILD_NEEDS( src_alpha, BLEND_SRCA );
                n |= GGL_BUILD_NEEDS( dst_alpha, BLEND_DSTA );
            } else {
                n |= GGL_BUILD_NEEDS( src, BLEND_SRCA );
                n |= GGL_BUILD_NEEDS( dst, BLEND_DSTA );
            }
        } else {
            // blending disabled == (ONE, ZERO) for both color and alpha
            n |= GGL_BUILD_NEEDS( GGL_ONE, BLEND_SRC );
            n |= GGL_BUILD_NEEDS( GGL_ZERO, BLEND_DST );
            n |= GGL_BUILD_NEEDS( GGL_ONE, BLEND_SRCA );
            n |= GGL_BUILD_NEEDS( GGL_ZERO, BLEND_DSTA );
        }
        // the needs word stores the *disabled* channel mask (hence ^0xF)
        n |= GGL_BUILD_NEEDS(c->state.mask.color^0xF, MASK_ARGB);
        n |= GGL_BUILD_NEEDS((enables & GGL_ENABLE_SMOOTH)  ?1:0, SHADE);
        if (enables & GGL_ENABLE_TMUS) {
            // perspective-correct interpolation only matters when texturing
            n |= GGL_BUILD_NEEDS((enables & GGL_ENABLE_W)   ?1:0, W);
        }
        p |= GGL_BUILD_NEEDS((enables & GGL_ENABLE_DITHER)  ?1:0, P_DITHER);
        p |= GGL_BUILD_NEEDS((enables & GGL_ENABLE_AA)      ?1:0, P_AA);
        p |= GGL_BUILD_NEEDS((enables & GGL_ENABLE_FOG)     ?1:0, P_FOG);
        if (enables & GGL_ENABLE_LOGIC_OP) {
            n |= GGL_BUILD_NEEDS(c->state.logic_op.opcode, LOGIC_OP);
        } else {
            n |= GGL_BUILD_NEEDS(GGL_COPY, LOGIC_OP);
        }
        if (enables & GGL_ENABLE_ALPHA_TEST) {
            p |= GGL_BUILD_NEEDS(c->state.alpha_test.func, P_ALPHA_TEST);
        } else {
            p |= GGL_BUILD_NEEDS(GGL_ALWAYS, P_ALPHA_TEST);
        }
        if (enables & GGL_ENABLE_DEPTH_TEST) {
            p |= GGL_BUILD_NEEDS(c->state.depth_test.func, P_DEPTH_TEST);
            p |= GGL_BUILD_NEEDS(c->state.mask.depth&1, P_MASK_Z);
        } else {
            p |= GGL_BUILD_NEEDS(GGL_ALWAYS, P_DEPTH_TEST);
            // writing to the z-buffer is always disabled if depth-test
            // is disabled.
        }
        new_needs.n = n;
        new_needs.p = p;
    }

    if (c->dirty & GGL_TMU_STATE) {
        for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
            const texture_t& tx = c->state.texture[i];
            if (tx.enable) {
                uint32_t t = 0;
                t |= GGL_BUILD_NEEDS(tx.surface.format, T_FORMAT);
                t |= GGL_BUILD_NEEDS(ggl_env_to_needs(tx.env), T_ENV);
                t |= GGL_BUILD_NEEDS(0, T_POT);       // XXX: not used yet
                if (tx.s_coord==GGL_ONE_TO_ONE && tx.t_coord==GGL_ONE_TO_ONE) {
                    // we encode 1-to-1 into the wrap mode
                    t |= GGL_BUILD_NEEDS(GGL_NEEDS_WRAP_11, T_S_WRAP);
                    t |= GGL_BUILD_NEEDS(GGL_NEEDS_WRAP_11, T_T_WRAP);
                } else {
                    t |= GGL_BUILD_NEEDS(ggl_wrap_to_needs(tx.s_wrap), T_S_WRAP);
                    t |= GGL_BUILD_NEEDS(ggl_wrap_to_needs(tx.t_wrap), T_T_WRAP);
                }
                if (tx.mag_filter == GGL_LINEAR) {
                    t |= GGL_BUILD_NEEDS(1, T_LINEAR);
                }
                if (tx.min_filter == GGL_LINEAR) {
                    t |= GGL_BUILD_NEEDS(1, T_LINEAR);
                }
                // index by unit number (was a compacting idx++ — see header)
                new_needs.t[i] = t;
            } else {
                new_needs.t[i] = 0;
            }
        }
    }

    // only re-pick the pipeline stages when the needs actually changed
    if (new_needs != c->state.needs) {
        c->state.needs = new_needs;
        ggl_pick_texture(c);
        ggl_pick_cb(c);
        ggl_pick_scanline(c);
    }
    c->dirty = 0;
}
/*
 * Compute the lit color of one vertex in 16.16 fixed point, accumulating
 * the contribution of every enabled light on top of the precomputed
 * scene emission + ambient term, then clamp into v->color and mark the
 * vertex as LIT. Lighting is evaluated in object space using the
 * pre-transformed per-light vectors stored in light_t.
 */
void lightVertex(ogles_context_t* c, vertex_t* v)
{
    // emission and ambient for the whole scene
    vec4_t r = c->lighting.implicitSceneEmissionAndAmbient;

    // bitmask of enabled lights; bits are consumed as lights are processed
    uint32_t en = c->lighting.enabledLights;
    if (ggl_likely(en)) {
        // since we do the lighting in object-space, we don't need to
        // transform each normal. However, we might still have to normalize
        // it if GL_NORMALIZE is enabled.
        vec4_t n;
        c->arrays.normal.fetch(c, n.v,
            c->arrays.normal.element(v->index & vertex_cache_t::INDEX_MASK));

        // TODO: right now we handle GL_RESCALE_NORMALS as if it were
        // GL_NORMALIZE. We could optimize this by scaling mvui
        // appropriately instead.
        if (c->transforms.rescaleNormals)
            vnorm3(n.v, n.v);

        const material_t& material = c->lighting.front;
        const int twoSide = c->lighting.lightModel.twoSide;

        while (en) {
            // extract the highest set bit == index of the next enabled light
            const int i = 31 - gglClz(en);
            en &= ~(1<<i);
            const light_t& l = c->lighting.lights[i];
            vec4_t d, t;
            GLfixed s;
            GLfixed sqDist = 0x10000;   // 1.0 in 16.16

            // compute vertex-to-light vector
            if (ggl_unlikely(l.position.w)) {
                // positional light:
                // lightPos/1.0 - vertex/vertex.w == lightPos*vertex.w - vertex
                vss3(d.v, l.objPosition.v, v->obj.w, v->obj.v);
                sqDist = dot3(d.v, d.v);
                vscale3(d.v, d.v, gglSqrtRecipx(sqDist));
            } else {
                // directional light: direction is precomputed and normalized
                // TODO: avoid copy here
                d = l.normalizedObjPosition;
            }

            // ambient & diffuse: s = N.L, flipped for two-sided lighting,
            // clamped to zero otherwise
            s = dot3(n.v, d.v);
            s = (s<0) ? (twoSide?(-s):0) : s;
            vsa3(t.v, l.implicitDiffuse.v, s, l.implicitAmbient.v);

            // specular (only when the diffuse term and specular alpha
            // are non-zero)
            if (ggl_unlikely(s && l.implicitSpecular.v[3])) {
                // halfway vector h = normalize(L + V), with the viewer
                // direction taken as (0,0,1) — hence the +1.0 on z
                vec4_t h;
                h.x = d.x;
                h.y = d.y;
                h.z = d.z + 0x10000;
                vnorm3(h.v, h.v);
                s = dot3(n.v, h.v);
                s = (s<0) ? (twoSide?(-s):0) : s;
                if (s > 0) {
                    s = gglPowx(s, material.shininess);
                    vsa3(t.v, l.implicitSpecular.v, s, t.v);
                }
            }

            // spot: 180 degrees means "not a spotlight" per the GL spec
            if (ggl_unlikely(l.spotCutoff != gglIntToFixed(180))) {
                GLfixed spotAtt = -dot3(l.normalizedSpotDir.v, d.v);
                if (spotAtt >= l.spotCutoffCosine) {
                    vscale3(t.v, t.v,
                        gglPowx(spotAtt, l.spotExp));
                }
            }

            // attenuation (positional lights only)
            if (ggl_unlikely(l.position.w)) {
                if (l.rConstAttenuation) {
                    // constant-only attenuation: reciprocal was precomputed
                    s = l.rConstAttenuation;
                } else {
                    // 1 / (k0 + k1*dist + k2*dist^2), in fixed point
                    s = gglMulAddx(sqDist, l.attenuation[2], l.attenuation[0]);
                    if (l.attenuation[1])
                        s = gglMulAddx(gglSqrtx(sqDist), l.attenuation[1], s);
                    s = gglRecipFast(s);
                }
                vscale3(t.v, t.v, s);
            }

            // accumulate this light's RGB contribution
            r.r += t.r;
            r.g += t.g;
            r.b += t.b;
        }
    }
    // clamp to [0,1] and store; alpha comes from the scene term only
    v->color.r = gglClampx(r.r);
    v->color.g = gglClampx(r.g);
    v->color.b = gglClampx(r.b);
    v->color.a = gglClampx(r.a);
    v->flags |= vertex_t::LIT;
}
/*
 * Choose the scanline rasterization routine for the current needs.
 * Tries, in order: (1) fast paths that cannot be expressed as a needs
 * filter (1:1 texture copy, solid fills), (2) the table of precompiled
 * shortcut filters, (3) on ARM, JIT-generated assembly cached by needs
 * key, and (4) the generic C scanline as the final fallback.
 */
static void pick_scanline(context_t* c)
{
#if (!defined(DEBUG__CODEGEN_ONLY) || (DEBUG__CODEGEN_ONLY == 0))

#if ANDROID_CODEGEN == ANDROID_CODEGEN_GENERIC
    // build-time override: always use the generic C pipeline
    c->init_y = init_y;
    c->step_y = step_y__generic;
    c->scanline = scanline;
    return;
#endif

    //printf("*** needs [%08lx:%08lx:%08lx:%08lx]\n",
    //    c->state.needs.n, c->state.needs.p,
    //    c->state.needs.t[0], c->state.needs.t[1]);

    // first handle the special case that we cannot test with a filter
    const uint32_t cb_format = GGL_READ_NEEDS(CB_FORMAT, c->state.needs.n);
    if (GGL_READ_NEEDS(T_FORMAT, c->state.needs.t[0]) == cb_format) {
        if (c->state.needs.match(noblend1to1)) {
            // this will match regardless of dithering state, since both
            // src and dest have the same format anyway, there is no dithering
            // to be done.
            const GGLFormat* f =
                &(c->formats[GGL_READ_NEEDS(T_FORMAT, c->state.needs.t[0])]);
            if ((f->components == GGL_RGB) ||
                (f->components == GGL_RGBA) ||
                (f->components == GGL_LUMINANCE) ||
                (f->components == GGL_LUMINANCE_ALPHA))
            {
                // format must have all of RGB components
                // (so the current color doesn't show through)
                c->scanline = scanline_memcpy;
                c->init_y = init_y_noop;
                return;
            }
        }
    }

    // solid, unblended fill: reduce to a memset of the packed color
    if (c->state.needs.match(fill16noblend)) {
        c->init_y = init_y_packed;
        switch (c->formats[cb_format].size) {
        case 1: c->scanline = scanline_memset8;  return;
        case 2: c->scanline = scanline_memset16; return;
        case 4: c->scanline = scanline_memset32; return;
        }
    }

    // scan the table of precompiled shortcut routines
    const int numFilters = sizeof(shortcuts)/sizeof(shortcut_t);
    for (int i=0 ; i<numFilters ; i++) {
        if (c->state.needs.match(shortcuts[i].filter)) {
            c->scanline = shortcuts[i].scanline;
            c->init_y = shortcuts[i].init_y;
            return;
        }
    }

#endif // DEBUG__CODEGEN_ONLY

    c->init_y = init_y;
    c->step_y = step_y__generic;

#if ANDROID_ARM_CODEGEN
    // we're going to have to generate some code...
    // here, generate code for our pixel pipeline
    const AssemblyKey<needs_t> key(c->state.needs);
    sp<Assembly> assembly = gCodeCache.lookup(key);
    if (assembly == 0) {
        // create a new assembly region
        sp<ScanlineAssembly> a = new ScanlineAssembly(c->state.needs,
                ASSEMBLY_SCRATCH_SIZE);
        // initialize our assembler
        GGLAssembler assembler( new ARMAssembler(a) );
        //GGLAssembler assembler(
        //        new ARMAssemblerOptimizer(new ARMAssembler(a)) );
        // generate the scanline code for the given needs
        int err = assembler.scanline(c->state.needs, c);
        if (ggl_likely(!err)) {
            // finally, cache this assembly
            err = gCodeCache.cache(a->key(), a);
        }
        if (ggl_unlikely(err)) {
            LOGE("error generating or caching assembly. Reverting to NOP.");
            c->scanline = scanline_noop;
            c->init_y = init_y_noop;
            c->step_y = step_y__nop;
            return;
        }
        assembly = a;
    }

    // release the previous assembly
    if (c->scanline_as) {
        c->scanline_as->decStrong(c);
    }

    //LOGI("using generated pixel-pipeline");
    c->scanline_as = assembly.get();
    c->scanline_as->incStrong(c); //  hold on to assembly
    c->scanline = (void(*)(context_t* c))assembly->base();
#else
//  LOGW("using generic (slow) pixel-pipeline");
    c->scanline = scanline;
#endif
}