// Component-typed convenience wrapper around the integer_t extract overload.
//
// d         - destination; its register and flags are reused for the result
// s         - source pixel; the bit bounds come from s.format.c[component]
// component - index of the component to pull out of the pixel
void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
{
    // Bit bounds of the requested component inside the pixel.
    const int high = s.format.c[component].h;
    const int low  = s.format.c[component].l;

    // Work on an integer view that shares d's register and flags
    // (the 32 is presumably the integer's size in bits).
    integer_t extracted(d.reg, 32, d.flags);
    extract(extracted, s.reg, high, low, s.size());

    // Publish the result back to the caller as a component.
    d = component_t(extracted);
}
// Single-multiply blend for the "source factor is (1-f), destination factor
// is f" case:
//
//      R = S*(1-f) + D*f = (D-S)*f + S
//
// temp     - receives the blended component
// factor   - the shared blend factor f
// fragment - incoming source component S
// fb       - framebuffer component D
void GGLAssembler::build_blendOneMinusFF(
        component_t& temp,
        const integer_t& factor,
        const integer_t& fragment,
        const integer_t& fb)
{
    Scratch scratches(registerFile());

    // D-S is written over fb's own register when fb may be clobbered,
    // otherwise into a freshly obtained scratch register.
    int diffReg;
    if (fb.flags & CORRUPTIBLE) {
        diffReg = fb.reg;
    } else {
        diffReg = scratches.obtain();
    }
    integer_t diff(diffReg, fb.size(), CORRUPTIBLE);

    // Bring the fragment to fb's width while subtracting: shift it right
    // when it is wider than fb, left when it is narrower.
    const int sizeDelta = fragment.size() - fb.size();
    if (sizeDelta == 0) {
        SUB(AL, 0, diff.reg, fb.reg, fragment.reg);
    } else if (sizeDelta > 0) {
        SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, sizeDelta));
    } else {
        SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL, -sizeDelta));
    }

    // temp = (D-S)*f + S
    mul_factor_add(temp, diff, factor, component_t(fragment));
}
// Emits the blending code for a single colour component.
//
// temp      - on entry the incoming fragment component; on exit the blended
//             result, always flagged CORRUPTIBLE
// pixel     - framebuffer pixel the destination component is extracted from
// component - index of the component being blended; GGLFormat::ALPHA selects
//             the alpha blend factors (mBlendSrcA / mBlendDstA)
// regs      - caller-supplied allocator, used whenever temp needs a register
//             of its own
//
// Does nothing when blending is disabled for this component
// (mInfo[component].blend is false).
void GGLAssembler::build_blending(
        component_t& temp,      // incoming fragment / output
        const pixel_t& pixel,   // framebuffer
        int component,
        Scratch& regs)
{
    if (!mInfo[component].blend)
        return;

    // pick the colour or alpha factor pair for this component
    int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
    int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
    // for the alpha component, SRC_ALPHA_SATURATE is handled as GGL_ONE
    if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA)
        fs = GGL_ONE;
    const int blending = blending_codes(fs, fd);
    if (!temp.size()) {
        // here, blending will produce something which doesn't depend on
        // that component (eg: GL_ZERO:GL_*), so the register has not been
        // allocated yet. Will never be used as a source.
        temp = component_t(regs.obtain(), CORRUPTIBLE);
    }

    // we are doing real blending...
    // fb:          extracted dst
    // fragment:    extracted src
    // temp:        component_t(fragment) and result

    // scoped register allocator
    Scratch scratches(registerFile());
    comment("blending");

    // we can optimize these cases a bit...
    // (1) saturation is not needed
    // (2) we can use only one multiply instead of 2
    // (3) we can reduce the register pressure
    //      R = S*f + D*(1-f) = (S-D)*f + D
    //      R = S*(1-f) + D*f = (D-S)*f + S

    // source factor is f, destination factor is (1-f)
    const bool same_factor_opt1 =
        (fs==GGL_DST_COLOR && fd==GGL_ONE_MINUS_DST_COLOR) ||
        (fs==GGL_SRC_COLOR && fd==GGL_ONE_MINUS_SRC_COLOR) ||
        (fs==GGL_DST_ALPHA && fd==GGL_ONE_MINUS_DST_ALPHA) ||
        (fs==GGL_SRC_ALPHA && fd==GGL_ONE_MINUS_SRC_ALPHA);

    // source factor is (1-f), destination factor is f
    const bool same_factor_opt2 =
        (fs==GGL_ONE_MINUS_DST_COLOR && fd==GGL_DST_COLOR) ||
        (fs==GGL_ONE_MINUS_SRC_COLOR && fd==GGL_SRC_COLOR) ||
        (fs==GGL_ONE_MINUS_DST_ALPHA && fd==GGL_DST_ALPHA) ||
        (fs==GGL_ONE_MINUS_SRC_ALPHA && fd==GGL_SRC_ALPHA);

    // XXX: we could also optimize these cases:
    // R = S*f + D*f = (S+D)*f
    // R = S*(1-f) + D*(1-f) = (S+D)*(1-f)
    // R = S*D + D*S = 2*S*D

    // see if we need to extract 'component' from the destination (fb)
    integer_t fb;
    if (blending & (BLEND_DST|FACTOR_DST)) {
        fb.setTo(scratches.obtain(), 32);
        extract(fb, pixel, component);
        if (mDithering) {
            // XXX: maybe what we should do instead, is simply
            // expand fb -or- fragment to the larger of the two
            if (fb.size() < temp.size()) {
                // for now we expand 'fb' to min(fragment, 8)
                int new_size = temp.size() < 8 ? temp.size() : 8;
                expand(fb, fb, new_size);
            }
        }
    }

    // convert input fragment to integer_t
    // (when temp may be clobbered, shift it down in place so it starts at bit 0)
    if (temp.l && (temp.flags & CORRUPTIBLE)) {
        MOV(AL, 0, temp.reg, reg_imm(temp.reg, LSR, temp.l));
        temp.h -= temp.l;
        temp.l = 0;
    }
    integer_t fragment(temp.reg, temp.size(), temp.flags);

    // if not done yet, convert input fragment to integer_t
    if (temp.l) {
        // here we know temp is not CORRUPTIBLE
        // so the shifted copy goes into a scratch register instead
        fragment.reg = scratches.obtain();
        MOV(AL, 0, fragment.reg, reg_imm(temp.reg, LSR, temp.l));
        fragment.flags |= CORRUPTIBLE;
    }

    if (!(temp.flags & CORRUPTIBLE)) {
        // temp is not corruptible, but since it's the destination it
        // will be modified, so we need to allocate a new register.
        temp.reg = regs.obtain();
        // NOTE(review): CORRUPTIBLE is already clear in temp.flags inside
        // this branch, so the next statement changes nothing.
        temp.flags &= ~CORRUPTIBLE;
        fragment.flags &= ~CORRUPTIBLE;
    }

    if ((blending & BLEND_SRC) && !same_factor_opt1) {
        // source (fragment) is needed for the blending stage
        // so it's not CORRUPTIBLE (unless we're doing same_factor_opt1)
        fragment.flags &= ~CORRUPTIBLE;
    }

    if (same_factor_opt1) {
        //  R = S*f + D*(1-f) = (S-D)*f + D
        integer_t factor;
        build_blend_factor(factor, fs,
                component, pixel, fragment, fb, scratches);
        // fb is always corruptible from this point
        fb.flags |= CORRUPTIBLE;
        build_blendFOneMinusF(temp, factor, fragment, fb);
    } else if (same_factor_opt2) {
        //  R = S*(1-f) + D*f = (D-S)*f + S
        integer_t factor;
        // fb is always corruptible here
        fb.flags |= CORRUPTIBLE;
        build_blend_factor(factor, fd,
                component, pixel, fragment, fb, scratches);
        build_blendOneMinusFF(temp, factor, fragment, fb);
    } else {
        // generic path: source and destination factors are handled separately
        integer_t src_factor;
        integer_t dst_factor;

        // if destination (fb) is not needed for the blending stage,
        // then it can be marked as CORRUPTIBLE
        if (!(blending & BLEND_DST)) {
            fb.flags |= CORRUPTIBLE;
        }

        // XXX: try to mark some registers as CORRUPTIBLE
        // in most case we could make those corruptible
        // when we're processing the last component
        // but not always, for instance
        //    when fragment is constant and not reloaded
        //    when fb is needed for logic-ops or masking
        //    when a register is aliased (for instance with mAlphaSource)

        // blend away...
        if (fs==GGL_ZERO) {
            if (fd==GGL_ZERO) {         // R = 0
                // already taken care of
            } else if (fd==GGL_ONE) {   // R = D
                // already taken care of
            } else {                    // R = D*fd
                // compute fd
                build_blend_factor(dst_factor, fd,
                        component, pixel, fragment, fb, scratches);
                mul_factor(temp, fb, dst_factor);
            }
        } else if (fs==GGL_ONE) {
            if (fd==GGL_ZERO) {         // R = S
                // NOP, taken care of
            } else if (fd==GGL_ONE) {   // R = S + D
                component_add(temp, fb, fragment); // args order matters
                component_sat(temp);
            } else {                    // R = S + D*fd
                // compute fd
                build_blend_factor(dst_factor, fd,
                        component, pixel, fragment, fb, scratches);
                mul_factor_add(temp, fb, dst_factor, component_t(fragment));
                component_sat(temp);
            }
        } else {
            // compute fs
            build_blend_factor(src_factor, fs,
                    component, pixel, fragment, fb, scratches);
            if (fd==GGL_ZERO) {         // R = S*fs
                mul_factor(temp, fragment, src_factor);
            } else if (fd==GGL_ONE) {   // R = S*fs + D
                mul_factor_add(temp, fragment, src_factor, component_t(fb));
                component_sat(temp);
            } else {                    // R = S*fs + D*fd
                mul_factor(temp, fragment, src_factor);
                // src_factor is no longer needed once S*fs is in temp;
                // hand its register back if it came from 'scratches'
                if (scratches.isUsed(src_factor.reg))
                    scratches.recycle(src_factor.reg);
                // compute fd
                build_blend_factor(dst_factor, fd,
                        component, pixel, fragment, fb, scratches);
                mul_factor_add(temp, fb, dst_factor, temp);
                // NOTE(review): both flags are necessarily false in this
                // branch (the enclosing if / else-if already excluded them),
                // so this condition always holds.
                if (!same_factor_opt1 && !same_factor_opt2) {
                    component_sat(temp);
                }
            }
        }
    }

    // now we can be corrupted (it's the dest)
    temp.flags |= CORRUPTIBLE;
}
// Component-typed convenience wrapper around the integer_t expand overload.
//
// d     - destination; its register and flags are reused for the result
// s     - source component to expand
// dbits - size argument forwarded to the integer_t overload
void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
{
    // Integer view sharing d's register and flags.
    integer_t widened(d.reg, 32, d.flags);

    expand(widened, s, dbits);

    // Hand the result back to the caller as a component.
    d = component_t(widened);
}