/*
 * Emits code that widens the value held in src.reg (src.size() bits wide)
 * into a 'dbits'-bit value in dst.reg by replicating its bit pattern.
 *
 *  dst    receives the result; dst.s is set to 'dbits' and dst.flags is
 *         cleared. dst and src may refer to the same register.
 *  src    value to expand; its size must be non-zero (asserted).
 *  dbits  requested width of the result, in bits.
 *
 * When dbits <= src.size() no widening is generated: the value is only
 * copied (and only if the registers differ).
 */
void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
{
    assert(src.size());

    int sbits = src.size();
    int s = src.reg;
    int d = dst.reg;

    // be sure to set 'dst' after we read 'src' as they may be identical
    dst.s = dbits;
    dst.flags = 0;

    if (dbits <= sbits) {
        if (s != d) {
            MOV(AL, 0, d, s);
        }
        return;
    }

    if (sbits == 1) {
        // a single bit expands to all-zeroes or all-ones
        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
            // d = (s<<dbits) - s;
        return;
    }

    if (dbits % sbits) {
        // dbits is not a multiple of sbits: left-justify the source in the
        // destination field, then fill downwards with right-shifted copies.
        // The filled width doubles on each pass, hence the doubling shift.
        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
            // d = s << (dbits-sbits);
        dbits -= sbits;
        do {
            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
                // d |= d >> sbits;
            dbits -= sbits;
            sbits *= 2;
        } while (dbits > 0);
        return;
    }

    // dbits is a multiple of sbits: replicate upwards. From here on 'dbits'
    // counts the bits that are still missing from the result.
    dbits -= sbits;
    do {
        // Never replicate by more than what is still missing. Once 'sbits'
        // has been doubled the remainder can be smaller than 'sbits'
        // (e.g. 2 -> 10 bits), and shifting by the full 'sbits' would then
        // produce a value wider than the 'dbits' recorded in dst.s.
        const int shift = (sbits < dbits) ? sbits : dbits;
        ORR(AL, 0, d, s, reg_imm(s, LSL, shift));
            // d = s | (s << shift);
        s = d;
        dbits -= shift;
        if (sbits*2 < dbits) {
            sbits *= 2;
        }
    } while (dbits > 0);
}
/*
 * Emits the blend  R = (D - S) * f + S, with S = 'fragment', D = 'fb' and
 * f = 'factor'. Algebraically this is S*(1-f) + D*f.
 *
 *  temp      component receiving the result (filled in by mul_factor_add)
 *  factor    blend factor f
 *  fragment  source value S
 *  fb        framebuffer value D
 */
void GGLAssembler::build_blendOneMinusFF(
        component_t& temp,
        const integer_t& factor,
        const integer_t& fragment,
        const integer_t& fb)
{
    Scratch scratches(registerFile());

    // The difference is written over 'fb' when that register may be
    // clobbered, otherwise into a scratch register.
    const int diffReg = (fb.flags & CORRUPTIBLE) ? fb.reg : scratches.obtain();
    integer_t diff(diffReg, fb.size(), CORRUPTIBLE);

    // diff = D - S, with S first shifted to the same width as D.
    const int shift = fragment.size() - fb.size();
    if (shift == 0) {
        SUB(AL, 0, diff.reg, fb.reg, fragment.reg);
    } else if (shift > 0) {
        // fragment is wider than fb: drop its extra low bits
        SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
    } else {
        // fragment is narrower than fb: scale it up
        SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL, -shift));
    }

    // temp = diff * factor + fragment
    mul_factor_add(temp, diff, factor, component_t(fragment));
}
/*
 * Emits  d = src + dst, with 'dst' shifted left so that both operands have
 * the width of 'src'.
 *
 * The original author notes that src.size() >= dst.size() is guaranteed
 * here, i.e. the shift is never negative.
 *
 * On return d.h is src.size(). When dithering is enabled d.l is 0;
 * otherwise d.l is the shift amount and CLEAR_LO is set in d.flags.
 */
void GGLAssembler::component_add(component_t& d,
        const integer_t& dst, const integer_t& src)
{
    const int srcBits = src.size();
    const int shift = srcBits - dst.size();

    if (shift != 0) {
        ADD(AL, 0, d.reg, src.reg, reg_imm(dst.reg, LSL, shift));
    } else {
        ADD(AL, 0, d.reg, src.reg, dst.reg);
    }

    d.h = srcBits;
    if (!mDithering) {
        d.l = shift;
        d.flags |= CLEAR_LO;
        return;
    }
    d.l = 0;
}
/*
 * Emits  d = v * f + a, with the addend 'a' shifted so that its width
 * matches the width of the product (v.size() + f.size() bits).
 *
 *  d  component receiving the result; d.h is set to the product width
 *  v  first multiplicand
 *  f  second multiplicand (the factor)
 *  a  addend; its width is taken to be a.h
 *
 * An error is logged when the product width reaches 32 bits.
 */
void GGLAssembler::mul_factor_add(
        component_t& d,
        const integer_t& v,
        const integer_t& f,
        const component_t& a)
{
    // XXX: we could have special cases for 1 bit mul
    Scratch scratches(registerFile());

    const int vBits = v.size();
    const int fBits = f.size();
    const int mulBits = vBits + fBits;
    int addBits = a.h;

    ALOGE_IF(mulBits>=32,
            "mul_factor_add overflow vs=%d, fs=%d, as=%d",
            vBits, fBits, addBits);

    integer_t add(a.reg, a.h, a.flags);

    // 'a' is a component_t, but its high bits are guaranteed to be 0.
    // When dithering, however, the potentially bad low bits cannot simply
    // be truncated away, so the addend has to be extracted and expanded.
    const bool mustExpand = mDithering && (a.size() < mulBits);
    if (mustExpand) {
        if ((a.flags & CORRUPTIBLE) == 0) {
            // 'a' must be preserved, so the expansion needs another
            // register. The destination is preferred (it is likely to be
            // usable) unless it aliases one of the inputs.
            const bool dstAliasesInput =
                    (d.reg == a.reg) || (d.reg == v.reg) || (d.reg == f.reg);
            if (dstAliasesInput) {
                add.reg = scratches.obtain();
            } else {
                add.reg = d.reg;
            }
        }
        expand(add, a, mulBits);    // extracts and expands
        addBits = mulBits;
    }

    // both operands fit the 16x16 multiply instructions
    const bool halfwordMul = (vBits < 16) && (fBits < 16);

    if (addBits == mulBits) {
        // same width: a single multiply-accumulate does the job
        if (halfwordMul) {
            SMLABB(AL, d.reg, v.reg, f.reg, add.reg);
        } else {
            MLA(AL, 0, d.reg, v.reg, f.reg, add.reg);
        }
    } else {
        // different widths: multiply first, then add the shifted addend
        int product = d.reg;
        if (product == add.reg) {
            // the multiply would overwrite add.reg, so the product has to
            // go through an intermediary register
            if (v.flags & CORRUPTIBLE) {
                product = v.reg;
            } else if (f.flags & CORRUPTIBLE) {
                product = f.reg;
            } else {
                product = scratches.obtain();
            }
        }

        if (halfwordMul) {
            SMULBB(AL, product, v.reg, f.reg);
        } else {
            MUL(AL, 0, product, v.reg, f.reg);
        }

        if (mulBits > addBits) {
            ADD(AL, 0, d.reg, product,
                    reg_imm(add.reg, LSL, mulBits - addBits));
        } else {
            // not sure if we should expand the mul instead?
            ADD(AL, 0, d.reg, product,
                    reg_imm(add.reg, LSR, addBits - mulBits));
        }
    }

    d.h = mulBits;
    if (mDithering) {
        d.l = a.l;
    } else {
        d.l = (a.l < fBits) ? fBits : a.l;
        d.flags |= CLEAR_LO;
    }
}
/*
 * Emits  d = v * f  using the 16-bit multiply instructions.
 *
 *  d  component receiving the product; d.h is set to the resulting width
 *  v  first multiplicand
 *  f  second multiplicand (the factor)
 *
 * Depending on the operand widths, one operand may first be shifted right
 * (through d.reg) so that it fits a 16-bit half, and/or the top halfword
 * of an operand is selected. 'ms', the width stored in d.h, is reduced
 * accordingly. An error is logged when that width reaches 32 bits.
 *
 * When dithering is enabled d.l is 0; otherwise d.l is f.size() and
 * CLEAR_LO is set in d.flags.
 */
void GGLAssembler::mul_factor(  component_t& d,
                                const integer_t& v,
                                const integer_t& f)
{
    int vs = v.size();
    int fs = f.size();
    int ms = vs+fs;

    // XXX: we could have special cases for 1 bit mul

    // all this code below to use the best multiply instruction
    // wrt the parameters size. We take advantage of the fact
    // that the 16-bits multiplies allow a 16-bit shift
    // The trick is that we just make sure that we have at least 8-bits
    // per component (which is enough for a 8 bits display).

    int xy;
    int vshift = 0;
    int fshift = 0;
    int smulw = 0;

    if (vs<16) {
        if (fs<16) {
            xy = xyBB;
        } else if (GGL_BETWEEN(fs, 24, 31)) {
            ms -= 16;
            xy = xyTB;
        } else {
            // eg: 15 * 18  ->  15 * 15
            fshift = fs - 15;
            ms -= fshift;
            xy = xyBB;
        }
    } else if (GGL_BETWEEN(vs, 24, 31)) {
        if (fs<16) {
            ms -= 16;
            xy = xyTB;
        } else if (GGL_BETWEEN(fs, 24, 31)) {
            ms -= 32;
            xy = xyTT;
        } else {
            // eg: 24 * 18  ->  8 * 18
            fshift = fs - 15;
            ms -= 16 + fshift;
            xy = xyTB;
        }
    } else {
        if (fs<16) {
            // eg: 18 * 15  ->  15 * 15
            vshift = vs - 15;
            ms -= vshift;
            xy = xyBB;
        } else if (GGL_BETWEEN(fs, 24, 31)) {
            // eg: 18 * 24  ->  15 * 8
            vshift = vs - 15;
            ms -= 16 + vshift;
            xy = xyBT;
        } else {
            // eg: 18 * 18  ->  (15 * 18)>>16
            fshift = fs - 15;
            ms -= 16 + fshift;
            xy = yB;    //XXX SMULWB
            smulw = 1;
        }
    }

    ALOGE_IF(ms>=32, "mul_factor overflow vs=%d, fs=%d", vs, fs);

    // At most one of vshift/fshift is non-zero (each case above sets only
    // one of them), so d.reg stages a single pre-shifted operand.
    // NOTE(review): if d.reg is also the register of the operand that is
    // NOT shifted, the MOV below overwrites it before the multiply;
    // presumably callers never pass such a 'd' -- confirm.
    int vreg = v.reg;
    int freg = f.reg;
    if (vshift) {
        MOV(AL, 0, d.reg, reg_imm(vreg, LSR, vshift));
        vreg = d.reg;
    }
    if (fshift) {
        // The factor itself must be shifted here. The previous code
        // shifted 'vreg', so the multiply used (v >> fshift) as the factor
        // and never read 'f'.
        MOV(AL, 0, d.reg, reg_imm(freg, LSR, fshift));
        freg = d.reg;
    }
    if (smulw)  SMULW(AL, xy, d.reg, vreg, freg);
    else        SMUL(AL, xy, d.reg, vreg, freg);

    d.h = ms;
    if (mDithering) {
        d.l = 0;
    } else {
        d.l = fs;
        d.flags |= CLEAR_LO;
    }
}