void GGLAssembler::component_sat(const component_t& v) { const int one = ((1<<v.size())-1)<<v.l; CMP(AL, v.reg, imm( 1<<v.h )); if (isValidImmediate(one)) { MOV(HS, 0, v.reg, imm( one )); } else if (isValidImmediate(~one)) { MVN(HS, 0, v.reg, imm( ~one )); } else { MOV(HS, 0, v.reg, imm( 1<<v.h )); SUB(HS, 0, v.reg, v.reg, imm( 1<<v.l )); } }
void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits) { const int maskLen = h-l; #ifdef __mips__ assert(maskLen<=11); #else assert(maskLen<=8); #endif assert(h); #if __ARM_ARCH__ >= 7 const int mask = (1<<maskLen)-1; if ((h == bits) && !l && (s != d.reg)) { MOV(AL, 0, d.reg, s); // component = packed; } else if ((h == bits) && l) { MOV(AL, 0, d.reg, reg_imm(s, LSR, l)); // component = packed >> l; } else if (!l && isValidImmediate(mask)) { AND(AL, 0, d.reg, s, imm(mask)); // component = packed & mask; } else if (!l && isValidImmediate(~mask)) { BIC(AL, 0, d.reg, s, imm(~mask)); // component = packed & mask; } else { UBFX(AL, d.reg, s, l, maskLen); // component = (packed & mask) >> l; } #else if (h != bits) { const int mask = ((1<<maskLen)-1) << l; if (isValidImmediate(mask)) { AND(AL, 0, d.reg, s, imm(mask)); // component = packed & mask; } else if (isValidImmediate(~mask)) { BIC(AL, 0, d.reg, s, imm(~mask)); // component = packed & mask; } else { MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h)); l += 32-h; h = 32; } s = d.reg; } if (l) { MOV(AL, 0, d.reg, reg_imm(s, LSR, l)); // component = packed >> l; s = d.reg; } if (s != d.reg) { MOV(AL, 0, d.reg, s); } #endif d.s = maskLen; }
void GGLAssembler::downshift( pixel_t& d, int component, component_t s, const reg_t& dither) { const needs_t& needs = mBuilderContext.needs; Scratch scratches(registerFile()); int sh = s.h; int sl = s.l; int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0; int maskLoBits = (sl!=0) ? ((s.flags & CLEAR_LO)?1:0) : 0; int sbits = sh - sl; int dh = d.format.c[component].h; int dl = d.format.c[component].l; int dbits = dh - dl; int dithering = 0; ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits); if (sbits>dbits) { // see if we need to dither dithering = mDithering; } int ireg = d.reg; if (!(d.flags & FIRST)) { if (s.flags & CORRUPTIBLE) { ireg = s.reg; } else { ireg = scratches.obtain(); } } d.flags &= ~FIRST; if (maskHiBits) { // we need to mask the high bits (and possibly the lowbits too) // and we might be able to use immediate mask. if (!dithering) { // we don't do this if we only have maskLoBits because we can // do it more efficiently below (in the case where dl=0) const int offset = sh - dbits; if (dbits<=8 && offset >= 0) { const uint32_t mask = ((1<<dbits)-1) << offset; if (isValidImmediate(mask) || isValidImmediate(~mask)) { build_and_immediate(ireg, s.reg, mask, 32); sl = offset; s.reg = ireg; sbits = dbits; maskLoBits = maskHiBits = 0; } } } else { // in the dithering case though, we need to preserve the lower bits const uint32_t mask = ((1<<sbits)-1) << sl; if (isValidImmediate(mask) || isValidImmediate(~mask)) { build_and_immediate(ireg, s.reg, mask, 32); s.reg = ireg; maskLoBits = maskHiBits = 0; } } } // XXX: we could special case (maskHiBits & !maskLoBits) // like we do for maskLoBits below, but it happens very rarely // that we have maskHiBits only and the conditions necessary to lead // to better code (like doing d |= s << 24) if (maskHiBits) { MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh)); sl += 32-sh; sh = 32; s.reg = ireg; maskHiBits = 0; } // Downsampling should be performed as follows: // V * ((1<<dbits)-1) / ((1<<sbits)-1) // V * [(1<<dbits)/((1<<sbits)-1) - 1/((1<<sbits)-1)] // V * [1/((1<<sbits)-1)>>dbits - 1/((1<<sbits)-1)] // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits)) // // By approximating (1>>dbits) and (1>>sbits) to 0: // // V>>(sbits-dbits) - V>>sbits // // A good approximation is V>>(sbits-dbits), // but better one (needed for dithering) is: // // (V>>(sbits-dbits)<<sbits - V)>>sbits // (V<<dbits - V)>>sbits // (V - V>>dbits)>>(sbits-dbits) // Dithering is done here if (dithering) { comment("dithering"); if (sl) { MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl)); sh -= sl; sl = 0; s.reg = ireg; } // scaling (V-V>>dbits) SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits)); const int shift = (GGL_DITHER_BITS - (sbits-dbits)); if (shift>0) ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift)); else if (shift<0) ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift)); else ADD(AL, 0, ireg, ireg, dither.reg); s.reg = ireg; } if ((maskLoBits|dithering) && (sh > dbits)) { int shift = sh-dbits; if (dl) { MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift)); if (ireg == d.reg) { MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl)); } else { ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl)); } } else { if (ireg == d.reg) { MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift)); } else { ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift)); } } } else { int shift = sh-dh; if (shift>0) { if (ireg == d.reg) { MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift)); } else { ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift)); } } else if (shift<0) { if (ireg == d.reg) { MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift)); } else { ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift)); } } else { if (ireg == d.reg) { if (s.reg != d.reg) { MOV(AL, 0, d.reg, s.reg); } } else { ORR(AL, 0, d.reg, d.reg, s.reg); } } } }