mlib_status mlib_ImageConvKernelConvert(mlib_s32 *ikernel, mlib_s32 *iscale, const mlib_d64 *fkernel, mlib_s32 m, mlib_s32 n, mlib_type type) { mlib_d64 sum_pos, sum_neg, sum, norm, max, f; mlib_s32 isum_pos, isum_neg, isum, test; mlib_s32 i, scale, scale1, chk_flag; if (ikernel == NULL || iscale == NULL || fkernel == NULL || m < 1 || n < 1) { return MLIB_FAILURE; } if ((type == MLIB_BYTE) || (type == MLIB_SHORT) || (type == MLIB_USHORT)) { if (type != MLIB_SHORT) { /* MLIB_BYTE, MLIB_USHORT */ sum_pos = 0; sum_neg = 0; for (i = 0; i < m * n; i++) { if (fkernel[i] > 0) sum_pos += fkernel[i]; else sum_neg -= fkernel[i]; } sum = (sum_pos > sum_neg) ? sum_pos : sum_neg; scale = mlib_ilogb(sum); scale++; scale = 31 - scale; } else { /* MLIB_SHORT */ sum = 0; max = 0; for (i = 0; i < m * n; i++) { f = mlib_fabs(fkernel[i]); sum += f; max = (max > f) ? max : f; } scale1 = mlib_ilogb(max) + 1; scale = mlib_ilogb(sum); scale = (scale > scale1) ? scale : scale1; scale++; scale = 32 - scale; } if (scale <= 16) return MLIB_FAILURE; if (scale > 31) scale = 31; *iscale = scale; chk_flag = mlib_ImageConvVersion(m, n, scale, type); if (!chk_flag) { norm = (1u << scale); for (i = 0; i < m * n; i++) { CLAMP_S32(ikernel[i], fkernel[i] * norm); } return MLIB_SUCCESS; } /* try to round coefficients */ #ifdef __sparc scale1 = 16; /* shift of coefficients is 16 */ #else if (chk_flag == 3) scale1 = 16; /* MMX */ else scale1 = (type == MLIB_BYTE) ? 8 : 16; #endif /* __sparc */ norm = (1u << (scale - scale1)); for (i = 0; i < m * n; i++) { if (fkernel[i] > 0) ikernel[i] = (mlib_s32) (fkernel[i] * norm + 0.5); else ikernel[i] = (mlib_s32) (fkernel[i] * norm - 0.5); } isum_pos = 0; isum_neg = 0; test = 0; for (i = 0; i < m * n; i++) { if (ikernel[i] > 0) isum_pos += ikernel[i]; else isum_neg -= ikernel[i]; } if (type == MLIB_BYTE || type == MLIB_USHORT) { isum = (isum_pos > isum_neg) ? 
isum_pos : isum_neg; if (isum >= (1 << (31 - scale1))) test = 1; } else { isum = isum_pos + isum_neg; if (isum >= (1 << (32 - scale1))) test = 1; for (i = 0; i < m * n; i++) { if (abs(ikernel[i]) >= (1 << (31 - scale1))) test = 1; } } if (test == 1) { /* rounding according scale1 cause overflow, truncate instead of round */ for (i = 0; i < m * n; i++) ikernel[i] = (mlib_s32) (fkernel[i] * norm) << scale1; } else { /* rounding is Ok */ for (i = 0; i < m * n; i++) ikernel[i] = ikernel[i] << scale1; } return MLIB_SUCCESS; } else if ((type == MLIB_INT) || (type == MLIB_BIT)) { max = 0; for (i = 0; i < m * n; i++) { f = mlib_fabs(fkernel[i]); max = (max > f) ? max : f; } scale = mlib_ilogb(max); if (scale > 29) return MLIB_FAILURE; if (scale < -100) scale = -100; *iscale = 29 - scale; scale = 29 - scale; norm = 1.0; while (scale > 30) { norm *= (1 << 30); scale -= 30; } norm *= (1 << scale); for (i = 0; i < m * n; i++) { if (fkernel[i] > 0) { CLAMP_S32(ikernel[i], fkernel[i] * norm + 0.5); } else { CLAMP_S32(ikernel[i], fkernel[i] * norm - 0.5); } } return MLIB_SUCCESS; } else { return MLIB_FAILURE; } }
/*
 * 2x2 convolution of one image into another, processing each channel
 * selected by cmask.  Every output sample is
 *     p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3
 * over two adjacent source rows held in rotating line buffers.
 *
 *   dst, src       destination and source images
 *   dx_r, dy_b     right / bottom edge amounts; when dx_r is non-zero the
 *                  last buffered column is duplicated, and dy_b limits how
 *                  far the source row pointer advances
 *   dx_l, dy_t     not referenced directly in this body (they may be used
 *                  by the parameter macros - TODO confirm)
 *   kern           integer kernel, loaded by LOAD_KERNEL_INTO_DOUBLE()
 *   scalef_expon   kernel scale exponent (presumably consumed by
 *                  LOAD_KERNEL_INTO_DOUBLE() - confirm)
 *   cmask          channel mask; bit (nchannel - 1 - c) selects channel c
 *
 * Returns MLIB_SUCCESS, or MLIB_FAILURE if the line buffer cannot be
 * allocated.
 */
mlib_status mlib_c_conv2x2ext_u8(mlib_image *dst,
                                 const mlib_image *src,
                                 mlib_s32 dx_l,
                                 mlib_s32 dx_r,
                                 mlib_s32 dy_t,
                                 mlib_s32 dy_b,
                                 const mlib_s32 *kern,
                                 mlib_s32 scalef_expon,
                                 mlib_s32 cmask)
{
    mlib_d64 buff_arr[4 * BUFF_LINE];
    mlib_s32 *pbuff = (mlib_s32 *)buff_arr, *buff0, *buff1, *buff2, *buffT;
    DTYPE *adr_src, *sl, *sp, *sl1;
    DTYPE *adr_dst, *dl, *dp;
    mlib_d64 k0, k1, k2, k3, scalef = 1.0;
    mlib_d64 p00, p01, p02, p10, p11, p12;
    mlib_s32 wid, hgt, sll, dll, wid1;
    mlib_s32 nchannel, chan1, chan2;
    mlib_s32 i, j, c, swid;

    LOAD_KERNEL_INTO_DOUBLE();
    GET_SRC_DST_PARAMETERS(DTYPE);

    vis_write_gsr(23 << 3);

    swid = wid + D_KER;

    /* round the line length up to an even number of elements */
    wid1 = (swid + 1) & ~1;

    if (wid1 > BUFF_LINE) {
        pbuff = __mlib_malloc(4 * sizeof (mlib_s32) * wid1);

        if (pbuff == NULL)
            return (MLIB_FAILURE);
    }

    /*
     * Each line buffer is also addressed at index -1 (buff0[i - 1] with
     * i == 0, and buff2[-1] after the buffers rotate), so the first buffer
     * must not start at the very beginning of the allocation or that access
     * lands one element before it.  The allocation holds 4 * wid1 elements
     * while only 3 * wid1 are used by the three buffers, so starting two
     * elements in still fits (wid1 is even and at least 2 whenever the heap
     * buffer is used; the stack array is larger still).  An even offset
     * keeps buff + i (i even) on the same 64-bit alignment as pbuff, which
     * the TYPE_64BIT loads below rely on (assuming mlib_s32 is 32 bits).
     */
    buff0 = pbuff + 2;
    buff1 = buff0 + wid1;
    buff2 = buff1 + wid1;

    chan1 = nchannel;
    chan2 = chan1 + chan1;

    swid -= dx_r;

    for (c = 0; c < nchannel; c++) {
        if (!(cmask & (1 << (nchannel - 1 - c))))
            continue;

        sl = adr_src + c;
        dl = adr_dst + c;

        /* second source row; reuse the first when there is no row below */
        if ((hgt - dy_b) > 0)
            sl1 = sl + sll;
        else
            sl1 = sl;

        /* prime the first two line buffers, stored shifted by one element */
#pragma pipeloop(0)
        for (i = 0; i < swid; i++) {
            buff0[i - 1] = (mlib_s32)sl[i * chan1];
            buff1[i - 1] = (mlib_s32)sl1[i * chan1];
        }

        /* right edge: duplicate the last loaded column */
        if (dx_r != 0) {
            buff0[swid - 1] = buff0[swid - 2];
            buff1[swid - 1] = buff1[swid - 2];
        }

        if ((hgt - dy_b) > 1)
            sl = sl1 + sll;
        else
            sl = sl1;

        for (j = 0; j < hgt; j++) {
            sp = sl;
            dp = dl;

            /* first sample of the next row goes into the -1 slot */
            buff2[-1] = (mlib_s32)sp[0];
            sp += chan1;

            p02 = buff0[-1];
            p12 = buff1[-1];

            /* main loop: two output samples per iteration */
#pragma pipeloop(0)
            for (i = 0; i <= (wid - 2); i += 2) {
                d64_2x32 sd0, sd1;
                d64_2x32 dd0, dd1;

                p00 = p02;
                p10 = p12;

                sd0.d64 = *(TYPE_64BIT *) (buff0 + i);
                sd1.d64 = *(TYPE_64BIT *) (buff1 + i);
                p01 = (mlib_d64)sd0.i32s.i0;
                p02 = (mlib_d64)sd0.i32s.i1;
                p11 = (mlib_d64)sd1.i32s.i0;
                p12 = (mlib_d64)sd1.i32s.i1;

                LOAD_BUFF(buff2);

                dd0.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3);
                dd0.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3);

                /*
                 * NOTE(review): dd1 is passed in before it has been written;
                 * presumably only the packed low byte of each word reaches
                 * the destination - confirm against vis_fpack32 semantics.
                 */
                dd1.d64 = vis_fpack32(dd1.d64, dd0.d64);
                STORE2(dd1.i32s.i0, dd1.i32s.i1);

                sp += chan2;
                dp += chan2;
            }

            /* tail: remaining sample when wid is odd */
            for (; i < wid; i++) {
                d64_2x32 dd0, dd1;

                p00 = buff0[i - 1];
                p10 = buff1[i - 1];
                p01 = buff0[i];
                p11 = buff1[i];

                buff2[i] = (mlib_s32)sp[0];

                dd0.i32s.i1 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3);
                dd1.d64 = vis_fpack32(dd1.d64, dd0.d64);
                dp[0] = dd1.i32s.i1;

                sp += chan1;
                dp += chan1;
            }

            /* right edge of the freshly loaded row */
            if (dx_r != 0)
                buff2[swid - 1] = buff2[swid - 2];

            /* stop advancing the source once the bottom rows are reached */
            if (j < hgt - dy_b - 2)
                sl += sll;
            dl += dll;

            /* rotate the three line buffers */
            buffT = buff0;
            buff0 = buff1;
            buff1 = buff2;
            buff2 = buffT;
        }
    }

    if (pbuff != (mlib_s32 *)buff_arr)
        __mlib_free(pbuff);

    return (MLIB_SUCCESS);
}