/*
 * Inverse 4x4 Walsh-Hadamard transform for the case where only in[0] is
 * read. The reconstructed values are added to the 4x4 pixel block at dest
 * (rows are dest_stride bytes apart) and every sum goes through clip_pixel().
 */
void vp9_iwht4x4_1_add_c(const int16_t *in, uint8_t *dest, int dest_stride) {
  int16_t row[4];
  int col;
  const int dc = in[0] >> UNIT_QUANT_SHIFT;
  const int dc_half = dc >> 1;

  /* First pass: row[0] keeps dc minus its half, the others take the half. */
  row[0] = (int16_t)(dc - dc_half);
  row[1] = row[2] = row[3] = (int16_t)dc_half;

  /* Second pass: split each entry the same way down its column. */
  for (col = 0; col < 4; ++col) {
    uint8_t *const px = dest + col;
    const int rest = row[col] >> 1;
    const int top = row[col] - rest;
    int r;

    px[0] = clip_pixel(px[0] + top);
    for (r = 1; r < 4; ++r)
      px[r * dest_stride] = clip_pixel(px[r * dest_stride] + rest);
  }
}
/*
 * DC-only inverse 4x4 Walsh-Hadamard transform: only in[0] is read. The
 * result is accumulated into the 4x4 block at dest (row pitch dest_stride)
 * with each sum passed through clip_pixel().
 */
void vp9_short_iwalsh4x4_1_add_c(int16_t *in, uint8_t *dest, int dest_stride) {
  int16_t first_pass[4];
  const int16_t *cur = first_pass;
  const int16_t *const end = first_pass + 4;
  int whole = in[0] >> WHT_UPSCALE_FACTOR;
  int half = whole >> 1;

  /* Horizontal stage. */
  whole -= half;
  first_pass[0] = (int16_t)whole;
  first_pass[1] = first_pass[2] = first_pass[3] = (int16_t)half;

  /* Vertical stage, one destination column per iteration. */
  for (; cur != end; ++cur, ++dest) {
    half = *cur >> 1;
    whole = *cur - half;
    dest[0] = clip_pixel(dest[0] + whole);
    dest[dest_stride] = clip_pixel(dest[dest_stride] + half);
    dest[2 * dest_stride] = clip_pixel(dest[2 * dest_stride] + half);
    dest[3 * dest_stride] = clip_pixel(dest[3 * dest_stride] + half);
  }
}
/*
 * Full inverse 4x4 Walsh-Hadamard transform (4-point reversible butterfly,
 * 3.5 adds and 0.5 shifts per pixel). Reads 16 coefficients from input,
 * adds the result to the 4x4 block at dest and clips with clip_pixel().
 */
void vp9_short_iwalsh4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
  int16_t output[16];
  int n;

  /* Pass 1: transform each group of four coefficients into output[]. */
  for (n = 0; n < 4; ++n) {
    const int16_t *const src = input + 4 * n;
    int16_t *const dst = output + 4 * n;
    int a = src[0] >> WHT_UPSCALE_FACTOR;
    int c = src[1] >> WHT_UPSCALE_FACTOR;
    int d = src[2] >> WHT_UPSCALE_FACTOR;
    int b = src[3] >> WHT_UPSCALE_FACTOR;
    int e;

    a += c;
    d -= b;
    e = (a - d) >> 1;
    b = e - b;
    c = e - c;
    a -= b;
    d += c;

    dst[0] = (int16_t)a;
    dst[1] = (int16_t)b;
    dst[2] = (int16_t)c;
    dst[3] = (int16_t)d;
  }

  /* Pass 2: same butterfly along the other axis, accumulated into dest. */
  for (n = 0; n < 4; ++n) {
    uint8_t *const px = dest + n;
    int a = output[n + 4 * 0];
    int c = output[n + 4 * 1];
    int d = output[n + 4 * 2];
    int b = output[n + 4 * 3];
    int e;

    a += c;
    d -= b;
    e = (a - d) >> 1;
    b = e - b;
    c = e - c;
    a -= b;
    d += c;

    px[dest_stride * 0] = clip_pixel(px[dest_stride * 0] + a);
    px[dest_stride * 1] = clip_pixel(px[dest_stride * 1] + b);
    px[dest_stride * 2] = clip_pixel(px[dest_stride * 2] + c);
    px[dest_stride * 3] = clip_pixel(px[dest_stride * 3] + d);
  }
}
/*
 * Full inverse 4x4 Walsh-Hadamard transform (4-point reversible butterfly,
 * 3.5 adds and 0.5 shifts per pixel). All 16 coefficients of input are
 * read; the result is added to the 4x4 block at dest (row pitch stride)
 * and clipped with clip_pixel().
 */
void vp9_iwht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) {
  int16_t stage[16];
  int idx;

  /* First pass over groups of four input coefficients. */
  for (idx = 0; idx < 16; idx += 4) {
    int a = input[idx + 0] >> UNIT_QUANT_SHIFT;
    int c = input[idx + 1] >> UNIT_QUANT_SHIFT;
    int d = input[idx + 2] >> UNIT_QUANT_SHIFT;
    int b = input[idx + 3] >> UNIT_QUANT_SHIFT;
    int e;

    a += c;
    d -= b;
    e = (a - d) >> 1;
    b = e - b;
    c = e - c;
    a -= b;
    d += c;

    stage[idx + 0] = (int16_t)a;
    stage[idx + 1] = (int16_t)b;
    stage[idx + 2] = (int16_t)c;
    stage[idx + 3] = (int16_t)d;
  }

  /* Second pass: walk stage[] with a pitch of four and add into dest. */
  for (idx = 0; idx < 4; ++idx) {
    uint8_t *const px = dest + idx;
    int a = stage[idx];
    int c = stage[idx + 4];
    int d = stage[idx + 8];
    int b = stage[idx + 12];
    int e;

    a += c;
    d -= b;
    e = (a - d) >> 1;
    b = e - b;
    c = e - c;
    a -= b;
    d += c;

    px[stride * 0] = clip_pixel(px[stride * 0] + a);
    px[stride * 1] = clip_pixel(px[stride * 1] + b);
    px[stride * 2] = clip_pixel(px[stride * 2] + c);
    px[stride * 3] = clip_pixel(px[stride * 3] + d);
  }
}
/*
=======================================
    Draw a circle outline
=======================================
*/
CR_API void_t
draw_circle (
  __CR_IO__ const sIMAGE*   dst,
  __CR_IN__ sint_t          cx,
  __CR_IN__ sint_t          cy,
  __CR_IN__ sint_t          radius,
  __CR_IN__ cpix_t          color,
  __CR_IN__ pixdraw_t       pixel_draw
    )
{
    sint_t          step, span, err;
    sint_t          xp, xm, yp, ym;
    const sRECT*    win;

    /* Nothing to draw for a non-positive radius */
    if (radius <= 0)
        return;
    win = &dst->clip_win;

    /* Quick reject when the bounding box misses the clip window
       (original note: overflow in these additions is harmless) */
    if (cx > win->x2 + radius)
        return;
    if (cx + radius < win->x1)
        return;
    if (cy > win->y2 + radius)
        return;
    if (cy + radius < win->y1)
        return;

    span = radius;
    err  = 3 - radius * 2;
    step = 0;
    while (step <= span)
    {
        /* Four mirrored points at offsets (+/-step, +/-span) */
        xp = cx + step;
        xm = cx - step;
        yp = cy + span;
        ym = cy - span;
        if (clip_pixel(xp, yp, win))
            pixel_draw(dst, xp, yp, color);
        if (clip_pixel(xp, ym, win))
            pixel_draw(dst, xp, ym, color);
        if (clip_pixel(xm, ym, win))
            pixel_draw(dst, xm, ym, color);
        if (clip_pixel(xm, yp, win))
            pixel_draw(dst, xm, yp, color);

        /* Four mirrored points at offsets (+/-span, +/-step) */
        xp = cx + span;
        xm = cx - span;
        yp = cy + step;
        ym = cy - step;
        if (clip_pixel(xp, yp, win))
            pixel_draw(dst, xp, yp, color);
        if (clip_pixel(xp, ym, win))
            pixel_draw(dst, xp, ym, color);
        if (clip_pixel(xm, ym, win))
            pixel_draw(dst, xm, ym, color);
        if (clip_pixel(xm, yp, win))
            pixel_draw(dst, xm, yp, color);

        /* Update the decision value; step inward when it is non-negative */
        if (err < 0) {
            err += step * 4 + 6;
        }
        else {
            err += (step - span) * 4 + 10;
            span -= 1;
        }
        step += 1;
    }
}
/*
 * DC-only 4x4 inverse DCT: derives a single value from input[0] (two
 * cospi_16_64 multiplies, each truncated to int16_t, then
 * ROUND_POWER_OF_TWO(.., 4)) and adds it to every pixel of the 4x4 block at
 * dest, clipping with clip_pixel().
 */
void vp9_short_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
  const int16_t pass1 = dct_const_round_shift(input[0] * cospi_16_64);
  const int16_t pass2 = dct_const_round_shift(pass1 * cospi_16_64);
  const int dc = ROUND_POWER_OF_TWO(pass2, 4);
  uint8_t *line = dest;
  int r, c;

  for (r = 0; r < 4; ++r, line += dest_stride) {
    for (c = 0; c < 4; ++c)
      line[c] = clip_pixel(line[c] + dc);
  }
}
/*
 * Vertical SUBPEL_TAPS-tap filter whose clipped result is averaged with the
 * value already stored in dst (ROUND_POWER_OF_TWO(old + new, 1)).
 * y0_q4 is the starting position and y_step_q4 the per-row increment; the
 * low bits (SUBPEL_MASK) select the kernel from y_filters and the bits above
 * SUBPEL_BITS select the source row.
 */
static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const InterpKernel *y_filters,
                              int y0_q4, int y_step_q4, int w, int h) {
  /* Back up so the kernel is centred on the requested row. */
  const uint8_t *const top = src - src_stride * (SUBPEL_TAPS / 2 - 1);
  int col;

  for (col = 0; col < w; ++col) {
    const uint8_t *const in_col = top + col;
    uint8_t *const out_col = dst + col;
    int pos_q4 = y0_q4;
    int row;

    for (row = 0; row < h; ++row) {
      const uint8_t *const taps_in =
          in_col + (pos_q4 >> SUBPEL_BITS) * src_stride;
      const int16_t *const kernel = y_filters[pos_q4 & SUBPEL_MASK];
      uint8_t *const px = out_col + row * dst_stride;
      int acc = 0;
      int t;

      for (t = 0; t < SUBPEL_TAPS; ++t)
        acc += taps_in[t * src_stride] * kernel[t];

      *px = ROUND_POWER_OF_TWO(
          *px + clip_pixel(ROUND_POWER_OF_TWO(acc, FILTER_BITS)), 1);
      pos_q4 += y_step_q4;
    }
  }
}
void vp9_iht16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride, int tx_type) { int i, j; int16_t out[16 * 16]; int16_t *outptr = out; int16_t temp_in[16], temp_out[16]; const transform_2d ht = IHT_16[tx_type]; // Rows for (i = 0; i < 16; ++i) { ht.rows(input, outptr); input += 16; outptr += 16; } // Columns for (i = 0; i < 16; ++i) { for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; ht.cols(temp_in, temp_out); for (j = 0; j < 16; ++j) dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) + dest[j * stride + i]); } }
void vp9_iht8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride, int tx_type) { int i, j; int16_t out[8 * 8]; int16_t *outptr = out; int16_t temp_in[8], temp_out[8]; const transform_2d ht = IHT_8[tx_type]; // inverse transform row vectors for (i = 0; i < 8; ++i) { ht.rows(input, outptr); input += 8; outptr += 8; } // inverse transform column vectors for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; ht.cols(temp_in, temp_out); for (j = 0; j < 8; ++j) dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * stride + i]); } }
void vp9_short_iht4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride, int tx_type) { const transform_2d IHT_4[] = { { vp9_idct4_1d, vp9_idct4_1d }, // DCT_DCT = 0 { iadst4_1d, vp9_idct4_1d }, // ADST_DCT = 1 { vp9_idct4_1d, iadst4_1d }, // DCT_ADST = 2 { iadst4_1d, iadst4_1d } // ADST_ADST = 3 }; int i, j; int16_t out[4 * 4]; int16_t *outptr = out; int16_t temp_in[4], temp_out[4]; // inverse transform row vectors for (i = 0; i < 4; ++i) { IHT_4[tx_type].rows(input, outptr); input += 4; outptr += 4; } // inverse transform column vectors for (i = 0; i < 4; ++i) { for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i]; IHT_4[tx_type].cols(temp_in, temp_out); for (j = 0; j < 4; ++j) dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * dest_stride + i]); } }
/*
 * 8x8 inverse DCT that transforms only the first four input rows; the other
 * intermediate rows stay zero. All eight columns are then transformed,
 * scaled with ROUND_POWER_OF_TWO(.., 5), added to dest and clipped.
 */
void vp9_short_idct10_8x8_add_c(int16_t *input, uint8_t *dest,
                                int dest_stride) {
  int16_t out[8 * 8] = { 0 };
  int16_t temp_in[8], temp_out[8];
  int r, c;

  /* Row pass: only the first 4 rows are expected to hold coefficients. */
  for (r = 0; r < 4; ++r)
    idct8_1d(input + 8 * r, out + 8 * r);

  /* Column pass. */
  for (c = 0; c < 8; ++c) {
    for (r = 0; r < 8; ++r)
      temp_in[r] = out[r * 8 + c];

    idct8_1d(temp_in, temp_out);

    for (r = 0; r < 8; ++r) {
      uint8_t *const px = &dest[r * dest_stride + c];
      *px = clip_pixel(ROUND_POWER_OF_TWO(temp_out[r], 5) + *px);
    }
  }
}
/*
 * 16x16 inverse DCT that transforms only the first four input rows (the
 * caller is expected to have all non-zero coefficients in the upper-left
 * 4x4 area); remaining intermediate rows stay zero. Columns are then
 * transformed, scaled with ROUND_POWER_OF_TWO(.., 6), added to dest and
 * clipped.
 */
void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest,
                                  int dest_stride) {
  int16_t out[16 * 16] = { 0 };
  int16_t temp_in[16], temp_out[16];
  int line, k;

  /* Row pass restricted to the first 4 rows. */
  for (line = 0; line < 4; ++line)
    idct16_1d(&input[16 * line], &out[16 * line]);

  /* Column pass over all 16 columns. */
  for (line = 0; line < 16; ++line) {
    uint8_t *px = dest + line;

    for (k = 0; k < 16; ++k)
      temp_in[k] = out[k * 16 + line];

    idct16_1d(temp_in, temp_out);

    for (k = 0; k < 16; ++k, px += dest_stride)
      *px = clip_pixel(ROUND_POWER_OF_TWO(temp_out[k], 6) + *px);
  }
}
/*
 * DC-only 8x8 inverse DCT: derives one value from input[0] (two cospi_16_64
 * multiplies, each truncated to int16_t, then ROUND_POWER_OF_TWO(.., 5)) and
 * adds it to every pixel of the 8x8 block at dest with clip_pixel().
 */
void vp9_short_idct8x8_1_add_c(int16_t *input, uint8_t *dest,
                               int dest_stride) {
  const int16_t pass1 = dct_const_round_shift(input[0] * cospi_16_64);
  const int16_t pass2 = dct_const_round_shift(pass1 * cospi_16_64);
  const int dc = ROUND_POWER_OF_TWO(pass2, 5);
  uint8_t *line = dest;
  int r, c;

  for (r = 0; r < 8; ++r, line += dest_stride) {
    for (c = 0; c < 8; ++c)
      line[c] = clip_pixel(line[c] + dc);
  }
}
/*
 * DC-only 16x16 inverse DCT: derives one value from input[0] (two
 * cospi_16_64 multiplies, each truncated to int16_t, then
 * ROUND_POWER_OF_TWO(.., 6)) and adds it to every pixel of the 16x16 block
 * at dest with clip_pixel().
 */
void vp9_idct16x16_1_add_c(const int16_t *input, uint8_t *dest, int stride) {
  const int16_t pass1 = dct_const_round_shift(input[0] * cospi_16_64);
  const int16_t pass2 = dct_const_round_shift(pass1 * cospi_16_64);
  const int dc = ROUND_POWER_OF_TWO(pass2, 6);
  int r;

  for (r = 0; r < 16; ++r) {
    uint8_t *const line = dest + r * stride;
    int c;

    for (c = 0; c < 16; ++c)
      line[c] = clip_pixel(line[c] + dc);
  }
}
/*
 * Horizontal SUBPEL_TAPS-tap filter. x0_q4 is the starting position and
 * x_step_q4 the per-pixel increment; the low bits (SUBPEL_MASK) select the
 * kernel from x_filters and the bits above SUBPEL_BITS select the source
 * column. Each result is rounded by FILTER_BITS and clipped into dst.
 */
static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const InterpKernel *x_filters,
                           int x0_q4, int x_step_q4, int w, int h) {
  /* Back up so the kernel is centred on the requested column. */
  const uint8_t *in_row = src - (SUBPEL_TAPS / 2 - 1);
  uint8_t *out_row = dst;
  int row;

  for (row = 0; row < h; ++row) {
    int pos_q4 = x0_q4;
    int col;

    for (col = 0; col < w; ++col) {
      const uint8_t *const taps_in = in_row + (pos_q4 >> SUBPEL_BITS);
      const int16_t *const kernel = x_filters[pos_q4 & SUBPEL_MASK];
      int acc = 0;
      int t;

      for (t = 0; t < SUBPEL_TAPS; ++t)
        acc += taps_in[t] * kernel[t];

      out_row[col] = clip_pixel(ROUND_POWER_OF_TWO(acc, FILTER_BITS));
      pos_q4 += x_step_q4;
    }

    in_row += src_stride;
    out_row += dst_stride;
  }
}
static void convolve_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y0, int y_step_q4, int w, int h, int taps) { int x, y, k; /* NOTE: This assumes that the filter table is 256-byte aligned. */ /* TODO(agrange) Modify to make independent of table alignment. */ const int16_t *const filter_y_base = (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff); /* Adjust base pointer address for this source column */ src -= src_stride * (taps / 2 - 1); for (x = 0; x < w; ++x) { /* Initial phase offset */ int y_q4 = (int)(filter_y0 - filter_y_base) / taps; for (y = 0; y < h; ++y) { /* Per-pixel src offset */ const int src_y = y_q4 >> SUBPEL_BITS; int sum = 0; /* Pointer to filter to use */ const int16_t *const filter_y = filter_y_base + (y_q4 & SUBPEL_MASK) * taps; for (k = 0; k < taps; ++k) sum += src[(src_y + k) * src_stride] * filter_y[k]; dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); /* Move to the next source pixel */ y_q4 += y_step_q4; } ++src; ++dst; } }
static void convolve_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x0, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int taps) { int x, y, k; /* NOTE: This assumes that the filter table is 256-byte aligned. */ /* TODO(agrange) Modify to make independent of table alignment. */ const int16_t *const filter_x_base = (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff); /* Adjust base pointer address for this source line */ src -= taps / 2 - 1; for (y = 0; y < h; ++y) { /* Initial phase offset */ int x_q4 = (int)(filter_x0 - filter_x_base) / taps; for (x = 0; x < w; ++x) { /* Per-pixel src offset */ const int src_x = x_q4 >> SUBPEL_BITS; int sum = 0; /* Pointer to filter to use */ const int16_t *const filter_x = filter_x_base + (x_q4 & SUBPEL_MASK) * taps; for (k = 0; k < taps; ++k) sum += src[src_x + k] * filter_x[k]; dst[x] = ROUND_POWER_OF_TWO(dst[x] + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); /* Move to the next source pixel */ x_q4 += x_step_q4; } src += src_stride; dst += dst_stride; } }
/*
 * Full 16x16 inverse DCT: idct16_1d over every row, then over every column.
 * The result is scaled with ROUND_POWER_OF_TWO(.., 6), added to the block
 * at dest (row pitch dest_stride) and clipped with clip_pixel().
 */
void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest,
                               int dest_stride) {
  int16_t out[16 * 16];
  int16_t temp_in[16], temp_out[16];
  int r, c;

  /* Row pass. */
  for (r = 0; r < 16; ++r)
    idct16_1d(input + 16 * r, out + 16 * r);

  /* Column pass. */
  for (c = 0; c < 16; ++c) {
    for (r = 0; r < 16; ++r)
      temp_in[r] = out[r * 16 + c];

    idct16_1d(temp_in, temp_out);

    for (r = 0; r < 16; ++r) {
      uint8_t *const px = &dest[r * dest_stride + c];
      *px = clip_pixel(ROUND_POWER_OF_TWO(temp_out[r], 6) + *px);
    }
  }
}
/*
 * Full 8x8 inverse DCT: idct8 over every row, then over every column. The
 * result is scaled with ROUND_POWER_OF_TWO(.., 5), added to the block at
 * dest (row pitch stride) and clipped with clip_pixel().
 */
void vp9_idct8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride) {
  int16_t out[8 * 8];
  int16_t temp_in[8], temp_out[8];
  int line, k;

  /* Row pass. */
  for (line = 0; line < 8; ++line)
    idct8(&input[8 * line], &out[8 * line]);

  /* Column pass. */
  for (line = 0; line < 8; ++line) {
    uint8_t *px = dest + line;

    for (k = 0; k < 8; ++k)
      temp_in[k] = out[k * 8 + line];

    idct8(temp_in, temp_out);

    for (k = 0; k < 8; ++k, px += stride)
      *px = clip_pixel(ROUND_POWER_OF_TWO(temp_out[k], 5) + *px);
  }
}
/*
 * Full 32x32 inverse DCT: idct32_1d over every row, then over every column.
 * The result is scaled with ROUND_POWER_OF_TWO(.., 6), added to the block
 * at dest (row pitch dest_stride) and clipped with clip_pixel().
 */
void vp9_short_idct32x32_add_c(int16_t *input, uint8_t *dest,
                               int dest_stride) {
  int16_t out[32 * 32];
  int16_t temp_in[32], temp_out[32];
  int r, c;

  /* Row pass. */
  for (r = 0; r < 32; ++r)
    idct32_1d(input + 32 * r, out + 32 * r);

  /* Column pass. */
  for (c = 0; c < 32; ++c) {
    for (r = 0; r < 32; ++r)
      temp_in[r] = out[r * 32 + c];

    idct32_1d(temp_in, temp_out);

    for (r = 0; r < 32; ++r) {
      uint8_t *const px = &dest[r * dest_stride + c];
      *px = clip_pixel(ROUND_POWER_OF_TWO(temp_out[r], 6) + *px);
    }
  }
}
/*
 * Full 4x4 inverse DCT: vp9_idct4_1d over every row, then over every
 * column. The result is scaled with ROUND_POWER_OF_TWO(.., 4), added to the
 * block at dest (row pitch dest_stride) and clipped with clip_pixel().
 */
void vp9_short_idct4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
  int16_t out[4 * 4];
  int16_t temp_in[4], temp_out[4];
  int line, k;

  /* Row pass. */
  for (line = 0; line < 4; ++line)
    vp9_idct4_1d(&input[4 * line], &out[4 * line]);

  /* Column pass. */
  for (line = 0; line < 4; ++line) {
    uint8_t *px = dest + line;

    for (k = 0; k < 4; ++k)
      temp_in[k] = out[k * 4 + line];

    vp9_idct4_1d(temp_in, temp_out);

    for (k = 0; k < 4; ++k, px += dest_stride)
      *px = clip_pixel(ROUND_POWER_OF_TWO(temp_out[k], 4) + *px);
  }
}
/*
 * Full 32x32 inverse DCT. During the row pass a row whose 32 coefficients
 * are all zero is not transformed; its intermediate row is cleared with
 * vpx_memset instead. Columns are then transformed with idct32, scaled with
 * ROUND_POWER_OF_TWO(.., 6), added to dest and clipped with clip_pixel().
 */
void vp9_idct32x32_1024_add_c(const int16_t *input, uint8_t *dest, int stride) {
  int16_t out[32 * 32];
  int16_t temp_in[32], temp_out[32];
  int r, c;

  /* Row pass. */
  for (r = 0; r < 32; ++r) {
    const int16_t *const coeffs = input + 32 * r;
    int16_t *const line = out + 32 * r;
    int any_set = 0;

    /* OR all coefficients together: the result is zero only for an
       all-zero row. */
    for (c = 0; c < 32; ++c)
      any_set |= coeffs[c];

    if (any_set)
      idct32(coeffs, line);
    else
      vpx_memset(line, 0, sizeof(int16_t) * 32);
  }

  /* Column pass. */
  for (c = 0; c < 32; ++c) {
    for (r = 0; r < 32; ++r)
      temp_in[r] = out[r * 32 + c];

    idct32(temp_in, temp_out);

    for (r = 0; r < 32; ++r) {
      uint8_t *const px = &dest[r * stride + c];
      *px = clip_pixel(ROUND_POWER_OF_TWO(temp_out[r], 6) + *px);
    }
  }
}
/*
 * 32x32 inverse DCT that transforms only the first eight input rows (the
 * caller is expected to have non-zero coefficients only in the upper-left
 * 8x8 area); remaining intermediate rows stay zero. Columns are then
 * transformed with idct32, scaled with ROUND_POWER_OF_TWO(.., 6), added to
 * dest and clipped with clip_pixel().
 */
void vp9_idct32x32_34_add_c(const int16_t *input, uint8_t *dest, int stride) {
  int16_t out[32 * 32] = {0};
  int16_t temp_in[32], temp_out[32];
  int line, k;

  /* Row pass restricted to the first 8 rows. */
  for (line = 0; line < 8; ++line)
    idct32(&input[32 * line], &out[32 * line]);

  /* Column pass over all 32 columns. */
  for (line = 0; line < 32; ++line) {
    uint8_t *px = dest + line;

    for (k = 0; k < 32; ++k)
      temp_in[k] = out[k * 32 + line];

    idct32(temp_in, temp_out);

    for (k = 0; k < 32; ++k, px += stride)
      *px = clip_pixel(ROUND_POWER_OF_TWO(temp_out[k], 6) + *px);
  }
}
/*
---------------------------------------
    Draw a single pixel
---------------------------------------
*/
static bool_t
qst_crh_pixel (
  __CR_IN__ void_t*     parm,
  __CR_IN__ uint_t      argc,
  __CR_IN__ ansi_t**    argv
    )
{
    sint_t  px, py;
    sIMAGE* target;

    /* Expected arguments: <X> <Y> */
    if (argc < 3)
        return (FALSE);

    /* Fail when the view has no paint surface */
    target = ((sQstView2D*)parm)->paint;
    if (target == NULL)
        return (FALSE);

    px = (sint_t)str2intxA(argv[1]);
    py = (sint_t)str2intxA(argv[2]);

    /* A point outside the clip window is skipped but still reports TRUE */
    if (clip_pixel(px, py, &target->clip_win))
        s_pixdraw(target, px, py, s_color);
    return (TRUE);
}
/*
=======================================
    Draw an ellipse outline

    The ellipse is centred on the middle
    of rect, with half-axes rect->ww / 2
    and rect->hh / 2. Every point is
    tested with clip_pixel() against
    dst->clip_win before pixel_draw()
    is called.
=======================================
*/
CR_API void_t
draw_ellipse (
  __CR_IO__ const sIMAGE*   dst,
  __CR_IN__ const sRECT*    rect,
  __CR_IN__ cpix_t          color,
  __CR_IN__ pixdraw_t       pixel_draw
    )
{
    const sRECT*    clip;
    sint_t  cx, cy, r1, r2, rr;
    sint_t  xx, yy, tn, r12, r22, xmax;

    r1 = (rect->ww / 2);
    r2 = (rect->hh / 2);
    cx = (rect->x1 + rect->x2) / 2;
    cy = (rect->y1 + rect->y2) / 2;
    clip = &dst->clip_win;

    /* Quick reject when the bounding box misses the clip window
       (original note: overflow in these additions is harmless) */
    if ((cx > clip->x2 + r1) || (cx + r1 < clip->x1) ||
        (cy > clip->y2 + r2) || (cy + r2 < clip->y1))
        return;

    /* ---- First pass: step along X, plot (+/-xx, +/-yy) ---- */
    xx = 0;
    yy = r2;
    r12 = r1 * r1;
    r22 = r2 * r2;

    /* With both radii zero the quotient would be 0/0 (NaN) and the
       conversion to an integer undefined; skip the loop instead so
       only the final plot at the centre remains */
    if (r12 + r22 > 0)
        xmax = (sint_t)(r12 / sqrt(r12 + r22));
    else
        xmax = -1;
    tn = r12 - 2 * r2 * r12;

    while (xx <= xmax)
    {
        if (tn < 0 || yy == 0) {
            tn += (4 * xx + 2) * r22;
        }
        else {
            tn += (4 * xx + 2) * r22 + (1 - yy) * 4 * r12;
            yy -= 1;
        }
        if (clip_pixel(cx + xx, cy + yy, clip))
            pixel_draw(dst, cx + xx, cy + yy, color);
        if (clip_pixel(cx - xx, cy + yy, clip))
            pixel_draw(dst, cx - xx, cy + yy, color);
        if (clip_pixel(cx + xx, cy - yy, clip))
            pixel_draw(dst, cx + xx, cy - yy, color);
        if (clip_pixel(cx - xx, cy - yy, clip))
            pixel_draw(dst, cx - xx, cy - yy, color);
        xx += 1;
    }

    /* Plot once more at the position where the loop stopped */
    if (clip_pixel(cx + xx, cy + yy, clip))
        pixel_draw(dst, cx + xx, cy + yy, color);
    if (clip_pixel(cx - xx, cy + yy, clip))
        pixel_draw(dst, cx - xx, cy + yy, color);
    if (clip_pixel(cx + xx, cy - yy, clip))
        pixel_draw(dst, cx + xx, cy - yy, color);
    if (clip_pixel(cx - xx, cy - yy, clip))
        pixel_draw(dst, cx - xx, cy - yy, color);

    /* ---- Second pass: radii swapped, plot (+/-yy, +/-xx) ---- */
    CR_SWAP(r1, r2, rr);
    xx = 0;
    yy = r2;
    r12 = r1 * r1;
    r22 = r2 * r2;

    /* Same zero-denominator guard as in the first pass */
    if (r12 + r22 > 0)
        xmax = (sint_t)(r12 / sqrt(r12 + r22));
    else
        xmax = -1;
    tn = r12 - 2 * r2 * r12;

    while (xx <= xmax)
    {
        if (tn < 0 || yy == 0) {
            tn += (4 * xx + 2) * r22;
        }
        else {
            tn += (4 * xx + 2) * r22 + (1 - yy) * 4 * r12;
            yy -= 1;
        }
        if (clip_pixel(cx + yy, cy + xx, clip))
            pixel_draw(dst, cx + yy, cy + xx, color);
        if (clip_pixel(cx + yy, cy - xx, clip))
            pixel_draw(dst, cx + yy, cy - xx, color);
        if (clip_pixel(cx - yy, cy + xx, clip))
            pixel_draw(dst, cx - yy, cy + xx, color);
        if (clip_pixel(cx - yy, cy - xx, clip))
            pixel_draw(dst, cx - yy, cy - xx, color);
        xx += 1;
    }

    /* Plot once more at the position where the loop stopped */
    if (clip_pixel(cx + yy, cy + xx, clip))
        pixel_draw(dst, cx + yy, cy + xx, color);
    if (clip_pixel(cx + yy, cy - xx, clip))
        pixel_draw(dst, cx + yy, cy - xx, color);
    if (clip_pixel(cx - yy, cy + xx, clip))
        pixel_draw(dst, cx - yy, cy + xx, color);
    if (clip_pixel(cx - yy, cy - xx, clip))
        pixel_draw(dst, cx - yy, cy - xx, color);
}
/*
 * Converts one row of Y/Cb/Cr samples into 4-byte output pixels at p_rgb.
 *
 * p_in_y, p_in_cb, p_in_cr  input sample rows (one byte per pixel each)
 * p_rgb                     output row, 4 bytes written per pixel
 * out_width                 number of pixels to convert
 *
 * Work is split into three stages: groups of 16 pixels using vector
 * macros, one optional group of 8 pixels, then a scalar loop for the
 * remaining pixels. In the scalar loop byte 0 is the constant 0xFF and
 * bytes 1..3 are clipped values derived from (y, cb), (y, cb, cr) and
 * (y, cr) respectively - presumably A, B, G, R given the function name.
 */
void yuv_abgr_convert_msa (JSAMPROW p_in_y, JSAMPROW p_in_cb, JSAMPROW p_in_cr,
                           JSAMPROW p_rgb, JDIMENSION out_width) {
  int y, cb, cr;
  /* Number of full 16-pixel groups and the leftover pixel count (0..15). */
  unsigned int col, num_cols_mul_16 = out_width >> 4;
  unsigned int remaining_wd = out_width & 0xF;
  /* NOTE(review): presumably byte splats of 0xFF and 128 - confirm against
     the MSA __msa_ldi_b documentation. */
  v16i8 alpha = __msa_ldi_b(0xFF);
  v16i8 const_128 = __msa_ldi_b(128);
  v16u8 out0, out1, out2, out3, input_y = {0};
  v16i8 input_cb, input_cr, out_rgb0, out_rgb1, out_ab0, out_ab1;
  v8i16 y_h0, y_h1, cb_h0, cb_h1, cr_h0, cr_h1;
  v4i32 cb_w0, cb_w1, cb_w2, cb_w3, cr_w0, cr_w1, cr_w2, cr_w3, zero = {0};
  v16i8 out_r0, out_g0, out_b0;

  /* Stage 1: 16 pixels per iteration (inputs advance 16 bytes, output
     advances 16 * 4 bytes). */
  for (col = num_cols_mul_16; col--;) {
    input_y = LD_UB(p_in_y);
    input_cb = LD_SB(p_in_cb);
    input_cr = LD_SB(p_in_cr);
    p_in_y += 16;
    p_in_cb += 16;
    p_in_cr += 16;
    /* Remove the chroma bias from every cb/cr lane. */
    input_cb -= const_128;
    input_cr -= const_128;
    /* Widen the samples before the colour math. */
    UNPCK_UB_SH(input_y, y_h0, y_h1);
    UNPCK_SB_SH(input_cb, cb_h0, cb_h1);
    UNPCK_SB_SH(input_cr, cr_h0, cr_h1);
    CALC_G4_FRM_YUV(y_h0, y_h1, cb_h0, cb_h1, cr_h0, cr_h1, out_g0);
    UNPCK_SH_SW(cr_h0, cr_w0, cr_w1);
    UNPCK_SH_SW(cr_h1, cr_w2, cr_w3);
    CALC_R4_FRM_YUV(y_h0, y_h1, cr_w0, cr_w1, cr_w2, cr_w3, out_r0);
    UNPCK_SH_SW(cb_h0, cb_w0, cb_w1);
    UNPCK_SH_SW(cb_h1, cb_w2, cb_w3);
    CALC_B4_FRM_YUV(y_h0, y_h1, cb_w0, cb_w1, cb_w2, cb_w3, out_b0);
    /* Interleave the channel vectors into packed 4-byte pixels. */
    ILVRL_B2_SB(out_r0, out_g0, out_rgb0, out_rgb1);
    ILVRL_B2_SB(out_b0, alpha, out_ab0, out_ab1);
    ILVRL_H2_UB(out_rgb0, out_ab0, out0, out1);
    ILVRL_H2_UB(out_rgb1, out_ab1, out2, out3);
    ST_UB4(out0, out1, out2, out3, p_rgb, 16);
    p_rgb += 16 * 4;
  }

  /* Stage 2: one group of 8 pixels when at least 8 remain. */
  if (remaining_wd >= 8) {
    uint64_t in_y, in_cb, in_cr;
    v16i8 input_cbcr = {0};
    in_y = LD(p_in_y);
    in_cb = LD(p_in_cb);
    in_cr = LD(p_in_cr);
    p_in_y += 8;
    p_in_cb += 8;
    p_in_cr += 8;
    /* Y goes into element 0 of input_y; cb and cr share one vector
       (elements 0 and 1 of input_cbcr). */
    input_y = (v16u8) __msa_insert_d((v2i64) input_y, 0, in_y);
    input_cbcr = (v16i8) __msa_insert_d((v2i64) input_cbcr, 0, in_cb);
    input_cbcr = (v16i8) __msa_insert_d((v2i64) input_cbcr, 1, in_cr);
    input_cbcr -= const_128;
    y_h0 = (v8i16) __msa_ilvr_b((v16i8) zero, (v16i8) input_y);
    UNPCK_SB_SH(input_cbcr, cb_h0, cr_h0);
    UNPCK_SH_SW(cb_h0, cb_w0, cb_w1);
    UNPCK_SH_SW(cr_h0, cr_w0, cr_w1);
    CALC_R2_FRM_YUV(y_h0, cr_w0, cr_w1, out_r0);
    CALC_G2_FRM_YUV(y_h0, cb_h0, cr_h0, out_g0);
    CALC_B2_FRM_YUV(y_h0, cb_w0, cb_w1, out_b0);
    out_rgb0 = (v16i8) __msa_ilvr_b((v16i8) out_r0, (v16i8) out_g0);
    out_ab0 = (v16i8) __msa_ilvr_b((v16i8) out_b0, alpha);
    ILVRL_H2_UB(out_rgb0, out_ab0, out0, out1);
    ST_UB2(out0, out1, p_rgb, 16);
    p_rgb += 16 * 2;
    remaining_wd -= 8;
  }

  /* Stage 3: scalar conversion of the last (fewer than 8) pixels. */
  for (col = 0; col < remaining_wd; col++) {
    y = (int) (p_in_y[col]);
    cb = (int) (p_in_cb[col]) - 128;
    cr = (int) (p_in_cr[col]) - 128;
    p_rgb[0] = 0xFF;
    p_rgb[1] = clip_pixel(y + ROUND_POWER_OF_TWO(FIX_1_77200 * cb, 16));
    p_rgb[2] = clip_pixel(y + ROUND_POWER_OF_TWO(((-FIX_0_34414) * cb -
                                                  FIX_0_71414 * cr), 16));
    p_rgb[3] = clip_pixel(y + ROUND_POWER_OF_TWO(FIX_1_40200 * cr, 16));
    p_rgb += 4;
  }
}
/*
 * Converts one row of Y/Cb/Cr samples into 3-byte output pixels at p_rgb.
 *
 * p_in_y, p_in_cb, p_in_cr  input sample rows (one byte per pixel each)
 * p_rgb                     output row, 3 bytes written per pixel
 * out_width                 number of pixels to convert
 *
 * Work is split into three stages: groups of 16 pixels using vector
 * macros, one optional group of 8 pixels, then a scalar loop for the
 * remaining pixels. In the scalar loop bytes 0..2 are clipped values
 * derived from (y, cb), (y, cb, cr) and (y, cr) respectively - presumably
 * B, G, R given the function name.
 */
void yuv_bgr_convert_msa (JSAMPROW p_in_y, JSAMPROW p_in_cb, JSAMPROW p_in_cr,
                          JSAMPROW p_rgb, JDIMENSION out_width) {
  int32_t y, cb, cr;
  /* Number of full 16-pixel groups and the leftover pixel count (0..15). */
  uint32_t col, num_cols_mul_16 = out_width >> 4;
  uint32_t remaining_wd = out_width & 0xF;
  /* Byte-shuffle index tables used by VSHF_B2_UB below.
     NOTE(review): indices 16..31 appear to select bytes of the second
     source vector (out_r0) - confirm against the VSHF_B2_UB definition. */
  v16u8 mask_rgb0 = {0, 1, 16, 2, 3, 17, 4, 5, 18, 6, 7, 19, 8, 9, 20, 10};
  v16u8 mask_rgb1 = {11, 21, 12, 13, 22, 14, 15, 23, 0, 1, 24, 2, 3, 25, 4, 5};
  v16u8 mask_rgb2 = {26, 6, 7, 27, 8, 9, 28, 10, 11, 29, 12, 13, 30, 14, 15, 31};
  v16u8 tmp0, tmp1, out0, out1, out2, input_y = {0};
  v16i8 input_cb, input_cr, out_rgb0, out_rgb1, const_128 = __msa_ldi_b(128);
  v8i16 y_h0, y_h1, cb_h0, cb_h1, cr_h0, cr_h1;
  v4i32 cb_w0, cb_w1, cb_w2, cb_w3, cr_w0, cr_w1, cr_w2, cr_w3, zero = {0};
  v16i8 out_r0, out_g0, out_b0;

  /* Stage 1: 16 pixels per iteration (inputs advance 16 bytes, output
     advances 3 * 16 bytes). */
  for (col = num_cols_mul_16; col--;) {
    input_y = LD_UB(p_in_y);
    input_cb = LD_SB(p_in_cb);
    input_cr = LD_SB(p_in_cr);
    p_in_y += 16;
    p_in_cb += 16;
    p_in_cr += 16;
    /* Remove the chroma bias from every cb/cr lane. */
    input_cb -= const_128;
    input_cr -= const_128;
    /* Widen the samples before the colour math. */
    UNPCK_UB_SH(input_y, y_h0, y_h1);
    UNPCK_SB_SH(input_cb, cb_h0, cb_h1);
    UNPCK_SB_SH(input_cr, cr_h0, cr_h1);
    CALC_G4_FRM_YUV(y_h0, y_h1, cb_h0, cb_h1, cr_h0, cr_h1, out_g0);
    UNPCK_SH_SW(cr_h0, cr_w0, cr_w1);
    UNPCK_SH_SW(cr_h1, cr_w2, cr_w3);
    CALC_R4_FRM_YUV(y_h0, y_h1, cr_w0, cr_w1, cr_w2, cr_w3, out_r0);
    UNPCK_SH_SW(cb_h0, cb_w0, cb_w1);
    UNPCK_SH_SW(cb_h1, cb_w2, cb_w3);
    CALC_B4_FRM_YUV(y_h0, y_h1, cb_w0, cb_w1, cb_w2, cb_w3, out_b0);
    /* Pair two channels, then shuffle the third in to form 3-byte pixels. */
    ILVRL_B2_SB(out_g0, out_b0, out_rgb0, out_rgb1);
    VSHF_B2_UB(out_rgb0, out_r0, out_rgb0, out_r0, mask_rgb0, mask_rgb1,
               out0, tmp0);
    VSHF_B2_UB(out_rgb1, out_r0, out_rgb1, out_r0, mask_rgb1, mask_rgb2,
               tmp1, out2);
    /* The middle output vector is assembled from tmp0 and tmp1. */
    out1 = (v16u8) __msa_sldi_b((v16i8) zero, (v16i8) tmp1, 8);
    out1 = (v16u8) __msa_pckev_d((v2i64) out1, (v2i64) tmp0);
    ST_UB(out0, p_rgb);
    p_rgb += 16;
    ST_UB(out1, p_rgb);
    p_rgb += 16;
    ST_UB(out2, p_rgb);
    p_rgb += 16;
  }

  /* Stage 2: one group of 8 pixels when at least 8 remain (output
     advances 16 + 8 = 24 bytes). */
  if (remaining_wd >= 8) {
    uint64_t in_y, in_cb, in_cr;
    v16i8 input_cbcr = {0};
    in_y = LD(p_in_y);
    in_cb = LD(p_in_cb);
    in_cr = LD(p_in_cr);
    p_in_y += 8;
    p_in_cb += 8;
    p_in_cr += 8;
    /* Y goes into element 0 of input_y; cb and cr share one vector
       (elements 0 and 1 of input_cbcr). */
    input_y = (v16u8) __msa_insert_d((v2i64) input_y, 0, in_y);
    input_cbcr = (v16i8) __msa_insert_d((v2i64) input_cbcr, 0, in_cb);
    input_cbcr = (v16i8) __msa_insert_d((v2i64) input_cbcr, 1, in_cr);
    input_cbcr -= const_128;
    y_h0 = (v8i16) __msa_ilvr_b((v16i8) zero, (v16i8) input_y);
    UNPCK_SB_SH(input_cbcr, cb_h0, cr_h0);
    UNPCK_SH_SW(cb_h0, cb_w0, cb_w1);
    UNPCK_SH_SW(cr_h0, cr_w0, cr_w1);
    CALC_R2_FRM_YUV(y_h0, cr_w0, cr_w1, out_r0);
    CALC_G2_FRM_YUV(y_h0, cb_h0, cr_h0, out_g0);
    CALC_B2_FRM_YUV(y_h0, cb_w0, cb_w1, out_b0);
    out_rgb0 = (v16i8) __msa_ilvr_b((v16i8) out_g0, (v16i8) out_b0);
    VSHF_B2_UB(out_rgb0, out_r0, out_rgb0, out_r0, mask_rgb0, mask_rgb1,
               out0, out1);
    ST_UB(out0, p_rgb);
    p_rgb += 16;
    ST8x1_UB(out1, p_rgb);
    p_rgb += 8;
    remaining_wd -= 8;
  }

  /* Stage 3: scalar conversion of the last (fewer than 8) pixels. */
  for (col = 0; col < remaining_wd; col++) {
    y = (int) (p_in_y[col]);
    cb = (int) (p_in_cb[col]) - 128;
    cr = (int) (p_in_cr[col]) - 128;
    /* Range-limiting is essential due to noise introduced by DCT losses. */
    p_rgb[0] = clip_pixel(y + ROUND_POWER_OF_TWO(FIX_1_77200 * cb, 16));
    p_rgb[1] = clip_pixel(y + ROUND_POWER_OF_TWO(((-FIX_0_34414) * cb -
                                                  FIX_0_71414 * cr), 16));
    p_rgb[2] = clip_pixel(y + ROUND_POWER_OF_TWO(FIX_1_40200 * cr, 16));
    p_rgb += 3;
  }
}