/*
 * Test whether @t is a pure translation by a whole number of pixels.
 *
 * A NULL transform counts as the identity (offset 0,0).  Otherwise the
 * upper-left 2x2 must be the fixed-point identity, the bottom row must be
 * [0 0 1] and neither translation term may carry a fractional part.
 *
 * Returns true and stores the integer offsets in @tx / @ty on success;
 * returns false, leaving @tx / @ty untouched, otherwise.
 */
bool sna_transform_is_integer_translation(const PictTransform *t, int16_t *tx, int16_t *ty)
{
	bool unit_matrix;

	if (t == NULL) {
		*ty = 0;
		*tx = 0;
		return true;
	}

	unit_matrix =
		t->matrix[0][0] == IntToxFixed(1) &&
		t->matrix[0][1] == 0 &&
		t->matrix[1][0] == 0 &&
		t->matrix[1][1] == IntToxFixed(1) &&
		t->matrix[2][0] == 0 &&
		t->matrix[2][1] == 0 &&
		t->matrix[2][2] == IntToxFixed(1);
	if (!unit_matrix)
		return false;

	/* Any sub-pixel component disqualifies the transform. */
	if (pixman_fixed_fraction(t->matrix[0][2]) != 0)
		return false;
	if (pixman_fixed_fraction(t->matrix[1][2]) != 0)
		return false;

	*tx = pixman_fixed_to_int(t->matrix[0][2]);
	*ty = pixman_fixed_to_int(t->matrix[1][2]);
	return true;
}
/*
 * Test whether @t can be treated as a whole-pixel translation for the
 * given sampling @filter.
 *
 * A NULL transform counts as the identity.  The matrix must have unit
 * scale and no rotation/projection.  For PictFilterNearest any translation
 * is accepted.  For other filters:
 *   - @precise: the translation must have no fractional part at all;
 *   - otherwise: a fraction strictly between 1/4 and 3/4 of a pixel in
 *     either axis is rejected.
 *
 * On success the translation, rounded to the nearest integer (half a
 * pixel is added before truncation), is stored in @tx / @ty and true is
 * returned.  On failure the outputs are left untouched.
 */
bool sna_transform_is_imprecise_integer_translation(const PictTransform *t,
						    int filter, bool precise,
						    int16_t *tx, int16_t *ty)
{
	bool unit_matrix;

	if (t == NULL) {
		DBG(("%s: no transform\n", __FUNCTION__));
		*ty = 0;
		*tx = 0;
		return true;
	}

	DBG(("%s: FilterNearest?=%d, precise?=%d, transform=[%f %f %f, %f %f %f, %f %f %f]\n",
	     __FUNCTION__, filter==PictFilterNearest, precise,
	     t->matrix[0][0]/65536., t->matrix[0][1]/65536., t->matrix[0][2]/65536.,
	     t->matrix[1][0]/65536., t->matrix[1][1]/65536., t->matrix[1][2]/65536.,
	     t->matrix[2][0]/65536., t->matrix[2][1]/65536., t->matrix[2][2]/65536.));

	unit_matrix =
		t->matrix[0][0] == IntToxFixed(1) &&
		t->matrix[0][1] == 0 &&
		t->matrix[1][0] == 0 &&
		t->matrix[1][1] == IntToxFixed(1) &&
		t->matrix[2][0] == 0 &&
		t->matrix[2][1] == 0 &&
		t->matrix[2][2] == IntToxFixed(1);
	if (!unit_matrix) {
		DBG(("%s: not unity scaling\n", __FUNCTION__));
		return false;
	}

	if (filter != PictFilterNearest) {
		const int frac_x = pixman_fixed_fraction(t->matrix[0][2]);
		const int frac_y = pixman_fixed_fraction(t->matrix[1][2]);

		if (precise) {
			if (frac_x || frac_y) {
				DBG(("%s: precise, fractional translation\n", __FUNCTION__));
				return false;
			}
		} else {
			/* Tolerate offsets within a quarter pixel of an integer. */
			if (frac_x > IntToxFixed(1)/4 && frac_x < IntToxFixed(3)/4) {
				DBG(("%s: imprecise, fractional translation X: %x\n",
				     __FUNCTION__, frac_x));
				return false;
			}

			if (frac_y > IntToxFixed(1)/4 && frac_y < IntToxFixed(3)/4) {
				DBG(("%s: imprecise, fractional translation Y: %x\n",
				     __FUNCTION__, frac_y));
				return false;
			}
		}
	}

	/* Round to nearest by biasing with half a pixel before truncating. */
	*tx = pixman_fixed_to_int(t->matrix[0][2] + IntToxFixed(1)/2);
	*ty = pixman_fixed_to_int(t->matrix[1][2] + IntToxFixed(1)/2);
	return true;
}
/*
 * Fetch one bilinearly filtered pixel at fixed-point position (x, y).
 *
 * The sample position is shifted by half a pixel, the four surrounding
 * texels are read through get_pixel and blended with
 * bilinear_interpolation.  When the image has a repeat mode the texel
 * coordinates are first passed through repeat() and read without bounds
 * checking (FALSE); with PIXMAN_REPEAT_NONE they are read with bounds
 * checking enabled (TRUE).
 */
static force_inline uint32_t
bits_image_fetch_pixel_bilinear (bits_image_t   *image,
                                 pixman_fixed_t  x,
                                 pixman_fixed_t  y,
                                 get_pixel_t     get_pixel)
{
    pixman_repeat_t repeat_mode = image->common.repeat;
    int img_w = image->width;
    int img_h = image->height;
    int fx = x - pixman_fixed_1 / 2;
    int fy = y - pixman_fixed_1 / 2;
    int32_t wx = pixman_fixed_to_bilinear_weight (fx);
    int32_t wy = pixman_fixed_to_bilinear_weight (fy);
    int left = pixman_fixed_to_int (fx);
    int top = pixman_fixed_to_int (fy);
    int right = left + 1;
    int bottom = top + 1;
    uint32_t tl, tr, bl, br;

    if (repeat_mode == PIXMAN_REPEAT_NONE)
    {
        tl = get_pixel (image, left, top, TRUE);
        tr = get_pixel (image, right, top, TRUE);
        bl = get_pixel (image, left, bottom, TRUE);
        br = get_pixel (image, right, bottom, TRUE);
    }
    else
    {
        repeat (repeat_mode, &left, img_w);
        repeat (repeat_mode, &top, img_h);
        repeat (repeat_mode, &right, img_w);
        repeat (repeat_mode, &bottom, img_h);

        tl = get_pixel (image, left, top, FALSE);
        bl = get_pixel (image, left, bottom, FALSE);
        tr = get_pixel (image, right, top, FALSE);
        br = get_pixel (image, right, bottom, FALSE);
    }

    return bilinear_interpolation (tl, tr, bl, br, wx, wy);
}
/*
 * pixman_add_traps()
 *
 * Walk the first @ntrap entries of @traps, offset each by (@x_off, @y_off)
 * pixels, clip it vertically to the image, and hand every non-empty one to
 * pixman_rasterize_edges().
 *
 * @image:  destination; its bits.height / bits.format are read after
 *          _pixman_image_validate().
 * @x_off:  horizontal offset in whole pixels applied to every trap.
 * @y_off:  vertical offset in whole pixels applied to every trap.
 * @ntrap:  number of entries in @traps; zero or negative means no work.
 * @traps:  array of traps (top/bot spans with l, r, y members).
 */
PIXMAN_EXPORT void
pixman_add_traps (pixman_image_t * image,
                  int16_t          x_off,
                  int16_t          y_off,
                  int              ntrap,
                  pixman_trap_t *  traps)
{
    int bpp;
    int height;
    int i;

    pixman_fixed_t x_off_fixed;
    pixman_fixed_t y_off_fixed;
    pixman_edge_t l, r;
    pixman_fixed_t t, b;

    _pixman_image_validate (image);

    height = image->bits.height;
    bpp = PIXMAN_FORMAT_BPP (image->bits.format);

    x_off_fixed = pixman_int_to_fixed (x_off);
    y_off_fixed = pixman_int_to_fixed (y_off);

    /* A counted loop, so that a negative ntrap is a no-op instead of
     * walking off the end of the array.
     */
    for (i = 0; i < ntrap; ++i)
    {
        const pixman_trap_t *trap = &traps[i];

        /* Clamp the top to the first scanline and snap to a sample row. */
        t = trap->top.y + y_off_fixed;
        if (t < 0)
            t = 0;
        t = pixman_sample_ceil_y (t, bpp);

        /* Clamp the bottom to just inside the last scanline. */
        b = trap->bot.y + y_off_fixed;
        if (pixman_fixed_to_int (b) >= height)
            b = pixman_int_to_fixed (height) - 1;
        b = pixman_sample_floor_y (b, bpp);

        if (b >= t)
        {
            /* initialize edge walkers */
            pixman_edge_init (&l, bpp, t,
                              trap->top.l + x_off_fixed,
                              trap->top.y + y_off_fixed,
                              trap->bot.l + x_off_fixed,
                              trap->bot.y + y_off_fixed);

            pixman_edge_init (&r, bpp, t,
                              trap->top.r + x_off_fixed,
                              trap->top.y + y_off_fixed,
                              trap->bot.r + x_off_fixed,
                              trap->bot.y + y_off_fixed);

            pixman_rasterize_edges (image, &l, &r, t, b);
        }
    }
}
/*
 * Fetch the single texel nearest to fixed-point position (x, y).
 *
 * pixman_fixed_e is subtracted before truncating to an integer
 * coordinate.  With a repeat mode the coordinates are wrapped through
 * repeat() and read without bounds checking; with PIXMAN_REPEAT_NONE the
 * texel is read with bounds checking enabled.
 */
static force_inline uint32_t
bits_image_fetch_pixel_nearest (bits_image_t   *image,
                                pixman_fixed_t  x,
                                pixman_fixed_t  y,
                                get_pixel_t     get_pixel)
{
    int px = pixman_fixed_to_int (x - pixman_fixed_e);
    int py = pixman_fixed_to_int (y - pixman_fixed_e);

    if (image->common.repeat == PIXMAN_REPEAT_NONE)
        return get_pixel (image, px, py, TRUE);

    repeat (image->common.repeat, &px, image->width);
    repeat (image->common.repeat, &py, image->height);

    return get_pixel (image, px, py, FALSE);
}
/*
 * pixman_rasterize_trapezoid()
 *
 * Rasterize a single trapezoid into a BITS image.
 *
 * @image:  destination; must be of type BITS (checked with
 *          return_if_fail before anything else is touched).
 * @trap:   trapezoid to draw; silently ignored when
 *          pixman_trapezoid_valid() rejects it.
 * @x_off:  horizontal offset in whole pixels.
 * @y_off:  vertical offset in whole pixels.
 *
 * The vertical extent is clipped to [0, height) and snapped to sample
 * rows; nothing is drawn when the clipped extent is empty.
 */
PIXMAN_EXPORT void
pixman_rasterize_trapezoid (pixman_image_t *          image,
                            const pixman_trapezoid_t *trap,
                            int                       x_off,
                            int                       y_off)
{
    int bpp;
    int height;

    pixman_fixed_t y_off_fixed;
    pixman_edge_t l, r;
    pixman_fixed_t t, b;

    return_if_fail (image->type == BITS);

    _pixman_image_validate (image);

    if (!pixman_trapezoid_valid (trap))
        return;

    height = image->bits.height;
    bpp = PIXMAN_FORMAT_BPP (image->bits.format);

    /* Only the vertical offset is needed in fixed point here; the edge
     * initializers below take the integer offsets directly.
     */
    y_off_fixed = pixman_int_to_fixed (y_off);

    /* Clamp the top to the first scanline and snap to a sample row. */
    t = trap->top + y_off_fixed;
    if (t < 0)
        t = 0;
    t = pixman_sample_ceil_y (t, bpp);

    /* Clamp the bottom to just inside the last scanline. */
    b = trap->bottom + y_off_fixed;
    if (pixman_fixed_to_int (b) >= height)
        b = pixman_int_to_fixed (height) - 1;
    b = pixman_sample_floor_y (b, bpp);

    if (b >= t)
    {
        /* initialize edge walkers */
        pixman_line_fixed_edge_init (&l, bpp, t, &trap->left, x_off, y_off);
        pixman_line_fixed_edge_init (&r, bpp, t, &trap->right, x_off, y_off);

        pixman_rasterize_edges (image, &l, &r, t, b);
    }
}
/*
 * Round y up onto the sample-row grid used for an n-bit coverage format.
 *
 * The fractional part is moved up to the next value of the form
 * Y_FRAC_FIRST (n) + k * STEP_Y_SMALL (n) (values already on the grid are
 * kept).  If that lands beyond Y_FRAC_LAST (n) the result moves to the
 * first row of the following pixel, except at integer part 0x7fff where
 * the fraction saturates at 0xffff.
 */
PIXMAN_EXPORT pixman_fixed_t
pixman_sample_ceil_y (pixman_fixed_t y, int n)
{
    pixman_fixed_t whole = pixman_fixed_floor (y);
    pixman_fixed_t frac = pixman_fixed_frac (y);

    frac = DIV (frac - Y_FRAC_FIRST (n) + (STEP_Y_SMALL (n) - pixman_fixed_e),
                STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) +
        Y_FRAC_FIRST (n);

    /* Still inside this pixel's rows: done. */
    if (frac <= Y_FRAC_LAST (n))
        return (whole | frac);

    if (pixman_fixed_to_int (whole) == 0x7fff)
    {
        frac = 0xffff; /* saturate */
    }
    else
    {
        frac = Y_FRAC_FIRST (n);
        whole += pixman_fixed_1;
    }

    return (whole | frac);
}
/*
 * Compute the largest value strictly less than y which is on a
 * grid row.
 *
 * The fractional part (minus pixman_fixed_e) is moved down to a value of
 * the form Y_FRAC_FIRST (n) + k * STEP_Y_SMALL (n).  If that falls below
 * Y_FRAC_FIRST (n) the result moves to the last row of the previous
 * pixel, except at the most negative integer part (-0x8000), where there
 * is no previous pixel and the fraction saturates at 0.
 */
PIXMAN_EXPORT pixman_fixed_t
pixman_sample_floor_y (pixman_fixed_t y,
                       int            n)
{
    pixman_fixed_t f = pixman_fixed_frac (y);
    pixman_fixed_t i = pixman_fixed_floor (y);

    f = DIV (f - pixman_fixed_e - Y_FRAC_FIRST (n), STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) +
        Y_FRAC_FIRST (n);

    if (f < Y_FRAC_FIRST (n))
    {
        /* pixman_fixed_to_int() yields a signed value, so the lowest
         * integer part is -0x8000.  Comparing against +0x8000 can never
         * match and would let "i -= pixman_fixed_1" overflow.
         */
        if (pixman_fixed_to_int (i) == -0x8000)
        {
            f = 0; /* saturate */
        }
        else
        {
            f = Y_FRAC_LAST (n);
            i -= pixman_fixed_1;
        }
    }
    return (i | f);
}
static force_inline void bits_image_fetch_bilinear_affine (pixman_image_t * image, int offset, int line, int width, uint32_t * buffer, const uint32_t * mask, convert_pixel_t convert_pixel, pixman_format_code_t format, pixman_repeat_t repeat_mode) { pixman_fixed_t x, y; pixman_fixed_t ux, uy; pixman_vector_t v; bits_image_t *bits = &image->bits; int i; /* reference point is the center of the pixel */ v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; v.vector[2] = pixman_fixed_1; if (!pixman_transform_point_3d (image->common.transform, &v)) return; ux = image->common.transform->matrix[0][0]; uy = image->common.transform->matrix[1][0]; x = v.vector[0]; y = v.vector[1]; for (i = 0; i < width; ++i) { int x1, y1, x2, y2; uint32_t tl, tr, bl, br; int32_t distx, disty; int width = image->bits.width; int height = image->bits.height; const uint8_t *row1; const uint8_t *row2; if (mask && !mask[i]) goto next; x1 = x - pixman_fixed_1 / 2; y1 = y - pixman_fixed_1 / 2; distx = pixman_fixed_to_bilinear_weight (x1); disty = pixman_fixed_to_bilinear_weight (y1); y1 = pixman_fixed_to_int (y1); y2 = y1 + 1; x1 = pixman_fixed_to_int (x1); x2 = x1 + 1; if (repeat_mode != PIXMAN_REPEAT_NONE) { uint32_t mask; mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; repeat (repeat_mode, &x1, width); repeat (repeat_mode, &y1, height); repeat (repeat_mode, &x2, width); repeat (repeat_mode, &y2, height); row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1; row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2; tl = convert_pixel (row1, x1) | mask; tr = convert_pixel (row1, x2) | mask; bl = convert_pixel (row2, x1) | mask; br = convert_pixel (row2, x2) | mask; } else { uint32_t mask1, mask2; int bpp; /* Note: PIXMAN_FORMAT_BPP() returns an unsigned value, * which means if you use it in expressions, those * expressions become unsigned themselves. 
Since * the variables below can be negative in some cases, * that will lead to crashes on 64 bit architectures. * * So this line makes sure bpp is signed */ bpp = PIXMAN_FORMAT_BPP (format); if (x1 >= width || x2 < 0 || y1 >= height || y2 < 0) { buffer[i] = 0; goto next; } if (y2 == 0) { row1 = zero; mask1 = 0; } else { row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1; row1 += bpp / 8 * x1; mask1 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; } if (y1 == height - 1) { row2 = zero; mask2 = 0; } else { row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2; row2 += bpp / 8 * x1; mask2 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; } if (x2 == 0) { tl = 0; bl = 0; } else { tl = convert_pixel (row1, 0) | mask1; bl = convert_pixel (row2, 0) | mask2; } if (x1 == width - 1) { tr = 0; br = 0; } else { tr = convert_pixel (row1, 1) | mask1; br = convert_pixel (row2, 1) | mask2; } } buffer[i] = bilinear_interpolation ( tl, tr, bl, br, distx, disty); next: x += ux; y += uy; } }
int main (int argc, char *argv[]) { bench_info_t binfo; pixman_filter_t filter = PIXMAN_FILTER_NEAREST; pixman_format_code_t src_format = PIXMAN_a8r8g8b8; pixman_format_code_t mask_format = 0; pixman_format_code_t dest_format = PIXMAN_a8r8g8b8; pixman_box32_t dest_box = { 0, 0, WIDTH, HEIGHT }; box_48_16_t transformed = { 0 }; int32_t xmin, ymin, xmax, ymax; uint32_t *src, *mask, *dest; binfo.op = PIXMAN_OP_SRC; binfo.mask_image = NULL; pixman_transform_init_identity (&binfo.transform); ++argv; if (*argv && (*argv)[0] == '-' && (*argv)[1] == 'n') { filter = PIXMAN_FILTER_NEAREST; ++argv; --argc; } if (*argv && (*argv)[0] == '-' && (*argv)[1] == 'b') { filter = PIXMAN_FILTER_BILINEAR; ++argv; --argc; } if (argc == 1 || !parse_arguments (argc, argv, &binfo.transform, &binfo.op, &src_format, &mask_format, &dest_format)) { printf ("Usage: affine-bench [-n] [-b] axx [axy] [ayx] [ayy] [combine type]\n"); printf (" [src format] [mask format] [dest format]\n"); printf (" -n : nearest scaling (default)\n"); printf (" -b : bilinear scaling\n"); printf (" axx : x_out:x_in factor\n"); printf (" axy : x_out:y_in factor (default 0)\n"); printf (" ayx : y_out:x_in factor (default 0)\n"); printf (" ayy : y_out:y_in factor (default 1)\n"); printf (" combine type : src, over, in etc (default src)\n"); printf (" src format : a8r8g8b8, r5g6b5 etc (default a8r8g8b8)\n"); printf (" mask format : as for src format, but no mask used if omitted\n"); printf (" dest format : as for src format (default a8r8g8b8)\n"); printf ("The output is a single number in megapixels/second.\n"); return EXIT_FAILURE; } /* Compute required extents for source and mask image so they qualify * for COVER fast paths and get the flags in pixman.c:analyze_extent(). * These computations are for FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, * but at the same time they also allow COVER_CLIP_NEAREST. 
*/ compute_transformed_extents (&binfo.transform, &dest_box, &transformed); xmin = pixman_fixed_to_int (transformed.x1 - pixman_fixed_1 / 2); ymin = pixman_fixed_to_int (transformed.y1 - pixman_fixed_1 / 2); xmax = pixman_fixed_to_int (transformed.x2 + pixman_fixed_1 / 2); ymax = pixman_fixed_to_int (transformed.y2 + pixman_fixed_1 / 2); /* Note: * The upper limits can be reduced to the following when fetchers * are guaranteed to not access pixels with zero weight. This concerns * particularly all bilinear samplers. * * xmax = pixman_fixed_to_int (transformed.x2 + pixman_fixed_1 / 2 - pixman_fixed_e); * ymax = pixman_fixed_to_int (transformed.y2 + pixman_fixed_1 / 2 - pixman_fixed_e); * This is equivalent to subtracting 0.5 and rounding up, rather than * subtracting 0.5, rounding down and adding 1. */ binfo.src_x = -xmin; binfo.src_y = -ymin; /* Always over-allocate width by 64 pixels for all src, mask and dst, * so that we can iterate over an x-offset 0..63 in bench (). * This is similar to lowlevel-blt-bench, which uses the same method * to hit different cacheline misalignments. */ create_image (xmax - xmin + 64, ymax - ymin + 1, src_format, filter, &src, &binfo.src_image); if (mask_format) { create_image (xmax - xmin + 64, ymax - ymin + 1, mask_format, filter, &mask, &binfo.mask_image); if ((PIXMAN_FORMAT_R(mask_format) || PIXMAN_FORMAT_G(mask_format) || PIXMAN_FORMAT_B(mask_format))) { pixman_image_set_component_alpha (binfo.mask_image, 1); } } create_image (WIDTH + 64, HEIGHT, dest_format, filter, &dest, &binfo.dest_image); run_benchmark (&binfo); return EXIT_SUCCESS; }
/* * pixman_composite_trapezoids() * * All the trapezoids are conceptually rendered to an infinitely big image. * The (0, 0) coordinates of this image are then aligned with the (x, y) * coordinates of the source image, and then both images are aligned with * the (x, y) coordinates of the destination. Then, in principle, compositing * of these three images takes place across the entire destination. * * FIXME: However, there is currently a bug, where we restrict this compositing * to the bounding box of the trapezoids. This is incorrect for operators such * as SRC and IN where blank source pixels do have an effect on the destination. */ PIXMAN_EXPORT void pixman_composite_trapezoids (pixman_op_t op, pixman_image_t * src, pixman_image_t * dst, pixman_format_code_t mask_format, int x_src, int y_src, int x_dst, int y_dst, int n_traps, const pixman_trapezoid_t * traps) { int i; if (n_traps <= 0) return; _pixman_image_validate (src); _pixman_image_validate (dst); if (op == PIXMAN_OP_ADD && (src->common.flags & FAST_PATH_IS_OPAQUE) && (mask_format == dst->common.extended_format_code) && !(dst->common.have_clip_region)) { for (i = 0; i < n_traps; ++i) { const pixman_trapezoid_t *trap = &(traps[i]); if (!pixman_trapezoid_valid (trap)) continue; pixman_rasterize_trapezoid (dst, trap, x_dst, y_dst); } } else { pixman_image_t *tmp; pixman_box32_t box; box.x1 = INT32_MAX; box.y1 = INT32_MAX; box.x2 = INT32_MIN; box.y2 = INT32_MIN; for (i = 0; i < n_traps; ++i) { const pixman_trapezoid_t *trap = &(traps[i]); int y1, y2; if (!pixman_trapezoid_valid (trap)) continue; y1 = pixman_fixed_to_int (trap->top); if (y1 < box.y1) box.y1 = y1; y2 = pixman_fixed_to_int (pixman_fixed_ceil (trap->bottom)); if (y2 > box.y2) box.y2 = y2; #define EXTEND_MIN(x) \ if (pixman_fixed_to_int ((x)) < box.x1) \ box.x1 = pixman_fixed_to_int ((x)); #define EXTEND_MAX(x) \ if (pixman_fixed_to_int (pixman_fixed_ceil ((x))) > box.x2) \ box.x2 = pixman_fixed_to_int (pixman_fixed_ceil ((x))); #define EXTEND(x) \ 
EXTEND_MIN(x); \ EXTEND_MAX(x); EXTEND(trap->left.p1.x); EXTEND(trap->left.p2.x); EXTEND(trap->right.p1.x); EXTEND(trap->right.p2.x); } if (box.x1 >= box.x2 || box.y1 >= box.y2) return; tmp = pixman_image_create_bits ( mask_format, box.x2 - box.x1, box.y2 - box.y1, NULL, -1); for (i = 0; i < n_traps; ++i) { const pixman_trapezoid_t *trap = &(traps[i]); if (!pixman_trapezoid_valid (trap)) continue; pixman_rasterize_trapezoid (tmp, trap, - box.x1, - box.y1); } pixman_image_composite (op, src, tmp, dst, x_src + box.x1, y_src + box.y1, 0, 0, x_dst + box.x1, y_dst + box.y1, box.x2 - box.x1, box.y2 - box.y1); pixman_image_unref (tmp); } }
/*
 * Horizontally interpolate source row `y` into line->buffer.
 *
 * Starting at fixed-point position `x` and stepping by `ux`, each loop
 * iteration produces two output pixels: for each it loads 64 bits (two
 * adjacent 32-bit words) from the row and blends them with weights taken
 * from the running position vector vx.  Each iteration stores one
 * 128-bit vector of 16-bit values to line->buffer.
 *
 * When `n` is odd, the last pixel is produced by jumping into the loop
 * body with the second pixel's input zeroed.  On return line->y records
 * which row the buffer now holds.
 */
static void
ssse3_fetch_horizontal (bits_image_t *image, line_t *line,
                        int y, pixman_fixed_t x, pixman_fixed_t ux, int n)
{
    uint32_t *bits = image->bits + y * image->rowstride;
    __m128i vx = _mm_set_epi16 (
        - (x + 1), x, - (x + 1), x,
        - (x + ux + 1), x + ux, - (x + ux + 1), x + ux);
    __m128i vux = _mm_set_epi16 (
        - 2 * ux, 2 * ux, - 2 * ux, 2 * ux,
        - 2 * ux, 2 * ux, - 2 * ux, 2 * ux);
    __m128i vaddc = _mm_set_epi16 (1, 0, 1, 0, 1, 0, 1, 0);
    __m128i *b = (__m128i *)line->buffer;
    __m128i vrl0, vrl1;

    /* Two output pixels per iteration; n ends at -2 (even) or -1 (odd). */
    while ((n -= 2) >= 0)
    {
        __m128i vw, vr, s;

        vrl1 = _mm_loadl_epi64 (
            (__m128i *)(bits + pixman_fixed_to_int (x + ux)));
        /* vrl1: R1, L1 */

    final_pixel:
        /* Entry point for the odd trailing pixel: vrl1 has been zeroed by
         * the caller of this label, only vrl0 is loaded from memory.
         */
        vrl0 = _mm_loadl_epi64 (
            (__m128i *)(bits + pixman_fixed_to_int (x)));
        /* vrl0: R0, L0 */

        /* The weights are based on vx which is a vector of
         *
         *    - (x + 1), x, - (x + 1), x,
         *          - (x + ux + 1), x + ux, - (x + ux + 1), x + ux
         *
         * so the 16 bit weights end up like this:
         *
         *    iw0, w0, iw0, w0, iw1, w1, iw1, w1
         *
         * and after shifting and packing, we get these bytes:
         *
         *    iw0, w0, iw0, w0, iw1, w1, iw1, w1,
         *        iw0, w0, iw0, w0, iw1, w1, iw1, w1,
         *
         * which means the first and the second input pixel
         * have to be interleaved like this:
         *
         *    la0, ra0, lr0, rr0, la1, ra1, lr1, rr1,
         *        lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1
         *
         * before maddubsw can be used.
         */

        vw = _mm_add_epi16 (
            vaddc, _mm_srli_epi16 (vx, 16 - BILINEAR_INTERPOLATION_BITS));
        /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1 */

        vw = _mm_packus_epi16 (vw, vw);
        /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1,
         *         iw0, w0, iw0, w0, iw1, w1, iw1, w1
         */
        vx = _mm_add_epi16 (vx, vux);

        x += 2 * ux;

        vr = _mm_unpacklo_epi16 (vrl1, vrl0);
        /* vr: rar0, rar1, rgb0, rgb1, lar0, lar1, lgb0, lgb1 */

        s = _mm_shuffle_epi32 (vr, _MM_SHUFFLE (1, 0, 3, 2));
        /* s: lar0, lar1, lgb0, lgb1, rar0, rar1, rgb0, rgb1 */

        vr = _mm_unpackhi_epi8 (vr, s);
        /* vr: la0, ra0, lr0, rr0, la1, ra1, lr1, rr1,
         *         lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1
         */

        vr = _mm_maddubs_epi16 (vr, vw);

        /* When the weight is 0, the inverse weight is
         * 128 which can't be represented in a signed byte.
         * As a result maddubsw computes the following:
         *
         *     r = l * -128 + r * 0
         *
         * rather than the desired
         *
         *     r = l * 128 + r * 0
         *
         * We fix this by taking the absolute value of the
         * result.
         */
        vr = _mm_abs_epi16 (vr);

        /* vr: A0, R0, A1, R1, G0, B0, G1, B1 */
        _mm_store_si128 (b++, vr);
    }

    /* Odd pixel count: run the loop body once more for the last pixel,
     * with a zero second pixel.  Afterwards the loop condition leaves n at
     * -3, so this branch is not taken a second time.
     */
    if (n == -1)
    {
        vrl1 = _mm_setzero_si128();
        goto final_pixel;
    }

    /* Remember which source row this buffer caches. */
    line->y = y;
}
/*
 * Iterator callback: produce one scanline of bilinearly scaled pixels in
 * iter->buffer and return it.
 *
 * The two source rows around info->y are interpolated horizontally by
 * ssse3_fetch_horizontal() into two cached line buffers (selected by the
 * row's parity) unless a buffer already holds that row.  The two rows are
 * then blended vertically with the weight derived from info->y, packed
 * to 8 bits per channel and stored.  Finally info->y advances by
 * transform->matrix[1][1].
 *
 * `mask` is not consulted by this function.
 */
static uint32_t *
ssse3_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask)
{
    pixman_fixed_t fx, ux;
    bilinear_info_t *info = iter->data;
    line_t *line0, *line1;
    int y0, y1;
    int32_t dist_y;
    __m128i vw;
    int i;

    fx = info->x;
    ux = iter->image->common.transform->matrix[0][0];

    y0 = pixman_fixed_to_int (info->y);
    y1 = y0 + 1;

    /* Consecutive rows differ in their low bit, so they never share a
     * cache slot.
     */
    line0 = &info->lines[y0 & 0x01];
    line1 = &info->lines[y1 & 0x01];

    /* Refill a slot only when it does not already hold the wanted row. */
    if (line0->y != y0)
    {
        ssse3_fetch_horizontal (
            &iter->image->bits, line0, y0, fx, ux, iter->width);
    }

    if (line1->y != y1)
    {
        ssse3_fetch_horizontal (
            &iter->image->bits, line1, y1, fx, ux, iter->width);
    }

    /* Scale the vertical weight up to the top bits of a 16-bit lane so
     * that mulhi yields (difference * weight) >> BILINEAR_INTERPOLATION_BITS.
     */
    dist_y = pixman_fixed_to_bilinear_weight (info->y);
    dist_y <<= (16 - BILINEAR_INTERPOLATION_BITS);

    vw = _mm_set_epi16 (
        dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y);

    /* Main loop: four output pixels (two vectors per row) per iteration. */
    for (i = 0; i + 3 < iter->width; i += 4)
    {
        __m128i top0 = _mm_load_si128 ((__m128i *)(line0->buffer + i));
        __m128i bot0 = _mm_load_si128 ((__m128i *)(line1->buffer + i));
        __m128i top1 = _mm_load_si128 ((__m128i *)(line0->buffer + i + 2));
        __m128i bot1 = _mm_load_si128 ((__m128i *)(line1->buffer + i + 2));
        __m128i r0, r1, tmp, p;

        /* The multiply is unsigned; where bot < top the wrapped difference
         * is corrected by subtracting the weight in those lanes.
         */
        r0 = _mm_mulhi_epu16 (
            _mm_sub_epi16 (bot0, top0), vw);
        tmp = _mm_cmplt_epi16 (bot0, top0);
        tmp = _mm_and_si128 (tmp, vw);
        r0 = _mm_sub_epi16 (r0, tmp);
        r0 = _mm_add_epi16 (r0, top0);
        r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS);
        /* r0:  A0 R0 A1 R1 G0 B0 G1 B1 */
        r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1));
        /* r0:  A1 R1 G1 B1 A0 R0 G0 B0 */

        r1 = _mm_mulhi_epu16 (
            _mm_sub_epi16 (bot1, top1), vw);
        tmp = _mm_cmplt_epi16 (bot1, top1);
        tmp = _mm_and_si128 (tmp, vw);
        r1 = _mm_sub_epi16 (r1, tmp);
        r1 = _mm_add_epi16 (r1, top1);
        r1 = _mm_srli_epi16 (r1, BILINEAR_INTERPOLATION_BITS);
        r1 = _mm_shuffle_epi32 (r1, _MM_SHUFFLE (2, 0, 3, 1));
        /* r1: A3 R3 G3 B3 A2 R2 G2 B2 */

        p = _mm_packus_epi16 (r0, r1);

        _mm_storeu_si128 ((__m128i *)(iter->buffer + i), p);
    }

    /* Tail: up to three remaining pixels, two (or one) at a time. */
    while (i < iter->width)
    {
        __m128i top0 = _mm_load_si128 ((__m128i *)(line0->buffer + i));
        __m128i bot0 = _mm_load_si128 ((__m128i *)(line1->buffer + i));
        __m128i r0, tmp, p;

        r0 = _mm_mulhi_epu16 (
            _mm_sub_epi16 (bot0, top0), vw);
        tmp = _mm_cmplt_epi16 (bot0, top0);
        tmp = _mm_and_si128 (tmp, vw);
        r0 = _mm_sub_epi16 (r0, tmp);
        r0 = _mm_add_epi16 (r0, top0);
        r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS);
        /* r0:  A0 R0 A1 R1 G0 B0 G1 B1 */
        r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1));
        /* r0:  A1 R1 G1 B1 A0 R0 G0 B0 */

        p = _mm_packus_epi16 (r0, r0);

        if (iter->width - i == 1)
        {
            /* Exactly one pixel left: store only 32 bits. */
            *(uint32_t *)(iter->buffer + i) = _mm_cvtsi128_si32 (p);
            i++;
        }
        else
        {
            _mm_storel_epi64 ((__m128i *)(iter->buffer + i), p);
            i += 2;
        }
    }

    /* Step to the source position of the next destination scanline. */
    info->y += iter->image->common.transform->matrix[1][1];

    return iter->buffer;
}
/*
 * Compute in *box the region that compositing `traps` onto `dest` with
 * operator `op` has to cover.
 *
 * If zero_src_has_no_effect[op] is false the whole destination is
 * returned.  Otherwise the box is the bounding box of all valid
 * trapezoids: tops and left-most x are truncated with
 * pixman_fixed_to_int, bottoms and right-most x are rounded up with
 * pixman_fixed_ceil.
 *
 * Returns FALSE when the resulting box is empty, TRUE otherwise.
 */
static pixman_bool_t
get_trap_extents (pixman_op_t op, pixman_image_t *dest,
                  const pixman_trapezoid_t *traps, int n_traps,
                  pixman_box32_t *box)
{
    int idx;

    /* When the operator is such that a zero source has an
     * effect on the underlying image, we have to
     * composite across the entire destination
     */
    if (!zero_src_has_no_effect [op])
    {
        box->x1 = 0;
        box->y1 = 0;
        box->x2 = dest->bits.width;
        box->y2 = dest->bits.height;
        return TRUE;
    }

    /* Start from an inverted box so that any point extends it. */
    box->x1 = INT32_MAX;
    box->y1 = INT32_MAX;
    box->x2 = INT32_MIN;
    box->y2 = INT32_MIN;

    for (idx = 0; idx < n_traps; ++idx)
    {
        const pixman_trapezoid_t *trap = &(traps[idx]);

        if (pixman_trapezoid_valid (trap))
        {
            int top = pixman_fixed_to_int (trap->top);
            int bottom = pixman_fixed_to_int (pixman_fixed_ceil (trap->bottom));

            if (top < box->y1)
                box->y1 = top;

            if (bottom > box->y2)
                box->y2 = bottom;

#define EXTEND_MIN(x)                                                   \
            if (pixman_fixed_to_int ((x)) < box->x1)                    \
                box->x1 = pixman_fixed_to_int ((x));
#define EXTEND_MAX(x)                                                   \
            if (pixman_fixed_to_int (pixman_fixed_ceil ((x))) > box->x2) \
                box->x2 = pixman_fixed_to_int (pixman_fixed_ceil ((x)));

#define EXTEND(x)                                                       \
            EXTEND_MIN(x);                                              \
            EXTEND_MAX(x);

            EXTEND(trap->left.p1.x);
            EXTEND(trap->left.p2.x);
            EXTEND(trap->right.p1.x);
            EXTEND(trap->right.p2.x);
        }
    }

    /* An empty box means no valid trapezoid contributed any area. */
    if (box->x1 >= box->x2)
        return FALSE;
    if (box->y1 >= box->y2)
        return FALSE;

    return TRUE;
}
/*
 * Fetch `width` nearest-neighbour pixels of an affine-transformed image
 * into `buffer`, starting at destination position (offset, line).
 *
 * The centre of the first destination pixel is pushed through the image
 * transform; the source position then advances by matrix[0][0] /
 * matrix[1][0] per output pixel.  Pixels whose `mask` entry is zero are
 * skipped and their buffer slot is left untouched.  With
 * PIXMAN_REPEAT_NONE, samples outside the image yield 0; otherwise the
 * coordinates are wrapped through repeat().  For formats without alpha
 * bits the fetched pixel is OR-ed with 0xff000000.
 */
static force_inline void
bits_image_fetch_nearest_affine (pixman_image_t *     image,
                                 int                  offset,
                                 int                  line,
                                 int                  width,
                                 uint32_t *           buffer,
                                 const uint32_t *     mask,
                                 convert_pixel_t      convert_pixel,
                                 pixman_format_code_t format,
                                 pixman_repeat_t      repeat_mode)
{
    bits_image_t *bits = &image->bits;
    pixman_vector_t v;
    pixman_fixed_t x, y;
    pixman_fixed_t ux, uy;
    int i;

    /* reference point is the center of the pixel */
    v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
    v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
    v.vector[2] = pixman_fixed_1;

    if (!pixman_transform_point_3d (image->common.transform, &v))
        return;

    ux = image->common.transform->matrix[0][0];
    uy = image->common.transform->matrix[1][0];

    x = v.vector[0];
    y = v.vector[1];

    /* The source position is stepped in the loop header so that skipped
     * pixels still advance it.
     */
    for (i = 0; i < width; ++i, x += ux, y += uy)
    {
        int src_width, src_height, sx, sy;
        const uint8_t *row;
        uint32_t opaque;

        if (mask && !mask[i])
            continue;

        src_width = image->bits.width;
        src_height = image->bits.height;
        sx = pixman_fixed_to_int (x - pixman_fixed_e);
        sy = pixman_fixed_to_int (y - pixman_fixed_e);

        if (repeat_mode == PIXMAN_REPEAT_NONE &&
            (sy < 0 || sy >= src_height || sx < 0 || sx >= src_width))
        {
            buffer[i] = 0;
            continue;
        }

        opaque = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;

        if (repeat_mode != PIXMAN_REPEAT_NONE)
        {
            repeat (repeat_mode, &sx, src_width);
            repeat (repeat_mode, &sy, src_height);
        }

        row = (uint8_t *)bits->bits + bits->rowstride * 4 * sy;

        buffer[i] = convert_pixel (row, sx) | opaque;
    }
}
/*
 * We want to detect the case where we add the same value to a long
 * span of pixels.  The triangles on the end are filled in while we
 * count how many sub-pixel scanlines contribute to the middle section.
 *
 *                 +--------------------------+
 *  fill_height = |   \                      /
 *                     +------------------+
 *                      |================|
 *                   fill_start       fill_end
 *
 * Walks the left edge `l` and right edge `r` from sample row `t` down to
 * sample row `b` (inclusive), adding coverage to the 8-bit pixels of
 * `image` one sub-pixel scanline at a time.  The span interior that is
 * common to consecutive sub-scanlines of the same pixel row is tracked in
 * fill_start/fill_end/fill_size and written out in one go.
 */
static void
rasterize_edges_8 (pixman_image_t *image,
                   pixman_edge_t * l,
                   pixman_edge_t * r,
                   pixman_fixed_t  t,
                   pixman_fixed_t  b)
{
    pixman_fixed_t y = t;
    uint32_t  *line;
    /* Deferred interior fill: pixel range and number of sub-scanlines
     * accumulated so far; -1/-1 means nothing is pending.
     */
    int fill_start = -1, fill_end = -1;
    int fill_size = 0;
    uint32_t *buf = (image)->bits.bits;
    int stride = (image)->bits.rowstride;
    int width = (image)->bits.width;

    line = buf + pixman_fixed_to_int (y) * stride;

    for (;;)
    {
        uint8_t *ap = (uint8_t *) line;
        pixman_fixed_t lx, rx;
        int lxi, rxi;

        /* clip X */
        lx = l->x;
        if (lx < 0)
            lx = 0;

        rx = r->x;

        if (pixman_fixed_to_int (rx) >= width)
        {
            /* Use the last pixel of the scanline, covered 100%.
             * We can't use the first pixel following the scanline,
             * because accessing it could result in a buffer overrun.
             */
            rx = pixman_int_to_fixed (width) - 1;
        }

        /* Skip empty (or backwards) sections */
        if (rx > lx)
        {
            int lxs, rxs;

            /* Find pixel bounds for span. */
            lxi = pixman_fixed_to_int (lx);
            rxi = pixman_fixed_to_int (rx);

            /* Sample coverage for edge pixels */
            lxs = RENDER_SAMPLES_X (lx, 8);
            rxs = RENDER_SAMPLES_X (rx, 8);

            /* Add coverage across row */
            if (lxi == rxi)
            {
                /* Both edges fall in the same pixel. */
                WRITE (image, ap + lxi,
                       clip255 (READ (image, ap + lxi) + rxs - lxs));
            }
            else
            {
                /* Partial coverage of the left-most pixel. */
                WRITE (image, ap + lxi,
                       clip255 (READ (image, ap + lxi) + N_X_FRAC (8) - lxs));

                /* Move forward so that lxi/rxi is the pixel span */
                lxi++;

                /* Don't bother trying to optimize the fill unless
                 * the span is longer than 4 pixels.
                 */
                if (rxi - lxi > 4)
                {
                    if (fill_start < 0)
                    {
                        /* First long span of this pixel row: just record it. */
                        fill_start = lxi;
                        fill_end = rxi;
                        fill_size++;
                    }
                    else
                    {
                        if (lxi >= fill_end || rxi < fill_start)
                        {
                            /* We're beyond what we saved, just fill it */
                            ADD_SATURATE_8 (ap + fill_start,
                                            fill_size * N_X_FRAC (8),
                                            fill_end - fill_start);
                            fill_start = lxi;
                            fill_end = rxi;
                            fill_size = 1;
                        }
                        else
                        {
                            /* Update fill_start */
                            if (lxi > fill_start)
                            {
                                ADD_SATURATE_8 (ap + fill_start,
                                                fill_size * N_X_FRAC (8),
                                                lxi - fill_start);
                                fill_start = lxi;
                            }
                            else if (lxi < fill_start)
                            {
                                ADD_SATURATE_8 (ap + lxi, N_X_FRAC (8),
                                                fill_start - lxi);
                            }

                            /* Update fill_end */
                            if (rxi < fill_end)
                            {
                                ADD_SATURATE_8 (ap + rxi,
                                                fill_size * N_X_FRAC (8),
                                                fill_end - rxi);
                                fill_end = rxi;
                            }
                            else if (fill_end < rxi)
                            {
                                ADD_SATURATE_8 (ap + fill_end,
                                                N_X_FRAC (8),
                                                rxi - fill_end);
                            }
                            fill_size++;
                        }
                    }
                }
                else
                {
                    /* Short span: add full coverage to it directly. */
                    ADD_SATURATE_8 (ap + lxi, N_X_FRAC (8), rxi - lxi);
                }

                /* Partial coverage of the right-most pixel. */
                WRITE (image, ap + rxi, clip255 (READ (image, ap + rxi) + rxs));
            }
        }

        if (y == b)
        {
            /* We're done, make sure we clean up any remaining fill. */
            if (fill_start != fill_end)
            {
                if (fill_size == N_Y_FRAC (8))
                {
                    /* Every sub-scanline contributed: fully covered. */
                    MEMSET_WRAPPED (image, ap + fill_start,
                                    0xff, fill_end - fill_start);
                }
                else
                {
                    ADD_SATURATE_8 (ap + fill_start, fill_size * N_X_FRAC (8),
                                    fill_end - fill_start);
                }
            }
            break;
        }

        if (pixman_fixed_frac (y) != Y_FRAC_LAST (8))
        {
            /* Next sub-scanline within the same pixel row. */
            RENDER_EDGE_STEP_SMALL (l);
            RENDER_EDGE_STEP_SMALL (r);
            y += STEP_Y_SMALL (8);
        }
        else
        {
            /* Last sub-scanline of this pixel row: step to the next row
             * and flush the pending fill for the row just finished.
             */
            RENDER_EDGE_STEP_BIG (l);
            RENDER_EDGE_STEP_BIG (r);
            y += STEP_Y_BIG (8);
            if (fill_start != fill_end)
            {
                if (fill_size == N_Y_FRAC (8))
                {
                    MEMSET_WRAPPED (image, ap + fill_start,
                                    0xff, fill_end - fill_start);
                }
                else
                {
                    ADD_SATURATE_8 (ap + fill_start, fill_size * N_X_FRAC (8),
                                    fill_end - fill_start);
                }
                fill_start = fill_end = -1;
                fill_size = 0;
            }
            line += stride;
        }
    }
}
/*
 * Iterator callback: fetch one bilinearly filtered scanline from a 32-bit
 * image without repeat into iter->buffer, advance iter->y, and return the
 * buffer.
 *
 * Only the horizontal scale (matrix[0][0]) is stepped per pixel, so one
 * pair of source rows serves the whole scanline.  Rows and columns that
 * fall outside the image contribute zero.  When `mask` is non-NULL,
 * pixels in the main part and on the right edge whose mask entry is zero
 * are left unwritten.
 */
static uint32_t *
bits_image_fetch_bilinear_no_repeat_8888 (pixman_iter_t *iter,
                                          const uint32_t *mask)
{

    pixman_image_t * ima = iter->image;
    int              offset = iter->x;
    int              line = iter->y++;
    int              width = iter->width;
    uint32_t *       buffer = iter->buffer;

    bits_image_t *bits = &ima->bits;
    pixman_fixed_t x_top, x_bottom, x;
    pixman_fixed_t ux_top, ux_bottom, ux;
    pixman_vector_t v;
    uint32_t top_mask, bottom_mask;
    uint32_t *top_row;
    uint32_t *bottom_row;
    uint32_t *end;
    /* Stand-in row of two zero pixels for rows outside the image. */
    uint32_t zero[2] = { 0, 0 };
    /* Always-set mask word used when the caller passes no mask. */
    uint32_t one = 1;
    int y, y1, y2;
    int disty;
    int mask_inc;
    int w;

    /* reference point is the center of the pixel */
    v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
    v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
    v.vector[2] = pixman_fixed_1;

    if (!pixman_transform_point_3d (bits->common.transform, &v))
        return iter->buffer;

    ux = ux_top = ux_bottom = bits->common.transform->matrix[0][0];
    x = x_top = x_bottom = v.vector[0] - pixman_fixed_1/2;

    y = v.vector[1] - pixman_fixed_1/2;
    disty = pixman_fixed_to_bilinear_weight (y);

    /* Load the pointers to the first and second lines from the source
     * image that the bilinear code must read.
     *
     * The main trick in this code is the check for whether either line
     * is outside of the image.
     *
     * When a line (either one) is outside, its pointer is switched to a
     * dummy area holding zeros.  Once that is done the pointer must not
     * move, so the per-pixel position and increment used for that row
     * inside the loops are set to zero as well.
     */
    y1 = pixman_fixed_to_int (y);
    y2 = y1 + 1;

    if (y1 < 0 || y1 >= bits->height)
    {
        top_row = zero;
        x_top = 0;
        ux_top = 0;
    }
    else
    {
        top_row = bits->bits + y1 * bits->rowstride;
        x_top = x;
        ux_top = ux;
    }

    if (y2 < 0 || y2 >= bits->height)
    {
        bottom_row = zero;
        x_bottom = 0;
        ux_bottom = 0;
    }
    else
    {
        bottom_row = bits->bits + y2 * bits->rowstride;
        x_bottom = x;
        ux_bottom = ux;
    }

    /* Instead of checking whether the operation uses the mask in
     * each loop iteration, verify this only once and prepare the
     * variables to make the code smaller inside the loop.
     */
    if (!mask)
    {
        mask_inc = 0;
        mask = &one;
    }
    else
    {
        /* If have a mask, prepare the variables to check it */
        mask_inc = 1;
    }

    /* If both are zero, then the whole thing is zero */
    if (top_row == zero && bottom_row == zero)
    {
        memset (buffer, 0, width * sizeof (uint32_t));
        return iter->buffer;
    }
    else if (bits->format == PIXMAN_x8r8g8b8)
    {
        /* x8r8g8b8 has no alpha: force it opaque, but only for rows that
         * really come from the image, so that the zero row stays zero.
         */
        if (top_row == zero)
        {
            top_mask = 0;
            bottom_mask = 0xff000000;
        }
        else if (bottom_row == zero)
        {
            top_mask = 0xff000000;
            bottom_mask = 0;
        }
        else
        {
            top_mask = 0xff000000;
            bottom_mask = 0xff000000;
        }
    }
    else
    {
        top_mask = 0;
        bottom_mask = 0;
    }

    end = buffer + width;

    /* Zero fill to the left of the image */
    while (buffer < end && x < pixman_fixed_minus_1)
    {
        *buffer++ = 0;
        x += ux;
        x_top += ux_top;
        x_bottom += ux_bottom;
        mask += mask_inc;
    }

    /* Left edge: only the right-hand texels are inside the image */
    while (buffer < end && x < 0)
    {
        uint32_t tr, br;
        int32_t distx;

        tr = top_row[pixman_fixed_to_int (x_top) + 1] | top_mask;
        br = bottom_row[pixman_fixed_to_int (x_bottom) + 1] | bottom_mask;

        distx = pixman_fixed_to_bilinear_weight (x);

        *buffer++ = bilinear_interpolation (0, tr, 0, br, distx, disty);

        x += ux;
        x_top += ux_top;
        x_bottom += ux_bottom;
        mask += mask_inc;
    }

    /* Main part: all four texels are inside the image horizontally */
    w = pixman_int_to_fixed (bits->width - 1);

    while (buffer < end && x < w)
    {
        if (*mask)
        {
            uint32_t tl, tr, bl, br;
            int32_t distx;

            tl = top_row [pixman_fixed_to_int (x_top)] | top_mask;
            tr = top_row [pixman_fixed_to_int (x_top) + 1] | top_mask;
            bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask;
            br = bottom_row [pixman_fixed_to_int (x_bottom) + 1] | bottom_mask;

            distx = pixman_fixed_to_bilinear_weight (x);

            *buffer = bilinear_interpolation (tl, tr, bl, br, distx, disty);
        }

        buffer++;
        x += ux;
        x_top += ux_top;
        x_bottom += ux_bottom;
        mask += mask_inc;
    }

    /* Right edge: only the left-hand texels are inside the image */
    w = pixman_int_to_fixed (bits->width);
    while (buffer < end && x < w)
    {
        if (*mask)
        {
            uint32_t tl, bl;
            int32_t distx;

            tl = top_row [pixman_fixed_to_int (x_top)] | top_mask;
            bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask;

            distx = pixman_fixed_to_bilinear_weight (x);

            *buffer = bilinear_interpolation (tl, 0, bl, 0, distx, disty);
        }

        buffer++;
        x += ux;
        x_top += ux_top;
        x_bottom += ux_bottom;
        mask += mask_inc;
    }

    /* Zero fill to the right of the image */
    while (buffer < end)
        *buffer++ = 0;

    return iter->buffer;
}
/*
 * Fetch one pixel at fixed-point position (x, y) through the image's
 * convolution filter.
 *
 * image->common.filter_params holds the kernel width and height as
 * fixed-point values, followed by the kernel coefficients in row-major
 * order.  Each texel under the kernel with a non-zero coefficient is read
 * via get_pixel (wrapped through repeat() first when a repeat mode is
 * set, bounds-checked otherwise), its channels are weighted and summed,
 * and the sums are shifted down by 16 and clipped to [0, 0xff].
 *
 * Returns the result packed as (A << 24) | (R << 16) | (G << 8) | B.
 */
static force_inline uint32_t
bits_image_fetch_pixel_convolution (bits_image_t   *image,
                                    pixman_fixed_t  x,
                                    pixman_fixed_t  y,
                                    get_pixel_t     get_pixel)
{
    pixman_fixed_t *params = image->common.filter_params;
    pixman_repeat_t repeat_mode = image->common.repeat;
    int img_w = image->width;
    int img_h = image->height;
    int x_off = (params[0] - pixman_fixed_1) >> 1;
    int y_off = (params[1] - pixman_fixed_1) >> 1;
    int32_t kernel_w = pixman_fixed_to_int (params[0]);
    int32_t kernel_h = pixman_fixed_to_int (params[1]);
    const pixman_fixed_t *coeff = params + 2;
    int32_t left, right, top, bottom;
    int32_t row, col;
    int sum_a = 0, sum_r = 0, sum_g = 0, sum_b = 0;

    /* Top-left texel covered by the kernel. */
    left = pixman_fixed_to_int (x - pixman_fixed_e - x_off);
    top = pixman_fixed_to_int (y - pixman_fixed_e - y_off);
    right = left + kernel_w;
    bottom = top + kernel_h;

    for (row = top; row < bottom; ++row)
    {
        /* The coefficient pointer advances with every kernel cell,
         * including those skipped for having a zero weight.
         */
        for (col = left; col < right; ++col, ++coeff)
        {
            pixman_fixed_t f = *coeff;
            uint32_t pixel;
            int rx = col;
            int ry = row;

            if (!f)
                continue;

            if (repeat_mode == PIXMAN_REPEAT_NONE)
            {
                pixel = get_pixel (image, rx, ry, TRUE);
            }
            else
            {
                repeat (repeat_mode, &rx, img_w);
                repeat (repeat_mode, &ry, img_h);

                pixel = get_pixel (image, rx, ry, FALSE);
            }

            sum_r += (int)RED_8 (pixel) * f;
            sum_g += (int)GREEN_8 (pixel) * f;
            sum_b += (int)BLUE_8 (pixel) * f;
            sum_a += (int)ALPHA_8 (pixel) * f;
        }
    }

    /* Drop the 16 fractional bits of the fixed-point products. */
    sum_a >>= 16;
    sum_r >>= 16;
    sum_g >>= 16;
    sum_b >>= 16;

    sum_a = CLIP (sum_a, 0, 0xff);
    sum_r = CLIP (sum_r, 0, 0xff);
    sum_g = CLIP (sum_g, 0, 0xff);
    sum_b = CLIP (sum_b, 0, 0xff);

    return ((sum_a << 24) | (sum_r << 16) | (sum_g << 8) | (sum_b));
}