// Converts a row of packed 32-bit ARGB pixels to U and V bytes.
//
// The vector loop consumes 32 'argb' entries per iteration and emits 16 bytes
// to each of 'u' and 'v'. Pixels beyond the largest multiple of 32 are handed
// to WebPConvertARGBToUV_C().
//
// argb:      source pixels.
// u, v:      destination chroma rows.
// src_width: number of source pixels.
// do_store:  if non-zero the results overwrite 'u'/'v'; if zero they are
//            averaged (rounded, per byte) with the bytes already stored there.
static void ConvertARGBToUV_SSE41(const uint32_t* argb,
                                  uint8_t* u, uint8_t* v,
                                  int src_width, int do_store) {
  const int simd_width = src_width & ~31;  // largest multiple of 32
  int pos = 0;

  while (pos < simd_width) {
    __m128i planes[6];
    __m128i u16[2], v16[2];   // 16-bit U/V for each 16-pixel half
    __m128i u_out, v_out;
    int half;

    for (half = 0; half < 2; ++half) {
      RGB32PackedToPlanar_SSE41(&argb[pos + 16 * half], planes);
      // Collapse each pair of channel registers into the first of the pair.
      HorizontalAddPack_SSE41(&planes[0], &planes[1], &planes[0]);
      HorizontalAddPack_SSE41(&planes[2], &planes[3], &planes[2]);
      HorizontalAddPack_SSE41(&planes[4], &planes[5], &planes[4]);
      ConvertRGBToUV_SSE41(&planes[0], &planes[2], &planes[4],
                           &u16[half], &v16[half]);
    }

    // Narrow both halves to unsigned 8-bit with saturation.
    u_out = _mm_packus_epi16(u16[0], u16[1]);
    v_out = _mm_packus_epi16(v16[0], v16[1]);

    if (!do_store) {
      // Accumulate mode: blend with what is already in the output rows.
      const __m128i old_u = LOAD_16(u);
      const __m128i old_v = LOAD_16(v);
      u_out = _mm_avg_epu8(u_out, old_u);
      v_out = _mm_avg_epu8(v_out, old_v);
    }
    STORE_16(u_out, u);
    STORE_16(v_out, v);

    pos += 32;
    u += 16;
    v += 16;
  }

  if (pos < src_width) {  // left-over
    WebPConvertARGBToUV_C(argb + pos, u, v, src_width - pos, do_store);
  }
}
void GBAHardwareGPIOWrite(struct GBACartridgeHardware* hw, uint32_t address, uint16_t value) { switch (address) { case GPIO_REG_DATA: hw->pinState &= ~hw->direction; hw->pinState |= value; _readPins(hw); break; case GPIO_REG_DIRECTION: hw->direction = value; break; case GPIO_REG_CONTROL: hw->readWrite = value; break; default: mLOG(GBA_HW, WARN, "Invalid GPIO address"); } if (hw->readWrite) { uint16_t old; LOAD_16(old, 0, hw->gpioBase); old &= ~hw->direction; old |= hw->pinState; STORE_16(old, 0, hw->gpioBase); } else { hw->gpioBase[0] = 0; } }
static void ConvertBGR24ToY_SSE41(const uint8_t* bgr, uint8_t* y, int width) { const int max_width = width & ~31; int i; for (i = 0; i < max_width; bgr += 3 * 16 * 2) { __m128i bgr_plane[6]; int j; RGB24PackedToPlanar_SSE41(bgr, bgr_plane); for (j = 0; j < 2; ++j, i += 16) { const __m128i zero = _mm_setzero_si128(); __m128i r, g, b, Y0, Y1; // Convert to 16-bit Y. b = _mm_unpacklo_epi8(bgr_plane[0 + j], zero); g = _mm_unpacklo_epi8(bgr_plane[2 + j], zero); r = _mm_unpacklo_epi8(bgr_plane[4 + j], zero); ConvertRGBToY_SSE41(&r, &g, &b, &Y0); // Convert to 16-bit Y. b = _mm_unpackhi_epi8(bgr_plane[0 + j], zero); g = _mm_unpackhi_epi8(bgr_plane[2 + j], zero); r = _mm_unpackhi_epi8(bgr_plane[4 + j], zero); ConvertRGBToY_SSE41(&r, &g, &b, &Y1); // Cast to 8-bit and store. STORE_16(_mm_packus_epi16(Y0, Y1), y + i); } } for (; i < width; ++i, bgr += 3) { // left-over y[i] = VP8RGBToY(bgr[2], bgr[1], bgr[0], YUV_HALF); } }
/*
 * Updates the low four pin bits of the GPIO state from 'pins'.
 *
 * Does nothing unless hw->readWrite is non-zero. Otherwise the word at
 * hw->gpioBase is read, its bits selected by hw->direction are kept, and the
 * low four bits of 'pins' that are NOT selected by hw->direction are merged in.
 * The result becomes hw->pinState and is written back to hw->gpioBase.
 */
void _outputPins(struct GBACartridgeHardware* hw, unsigned pins) {
	uint16_t latched;

	if (!hw->readWrite) {
		return;
	}

	LOAD_16(latched, 0, hw->gpioBase);
	latched &= hw->direction;
	hw->pinState = latched | (pins & ~hw->direction & 0xF);
	STORE_16(hw->pinState, 0, hw->gpioBase);
}
// Converts 16-bit-per-channel RGBA samples to U and V bytes.
//
// Each vector iteration reads 64 uint16_t values (16 four-channel samples) and
// writes 16 bytes to each of 'u' and 'v'. Samples past the largest multiple of
// 16 are forwarded to WebPConvertRGBA32ToUV_C().
//
// rgb:   source samples, four uint16_t per sample.
// u, v:  destination chroma rows.
// width: number of samples.
static void ConvertRGBA32ToUV_SSE41(const uint16_t* rgb,
                                    uint8_t* u, uint8_t* v, int width) {
  const int simd_width = width & ~15;  // largest multiple of 16
  int done;

  for (done = 0; done < simd_width; done += 16) {
    __m128i r, g, b;
    __m128i u_first, v_first, u_second, v_second;

    // First 32 uint16_t values.
    RGBA32PackedToPlanar_16b_SSE41(rgb + 0, &r, &g, &b);
    ConvertRGBToUV_SSE41(&r, &g, &b, &u_first, &v_first);

    // Next 32 uint16_t values.
    RGBA32PackedToPlanar_16b_SSE41(rgb + 32, &r, &g, &b);
    ConvertRGBToUV_SSE41(&r, &g, &b, &u_second, &v_second);

    // Narrow to 8-bit with unsigned saturation and store.
    STORE_16(_mm_packus_epi16(u_first, u_second), u);
    STORE_16(_mm_packus_epi16(v_first, v_second), v);

    rgb += 2 * 32;
    u += 16;
    v += 16;
  }

  if (simd_width < width) {  // left-over
    WebPConvertRGBA32ToUV_C(rgb, u, v, width - simd_width);
  }
}
static void ConvertARGBToY_SSE41(const uint32_t* argb, uint8_t* y, int width) { const int max_width = width & ~15; int i; for (i = 0; i < max_width; i += 16) { __m128i Y0, Y1, rgb[6]; RGB32PackedToPlanar_SSE41(&argb[i], rgb); ConvertRGBToY_SSE41(&rgb[0], &rgb[2], &rgb[4], &Y0); ConvertRGBToY_SSE41(&rgb[1], &rgb[3], &rgb[5], &Y1); STORE_16(_mm_packus_epi16(Y0, Y1), y + i); } for (; i < width; ++i) { // left-over const uint32_t p = argb[i]; y[i] = VP8RGBToY((p >> 16) & 0xff, (p >> 8) & 0xff, (p >> 0) & 0xff, YUV_HALF); } }
/*
 * Fills a set of rectangles on a surface with a single colour.
 *
 * dst:     destination surface.
 * color:   fill colour with 16-bit channels (each is scaled against 0xffff).
 * rects:   array of rectangles to fill.
 * n_rects: number of entries in 'rects'; nothing happens when it is < 1.
 *
 * Three strategies are used:
 *  - Solid surface: if a clip box and a rectangle both cover pixel (0,0),
 *    the surface's solid colour is updated and damage is reported.
 *  - Drawable available: each rectangle is intersected with every clip box and
 *    with the surface box, then cleared with a scissored GL clear.
 *  - Otherwise: a pixel buffer obtained from _glitz_minimum_buffer() is
 *    uploaded with glitz_set_pixels() for each rectangle, clamped to the
 *    surface box.
 *
 * If the buffer cannot be obtained, GLITZ_STATUS_NO_MEMORY_MASK is added to the
 * surface status and the function returns.
 */
void
glitz_set_rectangles (glitz_surface_t         *dst,
                      const glitz_color_t     *color,
                      const glitz_rectangle_t *rects,
                      int                     n_rects)
{
    GLITZ_GL_SURFACE (dst);

    if (n_rects < 1)
        return;

    if (SURFACE_SOLID (dst))
    {
        glitz_color_t old    = dst->solid;
        glitz_box_t   *clip  = dst->clip;
        int           n_clip = dst->n_clip;

        for (; n_clip; clip++, n_clip--)
        {
            /* Skip clip boxes that do not contain pixel (0,0). */
            if (clip->x1 > 0 ||
                clip->y1 > 0 ||
                clip->x2 < 1 ||
                clip->y2 < 1)
                continue;

            for (; n_rects; rects++, n_rects--)
            {
                /* Skip rectangles that do not contain pixel (0,0). */
                if (rects->x > 0 ||
                    rects->y > 0 ||
                    (rects->x + rects->width) < 1 ||
                    (rects->y + rects->height) < 1)
                    continue;

                STORE_16 (dst->solid.red,
                          dst->format->color.red_size,
                          color->red);
                STORE_16 (dst->solid.green,
                          dst->format->color.green_size,
                          color->green);
                STORE_16 (dst->solid.blue,
                          dst->format->color.blue_size,
                          color->blue);
                STORE_16 (dst->solid.alpha,
                          dst->format->color.alpha_size,
                          color->alpha);

                if (dst->flags & GLITZ_SURFACE_FLAG_SOLID_DAMAGE_MASK)
                {
                    dst->flags &= ~GLITZ_SURFACE_FLAG_SOLID_DAMAGE_MASK;
                    glitz_surface_damage (dst, &dst->box,
                                          GLITZ_DAMAGE_TEXTURE_MASK |
                                          GLITZ_DAMAGE_DRAWABLE_MASK);
                }
                else
                {
                    /* Only report damage when the stored colour changed. */
                    if (dst->solid.red   != old.red   ||
                        dst->solid.green != old.green ||
                        dst->solid.blue  != old.blue  ||
                        dst->solid.alpha != old.alpha)
                        glitz_surface_damage (dst, &dst->box,
                                              GLITZ_DAMAGE_TEXTURE_MASK |
                                              GLITZ_DAMAGE_DRAWABLE_MASK);
                }
                /* The first matching rectangle is the only one applied. */
                break;
            }
            /* Likewise only the first matching clip box is considered. */
            break;
        }
    }
    else
    {
        static glitz_pixel_format_t pf = {
            GLITZ_FOURCC_RGB,
            {
                32,
                0xff000000,
                0x00ff0000,
                0x0000ff00,
                0x000000ff
            },
            0, 0, 0,
            GLITZ_PIXEL_SCANLINE_ORDER_BOTTOM_UP
        };
        glitz_buffer_t *buffer = NULL;
        glitz_box_t    box;
        glitz_bool_t   drawable = 0;

        /* A single rectangle of at most 1x1 always takes the pixel-upload
         * path: any context is made current and 'drawable' stays 0. */
        if (n_rects == 1 && rects->width <= 1 && rects->height <= 1)
        {
            glitz_surface_push_current (dst, GLITZ_ANY_CONTEXT_CURRENT);
        }
        else
        {
            drawable = glitz_surface_push_current (dst,
                                                   GLITZ_DRAWABLE_CURRENT);
        }

        if (drawable)
        {
            glitz_box_t *clip;
            int         n_clip;

            gl->clear_color (color->red   / (glitz_gl_clampf_t) 0xffff,
                             color->green / (glitz_gl_clampf_t) 0xffff,
                             color->blue  / (glitz_gl_clampf_t) 0xffff,
                             color->alpha / (glitz_gl_clampf_t) 0xffff);

            while (n_rects--)
            {
                clip = dst->clip;
                n_clip = dst->n_clip;
                while (n_clip--)
                {
                    /* Clip box offset by the surface's clip origin. */
                    box.x1 = clip->x1 + dst->x_clip;
                    box.y1 = clip->y1 + dst->y_clip;
                    box.x2 = clip->x2 + dst->x_clip;
                    box.y2 = clip->y2 + dst->y_clip;

                    /* Intersect with the surface box. */
                    if (dst->box.x1 > box.x1)
                        box.x1 = dst->box.x1;
                    if (dst->box.y1 > box.y1)
                        box.y1 = dst->box.y1;
                    if (dst->box.x2 < box.x2)
                        box.x2 = dst->box.x2;
                    if (dst->box.y2 < box.y2)
                        box.y2 = dst->box.y2;

                    /* Intersect with the current rectangle. */
                    if (rects->x > box.x1)
                        box.x1 = rects->x;
                    if (rects->y > box.y1)
                        box.y1 = rects->y;
                    if (rects->x + rects->width < box.x2)
                        box.x2 = rects->x + rects->width;
                    if (rects->y + rects->height < box.y2)
                        box.y2 = rects->y + rects->height;

                    if (box.x1 < box.x2 && box.y1 < box.y2)
                    {
                        gl->scissor (box.x1,
                                     dst->attached->height - dst->y - box.y2,
                                     box.x2 - box.x1,
                                     box.y2 - box.y1);

                        gl->clear (GLITZ_GL_COLOR_BUFFER_BIT);

                        glitz_surface_damage (dst, &box,
                                              GLITZ_DAMAGE_TEXTURE_MASK |
                                              GLITZ_DAMAGE_SOLID_MASK);
                    }

                    clip++;
                }
                rects++;
            }
        }
        else
        {
            /* Scale each 16-bit channel to 8 bits and pack the result as
             * alpha, red, green, blue from the most significant byte down. */
            unsigned int pixel =
                ((((unsigned int) color->alpha * 0xff) / 0xffff) << 24) |
                ((((unsigned int) color->red   * 0xff) / 0xffff) << 16) |
                ((((unsigned int) color->green * 0xff) / 0xffff) << 8) |
                ((((unsigned int) color->blue  * 0xff) / 0xffff));
            int x1, y1, x2, y2;

            buffer = _glitz_minimum_buffer (dst, rects, n_rects, &pixel);
            if (!buffer)
            {
                glitz_surface_status_add (dst, GLITZ_STATUS_NO_MEMORY_MASK);
                /* Balance the glitz_surface_push_current() made above before
                 * leaving early. */
                glitz_surface_pop_current (dst);
                return;
            }

            while (n_rects--)
            {
                x1 = rects->x;
                y1 = rects->y;
                x2 = x1 + rects->width;
                y2 = y1 + rects->height;

                /* Clamp to [0, box.x2] x [0, box.y2]. */
                if (x1 < 0)
                    x1 = 0;
                if (y1 < 0)
                    y1 = 0;
                if (x2 > dst->box.x2)
                    x2 = dst->box.x2;
                if (y2 > dst->box.y2)
                    y2 = dst->box.y2;

                if (x1 < x2 && y1 < y2)
                    glitz_set_pixels (dst,
                                      x1, y1,
                                      x2 - x1, y2 - y1,
                                      &pf, buffer);

                rects++;
            }

            if (buffer)
                glitz_buffer_destroy (buffer);
        }

        glitz_surface_pop_current (dst);
    }
}