/*
 * Draw one row of source pixels at double size: every source pixel fills a
 * 2x2 cell spread over two consecutive rows of g_screen.screen_pixels.
 *
 * x, y  position of the first pixel, in source coordinates
 * w     number of source pixels in the row; nothing is drawn when w <= 0
 * p     first source pixel of the row
 *
 * With opt.hires set the row is filtered through scale2x_8_def(), which is
 * handed p - GFX_WIDTH and p + GFX_WIDTH as the neighbouring source rows.
 * Otherwise each pixel is simply replicated.
 */
static void _putpixels_scale2 (int x, int y, int w, BYTE *p)
{
	BYTE *p0 = g_screen.screen_pixels, *p1;	/* word aligned */

	/* A non-positive width would make the replication loop run away. */
	if (w <= 0)
		return;

	/* Using ASPECT_RATIO to allow runtime window resize */
	y = ASPECT_RATIO(GFX_HEIGHT) - y - 1;	/* mirror the row index */
	x <<= 1;
	y <<= 1;

	p0 += x + y * xsize;
	p1 = p0 + xsize;

	EnterCriticalSection(&g_screen.cs);

	/*
	 * scale2x_8_def() needs at least two pixels per row, so a single pixel
	 * takes the plain path (which yields the same 2x2 cell).
	 *
	 * NOTE(review): y has already been doubled, so the "<< 2" bound below
	 * looks larger than any y this function can produce unless ASPECT_RATIO
	 * more than doubles the height -- confirm whether "<< 1" was intended.
	 */
	if (!opt.hires || w < 2 || y == 0 || y >= ((GFX_HEIGHT - 1)<<2)) {
		while (w--) {
			*p0++ = *p;
			*p0++ = *p;
			*p1++ = *p;
			*p1++ = *p;
			p++;
		}
	} else {
		scale2x_8_def(p1, p0, p - GFX_WIDTH, p, p + GFX_WIDTH, w);
	}

	LeaveCriticalSection (&g_screen.cs);
}
/**
 * Apply the Scale2x effect on a group of rows. Used internally.
 * Dispatches on the pixel size to the matching row routine; the MMX
 * variants are chosen when building with GCC for i386, the portable
 * variants otherwise.
 * \param dst0 First destination row.
 * \param dst1 Second destination row.
 * \param src0 Previous source row.
 * \param src1 Current source row.
 * \param src2 Next source row.
 * \param pixel Selects the routine: 1 -> 8 bit, 2 -> 16 bit, 4 -> 32 bit.
 * Any other value does nothing.
 * \param pixel_per_row Row length, forwarded unchanged as the pixel count.
 */
static inline void stage_scale2x(void* dst0, void* dst1, const void* src0, const void* src1, const void* src2, unsigned pixel, unsigned pixel_per_row)
{
#if defined(__GNUC__) && defined(__i386__)
	if (pixel == 1) {
		scale2x_8_mmx(dst0, dst1, src0, src1, src2, pixel_per_row);
	} else if (pixel == 2) {
		scale2x_16_mmx(dst0, dst1, src0, src1, src2, pixel_per_row);
	} else if (pixel == 4) {
		scale2x_32_mmx(dst0, dst1, src0, src1, src2, pixel_per_row);
	}
#else
	if (pixel == 1) {
		scale2x_8_def(dst0, dst1, src0, src1, src2, pixel_per_row);
	} else if (pixel == 2) {
		scale2x_16_def(dst0, dst1, src0, src1, src2, pixel_per_row);
	} else if (pixel == 4) {
		scale2x_32_def(dst0, dst1, src0, src1, src2, pixel_per_row);
	}
#endif
}
/**
 * Scale by a factor of 2 a row of pixels of 8 bits.
 * This is a very fast MMX implementation.
 * The implementation uses a combination of cmp/and/not operations to
 * completely remove the need for conditional jumps. This trick gives the
 * major speed improvement.
 * Also, using the 8-byte MMX registers, more than one pixel is computed
 * at the same time.
 * Before calling this function you must ensure that the current CPU supports
 * the MMX instruction set. After calling it you must be sure to call the EMMS
 * instruction before any floating-point operation.
 * The pixels beyond the left and right borders are assumed to have the same
 * color as the pixels on the border.
 * Note that the implementation is optimized to write data sequentially to
 * maximize the bandwidth on video memory.
 * \param src0 Pointer at the first pixel of the previous row.
 * \param src1 Pointer at the first pixel of the current row.
 * \param src2 Pointer at the first pixel of the next row.
 * \param count Length in pixels of the src0, src1 and src2 rows. The MMX
 * path is taken only when it is at least 16 and a multiple of 8; any other
 * length is handed to scale2x_8_def() instead.
 * \param dst0 First destination row, double length in pixels.
 * \param dst1 Second destination row, double length in pixels.
 */
void scale2x_8_mmx(scale2x_uint8* dst0, scale2x_uint8* dst1, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count)
{
	const int mmx_usable = count >= 16 && (count % 8) == 0;

	if (!mmx_usable) {
		/* Row length unsuitable for the MMX routine: use the generic one. */
		scale2x_8_def(dst0, dst1, src0, src1, src2, count);
		return;
	}

	/* The second output row is the first with the outer source rows swapped. */
	scale2x_8_mmx_border(dst0, src0, src1, src2, count);
	scale2x_8_mmx_border(dst1, src2, src1, src0, count);
}
/*
 * Draw one row of source pixels at double size with the ASPECT_RATIO
 * correction applied to the destination row. Every source pixel fills a
 * 2x2 cell; when the correction leaves a gap after this row pair
 * (ASPECT_RATIO(y) + 2 != ASPECT_RATIO(y + 2)) a third destination row is
 * filled as well.
 *
 * x, y  position of the first pixel, in source coordinates
 * w     number of source pixels in the row; nothing is drawn when w <= 0
 * p     first source pixel of the row
 *
 * With opt.hires set the rows are filtered through scale2x_8_def(), which
 * is handed p - GFX_WIDTH and p + GFX_WIDTH as the neighbouring source rows.
 * Otherwise each pixel is simply replicated.
 */
static void _putpixels_fixratio_scale2 (int x, int y, int w, BYTE *p)
{
	BYTE *p0 = g_screen.screen_pixels, *p1, *p2, *_p;	/* Word aligned! */
	int extra = 0;

	/* A non-positive width would make the replication loops run away. */
	if (w <= 0)
		return;

	y = GFX_HEIGHT - y - 1;	/* mirror the row index */
	x <<= 1;
	y <<= 1;

	/* NOTE(review): y is compared against a GFX_WIDTH-based bound -- confirm. */
	if (y < ((GFX_WIDTH - 1) << 2) && ASPECT_RATIO (y) + 2 != ASPECT_RATIO (y + 2)) {
		extra = w;
	}

	y = ASPECT_RATIO(y);

	p0 += x + y * xsize;
	p1 = p0 + xsize;
	p2 = p1 + xsize;

	EnterCriticalSection(&g_screen.cs);

	/*
	 * scale2x_8_def() needs at least two pixels per row, so a single pixel
	 * takes the plain path (which yields the same 2x2 cell).
	 */
	if (!opt.hires || w < 2 || y == 0 || y >= (ASPECT_RATIO(GFX_HEIGHT - 1) << 1)) {
		for (_p = p; w--; p++) {
			*p0++ = *p;
			*p0++ = *p;
			*p1++ = *p;
			*p1++ = *p;
		}
		for (p = _p; extra--; p++) {
			*p2++ = *p;
			*p2++ = *p;
		}
	} else {
		/*
		 * extra is either 0 or w. Skip the additional row entirely when it
		 * is 0 instead of handing scale2x_8_def() an empty row.
		 */
		if (extra > 0) {
			scale2x_8_def(p2, p1, p - GFX_WIDTH, p, p + GFX_WIDTH, extra);
		}
		scale2x_8_def(p1, p0, p - GFX_WIDTH, p, p + GFX_WIDTH, w);
	}

	LeaveCriticalSection (&g_screen.cs);
}
/**
 * Apply the Scale2x effect on a group of rows. Used internally.
 * Dispatches on the pixel size to the matching row routine; the MMX
 * variants are chosen when building with GCC for i386 or x86_64, the
 * portable variants otherwise. The row pointers reach the routines through
 * the SSDST(bits, n) / SSSRC(bits, n) macros (defined elsewhere; presumably
 * they convert dstN / srcN to the pointer type for that bit depth).
 * \param dst0 First destination row.
 * \param dst1 Second destination row.
 * \param src0 Previous source row.
 * \param src1 Current source row.
 * \param src2 Next source row.
 * \param pixel Selects the routine: 1 -> 8 bit, 2 -> 16 bit, 4 -> 32 bit.
 * Any other value does nothing.
 * \param pixel_per_row Row length, forwarded unchanged as the pixel count.
 */
static inline void stage_scale2x(void* dst0, void* dst1, const void* src0, const void* src1, const void* src2, unsigned pixel, unsigned pixel_per_row)
{
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
	if (pixel == 1) {
		scale2x_8_mmx(SSDST(8,0), SSDST(8,1), SSSRC(8,0), SSSRC(8,1), SSSRC(8,2), pixel_per_row);
	} else if (pixel == 2) {
		scale2x_16_mmx(SSDST(16,0), SSDST(16,1), SSSRC(16,0), SSSRC(16,1), SSSRC(16,2), pixel_per_row);
	} else if (pixel == 4) {
		scale2x_32_mmx(SSDST(32,0), SSDST(32,1), SSSRC(32,0), SSSRC(32,1), SSSRC(32,2), pixel_per_row);
	}
#else
	if (pixel == 1) {
		scale2x_8_def(SSDST(8,0), SSDST(8,1), SSSRC(8,0), SSSRC(8,1), SSSRC(8,2), pixel_per_row);
	} else if (pixel == 2) {
		scale2x_16_def(SSDST(16,0), SSDST(16,1), SSSRC(16,0), SSSRC(16,1), SSSRC(16,2), pixel_per_row);
	} else if (pixel == 4) {
		scale2x_32_def(SSDST(32,0), SSDST(32,1), SSSRC(32,0), SSSRC(32,1), SSSRC(32,2), pixel_per_row);
	}
#endif
}