/*
 * Convert a frame to packed 32-bit pixels, two source columns per step.
 *
 * in     - source frame, read only through get_Ysz/get_Usz/get_Vsz
 *          (defined elsewhere; presumably NV21 accessors - TODO confirm).
 * out    - destination, sx*sy ints. Each pixel is stored as
 *          (255<<24) | (R<<16) | (G<<8) | B, assuming the CSC_* results
 *          fit in 8 bits - TODO confirm against their definitions.
 * sx, sy - source width and height in pixels.
 * rotate - zero:     source (x, y) is written to out[y*sx + x];
 *          non-zero: source (x, y) is written to out[x*sy + (sy-1-y)],
 *          i.e. the output has rows of sy pixels (a 90-degree rotation).
 *
 * Chroma is smoothed horizontally: the first pixel of each pair uses the
 * current U and the mean of the previous and current V; the second pixel
 * uses the current V and the mean of the current and next U.
 */
void NV21_to_RGB
(
	unsigned char * in,
	int * out,
	int   sx,
	int   sy,
	int   rotate
)
{
	/* Built as unsigned: shifting the int constant 255 left by 24 overflows int. */
	const unsigned int alpha = 0xFFu << 24;
	int x, y;
	short Y1, Y2, u, v, vp, up, va, ua;
	unsigned int R, G, B;

	for (y=0; y<sy; ++y)
	{
		/* Seed the "previous" chroma with the first sample of the row. */
		up=(short)get_Usz(in, 0, y);
		vp=(short)get_Vsz(in, 0, y);

		for (x=0; x<sx; x+=2)
		{
			Y1 = (short)get_Ysz(in, x, y);
			v  = (short)get_Vsz(in, x, y);

			/* Look one pair ahead for U; at the right edge reuse the current one. */
			if (x<sx-2)
				u  = (short)get_Usz(in, x+2, y);
			else
				u = up;

			ua = (u+up)/2;
			va = (v+vp)/2;

			/* First pixel of the pair. */
			R = CSC_R(Y1, va);
			G = CSC_G(Y1, up, va);
			B = CSC_B(Y1, up);

			if (rotate)
				out[x*sy+sy-1-y] = (int)((R<<16) + (G<<8) + B + alpha);
			else
				out[y*sx+x] = (int)((R<<16) + (G<<8) + B + alpha);

			/*
			 * Second pixel of the pair. Skipped when sx is odd and this is
			 * the last column, so nothing is read or written at column sx.
			 */
			if (x+1 < sx)
			{
				Y2 = (short)get_Ysz(in, x+1, y);

				R = CSC_R(Y2, v);
				G = CSC_G(Y2, ua, v);
				B = CSC_B(Y2, ua);

				if (rotate)
					out[(x+1)*sy+sy-1-y] = (int)((R<<16) + (G<<8) + B + alpha);
				else
					out[y*sx+x+1] = (int)((R<<16) + (G<<8) + B + alpha);
			}

			vp = v;
			up = u;
		}
	}
}
/*
 * Crop, scale (nearest neighbour) and rotate an NV21 frame into a packed
 * byte buffer.
 *
 * pY        - start of the frame: width*height luma bytes followed by the
 *             interleaved chroma plane (V at even offsets, U at odd).
 * width     - source row length in bytes/pixels.
 * height    - source height in rows.
 * crop      - {x0, y0, cropWidth, cropHeight} of the source region.
 * outWidth  - number of samples taken across the crop.
 * outHeight - number of samples taken down the crop.
 * stride    - bytes per output pixel; 4 writes R,G,B,0xFF, any other value
 *             writes R,G,B only (3 is the only other value that keeps the
 *             output addressing consistent).
 * buffer    - destination. Sample (i, j) - row i, column j of the scaled
 *             crop - is written at pixel index j*outHeight + (outHeight-1-i),
 *             so the output has rows of outHeight pixels (90-degree rotation).
 */
inline void NV21_to_RGB_rotated(unsigned char *pY, int width, int height, int *crop, int outWidth, int outHeight, int stride, unsigned char *buffer)
{
	const unsigned char *lumaPlane = pY;
	const unsigned char *chromaPlane = pY + width * height;
	unsigned char *out = buffer;

	/* Added after a pixel has been written, this lands on the next output row. */
	const int rowAdvance = (outHeight - 1) * stride;
	const float scaleWidth = (float)crop[2] / outWidth;
	const float scaleHeight = (float)crop[3] / outHeight;
	int yoffset = rowAdvance;
	int i, j;

	for (i = 0; i < outHeight; i++)
	{
		/* Absolute source row, so chroma subsampling is correct for odd crop[1]. */
		const int srcY = crop[1] + (int)(i * scaleHeight);
		int offset = yoffset;

		for (j = 0; j < outWidth; j++)
		{
			/* Absolute source column, so chroma pairing is correct for odd crop[0]. */
			const int srcX = crop[0] + (int)(j * scaleWidth);
			const unsigned char *vu = chromaPlane + (srcY / 2) * width + (srcX & ~1);
			const int nY = lumaPlane[srcY * width + srcX];
			const int nV = vu[0];
			const int nU = vu[1];

			out[offset++] = CSC_R(nY, nV);
			out[offset++] = CSC_G(nY, nU, nV);
			out[offset++] = CSC_B(nY, nU);
			if (stride == 4)
				out[offset++] = 0xFF;

			offset += rowAdvance;
		}

		/* Successive source rows fill output columns from right to left. */
		yoffset -= stride;
	}
}
/*
 * Crop and scale (nearest neighbour) an NV21 frame into a packed byte buffer,
 * without rotation.
 *
 * pY        - start of the frame: width*height luma bytes followed by the
 *             interleaved chroma plane (V at even offsets, U at odd).
 * width     - source row length in bytes/pixels.
 * height    - source height in rows.
 * x0, y0    - top-left corner of the crop region in the source.
 * wCrop     - crop width in source pixels.
 * hCrop     - crop height in source pixels.
 * outWidth  - output width in pixels.
 * outHeight - output height in pixels.
 * stride    - 4 writes B,G,R,255 per pixel; any other value writes B,G,R.
 * buffer    - destination, rows tightly packed at the per-pixel size above.
 */
void NV21_to_RGB_scaled
(
	unsigned char *pY,
	int width,
	int height,
	int x0,
	int y0,
	int wCrop,
	int hCrop,
	int outWidth,
	int outHeight,
	int stride,
	unsigned char *buffer
)
{
    const unsigned char *lumaPlane = pY;
    const unsigned char *chromaPlane = pY + width * height;

    /*
     * Row pitch must match the bytes actually written per pixel; a fixed
     * factor of 4 leaves gaps (and overruns a tight buffer) for 3-byte output.
     */
    const int bytesPerPixel = (stride == 4) ? 4 : 3;
    unsigned char *out = buffer;
    int i, j;

    for (i = 0; i < outHeight; i++)
    {
        /* Absolute source row, so chroma subsampling is correct for odd y0. */
        const int srcY = y0 + i * hCrop / outHeight;
        int offset = i * outWidth * bytesPerPixel;

        for (j = 0; j < outWidth; j++)
        {
            /* Absolute source column, so chroma pairing is correct for odd x0. */
            const int srcX = x0 + j * wCrop / outWidth;
            const unsigned char *vu = chromaPlane + (srcY / 2) * width + (srcX & ~1);
            const int Y = lumaPlane[srcY * width + srcX];
            const int V = vu[0];
            const int U = vu[1];

            out[offset++] = CSC_B(Y, U);
            out[offset++] = CSC_G(Y, U, V);
            out[offset++] = CSC_R(Y, V);
            if (stride == 4)
                out[offset++] = 255;
        }
    }
}
/*
 * Convert a packed 3-bytes-per-pixel Y,U,V image to 32-bit pixels, rotating
 * by 90 degrees for portrait layout.
 *
 * pview_yuv - source, sx*sy pixels stored as consecutive Y,U,V bytes.
 * pview     - destination, sx*sy words. Source (x, y) is written to
 *             pview[x*sy + (sy-1-y)], so output rows are sy pixels long.
 *             (The unrotated index would be y*sx + x.)
 * sx, sy    - source width and height in pixels.
 *
 * Each word is (255<<24) | (R<<16) | (G<<8) | B, assuming the CSC_* results
 * fit in 8 bits - TODO confirm against their definitions.
 */
void Preview_YUV2ARGB(Uint8 *pview_yuv, Uint32 *pview, int sx, int sy)
{
	/* Built in Uint32: shifting the int constant 255 left by 24 overflows int. */
	const Uint32 alpha = (Uint32)255 << 24;
	int x, y;

	for (y=0; y<sy; ++y)
		for (x=0; x<sx; ++x)
		{
			const Uint8 *px = pview_yuv + (y*sx+x)*3;
			int Y = px[0];
			int U = px[1];
			int V = px[2];
			Uint32 R = CSC_R(Y,V);
			Uint32 G = CSC_G(Y,U,V);
			Uint32 B = CSC_B(Y,U);

			pview[x*sy+sy-1-y] = (R<<16) + (G<<8) + B + alpha;
		}
}
/*
 * Crop, scale (nearest neighbour) and rotate an NV21 frame into a packed
 * byte buffer.
 *
 * pY        - start of the frame: width*height luma bytes followed by the
 *             interleaved chroma plane (V at even offsets, U at odd).
 * width     - source row length in bytes/pixels.
 * height    - source height in rows.
 * x0, y0    - top-left corner of the crop region in the source.
 * wCrop     - crop width in source pixels.
 * hCrop     - crop height in source pixels.
 * outWidth  - number of samples taken across the crop.
 * outHeight - number of samples taken down the crop.
 * stride    - selects the output pixel format:
 *               >= 5 : R,G,B byte order, and the pixel size used is stride-2
 *                      (so 5 gives 3-byte RGB, 6 gives 4-byte RGBA);
 *               == 4 : R,G,B,255;
 *               else : B,G,R (3 is the only value that keeps the output
 *                      addressing consistent).
 * buffer    - destination. Sample (i, j) - row i, column j of the scaled
 *             crop - is written at pixel index j*outHeight + (outHeight-1-i),
 *             so the output has rows of outHeight pixels (90-degree rotation).
 */
void NV21_to_RGB_scaled_rotated
(
	unsigned char *pY,
	int width,
	int height,
	int x0,
	int y0,
	int wCrop,
	int hCrop,
	int outWidth,
	int outHeight,
	int stride,
	unsigned char *buffer
)
{
	const unsigned char *lumaPlane = pY;
	const unsigned char *chromaPlane = pY + width * height;
	unsigned char *out = buffer;
	int i, j;
	int bgr = 1;

	if (stride >= 5)	// a special case for GL - RGB(A) byte order
	{
		bgr = 0;
		stride -= 2;
	}
	else if (stride == 4)
	{
		bgr = 0;
	}

	/* Added after a pixel has been written, this lands on the next output row. */
	const int rowAdvance = (outHeight - 1) * stride;
	int yoffset = rowAdvance;

	for (i = 0; i < outHeight; i++)
	{
		/* Absolute source row, so chroma subsampling is correct for odd y0. */
		const int srcY = y0 + i * hCrop / outHeight;
		int offset = yoffset;

		for (j = 0; j < outWidth; j++)
		{
			/* Absolute source column, so chroma pairing is correct for odd x0. */
			const int srcX = x0 + j * wCrop / outWidth;
			const unsigned char *vu = chromaPlane + (srcY / 2) * width + (srcX & ~1);
			const int nY = lumaPlane[srcY * width + srcX];
			const int nV = vu[0];
			const int nU = vu[1];

			if (bgr)	// B,G,R byte order
			{
				out[offset++] = CSC_B(nY, nU);
				out[offset++] = CSC_G(nY, nU, nV);
				out[offset++] = CSC_R(nY, nV);
			}
			else		// R,G,B byte order
			{
				out[offset++] = CSC_R(nY, nV);
				out[offset++] = CSC_G(nY, nU, nV);
				out[offset++] = CSC_B(nY, nU);
			}
			if (stride == 4)
				out[offset++] = 255;

			offset += rowAdvance;
		}

		/* Successive source rows fill output columns from right to left. */
		yoffset -= stride;
	}
}