/*
 * Draw a rectangle of the surface's pixel buffer into a presentation space.
 *
 * surface:               surface whose `pixels` / `bitmap_info` are drawn
 * hps_begin_paint:       presentation space handed to GpiDrawBits ()
 * prcl_begin_paint_rect: rectangle to draw; it is only read here and is used
 *                        for both the target and the source coordinates
 *
 * The pixels are first drawn using the surface's own bitmap header.  If that
 * GpiDrawBits () call does not return GPI_OK, a temporary copy with 3 bytes
 * per pixel is built and drawn with a 24 bit header instead.  The Y inversion
 * of the presentation space is changed for the duration of the call and
 * restored to its queried value before returning.
 */
static void
_cairo_os2_surface_blit_pixels (cairo_os2_surface_t *surface,
                                HPS                  hps_begin_paint,
                                PRECTL               prcl_begin_paint_rect)
{
    POINTL aptlPoints[4];
    LONG lOldYInversion, rc = GPI_OK;
    int i;

    /* Enable Y Inversion for the HPS, so the
     * GpiDrawBits will work with upside-top image, not with upside-down image!
     */
    lOldYInversion = GpiQueryYInversion (hps_begin_paint);
    GpiEnableYInversion (hps_begin_paint, surface->bitmap_info.cy-1);

    /* Target corners: the upper-right corner is pulled in by one pixel.
     * NOTE(review): this presumably converts the exclusive RECTL corner
     * into the inclusive corner GpiDrawBits expects - confirm against
     * the GPI reference.
     */
    aptlPoints[0].x = prcl_begin_paint_rect->xLeft;
    aptlPoints[0].y = prcl_begin_paint_rect->yBottom;
    aptlPoints[1].x = prcl_begin_paint_rect->xRight-1;
    aptlPoints[1].y = prcl_begin_paint_rect->yTop-1;

    /* Source corners: taken from the rectangle unchanged */
    aptlPoints[2].x = prcl_begin_paint_rect->xLeft;
    aptlPoints[2].y = prcl_begin_paint_rect->yBottom;
    aptlPoints[3].x = prcl_begin_paint_rect->xRight;
    aptlPoints[3].y = prcl_begin_paint_rect->yTop;

    /* Some extra checking for limits
     * (Dunno if really needed, but had some crashes sometimes without it,
     *  while developing the code...)
     * Every coordinate is clamped to [0, cx] x [0, cy].
     */
    for (i = 0; i < 4; i++) {
        if (aptlPoints[i].x < 0)
            aptlPoints[i].x = 0;
        if (aptlPoints[i].y < 0)
            aptlPoints[i].y = 0;
        if (aptlPoints[i].x > (LONG) surface->bitmap_info.cx)
            aptlPoints[i].x = (LONG) surface->bitmap_info.cx;
        if (aptlPoints[i].y > (LONG) surface->bitmap_info.cy)
            aptlPoints[i].y = (LONG) surface->bitmap_info.cy;
    }

    rc = GpiDrawBits (hps_begin_paint,
                      surface->pixels,
                      &(surface->bitmap_info),
                      4,
                      aptlPoints,
                      ROP_SRCCOPY,
                      BBO_IGNORE);

    if (rc != GPI_OK) {
        /* if GpiDrawBits () failed then this is most likely because the
         * display driver could not handle a 32bit bitmap. So we need to
         * - create a buffer that only contains 3 bytes per pixel
         * - change the bitmap info header to contain 24bit
         * - pass the new buffer to GpiDrawBits () again
         * - clean up the new buffer
         */
        BITMAPINFOHEADER2 bmpheader;
        unsigned char *pchPixBuf;
        ULONG ulWidth = surface->bitmap_info.cx;
        ULONG ulHeight = surface->bitmap_info.cy;
        ULONG ulStride;
        ULONG ulPad;

        /* The start of each bitmap row has to be DWORD aligned, so a
         * 24bit row may need up to 3 padding bytes after its pixels.
         */
        ulStride = ((ulWidth * 24 + 31) / 32) * 4;
        ulPad = ulStride - ulWidth * 3;

        /* allocate temporary pixel buffer: ulHeight rows of ulStride bytes */
        pchPixBuf = (unsigned char *) _buffer_alloc (ulHeight, ulStride, 1);

        /* without a buffer there is nothing to draw; just fall through
         * to restoring the Y inversion
         */
        if (pchPixBuf != NULL) {
            unsigned char *pchPixSource = surface->pixels;
            unsigned char *pchPixTarget = pchPixBuf;
            ULONG ulX, ulY, ulP;

            for (ulY = 0; ulY < ulHeight; ulY++) {
                for (ulX = 0; ulX < ulWidth; ulX++) {
                    /* copy BGR from source buffer */
                    *pchPixTarget++ = *pchPixSource++;
                    *pchPixTarget++ = *pchPixSource++;
                    *pchPixTarget++ = *pchPixSource++;
                    pchPixSource++; /* jump over alpha channel in source buffer */
                }
                /* fill the alignment bytes at the end of the row */
                for (ulP = 0; ulP < ulPad; ulP++)
                    *pchPixTarget++ = 0;
            }

            /* set up the bitmap header, but this time with 24bit depth only */
            memset (&bmpheader, 0, sizeof (bmpheader));
            bmpheader.cbFix = sizeof (BITMAPINFOHEADER2);
            bmpheader.cx = ulWidth;
            bmpheader.cy = ulHeight;
            bmpheader.cPlanes = surface->bitmap_info.cPlanes;
            bmpheader.cBitCount = 24;
            bmpheader.cbImage = ulStride * ulHeight;
            rc = GpiDrawBits (hps_begin_paint,
                              pchPixBuf,
                              (PBITMAPINFO2)&bmpheader,
                              4,
                              aptlPoints,
                              ROP_SRCCOPY,
                              BBO_IGNORE);

            _buffer_free (pchPixBuf);
        }
    }

    /* Restore Y inversion */
    GpiEnableYInversion (hps_begin_paint, lOldYInversion);
}
/*
 * Draw a rectangle of the surface's pixel buffer into a presentation space.
 *
 * surface:               surface whose `pixels` / `bitmap_info` are drawn;
 *                        its `use_24bpp` flag is read and updated here
 * hps_begin_paint:       presentation space handed to GpiDrawBits ()
 * prcl_begin_paint_rect: rectangle to draw.  NOTE: it is clamped in place
 *                        to the bitmap bounds, so the caller sees the
 *                        clamped values afterwards.
 *
 * Nothing is drawn when the clamped rectangle is empty.  Otherwise the
 * pixels are drawn with the surface's own bitmap header unless `use_24bpp`
 * is set; if that draw fails, `use_24bpp` is switched on and a temporary
 * 24 bit copy of the whole bitmap is drawn instead.  If the 24 bit draw
 * fails as well, `use_24bpp` is switched back off so the next call tries
 * the surface's own format again.
 */
static void
_cairo_os2_surface_blit_pixels (cairo_os2_surface_t *surface,
                                HPS                  hps_begin_paint,
                                PRECTL               prcl_begin_paint_rect)
{
    POINTL aptlPoints[4];
    LONG   lOldYInversion;
    LONG   rc = GPI_OK;

    /* Check the limits (may not be necessary) */
    if (prcl_begin_paint_rect->xLeft < 0)
        prcl_begin_paint_rect->xLeft = 0;
    if (prcl_begin_paint_rect->yBottom < 0)
        prcl_begin_paint_rect->yBottom = 0;
    if (prcl_begin_paint_rect->xRight > (LONG) surface->bitmap_info.cx)
        prcl_begin_paint_rect->xRight = (LONG) surface->bitmap_info.cx;
    if (prcl_begin_paint_rect->yTop > (LONG) surface->bitmap_info.cy)
        prcl_begin_paint_rect->yTop = (LONG) surface->bitmap_info.cy;

    /* Exit if the rectangle is empty */
    if (prcl_begin_paint_rect->xLeft   >= prcl_begin_paint_rect->xRight ||
        prcl_begin_paint_rect->yBottom >= prcl_begin_paint_rect->yTop)
        return;

    /* Target corners: the upper-right corner is pulled in by one pixel.
     * NOTE(review): this presumably converts the exclusive RECTL corner
     * into the inclusive corner GpiDrawBits expects - confirm against
     * the GPI reference.
     */
    aptlPoints[0].x = prcl_begin_paint_rect->xLeft;
    aptlPoints[0].y = prcl_begin_paint_rect->yBottom;
    aptlPoints[1].x = prcl_begin_paint_rect->xRight - 1;
    aptlPoints[1].y = prcl_begin_paint_rect->yTop - 1;

    /* Source corners: the clamped rectangle, unchanged */
    aptlPoints[2].x = prcl_begin_paint_rect->xLeft;
    aptlPoints[2].y = prcl_begin_paint_rect->yBottom;
    aptlPoints[3].x = prcl_begin_paint_rect->xRight;
    aptlPoints[3].y = prcl_begin_paint_rect->yTop;

    /* Enable Y Inversion for the HPS, so  GpiDrawBits will
     * work with upside-top image, not with upside-down image!
     */
    lOldYInversion = GpiQueryYInversion (hps_begin_paint);
    GpiEnableYInversion (hps_begin_paint, surface->bitmap_info.cy-1);

    if (!surface->use_24bpp) {
        rc = GpiDrawBits (hps_begin_paint,
                          surface->pixels,
                          &(surface->bitmap_info),
                          4,
                          aptlPoints,
                          ROP_SRCCOPY,
                          BBO_IGNORE);
        /* Remember the failure so later calls go straight to 24 bit */
        if (rc != GPI_OK)
            surface->use_24bpp = TRUE;
    }

    if (surface->use_24bpp) {
        /* If GpiDrawBits () failed then this is most likely because the
         * display driver could not handle a 32bit bitmap. So we need to
         * - create a buffer that only contains 3 bytes per pixel
         * - change the bitmap info header to contain 24bit
         * - pass the new buffer to GpiDrawBits () again
         * - clean up the new buffer
         */
        BITMAPINFO2       bmpinfo;
        unsigned char    *pchPixBuf;
        ULONG             ulStride;
        ULONG             ulPad;

        /* Set up the bitmap header, but this time for 24bit depth. */
        bmpinfo = surface->bitmap_info;
        bmpinfo.cBitCount = 24;

        /* The start of each row has to be DWORD aligned.  Calculate the
         * number of aligned bytes per row, the total size of the bitmap,
         * and the number of padding bytes at the end of each row.
         */
        ulStride = (((bmpinfo.cx * bmpinfo.cBitCount) + 31) / 32) * 4;
        bmpinfo.cbImage = ulStride * bmpinfo.cy;
        ulPad = ulStride - bmpinfo.cx * 3;

        /* Allocate the temporary pixel buffer.  The copy below never
         * writes outside a row, so exactly cbImage bytes are enough.
         */
        pchPixBuf = (unsigned char *)_buffer_alloc (1, 1, bmpinfo.cbImage);

        if (pchPixBuf) {
            unsigned char *pchTarget = pchPixBuf;
            unsigned char *pchSource = (unsigned char *) surface->pixels;
            ULONG          ulX;
            ULONG          ulY;
            ULONG          ulP;

            /* Copy the first 3 bytes of every 4 byte source pixel and
             * drop the fourth (alpha) byte.  At the end of each row,
             * fill the alignment bytes.
             */
            for (ulY = bmpinfo.cy; ulY; ulY--) {
                for (ulX = bmpinfo.cx; ulX; ulX--) {
                    *pchTarget++ = *pchSource++;
                    *pchTarget++ = *pchSource++;
                    *pchTarget++ = *pchSource++;
                    pchSource++;
                }
                for (ulP = ulPad; ulP; ulP--)
                    *pchTarget++ = 0;
            }

            rc = GpiDrawBits (hps_begin_paint,
                              pchPixBuf,
                              &bmpinfo,
                              4,
                              aptlPoints,
                              ROP_SRCCOPY,
                              BBO_IGNORE);
            /* 24 bit did not work either: try the native format next time */
            if (rc != GPI_OK)
                surface->use_24bpp = FALSE;

            _buffer_free (pchPixBuf);
        }
    }

    /* Restore Y inversion */
    GpiEnableYInversion (hps_begin_paint, lOldYInversion);
}