mlib_status mlib_AffineEdges(mlib_affine_param *param,
                             const mlib_image  *dst,
                             const mlib_image  *src,
                             void              *buff_lcl,
                             mlib_s32          buff_size,
                             mlib_s32          kw,
                             mlib_s32          kh,
                             mlib_s32          kw1,
                             mlib_s32          kh1,
                             mlib_edge         edge,
                             const mlib_d64    *mtx,
                             mlib_s32          shiftx,
                             mlib_s32          shifty)
{
  mlib_u8 *buff = buff_lcl;
  mlib_u8 **lineAddr = param->lineAddr;
  mlib_s32 srcWidth, dstWidth, srcHeight, dstHeight, srcYStride, dstYStride;
  mlib_s32 *leftEdges, *rightEdges, *xStarts, *yStarts, bsize0, bsize1 = 0;
  mlib_u8 *srcData, *dstData;
  mlib_u8 *paddings;
  void *warp_tbl = NULL;
  mlib_s32 yStart = 0, yFinish = -1, dX, dY;

  mlib_d64 xClip, yClip, wClip, hClip;
  mlib_d64 delta = 0.;
  mlib_d64 minX, minY, maxX, maxY;

  mlib_d64 coords[4][2];
  mlib_d64 a = mtx[0], b = mtx[1], tx = mtx[2], c = mtx[3], d = mtx[4], ty = mtx[5];
  mlib_d64 a2, b2, tx2, c2, d2, ty2;
  mlib_d64 dx, dy, div;
  mlib_s32 sdx, sdy;
  mlib_d64 dTop;
  mlib_d64 val0;
  mlib_s32 top, bot;
  mlib_s32 topIdx, max_xsize = 0;
  mlib_s32 i, j, t;

  srcData = mlib_ImageGetData(src);
  dstData = mlib_ImageGetData(dst);
  srcWidth = mlib_ImageGetWidth(src);
  srcHeight = mlib_ImageGetHeight(src);
  dstWidth = mlib_ImageGetWidth(dst);
  dstHeight = mlib_ImageGetHeight(dst);
  srcYStride = mlib_ImageGetStride(src);
  dstYStride = mlib_ImageGetStride(dst);
  paddings = mlib_ImageGetPaddings(src);

  if (srcWidth >= (1 << 15) || srcHeight >= (1 << 15)) {
    return MLIB_FAILURE;
  }

  div = a * d - b * c;

  if (div == 0.0) {
    return MLIB_FAILURE;
  }

  bsize0 = (dstHeight * sizeof(mlib_s32) + 7) & ~7;

  if (lineAddr == NULL) {
    bsize1 = ((srcHeight + 4 * kh) * sizeof(mlib_u8 *) + 7) & ~7;
  }

  param->buff_malloc = NULL;

  if ((4 * bsize0 + bsize1) > buff_size) {
    buff = param->buff_malloc = mlib_malloc(4 * bsize0 + bsize1);

    if (buff == NULL)
      return MLIB_FAILURE;
  }

  leftEdges = (mlib_s32 *) (buff);
  rightEdges = (mlib_s32 *) (buff += bsize0);
  xStarts = (mlib_s32 *) (buff += bsize0);
  yStarts = (mlib_s32 *) (buff += bsize0);

  if (lineAddr == NULL) {
    mlib_u8 *srcLinePtr = srcData;
    lineAddr = (mlib_u8 **) (buff += bsize0);
    for (i = 0; i < 2 * kh; i++)
      lineAddr[i] = srcLinePtr;
    lineAddr += 2 * kh;
    for (i = 0; i < srcHeight - 1; i++) {
      lineAddr[i] = srcLinePtr;
      srcLinePtr += srcYStride;
    }

    for (i = srcHeight - 1; i < srcHeight + 2 * kh; i++)
      lineAddr[i] = srcLinePtr;
  }

  if ((mlib_s32) edge < 0) {                               /* process edges */
    minX = 0;
    minY = 0;
    maxX = srcWidth;
    maxY = srcHeight;
  }
  else {

    if (kw > 1)
      delta = -0.5;                                        /* for MLIB_NEAREST filter delta = 0. */

    minX = (kw1 - delta);
    minY = (kh1 - delta);
    maxX = srcWidth - ((kw - 1) - (kw1 - delta));
    maxY = srcHeight - ((kh - 1) - (kh1 - delta));

    if (edge == MLIB_EDGE_SRC_PADDED) {
      if (minX < paddings[0])
        minX = paddings[0];

      if (minY < paddings[1])
        minY = paddings[1];

      if (maxX > (srcWidth - paddings[2]))
        maxX = srcWidth - paddings[2];

      if (maxY > (srcHeight - paddings[3]))
        maxY = srcHeight - paddings[3];
    }
  }

  xClip = minX;
  yClip = minY;
  wClip = maxX;
  hClip = maxY;

/*
 *   STORE_PARAM(param, src);
 *   STORE_PARAM(param, dst);
 */
  param->src = (void *)src;
  param->dst = (void *)dst;
  STORE_PARAM(param, lineAddr);
  STORE_PARAM(param, dstData);
  STORE_PARAM(param, srcYStride);
  STORE_PARAM(param, dstYStride);
  STORE_PARAM(param, leftEdges);
  STORE_PARAM(param, rightEdges);
  STORE_PARAM(param, xStarts);
  STORE_PARAM(param, yStarts);
  STORE_PARAM(param, max_xsize);
  STORE_PARAM(param, yStart);
  STORE_PARAM(param, yFinish);
  STORE_PARAM(param, warp_tbl);

  if ((xClip >= wClip) || (yClip >= hClip)) {
    return MLIB_SUCCESS;
  }

  a2 = d;
  b2 = -b;
  tx2 = (-d * tx + b * ty);
  c2 = -c;
  d2 = a;
  ty2 = (c * tx - a * ty);

  dx = a2;
  dy = c2;

  tx -= 0.5;
  ty -= 0.5;

  coords[0][0] = xClip * a + yClip * b + tx;
  coords[0][1] = xClip * c + yClip * d + ty;

  coords[2][0] = wClip * a + hClip * b + tx;
  coords[2][1] = wClip * c + hClip * d + ty;

  if (div > 0) {
    coords[1][0] = wClip * a + yClip * b + tx;
    coords[1][1] = wClip * c + yClip * d + ty;

    coords[3][0] = xClip * a + hClip * b + tx;
    coords[3][1] = xClip * c + hClip * d + ty;
  }
  else {
    coords[3][0] = wClip * a + yClip * b + tx;
    coords[3][1] = wClip * c + yClip * d + ty;

    coords[1][0] = xClip * a + hClip * b + tx;
    coords[1][1] = xClip * c + hClip * d + ty;
  }

  topIdx = 0;
  for (i = 1; i < 4; i++) {

    if (coords[i][1] < coords[topIdx][1])
      topIdx = i;
  }

  dTop = coords[topIdx][1];
  val0 = dTop;
  SAT32(top);
  bot = -1;

  if (top >= dstHeight) {
    return MLIB_SUCCESS;
  }

  if (dTop >= 0.0) {
    mlib_d64 xLeft, xRight, x;
    mlib_s32 nextIdx;

    if (dTop == top) {
      xLeft = coords[topIdx][0];
      xRight = coords[topIdx][0];
      nextIdx = (topIdx + 1) & 0x3;

      if (dTop == coords[nextIdx][1]) {
        x = coords[nextIdx][0];
        xLeft = (xLeft <= x) ? xLeft : x;
        xRight = (xRight >= x) ? xRight : x;
      }

      nextIdx = (topIdx - 1) & 0x3;

      if (dTop == coords[nextIdx][1]) {
        x = coords[nextIdx][0];
        xLeft = (xLeft <= x) ? xLeft : x;
        xRight = (xRight >= x) ? xRight : x;
      }

      val0 = xLeft;
      SAT32(t);
      leftEdges[top] = (t >= xLeft) ? t : ++t;

      if (xLeft >= MLIB_S32_MAX)
        leftEdges[top] = MLIB_S32_MAX;

      val0 = xRight;
      SAT32(rightEdges[top]);
    }
    else
      top++;
  }
  else
    top = 0;

  for (i = 0; i < 2; i++) {
    mlib_d64 dY1 = coords[(topIdx - i) & 0x3][1];
    mlib_d64 dX1 = coords[(topIdx - i) & 0x3][0];
    mlib_d64 dY2 = coords[(topIdx - i - 1) & 0x3][1];
    mlib_d64 dX2 = coords[(topIdx - i - 1) & 0x3][0];
    mlib_d64 x = dX1, slope = (dX2 - dX1) / (dY2 - dY1);
    mlib_s32 y1;
    mlib_s32 y2;

    if (dY1 == dY2)
      continue;

    if (dY1 < 0.0)
      y1 = 0;
    else {
      val0 = dY1 + 1;
      SAT32(y1);
    }

    val0 = dY2;
    SAT32(y2);

    if (y2 >= dstHeight)
      y2 = (mlib_s32) (dstHeight - 1);

    x += slope * (y1 - dY1);
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
    for (j = y1; j <= y2; j++) {
      val0 = x;
      SAT32(t);
      leftEdges[j] = (t >= x) ? t : ++t;

      if (x >= MLIB_S32_MAX)
        leftEdges[j] = MLIB_S32_MAX;
      x += slope;
    }
  }

  for (i = 0; i < 2; i++) {
    mlib_d64 dY1 = coords[(topIdx + i) & 0x3][1];
    mlib_d64 dX1 = coords[(topIdx + i) & 0x3][0];
    mlib_d64 dY2 = coords[(topIdx + i + 1) & 0x3][1];
    mlib_d64 dX2 = coords[(topIdx + i + 1) & 0x3][0];
    mlib_d64 x = dX1, slope = (dX2 - dX1) / (dY2 - dY1);
    mlib_s32 y1;
    mlib_s32 y2;

    if (dY1 == dY2)
      continue;

    if (dY1 < 0.0)
      y1 = 0;
    else {
      val0 = dY1 + 1;
      SAT32(y1);
    }

    val0 = dY2;
    SAT32(y2);

    if (y2 >= dstHeight)
      y2 = (mlib_s32) (dstHeight - 1);

    x += slope * (y1 - dY1);
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
    for (j = y1; j <= y2; j++) {
      val0 = x;
      SAT32(rightEdges[j]);
      x += slope;
    }

    bot = y2;
  }

  {
    mlib_d64 dxCl = xClip * div;
    mlib_d64 dyCl = yClip * div;
    mlib_d64 dwCl = wClip * div;
    mlib_d64 dhCl = hClip * div;

    mlib_s32 xCl = (mlib_s32) (xClip + delta);
    mlib_s32 yCl = (mlib_s32) (yClip + delta);
    mlib_s32 wCl = (mlib_s32) (wClip + delta);
    mlib_s32 hCl = (mlib_s32) (hClip + delta);

    /*
     * mlib_s32 xCl = (mlib_s32)(xClip + delta);
     * mlib_s32 yCl = (mlib_s32)(yClip + delta);
     * mlib_s32 wCl = (mlib_s32)(wClip);
     * mlib_s32 hCl = (mlib_s32)(hClip);
     */

    if (edge == MLIB_EDGE_SRC_PADDED) {
      xCl = kw1;
      yCl = kh1;
      wCl = (mlib_s32) (srcWidth - ((kw - 1) - kw1));
      hCl = (mlib_s32) (srcHeight - ((kh - 1) - kh1));
    }

    div = 1.0 / div;

    sdx = (mlib_s32) (a2 * div * (1 << shiftx));
    sdy = (mlib_s32) (c2 * div * (1 << shifty));

    if (div > 0) {

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
      for (i = top; i <= bot; i++) {
        mlib_s32 xLeft = leftEdges[i];
        mlib_s32 xRight = rightEdges[i];
        mlib_s32 xs, ys, x_e, y_e, x_s, y_s;
        mlib_d64 dxs, dys, dxe, dye;
        mlib_d64 xl, ii, xr;

        xLeft = (xLeft < 0) ? 0 : xLeft;
        xRight = (xRight >= dstWidth) ? (mlib_s32) (dstWidth - 1) : xRight;

        xl = xLeft + 0.5;
        ii = i + 0.5;
        xr = xRight + 0.5;
        dxs = xl * a2 + ii * b2 + tx2;
        dys = xl * c2 + ii * d2 + ty2;

        if ((dxs < dxCl) || (dxs >= dwCl) || (dys < dyCl) || (dys >= dhCl)) {
          dxs += dx;
          dys += dy;
          xLeft++;

          if ((dxs < dxCl) || (dxs >= dwCl) || (dys < dyCl) || (dys >= dhCl))
            xRight = -1;
        }

        dxe = xr * a2 + ii * b2 + tx2;
        dye = xr * c2 + ii * d2 + ty2;

        if ((dxe < dxCl) || (dxe >= dwCl) || (dye < dyCl) || (dye >= dhCl)) {
          dxe -= dx;
          dye -= dy;
          xRight--;

          if ((dxe < dxCl) || (dxe >= dwCl) || (dye < dyCl) || (dye >= dhCl))
            xRight = -1;
        }

        xs = (mlib_s32) ((dxs * div + delta) * (1 << shiftx));
        x_s = xs >> shiftx;

        ys = (mlib_s32) ((dys * div + delta) * (1 << shifty));
        y_s = ys >> shifty;

        if (x_s < xCl)
          xs = (xCl << shiftx);
        else if (x_s >= wCl)
          xs = ((wCl << shiftx) - 1);

        if (y_s < yCl)
          ys = (yCl << shifty);
        else if (y_s >= hCl)
          ys = ((hCl << shifty) - 1);

        if (xRight >= xLeft) {
          x_e = ((xRight - xLeft) * sdx + xs) >> shiftx;
          y_e = ((xRight - xLeft) * sdy + ys) >> shifty;

          if ((x_e < xCl) || (x_e >= wCl)) {
            if (sdx > 0)
              sdx -= 1;
            else
              sdx += 1;
          }

          if ((y_e < yCl) || (y_e >= hCl)) {
            if (sdy > 0)
              sdy -= 1;
            else
              sdy += 1;
          }
        }

        leftEdges[i] = xLeft;
        rightEdges[i] = xRight;
        xStarts[i] = xs;
        yStarts[i] = ys;

        if ((xRight - xLeft + 1) > max_xsize)
          max_xsize = (xRight - xLeft + 1);
      }
    }
    else {

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
      for (i = top; i <= bot; i++) {
/*
 * mlib_ImageGridWarp_alltypes
 *
 * Warps src into dst using a regular grid of cells laid over the destination
 * image. The grid starts at (xStart, yStart) in dst, each cell is
 * xStep x yStep pixels, and there are xNumCells x yNumCells cells.
 * xWarpPos / yWarpPos are the warp position tables consumed by the
 * INIT_GRID_ROW / INCR_GRID_ROW macros (defined outside this block).
 *
 * For every cell that intersects dst the function fills per-row tables
 * (leftEdges, rightEdges, xStarts, yStarts, warp_tbl) for rows yBeg..yEnd
 * and then calls the per-type/per-channel row function selected from
 * mlib_AffineFunArr_nn / _bl / _bc according to `filter`.
 *
 * Returns:
 *   MLIB_SUCCESS  - work done, or nothing to do (zero cell count, grid
 *                   entirely outside dst, empty source clip rectangle).
 *   MLIB_FAILURE  - NULL warp tables, unsupported edge mode, negative cell
 *                   count / non-positive step, source dimension >= 32768,
 *                   unsupported filter, or allocation failure.
 *   Otherwise the non-success status returned by a row function.
 *
 * Only MLIB_EDGE_DST_NO_WRITE and MLIB_EDGE_SRC_PADDED are accepted.
 *
 * NOTE(review): many locals below (px0, py0, cx0, dx0, xStep1, tE, tL, num,
 * denom, t, is_clip*, postShiftX/Y use, ...) are not referenced directly in
 * this body; presumably they are read/written by the INIT_GRID_ROW,
 * INCR_GRID_ROW, IS_CELL_CLIP and MLIB_CLIP macros - confirm against the
 * macro definitions before renaming or removing any of them.
 */
mlib_status
mlib_ImageGridWarp_alltypes(
    mlib_image *dst,
    const mlib_image *src,
    const mlib_f32 *xWarpPos,
    const mlib_f32 *yWarpPos,
    mlib_d64 postShiftX,
    mlib_d64 postShiftY,
    mlib_s32 xStart,
    mlib_s32 xStep,
    mlib_s32 xNumCells,
    mlib_s32 yStart,
    mlib_s32 yStep,
    mlib_s32 yNumCells,
    mlib_filter filter,
    mlib_edge edge)
{
	mlib_affine_param param[1];
	mlib_status res = MLIB_SUCCESS;
	mlib_type type;
	mlib_s32 nchan;
	mlib_s32 srcWidth, dstWidth, srcHeight, dstHeight;
	mlib_s32 srcYStride, dstYStride;
/* stack-resident work tables, used only when both heights are < BUFF_SIZE */
	mlib_s32 *warp_tbl, sArr[5 + 2 * BUFF_SIZE], t_ind = 0;
	mlib_s32 *leftEdges, *rightEdges, *xStarts, *yStarts;
	mlib_s32 leArr[BUFF_SIZE], reArr[BUFF_SIZE], xsArr[BUFF_SIZE],
	    ysArr[BUFF_SIZE];
/* memBuffer stays NULL unless the heap fallback is taken */
	mlib_u8 *laArr[BUFF_SIZE + 4], **lineAddr, *memBuffer = NULL;
	mlib_u8 *srcLinePtr;
	mlib_u8 *srcData, *dstData_beg, *dstData;
	mlib_u8 *paddings;
	mlib_s32 align;
	mlib_s32 xFirst, xLast, xSkip, xRest;
	mlib_s32 yFirst, yLast, ySkip, yRest;
	mlib_d64 minX, minY, maxX, maxY;
	mlib_s32 xBeg, xEnd;
	mlib_s32 yBeg, yEnd;
	mlib_s32 yskip, xskip, yrest, xrest;
	mlib_s32 x, y, cx, cy;
	mlib_d64 px0, py0, px1, py1, px3, py3;
	mlib_d64 dx0, dx1, dy0, dy1, cx0, cx1, cy0, cy1, dx, dy, cx2, cy2, cx3,
	    cy3;
	mlib_d64 delta_x, delta_y;
	mlib_d64 xs, ys;
	mlib_d64 xNum, yNum;
/* reciprocals are computed before xStep/yStep are validated below */
	mlib_d64 xStep1 = 1.0 / xStep, yStep1 = 1.0 / yStep;
	mlib_s32 is_clip, is_clip1, is_clip2, is_clip3, is_clip4;
	mlib_d64 tE, tL, tmp_x0, tmp_y0, tmp_dx, tmp_dy, num, denom, t;
	mlib_d64 d_rdx, d_rdy, x0, y0, x1, y1, offset;
	mlib_s32 shift_left, shift_right, max_xsize;
	mlib_s32 i;

/* check for obvious errors */
/* NOTE(review): macros defined elsewhere; presumably return on mismatch */
	MLIB_IMAGE_TYPE_EQUAL(src, dst);
	MLIB_IMAGE_CHAN_EQUAL(src, dst);

/* an empty grid is not an error: nothing to write */
	if (xNumCells == 0 || yNumCells == 0)
		return (MLIB_SUCCESS);

	if (xWarpPos == NULL || yWarpPos == NULL)
		return (MLIB_FAILURE);

/* only these two edge modes are handled by this function */
	if (edge != MLIB_EDGE_DST_NO_WRITE && edge != MLIB_EDGE_SRC_PADDED)
		return (MLIB_FAILURE);

	if (xNumCells < 0 || yNumCells <= 0 || xStep <= 0 || yStep <= 0)
		return (MLIB_FAILURE);

	srcData = mlib_ImageGetData(src);
	dstData_beg = mlib_ImageGetData(dst);
	type = mlib_ImageGetType(dst);
	nchan = mlib_ImageGetChannels(dst);
	srcWidth = mlib_ImageGetWidth(src);
	srcHeight = mlib_ImageGetHeight(src);
	dstWidth = mlib_ImageGetWidth(dst);
	dstHeight = mlib_ImageGetHeight(dst);
	srcYStride = mlib_ImageGetStride(src);
	dstYStride = mlib_ImageGetStride(dst);
	paddings = mlib_ImageGetPaddings(src);

/* source dimensions of 32768 or more are rejected */
	if (srcWidth >= (1 << 15) || srcHeight >= (1 << 15)) {
		return (MLIB_FAILURE);
	}

/* grid lies completely outside the destination: nothing to do */
	if (xStart >= dstWidth ||
	    ((xStart + xStep * xNumCells) <= 0) ||
	    yStart >= dstHeight || ((yStart + yStep * yNumCells) <= 0))
		return (MLIB_SUCCESS);

/*
 * Select work tables: stack arrays for small images, otherwise a single heap
 * block laid out as
 *   leftEdges | rightEdges | xStarts | yStarts  (dstHeight mlib_s32 each)
 *   warp_tbl                                    (starts at 4 * dstHeight)
 *   lineAddr                                    (starts at 6 * dstHeight + 8
 *                                                mlib_s32, srcHeight + 4
 *                                                pointers)
 */
	if (srcHeight < BUFF_SIZE && dstHeight < BUFF_SIZE) {
		lineAddr = laArr;
		leftEdges = leArr;
		rightEdges = reArr;
		xStarts = xsArr;
		yStarts = ysArr;
		warp_tbl = sArr;
	} else {
		memBuffer =
		    __mlib_malloc((6 * dstHeight + 8) * sizeof (mlib_s32) +
		    (srcHeight + 4) * sizeof (mlib_u8 *));

		if (memBuffer == NULL)
			return (MLIB_FAILURE);
		leftEdges = (mlib_s32 *)(memBuffer);
		rightEdges =
		    (mlib_s32 *)(memBuffer + dstHeight * sizeof (mlib_s32));
		xStarts =
		    (mlib_s32 *)(memBuffer + 2 * dstHeight * sizeof (mlib_s32));
		yStarts =
		    (mlib_s32 *)(memBuffer + 3 * dstHeight * sizeof (mlib_s32));
		warp_tbl =
		    (mlib_s32 *)(memBuffer + 4 * dstHeight * sizeof (mlib_s32));
		lineAddr =
		    (mlib_u8 **)(memBuffer + (6 * dstHeight +
		    8) * sizeof (mlib_s32));
	}

/* first cell column whose last pixel has a non-negative x coordinate */
	for (xFirst = 0; ; xFirst++) {
		if ((xStart + xFirst * xStep + xStep - 1) >= 0)
			break;
	}

/* first cell row whose last pixel has a non-negative y coordinate */
	for (yFirst = 0; ; yFirst++) {
		if ((yStart + yFirst * yStep + yStep - 1) >= 0)
			break;
	}

/* last cell column whose first pixel is still inside the destination */
	for (xLast = xNumCells - 1; xLast >= xFirst; xLast--) {
		if ((xStart + xLast * xStep) <= (dstWidth - 1))
			break;
	}

/* last cell row whose first pixel is still inside the destination */
	for (yLast = yNumCells - 1; yLast >= yFirst; yLast--) {
		if ((yStart + yLast * yStep) <= (dstHeight - 1))
			break;
	}

/* pixels of the first cell that fall left of / above the destination */
	if ((xStart + xFirst * xStep) < 0)
		xSkip = -(xStart + xFirst * xStep);
	else
		xSkip = 0;

	if ((yStart + yFirst * yStep) < 0)
		ySkip = -(yStart + yFirst * yStep);
	else
		ySkip = 0;

/* pixels of the last cell that fall right of / below the destination */
	if ((xStart + xLast * xStep + xStep) > (dstWidth))
		xRest = (xStart + xLast * xStep + xStep) - (dstWidth);
	else
		xRest = 0;

	if ((yStart + yLast * yStep + yStep) > (dstHeight))
		yRest = (yStart + yLast * yStep + yStep) - (dstHeight);
	else
		yRest = 0;

/*
 * Build the source row-pointer table. lineAddr is advanced by 2 so that
 * indices -2 .. srcHeight + 1 are valid table slots; the entries for rows
 * outside 0 .. srcHeight - 1 are computed addresses only.
 */
	srcLinePtr = (mlib_u8 *)srcData;
	lineAddr += 2;
	for (i = -2; i < srcHeight + 2; i++) {
		lineAddr[i] = srcLinePtr + i * srcYStride;
	}

/* map the image type to a row in the function tables (default 0) */
	if (type == MLIB_BYTE)
		t_ind = 0;
	else if (type == MLIB_SHORT)
		t_ind = 1;
	else if (type == MLIB_INT)
		t_ind = 2;
	else if (type == MLIB_USHORT)
		t_ind = 3;
	else if (type == MLIB_FLOAT)
		t_ind = 4;
	else if (type == MLIB_DOUBLE)
		t_ind = 5;

	if (filter == MLIB_NEAREST) {
		if (t_ind >= 3)
/* correct types USHORT, FLOAT, DOUBLE; new values: 1, 2, 3 */
			t_ind -= 2;

/* two channels as one channel of next type */
/* only the low bits of the addresses/strides are examined for alignment */
		align =
		    (mlib_s32)dstData_beg | (mlib_s32)srcData | dstYStride |
		    srcYStride;
#ifndef i386	/* do not perform the copying by mlib_d64 data type for x86 */
		while (((nchan | (align >> t_ind)) & 1) == 0 && t_ind < 3)
#else /* i386 ( do not perform the copying by mlib_d64 data type for x86 ) */
		while (((nchan | (align >> t_ind)) & 1) == 0 && t_ind < 2)
#endif /* i386 ( do not perform the copying by mlib_d64 data type for x86 ) */
		{
			nchan >>= 1;
			t_ind++;
		}
	}

/*
 * Source clip rectangle [minX, maxX) x [minY, maxY) and the sample-position
 * offset, both depending on the filter.
 */
	switch (filter) {
	case MLIB_NEAREST:
		minX = 0;
		minY = 0;
		maxX = srcWidth;
		maxY = srcHeight;
		offset = 0;
		break;

	case MLIB_BILINEAR:
		minX = 0.5;
		minY = 0.5;
		maxX = srcWidth - 0.5;
		maxY = srcHeight - 0.5;
		offset = 0.5;
		break;

	case MLIB_BICUBIC:
	case MLIB_BICUBIC2:
		minX = 1.5;
		minY = 1.5;
		maxX = srcWidth - 1.5;
		maxY = srcHeight - 1.5;
		offset = 0.5;
		break;

	default:
/* unsupported filter: release the heap block (if any) and fail */

		if (memBuffer != NULL) {
			__mlib_free(memBuffer);
		}

		return (MLIB_FAILURE);
	}

/* for padded sources, shrink the clip rectangle by the source paddings */
	if (edge == MLIB_EDGE_SRC_PADDED) {
		if (minX < paddings[0])
			minX = paddings[0];

		if (minY < paddings[1])
			minY = paddings[1];

		if (maxX > (srcWidth - paddings[2]))
			maxX = srcWidth - paddings[2];

		if (maxY > (srcHeight - paddings[3]))
			maxY = srcHeight - paddings[3];
	}

/* empty clip rectangle: no source pixel is usable, nothing to write */
	if ((minX >= maxX) || (minY >= maxY)) {
		if (memBuffer != NULL) {
			__mlib_free(memBuffer);
		}

		return (MLIB_SUCCESS);
	}

/*
 *   STORE_PARAM(param, src);
 */
	param->src = (void *)src;

/* publish the cell-independent state to the row functions */
	STORE_PARAM(param, dst);
	STORE_PARAM(param, lineAddr);
	STORE_PARAM(param, leftEdges);
	STORE_PARAM(param, rightEdges);
	STORE_PARAM(param, xStarts);
	STORE_PARAM(param, yStarts);
	STORE_PARAM(param, srcYStride);
	STORE_PARAM(param, dstYStride);
	STORE_PARAM(param, warp_tbl);
	STORE_PARAM(param, filter);

/* iterate over grid rows; cy is the destination y of the cell's first row */
	for (y = yFirst, cy = yStart + yFirst * yStep; y <= yLast;
	    y++, cy += yStep) {
/* skip/rest only apply to the first/last cell row respectively */
		yskip = (y == yFirst) ? ySkip : 0;
		yrest = (y == yLast) ? yRest : 0;
		yBeg = cy + yskip;
		yEnd = cy + (yStep - 1) - yrest;
		yNum = yEnd - yBeg;
		ys = yskip + 0.5;
/* NOTE(review): opaque macro; presumably loads row-start warp data */
		INIT_GRID_ROW(xFirst, y);
/* iterate over cells in this grid row; cx is the cell's first dst column */
		for (x = xFirst, cx = xStart + xFirst * xStep; x <= xLast;
		    x++, cx += xStep) {
			xskip = (x == xFirst) ? xSkip : 0;
			xrest = (x == xLast) ? xRest : 0;
			xBeg = cx + xskip;
			xEnd = cx + (xStep - 1) - xrest;
			xNum = xEnd - xBeg;
			xs = xskip + 0.5;
/*
 * NOTE(review): opaque macro; presumably sets cx0, cy0, dx0, dy0, dx, dy,
 * delta_x and delta_y for this cell - confirm against its definition
 */
			INCR_GRID_ROW(x, y, xs, ys);

/* widest run produced for this cell; 0 means no row function call */
			max_xsize = 0;

/* NOTE(review): opaque macro; presumably true when the cell needs clipping */
			if (IS_CELL_CLIP()) {

/* clipped cell: compute a per-row [shift_left, shift_right] span */
				for (i = yBeg; i <= yEnd; i++) {
/* tE..tL is the candidate span in pixels relative to xBeg */
					tE = 0;
					tL = xNum;
/* source start position and per-pixel step for destination row i */
					tmp_x0 = cx0 + (i - yBeg) * dx0;
					tmp_y0 = cy0 + (i - yBeg) * dy0;
					tmp_dx = dx + (i - yBeg) * delta_x;
					tmp_dy = dy + (i - yBeg) * delta_y;

/*
 * NOTE(review): MLIB_CLIP is an opaque macro; presumably it narrows tE/tL
 * against one side of the clip rectangle - confirm
 */
					if (tmp_dx) {
						d_rdx = 1.0 / tmp_dx;
						MLIB_CLIP(d_rdx,
						    -tmp_x0 + minX);
						MLIB_CLIP(-d_rdx,
						    tmp_x0 - maxX);
					} else if ((tmp_x0 < minX) ||
					    (tmp_x0 >= maxX)) {
/* constant x outside the clip range: mark the row empty (left > right) */
						leftEdges[i] = 1;
						rightEdges[i] = 0;
						continue;
					}

					if (tmp_dy) {
						d_rdy = 1.0 / tmp_dy;
						MLIB_CLIP(d_rdy,
						    -tmp_y0 + minY);
						MLIB_CLIP(-d_rdy,
						    tmp_y0 - maxY);
					} else if ((tmp_y0 < minY) ||
					    (tmp_y0 >= maxY)) {
/* constant y outside the clip range: mark the row empty */
						leftEdges[i] = 1;
						rightEdges[i] = 0;
						continue;
					}

/* span collapsed: mark the row empty */
					if (tE > tL) {
						leftEdges[i] = 1;
						rightEdges[i] = 0;
						continue;
					}

/* round both span ends up to the next integer when not already integral */
					shift_left = (mlib_s32)tE;

					if ((mlib_d64)shift_left != tE)
						shift_left++;
					shift_right = (mlib_s32)tL;

					if ((mlib_d64)shift_right != tL)
						shift_right++;

/* re-check the rounded end points in x and pull them inside by one pixel */
					x0 = tmp_x0 + shift_left * tmp_dx;
					x1 = tmp_x0 + shift_right * tmp_dx;

					if (tmp_dx >= 0) {
						if (x0 < minX)
							shift_left++;

						if (x1 >= maxX)
							shift_right--;
					} else {

						if (x0 >= maxX)
							shift_left++;

						if (x1 < minX)
							shift_right--;
					}

/* same one-pixel correction for y, using the x-corrected shifts */
					y0 = tmp_y0 + shift_left * tmp_dy;
					y1 = tmp_y0 + shift_right * tmp_dy;

					if (tmp_dy >= 0) {
						if (y0 < minY)
							shift_left++;

						if (y1 >= maxY)
							shift_right--;
					} else {

						if (y0 >= maxY)
							shift_left++;

						if (y1 < minY)
							shift_right--;
					}

/* source position of the first pixel that will actually be written */
					x0 = tmp_x0 + shift_left * tmp_dx;
					y0 = tmp_y0 + shift_left * tmp_dy;

					leftEdges[i] = xBeg + shift_left;
					rightEdges[i] = xBeg + shift_right;

					if ((shift_right - shift_left + 1) >
					    max_xsize)
						max_xsize =
						    (shift_right - shift_left +
						    1);

/* start position and per-pixel step, scaled by MLIB_PREC and truncated */
					xStarts[i] =
					    (mlib_s32)((x0 -
					    offset) * MLIB_PREC);
					yStarts[i] =
					    (mlib_s32)((y0 -
					    offset) * MLIB_PREC);
					warp_tbl[2 * i] =
					    (mlib_s32)(tmp_dx * MLIB_PREC);
					warp_tbl[2 * i + 1] =
					    (mlib_s32)(tmp_dy * MLIB_PREC);
				}
			} else {
/* unclipped cell: every row covers the full xBeg..xEnd span */

				if ((xEnd - xBeg + 1) > max_xsize)
					max_xsize = (xEnd - xBeg + 1);

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
				for (i = yBeg; i <= yEnd; i++) {
					tmp_x0 = cx0 + (i - yBeg) * dx0;
					tmp_y0 = cy0 + (i - yBeg) * dy0;
					tmp_dx = dx + (i - yBeg) * delta_x;
					tmp_dy = dy + (i - yBeg) * delta_y;

					leftEdges[i] = xBeg;
					rightEdges[i] = xEnd;
					xStarts[i] =
					    (mlib_s32)((tmp_x0 -
					    offset) * MLIB_PREC);
					yStarts[i] =
					    (mlib_s32)((tmp_y0 -
					    offset) * MLIB_PREC);
					warp_tbl[2 * i] =
					    (mlib_s32)(tmp_dx * MLIB_PREC);
					warp_tbl[2 * i + 1] =
					    (mlib_s32)(tmp_dy * MLIB_PREC);
				}
			}

/* at least one non-empty row: run the row function over yBeg..yEnd */
			if (max_xsize > 0) {
/* locals named to match the param fields filled by STORE_PARAM */
				mlib_s32 yStart = yBeg;
				mlib_s32 yFinish = yEnd;

/*
 * dstData points one row before yBeg.
 * NOTE(review): presumably the row functions advance dstData by dstYStride
 * before writing each row - confirm against mlib_AffineFunArr_* members
 */
				dstData = dstData_beg + (yBeg - 1) * dstYStride;

				STORE_PARAM(param, dstData);
				STORE_PARAM(param, yStart);
				STORE_PARAM(param, yFinish);
				STORE_PARAM(param, max_xsize);

/* tables are indexed as 4 * type row + (channels - 1) */
				switch (filter) {
				case MLIB_NEAREST:
					res =
					    mlib_AffineFunArr_nn[4 * t_ind +
					    (nchan - 1)] (param);
					break;
				case MLIB_BILINEAR:
					res =
					    mlib_AffineFunArr_bl[4 * t_ind +
					    (nchan - 1)] (param);
					break;
				case MLIB_BICUBIC:
				case MLIB_BICUBIC2:
					res =
					    mlib_AffineFunArr_bc[4 * t_ind +
					    (nchan - 1)] (param);
					break;
				}

/* propagate a row-function failure after releasing the heap block */
				if (res != MLIB_SUCCESS) {
					if (memBuffer != NULL)
						__mlib_free(memBuffer);
					return (res);
				}
			}
		}
	}

	if (memBuffer != NULL) {
		__mlib_free(memBuffer);
	}

	return (MLIB_SUCCESS);
}
/*
 * mlib_ImageAffine_alltypes
 *
 * Applies an affine transform to src and writes the result to dst.
 *
 *   dst, src  - destination and source images; the type/channel equality
 *               macros below are applied to them first.
 *   mtx       - transform coefficients, passed unchanged to
 *               mlib_AffineEdges().
 *   filter    - MLIB_NEAREST, MLIB_BILINEAR, MLIB_BICUBIC or MLIB_BICUBIC2;
 *               any other value yields MLIB_FAILURE.
 *   edge      - edge handling mode. MLIB_EDGE_SRC_PADDED is treated like
 *               MLIB_EDGE_DST_NO_WRITE once the interior has been processed.
 *   colormap  - optional colormap; when non-NULL and the filter is not
 *               MLIB_NEAREST, only MLIB_BYTE / MLIB_SHORT images are
 *               accepted and the indexed (_i) row functions are used.
 *
 * Processing steps:
 *   1. mlib_AffineEdges() fills `param` with the interior row tables.
 *   2. The interior is rendered by the row function selected from
 *      mlib_AffineFunArr_* (or mlib_ImageAffine_bit_1ch_nn for MLIB_BIT).
 *   3. For non-nearest filters with an edge mode other than DST_NO_WRITE,
 *      mlib_AffineEdges() is run again with edge == -1 into `param_e` and
 *      the matching edge routine fills the border.
 *
 * Returns MLIB_SUCCESS, MLIB_FAILURE, or the status reported by the row /
 * edge routine. Every exit taken after the first mlib_AffineEdges() call
 * succeeded releases param->buff_malloc.
 */
mlib_status mlib_ImageAffine_alltypes(mlib_image       *dst,
                                      const mlib_image *src,
                                      const mlib_d64   *mtx,
                                      mlib_filter      filter,
                                      mlib_edge        edge,
                                      const void       *colormap)
{
  mlib_affine_param param[1];
  mlib_status res;
  mlib_type type;
  /* t_ind == -1 means "no function-table row for this type" */
  mlib_s32 nchan, t_ind = -1, kw, kw1;
  mlib_addr align;
  mlib_d64 buff_lcl[BUFF_SIZE / 8];
  mlib_u8 **lineAddr = NULL;

  /* check for obvious errors */
  MLIB_IMAGE_TYPE_EQUAL(src, dst);
  MLIB_IMAGE_CHAN_EQUAL(src, dst);

  type = mlib_ImageGetType(dst);
  nchan = mlib_ImageGetChannels(dst);

  /* kernel size (kw) and left/top kernel padding (kw1) per filter */
  switch (filter) {
    case MLIB_NEAREST:
      kw = 1;
      kw1 = 0;
      break;

    case MLIB_BILINEAR:
      kw = 2;
      kw1 = 0;
      break;

    case MLIB_BICUBIC:
    case MLIB_BICUBIC2:
      kw = 4;
      kw1 = 1;
      break;

    default:
      return MLIB_FAILURE;
  }

  STORE_PARAM(param, lineAddr);
  STORE_PARAM(param, filter);

  res = mlib_AffineEdges(param, dst, src, buff_lcl, BUFF_SIZE,
                         kw, kw, kw1, kw1, edge, mtx, MLIB_SHIFT, MLIB_SHIFT);

  /*
   * Returned directly (not via cleanup): mlib_AffineEdges() can fail before
   * it has initialized param->buff_malloc, so the field must not be read.
   */
  if (res != MLIB_SUCCESS)
    return res;

  /* from here on param->buff_malloc is valid and must be released on exit */

  lineAddr = param->lineAddr;

  if (type == MLIB_BYTE)
    t_ind = 0;
  else if (type == MLIB_SHORT)
    t_ind = 1;
  else if (type == MLIB_INT)
    t_ind = 2;
  else if (type == MLIB_USHORT)
    t_ind = 3;
  else if (type == MLIB_FLOAT)
    t_ind = 4;
  else if (type == MLIB_DOUBLE)
    t_ind = 5;
  else if (type != MLIB_BIT) {
    /* unknown type: there is no function-table row to dispatch to */
    res = MLIB_FAILURE;
    goto cleanup;
  }

  if (colormap != NULL && filter != MLIB_NEAREST) {
    /* indexed interpolation exists only for BYTE (0) and SHORT (1) images;
       MLIB_BIT (t_ind == -1) is rejected here as well */
    if (t_ind != 0 && t_ind != 1) {
      res = MLIB_FAILURE;
      goto cleanup;
    }

    /* index into the _i tables: 2 * (image type + 2 * SHORT lut) + 4-chan lut */
    if (mlib_ImageGetLutType(colormap) == MLIB_SHORT)
      t_ind += 2;
    t_ind = 2 * t_ind;

    if (mlib_ImageGetLutChannels(colormap) == 4)
      t_ind++;
  }

  if (type == MLIB_BIT) {
    mlib_s32 s_bitoff = mlib_ImageGetBitOffset(src);
    mlib_s32 d_bitoff = mlib_ImageGetBitOffset(dst);

    /* bit images: single channel, nearest neighbour only */
    if (nchan != 1 || filter != MLIB_NEAREST) {
      res = MLIB_FAILURE;
      goto cleanup;
    }
    mlib_ImageAffine_bit_1ch_nn(param, s_bitoff, d_bitoff);
  }
  else {
    switch (filter) {
      case MLIB_NEAREST:

        if (t_ind >= 3)
          t_ind -= 2;                                      /* correct types USHORT, FLOAT, DOUBLE; new values: 1, 2, 3 */

        /* two channels as one channel of next type */
        align = (mlib_addr) (param->dstData) | (mlib_addr) lineAddr[0];
        align |= param->dstYStride | param->srcYStride;
        while (((nchan | (align >> t_ind)) & 1) == 0 && t_ind < MAX_T_IND) {
          nchan >>= 1;
          t_ind++;
        }

        res = mlib_AffineFunArr_nn[4 * t_ind + (nchan - 1)] (param);
        break;

      case MLIB_BILINEAR:

        if (colormap != NULL) {
          res = mlib_AffineFunArr_bl_i[t_ind] (param, colormap);
        }
        else {
          res = mlib_AffineFunArr_bl[4 * t_ind + (nchan - 1)] (param);
        }

        break;

      case MLIB_BICUBIC:
      case MLIB_BICUBIC2:

        if (colormap != NULL) {
          res = mlib_AffineFunArr_bc_i[t_ind] (param, colormap);
        }
        else {
          res = mlib_AffineFunArr_bc[4 * t_ind + (nchan - 1)] (param);
        }

        break;
    }

    if (res != MLIB_SUCCESS)
      goto cleanup;
  }

  /* a padded source gets no separate edge pass */
  if (edge == MLIB_EDGE_SRC_PADDED)
    edge = MLIB_EDGE_DST_NO_WRITE;

  if (filter != MLIB_NEAREST && edge != MLIB_EDGE_DST_NO_WRITE) {
    mlib_affine_param param_e[1];
    mlib_d64 buff_lcl1[BUFF_SIZE / 8];

    STORE_PARAM(param_e, lineAddr);
    STORE_PARAM(param_e, filter);

    /* edge == -1 selects the "process edges" clip rectangle (full source) */
    res = mlib_AffineEdges(param_e, dst, src, buff_lcl1, BUFF_SIZE,
                           kw, kw, kw1, kw1, -1, mtx, MLIB_SHIFT, MLIB_SHIFT);

    /* param_e->buff_malloc is not touched on failure: it may be unset */
    if (res != MLIB_SUCCESS)
      goto cleanup;

    switch (edge) {
      case MLIB_EDGE_DST_FILL_ZERO:
        mlib_ImageAffineEdgeZero(param, param_e, colormap);
        break;

      case MLIB_EDGE_OP_NEAREST:
        mlib_ImageAffineEdgeNearest(param, param_e);
        break;

      case MLIB_EDGE_SRC_EXTEND:

        if (filter == MLIB_BILINEAR) {
          res = mlib_ImageAffineEdgeExtend_BL(param, param_e, colormap);
        }
        else {
          res = mlib_ImageAffineEdgeExtend_BC(param, param_e, colormap);
        }

        break;
    }

    if (param_e->buff_malloc != NULL)
      mlib_free(param_e->buff_malloc);
  }

cleanup:
  /* single release point for the interior work buffer */
  if (param->buff_malloc != NULL)
    mlib_free(param->buff_malloc);

  return res;
}
mlib_status
__mlib_ImageZoomTranslateTableBlend(
    mlib_image *dst,
    const mlib_image *src,
    mlib_d64 zoomx,
    mlib_d64 zoomy,
    mlib_d64 tx,
    mlib_d64 ty,
    const void *table,
    mlib_edge edge,
    mlib_blend blend,
    mlib_s32 cmask)
{
	mlib_affine_param param[1];
	mlib_affine_param *cur_param;
	mlib_zoom_workspace ws[1];
	mlib_d64 buff_lcl[BUFF_SIZE / 8];
	mlib_type type;
	mlib_u8 *srcData, *dstData;
	mlib_s32 srcWidth, dstWidth, srcHeight, dstHeight;
	mlib_s32 srcStride, dstStride, schan, dchan;
	mlib_s32 *leftEdges, *rightEdges, *xStarts, *yStarts;
	mlib_s32 *p_x_ind = NULL, *x_ind, *x_tab = NULL, xpos;
	mlib_u8 **lineAddr = NULL;
	mlib_s32 kw, kh, kw1, kh1;
	mlib_status res = MLIB_SUCCESS;
	fun_type_nw fun_nw = NULL;
	mlib_interp_table *tbl = (mlib_interp_table *) table;
	mlib_d64 mtx[6], dxs, tmp_dxs, div;
	mlib_s32 i, x_shift, y_shift;
	mlib_s32 affine = 0, yStart;
	mlib_s32 xLeft_e, xRight_e, xLeft, xRight, dx;

	mtx[0] = zoomx;
	mtx[1] = 0;
	mtx[2] = tx;
	mtx[3] = 0;
	mtx[4] = zoomy;
	mtx[5] = ty;

	ws->zoomx = zoomx;
	ws->zoomy = zoomy;

/* check for obvious errors */
	MLIB_IMAGE_CHECK(src);
	MLIB_IMAGE_CHECK(dst);
	MLIB_IMAGE_TYPE_EQUAL(src, dst);
	MLIB_IMAGE_HAVE_TYPE(src, MLIB_BYTE);

	if (zoomx <= 0 || zoomy <= 0)
		return (MLIB_OUTOFRANGE);

	if (mlib_ImageGetWidth(src) >= (1 << 15) ||
	    mlib_ImageGetHeight(src) >= (1 << 15)) {
		return (MLIB_FAILURE);
	}

	MLIB_IMAGE_GET_ALL_PARAMS(src, type, schan, srcWidth, srcHeight,
	    srcStride, srcData);
	MLIB_IMAGE_GET_ALL_PARAMS(dst, type, dchan, dstWidth, dstHeight,
	    dstStride, dstData);

	if ((schan == 4 || dchan == 4) && cmask != 1 && cmask != 8)
	    return (MLIB_OUTOFRANGE);

	if (schan < 3 || schan > 4 || dchan < 3 || dchan > 4) {
		return (MLIB_FAILURE);
	}

	if ((blend == MLIB_BLEND_GTK_SRC) && (schan == 3) && (dchan == 3))
		return __mlib_ImageZoomTranslateTable(dst, src, zoomx, zoomy,
		    tx, ty, table, edge);

	kw = tbl->width;
	kh = tbl->height;
	kw1 = tbl->leftPadding;
	kh1 = tbl->topPadding;

	x_shift = INT_BITS - mlib_ilogb(srcWidth + kw);
	y_shift = INT_BITS - mlib_ilogb(srcHeight + kh);

	ws->type = type;
	ws->srcData = srcData;
	ws->dstData = dstData;
	ws->srcWidth = srcWidth;
	ws->srcHeight = srcHeight;
	ws->srcStride = srcStride;
	ws->dstStride = dstStride;
	ws->nchan = schan;
	ws->dchan = dchan;
	ws->blend = blend;
	ws->alpha_shift = 1;
	ws->edge = edge;
	ws->x_shift = x_shift;
	ws->y_shift = y_shift;
	ws->x_move = (kw1 << x_shift);
	ws->y_move = (kh1 << y_shift);

	if (cmask == 1)
		ws->alpha_shift = -3;

/* VIS version of non NULL */
	fun_nw = mlib_ImageZoomTranslate_GetFunc(ws, table);

	if (fun_nw == NULL) {
		fun_nw = mlib_ImageZoomTranslateTableBlend_8nw;
	}

/* NULL */
	STORE_PARAM(param, affine);
	STORE_PARAM(param, lineAddr);
	param->buff_malloc = NULL;

/* process internal pixels */

	res = mlib_AffineEdges(param, dst, src, buff_lcl, BUFF_SIZE,
	    kw, kh, kw1, kh1, edge, mtx, x_shift, y_shift);

	if (res != MLIB_SUCCESS)
		return (res);

	ws->yStart = param->yStart;
	ws->yFinish = param->yFinish;
	ws->max_xsize = param->max_xsize;
	ws->dx = param->dX;
	ws->dy = param->dY;

	LOAD_PARAM(param, lineAddr);
	LOAD_PARAM(param, leftEdges);
	LOAD_PARAM(param, rightEdges);
	LOAD_PARAM(param, xStarts);
	LOAD_PARAM(param, yStarts);

	if (edge == MLIB_EDGE_SRC_EXTEND)
		ws->y_move += (1 << (y_shift - 1));

	if ((ws->max_xsize) > 0) {
/* RTC */
		yStarts[(ws->yFinish) + 1] = 0;

		res = fun_nw(param->dstData, lineAddr,
		    leftEdges, rightEdges, xStarts, yStarts, ws, tbl);

		if (res != MLIB_SUCCESS) {
			if (param->buff_malloc != NULL)
				__mlib_free(param->buff_malloc);
			return (res);
		}
	}

/* process edge pixels */

	if (edge != MLIB_EDGE_DST_NO_WRITE && edge != MLIB_EDGE_SRC_PADDED) {
		mlib_affine_param param_e[1];

		param_e->buff_malloc = NULL;

		if (edge == MLIB_EDGE_DST_FILL_ZERO ||
		    edge == MLIB_EDGE_OP_NEAREST) {
			x_shift = 16;
			y_shift = 16;
		}

		STORE_PARAM(param_e, lineAddr);

		if (edge != MLIB_EDGE_SRC_EXTEND_INDEF) {
			res = mlib_AffineEdges(param_e, dst, src, NULL, 0,
			    kw, kh, kw1, kh1, -1, mtx, x_shift, y_shift);
		}

		if (res == MLIB_SUCCESS)
			switch (edge) {
			case MLIB_EDGE_DST_FILL_ZERO:
				mlib_ImageZoomTranslateTableBlendEdgeZero
				    (param, param_e, dchan, schan,
				    ws->alpha_shift, blend);
				break;

			case MLIB_EDGE_OP_NEAREST:
				mlib_ImageZoomTranslateTableBlendEdgeNearest
				    (param, param_e, dchan, schan,
				    ws->alpha_shift, blend);
				break;

			case MLIB_EDGE_SRC_EXTEND:
			case MLIB_EDGE_SRC_EXTEND_INDEF:

				ws->x_shift = x_shift;
				ws->x_move += (1 << (x_shift - 1));

				if (edge == MLIB_EDGE_SRC_EXTEND) {
					ws->yStart = param_e->yStart;
					ws->yFinish = param_e->yFinish;
					yStart = ws->yStart;
					ws->dx = param_e->dX;
					xLeft_e = param_e->leftEdges[yStart];
					xRight_e = param_e->rightEdges[yStart];
					cur_param = param_e;

				} else {
					cur_param = param;
					ws->yStart = param->yStart;
					yStart = ws->yStart;
					xLeft_e = 0;
					xRight_e = dstWidth - 1;
				}

				xLeft = param->leftEdges[param->yStart];
				xRight = param->rightEdges[param->yStart];

				if ((xLeft > xRight) ||
				    (param->yStart > param->yFinish)) {
					xLeft = xRight_e + 1;
					xRight = xRight_e;
				}

				if (((xRight_e - xLeft_e + 1) > 0) &&
				    (ws->yStart <= ws->yFinish)) {
					CREATE_X_IND();

					LOAD_PARAM(cur_param, lineAddr);
					LOAD_PARAM(cur_param, leftEdges);
					LOAD_PARAM(cur_param, xStarts);
					LOAD_PARAM(cur_param, yStarts);

/* RTC */
					yStarts[(ws->yFinish) + 1] = 0;

					if (edge == MLIB_EDGE_SRC_EXTEND) {
						CREATE_X_EXT()
					}

					ws->max_xsize = xLeft - xLeft_e;

					if ((ws->max_xsize) > 0) {
						if (edge !=
						    MLIB_EDGE_SRC_EXTEND) {
							CREATE_X(xLeft_e,
							    xLeft - 1);
						}

						leftEdges[0] = ws->max_xsize;
						leftEdges[1] = xLeft_e;

						res = FUNCNAME_EXT
						    (cur_param->dstData,
						    lineAddr, x_ind, leftEdges,
						    x_tab, xStarts, yStarts, ws,
						    tbl);
					}

					ws->max_xsize = xRight_e - xRight;

					if ((ws->max_xsize) > 0) {
						mlib_s32 shift = 0;

						if (edge !=
						    MLIB_EDGE_SRC_EXTEND) {
							CREATE_X(xRight + 1,
							    xRight_e);
						} else {
							shift =
							    xRight + 1 -
							    xLeft_e;
						}

						leftEdges[0] = ws->max_xsize;
						leftEdges[1] = xRight + 1;

						res = FUNCNAME_EXT
						    (cur_param->dstData,
						    lineAddr, x_ind, leftEdges,
						    x_tab + shift, xStarts,
						    yStarts, ws, tbl);
					}

					__mlib_free(p_x_ind);

					if (x_tab != NULL)
						__mlib_free(x_tab);
				}
				break;
			default:
				res = MLIB_FAILURE;
				break;
			}