Exemplo n.º 1
0
// WarpGlobal: resample src into dst under a global 3x3 transform M.
//
// For every destination pixel (x, y) the source position is
//     X = M[0][0]*x + M[0][1]*y + M[0][2]
//     Y = M[1][0]*x + M[1][1]*y + M[1][2]
//     Z = M[2][0]*x + M[2][1]*y + M[2][2]
// and the pixel is resampled from (X/Z, Y/Z) by WarpLine.
//
//  src     source image (T is the pixel type; its template header is
//          outside this excerpt)
//  dst     destination image; re-allocated with src's shape only when its
//          width is 0, otherwise its current shape is used as is
//  M       destination-to-source transform, indexed M[row][col]
//  interp  interpolation mode, forwarded to WarpLine
//  cubicA  passed to InitializeCubicLUT when interp == eWarpInterpCubic
void WarpGlobal(CImageOf<T> src, CImageOf<T>& dst,
                CTransform3x3 M,
                EWarpInterpolationMode interp, float cubicA)
{
    // Check that dst is of a valid shape
    if (dst.Shape().width == 0)
        dst.ReAllocate(src.Shape());
    CShape sh = dst.Shape();

    // Precompute the cubic interpolant
    if (interp == eWarpInterpCubic)
        InitializeCubicLUT(cubicA);

    // An empty row has nothing to resample; returning here also keeps
    // &rowBuf[0] below from being taken on an empty vector
    int n = sh.width;
    if (n <= 0)
        return;

    // Allocate a row buffer for coordinates (x,y interleaved)
    std::vector<float> rowBuf;
    rowBuf.resize(n*2);

    // Process each row
    for (int y = 0; y < sh.height; y++)
    {
        float *xyP  = &rowBuf[0];
        T *dstP     = &dst.Pixel(0, y, 0);

        // Homogeneous coordinates at x = 0 and their per-pixel increments
        float X0 = (float) (M[0][1]*y + M[0][2]);
        float dX = (float) (M[0][0]);
        float Y0 = (float) (M[1][1]*y + M[1][2]);
        float dY = (float) (M[1][0]);
        float Z0 = (float) (M[2][1]*y + M[2][2]);
        float dZ = (float) (M[2][0]);
        bool affine = (dZ == 0.0);
        float Zi = 1.0f / Z0;           // TODO:  doesn't guard against divide by 0
        if (affine)
        {
            // Z is constant along this row: fold 1/Z into the start values
            // and the steps once, then neutralise Zi.  Without the reset the
            // loop below would scale by 1/Z a second time (X/Z^2) whenever
            // Z0 != 1.
            X0 *= Zi;
            dX *= Zi;
            Y0 *= Zi;
            dY *= Zi;
            Zi = 1.0f;
        }
        for (int x = 0; x < n; x++)
        {
            xyP[2*x+0] = X0 * Zi;
            xyP[2*x+1] = Y0 * Zi;
            X0 += dX;
            Y0 += dY;
            if (! affine)
            {
                // Projective case: Z changes per pixel, so re-derive 1/Z
                Z0 += dZ;
                Zi = 1.0f / Z0;
            }
        }

        // Resample the line
        WarpLine(src, dstP, xyP, n, sh.nBands, interp, src.MinVal(), src.MaxVal());
    }
}
Exemplo n.º 2
0
// ConvolveSeparable (IPL back end): separable convolution of src with
// x_kernel and y_kernel through iplConvolveSep2D / iplConvolveSep2DFP,
// optionally followed by decimation.
//
//  src        input image
//  dst        output image, re-allocated here; when subsample > 1 its width
//             and height are those of src divided by subsample, rounded up
//  x_kernel   first kernel handed to IPL
//  y_kernel   second kernel handed to IPL
//  subsample  values > 1 keep every subsample-th pixel of the full-size result
//             in both directions; any other value leaves the size unchanged
void ConvolveSeparable(CImageOf<T> src, CImageOf<T>& dst,
                       CFloatImage x_kernel, CFloatImage y_kernel,
                       int subsample)
{
    const bool decimating = (subsample > 1);

    // Size and allocate the final result
    CShape outShape = src.Shape();
    if (decimating)
    {
        outShape.width  = (outShape.width  + subsample - 1) / subsample;
        outShape.height = (outShape.height + subsample - 1) / subsample;
    }
    dst.ReAllocate(outShape, false);

    // IPL writes a full-size result: straight into dst when no decimation
    // is requested, otherwise into a scratch image that is thinned out below
    CImageOf<T> fullRes;
    if (decimating)
        fullRes.ReAllocate(src.Shape());
    CImageOf<T>& convolved = decimating ? fullRes : dst;

    // Wrap both images in IPL headers
    IplImage* iplSrc = IPLCreateImage(src);
    IplImage* iplDst = IPLCreateImage(convolved);
    iplSrc->alphaChannel = 0;     // convolve the A channel also
    iplDst->alphaChannel = 0;     // convolve the A channel also

    // Run the convolution: float pixels use the FP entry points, every other
    // pixel type the integer ones.  NOTE(review): the bool passed to the
    // kernel factories presumably selects the kernel orientation - confirm.
    if (typeid(T) != typeid(float))
    {
        IplConvKernel* kx = IPLConvKernel(x_kernel, false);
        IplConvKernel* ky = IPLConvKernel(y_kernel, true);
        iplConvolveSep2D(iplSrc, iplDst, kx, ky);
        iplDeleteConvKernel(kx);
        iplDeleteConvKernel(ky);
    }
    else
    {
        IplConvKernelFP* kx = IPLConvKernelFP(x_kernel, false);
        IplConvKernelFP* ky = IPLConvKernelFP(y_kernel, true);
        iplConvolveSep2DFP(iplSrc, iplDst, kx, ky);
        iplDeleteConvKernelFP(kx);
        iplDeleteConvKernelFP(ky);
    }

    // Release the IPL headers
    iplDeallocate(iplSrc, IPL_IMAGE_HEADER);
    iplDeallocate(iplDst, IPL_IMAGE_HEADER);

    if (! decimating)
        return;

    // Keep every subsample-th pixel of every subsample-th row
    const int bands = outShape.nBands;
    for (int row = 0; row < outShape.height; row++)
    {
        T* from = &convolved.Pixel(0, row * subsample, 0);
        T* to   = &dst.Pixel(0, row, 0);
        for (int col = 0; col < outShape.width; col++)
        {
            for (int b = 0; b < bands; b++)
                to[col * bands + b] = from[col * subsample * bands + b];
        }
    }
}
Exemplo n.º 3
0
// Convolve (direct form): every output sample is the kernel-weighted sum of
// the source samples under the kernel, clamped to dst's value range.
//
//  src     input image
//  dst     output image, re-allocated to src's shape
//  kernel  weights; only band 0 of the kernel is read.  Kernel tap (kx, ky)
//          is applied to source pixel (x - origin[0] + kx, y - origin[1] + ky)
//
// Taps that land outside the source image are skipped, i.e. they contribute
// nothing to the sum.
void Convolve(CImageOf<T> src, CImageOf<T>& dst,
              CFloatImage kernel)
{
    CShape kernShape = kernel.Shape();
    CShape imgShape  = src.Shape();

    // Allocate the result; an empty image needs no further work
    dst.ReAllocate(imgShape, false);
    if (imgShape.width * imgShape.height * imgShape.nBands == 0)
        return;

    for (int y = 0; y < imgShape.height; y++)
    {
        for (int x = 0; x < imgShape.width; x++)
        {
            for (int c = 0; c < imgShape.nBands; c++)
            {
                // Accumulate in double, walking the kernel column by column
                double acc = 0;
                for (int kx = 0; kx < kernShape.width; kx++)
                {
                    const int sx = x - kernel.origin[0] + kx;
                    if (sx < 0 || sx >= imgShape.width)
                        continue;       // whole kernel column is off-image

                    for (int ky = 0; ky < kernShape.height; ky++)
                    {
                        const int sy = y - kernel.origin[1] + ky;
                        if (sy < 0 || sy >= imgShape.height)
                            continue;   // tap is off-image

                        acc += kernel.Pixel(kx, ky, 0) * src.Pixel(sx, sy, c);
                    }
                }

                // Clamp to the destination range before converting to T
                dst.Pixel(x, y, c) = (T) __max(dst.MinVal(), __min(dst.MaxVal(), acc));
            }
        }
    }
}
Exemplo n.º 4
0
// ConvolveRow: produce one output scanline from a row buffer.
//
//  buffer  image whose rows 0 .. kernel height-1 hold the source rows under
//          the kernel; output pixel i reads buffer columns i .. i+kernel width-1
//  kernel  weights (band 0 only)
//  dst     receives n * (number of bands in buffer) samples, band-interleaved
//  n       number of output pixels
//  minVal, maxVal
//          every sum is clamped to [minVal, maxVal] before conversion to T
void ConvolveRow(CImageOf<T> buffer, CFloatImage kernel, T* dst,
                 int n, T minVal, T maxVal)
{
    const int kernW = kernel.Shape().width;
    const int kernH = kernel.Shape().height;
    const int bands = buffer.Shape().nBands;

    T* out = dst;
    for (int px = 0; px < n; px++)
    {
        for (int band = 0; band < bands; band++, out++)
        {
            float acc = 0.0f;
            for (int row = 0; row < kernH; row++)
            {
                float* weights = &kernel.Pixel(0, row, 0);
                T*     samples = &buffer.Pixel(px, row, band);

                // Neighbouring pixels of one band are `bands` elements apart
                for (int tap = 0; tap < kernW; tap++)
                    acc += weights[tap] * samples[tap * bands];
            }
            *out = (T) __max(minVal, __min(maxVal, acc));
        }
    }
}
Exemplo n.º 5
0
// FillRowBuffer: copy one (border-adjusted) source row into a float buffer.
//
//  buf     destination, n floats, band-interleaved
//  src     source image; its borderMode decides how out-of-range indices are
//          remapped by TrimIndex
//  kernel  only kernel.origin is read: origin[1] is added to the row index,
//          origin[0] to each column index
//  k       row index before the kernel origin is applied
//  n       number of floats in buf (pixels * bands)
//
// A negative result from TrimIndex is treated as "no source pixel" and the
// corresponding part of buf is set to zero.
static void FillRowBuffer(float buf[], CImageOf<T>& src, CFloatImage& kernel,
                          int k, int n)
{
    // Compute the real row address
    CShape sShape = src.Shape();
    int nB = sShape.nBands;
    int k0 = TrimIndex(k + kernel.origin[1], src.borderMode, sShape.height);
    if (k0 < 0)
    {
        memset(buf, 0, n * sizeof(float));
        return;
    }

    // Without bands there is nothing to copy (and n / nB would divide by 0)
    if (nB <= 0)
        return;

    // Fill the row
    T* srcP = &src.Pixel(0, k0, 0);
    int m = n / nB;
    for (int l = 0; l < m; l++, buf += nB)
    {
        int l0 = TrimIndex(l + kernel.origin[0], src.borderMode, sShape.width);
        if (l0 < 0)
        {
            // buf holds floats whatever T is, so size the clear by float;
            // sizeof(T) cleared too little for narrow pixel types and wrote
            // past the pixel for wider ones
            memset(buf, 0, nB * sizeof(float));
        }
        else
        {
            for (int b = 0; b < nB; b++)
                buf[b] = srcP[l0*nB + b];
        }
    }
}
Exemplo n.º 6
0
// Convolve (IPL back end): 2-D convolution of src with a single kernel via
// iplConvolve2D / iplConvolve2DFP.
//
//  src     input image
//  dst     output image, re-allocated to src's shape
//  kernel  convolution kernel, converted to an IPL kernel for the call
void Convolve(CImageOf<T> src, CImageOf<T>& dst,
              CFloatImage kernel)
{
    // Allocate the result, if necessary
    dst.ReAllocate(src.Shape(), false);

    // Wrap both images in IPL headers
    IplImage* iplSrc = IPLCreateImage(src);
    IplImage* iplDst = IPLCreateImage(dst);

    // Float pixels go through the FP entry point, all other pixel types
    // through the integer one; one kernel is passed, combined with IPL_SUM
    if (typeid(T) != typeid(float))
    {
        IplConvKernel* k = IPLConvKernel(kernel);
        iplConvolve2D(iplSrc, iplDst, &k, 1, IPL_SUM);
        iplDeleteConvKernel(k);
    }
    else
    {
        IplConvKernelFP* k = IPLConvKernelFP(kernel);
        iplConvolve2DFP(iplSrc, iplDst, &k, 1, IPL_SUM);
        iplDeleteConvKernelFP(k);
    }

    // Release the IPL headers
    iplDeallocate(iplSrc, IPL_IMAGE_HEADER);
    iplDeallocate(iplDst, IPL_IMAGE_HEADER);
}
Exemplo n.º 7
0
// ConvolveSeparable: horizontal pass with x_kernel, vertical pass with
// y_kernel, then decimation (or a plain copy) into dst.
//
//  src          input image
//  dst          output image, re-allocated here; when decimate > 1 its width
//               and height are those of src divided by decimate, rounded up
//  x_kernel     kernel used for the first (row) pass
//  y_kernel     kernel stored as a row; it is transposed into a one-column
//               kernel for the second pass
//  scale, offset
//               forwarded to the second Convolve call only; the first pass
//               runs with scale 1 and offset 0
//  decimate     step between the samples that are kept
//  interpolate  not used
void ConvolveSeparable(CImageOf<T> src, CImageOf<T>& dst,
                       CFloatImage x_kernel, CFloatImage y_kernel,
                       float scale, float offset,
                       int decimate, int interpolate)
{
    (void) interpolate;     // silences the "unused parameter" warning

    // Size and allocate the result
    CShape outShape = src.Shape();
    if (decimate > 1)
    {
        outShape.width  = (outShape.width  + decimate - 1) / decimate;
        outShape.height = (outShape.height + decimate - 1) / decimate;
    }
    dst.ReAllocate(outShape, false);

    // Full-size scratch images, one per pass
    CImageOf<T> afterRows(src.Shape());
    CImageOf<T> afterCols(src.Shape());

    // Transpose y_kernel into a column kernel, moving its x origin to y
    const int taps = y_kernel.Shape().width;
    CFloatImage columnKernel(1, taps, 1);
    for (int i = 0; i < taps; i++)
        columnKernel.Pixel(0, i, 0) = y_kernel.Pixel(i, 0, 0);
    columnKernel.origin[1] = y_kernel.origin[0];

    // Row pass, then column pass (scale and offset are applied once, last)
    Convolve(src, afterRows, x_kernel, 1.0f, 0.0f);
    Convolve(afterRows, afterCols, columnKernel, scale, offset);

    // Copy every decimate-th pixel of every decimate-th row into dst
    const int bands = outShape.nBands;
    for (int row = 0; row < outShape.height; row++)
    {
        T* from = &afterCols.Pixel(0, row * decimate, 0);
        T* to   = &dst.Pixel(0, row, 0);
        for (int col = 0; col < outShape.width; col++)
        {
            for (int b = 0; b < bands; b++)
                to[col * bands + b] = from[col * decimate * bands + b];
        }
    }
}
Exemplo n.º 8
0
// WarpLine: fill one destination scanline by sampling src at given positions.
//
//  src      source image
//  dstP     first sample of the destination scanline (nBands per pixel)
//  xyP      n interleaved (x, y) source coordinates, one pair per pixel
//  n        number of destination pixels
//  nBands   samples per pixel
//  interp   interpolation mode; its numeric value also sets how many
//           neighbouring source pixels must be inside the image
//  minVal, maxVal
//           interpolated values are clamped to [minVal, maxVal]
//
// In a 4-band line, pixels whose 4th destination sample is 0 are left
// untouched.  Pixels whose footprint leaves the source image are set to 0.
void WarpLine(CImageOf<T> src, T* dstP, float *xyP, int n, int nBands,
              EWarpInterpolationMode interp, T minVal, T maxVal)
{
    // Interpolator footprint around the rounded-down source position
    const int reachNeg = int(interp)/2;         // pixels needed before it
    const int reachPos = int(interp) - reachNeg;// pixels needed after it
    const int stepX    = nBands;                // element stride between columns
    const int stepY    = &src.Pixel(0, 1, 0) -
                         &src.Pixel(0, 0, 0);   // element stride between rows
    CShape srcShape = src.Shape();

    for (int i = 0; i < n; i++)
    {
        T*     out = dstP + i * nBands;
        float* xy  = xyP  + 2 * i;

        // don't fill in if alpha = 0
        if (nBands == 4 && out[3] == 0)
            continue;

        // Integer part of the source position
        int col = int(floor(xy[0]));
        int row = int(floor(xy[1]));

        // Any participating pixel outside the source: write zeros
        if (! srcShape.InBounds(col - reachNeg, row - reachNeg) ||
            ! srcShape.InBounds(col + reachPos, row + reachPos))
        {
            for (int b = 0; b < nBands; b++)
                out[b] = 0;
            continue;
        }
        T* in = &src.Pixel(col, row, 0);

        // Nearest-neighbor: just copy pixels
        if (interp == eWarpInterpNearest)
        {
            for (int b = 0; b < nBands; b++)
                out[b] = in[b];
            continue;
        }

        // Fractional part of the source position
        float fx = xy[0] - col;
        float fy = xy[1] - row;

        if (interp == eWarpInterpLinear)
        {
            for (int b = 0; b < nBands; b++)
                out[b] = __max(minVal, __min(maxVal,
                    ResampleBiLinear(&in[b], stepX, stepY, fx, fy)));
        }
        else if (interp == eWarpInterpCubic)
        {
            for (int b = 0; b < nBands; b++)
                out[b] = __max(minVal, __min(maxVal,
                    ResampleBiCubic(&in[b], stepX, stepY, fx, fy)));
        }
    }
}
Exemplo n.º 9
0
// ConvolveSeparable: row pass with x_kernel, column pass with y_kernel, then
// subsampling (or a plain copy) into dst.
//
//  src        input image
//  dst        output image, re-allocated here; when subsample > 1 its width
//             and height are those of src divided by subsample, rounded up
//  x_kernel   kernel used for the first (row) pass
//  y_kernel   kernel stored as a row; transposed into a one-column kernel for
//             the second pass
//  subsample  step between the samples that are kept
void ConvolveSeparable(CImageOf<T> src, CImageOf<T>& dst,
                       CFloatImage x_kernel, CFloatImage y_kernel,
                       int subsample)
{
    // Size and allocate the result
    CShape outShape = src.Shape();
    if (subsample > 1)
    {
        outShape.width  = (outShape.width  + subsample - 1) / subsample;
        outShape.height = (outShape.height + subsample - 1) / subsample;
    }
    dst.ReAllocate(outShape, false);

    // Full-size scratch images, one per pass
    CImageOf<T> rowPass(src.Shape());
    CImageOf<T> colPass(src.Shape());

    // Transpose y_kernel into a column kernel, moving its x origin to y
    const int taps = y_kernel.Shape().width;
    CFloatImage v_kernel(1, taps, 1);
    int t = 0;
    while (t < taps)
    {
        v_kernel.Pixel(0, t, 0) = y_kernel.Pixel(t, 0, 0);
        t++;
    }
    v_kernel.origin[1] = y_kernel.origin[0];

    // Perform the two convolutions
    Convolve(src, rowPass, x_kernel);
    Convolve(rowPass, colPass, v_kernel);

    // Walk the full-size result with a stride of `subsample` pixels/rows
    const int bands   = outShape.nBands;
    const int srcStep = subsample * bands;
    for (int y = 0; y < outShape.height; y++)
    {
        T* in  = &colPass.Pixel(0, y * subsample, 0);
        T* out = &dst.Pixel(0, y, 0);
        for (int x = 0; x < outShape.width; x++, in += srcStep)
        {
            for (int b = 0; b < bands; b++)
                *out++ = in[b];
        }
    }
}
Exemplo n.º 10
0
// Convolve (row-buffered): 2-D convolution followed by scale/offset and
// conversion to the destination pixel type.
//
//  src     input image
//  dst     output image, re-allocated to src's shape
//  kernel  convolution kernel
//  scale, offset
//          handed to ScaleAndOffsetLine for every output row
//
// A float buffer with one row per kernel row holds the source rows currently
// under the kernel.  After each output row the buffer rows are moved up by
// one and the next source row is loaded into the last buffer row.
void Convolve(CImageOf<T> src, CImageOf<T>& dst,
              CFloatImage kernel,
              float scale, float offset)
{
    // Shapes of the kernel, the image and the row buffer
    CShape kernShape = kernel.Shape();
    CShape imgShape  = src.Shape();
    CShape ringShape(imgShape.width + kernShape.width, kernShape.height,
                     imgShape.nBands);
    const int ringRowLen = ringShape.width * ringShape.nBands;

    // Allocate the result and the row buffer; stop early on an empty image
    dst.ReAllocate(imgShape, false);
    CFloatImage buffer(ringShape);
    if (imgShape.width * imgShape.height * imgShape.nBands == 0)
        return;
    CFloatImage rowOut(CShape(imgShape.width, 1, imgShape.nBands));

    // Prime the buffer with the rows needed for output row 0
    for (int r = 0; r < kernShape.height; r++)
        FillRowBuffer(&buffer.Pixel(0, r, 0), src, kernel, r, ringRowLen);

    // Determine if clipping is required
    //  (we assume up-conversion to float never requires clipping, i.e.,
    //   floats have the highest dynamic range)
    T minVal = dst.MinVal();
    T maxVal = dst.MaxVal();
    if (minVal <= buffer.MinVal() && maxVal >= buffer.MaxVal())
        minVal = maxVal = 0;

    const int lastRow = imgShape.height - 1;
    for (int y = 0; y <= lastRow; y++)
    {
        // Convolve the buffered rows into one float scanline
        ConvolveRow2D(buffer, kernel, &rowOut.Pixel(0, 0, 0),
                      imgShape.width);

        // Scale, offset, and type convert into dst
        ScaleAndOffsetLine(&rowOut.Pixel(0, 0, 0), &dst.Pixel(0, y, 0),
                           imgShape.width * imgShape.nBands,
                           scale, offset, minVal, maxVal);

        // Nothing left to load after the final output row
        if (y == lastRow)
            continue;

        // Move every buffer row up by one ...
        int slot = 0;
        while (slot < kernShape.height - 1)
        {
            memcpy(&buffer.Pixel(0, slot, 0), &buffer.Pixel(0, slot + 1, 0),
                   ringRowLen * sizeof(float));
            slot++;
        }
        // ... and load the next source row into the freed last slot
        FillRowBuffer(&buffer.Pixel(0, slot, 0), src, kernel, y + slot + 1,
                      ringRowLen);
    }
}
Exemplo n.º 11
0
// WarpLocal: resample src into dst using a per-pixel coordinate image.
//
//  src             source image
//  dst             destination image, re-allocated to uv's width and height
//                  with src's number of bands
//  uv              per-pixel coordinates, read as interleaved pairs (two
//                  floats per destination pixel)
//  relativeCoords  true: each pair is an offset added to the destination
//                  pixel's own (x, y); false: each pair is used as is
//  interp          interpolation mode, forwarded to WarpLine
//  cubicA          passed to InitializeCubicLUT when interp is cubic
void WarpLocal(CImageOf<T> src, CImageOf<T>& dst,
               CFloatImage uv, bool relativeCoords,
               EWarpInterpolationMode interp, float cubicA)
{
    // Give dst the right shape
    CShape outShape(uv.Shape().width, uv.Shape().height, src.Shape().nBands);
    dst.ReAllocate(outShape);

    // Scratch row for absolute coordinates (x,y interleaved)
    const int rowLen = outShape.width;
    std::vector<float> absXY(rowLen * 2);

    // Precompute the cubic interpolant
    if (interp == eWarpInterpCubic)
        InitializeCubicLUT(cubicA);

    for (int y = 0; y < outShape.height; y++)
    {
        float* flow   = &uv.Pixel(0, y, 0);
        float* coords = flow;
        T*     out    = &dst.Pixel(0, y, 0);

        if (relativeCoords)
        {
            // Turn offsets into absolute source positions
            coords = &absXY[0];
            for (int x = 0; x < rowLen; x++)
            {
                coords[2*x+0] = x + flow[2*x+0];
                coords[2*x+1] = y + flow[2*x+1];
            }
        }

        // Resample the line
        WarpLine(src, out, coords, rowLen, outShape.nBands, interp,
                 src.MinVal(), src.MaxVal());
    }
}
Exemplo n.º 12
0
// ConvolveSeparable (timed, optional GPU path): separable convolution
// followed by decimation into dst.  Elapsed times of the convolution and of
// the decimate/copy step are printed with printf, measured through
// profilingTimer (declared outside this excerpt).
//
//  src          input image
//  dst          output image, re-allocated here; when decimate > 1 its width
//               and height are those of src divided by decimate, rounded up
//  x_kernel     row kernel.  With RUN_ON_GPU only its width is inspected:
//               3 and 5 select a built-in binomial filter, any other width
//               throws CError
//  y_kernel     kernel stored as a row, transposed into a column kernel for
//               the CPU column pass
//  scale, offset
//               applied by the CPU column pass; not referenced by the
//               RUN_ON_GPU path
//  decimate     step between the samples that are kept
//  interpolate  not used
void ConvolveSeparable(CImageOf<T> src, CImageOf<T>& dst,
                       CFloatImage x_kernel, CFloatImage y_kernel,
                       float scale, float offset,
                       int decimate, int interpolate)
{
    (void) interpolate;     // silences the "unused parameter" warning

    // Allocate the result, if necessary
    CShape dShape = src.Shape();
    if (decimate > 1)
    {
        dShape.width  = (dShape.width  + decimate-1) / decimate;
        dShape.height = (dShape.height + decimate-1) / decimate;
    }
    dst.ReAllocate(dShape, false);

    // Allocate the intermediate images
    CImageOf<T> tmpImg1(src.Shape());
    CImageOf<T> tmpImg2(src.Shape());

    // Create a proper vertical convolution kernel
    CFloatImage v_kernel(1, y_kernel.Shape().width, 1);
    for (int k = 0; k < y_kernel.Shape().width; k++)
        v_kernel.Pixel(0, k, 0) = y_kernel.Pixel(k, 0, 0);
    v_kernel.origin[1] = y_kernel.origin[0];

#ifdef RUN_ON_GPU
    // The CUDA path does not take scale/offset
    (void) scale;
    (void) offset;

    BinomialFilterType type;

    profilingTimer->startTimer();

    // Map the kernel width onto one of the supported CUDA filters
    switch (x_kernel.Shape().width)
    {
       case 3:
           type = BINOMIAL6126;
           break;
       case 5:
           type = BINOMIAL14641;
           break;
       default:
           // Unsupported kernel case
           throw CError("Convolution kernel Unknown");
    }

    // With decimate == 1 the GPU writes straight into dst and the copy
    // below is skipped
    const bool copyNeeded = (decimate != 1);
    if (copyNeeded)
        CudaConvolveXY(src, tmpImg2, type);
    else
        CudaConvolveXY(src, dst, type);

    printf("\nGPU convolution time = %f ms\n", profilingTimer->stopAndGetTimerValue());
#else
    // The CPU path always leaves its result in tmpImg2
    const bool copyNeeded = true;

    profilingTimer->startTimer();

    // Perform the two convolutions
    Convolve(src, tmpImg1, x_kernel, 1.0f, 0.0f);
    Convolve(tmpImg1, tmpImg2, v_kernel, scale, offset);

    printf("\nCPU Convolution time = %f ms\n", profilingTimer->stopAndGetTimerValue());
#endif

    profilingTimer->startTimer();

    // Downsample or copy
    if (copyNeeded)
    {
        for (int y = 0; y < dShape.height; y++)
        {
            T* sPtr = &tmpImg2.Pixel(0, y * decimate, 0);
            T* dPtr = &dst.Pixel(0, y, 0);
            int nB  = dShape.nBands;
            for (int x = 0; x < dShape.width; x++)
            {
                for (int b = 0; b < nB; b++)
                    dPtr[b] = sPtr[b];
                sPtr += decimate * nB;
                dPtr += nB;
            }
        }
    }
    printf("\nDecimate/Recopy took = %f ms\n", profilingTimer->stopAndGetTimerValue());
}
Exemplo n.º 13
0
// ForwardWarp: push every pixel of src horizontally into dst by its scaled
// disparity.
//
//  src               source image
//  dst               destination image; re-allocated only when its shape
//                    differs from src's.  Pixels that nothing is warped onto
//                    keep whatever value they had
//  disp              disparity image, same width/height as src (band count
//                    may differ); one value per pixel is read from each row
//  d_scale           multiplier applied to each disparity; its sign also
//                    selects the scan direction (left-to-right when > 0,
//                    right-to-left otherwise)
//  line_interpolate  when true, neighbouring pixels whose disparities differ
//                    by less than disp_gap are joined with
//                    draw_intensity_line instead of being splatted singly
//  disp_gap          disparity difference threshold for line drawing
//
// Throws CError when disp does not match src's width and height.
extern void ForwardWarp(CImageOf<T>& src, CImageOf<T>& dst, CFloatImage& disp,
                        float d_scale, bool line_interpolate, float disp_gap)
{
    CShape sh = src.Shape();
    int w = sh.width, h = sh.height, n_bands = sh.nBands;
    float round_offset = (typeid(T) == typeid(float)) ? 0.0f : 0.5f;

    if (! sh.SameIgnoringNBands(disp.Shape()))
        throw CError("ForwardWarp: disparity image has wrong size");

    if (sh != dst.Shape())
        dst.ReAllocate(sh);

    // Optional clipping (if necessary): when T's range covers the float
    // range, min == max == 0 is passed on to draw_intensity_line
    CFloatImage flt;
    T minVal = dst.MinVal();
    T maxVal = dst.MaxVal();
    if (minVal <= flt.MinVal() && maxVal >= flt.MaxVal())
        minVal = maxVal = 0;

    // The warping direction depends only on the sign of d_scale
    const int xstart = (d_scale>0 ? 0   : w-1);
    const int xend   = (d_scale>0 ? w   : -1  );
    const int xincr  = (d_scale>0 ? 1   : -1  );

    for (int y = 0; y < h; y++)
    {
        float *dp = &disp.Pixel(0, y, 0);
        T *ps = &src .Pixel(0, y, 0);
        T *pd = &dst .Pixel(0, y, 0);

        for (int x = xstart; x != xend; x += xincr)
        {
            // Determine if a line should be drawn to the next pixel.  dp[x2]
            // is only read once x2 is known to be inside the row; reading it
            // first touched dp[w] / dp[-1] on the last pixel of every row.
            int x2 = x + xincr;
            bool draw_line = false;
            if (line_interpolate && (x2 != xend))
            {
                float d_diff = fabs(dp[x] - dp[x2]);
                draw_line = (d_diff < disp_gap);
            }

            // scaled disparity:
            float d = d_scale * dp[x];

            // line drawing
            if (draw_line)
            {
                float d2 = d_scale * dp[x2];

                // Each end point is placed with its own disparity: pixel x
                // lands at x - d, pixel x2 at x2 - d2.  The end points are
                // passed left one first.
                if (xincr > 0)
                    draw_intensity_line(&ps[x * n_bands], &ps[x2 * n_bands], pd,
                                        x - d, x2 - d2, w, n_bands, round_offset,
                                        minVal, maxVal);
                else
                    draw_intensity_line(&ps[x2 * n_bands], &ps[x * n_bands], pd,
                                        x2 - d2, x - d, w, n_bands, round_offset,
                                        minVal, maxVal);
                continue;
            }

            // splatting: copy the single pixel if it lands inside the row
            int xx = x - ROUND(d);
            if (xx >= 0 && xx < w)
                memcpy(&pd[xx * n_bands], &ps[x * n_bands],
                       n_bands*sizeof(T));
        }
    }
}
Exemplo n.º 14
0
// InverseWarp: warp src into dst row by row using the disparities in disp.
//
//  src       source image
//  dst       destination image; re-allocated only when its shape differs
//            from src's.  Its current contents are fed into the per-row warp
//  disp      disparity image, same width/height as src
//  d_scale   multiplier applied to each disparity
//  disp_gap  when > 0, a forward-warped disparity map is built first and its
//            rows are passed to InverseWarpLine together with disp_gap;
//            otherwise a null pointer is passed
//  order     forwarded to InverseWarpLine
//
// Throws CError when disp does not match src's width and height.
extern void InverseWarp(CImageOf<T>& src, CImageOf<T>& dst, CFloatImage& disp,
                        float d_scale, float disp_gap, int order)
{
    CShape shape = src.Shape();
    const int width  = shape.width;
    const int rows   = shape.height;
    const int bands  = shape.nBands;
    const int rowLen = width * bands;

    if (! shape.SameIgnoringNBands(disp.Shape()))
        throw CError("InverseWarp: disparity image has wrong size");

    if (shape != dst.Shape())
        dst.ReAllocate(shape);

    // Optional forward warped depth map if checking for visibility
    const bool checkVisibility = (disp_gap > 0.0f);
    CFloatImage fwd, fwd_tmp;
    if (checkVisibility)
    {
        ScaleAndOffset(disp, fwd_tmp, d_scale, 0.0f);
        fwd.ReAllocate(disp.Shape());
        fwd.FillPixels(-9999.0f);
        ForwardWarp(fwd_tmp, fwd, disp, d_scale, true, disp_gap);
    }

    // Float line buffers for one row of source, destination and disparity
    std::vector<float> srcLine(rowLen);
    std::vector<float> dstLine(rowLen);
    std::vector<float> dispLine(rowLen);
    CFloatImage fimg;   // dummy, used for MinVal(), MaxVal()

    // Parameters for converting a warped float row back to T: add 0.5 for
    // rounding unless T is float, and skip clipping (min == max == 0) when
    // T's range already covers the float range
    T minVal = dst.MinVal();
    T maxVal = dst.MaxVal();
    if (minVal <= fimg.MinVal() && maxVal >= fimg.MaxVal())
        minVal = maxVal = 0;
    const float offset = (typeid(T) == typeid(float)) ? 0.0f : 0.5f;

    for (int y = 0; y < rows; y++)
    {
        // Load the row into the float buffers; disparities are scaled here
        ScaleAndOffsetLine(&src .Pixel(0, y, 0), &srcLine[0], rowLen,
                           1.0f,    0.0f, fimg.MinVal(), fimg.MaxVal());
        ScaleAndOffsetLine(&disp.Pixel(0, y, 0), &dispLine[0], width,
                           d_scale, 0.0f, 0.0f, 0.0f);
        ScaleAndOffsetLine(&dst .Pixel(0, y, 0), &dstLine[0], rowLen,
                           1.0f,    0.0f, fimg.MinVal(), fimg.MaxVal());

        // Matching row of the forward-warped depth map, if one was built
        float *fwdRow = 0;
        if (checkVisibility)
            fwdRow = &fwd.Pixel(0, y, 0);

        // Process (warp) the line
        InverseWarpLine(&srcLine[0], &dstLine[0], &dispLine[0],
                        width, bands, order, fwdRow, disp_gap);

        // Convert back to native type
        ScaleAndOffsetLine(&dstLine[0], &dst.Pixel(0, y, 0), rowLen,
                           1.0f, offset, minVal, maxVal);
    }
}