Beispiel #1
0
/*
 * Zoom and/or shift `input` into `output` using spline interpolation of the
 * given `order`.
 *
 * For every output index k along axis j, the input coordinate that gets
 * sampled is (k + shifts[j]) * zooms[j]; a NULL shift_ar or zoom_ar skips
 * the corresponding step. The coordinate is then passed through
 * map_coordinate() together with the boundary `mode`.
 *
 * Parameters:
 *   input     array that is sampled; its rank sets the number of axes.
 *   zoom_ar   optional array whose data is read as npy_double, one factor
 *             per axis (may be NULL).
 *   shift_ar  optional array whose data is read as npy_double, one offset
 *             per axis (may be NULL).
 *   output    array that receives the result; its dimensions define the
 *             grid of sample points.
 *   order     spline order; (order + 1) samples per axis contribute to each
 *             output element.
 *   mode      boundary mode forwarded to map_coordinate().
 *   cval      value stored for output points that were flagged as lying
 *             outside the input (only used with NI_EXTEND_CONSTANT).
 *
 * Returns 1 on success and 0 when a Python exception is set.
 *
 * NOTE(review): zoom_ar / shift_ar are assumed to hold at least
 * PyArray_NDIM(input) contiguous doubles, and `output` is assumed to have
 * the same rank as `input` -- confirm against the caller.
 */
int NI_ZoomShift(PyArrayObject *input, PyArrayObject* zoom_ar,
                                 PyArrayObject* shift_ar, PyArrayObject *output,
                                 int order, int mode, double cval)
{
    char *po, *pi;
    npy_intp **zeros = NULL, **offsets = NULL, ***edge_offsets = NULL;
    npy_intp ftmp[NPY_MAXDIMS], *fcoordinates = NULL, *foffsets = NULL;
    npy_intp jj, hh, kk, filter_size, odimensions[NPY_MAXDIMS];
    npy_intp idimensions[NPY_MAXDIMS], istrides[NPY_MAXDIMS];
    npy_intp size;
    double ***splvals = NULL;
    NI_Iterator io;
    npy_double *zooms = zoom_ar ? (npy_double*)PyArray_DATA(zoom_ar) : NULL;
    npy_double *shifts = shift_ar ? (npy_double*)PyArray_DATA(shift_ar) : NULL;
    int rank = 0;
    NPY_BEGIN_THREADS_DEF;

    /* Threads are released for the numeric work; each error path below
     * calls NPY_END_THREADS before touching the Python error state. */
    NPY_BEGIN_THREADS;

    /* Cache the array geometry in local arrays. */
    for (kk = 0; kk < PyArray_NDIM(input); kk++) {
        idimensions[kk] = PyArray_DIM(input, kk);
        istrides[kk] = PyArray_STRIDE(input, kk);
        odimensions[kk] = PyArray_DIM(output, kk);
    }
    rank = PyArray_NDIM(input);

    /* if the mode is 'constant' we need some temps later: one flag per
     * output coordinate and axis that marks "outside the input". */
    if (mode == NI_EXTEND_CONSTANT) {
        zeros = malloc(rank * sizeof(npy_intp*));
        if (NPY_UNLIKELY(!zeros)) {
            NPY_END_THREADS;
            PyErr_NoMemory();
            goto exit;
        }
        /* NULL-fill first so that the cleanup code can free() every slot. */
        for(jj = 0; jj < rank; jj++)
            zeros[jj] = NULL;
        for(jj = 0; jj < rank; jj++) {
            zeros[jj] = malloc(odimensions[jj] * sizeof(npy_intp));
            if (NPY_UNLIKELY(!zeros[jj])) {
                NPY_END_THREADS;
                PyErr_NoMemory();
                goto exit;
            }
        }
    }

    /* store offsets, along each axis: */
    offsets = malloc(rank * sizeof(npy_intp*));
    /* store spline coefficients, along each axis: */
    splvals = malloc(rank * sizeof(double**));
    /* store offsets at all edges: */
    edge_offsets = malloc(rank * sizeof(npy_intp**));
    if (NPY_UNLIKELY(!offsets || !splvals || !edge_offsets)) {
        NPY_END_THREADS;
        PyErr_NoMemory();
        goto exit;
    }
    for(jj = 0; jj < rank; jj++) {
        offsets[jj] = NULL;
        splvals[jj] = NULL;
        edge_offsets[jj] = NULL;
    }
    for(jj = 0; jj < rank; jj++) {
        offsets[jj] = malloc(odimensions[jj] * sizeof(npy_intp));
        splvals[jj] = malloc(odimensions[jj] * sizeof(double*));
        edge_offsets[jj] = malloc(odimensions[jj] * sizeof(npy_intp*));
        if (NPY_UNLIKELY(!offsets[jj] || !splvals[jj] || !edge_offsets[jj])) {
            /* The per-coordinate pointer tables have not been NULL-filled
             * yet, so the cleanup code must not walk them (it would free()
             * uninitialized pointers). Release them here instead. */
            free(splvals[jj]);
            splvals[jj] = NULL;
            free(edge_offsets[jj]);
            edge_offsets[jj] = NULL;
            NPY_END_THREADS;
            PyErr_NoMemory();
            goto exit;
        }
        for(hh = 0; hh < odimensions[jj]; hh++) {
            splvals[jj][hh] = NULL;
            edge_offsets[jj][hh] = NULL;
        }
    }

    /* precalculate offsets, and offsets at the edge: */
    for(jj = 0; jj < rank; jj++) {
        double shift = 0.0, zoom = 0.0;
        if (shifts)
            shift = shifts[jj];
        if (zooms)
            zoom = zooms[jj];
        for(kk = 0; kk < odimensions[jj]; kk++) {
            /* input coordinate sampled by output index kk on this axis */
            double cc = (double)kk;
            if (shifts)
                cc += shift;
            if (zooms)
                cc *= zoom;
            cc = map_coordinate(cc, idimensions[jj], mode);
            if (cc > -1.0) {
                npy_intp start;
                if (zeros && zeros[jj])
                    zeros[jj][kk] = 0;
                /* first input index of the (order + 1)-wide sample window;
                 * odd orders anchor on floor(cc), even ones on the nearest
                 * integer */
                if (order & 1) {
                    start = (npy_intp)floor(cc) - order / 2;
                } else {
                    start = (npy_intp)floor(cc + 0.5) - order / 2;
                }
                /* byte offset of the window start along this axis */
                offsets[jj][kk] = istrides[jj] * start;
                if (start < 0 || start + order >= idimensions[jj]) {
                    /* The window sticks out of the input: store a byte
                     * offset for every tap, relative to the window start. */
                    edge_offsets[jj][kk] = malloc((order + 1) * sizeof(npy_intp));
                    if (NPY_UNLIKELY(!edge_offsets[jj][kk])) {
                        NPY_END_THREADS;
                        PyErr_NoMemory();
                        goto exit;
                    }
                    for(hh = 0; hh <= order; hh++) {
                        npy_intp idx = start + hh;
                        npy_intp len = idimensions[jj];
                        if (len <= 1) {
                            idx = 0;
                        } else {
                            /* fold idx back into [0, len - 1] by reflection
                             * with period 2 * len - 2 */
                            npy_intp s2 = 2 * len - 2;
                            if (idx < 0) {
                                idx = s2 * (npy_intp)(-idx / s2) + idx;
                                idx = idx <= 1 - len ? idx + s2 : -idx;
                            } else if (idx >= len) {
                                idx -= s2 * (npy_intp)(idx / s2);
                                if (idx >= len)
                                    idx = s2 - idx;
                            }
                        }
                        edge_offsets[jj][kk][hh] = istrides[jj] * (idx - start);
                    }
                }
                if (order > 0) {
                    splvals[jj][kk] = malloc((order + 1) * sizeof(double));
                    if (NPY_UNLIKELY(!splvals[jj][kk])) {
                        NPY_END_THREADS;
                        PyErr_NoMemory();
                        goto exit;
                    }
                    spline_coefficients(cc, order, splvals[jj][kk]);
                }
            } else {
                /* `zeros` only exists for NI_EXTEND_CONSTANT. If the test
                 * above fails in any other mode (it is always false when cc
                 * is NaN), report an error instead of writing through a
                 * NULL pointer. */
                if (NPY_UNLIKELY(!zeros)) {
                    NPY_END_THREADS;
                    PyErr_SetString(PyExc_RuntimeError,
                                    "coordinate could not be mapped into the input");
                    goto exit;
                }
                zeros[jj][kk] = 1;
            }
        }
    }

    /* number of input samples per output element: (order + 1) ** rank */
    filter_size = 1;
    for(jj = 0; jj < rank; jj++)
        filter_size *= order + 1;

    if (!NI_InitPointIterator(output, &io))
        goto exit;

    pi = (void *)PyArray_DATA(input);
    po = (void *)PyArray_DATA(output);

    /* store all coordinates and offsets with filter: */
    fcoordinates = malloc(rank * filter_size * sizeof(npy_intp));
    foffsets = malloc(filter_size * sizeof(npy_intp));
    if (NPY_UNLIKELY(!fcoordinates || !foffsets)) {
        NPY_END_THREADS;
        PyErr_NoMemory();
        goto exit;
    }

    /* Enumerate every tap of the filter window like an odometer (last axis
     * fastest): fcoordinates gets the per-axis tap index, foffsets the
     * matching byte offset relative to the window start. */
    for(jj = 0; jj < rank; jj++)
        ftmp[jj] = 0;
    kk = 0;
    for(hh = 0; hh < filter_size; hh++) {
        for(jj = 0; jj < rank; jj++)
            fcoordinates[jj + hh * rank] = ftmp[jj];
        foffsets[hh] = kk;
        for(jj = rank - 1; jj >= 0; jj--) {
            if (ftmp[jj] < order) {
                ftmp[jj]++;
                kk += istrides[jj];
                break;
            } else {
                ftmp[jj] = 0;
                kk -= istrides[jj] * order;
            }
        }
    }
    size = PyArray_SIZE(output);
    for(kk = 0; kk < size; kk++) {
        double t = 0.0;
        npy_intp edge = 0, oo = 0, zero = 0;

        /* Combine the per-axis tables for the current output coordinate. */
        for(hh = 0; hh < rank; hh++) {
            if (zeros && zeros[hh][io.coordinates[hh]]) {
                /* we use constant border condition */
                zero = 1;
                break;
            }
            oo += offsets[hh][io.coordinates[hh]];
            if (edge_offsets[hh][io.coordinates[hh]])
                edge = 1;
        }

        if (!zero) {
            npy_intp *ff = fcoordinates;
            const int type_num = PyArray_TYPE(input);
            t = 0.0;
            for(hh = 0; hh < filter_size; hh++) {
                npy_intp idx = 0;
                double coeff = 0.0;

                if (NPY_UNLIKELY(edge)) {
                    /* use precalculated edge offsets: */
                    for(jj = 0; jj < rank; jj++) {
                        if (edge_offsets[jj][io.coordinates[jj]])
                            idx += edge_offsets[jj][io.coordinates[jj]][ff[jj]];
                        else
                            idx += ff[jj] * istrides[jj];
                    }
                    idx += oo;
                } else {
                    /* use normal offsets: */
                    idx += oo + foffsets[hh];
                }
                /* NOTE(review): the macro presumably loads the input sample
                 * at byte offset idx into coeff -- confirm at its
                 * definition. */
                switch (type_num) {
                    CASE_INTERP_COEFF(NPY_BOOL, npy_bool,
                                      coeff, pi, idx);
                    CASE_INTERP_COEFF(NPY_UBYTE, npy_ubyte,
                                      coeff, pi, idx);
                    CASE_INTERP_COEFF(NPY_USHORT, npy_ushort,
                                      coeff, pi, idx);
                    CASE_INTERP_COEFF(NPY_UINT, npy_uint,
                                      coeff, pi, idx);
                    CASE_INTERP_COEFF(NPY_ULONG, npy_ulong,
                                      coeff, pi, idx);
                    CASE_INTERP_COEFF(NPY_ULONGLONG, npy_ulonglong,
                                      coeff, pi, idx);
                    CASE_INTERP_COEFF(NPY_BYTE, npy_byte,
                                      coeff, pi, idx);
                    CASE_INTERP_COEFF(NPY_SHORT, npy_short,
                                      coeff, pi, idx);
                    CASE_INTERP_COEFF(NPY_INT, npy_int,
                                      coeff, pi, idx);
                    CASE_INTERP_COEFF(NPY_LONG, npy_long,
                                      coeff, pi, idx);
                    CASE_INTERP_COEFF(NPY_LONGLONG, npy_longlong,
                                      coeff, pi, idx);
                    CASE_INTERP_COEFF(NPY_FLOAT, npy_float,
                                      coeff, pi, idx);
                    CASE_INTERP_COEFF(NPY_DOUBLE, npy_double,
                                      coeff, pi, idx);
                default:
                    NPY_END_THREADS;
                    PyErr_SetString(PyExc_RuntimeError,
                                    "data type not supported");
                    goto exit;
                }
                /* calculate interpolated value: weight the sample with the
                 * spline coefficient of its tap on every axis */
                for(jj = 0; jj < rank; jj++)
                    if (order > 0)
                        coeff *= splvals[jj][io.coordinates[jj]][ff[jj]];
                t += coeff;
                ff += rank;
            }
        } else {
            t = cval;
        }
        /* store output: */
        switch (PyArray_TYPE(output)) {
            CASE_INTERP_OUT(NPY_BOOL, npy_bool, po, t);
            CASE_INTERP_OUT_UINT(UBYTE, npy_ubyte, po, t);
            CASE_INTERP_OUT_UINT(USHORT, npy_ushort, po, t);
            CASE_INTERP_OUT_UINT(UINT, npy_uint, po, t);
            CASE_INTERP_OUT_UINT(ULONG, npy_ulong, po, t);
            CASE_INTERP_OUT_UINT(ULONGLONG, npy_ulonglong, po, t);
            CASE_INTERP_OUT_INT(BYTE, npy_byte, po, t);
            CASE_INTERP_OUT_INT(SHORT, npy_short, po, t);
            CASE_INTERP_OUT_INT(INT, npy_int, po, t);
            CASE_INTERP_OUT_INT(LONG, npy_long, po, t);
            CASE_INTERP_OUT_INT(LONGLONG, npy_longlong, po, t);
            CASE_INTERP_OUT(NPY_FLOAT, npy_float, po, t);
            CASE_INTERP_OUT(NPY_DOUBLE, npy_double, po, t);
        default:
            NPY_END_THREADS;
            PyErr_SetString(PyExc_RuntimeError, "data type not supported");
            goto exit;
        }
        NI_ITERATOR_NEXT(io, po);
    }

 exit:
    /* Reached on success and on every failure; all tables were NULL-filled
     * before use, so each slot can be handed to free() unconditionally. */
    NPY_END_THREADS;
    if (zeros) {
        for(jj = 0; jj < rank; jj++)
            free(zeros[jj]);
        free(zeros);
    }
    if (offsets) {
        for(jj = 0; jj < rank; jj++)
            free(offsets[jj]);
        free(offsets);
    }
    if (splvals) {
        for(jj = 0; jj < rank; jj++) {
            if (splvals[jj]) {
                for(hh = 0; hh < odimensions[jj]; hh++)
                    free(splvals[jj][hh]);
                free(splvals[jj]);
            }
        }
        free(splvals);
    }
    if (edge_offsets) {
        for(jj = 0; jj < rank; jj++) {
            if (edge_offsets[jj]) {
                for(hh = 0; hh < odimensions[jj]; hh++)
                    free(edge_offsets[jj][hh]);
                free(edge_offsets[jj]);
            }
        }
        free(edge_offsets);
    }
    free(foffsets);
    free(fcoordinates);
    return PyErr_Occurred() ? 0 : 1;
}
/*
 * Zoom and/or shift `input` into `output` using spline interpolation of the
 * given `order` (variant written against the direct PyArrayObject fields).
 *
 * For every output index k along axis j, the input coordinate that gets
 * sampled is (k + shifts[j]) * zooms[j]; a NULL shift_ar or zoom_ar skips
 * the corresponding step. The coordinate is then passed through
 * map_coordinate() together with the boundary `mode`.
 *
 * Parameters:
 *   input     array that is sampled; its rank sets the number of axes.
 *   zoom_ar   optional array whose data is read as Float64, one factor per
 *             axis (may be NULL).
 *   shift_ar  optional array whose data is read as Float64, one offset per
 *             axis (may be NULL).
 *   output    array that receives the result; its dimensions define the
 *             grid of sample points.
 *   order     spline order; (order + 1) samples per axis contribute to each
 *             output element.
 *   mode      boundary mode forwarded to map_coordinate().
 *   cval      value stored for output points that were flagged as lying
 *             outside the input (only used with NI_EXTEND_CONSTANT).
 *
 * Returns 1 on success and 0 when a Python exception is set.
 *
 * NOTE(review): zoom_ar / shift_ar are assumed to hold at least input->nd
 * contiguous doubles, and `output` is assumed to have the same rank as
 * `input` -- confirm against the caller.
 */
int NI_ZoomShift(PyArrayObject *input, PyArrayObject* zoom_ar,
                                 PyArrayObject* shift_ar, PyArrayObject *output,
                                 int order, int mode, double cval)
{
    char *po, *pi;
    npy_intp **zeros = NULL, **offsets = NULL, ***edge_offsets = NULL;
    npy_intp ftmp[MAXDIM], *fcoordinates = NULL, *foffsets = NULL;
    npy_intp jj, hh, kk, filter_size, odimensions[MAXDIM];
    npy_intp idimensions[MAXDIM], istrides[MAXDIM], *idxs = NULL;
    npy_intp size;
    double ***splvals = NULL;
    NI_Iterator io;
    Float64 *zooms = zoom_ar ? (Float64*)PyArray_DATA(zoom_ar) : NULL;
    Float64 *shifts = shift_ar ? (Float64*)PyArray_DATA(shift_ar) : NULL;
    int rank = 0, qq;

    /* Cache the array geometry in local arrays. */
    for(kk = 0; kk < input->nd; kk++) {
        idimensions[kk] = input->dimensions[kk];
        istrides[kk] = input->strides[kk];
        odimensions[kk] = output->dimensions[kk];
    }
    rank = input->nd;

    /* if the mode is 'constant' we need some temps later: one flag per
     * output coordinate and axis that marks "outside the input". */
    if (mode == NI_EXTEND_CONSTANT) {
        zeros = malloc(rank * sizeof(npy_intp*));
        if (!zeros) {
            PyErr_NoMemory();
            goto exit;
        }
        /* NULL-fill first so that the cleanup code can free() every slot. */
        for(jj = 0; jj < rank; jj++)
            zeros[jj] = NULL;
        for(jj = 0; jj < rank; jj++) {
            zeros[jj] = malloc(odimensions[jj] * sizeof(npy_intp));
            if(!zeros[jj]) {
                PyErr_NoMemory();
                goto exit;
            }
        }
    }

    /* store offsets, along each axis: */
    offsets = malloc(rank * sizeof(npy_intp*));
    /* store spline coefficients, along each axis: */
    splvals = malloc(rank * sizeof(double**));
    /* store offsets at all edges: */
    edge_offsets = malloc(rank * sizeof(npy_intp**));
    if (!offsets || !splvals || !edge_offsets) {
        PyErr_NoMemory();
        goto exit;
    }
    for(jj = 0; jj < rank; jj++) {
        offsets[jj] = NULL;
        splvals[jj] = NULL;
        edge_offsets[jj] = NULL;
    }
    for(jj = 0; jj < rank; jj++) {
        offsets[jj] = malloc(odimensions[jj] * sizeof(npy_intp));
        splvals[jj] = malloc(odimensions[jj] * sizeof(double*));
        edge_offsets[jj] = malloc(odimensions[jj] * sizeof(npy_intp*));
        if (!offsets[jj] || !splvals[jj] || !edge_offsets[jj]) {
            /* The per-coordinate pointer tables have not been NULL-filled
             * yet, so the cleanup code must not walk them (it would free()
             * uninitialized pointers). Release them here instead. */
            free(splvals[jj]);
            splvals[jj] = NULL;
            free(edge_offsets[jj]);
            edge_offsets[jj] = NULL;
            PyErr_NoMemory();
            goto exit;
        }
        for(hh = 0; hh < odimensions[jj]; hh++) {
            splvals[jj][hh] = NULL;
            edge_offsets[jj][hh] = NULL;
        }
    }

    /* precalculate offsets, and offsets at the edge: */
    for(jj = 0; jj < rank; jj++) {
        double shift = 0.0, zoom = 0.0;
        if (shifts)
            shift = shifts[jj];
        if (zooms)
            zoom = zooms[jj];
        for(kk = 0; kk < odimensions[jj]; kk++) {
            /* input coordinate sampled by output index kk on this axis */
            double cc = (double)kk;
            if (shifts)
                cc += shift;
            if (zooms)
                cc *= zoom;
            cc = map_coordinate(cc, idimensions[jj], mode);
            if (cc > -1.0) {
                /* npy_intp rather than int: these values are multiplied by
                 * strides and compared with dimensions, both npy_intp. */
                npy_intp start;
                if (zeros && zeros[jj])
                    zeros[jj][kk] = 0;
                /* first input index of the (order + 1)-wide sample window;
                 * odd orders anchor on floor(cc), even ones on the nearest
                 * integer */
                if (order & 1) {
                    start = (npy_intp)floor(cc) - order / 2;
                } else {
                    start = (npy_intp)floor(cc + 0.5) - order / 2;
                }
                /* byte offset of the window start along this axis */
                offsets[jj][kk] = istrides[jj] * start;
                if (start < 0 || start + order >= idimensions[jj]) {
                    /* The window sticks out of the input: store a byte
                     * offset for every tap, relative to the window start. */
                    edge_offsets[jj][kk] = malloc((order + 1) * sizeof(npy_intp));
                    if (!edge_offsets[jj][kk]) {
                        PyErr_NoMemory();
                        goto exit;
                    }
                    for(hh = 0; hh <= order; hh++) {
                        npy_intp idx = start + hh;
                        npy_intp len = idimensions[jj];
                        if (len <= 1) {
                            idx = 0;
                        } else {
                            /* fold idx back into [0, len - 1] by reflection
                             * with period 2 * len - 2 */
                            npy_intp s2 = 2 * len - 2;
                            if (idx < 0) {
                                idx = s2 * (npy_intp)(-idx / s2) + idx;
                                idx = idx <= 1 - len ? idx + s2 : -idx;
                            } else if (idx >= len) {
                                idx -= s2 * (npy_intp)(idx / s2);
                                if (idx >= len)
                                    idx = s2 - idx;
                            }
                        }
                        edge_offsets[jj][kk][hh] = istrides[jj] * (idx - start);
                    }
                }
                if (order > 0) {
                    splvals[jj][kk] = malloc((order + 1) * sizeof(double));
                    if (!splvals[jj][kk]) {
                        PyErr_NoMemory();
                        goto exit;
                    }
                    spline_coefficients(cc, order, splvals[jj][kk]);
                }
            } else {
                /* `zeros` only exists for NI_EXTEND_CONSTANT. If the test
                 * above fails in any other mode (it is always false when cc
                 * is NaN), report an error instead of writing through a
                 * NULL pointer. */
                if (!zeros) {
                    PyErr_SetString(PyExc_RuntimeError,
                                    "coordinate could not be mapped into the input");
                    goto exit;
                }
                zeros[jj][kk] = 1;
            }
        }
    }

    /* number of input samples per output element: (order + 1) ** rank */
    filter_size = 1;
    for(jj = 0; jj < rank; jj++)
        filter_size *= order + 1;
    /* scratch buffer: byte offset of every tap for the current element */
    idxs = malloc(filter_size * sizeof *idxs);
    if (!idxs) {
        PyErr_NoMemory();
        goto exit;
    }

    if (!NI_InitPointIterator(output, &io))
        goto exit;

    pi = (void *)PyArray_DATA(input);
    po = (void *)PyArray_DATA(output);

    /* store all coordinates and offsets with filter: */
    fcoordinates = malloc(rank * filter_size * sizeof(npy_intp));
    foffsets = malloc(filter_size * sizeof(npy_intp));
    if (!fcoordinates || !foffsets) {
        PyErr_NoMemory();
        goto exit;
    }

    /* Enumerate every tap of the filter window like an odometer (last axis
     * fastest): fcoordinates gets the per-axis tap index, foffsets the
     * matching byte offset relative to the window start. */
    for(jj = 0; jj < rank; jj++)
        ftmp[jj] = 0;
    kk = 0;
    for(hh = 0; hh < filter_size; hh++) {
        for(jj = 0; jj < rank; jj++)
            fcoordinates[jj + hh * rank] = ftmp[jj];
        foffsets[hh] = kk;
        for(jj = rank - 1; jj >= 0; jj--) {
            if (ftmp[jj] < order) {
                ftmp[jj]++;
                kk += istrides[jj];
                break;
            } else {
                ftmp[jj] = 0;
                kk -= istrides[jj] * order;
            }
        }
    }
    /* total number of output elements */
    size = 1;
    for(qq = 0; qq < output->nd; qq++)
        size *= output->dimensions[qq];
    for(kk = 0; kk < size; kk++) {
        double t = 0.0;
        int edge = 0, zero = 0;
        /* accumulated byte offset: must be as wide as the strides */
        npy_intp oo = 0;

        /* Combine the per-axis tables for the current output coordinate. */
        for(hh = 0; hh < rank; hh++) {
            if (zeros && zeros[hh][io.coordinates[hh]]) {
                /* we use constant border condition */
                zero = 1;
                break;
            }
            oo += offsets[hh][io.coordinates[hh]];
            if (edge_offsets[hh][io.coordinates[hh]])
                edge = 1;
        }

        /* first pass: resolve the byte offset of every tap */
        if (!zero) {
            npy_intp *ff = fcoordinates;
            for(hh = 0; hh < filter_size; hh++) {
                npy_intp idx = 0;
                if (edge) {
                    /* use precalculated edge offsets: */
                    for(jj = 0; jj < rank; jj++) {
                        if (edge_offsets[jj][io.coordinates[jj]])
                            idx += edge_offsets[jj][io.coordinates[jj]][ff[jj]];
                        else
                            idx += ff[jj] * istrides[jj];
                    }
                    idx += oo;
                } else {
                    /* use normal offsets: */
                    idx += oo + foffsets[hh];
                }
                idxs[hh] = idx;
                ff += rank;
            }
        }
        /* second pass: read the samples and accumulate the weighted sum */
        if (!zero) {
            npy_intp *ff = fcoordinates;
            t = 0.0;
            for(hh = 0; hh < filter_size; hh++) {
                double coeff = 0.0;
                /* NOTE(review): the macro presumably loads the input sample
                 * at byte offset idxs[hh] into coeff -- confirm at its
                 * definition. */
                switch(input->descr->type_num) {
                    CASE_INTERP_COEFF(coeff, pi, idxs[hh], Bool);
                    CASE_INTERP_COEFF(coeff, pi, idxs[hh], UInt8);
                    CASE_INTERP_COEFF(coeff, pi, idxs[hh], UInt16);
                    CASE_INTERP_COEFF(coeff, pi, idxs[hh], UInt32);
#if HAS_UINT64
                    CASE_INTERP_COEFF(coeff, pi, idxs[hh], UInt64);
#endif
                    CASE_INTERP_COEFF(coeff, pi, idxs[hh], Int8);
                    CASE_INTERP_COEFF(coeff, pi, idxs[hh], Int16);
                    CASE_INTERP_COEFF(coeff, pi, idxs[hh], Int32);
                    CASE_INTERP_COEFF(coeff, pi, idxs[hh], Int64);
                    CASE_INTERP_COEFF(coeff, pi, idxs[hh], Float32);
                    CASE_INTERP_COEFF(coeff, pi, idxs[hh], Float64);
                default:
                    PyErr_SetString(PyExc_RuntimeError,
                                                    "data type not supported");
                    goto exit;
                }
                /* calculate interpolated value: weight the sample with the
                 * spline coefficient of its tap on every axis */
                for(jj = 0; jj < rank; jj++)
                    if (order > 0)
                        coeff *= splvals[jj][io.coordinates[jj]][ff[jj]];
                t += coeff;
                ff += rank;
            }
        } else {
            t = cval;
        }
        /* store output: */
        switch (output->descr->type_num) {
            CASE_INTERP_OUT(po, t, Bool);
            CASE_INTERP_OUT_UINT(po, t, UInt8, 0, MAX_UINT8);
            CASE_INTERP_OUT_UINT(po, t, UInt16, 0, MAX_UINT16);
            CASE_INTERP_OUT_UINT(po, t, UInt32, 0, MAX_UINT32);
#if HAS_UINT64
            /* FIXME: UInt64 output is handed MAX_UINT32 as its last macro
             * argument. NOTE(review): this looks like an upper bound that
             * is too small for 64-bit values -- confirm against the macro
             * definition before changing it. */
            CASE_INTERP_OUT_UINT(po, t, UInt64, 0, MAX_UINT32);
#endif
            CASE_INTERP_OUT_INT(po, t, Int8, MIN_INT8, MAX_INT8);
            CASE_INTERP_OUT_INT(po, t, Int16, MIN_INT16, MAX_INT16);
            CASE_INTERP_OUT_INT(po, t, Int32, MIN_INT32, MAX_INT32);
            CASE_INTERP_OUT_INT(po, t, Int64, MIN_INT64, MAX_INT64);
            CASE_INTERP_OUT(po, t, Float32);
            CASE_INTERP_OUT(po, t, Float64);
        default:
            PyErr_SetString(PyExc_RuntimeError, "data type not supported");
            goto exit;
        }
        NI_ITERATOR_NEXT(io, po);
    }

 exit:
    /* Reached on success and on every failure; all tables were NULL-filled
     * before use and free(NULL) is a no-op, so no per-pointer guards are
     * needed. */
    if (zeros) {
        for(jj = 0; jj < rank; jj++)
            free(zeros[jj]);
        free(zeros);
    }
    if (offsets) {
        for(jj = 0; jj < rank; jj++)
            free(offsets[jj]);
        free(offsets);
    }
    if (splvals) {
        for(jj = 0; jj < rank; jj++) {
            if (splvals[jj]) {
                for(hh = 0; hh < odimensions[jj]; hh++)
                    free(splvals[jj][hh]);
                free(splvals[jj]);
            }
        }
        free(splvals);
    }
    if (edge_offsets) {
        for(jj = 0; jj < rank; jj++) {
            if (edge_offsets[jj]) {
                for(hh = 0; hh < odimensions[jj]; hh++)
                    free(edge_offsets[jj][hh]);
                free(edge_offsets[jj]);
            }
        }
        free(edge_offsets);
    }
    free(foffsets);
    free(fcoordinates);
    free(idxs);
    return PyErr_Occurred() ? 0 : 1;
}
Beispiel #3
0
int
NI_GeometricTransform(PyArrayObject *input, int (*map)(npy_intp*, double*,
                int, int, void*), void* map_data, PyArrayObject* matrix_ar,
                PyArrayObject* shift_ar, PyArrayObject *coordinates,
                PyArrayObject *output, int order, int mode, double cval)
{
    char *po, *pi, *pc = NULL;
    npy_intp **edge_offsets = NULL, **data_offsets = NULL, filter_size;
    npy_intp ftmp[NPY_MAXDIMS], *fcoordinates = NULL, *foffsets = NULL;
    npy_intp cstride = 0, kk, hh, ll, jj;
    npy_intp size;
    double **splvals = NULL, icoor[NPY_MAXDIMS];
    npy_intp idimensions[NPY_MAXDIMS], istrides[NPY_MAXDIMS];
    NI_Iterator io, ic;
    npy_double *matrix = matrix_ar ? (npy_double*)PyArray_DATA(matrix_ar) : NULL;
    npy_double *shift = shift_ar ? (npy_double*)PyArray_DATA(shift_ar) : NULL;
    int irank = 0, orank;
    NPY_BEGIN_THREADS_DEF;

    NPY_BEGIN_THREADS;

    for(kk = 0; kk < PyArray_NDIM(input); kk++) {
        idimensions[kk] = PyArray_DIM(input, kk);
        istrides[kk] = PyArray_STRIDE(input, kk);
    }
    irank = PyArray_NDIM(input);
    orank = PyArray_NDIM(output);

    /* if the mapping is from array coordinates: */
    if (coordinates) {
        /* initialize a line iterator along the first axis: */
        if (!NI_InitPointIterator(coordinates, &ic))
            goto exit;
        cstride = ic.strides[0];
        if (!NI_LineIterator(&ic, 0))
            goto exit;
        pc = (void *)(PyArray_DATA(coordinates));
    }

    /* offsets used at the borders: */
    edge_offsets = malloc(irank * sizeof(npy_intp*));
    data_offsets = malloc(irank * sizeof(npy_intp*));
    if (NPY_UNLIKELY(!edge_offsets || !data_offsets)) {
        NPY_END_THREADS;
        PyErr_NoMemory();
        goto exit;
    }
    for(jj = 0; jj < irank; jj++)
        data_offsets[jj] = NULL;
    for(jj = 0; jj < irank; jj++) {
        data_offsets[jj] = malloc((order + 1) * sizeof(npy_intp));
        if (NPY_UNLIKELY(!data_offsets[jj])) {
            NPY_END_THREADS;
            PyErr_NoMemory();
            goto exit;
        }
    }
    /* will hold the spline coefficients: */
    splvals = malloc(irank * sizeof(double*));
    if (NPY_UNLIKELY(!splvals)) {
        NPY_END_THREADS;
        PyErr_NoMemory();
        goto exit;
    }
    for(jj = 0; jj < irank; jj++)
        splvals[jj] = NULL;
    for(jj = 0; jj < irank; jj++) {
        splvals[jj] = malloc((order + 1) * sizeof(double));
        if (NPY_UNLIKELY(!splvals[jj])) {
            NPY_END_THREADS;
            PyErr_NoMemory();
            goto exit;
        }
    }

    filter_size = 1;
    for(jj = 0; jj < irank; jj++)
        filter_size *= order + 1;

    /* initialize output iterator: */
    if (!NI_InitPointIterator(output, &io))
        goto exit;

    /* get data pointers: */
    pi = (void *)PyArray_DATA(input);
    po = (void *)PyArray_DATA(output);

    /* make a table of all possible coordinates within the spline filter: */
    fcoordinates = malloc(irank * filter_size * sizeof(npy_intp));
    /* make a table of all offsets within the spline filter: */
    foffsets = malloc(filter_size * sizeof(npy_intp));
    if (NPY_UNLIKELY(!fcoordinates || !foffsets)) {
        NPY_END_THREADS;
        PyErr_NoMemory();
        goto exit;
    }
    for(jj = 0; jj < irank; jj++)
        ftmp[jj] = 0;
    kk = 0;
    for(hh = 0; hh < filter_size; hh++) {
        for(jj = 0; jj < irank; jj++)
            fcoordinates[jj + hh * irank] = ftmp[jj];
        foffsets[hh] = kk;
        for(jj = irank - 1; jj >= 0; jj--) {
            if (ftmp[jj] < order) {
                ftmp[jj]++;
                kk += istrides[jj];
                break;
            } else {
                ftmp[jj] = 0;
                kk -= istrides[jj] * order;
            }
        }
    }

    size = PyArray_SIZE(output);
    for(kk = 0; kk < size; kk++) {
        double t = 0.0;
        int constant = 0, edge = 0;
        npy_intp offset = 0;
        if (map) {
            NPY_END_THREADS;
            /* call mappint functions: */
            if (!map(io.coordinates, icoor, orank, irank, map_data)) {
                if (!PyErr_Occurred())
                    PyErr_SetString(PyExc_RuntimeError,
                                                    "unknown error in mapping function");
                goto exit;
            }
            NPY_BEGIN_THREADS;
        } else if (matrix) {
            /* do an affine transformation: */
            npy_double *p = matrix;
            for(hh = 0; hh < irank; hh++) {
                icoor[hh] = 0.0;
                for(ll = 0; ll < orank; ll++)
                    icoor[hh] += io.coordinates[ll] * *p++;
                icoor[hh] += shift[hh];
            }
        } else if (coordinates) {
            /* mapping is from an coordinates array: */
            char *p = pc;
            switch (PyArray_TYPE(coordinates)) {
                CASE_MAP_COORDINATES(NPY_BOOL, npy_bool,
                                     p, icoor, irank, cstride);
                CASE_MAP_COORDINATES(NPY_UBYTE, npy_ubyte,
                                     p, icoor, irank, cstride);
                CASE_MAP_COORDINATES(NPY_USHORT, npy_ushort,
                                     p, icoor, irank, cstride);
                CASE_MAP_COORDINATES(NPY_UINT, npy_uint,
                                     p, icoor, irank, cstride);
                CASE_MAP_COORDINATES(NPY_ULONG, npy_ulong,
                                     p, icoor, irank, cstride);
                CASE_MAP_COORDINATES(NPY_ULONGLONG, npy_ulonglong,
                                     p, icoor, irank, cstride);
                CASE_MAP_COORDINATES(NPY_BYTE, npy_byte,
                                     p, icoor, irank, cstride);
                CASE_MAP_COORDINATES(NPY_SHORT, npy_short,
                                     p, icoor, irank, cstride);
                CASE_MAP_COORDINATES(NPY_INT, npy_int,
                                     p, icoor, irank, cstride);
                CASE_MAP_COORDINATES(NPY_LONG, npy_long,
                                     p, icoor, irank, cstride);
                CASE_MAP_COORDINATES(NPY_LONGLONG, npy_longlong,
                                     p, icoor, irank, cstride);
                CASE_MAP_COORDINATES(NPY_FLOAT, npy_float,
                                     p, icoor, irank, cstride);
                CASE_MAP_COORDINATES(NPY_DOUBLE, npy_double,
                                     p, icoor, irank, cstride);
            default:
                NPY_END_THREADS;
                PyErr_SetString(PyExc_RuntimeError,
                                "coordinate array data type not supported");
                goto exit;
            }
        }
        /* iterate over axes: */
        for(hh = 0; hh < irank; hh++) {
            /* if the input coordinate is outside the borders, map it: */
            double cc = map_coordinate(icoor[hh], idimensions[hh], mode);
            if (cc > -1.0) {
                /* find the filter location along this axis: */
                npy_intp start;
                if (order & 1) {
                    start = (npy_intp)floor(cc) - order / 2;
                } else {
                    start = (npy_intp)floor(cc + 0.5) - order / 2;
                }
                /* get the offset to the start of the filter: */
                offset += istrides[hh] * start;
                if (start < 0 || start + order >= idimensions[hh]) {
                    /* implement border mapping, if outside border: */
                    edge = 1;
                    edge_offsets[hh] = data_offsets[hh];
                    for(ll = 0; ll <= order; ll++) {
                        npy_intp idx = start + ll;
                        npy_intp len = idimensions[hh];
                        if (len <= 1) {
                            idx = 0;
                        } else {
                            npy_intp s2 = 2 * len - 2;
                            if (idx < 0) {
                                idx = s2 * (int)(-idx / s2) + idx;
                                idx = idx <= 1 - len ? idx + s2 : -idx;
                            } else if (idx >= len) {
                                idx -= s2 * (int)(idx / s2);
                                if (idx >= len)
                                    idx = s2 - idx;
                            }
                        }
                        /* calculate and store the offests at this edge: */
                        edge_offsets[hh][ll] = istrides[hh] * (idx - start);
                    }
                } else {
                    /* we are not at the border, use precalculated offsets: */
                    edge_offsets[hh] = NULL;
                }
                spline_coefficients(cc, order, splvals[hh]);
            } else {
                /* we use the constant border condition: */
                constant = 1;
                break;
            }
        }

        if (!constant) {
            npy_intp *ff = fcoordinates;
            const int type_num = PyArray_TYPE(input);
            t = 0.0;
            for(hh = 0; hh < filter_size; hh++) {
                double coeff = 0.0;
                npy_intp idx = 0;

                if (NPY_UNLIKELY(edge)) {
                    for(ll = 0; ll < irank; ll++) {
                        if (edge_offsets[ll])
                            idx += edge_offsets[ll][ff[ll]];
                        else
                            idx += ff[ll] * istrides[ll];
                    }
                } else {
                    idx = foffsets[hh];
                }
                idx += offset;
                switch (type_num) {
                    CASE_INTERP_COEFF(NPY_BOOL, npy_bool,
                                      coeff, pi, idx);
                    CASE_INTERP_COEFF(NPY_UBYTE, npy_ubyte,
                                      coeff, pi, idx);
                    CASE_INTERP_COEFF(NPY_USHORT, npy_ushort,
                                      coeff, pi, idx);
                    CASE_INTERP_COEFF(NPY_UINT, npy_uint,
                                      coeff, pi, idx);
                    CASE_INTERP_COEFF(NPY_ULONG, npy_ulong,
                                      coeff, pi, idx);
                    CASE_INTERP_COEFF(NPY_ULONGLONG, npy_ulonglong,
                                      coeff, pi, idx);
                    CASE_INTERP_COEFF(NPY_BYTE, npy_byte,
                                      coeff, pi, idx);
                    CASE_INTERP_COEFF(NPY_SHORT, npy_short,
                                      coeff, pi, idx);
                    CASE_INTERP_COEFF(NPY_INT, npy_int,
                                      coeff, pi, idx);
                    CASE_INTERP_COEFF(NPY_LONG, npy_long,
                                      coeff, pi, idx);
                    CASE_INTERP_COEFF(NPY_LONGLONG, npy_longlong,
                                      coeff, pi, idx);
                    CASE_INTERP_COEFF(NPY_FLOAT, npy_float,
                                      coeff, pi, idx);
                    CASE_INTERP_COEFF(NPY_DOUBLE, npy_double,
                                      coeff, pi, idx);
                default:
                    NPY_END_THREADS;
                    PyErr_SetString(PyExc_RuntimeError,
                                    "data type not supported");
                    goto exit;
                }
                /* calculate the interpolated value: */
                for(ll = 0; ll < irank; ll++)
                    if (order > 0)
                        coeff *= splvals[ll][ff[ll]];
                t += coeff;
                ff += irank;
            }
        } else {
            t = cval;
        }
        /* store output value: */
        switch (PyArray_TYPE(output)) {
            CASE_INTERP_OUT(NPY_BOOL, npy_bool, po, t);
            CASE_INTERP_OUT_UINT(UBYTE, npy_ubyte, po, t);
            CASE_INTERP_OUT_UINT(USHORT, npy_ushort, po, t);
            CASE_INTERP_OUT_UINT(UINT, npy_uint, po, t);
            CASE_INTERP_OUT_UINT(ULONG, npy_ulong, po, t);
            CASE_INTERP_OUT_UINT(ULONGLONG, npy_ulonglong, po, t);
            CASE_INTERP_OUT_INT(BYTE, npy_byte, po, t);
            CASE_INTERP_OUT_INT(SHORT, npy_short, po, t);
            CASE_INTERP_OUT_INT(INT, npy_int, po, t);
            CASE_INTERP_OUT_INT(LONG, npy_long, po, t);
            CASE_INTERP_OUT_INT(LONGLONG, npy_longlong, po, t);
            CASE_INTERP_OUT(NPY_FLOAT, npy_float, po, t);
            CASE_INTERP_OUT(NPY_DOUBLE, npy_double, po, t);
        default:
            NPY_END_THREADS;
            PyErr_SetString(PyExc_RuntimeError, "data type not supported");
            goto exit;
        }
        if (coordinates) {
            NI_ITERATOR_NEXT2(io, ic, po, pc);
        } else {
            NI_ITERATOR_NEXT(io, po);
        }
    }

 exit:
    NPY_END_THREADS;
    free(edge_offsets);
    if (data_offsets) {
        for(jj = 0; jj < irank; jj++)
            free(data_offsets[jj]);
        free(data_offsets);
    }
    if (splvals) {
        for(jj = 0; jj < irank; jj++)
            free(splvals[jj]);
        free(splvals);
    }
    free(foffsets);
    free(fcoordinates);
    return PyErr_Occurred() ? 0 : 1;
}
/*
 * Fill `output` by sampling `input` at transformed coordinates using a
 * spline filter with (order + 1) taps along every input axis.
 *
 * For every output element the input-space coordinate is taken from the
 * first source that is available, in this order:
 *   1. `map`         - callback receiving the output coordinates, the
 *                      coordinate buffer to fill, the output rank, the
 *                      input rank and `map_data`; a zero return aborts.
 *   2. `matrix_ar`   - row-major (irank x orank) matrix applied to the
 *                      output coordinates, plus `shift_ar` when given.
 *   3. `coordinates` - array iterated in step with `output`, read with
 *                      the stride of its first axis.
 *
 * Each coordinate is passed through map_coordinate() with `mode`; when
 * the result is not greater than -1.0 for any axis, the output element
 * is set to `cval`.  Otherwise the filter taps that fall outside the
 * input are folded back by reflection (period 2 * len - 2) and the
 * weighted sum of the input samples is stored, converted to the output
 * data type.
 *
 * Returns 1 on success and 0 when a Python exception is set.
 */
int
NI_GeometricTransform(PyArrayObject *input, int (*map)(npy_intp*, double*,
                int, int, void*), void* map_data, PyArrayObject* matrix_ar,
                PyArrayObject* shift_ar, PyArrayObject *coordinates,
                PyArrayObject *output, int order, int mode, double cval)
{
    char *po, *pi, *pc = NULL;
    npy_intp **edge_offsets = NULL, **data_offsets = NULL, filter_size;
    npy_intp ftmp[MAXDIM], *fcoordinates = NULL, *foffsets = NULL;
    npy_intp cstride = 0, kk, hh, ll, jj;
    npy_intp size;
    double **splvals = NULL, icoor[MAXDIM];
    npy_intp idimensions[MAXDIM], istrides[MAXDIM];
    NI_Iterator io, ic;
    Float64 *matrix = matrix_ar ? (Float64*)PyArray_DATA(matrix_ar) : NULL;
    Float64 *shift = shift_ar ? (Float64*)PyArray_DATA(shift_ar) : NULL;
    int irank = 0, orank, qq;

    for(kk = 0; kk < input->nd; kk++) {
        idimensions[kk] = input->dimensions[kk];
        istrides[kk] = input->strides[kk];
    }
    irank = input->nd;
    orank = output->nd;

    /* if the mapping is from array coordinates: */
    if (coordinates) {
        /* initialize a line iterator along the first axis: */
        if (!NI_InitPointIterator(coordinates, &ic))
            goto exit;
        cstride = ic.strides[0];
        if (!NI_LineIterator(&ic, 0))
            goto exit;
        pc = (void *)(PyArray_DATA(coordinates));
    }

    /* offsets used at the borders: */
    edge_offsets = malloc(irank * sizeof *edge_offsets);
    data_offsets = malloc(irank * sizeof *data_offsets);
    /* clear the table before any error exit: the cleanup code frees every
       slot, so none of them may be left uninitialized */
    if (data_offsets) {
        for(jj = 0; jj < irank; jj++)
            data_offsets[jj] = NULL;
    }
    if (!edge_offsets || !data_offsets) {
        PyErr_NoMemory();
        goto exit;
    }
    for(jj = 0; jj < irank; jj++) {
        data_offsets[jj] = malloc((order + 1) * sizeof *data_offsets[jj]);
        if (!data_offsets[jj]) {
            PyErr_NoMemory();
            goto exit;
        }
    }
    /* will hold the spline coefficients: */
    splvals = malloc(irank * sizeof *splvals);
    if (!splvals) {
        PyErr_NoMemory();
        goto exit;
    }
    for(jj = 0; jj < irank; jj++)
        splvals[jj] = NULL;
    for(jj = 0; jj < irank; jj++) {
        splvals[jj] = malloc((order + 1) * sizeof *splvals[jj]);
        if (!splvals[jj]) {
            PyErr_NoMemory();
            goto exit;
        }
    }

    /* number of samples in the filter footprint: (order + 1) ** irank */
    filter_size = 1;
    for(jj = 0; jj < irank; jj++)
        filter_size *= order + 1;

    /* initialize output iterator: */
    if (!NI_InitPointIterator(output, &io))
        goto exit;

    /* get data pointers: */
    pi = (void *)PyArray_DATA(input);
    po = (void *)PyArray_DATA(output);

    /* make a table of all possible coordinates within the spline filter: */
    fcoordinates = malloc(irank * filter_size * sizeof *fcoordinates);
    /* make a table of all offsets within the spline filter: */
    foffsets = malloc(filter_size * sizeof *foffsets);
    if (!fcoordinates || !foffsets) {
        PyErr_NoMemory();
        goto exit;
    }
    for(jj = 0; jj < irank; jj++)
        ftmp[jj] = 0;
    kk = 0;
    for(hh = 0; hh < filter_size; hh++) {
        for(jj = 0; jj < irank; jj++)
            fcoordinates[jj + hh * irank] = ftmp[jj];
        foffsets[hh] = kk;
        /* advance the multi-index, last axis fastest, keeping the byte
           offset kk in step with it: */
        for(jj = irank - 1; jj >= 0; jj--) {
            if (ftmp[jj] < order) {
                ftmp[jj]++;
                kk += istrides[jj];
                break;
            } else {
                ftmp[jj] = 0;
                kk -= istrides[jj] * order;
            }
        }
    }

    size = 1;
    for(qq = 0; qq < output->nd; qq++)
        size *= output->dimensions[qq];
    for(kk = 0; kk < size; kk++) {
        double t = 0.0;
        int constant = 0, edge = 0;
        /* byte offset of the first filter tap; npy_intp so that large
           arrays do not overflow an int */
        npy_intp offset = 0;
        if (map) {
            /* call mapping function: */
            if (!map(io.coordinates, icoor, orank, irank, map_data)) {
                if (!PyErr_Occurred())
                    PyErr_SetString(PyExc_RuntimeError,
                                                    "unknown error in mapping function");
                goto exit;
            }
        } else if (matrix) {
            /* do an affine transformation: */
            Float64 *p = matrix;
            for(hh = 0; hh < irank; hh++) {
                icoor[hh] = 0.0;
                for(ll = 0; ll < orank; ll++)
                    icoor[hh] += io.coordinates[ll] * *p++;
                /* shift is NULL when no shift array was passed */
                if (shift)
                    icoor[hh] += shift[hh];
            }
        } else if (coordinates) {
            /* mapping is from a coordinates array: */
            char *p = pc;
            switch(coordinates->descr->type_num) {
                CASE_MAP_COORDINATES(p, icoor, irank, cstride, Bool);
                CASE_MAP_COORDINATES(p, icoor, irank, cstride, UInt8);
                CASE_MAP_COORDINATES(p, icoor, irank, cstride, UInt16);
                CASE_MAP_COORDINATES(p, icoor, irank, cstride, UInt32);
#if HAS_UINT64
                CASE_MAP_COORDINATES(p, icoor, irank, cstride, UInt64);
#endif
                CASE_MAP_COORDINATES(p, icoor, irank, cstride, Int8);
                CASE_MAP_COORDINATES(p, icoor, irank, cstride, Int16);
                CASE_MAP_COORDINATES(p, icoor, irank, cstride, Int32);
                CASE_MAP_COORDINATES(p, icoor, irank, cstride, Int64);
                CASE_MAP_COORDINATES(p, icoor, irank, cstride, Float32);
                CASE_MAP_COORDINATES(p, icoor, irank, cstride, Float64);
            default:
                PyErr_SetString(PyExc_RuntimeError,
                                                "coordinate array data type not supported");
                goto exit;
            }
        }
        /* iterate over axes: */
        for(hh = 0; hh < irank; hh++) {
            /* if the input coordinate is outside the borders, map it: */
            double cc = map_coordinate(icoor[hh], idimensions[hh], mode);
            if (cc > -1.0) {
                /* find the filter location along this axis: */
                npy_intp start;
                if (order & 1) {
                    start = (npy_intp)floor(cc) - order / 2;
                } else {
                    start = (npy_intp)floor(cc + 0.5) - order / 2;
                }
                /* get the offset to the start of the filter: */
                offset += istrides[hh] * start;
                if (start < 0 || start + order >= idimensions[hh]) {
                    /* implement border mapping, if outside border: */
                    edge = 1;
                    edge_offsets[hh] = data_offsets[hh];
                    for(ll = 0; ll <= order; ll++) {
                        npy_intp idx = start + ll;
                        npy_intp len = idimensions[hh];
                        if (len <= 1) {
                            idx = 0;
                        } else {
                            /* reflect the index back into [0, len) with
                               period 2 * len - 2: */
                            npy_intp s2 = 2 * len - 2;
                            if (idx < 0) {
                                idx = s2 * (-idx / s2) + idx;
                                idx = idx <= 1 - len ? idx + s2 : -idx;
                            } else if (idx >= len) {
                                idx -= s2 * (idx / s2);
                                if (idx >= len)
                                    idx = s2 - idx;
                            }
                        }
                        /* calculate and store the offsets at this edge,
                           relative to the start of the filter: */
                        edge_offsets[hh][ll] = istrides[hh] * (idx - start);
                    }
                } else {
                    /* we are not at the border, use precalculated offsets: */
                    edge_offsets[hh] = NULL;
                }
                spline_coefficients(cc, order, splvals[hh]);
            } else {
                /* we use the constant border condition: */
                constant = 1;
                break;
            }
        }

        if (!constant) {
            npy_intp *ff = fcoordinates;
            t = 0.0;
            for(hh = 0; hh < filter_size; hh++) {
                double coeff = 0.0;
                npy_intp idx = 0;

                /* byte offset of this tap relative to the filter start: */
                if (edge) {
                    for(ll = 0; ll < irank; ll++) {
                        if (edge_offsets[ll])
                            idx += edge_offsets[ll][ff[ll]];
                        else
                            idx += ff[ll] * istrides[ll];
                    }
                } else {
                    idx = foffsets[hh];
                }
                idx += offset;

                /* read the input sample as a double: */
                switch(input->descr->type_num) {
                    CASE_INTERP_COEFF(coeff, pi, idx, Bool);
                    CASE_INTERP_COEFF(coeff, pi, idx, UInt8);
                    CASE_INTERP_COEFF(coeff, pi, idx, UInt16);
                    CASE_INTERP_COEFF(coeff, pi, idx, UInt32);
#if HAS_UINT64
                    CASE_INTERP_COEFF(coeff, pi, idx, UInt64);
#endif
                    CASE_INTERP_COEFF(coeff, pi, idx, Int8);
                    CASE_INTERP_COEFF(coeff, pi, idx, Int16);
                    CASE_INTERP_COEFF(coeff, pi, idx, Int32);
                    CASE_INTERP_COEFF(coeff, pi, idx, Int64);
                    CASE_INTERP_COEFF(coeff, pi, idx, Float32);
                    CASE_INTERP_COEFF(coeff, pi, idx, Float64);
                default:
                    PyErr_SetString(PyExc_RuntimeError,
                                                    "data type not supported");
                    goto exit;
                }
                /* calculate the interpolated value; the spline weights
                   are only applied for order > 0: */
                if (order > 0) {
                    for(ll = 0; ll < irank; ll++)
                        coeff *= splvals[ll][ff[ll]];
                }
                t += coeff;
                ff += irank;
            }
        } else {
            t = cval;
        }
        /* store output value: */
        switch (output->descr->type_num) {
            CASE_INTERP_OUT(po, t, Bool);
            CASE_INTERP_OUT_UINT(po, t, UInt8, 0, MAX_UINT8);
            CASE_INTERP_OUT_UINT(po, t, UInt16, 0, MAX_UINT16);
            CASE_INTERP_OUT_UINT(po, t, UInt32, 0, MAX_UINT32);
#if HAS_UINT64
            /* FIXME: UInt64 output is clamped with MAX_UINT32, not with a
               64-bit maximum */
            CASE_INTERP_OUT_UINT(po, t, UInt64, 0, MAX_UINT32);
#endif
            CASE_INTERP_OUT_INT(po, t, Int8, MIN_INT8, MAX_INT8);
            CASE_INTERP_OUT_INT(po, t, Int16, MIN_INT16, MAX_INT16);
            CASE_INTERP_OUT_INT(po, t, Int32, MIN_INT32, MAX_INT32);
            CASE_INTERP_OUT_INT(po, t, Int64, MIN_INT64, MAX_INT64);
            CASE_INTERP_OUT(po, t, Float32);
            CASE_INTERP_OUT(po, t, Float64);
        default:
            PyErr_SetString(PyExc_RuntimeError, "data type not supported");
            goto exit;
        }
        if (coordinates) {
            NI_ITERATOR_NEXT2(io, ic, po, pc);
        } else {
            NI_ITERATOR_NEXT(io, po);
        }
    }

 exit:
    /* edge_offsets only aliases rows of data_offsets, so just the table
       itself is released here; free(NULL) is a no-op */
    free(edge_offsets);
    if (data_offsets) {
        for(jj = 0; jj < irank; jj++)
            free(data_offsets[jj]);
        free(data_offsets);
    }
    if (splvals) {
        for(jj = 0; jj < irank; jj++)
            free(splvals[jj]);
        free(splvals);
    }
    free(foffsets);
    free(fcoordinates);
    return PyErr_Occurred() ? 0 : 1;
}