Esempio n. 1
0
        static __device__ __forceinline__ int syncthreadsOr(int pred)
        {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
                // just campilation stab
                return 0;
#else
                return __syncthreads_or(pred);
#endif
        }
Esempio n. 2
0
__global__
static void update_equiv(cuda::Param<T> equiv_map, const cudaTextureObject_t tex)
{
    // Basic coordinates
    const int base_x = (blockIdx.x * blockDim.x * n_per_thread) + threadIdx.x;
    const int base_y = (blockIdx.y * blockDim.y * n_per_thread) + threadIdx.y;

    const int width  = equiv_map.dims[0];
    const int height = equiv_map.dims[1];

    bool tid_changed = false;

    // Per element write flags and label, initially 0
    char write[n_per_thread * n_per_thread];
    T    best_label[n_per_thread * n_per_thread];

    #pragma unroll
    for (int i = 0; i < n_per_thread * n_per_thread; ++i) {
        write[i]      = (char)0;
        best_label[i] = (T)0;
    }

    // Cached tile of the equivalency map
    __shared__ T s_tile[n_per_thread*block_dim][(n_per_thread*block_dim)];

    #pragma unroll
    for (int xb = 0; xb < n_per_thread; ++xb) {
        #pragma unroll
        for (int yb = 0; yb < n_per_thread; ++yb) {

            // Indexing variables
            const int x = base_x + (xb * blockDim.x);
            const int y = base_y + (yb * blockDim.y);
            const int tx = threadIdx.x + (xb * blockDim.x);
            const int ty = threadIdx.y + (yb * blockDim.y);
            const int tid_i = xb * n_per_thread + yb;
            const int n = y * width + x;

            // Get the label for this pixel if we're  in bounds
            const T orig_label = (x < width && y < height) ?
                fetch<T>(n, equiv_map, tex) : (T)0;
            s_tile[ty][tx] = orig_label;

            // Find the lowest label of the nearest valid pixel
            // So far, all we know is that this pixel is valid.
            best_label[tid_i] = orig_label;

            if (orig_label != (T)0) {
                const int south_y = min(y, height-2) + 1;
                const int north_y = max(y, 1) - 1;
                const int east_x = min(x, width-2) + 1;
                const int west_x = max(x, 1) - 1;

                // Check bottom
                best_label[tid_i] = relabel(best_label[tid_i],
                        fetch((south_y) * width + x, equiv_map, tex));

                // Check right neighbor
                best_label[tid_i] = relabel(best_label[tid_i],
                        fetch(y * width + east_x, equiv_map, tex));

                // Check left neighbor
                best_label[tid_i] = relabel(best_label[tid_i],
                        fetch(y * width + west_x, equiv_map, tex));

                // Check top neighbor
                best_label[tid_i] = relabel(best_label[tid_i],
                        fetch((north_y) * width + x, equiv_map, tex));

                if (full_conn) {
                    // Check NW corner
                    best_label[tid_i] = relabel(best_label[tid_i],
                            fetch((north_y) * width + west_x, equiv_map, tex));

                    // Check NE corner
                    best_label[tid_i] = relabel(best_label[tid_i],
                            fetch((north_y) * width + east_x, equiv_map, tex));

                    // Check SW corner
                    best_label[tid_i] = relabel(best_label[tid_i],
                        fetch((south_y) * width + west_x, equiv_map, tex));

                    // Check SE corner
                    best_label[tid_i] = relabel(best_label[tid_i],
                            fetch((south_y) * width + east_x, equiv_map, tex));
                } // if connectivity == 8
            } // if orig_label != 0

            // Process the equivalency list.
            T last_label = orig_label;
            T new_label  = best_label[tid_i];

            while (best_label[tid_i] != (T)0 && new_label < last_label) {
                last_label = new_label;
                new_label = fetch(new_label - (T)1, equiv_map, tex);
            }

            if (orig_label != new_label) {
                tid_changed = true;
                s_tile[ty][tx] = new_label;
                write[tid_i] = (char)1;
            }
            best_label[tid_i] = new_label;
        }
    }

    bool continue_iter = __syncthreads_or((int)tid_changed);

    // Iterate until no pixel in the tile changes
    while (continue_iter) {

        // Reset whether or not this thread's pixels have changed.
        tid_changed = false;

        #pragma unroll
        for (int xb = 0; xb < n_per_thread; ++xb) {
            #pragma unroll
            for (int yb = 0; yb < n_per_thread; ++yb) {

                // Indexing
                const int tx = threadIdx.x + (xb * blockDim.x);
                const int ty = threadIdx.y + (yb * blockDim.y);
                const int tid_i = xb * n_per_thread + yb;

                T last_label = best_label[tid_i];

                if (best_label[tid_i] != 0) {

                    const int north_y   = max(ty, 1) -1;
                    const int south_y   = min(ty, n_per_thread*block_dim - 2) +1;
                    const int east_x    = min(tx, n_per_thread*block_dim - 2) +1;
                    const int west_x    = max(tx, 1) -1;

                    // Check bottom
                    best_label[tid_i] = relabel(best_label[tid_i],
                                                s_tile[south_y][tx]);

                    // Check right neighbor
                    best_label[tid_i] = relabel(best_label[tid_i],
                                                s_tile[ty][east_x]);

                    // Check left neighbor
                    best_label[tid_i] = relabel(best_label[tid_i],
                                                s_tile[ty][west_x]);

                    // Check top neighbor
                    best_label[tid_i] = relabel(best_label[tid_i],
                                                s_tile[north_y][tx]);

                    if (full_conn) {
                        // Check NW corner
                        best_label[tid_i] = relabel(best_label[tid_i],
                                                    s_tile[north_y][west_x]);

                        // Check NE corner
                        best_label[tid_i] = relabel(best_label[tid_i],
                                                    s_tile[north_y][east_x]);

                        // Check SW corner
                        best_label[tid_i] = relabel(best_label[tid_i],
                                                    s_tile[south_y][west_x]);

                        // Check SE corner
                        best_label[tid_i] = relabel(best_label[tid_i],
                                                    s_tile[south_y][east_x]);
                    } // if connectivity == 8

                    // This thread's value changed during this iteration if the
                    // best label is not the same as the last label.
                    const bool changed = best_label[tid_i] != last_label;
                    write[tid_i] = write[tid_i] || changed;
                    tid_changed  =  tid_changed || changed;
                }
            }
        }
        // Done looking at neighbors for this iteration
        continue_iter = __syncthreads_or((int)tid_changed);

        // If we have to continue iterating, update the tile of the
        // equiv map in shared memory
        if (continue_iter) {
            #pragma unroll
            for (int xb = 0; xb < n_per_thread; ++xb) {
                #pragma unroll
                for (int yb = 0; yb < n_per_thread; ++yb) {
                    const int tx = threadIdx.x + (xb * blockDim.x);
                    const int ty = threadIdx.y + (yb * blockDim.y);
                    const int tid_i = xb * n_per_thread + yb;
                    // Update tile in shared memory
                    s_tile[ty][tx] = best_label[tid_i];
                }
            }
            __syncthreads();
        }
    } // while (continue_iter)

    // Write out equiv_map
    #pragma unroll
    for (int xb = 0; xb < n_per_thread; ++xb) {
        #pragma unroll
        for (int yb = 0; yb < n_per_thread; ++yb) {
            const int x = base_x + (xb * blockDim.x);
            const int y = base_y + (yb * blockDim.y);
            const int n = y * width + x;
            const int tid_i = xb * n_per_thread + yb;
            if (x < width && y < height && write[tid_i]) {
                equiv_map.ptr[n]  = best_label[tid_i];
                continue_flag = 1;
            }
        }
    }
}