Esempio n. 1
0
SEXP R_auto_cudaMemcpyFromSymbolAsync(SEXP r_dst, SEXP r_symbol, SEXP r_count, SEXP r_offset, SEXP r_kind, SEXP r_stream)
{
    SEXP r_ans = R_NilValue;
    void * dst = GET_REF(r_dst, void );
    const void * symbol = GET_REF(r_symbol, const void );
    size_t count = REAL(r_count)[0];
    size_t offset = REAL(r_offset)[0];
    enum cudaMemcpyKind kind = (enum cudaMemcpyKind) INTEGER(r_kind)[0];
    cudaStream_t stream = (cudaStream_t) getRReference(r_stream);
    
    cudaError_t ans;
    ans = cudaMemcpyFromSymbolAsync(dst, symbol, count, offset, kind, stream);
    
    r_ans = Renum_convert_cudaError_t(ans) ;
    
    return(r_ans);
}
Esempio n. 2
0
void regions(cuda::Param<T> out, cuda::CParam<char> in, cudaTextureObject_t tex)
{
    const dim3 threads(THREADS_X, THREADS_Y);

    const int blk_x = divup(in.dims[0], threads.x*2);
    const int blk_y = divup(in.dims[1], threads.y*2);

    const dim3 blocks(blk_x, blk_y);

    CUDA_LAUNCH((initial_label<T,n_per_thread>), blocks, threads, out, in);

    POST_LAUNCH_CHECK();

    int h_continue = 1;

    while (h_continue) {
        h_continue = 0;
        CUDA_CHECK(cudaMemcpyToSymbolAsync(continue_flag, &h_continue, sizeof(int),
                    0, cudaMemcpyHostToDevice,
                    cuda::getActiveStream()));

        CUDA_LAUNCH((update_equiv<T, 16, n_per_thread, full_conn>), blocks, threads, out, tex);

        POST_LAUNCH_CHECK();

        CUDA_CHECK(cudaMemcpyFromSymbolAsync(&h_continue, continue_flag, sizeof(int),
                    0, cudaMemcpyDeviceToHost,
                    cuda::getActiveStream()));
        CUDA_CHECK(cudaStreamSynchronize(cuda::getActiveStream()));
    }

    // Now, perform the final relabeling.  This converts the equivalency
    // map from having unique labels based on the lowest pixel in the
    // component to being sequentially numbered components starting at
    // 1.
    int size = in.dims[0] * in.dims[1];
    auto tmp = cuda::memAlloc<T>(size);
    CUDA_CHECK(cudaMemcpyAsync(tmp.get(), out.ptr, size * sizeof(T),
                          cudaMemcpyDeviceToDevice,
                          cuda::getActiveStream()));

    // Wrap raw device ptr
    thrust::device_ptr<T> wrapped_tmp = thrust::device_pointer_cast(tmp.get());

    // Sort the copy
    THRUST_SELECT(thrust::sort, wrapped_tmp, wrapped_tmp + size);

    // Take the max element which is the number
    // of label assignments to compute.
    const int num_bins = wrapped_tmp[size - 1] + 1;

    // If the number of label assignments is two,
    // then either the entire input image is one big
    // component(1's) or it has only one component other than
    // background(0's). Either way, no further
    // post-processing of labels is required.
    if (num_bins<=2)
        return;

    cuda::ThrustVector<T> labels(num_bins);

    // Find the end of each section of values
    thrust::counting_iterator<T> search_begin(0);
    THRUST_SELECT(thrust::upper_bound, wrapped_tmp,  wrapped_tmp  + size,
                        search_begin, search_begin + num_bins,
                        labels.begin());

    THRUST_SELECT(thrust::adjacent_difference, labels.begin(), labels.end(), labels.begin());

    // Operators for the scan
    clamp_to_one<T> clamp;
    thrust::plus<T> add;

    // Perform scan -- this computes the correct labels for each component
    THRUST_SELECT(thrust::transform_exclusive_scan,
                                     labels.begin(),
                                     labels.end(),
                                     labels.begin(),
                                     clamp,
                                     0,
                                     add);
    // Apply the correct labels to the equivalency map
    CUDA_LAUNCH((final_relabel<T,n_per_thread>), blocks,threads,
            out, in, thrust::raw_pointer_cast(&labels[0]));

    POST_LAUNCH_CHECK();
}
Esempio n. 3
0
cudaError_t WINAPI wine_cudaMemcpyFromSymbolAsync( void *dst, const char *symbol, size_t count, size_t offset, enum cudaMemcpyKind kind, cudaStream_t stream ) {
    WINE_TRACE("\n");
    return cudaMemcpyFromSymbolAsync( dst, symbol, count, offset, kind, stream);
}