/*
 * CUDA flavour of the memset codelet: fills the vector registered in
 * descr[0] with the byte value 42.
 *
 * cudaMemsetAsync() writes bytes, so it is every byte of each int element
 * that ends up holding 42, not the int itself. The operation is issued on
 * the stream returned by starpu_cuda_get_local_stream(); this function does
 * not wait for it and does not inspect the returned cudaError_t.
 *
 * descr: buffer descriptors handed over by StarPU; only descr[0] is read.
 * arg:   codelet argument, not used here.
 */
static void memset_cuda(void *descr[], void *arg)
{
	/* NOTE(review): presumably bails out early when run under Valgrind;
	 * confirm against the macro definition. */
	STARPU_SKIP_IF_VALGRIND;

	unsigned nb_elems = STARPU_VECTOR_GET_NX(descr[0]);
	int *dev_buf = (int *)STARPU_VECTOR_GET_PTR(descr[0]);

	/* Size of the whole vector in bytes (element count times int size). */
	size_t nb_bytes = nb_elems * sizeof(*dev_buf);

	cudaMemsetAsync(dev_buf, 42, nb_bytes, starpu_cuda_get_local_stream());
}
/* Example #2 (0) */
/*
 * R-callable wrapper around cudaMemsetAsync().
 *
 * r_devPtr: R reference object; resolved to the target pointer with GET_REF.
 * r_value:  integer vector; its first element is the value to set.
 * r_count:  numeric (double) vector; its first element is converted to the
 *           size_t byte count.
 * r_stream: R reference object; resolved to a cudaStream_t with
 *           getRReference.
 *
 * Returns the cudaError_t produced by cudaMemsetAsync(), converted to an R
 * object by Renum_convert_cudaError_t().
 */
SEXP R_auto_cudaMemsetAsync(SEXP r_devPtr, SEXP r_value, SEXP r_count, SEXP r_stream)
{
    /* Unpack the R arguments in the same order as they are declared. */
    void *dev_ptr = GET_REF(r_devPtr, void );
    int fill_value = INTEGER(r_value)[0];
    size_t n_bytes = (size_t) REAL(r_count)[0];
    cudaStream_t cuda_stream = (cudaStream_t) getRReference(r_stream);

    cudaError_t status = cudaMemsetAsync(dev_ptr, fill_value, n_bytes, cuda_stream);

    /* Hand the status code back to R instead of raising on failure. */
    return Renum_convert_cudaError_t(status);
}
/*
 * Zero all gradient buffers of this attention layer.
 *
 * Selects device_number, then issues one cudaMemsetAsync() per buffer on
 * layer_info.s0. Four buffers are cleared as LSTM_size x LSTM_size elements
 * and two as LSTM_size x 1 elements. The calls are asynchronous with respect
 * to the host; none of the CUDA return codes is checked and no
 * synchronisation is performed here.
 */
void attention_layer<dType>::clear_gradients() {
	cudaSetDevice(device_number);

	// Byte counts for the two buffer shapes, computed once and shared below.
	const size_t square_bytes = LSTM_size*LSTM_size*sizeof(dType);
	const size_t column_bytes = LSTM_size*1*sizeof(dType);

	cudaMemsetAsync(d_W_a_grad, 0, square_bytes, layer_info.s0);
	cudaMemsetAsync(d_W_p_grad, 0, square_bytes, layer_info.s0);

	cudaMemsetAsync(d_v_p_grad, 0, column_bytes, layer_info.s0);
	cudaMemsetAsync(d_output_bias_grad, 0, column_bytes, layer_info.s0);

	cudaMemsetAsync(d_W_c_p1_grad, 0, square_bytes, layer_info.s0);
	cudaMemsetAsync(d_W_c_p2_grad, 0, square_bytes, layer_info.s0);
}
/* Example #4 (0) */
void nonMaximal(float* x_out, float* y_out, float* resp_out,
                 unsigned* count, const unsigned idim0, const unsigned idim1,
                 const T * resp_in, const unsigned edge, const unsigned max_corners)
{
    dim3 threads(BLOCK_X, BLOCK_Y);
    dim3 blocks(divup(idim0-edge*2, BLOCK_X), divup(idim1-edge*2, BLOCK_Y));

    unsigned* d_corners_found = memAlloc<unsigned>(1);
    CUDA_CHECK(cudaMemsetAsync(d_corners_found, 0, sizeof(unsigned),
                cuda::getStream(cuda::getActiveDeviceId())));

    CUDA_LAUNCH((nonMaxKernel<T>), blocks, threads,
            x_out, y_out, resp_out, d_corners_found, idim0, idim1, resp_in, edge, max_corners);

    POST_LAUNCH_CHECK();

    CUDA_CHECK(cudaMemcpy(count, d_corners_found, sizeof(unsigned), cudaMemcpyDeviceToHost));
    memFree(d_corners_found);
}
/*
 * CUDA initialisation codelet: clears the DOT_TYPE variable registered in
 * descr[0] by setting all of its bytes to zero.
 *
 * The memset is issued asynchronously on the stream returned by
 * starpu_cuda_get_local_stream(); its return status is not checked and no
 * synchronisation happens here.
 *
 * descr:  buffer descriptors handed over by StarPU; only descr[0] is read.
 * cl_arg: codelet argument, not used here.
 */
void init_cuda_func(void *descr[], void *cl_arg)
{
	DOT_TYPE *dot_dev = (DOT_TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]);
	const size_t dot_bytes = sizeof(*dot_dev);

	cudaMemsetAsync(dot_dev, 0, dot_bytes, starpu_cuda_get_local_stream());
}