/*
 * CUDA implementation of the memset codelet.
 *
 * Fills the vector held in descr[0] with the byte value 42 by enqueueing a
 * cudaMemsetAsync on StarPU's local CUDA stream.  cudaMemsetAsync writes the
 * value into every byte, so each int element ends up with all of its bytes
 * equal to 42 rather than holding the integer 42.
 *
 * descr : StarPU buffer descriptors; only descr[0] (a vector) is used.
 * arg   : unused.
 *
 * No synchronization is performed here and the CUDA status is not inspected.
 */
static void memset_cuda(void *descr[], void *arg)
{
	STARPU_SKIP_IF_VALGRIND;

	(void)arg;

	int *vec = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
	unsigned nx = STARPU_VECTOR_GET_NX(descr[0]);

	/* Total size of the vector payload in bytes. */
	size_t nbytes = nx * sizeof(*vec);

	cudaMemsetAsync(vec, 42, nbytes, starpu_cuda_get_local_stream());
}
/*
 * R-callable wrapper around cudaMemsetAsync.
 *
 * Decodes the four R arguments, forwards them to cudaMemsetAsync, and returns
 * the resulting cudaError_t converted to an R value by
 * Renum_convert_cudaError_t.
 *
 * r_devPtr : reference object resolved to the destination pointer via GET_REF.
 * r_value  : integer vector; element 0 is the fill value.
 * r_count  : numeric (double) vector; element 0 is converted to size_t and
 *            used as the byte count.
 * r_stream : reference object resolved to a cudaStream_t via getRReference.
 */
SEXP
R_auto_cudaMemsetAsync(SEXP r_devPtr, SEXP r_value, SEXP r_count, SEXP r_stream)
{
    /* Decode the arguments in the same order as the parameter list. */
    void *dst = GET_REF(r_devPtr, void );
    int fill = INTEGER(r_value)[0];
    size_t nbytes = (size_t) REAL(r_count)[0];
    cudaStream_t strm = (cudaStream_t) getRReference(r_stream);

    cudaError_t status = cudaMemsetAsync(dst, fill, nbytes, strm);

    return Renum_convert_cudaError_t(status);
}
// Zeroes every gradient buffer of the attention layer.
//
// Selects device `device_number`, then enqueues one cudaMemsetAsync per
// gradient buffer on stream `layer_info.s0`.  Four buffers are cleared over
// LSTM_size*LSTM_size elements and two over LSTM_size elements.  The call
// does not synchronize and CUDA return codes are not inspected.
void attention_layer<dType>::clear_gradients()
{
    cudaSetDevice(device_number);

    // Byte counts for the two buffer sizes cleared below.
    const size_t square_bytes = LSTM_size*LSTM_size*sizeof(dType);
    const size_t column_bytes = LSTM_size*1*sizeof(dType);

    cudaMemsetAsync(d_W_a_grad, 0, square_bytes, layer_info.s0);
    cudaMemsetAsync(d_W_p_grad, 0, square_bytes, layer_info.s0);
    cudaMemsetAsync(d_v_p_grad, 0, column_bytes, layer_info.s0);
    cudaMemsetAsync(d_output_bias_grad, 0, column_bytes, layer_info.s0);
    cudaMemsetAsync(d_W_c_p1_grad, 0, square_bytes, layer_info.s0);
    cudaMemsetAsync(d_W_c_p2_grad, 0, square_bytes, layer_info.s0);
}
// Launches nonMaxKernel<T> and copies the device-side counter it was given
// into *count on the host.
//
// A single-element device counter is allocated, zeroed, passed to the kernel,
// read back, and released before returning.
//
//   x_out, y_out, resp_out   forwarded to the kernel unchanged
//   count                    host location that receives the counter value
//   idim0, idim1             extents; (idimN - 2*edge) sizes the launch grid
//   resp_in, edge,
//   max_corners              forwarded to the kernel unchanged
//
// The function returns only after the stream has been synchronized, so *count
// is valid on return.
//
// NOTE(review): idim0 - edge*2 and idim1 - edge*2 are unsigned expressions and
// wrap around if a dimension is smaller than 2*edge; callers presumably
// guarantee that cannot happen -- confirm.
void nonMaximal(float* x_out, float* y_out, float* resp_out, unsigned* count,
                const unsigned idim0, const unsigned idim1, const T * resp_in,
                const unsigned edge, const unsigned max_corners)
{
    dim3 threads(BLOCK_X, BLOCK_Y);
    dim3 blocks(divup(idim0-edge*2, BLOCK_X), divup(idim1-edge*2, BLOCK_Y));

    // All stream-ordered work below is issued on the active device's stream.
    cudaStream_t stream = cuda::getStream(cuda::getActiveDeviceId());

    unsigned* d_corners_found = memAlloc<unsigned>(1);
    CUDA_CHECK(cudaMemsetAsync(d_corners_found, 0, sizeof(unsigned), stream));

    CUDA_LAUNCH((nonMaxKernel<T>), blocks, threads,
                x_out, y_out, resp_out, d_corners_found,
                idim0, idim1, resp_in, edge, max_corners);

    POST_LAUNCH_CHECK();

    // Read the counter back on the same stream that zeroed it and wait on
    // that stream explicitly.  A blocking cudaMemcpy runs on the default
    // stream and is only ordered after work on another stream through the
    // legacy implicit synchronization, which a non-blocking stream opts out
    // of.  Assumes CUDA_LAUNCH enqueues on this same stream -- TODO confirm.
    CUDA_CHECK(cudaMemcpyAsync(count, d_corners_found, sizeof(unsigned),
                               cudaMemcpyDeviceToHost, stream));
    CUDA_CHECK(cudaStreamSynchronize(stream));

    memFree(d_corners_found);
}
/*
 * CUDA implementation of the initialisation codelet.
 *
 * Sets all sizeof(DOT_TYPE) bytes of the variable held in descr[0] to zero by
 * enqueueing a cudaMemsetAsync on StarPU's local CUDA stream.
 *
 * descr  : StarPU buffer descriptors; only descr[0] (a variable) is used.
 * cl_arg : unused.
 *
 * No synchronization is performed here and the CUDA status is not inspected.
 */
void init_cuda_func(void *descr[], void *cl_arg)
{
	(void)cl_arg;

	DOT_TYPE *accum = (DOT_TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]);

	cudaMemsetAsync(accum,
			0,
			sizeof(DOT_TYPE),
			starpu_cuda_get_local_stream());
}