int main(){ float *A, *B, *C; hipDeviceptr_t Ad, Bd, Cd; A = new float[LEN]; B = new float[LEN]; C = new float[LEN]; for(uint32_t i=0;i<LEN;i++){ A[i] = i*1.0f; B[i] = 1.0f; C[i] = 0.0f; } hipInit(0); hipDevice_t device; hipCtx_t context; hipDeviceGet(&device, 0); hipCtxCreate(&context, 0, device); hipMalloc((void**)&Ad, SIZE); hipMalloc((void**)&Bd, SIZE); hipMalloc((void**)&Cd, SIZE); hipMemcpyHtoD(Ad, A, SIZE); hipMemcpyHtoD(Bd, B, SIZE); hipMemcpyHtoD(Cd, C, SIZE); hipModule_t Module; hipFunction_t Function; hipModuleLoad(&Module, fileName); hipModuleGetFunction(&Function, Module, kernel_name); int n = LEN; void * args[4] = {&Ad, &Bd, &Cd, &n}; hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, args, nullptr); hipMemcpyDtoH(C, Cd, SIZE); int mismatchCount = 0; for(uint32_t i=0;i<LEN;i++){ if (A[i] + B[i] != C[i]) { mismatchCount++; std::cout<<"error: mismatch " << A[i]<<" + "<<B[i]<<" != "<<C[i]<<std::endl; } } if (mismatchCount == 0) { std::cout << "PASSED!\n"; } else { std::cout << "FAILED!\n"; }; hipCtxDestroy(context); return 0; }
void hipLaunchKernelGGLImpl( std::uintptr_t function_address, const dim3& numBlocks, const dim3& dimBlocks, std::uint32_t sharedMemBytes, hipStream_t stream, void** kernarg) { const auto& kd = hip_impl::get_program_state().kernel_descriptor(function_address, target_agent(stream)); hipModuleLaunchKernel(kd, numBlocks.x, numBlocks.y, numBlocks.z, dimBlocks.x, dimBlocks.y, dimBlocks.z, sharedMemBytes, stream, nullptr, kernarg); }