/*
 * Stream command handler for 'cuWaitEvent'.
 *
 * Reads the event stored in command->e_args.event and busy-waits until its
 * 'recorded' flag becomes non-zero. The flag is set to 1 by
 * cuEventRecordImpl() after it records the event.
 *
 * command: stream command carrying the event to wait for in e_args.event.
 */
void cuWaitEventImpl(struct cuda_stream_command_t *command)
{
	CUevent event = command->e_args.event;

	cuda_debug("CUDA stream command 'cuWaitEvent' running now");
	cuda_debug("\tin: event = [%p]", event);

	/* Spin until the event has been recorded. The previous condition
	 * (spinning *while* recorded) returned immediately for a pending event
	 * and never returned for an already-recorded one.
	 * NOTE(review): if 'recorded' is not declared volatile/atomic in the
	 * CUevent definition, the compiler may hoist this load out of the
	 * loop - confirm against the type declaration. */
	while (!event->recorded)
		;

	cuda_debug("CUDA stream command 'cuWaitEvent' completed now");
}
/*
 * Stream command handler for 'cuEventRecord'.
 *
 * Takes the event from command->e_args.event, passes it to
 * cuda_event_record() and then raises the event's 'recorded' flag.
 *
 * command: stream command carrying the event to record in e_args.event.
 */
void cuEventRecordImpl(struct cuda_stream_command_t *command)
{
	CUevent ev;

	ev = command->e_args.event;

	cuda_debug("CUDA stream command 'cuEventRecord' running now");
	cuda_debug("\tin: event = [%p]", ev);

	/* Record first, then publish completion through the flag. */
	cuda_event_record(ev);
	ev->recorded = 1;

	cuda_debug("CUDA stream command 'cuEventRecord' completed now");
}
void cuStreamCallbackImpl(struct cuda_stream_command_t *command) { CUstreamCallback func = command->cb.func; CUstream stream = command->cb.stream; CUresult status = command->cb.status; void *userData = command->cb.userData; cuda_debug("CUDA stream command 'cuStreamCallback' running now"); cuda_debug("\tin: func = [%p]", func); cuda_debug("\tin: stream = [%p]", stream); cuda_debug("\tin: status = %d", status); cuda_debug("\tin: userData = [%p]", userData); func(stream, status, userData); cuda_debug("CUDA stream command 'cuStreamCallback' completed now"); }
/*
 * Dispatch a CUDA ABI call issued by the given x86 context.
 *
 * The function code is taken from the context's EBX register. Codes outside
 * the open interval (cuda_call_invalid, cuda_call_count) are reported through
 * fatal(). Otherwise the matching entry of cuda_func_table is invoked with the
 * context, and its result is returned unchanged.
 *
 * ctx: x86 context whose registers carry the call code.
 * Returns: the value returned by the selected handler.
 */
int cuda_abi_call(X86Context *ctx)
{
	/* Function code */
	const int code = ctx->regs->ebx;
	const int code_is_valid =
			code > cuda_call_invalid && code < cuda_call_count;

	if (!code_is_valid)
		fatal("%s: invalid CUDA function (code %d).\n%s",
				__FUNCTION__, code, cuda_err_code);

	/* Debug */
	cuda_debug("CUDA call '%s' (code %d)\n", cuda_call_name[code], code);

	/* Call: every valid code must have a handler registered */
	assert(cuda_func_table[code]);
	return cuda_func_table[code](ctx);
}
void cuMemcpyAsyncImpl(struct cuda_stream_command_t *command) { int i; CUdeviceptr mem_ptr, mem_tail_ptr; CUdeviceptr dst = command->m_args.dst_ptr; CUdeviceptr src = command->m_args.src_ptr; unsigned size = command->m_args.size; int dst_is_device = 0; int src_is_device = 0; cuda_debug("CUDA stream command 'cuMemcpyAsync' running now"); cuda_debug("\tin: '%s' stream id = %d", __func__, command->id); cuda_debug("\tin: '%s' src_ptr = 0x%08x, stream id = %d", __func__, src, command->id); cuda_debug("\tin: '%s' dst_ptr = 0x%08x, stream id = %d", __func__, dst, command->id); cuda_debug("\tin: '%s' size = %d, stream id = %d", __func__, size, command->id); /* Determine if dst/src is host or device pointer */ for (i = 0; i < list_count(device_memory_object_list); ++i) { mem_ptr = (CUdeviceptr)list_get(device_memory_object_list, i); mem_tail_ptr = (CUdeviceptr)list_get(device_memory_object_tail_list, i); cuda_debug("\tin: '%s' mem_ptr = 0x%08x, stream id = %d", __func__, mem_ptr, command->id); cuda_debug("\tin: '%s' mem_tail_ptr = 0x%08x, stream id = %d", __func__, mem_tail_ptr, command->id); // if ((! dst_is_device) && (mem_ptr == dst)) if ((!dst_is_device) && (mem_ptr <= dst && dst < mem_tail_ptr)) dst_is_device = 1; else if ((!src_is_device) && (mem_ptr <= src && src < mem_tail_ptr)) src_is_device = 1; if (dst_is_device && src_is_device) break; } unsigned args[3] = {(unsigned)dst, (unsigned)src, (unsigned)size}; int ret; if ((!src_is_device) && dst_is_device) ret = ioctl(active_device->fd, cuda_call_MemWrite, args); else if (src_is_device && (!dst_is_device)) ret = ioctl(active_device->fd, cuda_call_MemRead, args); else { warning( "%s: host to host and device to device async memory copy \ not implemented.\n", __func__); fatal("%s: not implemented\n", __FUNCTION__); } extern char *cuda_err_native; /* Check that we are running on Multi2Sim. If a program linked with this * library is running natively, system call CUDA_SYS_CODE is not * supported. 
*/ if (ret) fatal("native execution not supported.\n%s", cuda_err_native); /* Syscall */ /* if ((!src_is_device) && dst_is_device) ret = syscall(CUDA_SYS_CODE, cuda_call_cuKplMemcpyHtoD, dst, src, size); else if (src_is_device && (! dst_is_device)) ret = syscall(CUDA_SYS_CODE, cuda_call_cuKplMemcpyDtoH, dst, src, size); else warning("%s: host to host and device to device async memory copy \ not implemented.\n", __func__); else fatal("device not supported.\n");*/ // fatal("%s: not implemented", __FUNCTION__); /* Debug */ cuda_debug("CUDA stream command 'cuMemcpyAsync' completed now"); }
void cuLaunchKernelImpl(struct cuda_stream_command_t *command) { CUfunction f = command->k_args.kernel; unsigned gridDimX = command->k_args.grid_dim_x; unsigned gridDimY = command->k_args.grid_dim_y; unsigned gridDimZ = command->k_args.grid_dim_z; unsigned blockDimX = command->k_args.block_dim_x; unsigned blockDimY = command->k_args.block_dim_y; unsigned blockDimZ = command->k_args.block_dim_z; unsigned sharedMemBytes = command->k_args.shared_mem_size; CUstream hStream = command->k_args.stream; int **kernelParams = (int **)command->k_args.kernel_params; void **extra = command->k_args.extra; // unsigned sys_args[11]; int ret; extern char *cuda_err_native; cuda_debug("CUDA stream command 'cuLaunchKernel' running now"); cuda_debug("\tin: function = [%p]", f); cuda_debug("\tin: gridDimX = %u", gridDimX); cuda_debug("\tin: gridDimY = %u", gridDimY); cuda_debug("\tin: gridDimZ = %u", gridDimZ); cuda_debug("\tin: blockDimX = %u", blockDimX); cuda_debug("\tin: blockDimY = %u", blockDimY); cuda_debug("\tin: blockDimZ = %u", blockDimZ); cuda_debug("\tin: sharedMemBytes = %u", sharedMemBytes); cuda_debug("\tin: hStream = [%p]", hStream); cuda_debug("\tin: kernelParams = [%p]", kernelParams); cuda_debug("\tin: extra = [%p]", extra); /* Check input */ assert(gridDimX != 0 && gridDimY != 0 && gridDimZ != 0); assert(blockDimX != 0 && blockDimY != 0 && blockDimZ != 0); unsigned args[11]; args[0] = f->id; args[1] = (unsigned)f->name; args[2] = gridDimX; args[3] = gridDimY; args[4] = gridDimZ; args[5] = blockDimX; args[6] = blockDimY; args[7] = blockDimZ; args[8] = sharedMemBytes; args[9] = (hStream ? 
hStream->id : 0); args[10] = (unsigned)kernelParams; args[11] = (unsigned)extra; ret = ioctl(active_device->fd, cuda_call_LaunchKernel, args); /* Syscall arguments */ /*sys_args[0] = f->id; sys_args[1] = gridDimX; sys_args[2] = gridDimY; sys_args[3] = gridDimZ; sys_args[4] = blockDimX; sys_args[5] = blockDimY; sys_args[6] = blockDimZ; sys_args[7] = sharedMemBytes; sys_args[8] = (hStream ? hStream->id : 0); sys_args[9] = (unsigned)kernelParams; sys_args[10] = (unsigned)extra;*/ /* Syscall */ // ret = syscall(CUDA_SYS_CODE, cuda_call_cuKplLaunchKernel, sys_args); // ret = 0; // fatal("%s: not implemented", __FUNCTION__); /* Check that we are running on Multi2Sim. If a program linked with this * library is running natively, system call CUDA_SYS_CODE is not * supported. */ if (ret) fatal("native execution not supported.\n%s", cuda_err_native); cuda_debug("CUDA stream command 'cuLaunchKernel' completed now"); }