Example #1
void cuWaitEventImpl(struct cuda_stream_command_t *command) {
  CUevent event = command->e_args.event;

  cuda_debug("CUDA stream command 'cuWaitEvent' running now");
  cuda_debug("\tin: event = [%p]", event);

  /* Busy-wait until the event has been recorded (cuEventRecordImpl sets
   * event->recorded to 1). */
  while (!event->recorded)
    ;

  cuda_debug("CUDA stream command 'cuWaitEvent' completed now");
}
Example #2
void cuEventRecordImpl(struct cuda_stream_command_t *command) {
  CUevent event = command->e_args.event;

  cuda_debug("CUDA stream command 'cuEventRecord' running now");
  cuda_debug("\tin: event = [%p]", event);

  cuda_event_record(event);
  event->recorded = 1;

  cuda_debug("CUDA stream command 'cuEventRecord' completed now");
}
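
The record and wait commands above cooperate through a single flag on the event object. Below is a minimal, self-contained sketch of that interaction; the struct layout, field names, and the main() driver are illustrative assumptions, not the actual Multi2Sim definitions.

#include <stdio.h>

/* Assumed, simplified event object: the only field the record/wait pair
 * relies on is the 'recorded' flag. */
struct cuda_event_t {
  unsigned id;
  volatile int recorded; /* set to 1 once the event has been recorded */
};
typedef struct cuda_event_t *CUevent;

int main(void) {
  struct cuda_event_t ev = {0};
  CUevent event = &ev;

  /* cuEventRecordImpl's side: mark the event as recorded. */
  event->recorded = 1;

  /* cuWaitEventImpl's side: spin until the flag is set. If record and
   * wait ran on different stream threads, the flag would need to be
   * volatile (as here) or a C11 atomic for the loop to observe the
   * write; the originals use a plain int. */
  while (!event->recorded)
    ;

  printf("event %u recorded; wait completed\n", event->id);
  return 0;
}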
Example #3
void cuStreamCallbackImpl(struct cuda_stream_command_t *command) {
  CUstreamCallback func = command->cb.func;
  CUstream stream = command->cb.stream;
  CUresult status = command->cb.status;
  void *userData = command->cb.userData;

  cuda_debug("CUDA stream command 'cuStreamCallback' running now");
  cuda_debug("\tin: func = [%p]", func);
  cuda_debug("\tin: stream = [%p]", stream);
  cuda_debug("\tin: status = %d", status);
  cuda_debug("\tin: userData = [%p]", userData);

  func(stream, status, userData);

  cuda_debug("CUDA stream command 'cuStreamCallback' completed now");
}
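
For context, a hedged sketch of the data the callback command is assumed to carry and of the call shape func(stream, status, userData); the typedefs, the args struct, and my_callback below are simplified stand-ins, not the real CUDA driver API or Multi2Sim types.

#include <stdio.h>

/* Simplified stand-ins for the driver API types (assumptions). */
typedef int CUresult;
typedef struct CUstream_st *CUstream;
typedef void (*CUstreamCallback)(CUstream stream, CUresult status,
                                 void *userData);

/* Assumed shape of the 'cb' member read by cuStreamCallbackImpl. */
struct cuda_callback_args_t {
  CUstreamCallback func;
  CUstream stream;
  CUresult status;
  void *userData;
};

/* Example of a user callback the command would invoke. */
static void my_callback(CUstream stream, CUresult status, void *userData) {
  printf("callback: stream = %p, status = %d, userData = %s\n",
         (void *)stream, status, (const char *)userData);
}

int main(void) {
  struct cuda_callback_args_t cb = {
      .func = my_callback, .stream = NULL, .status = 0, .userData = "done"};

  /* Same call shape as func(stream, status, userData) in the command. */
  cb.func(cb.stream, cb.status, cb.userData);
  return 0;
}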
Example #4
int cuda_abi_call(X86Context *ctx)
{
	struct x86_regs_t *regs = ctx->regs;

	int code;
	int ret;

	/* Function code */
	code = regs->ebx;
	if (code <= cuda_call_invalid || code >= cuda_call_count)
		fatal("%s: invalid CUDA function (code %d).\n%s",
			__FUNCTION__, code, cuda_err_code);

	/* Debug */
	cuda_debug("CUDA call '%s' (code %d)\n", cuda_call_name[code], code);

	/* Call */
	assert(cuda_func_table[code]);
	ret = cuda_func_table[code](ctx);

	return ret;
}
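
cuda_abi_call dispatches through a function-pointer table indexed by the code read from EBX, after a range check against cuda_call_invalid and cuda_call_count. A minimal sketch of that dispatch pattern follows; Context, the call codes, and the handlers are hypothetical stand-ins for X86Context, the cuda_call_* enumeration, and cuda_func_table.

#include <assert.h>
#include <stdio.h>

/* Hypothetical context and call codes standing in for X86Context and the
 * real cuda_call_* enumeration. */
typedef struct {
  int ebx;
} Context;

enum { call_invalid = 0, call_Init, call_MemAlloc, call_count };

static int do_Init(Context *ctx) { (void)ctx; puts("Init"); return 0; }
static int do_MemAlloc(Context *ctx) { (void)ctx; puts("MemAlloc"); return 0; }

/* One handler per code; index 0 stays empty for the invalid code. */
static int (*func_table[call_count])(Context *) = {
    [call_Init] = do_Init,
    [call_MemAlloc] = do_MemAlloc,
};

static int abi_call(Context *ctx) {
  int code = ctx->ebx;

  /* Same bounds check as cuda_abi_call: reject anything outside
   * (call_invalid, call_count). */
  if (code <= call_invalid || code >= call_count) {
    fprintf(stderr, "invalid call code %d\n", code);
    return -1;
  }
  assert(func_table[code]);
  return func_table[code](ctx);
}

int main(void) {
  Context ctx = {.ebx = call_MemAlloc};
  return abi_call(&ctx);
}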
Example #5
void cuMemcpyAsyncImpl(struct cuda_stream_command_t *command) {
  int i;
  CUdeviceptr mem_ptr, mem_tail_ptr;
  CUdeviceptr dst = command->m_args.dst_ptr;
  CUdeviceptr src = command->m_args.src_ptr;
  unsigned size = command->m_args.size;
  int dst_is_device = 0;
  int src_is_device = 0;

  cuda_debug("CUDA stream command 'cuMemcpyAsync' running now");
  cuda_debug("\tin: '%s' stream id = %d", __func__, command->id);
  cuda_debug("\tin: '%s' src_ptr = 0x%08x, stream id = %d", __func__, src,
             command->id);
  cuda_debug("\tin: '%s' dst_ptr = 0x%08x, stream id = %d", __func__, dst,
             command->id);
  cuda_debug("\tin: '%s' size = %d, stream id = %d", __func__, size,
             command->id);

  /* Determine if dst/src is host or device pointer */
  for (i = 0; i < list_count(device_memory_object_list); ++i) {
    mem_ptr = (CUdeviceptr)list_get(device_memory_object_list, i);
    mem_tail_ptr = (CUdeviceptr)list_get(device_memory_object_tail_list, i);
    cuda_debug("\tin: '%s' mem_ptr = 0x%08x, stream id = %d", __func__, mem_ptr,
               command->id);
    cuda_debug("\tin: '%s' mem_tail_ptr = 0x%08x, stream id = %d", __func__,
               mem_tail_ptr, command->id);
    if ((!dst_is_device) && (mem_ptr <= dst && dst < mem_tail_ptr))
      dst_is_device = 1;
    else if ((!src_is_device) && (mem_ptr <= src && src < mem_tail_ptr))
      src_is_device = 1;
    if (dst_is_device && src_is_device) break;
  }

  unsigned args[3] = {(unsigned)dst, (unsigned)src, (unsigned)size};
  int ret;
  if ((!src_is_device) && dst_is_device)
    ret = ioctl(active_device->fd, cuda_call_MemWrite, args);
  else if (src_is_device && (!dst_is_device))
    ret = ioctl(active_device->fd, cuda_call_MemRead, args);
  else {
    warning("%s: host to host and device to device async memory copy "
            "not implemented.\n",
            __func__);

    fatal("%s: not implemented\n", __FUNCTION__);
  }

  extern char *cuda_err_native;
  /* Check that we are running on Multi2Sim. If a program linked with this
   * library is running natively, the ioctl request to the simulated device
   * is not supported. */
  if (ret) fatal("native execution not supported.\n%s", cuda_err_native);


  /* Debug */
  cuda_debug("CUDA stream command 'cuMemcpyAsync' completed now");
}
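
The direction of the copy decides the request: cuda_call_MemWrite when only the destination falls inside a device allocation, cuda_call_MemRead when only the source does, and a fatal error otherwise. The sketch below reproduces that decision and the three-word argument block in isolation; the allocation table and the helper is_device_ptr are assumptions standing in for device_memory_object_list and the in-line range test.

#include <stdio.h>

typedef unsigned CUdeviceptr;

/* Hypothetical device allocations as [start, end) ranges. The command above
 * keeps the same information in two parallel lists
 * (device_memory_object_list / device_memory_object_tail_list). */
struct allocation {
  CUdeviceptr start, end;
};

static const struct allocation allocs[] = {
    {0x10000000u, 0x10001000u},
    {0x20000000u, 0x20004000u},
};

/* Mirror of the range test 'mem_ptr <= p && p < mem_tail_ptr'. */
static int is_device_ptr(CUdeviceptr p) {
  for (unsigned i = 0; i < sizeof(allocs) / sizeof(allocs[0]); ++i)
    if (allocs[i].start <= p && p < allocs[i].end)
      return 1;
  return 0;
}

int main(void) {
  CUdeviceptr dst = 0x10000200u; /* inside the first allocation */
  CUdeviceptr src = 0x00400000u; /* a host pointer */
  unsigned size = 256;

  /* Same three-word argument block the command passes to ioctl(). */
  unsigned args[3] = {dst, src, size};

  if (!is_device_ptr(src) && is_device_ptr(dst))
    printf("host-to-device: would issue cuda_call_MemWrite, args = "
           "{0x%08x, 0x%08x, %u}\n", args[0], args[1], args[2]);
  else if (is_device_ptr(src) && !is_device_ptr(dst))
    printf("device-to-host: would issue cuda_call_MemRead\n");
  else
    printf("host-to-host / device-to-device: not implemented\n");
  return 0;
}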
Example #6
void cuLaunchKernelImpl(struct cuda_stream_command_t *command) {
  CUfunction f = command->k_args.kernel;
  unsigned gridDimX = command->k_args.grid_dim_x;
  unsigned gridDimY = command->k_args.grid_dim_y;
  unsigned gridDimZ = command->k_args.grid_dim_z;
  unsigned blockDimX = command->k_args.block_dim_x;
  unsigned blockDimY = command->k_args.block_dim_y;
  unsigned blockDimZ = command->k_args.block_dim_z;
  unsigned sharedMemBytes = command->k_args.shared_mem_size;
  CUstream hStream = command->k_args.stream;
  int **kernelParams = (int **)command->k_args.kernel_params;
  void **extra = command->k_args.extra;
  int ret;
  extern char *cuda_err_native;

  cuda_debug("CUDA stream command 'cuLaunchKernel' running now");
  cuda_debug("\tin: function = [%p]", f);
  cuda_debug("\tin: gridDimX = %u", gridDimX);
  cuda_debug("\tin: gridDimY = %u", gridDimY);
  cuda_debug("\tin: gridDimZ = %u", gridDimZ);
  cuda_debug("\tin: blockDimX = %u", blockDimX);
  cuda_debug("\tin: blockDimY = %u", blockDimY);
  cuda_debug("\tin: blockDimZ = %u", blockDimZ);
  cuda_debug("\tin: sharedMemBytes = %u", sharedMemBytes);
  cuda_debug("\tin: hStream = [%p]", hStream);
  cuda_debug("\tin: kernelParams = [%p]", kernelParams);
  cuda_debug("\tin: extra = [%p]", extra);

  /* Check input */
  assert(gridDimX != 0 && gridDimY != 0 && gridDimZ != 0);
  assert(blockDimX != 0 && blockDimY != 0 && blockDimZ != 0);

  unsigned args[12];
  args[0] = f->id;
  args[1] = (unsigned)f->name;
  args[2] = gridDimX;
  args[3] = gridDimY;
  args[4] = gridDimZ;
  args[5] = blockDimX;
  args[6] = blockDimY;
  args[7] = blockDimZ;
  args[8] = sharedMemBytes;
  args[9] = (hStream ? hStream->id : 0);
  args[10] = (unsigned)kernelParams;
  args[11] = (unsigned)extra;
  ret = ioctl(active_device->fd, cuda_call_LaunchKernel, args);

  /* Check that we are running on Multi2Sim. If a program linked with this
   * library is running natively, the ioctl request to the simulated device
   * is not supported. */
  if (ret) fatal("native execution not supported.\n%s", cuda_err_native);

  cuda_debug("CUDA stream command 'cuLaunchKernel' completed now");
}
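
The launch request is marshalled as twelve 32-bit words (hence args must have 12 elements). The sketch below fills a block in the same order and prints it; the placeholder kernel id, name, and launch configuration are made-up values, and the uintptr_t casts are only there so the sketch also compiles cleanly on a 64-bit host, whereas the runtime itself targets 32-bit x86.

#include <stdint.h>
#include <stdio.h>

int main(void) {
  /* Placeholder launch configuration (illustrative values only). */
  unsigned kernel_id = 1;
  const char *kernel_name = "vecAdd";
  unsigned grid[3] = {64, 1, 1};
  unsigned block[3] = {256, 1, 1};
  unsigned shared_mem = 0;
  unsigned stream_id = 0;
  void *kernel_params = NULL;
  void *extra = NULL;

  /* Twelve words, indices 0..11, in the order used by cuLaunchKernelImpl. */
  unsigned args[12];
  args[0] = kernel_id;
  args[1] = (unsigned)(uintptr_t)kernel_name;
  args[2] = grid[0];
  args[3] = grid[1];
  args[4] = grid[2];
  args[5] = block[0];
  args[6] = block[1];
  args[7] = block[2];
  args[8] = shared_mem;
  args[9] = stream_id;
  args[10] = (unsigned)(uintptr_t)kernel_params;
  args[11] = (unsigned)(uintptr_t)extra;

  for (int i = 0; i < 12; ++i)
    printf("args[%2d] = 0x%08x\n", i, args[i]);
  return 0;
}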