示例#1
0
文件: pocl-hsa.c 项目: larsmans/pocl
unsigned int
pocl_hsa_probe(struct pocl_device_ops *ops)
{
  int env_count = pocl_device_get_env_count(ops->device_name);

  POCL_MSG_PRINT_INFO("pocl-hsa: found %d env devices with %s.\n",
                      env_count, ops->device_name);

  /* No hsa env specified, the user did not request for HSA agents. */
  if (env_count <= 0)
    return 0;

  if (hsa_init() != HSA_STATUS_SUCCESS)
    {
      POCL_ABORT("pocl-hsa: hsa_init() failed.");
    }

  if (hsa_iterate_agents(pocl_hsa_get_agents, NULL) !=
      HSA_STATUS_SUCCESS)
    {
      assert (0 && "pocl-hsa: could not get agents.");
    }
  POCL_MSG_PRINT_INFO("pocl-hsa: found %d agents.\n", found_hsa_agents);
  return found_hsa_agents;
}
示例#2
0
文件: basic.c 项目: clementleger/pocl
cl_int
pocl_basic_alloc_mem_obj (cl_device_id device, cl_mem mem_obj, void* host_ptr)
{
  void *b = NULL;
  cl_mem_flags flags = mem_obj->flags;
  unsigned i;
  POCL_MSG_PRINT_INFO("BASIC: alloc_mem_obj, mem %p, dev %d\n", 
                      mem_obj, device->dev_id);
  /* check if some driver has already allocated memory for this mem_obj 
     in our global address space, and use that*/
  for (i = 0; i < mem_obj->context->num_devices; ++i)
    {
      if (!mem_obj->device_ptrs[i].available)
        continue;

      if (mem_obj->device_ptrs[i].global_mem_id == device->global_mem_id
          && mem_obj->device_ptrs[i].mem_ptr != NULL)
        {
          mem_obj->device_ptrs[device->dev_id].mem_ptr =
            mem_obj->device_ptrs[i].mem_ptr;
          POCL_MSG_PRINT_INFO("BASIC: alloc_mem_obj %p dev %d, using already allocated mem\n", mem_obj, device->dev_id);
          return CL_SUCCESS;
        }
    }

  /* memory for this global memory is not yet allocated -> do it */
  if (flags & CL_MEM_USE_HOST_PTR)
    {
      // mem_host_ptr must be non-NULL
      assert(host_ptr != NULL);
      b = host_ptr;
    }
  else
    {
      b = pocl_memalign_alloc_global_mem (device, MAX_EXTENDED_ALIGNMENT,
                                          mem_obj->size);
      if (b==NULL)
        return CL_MEM_OBJECT_ALLOCATION_FAILURE;

      mem_obj->shared_mem_allocation_owner = device;
    }

  /* use this dev mem allocation as host ptr */
  if (flags & CL_MEM_ALLOC_HOST_PTR && (mem_obj->mem_host_ptr == NULL))
    mem_obj->mem_host_ptr = b;

  if (flags & CL_MEM_COPY_HOST_PTR)
    {
      // mem_host_ptr must be non-NULL
      assert(host_ptr != NULL);
      memcpy (b, host_ptr, mem_obj->size);
    }

  mem_obj->device_ptrs[device->dev_id].mem_ptr = b;

  return CL_SUCCESS;
}
示例#3
0
文件: pocl_binary.c 项目: kwm81/pocl
/* serializes an entire pocl kernel cachedir. */
static unsigned char*
serialize_kernel_cachedir(cl_kernel kernel, unsigned device_i, unsigned char* buffer)
{
  cl_program program = kernel->program;
  char path[POCL_FILENAME_LENGTH];
  char basedir[POCL_FILENAME_LENGTH];

  pocl_cache_program_path(basedir, program, device_i);
  size_t basedir_len = strlen(basedir);

  pocl_cache_kernel_cachedir(path, program, device_i, kernel);
  POCL_MSG_PRINT_INFO("Kernel %s: recur serializing cachedir %s\n", kernel->name, path);
  buffer = recursively_serialize_path(path, basedir_len, buffer);

  return buffer;
}
示例#4
0
文件: pocl_build.c 项目: pocl/pocl
cl_int
compile_and_link_program(int compile_program,
                         int link_program,
                         cl_program program,
                         cl_uint num_devices,
                         const cl_device_id *device_list,
                         const char *options,
                         cl_uint num_input_headers,
                         const cl_program *input_headers,
                         const char **header_include_names,
                         cl_uint num_input_programs,
                         const cl_program *input_programs,
                         void (CL_CALLBACK *pfn_notify) (cl_program program,
                                                         void *user_data),
                         void *user_data)
{
  char program_bc_path[POCL_FILENAME_LENGTH];
  char link_options[512];
  int errcode, error;
  int create_library = 0;
  int requires_cr_sqrt_div = 0;
  int spir_build = 0;
  unsigned flush_denorms = 0;
  uint64_t fsize;
  cl_device_id *unique_devlist = NULL;
  char *binary = NULL;
  unsigned device_i = 0, actually_built = 0;
  size_t i, j;
  char *temp_options = NULL;
  const char *extra_build_options =
    pocl_get_string_option ("POCL_EXTRA_BUILD_FLAGS", NULL);
  int build_error_code
      = (link_program ? CL_BUILD_PROGRAM_FAILURE : CL_COMPILE_PROGRAM_FAILURE);

  POCL_GOTO_LABEL_COND (PFN_NOTIFY, (program == NULL), CL_INVALID_PROGRAM);

  POCL_GOTO_LABEL_COND (PFN_NOTIFY, (num_devices > 0 && device_list == NULL),
                        CL_INVALID_VALUE);
  POCL_GOTO_LABEL_COND (PFN_NOTIFY, (num_devices == 0 && device_list != NULL),
                        CL_INVALID_VALUE);

  POCL_GOTO_LABEL_COND (PFN_NOTIFY, (pfn_notify == NULL && user_data != NULL),
                        CL_INVALID_VALUE);

  POCL_GOTO_LABEL_ON (PFN_NOTIFY, program->kernels, CL_INVALID_OPERATION,
                      "Program already has kernels\n");

  POCL_GOTO_LABEL_ON (PFN_NOTIFY,
                      (program->source == NULL && program->binaries == NULL),
                      CL_INVALID_PROGRAM,
                      "Program doesn't have sources or binaries! You need "
                      "to call clCreateProgramWith{Binary|Source} first\n");

  POCL_GOTO_LABEL_ON (PFN_NOTIFY,
                      ((program->source == NULL) && (link_program == 0)),
                      CL_INVALID_OPERATION,
                      "Cannot clCompileProgram when program has no source\n");

  POCL_LOCK_OBJ (program);

  program->main_build_log[0] = 0;

  /* TODO this should be somehow utilized at linking */
  POCL_MEM_FREE (program->compiler_options);

  if (extra_build_options)
    {
      size_t len = (options != NULL) ? strlen (options) : 0;
      len += strlen (extra_build_options) + 2;
      temp_options = (char *)malloc (len);

      temp_options[0] = 0;
      if (options != NULL)
        {
          strcpy (temp_options, options);
          strcat (temp_options, " ");
        }
      strcat (temp_options, extra_build_options);
    }
  else
    temp_options = (char*) options;

  if (temp_options)
    {
      i = strlen (temp_options);
      size_t size = i + 512; /* add some space for pocl-added options */
      program->compiler_options = (char *)malloc (size);
      errcode = process_options (temp_options, program->compiler_options,
                                 link_options, program, compile_program,
                                 link_program, &create_library, &flush_denorms,
                                 &requires_cr_sqrt_div, &spir_build, size);
      if (errcode != CL_SUCCESS)
        goto ERROR_CLEAN_OPTIONS;
    }

  POCL_MSG_PRINT_LLVM ("building program with options %s\n",
                       program->compiler_options);


  program->flush_denorms = flush_denorms;
#if !(defined(__x86_64__) && defined(__GNUC__))
  if (flush_denorms)
    {
      POCL_MSG_WARN ("flush to zero is currently only implemented for "
                     "x86-64 & gcc/clang, ignoring flag\n");
    }
#endif

  /* DEVICE LIST */
  if (num_devices == 0)
    {
      num_devices = program->num_devices;
      device_list = program->devices;
    }
  else
    {
      // convert subdevices to devices and remove duplicates
      cl_uint real_num_devices = 0;
      unique_devlist = pocl_unique_device_list (device_list, num_devices,
                                                &real_num_devices);
      num_devices = real_num_devices;
      device_list = unique_devlist;
    }

  clean_program_on_rebuild (program);

  /* Build the fully linked non-parallel bitcode for all
         devices. */
  for (device_i = 0; device_i < program->num_devices; ++device_i)
    {
      cl_device_id device = program->devices[device_i];

      /* find the device in the supplied devices-to-build-for list */
      int found = 0;
      for (i = 0; i < num_devices; ++i)
          if (device_list[i] == device) found = 1;
      if (!found) continue;

      if (requires_cr_sqrt_div
          && !(device->single_fp_config & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT))
        {
          APPEND_TO_MAIN_BUILD_LOG (REQUIRES_CR_SQRT_DIV_ERR);
          POCL_GOTO_ERROR_ON (1, build_error_code,
                              REQUIRES_CR_SQRT_DIV_ERR " %s\n",
                              device->short_name);
        }
      actually_built++;

      /* clCreateProgramWithSource */
      if (program->source)
        {
#ifdef OCS_AVAILABLE
          if (device->compiler_available == CL_TRUE)
            {
              POCL_MSG_PRINT_INFO ("building from sources for device %d\n",
                                   device_i);
              error = pocl_llvm_build_program (
                  program, device_i, program->compiler_options,
                  program_bc_path, num_input_headers, input_headers,
                  header_include_names, (create_library ? 0 : link_program));
              POCL_GOTO_ERROR_ON ((error != 0), build_error_code,
                                  "pocl_llvm_build_program() failed\n");
            }
          else
#endif
            {
              APPEND_TO_MAIN_BUILD_LOG (
                  "Cannot build a program from sources with pocl "
                  "that does not have online compiler support\n");
              POCL_GOTO_ERROR_ON (1, CL_COMPILER_NOT_AVAILABLE, "%s",
                                  program->main_build_log);
            }
        }
      /* clCreateProgramWithBinaries */
      else if (program->binaries[device_i]
               && (program->pocl_binaries[device_i] == NULL))
        {
#ifdef OCS_AVAILABLE
          /* bitcode is now either plain LLVM IR or SPIR IR */
          int spir_binary = bitcode_is_spir ((char*)program->binaries[device_i],
                                             program->binary_sizes[device_i]);
          if (spir_binary)
            POCL_MSG_PRINT_LLVM ("LLVM-SPIR binary detected\n");
          else
            POCL_MSG_PRINT_LLVM ("building from a BC binary for device %d\n",
                                 device_i);

          if (spir_binary)
            {
#ifdef ENABLE_SPIR
              if (!strstr (device->extensions, "cl_khr_spir"))
                {
                  APPEND_TO_MAIN_BUILD_LOG (REQUIRES_SPIR_SUPPORT);
                  POCL_GOTO_ERROR_ON (1, build_error_code,
                                      REQUIRES_SPIR_SUPPORT " %s\n",
                                      device->short_name);
                }
              if (!spir_build)
                POCL_MSG_WARN (
                    "SPIR binary provided, but no spir in build options\n");
              /* SPIR binaries need to be explicitly linked to the kernel
               * library. for non-SPIR binaries this happens as part of build
               * process when program.bc is generated. */
              error
                  = pocl_llvm_link_program (program, device_i, program_bc_path,
                                            0, NULL, NULL, NULL, 0, 1);

              POCL_GOTO_ERROR_ON (error, CL_LINK_PROGRAM_FAILURE,
                                  "Failed to link SPIR program.bc\n");
#else
              APPEND_TO_MAIN_BUILD_LOG (REQUIRES_SPIR_SUPPORT);
              POCL_GOTO_ERROR_ON (1, build_error_code,
                                  REQUIRES_SPIR_SUPPORT " %s\n",
                                  device->short_name);
#endif
            }

#else
          APPEND_TO_MAIN_BUILD_LOG (
              "Cannot build program from LLVM IR binaries with "
              "pocl that does not have online compiler support\n");
          POCL_GOTO_ERROR_ON (1, CL_COMPILER_NOT_AVAILABLE, "%s",
                              program->main_build_log);
#endif
        }
      else if (program->pocl_binaries[device_i])
        {
          POCL_MSG_PRINT_INFO("having a poclbinary for device %d\n", device_i);
#ifdef OCS_AVAILABLE
          if (program->binaries[device_i] == NULL)
            {
              POCL_MSG_WARN (
                  "pocl-binary for this device doesn't contain "
                  "program.bc - you won't be able to rebuild/link it\n");
              /* do not try to read program.bc or LLVM IRs
               * TODO maybe read LLVM IRs ?*/
              continue;
            }
#else
          continue;
#endif
        }
      else if (link_program && (num_input_programs > 0))
        {
#ifdef OCS_AVAILABLE
          /* just link binaries. */
          unsigned char *cur_device_binaries[num_input_programs];
          size_t cur_device_binary_sizes[num_input_programs];
          void *cur_llvm_irs[num_input_programs];
          for (j = 0; j < num_input_programs; j++)
            {
              assert (device == input_programs[j]->devices[device_i]);
              cur_device_binaries[j] = input_programs[j]->binaries[device_i];

              assert (cur_device_binaries[j]);
              cur_device_binary_sizes[j]
                  = input_programs[j]->binary_sizes[device_i];

              if (input_programs[j]->llvm_irs[device_i] == NULL)
                pocl_update_program_llvm_irs (input_programs[j], device_i);

              cur_llvm_irs[j] = input_programs[j]->llvm_irs[device_i];
              assert (cur_llvm_irs[j]);
            }
          error = pocl_llvm_link_program (
              program, device_i, program_bc_path, num_input_programs,
              cur_device_binaries, cur_device_binary_sizes, cur_llvm_irs,
              create_library, 0);
          POCL_GOTO_ERROR_ON ((error != CL_SUCCESS), CL_LINK_PROGRAM_FAILURE,
                              "pocl_llvm_link_program() failed\n");
#else
          POCL_GOTO_ERROR_ON ((1), CL_LINK_PROGRAM_FAILURE,
                              "clCompileProgram/clLinkProgram/clBuildProgram"
                              " require a pocl built with LLVM\n");

#endif
        }
      else
        {
          POCL_GOTO_ERROR_ON (1, CL_INVALID_BINARY,
                              "No sources nor binaries for device %s - can't "
                              "build the program\n", device->short_name);
        }

#ifdef OCS_AVAILABLE
      /* Read binaries from program.bc to memory */
      if (program->binaries[device_i] == NULL)
        {
          errcode = pocl_read_file(program_bc_path, &binary, &fsize);
          POCL_GOTO_ERROR_ON(errcode, CL_BUILD_ERROR,
                             "Failed to read binaries from program.bc to "
                             "memory: %s\n", program_bc_path);

          program->binary_sizes[device_i] = (size_t)fsize;
          program->binaries[device_i] = (unsigned char *)binary;
        }

      if (program->llvm_irs[device_i] == NULL)
        {
          pocl_update_program_llvm_irs(program, device_i);
        }
      /* Maintain a 'last_accessed' file in every program's
       * cache directory. Will be useful for cache pruning script
       * that flushes old directories based on LRU */
      pocl_cache_update_program_last_access(program, device_i);
#endif

    }

  POCL_GOTO_ERROR_ON ((actually_built < num_devices), build_error_code,
                      "Some of the devices on the argument-supplied list are"
                      "not available for the program, or do not exist\n");

  program->build_status = CL_BUILD_SUCCESS;
  program->binary_type = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
  /* if program will be compiled using clCompileProgram its binary_type
   * will be set to CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT.
   *
   * if program was created by clLinkProgram which is called
   * with the –createlibrary link option its binary_type will be set to
   * CL_PROGRAM_BINARY_TYPE_LIBRARY.
   */
  if (create_library)
    program->binary_type = CL_PROGRAM_BINARY_TYPE_LIBRARY;
  if (compile_program && !link_program)
    program->binary_type = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT;

  assert(program->num_kernels == 0);

  /* get non-device-specific kernel metadata. We can stop after finding
   * the first method that works.*/
  for (device_i = 0; device_i < program->num_devices; device_i++)
    {
#ifdef OCS_AVAILABLE
      if (program->binaries[device_i])
        {
          program->num_kernels
              = pocl_llvm_get_kernel_count (program, device_i);
          if (program->num_kernels)
            {
              program->kernel_meta = calloc (program->num_kernels,
                                             sizeof (pocl_kernel_metadata_t));
              pocl_llvm_get_kernels_metadata (program, device_i);
            }
          break;
        }
#endif
      if (program->pocl_binaries[device_i])
        {
          program->num_kernels
              = pocl_binary_get_kernel_count (program, device_i);
          if (program->num_kernels)
            {
              program->kernel_meta = calloc (program->num_kernels,
                                             sizeof (pocl_kernel_metadata_t));
              pocl_binary_get_kernels_metadata (program, device_i);
            }
          break;
        }
    }

  POCL_GOTO_ERROR_ON ((device_i >= program->num_devices), CL_INVALID_BINARY,
                      "Could find kernel metadata in the built program\n");

  /* calculate device-specific kernel hashes. */
  for (j = 0; j < program->num_kernels; ++j)
    {
      program->kernel_meta[j].build_hash
          = calloc (program->num_devices, sizeof (pocl_kernel_hash_t));

      for (device_i = 0; device_i < program->num_devices; device_i++)
        {
          pocl_calculate_kernel_hash (program, j, device_i);
        }
    }

  errcode = CL_SUCCESS;
  goto FINISH;

ERROR:
  free_meta (program);

  program->kernels = NULL;

  for (device_i = 0; device_i < program->num_devices; device_i++)
    {
      if (program->source)
        {
          POCL_MEM_FREE (program->binaries[device_i]);
          program->binary_sizes[device_i] = 0;
        }
    }

ERROR_CLEAN_OPTIONS:
  if (temp_options != options)
    free (temp_options);

  program->build_status = CL_BUILD_ERROR;

FINISH:
  POCL_UNLOCK_OBJ (program);
  POCL_MEM_FREE (unique_devlist);

PFN_NOTIFY:
  if (pfn_notify)
    pfn_notify (program, user_data);

  return errcode;
}
示例#5
0
文件: pocl-cuda.c 项目: jrprice/pocl
void
pocl_cuda_init (cl_device_id device, const char *parameters)
{
  CUresult result;

  result = cuInit (0);
  CUDA_CHECK (result, "cuInit");

  if (device->data)
    return;

  pocl_cuda_device_data_t *data = malloc (sizeof (pocl_cuda_device_data_t));
  result = cuDeviceGet (&data->device, 0);
  CUDA_CHECK (result, "cuDeviceGet");

  // Get specific device name
  device->long_name = device->short_name = malloc (256 * sizeof (char));
  cuDeviceGetName (device->long_name, 256, data->device);

  // Get other device properties
  cuDeviceGetAttribute ((int *)&device->max_work_group_size,
                        CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
                        data->device);
  cuDeviceGetAttribute ((int *)(device->max_work_item_sizes + 0),
                        CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, data->device);
  cuDeviceGetAttribute ((int *)(device->max_work_item_sizes + 1),
                        CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, data->device);
  cuDeviceGetAttribute ((int *)(device->max_work_item_sizes + 2),
                        CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, data->device);
  cuDeviceGetAttribute (
      (int *)&device->local_mem_size,
      CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR, data->device);
  cuDeviceGetAttribute ((int *)&device->max_compute_units,
                        CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
                        data->device);
  cuDeviceGetAttribute ((int *)&device->max_clock_frequency,
                        CU_DEVICE_ATTRIBUTE_CLOCK_RATE, data->device);
  cuDeviceGetAttribute ((int *)&device->error_correction_support,
                        CU_DEVICE_ATTRIBUTE_ECC_ENABLED, data->device);
  cuDeviceGetAttribute ((int *)&device->host_unified_memory,
                        CU_DEVICE_ATTRIBUTE_INTEGRATED, data->device);
  cuDeviceGetAttribute ((int *)&device->max_constant_buffer_size,
                        CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY,
                        data->device);

  device->preferred_vector_width_char = 1;
  device->preferred_vector_width_short = 1;
  device->preferred_vector_width_int = 1;
  device->preferred_vector_width_long = 1;
  device->preferred_vector_width_float = 1;
  device->preferred_vector_width_double = 1;
  device->preferred_vector_width_half = 0;
  device->native_vector_width_char = 1;
  device->native_vector_width_short = 1;
  device->native_vector_width_int = 1;
  device->native_vector_width_long = 1;
  device->native_vector_width_float = 1;
  device->native_vector_width_double = 1;
  device->native_vector_width_half = 0;

  device->single_fp_config = CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO
                             | CL_FP_ROUND_TO_INF | CL_FP_FMA | CL_FP_INF_NAN
                             | CL_FP_DENORM;
  device->double_fp_config = CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO
                             | CL_FP_ROUND_TO_INF | CL_FP_FMA | CL_FP_INF_NAN
                             | CL_FP_DENORM;

  device->local_mem_type = CL_LOCAL;
  device->host_unified_memory = 0;

  // Get GPU architecture name
  int sm_maj, sm_min;
  cuDeviceGetAttribute (&sm_maj, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
                        data->device);
  cuDeviceGetAttribute (&sm_min, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
                        data->device);
  char *gpu_arch = malloc (16 * sizeof (char));
  snprintf (gpu_arch, 16, "sm_%d%d", sm_maj, sm_min);
  device->llvm_cpu = pocl_get_string_option ("POCL_CUDA_GPU_ARCH", gpu_arch);
  POCL_MSG_PRINT_INFO ("[CUDA] GPU architecture = %s\n", device->llvm_cpu);

  // Create context
  result = cuCtxCreate (&data->context, CU_CTX_MAP_HOST, data->device);
  CUDA_CHECK (result, "cuCtxCreate");

  // Get global memory size
  size_t memfree, memtotal;
  result = cuMemGetInfo (&memfree, &memtotal);
  device->max_mem_alloc_size = max (memtotal / 4, 128 * 1024 * 1024);
  device->global_mem_size = memtotal;

  device->data = data;
}