コード例 #1
0
ファイル: pocl-cuda.c プロジェクト: jrprice/pocl
void
pocl_cuda_uninit (cl_device_id device)
{
  pocl_cuda_device_data_t *data = device->data;

  cuCtxDestroy (data->context);

  POCL_MEM_FREE (data);
  device->data = NULL;

  POCL_MEM_FREE (device->long_name);
}
コード例 #2
0
ファイル: pocl-hsa.c プロジェクト: larsmans/pocl
void
pocl_hsa_uninit (cl_device_id device)
{
  struct data *d = (struct data*)device->data;
  POCL_MEM_FREE(d);
  device->data = NULL;
}
コード例 #3
0
ファイル: pocl_util.c プロジェクト: Finomnis/pocl_hpx
void
pocl_remove_file (const char *file_path)
{
  int str_size = 10 + strlen(file_path) + 1;
  char *cmd = (char*)malloc(str_size);
  snprintf(cmd, str_size, "rm -f '%s'", file_path);
  system(cmd);
  POCL_MEM_FREE(cmd);
}
コード例 #4
0
ファイル: pocl_util.c プロジェクト: Finomnis/pocl_hpx
void 
pocl_remove_directory (const char *path_name)
{
  int str_size = 10 + strlen(path_name) + 1;
  char *cmd = (char*)malloc(str_size);
  snprintf(cmd, str_size, "rm -fr '%s'", path_name);
  system(cmd);
  POCL_MEM_FREE(cmd);
}
コード例 #5
0
ファイル: pocl_util.c プロジェクト: Finomnis/pocl_hpx
void
pocl_make_directory (const char *path_name)
{
  int str_size = 12 + strlen(path_name) + 1;
  char *cmd = (char*)malloc(str_size);
  snprintf(cmd, str_size, "mkdir -p '%s'", path_name);
  system(cmd);
  POCL_MEM_FREE(cmd);
}
コード例 #6
0
ファイル: pocl_build.c プロジェクト: pocl/pocl
static void
clean_program_on_rebuild (cl_program program)
{
  /* if we're rebuilding the program, release the kernels and reset log/status
   */
  size_t i;
  if ((program->build_status != CL_BUILD_NONE) || program->num_kernels > 0)
    {
      /* Spec says:
         CL_INVALID_OPERATION if there are kernel objects attached to program.
         ...and we check for that earlier.
       */
      assert (program->kernels == NULL);

      free_meta (program);

      program->num_kernels = 0;
      program->build_status = CL_BUILD_NONE;

      for (i = 0; i < program->num_devices; ++i)
        {
          POCL_MEM_FREE (program->build_log[i]);
          memset (program->build_hash[i], 0, sizeof (SHA1_digest_t));
          if (program->source)
            {
              POCL_MEM_FREE (program->binaries[i]);
              program->binary_sizes[i] = 0;
#ifdef OCS_AVAILABLE
              if (program->llvm_irs[i])
                pocl_free_llvm_irs (program, i);
#endif
              POCL_MEM_FREE (program->pocl_binaries[i]);
              program->pocl_binary_sizes[i] = 0;
            }
        }
      program->main_build_log[0] = 0;
    }
}
コード例 #7
0
ファイル: pocl_cache.c プロジェクト: glupescu/pocl
int
pocl_cache_create_program_cachedir(cl_program program,
                                   unsigned device_i,
                                   const char* preprocessed_source,
                                   size_t source_len,
                                   char* program_bc_path,
                                   void** cache_lock)
{
    const char *hash_source = NULL;
    uint8_t old_build_hash[SHA1_DIGEST_SIZE] = {0};
    size_t hs_len = 0;

    assert(cache_topdir_initialized);

    if (program->source && preprocessed_source==NULL) {
        hash_source = program->source;
        hs_len = strlen(program->source);
    } else {
        hash_source = preprocessed_source;
        hs_len = source_len;
    }

    if (program->build_hash[device_i])
        memcpy(old_build_hash, program->build_hash[device_i], SHA1_DIGEST_SIZE);

    build_program_compute_hash(program, device_i, hash_source, hs_len);

    /* if the old hash is nonzero and different, we must free the built binaries
       before returning, so that they get loaded from the new location */
    if (old_build_hash[0] && memcmp(old_build_hash, program->build_hash[device_i],
            SHA1_DIGEST_SIZE))
    {
        if (program->binaries[device_i]) {
            POCL_MEM_FREE(program->binaries[device_i]);
            program->binary_sizes[device_i] = 0;
        }
        pocl_free_llvm_irs(program, device_i);
    }

    program_device_dir(program_bc_path, program, device_i, "");

    if (pocl_mkdir_p(program_bc_path))
        return 1;

    pocl_cache_program_bc_path(program_bc_path, program, device_i);

    *cache_lock = pocl_cache_acquire_writer_lock_i(program, device_i);

    return 0;
}
コード例 #8
0
ファイル: devices.c プロジェクト: ochafik/pocl
/**
 * Get the number of specified devices from environnement
 */
int pocl_device_get_env_count(const char *dev_type)
{
  const char *dev_env = getenv(POCL_DEVICES_ENV);
  char *ptr, *saveptr, *tofree, *token;
  unsigned int dev_count = 0;
  if (dev_env == NULL) 
    {
      return -1;
    }
  ptr = tofree = strdup(dev_env);
  while ((token = strtok_r (ptr, " ", &saveptr)) != NULL)
    {
      if(strcmp(token, dev_type) == 0)
        dev_count++;
      ptr = NULL;
    }
  POCL_MEM_FREE(tofree);

  return dev_count;
}
コード例 #9
0
ファイル: pocl_build.c プロジェクト: pocl/pocl
static void
free_meta (cl_program program)
{
  size_t i;
  unsigned j;

  if (program->num_kernels)
    {
      for (i = 0; i < program->num_kernels; i++)
        {
          pocl_kernel_metadata_t *meta = &program->kernel_meta[i];
          POCL_MEM_FREE (meta->attributes);
          POCL_MEM_FREE (meta->name);
          POCL_MEM_FREE (meta->arg_info);
          for (j = 0; j < program->num_devices; ++j)
            if (meta->data[j] != NULL)
              meta->data[j] = NULL; // TODO free data in driver callback
          POCL_MEM_FREE (meta->data);
          POCL_MEM_FREE (meta->local_sizes);
          POCL_MEM_FREE (meta->build_hash);
        }
      POCL_MEM_FREE (program->kernel_meta);
    }
}
コード例 #10
0
ファイル: pocl_build.c プロジェクト: pocl/pocl
cl_int
compile_and_link_program(int compile_program,
                         int link_program,
                         cl_program program,
                         cl_uint num_devices,
                         const cl_device_id *device_list,
                         const char *options,
                         cl_uint num_input_headers,
                         const cl_program *input_headers,
                         const char **header_include_names,
                         cl_uint num_input_programs,
                         const cl_program *input_programs,
                         void (CL_CALLBACK *pfn_notify) (cl_program program,
                                                         void *user_data),
                         void *user_data)
{
  char program_bc_path[POCL_FILENAME_LENGTH];
  char link_options[512];
  int errcode, error;
  int create_library = 0;
  int requires_cr_sqrt_div = 0;
  int spir_build = 0;
  unsigned flush_denorms = 0;
  uint64_t fsize;
  cl_device_id *unique_devlist = NULL;
  char *binary = NULL;
  unsigned device_i = 0, actually_built = 0;
  size_t i, j;
  char *temp_options = NULL;
  const char *extra_build_options =
    pocl_get_string_option ("POCL_EXTRA_BUILD_FLAGS", NULL);
  int build_error_code
      = (link_program ? CL_BUILD_PROGRAM_FAILURE : CL_COMPILE_PROGRAM_FAILURE);

  POCL_GOTO_LABEL_COND (PFN_NOTIFY, (program == NULL), CL_INVALID_PROGRAM);

  POCL_GOTO_LABEL_COND (PFN_NOTIFY, (num_devices > 0 && device_list == NULL),
                        CL_INVALID_VALUE);
  POCL_GOTO_LABEL_COND (PFN_NOTIFY, (num_devices == 0 && device_list != NULL),
                        CL_INVALID_VALUE);

  POCL_GOTO_LABEL_COND (PFN_NOTIFY, (pfn_notify == NULL && user_data != NULL),
                        CL_INVALID_VALUE);

  POCL_GOTO_LABEL_ON (PFN_NOTIFY, program->kernels, CL_INVALID_OPERATION,
                      "Program already has kernels\n");

  POCL_GOTO_LABEL_ON (PFN_NOTIFY,
                      (program->source == NULL && program->binaries == NULL),
                      CL_INVALID_PROGRAM,
                      "Program doesn't have sources or binaries! You need "
                      "to call clCreateProgramWith{Binary|Source} first\n");

  POCL_GOTO_LABEL_ON (PFN_NOTIFY,
                      ((program->source == NULL) && (link_program == 0)),
                      CL_INVALID_OPERATION,
                      "Cannot clCompileProgram when program has no source\n");

  POCL_LOCK_OBJ (program);

  program->main_build_log[0] = 0;

  /* TODO this should be somehow utilized at linking */
  POCL_MEM_FREE (program->compiler_options);

  if (extra_build_options)
    {
      size_t len = (options != NULL) ? strlen (options) : 0;
      len += strlen (extra_build_options) + 2;
      temp_options = (char *)malloc (len);

      temp_options[0] = 0;
      if (options != NULL)
        {
          strcpy (temp_options, options);
          strcat (temp_options, " ");
        }
      strcat (temp_options, extra_build_options);
    }
  else
    temp_options = (char*) options;

  if (temp_options)
    {
      i = strlen (temp_options);
      size_t size = i + 512; /* add some space for pocl-added options */
      program->compiler_options = (char *)malloc (size);
      errcode = process_options (temp_options, program->compiler_options,
                                 link_options, program, compile_program,
                                 link_program, &create_library, &flush_denorms,
                                 &requires_cr_sqrt_div, &spir_build, size);
      if (errcode != CL_SUCCESS)
        goto ERROR_CLEAN_OPTIONS;
    }

  POCL_MSG_PRINT_LLVM ("building program with options %s\n",
                       program->compiler_options);


  program->flush_denorms = flush_denorms;
#if !(defined(__x86_64__) && defined(__GNUC__))
  if (flush_denorms)
    {
      POCL_MSG_WARN ("flush to zero is currently only implemented for "
                     "x86-64 & gcc/clang, ignoring flag\n");
    }
#endif

  /* DEVICE LIST */
  if (num_devices == 0)
    {
      num_devices = program->num_devices;
      device_list = program->devices;
    }
  else
    {
      // convert subdevices to devices and remove duplicates
      cl_uint real_num_devices = 0;
      unique_devlist = pocl_unique_device_list (device_list, num_devices,
                                                &real_num_devices);
      num_devices = real_num_devices;
      device_list = unique_devlist;
    }

  clean_program_on_rebuild (program);

  /* Build the fully linked non-parallel bitcode for all
         devices. */
  for (device_i = 0; device_i < program->num_devices; ++device_i)
    {
      cl_device_id device = program->devices[device_i];

      /* find the device in the supplied devices-to-build-for list */
      int found = 0;
      for (i = 0; i < num_devices; ++i)
          if (device_list[i] == device) found = 1;
      if (!found) continue;

      if (requires_cr_sqrt_div
          && !(device->single_fp_config & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT))
        {
          APPEND_TO_MAIN_BUILD_LOG (REQUIRES_CR_SQRT_DIV_ERR);
          POCL_GOTO_ERROR_ON (1, build_error_code,
                              REQUIRES_CR_SQRT_DIV_ERR " %s\n",
                              device->short_name);
        }
      actually_built++;

      /* clCreateProgramWithSource */
      if (program->source)
        {
#ifdef OCS_AVAILABLE
          if (device->compiler_available == CL_TRUE)
            {
              POCL_MSG_PRINT_INFO ("building from sources for device %d\n",
                                   device_i);
              error = pocl_llvm_build_program (
                  program, device_i, program->compiler_options,
                  program_bc_path, num_input_headers, input_headers,
                  header_include_names, (create_library ? 0 : link_program));
              POCL_GOTO_ERROR_ON ((error != 0), build_error_code,
                                  "pocl_llvm_build_program() failed\n");
            }
          else
#endif
            {
              APPEND_TO_MAIN_BUILD_LOG (
                  "Cannot build a program from sources with pocl "
                  "that does not have online compiler support\n");
              POCL_GOTO_ERROR_ON (1, CL_COMPILER_NOT_AVAILABLE, "%s",
                                  program->main_build_log);
            }
        }
      /* clCreateProgramWithBinaries */
      else if (program->binaries[device_i]
               && (program->pocl_binaries[device_i] == NULL))
        {
#ifdef OCS_AVAILABLE
          /* bitcode is now either plain LLVM IR or SPIR IR */
          int spir_binary = bitcode_is_spir ((char*)program->binaries[device_i],
                                             program->binary_sizes[device_i]);
          if (spir_binary)
            POCL_MSG_PRINT_LLVM ("LLVM-SPIR binary detected\n");
          else
            POCL_MSG_PRINT_LLVM ("building from a BC binary for device %d\n",
                                 device_i);

          if (spir_binary)
            {
#ifdef ENABLE_SPIR
              if (!strstr (device->extensions, "cl_khr_spir"))
                {
                  APPEND_TO_MAIN_BUILD_LOG (REQUIRES_SPIR_SUPPORT);
                  POCL_GOTO_ERROR_ON (1, build_error_code,
                                      REQUIRES_SPIR_SUPPORT " %s\n",
                                      device->short_name);
                }
              if (!spir_build)
                POCL_MSG_WARN (
                    "SPIR binary provided, but no spir in build options\n");
              /* SPIR binaries need to be explicitly linked to the kernel
               * library. for non-SPIR binaries this happens as part of build
               * process when program.bc is generated. */
              error
                  = pocl_llvm_link_program (program, device_i, program_bc_path,
                                            0, NULL, NULL, NULL, 0, 1);

              POCL_GOTO_ERROR_ON (error, CL_LINK_PROGRAM_FAILURE,
                                  "Failed to link SPIR program.bc\n");
#else
              APPEND_TO_MAIN_BUILD_LOG (REQUIRES_SPIR_SUPPORT);
              POCL_GOTO_ERROR_ON (1, build_error_code,
                                  REQUIRES_SPIR_SUPPORT " %s\n",
                                  device->short_name);
#endif
            }

#else
          APPEND_TO_MAIN_BUILD_LOG (
              "Cannot build program from LLVM IR binaries with "
              "pocl that does not have online compiler support\n");
          POCL_GOTO_ERROR_ON (1, CL_COMPILER_NOT_AVAILABLE, "%s",
                              program->main_build_log);
#endif
        }
      else if (program->pocl_binaries[device_i])
        {
          POCL_MSG_PRINT_INFO("having a poclbinary for device %d\n", device_i);
#ifdef OCS_AVAILABLE
          if (program->binaries[device_i] == NULL)
            {
              POCL_MSG_WARN (
                  "pocl-binary for this device doesn't contain "
                  "program.bc - you won't be able to rebuild/link it\n");
              /* do not try to read program.bc or LLVM IRs
               * TODO maybe read LLVM IRs ?*/
              continue;
            }
#else
          continue;
#endif
        }
      else if (link_program && (num_input_programs > 0))
        {
#ifdef OCS_AVAILABLE
          /* just link binaries. */
          unsigned char *cur_device_binaries[num_input_programs];
          size_t cur_device_binary_sizes[num_input_programs];
          void *cur_llvm_irs[num_input_programs];
          for (j = 0; j < num_input_programs; j++)
            {
              assert (device == input_programs[j]->devices[device_i]);
              cur_device_binaries[j] = input_programs[j]->binaries[device_i];

              assert (cur_device_binaries[j]);
              cur_device_binary_sizes[j]
                  = input_programs[j]->binary_sizes[device_i];

              if (input_programs[j]->llvm_irs[device_i] == NULL)
                pocl_update_program_llvm_irs (input_programs[j], device_i);

              cur_llvm_irs[j] = input_programs[j]->llvm_irs[device_i];
              assert (cur_llvm_irs[j]);
            }
          error = pocl_llvm_link_program (
              program, device_i, program_bc_path, num_input_programs,
              cur_device_binaries, cur_device_binary_sizes, cur_llvm_irs,
              create_library, 0);
          POCL_GOTO_ERROR_ON ((error != CL_SUCCESS), CL_LINK_PROGRAM_FAILURE,
                              "pocl_llvm_link_program() failed\n");
#else
          POCL_GOTO_ERROR_ON ((1), CL_LINK_PROGRAM_FAILURE,
                              "clCompileProgram/clLinkProgram/clBuildProgram"
                              " require a pocl built with LLVM\n");

#endif
        }
      else
        {
          POCL_GOTO_ERROR_ON (1, CL_INVALID_BINARY,
                              "No sources nor binaries for device %s - can't "
                              "build the program\n", device->short_name);
        }

#ifdef OCS_AVAILABLE
      /* Read binaries from program.bc to memory */
      if (program->binaries[device_i] == NULL)
        {
          errcode = pocl_read_file(program_bc_path, &binary, &fsize);
          POCL_GOTO_ERROR_ON(errcode, CL_BUILD_ERROR,
                             "Failed to read binaries from program.bc to "
                             "memory: %s\n", program_bc_path);

          program->binary_sizes[device_i] = (size_t)fsize;
          program->binaries[device_i] = (unsigned char *)binary;
        }

      if (program->llvm_irs[device_i] == NULL)
        {
          pocl_update_program_llvm_irs(program, device_i);
        }
      /* Maintain a 'last_accessed' file in every program's
       * cache directory. Will be useful for cache pruning script
       * that flushes old directories based on LRU */
      pocl_cache_update_program_last_access(program, device_i);
#endif

    }

  POCL_GOTO_ERROR_ON ((actually_built < num_devices), build_error_code,
                      "Some of the devices on the argument-supplied list are"
                      "not available for the program, or do not exist\n");

  program->build_status = CL_BUILD_SUCCESS;
  program->binary_type = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
  /* if program will be compiled using clCompileProgram its binary_type
   * will be set to CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT.
   *
   * if program was created by clLinkProgram which is called
   * with the –createlibrary link option its binary_type will be set to
   * CL_PROGRAM_BINARY_TYPE_LIBRARY.
   */
  if (create_library)
    program->binary_type = CL_PROGRAM_BINARY_TYPE_LIBRARY;
  if (compile_program && !link_program)
    program->binary_type = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT;

  assert(program->num_kernels == 0);

  /* get non-device-specific kernel metadata. We can stop after finding
   * the first method that works.*/
  for (device_i = 0; device_i < program->num_devices; device_i++)
    {
#ifdef OCS_AVAILABLE
      if (program->binaries[device_i])
        {
          program->num_kernels
              = pocl_llvm_get_kernel_count (program, device_i);
          if (program->num_kernels)
            {
              program->kernel_meta = calloc (program->num_kernels,
                                             sizeof (pocl_kernel_metadata_t));
              pocl_llvm_get_kernels_metadata (program, device_i);
            }
          break;
        }
#endif
      if (program->pocl_binaries[device_i])
        {
          program->num_kernels
              = pocl_binary_get_kernel_count (program, device_i);
          if (program->num_kernels)
            {
              program->kernel_meta = calloc (program->num_kernels,
                                             sizeof (pocl_kernel_metadata_t));
              pocl_binary_get_kernels_metadata (program, device_i);
            }
          break;
        }
    }

  POCL_GOTO_ERROR_ON ((device_i >= program->num_devices), CL_INVALID_BINARY,
                      "Could find kernel metadata in the built program\n");

  /* calculate device-specific kernel hashes. */
  for (j = 0; j < program->num_kernels; ++j)
    {
      program->kernel_meta[j].build_hash
          = calloc (program->num_devices, sizeof (pocl_kernel_hash_t));

      for (device_i = 0; device_i < program->num_devices; device_i++)
        {
          pocl_calculate_kernel_hash (program, j, device_i);
        }
    }

  errcode = CL_SUCCESS;
  goto FINISH;

ERROR:
  free_meta (program);

  program->kernels = NULL;

  for (device_i = 0; device_i < program->num_devices; device_i++)
    {
      if (program->source)
        {
          POCL_MEM_FREE (program->binaries[device_i]);
          program->binary_sizes[device_i] = 0;
        }
    }

ERROR_CLEAN_OPTIONS:
  if (temp_options != options)
    free (temp_options);

  program->build_status = CL_BUILD_ERROR;

FINISH:
  POCL_UNLOCK_OBJ (program);
  POCL_MEM_FREE (unique_devlist);

PFN_NOTIFY:
  if (pfn_notify)
    pfn_notify (program, user_data);

  return errcode;
}
コード例 #11
0
ファイル: pocl_topology.c プロジェクト: pocl/pocl
int
pocl_topology_detect_device_info (cl_device_id device)
{
  device->global_mem_cacheline_size = HOST_CPU_CACHELINE_SIZE;
  device->global_mem_cache_type
      = 0x2; // CL_READ_WRITE_CACHE, without including all of CL/cl.h

  /* global mem cache size */

  char *content;
  uint64_t filesize;

  if (pocl_read_file (L3_CACHE_SIZE, &content, &filesize) == 0)
    {
      long val = atol (content);
      device->global_mem_cache_size = val * 1024;
      POCL_MEM_FREE (content);
    }
  else
    {
      if (pocl_read_file (L2_CACHE_SIZE, &content, &filesize) == 0)
        {
          long val = atol (content);
          device->global_mem_cache_size = val * 1024;
          POCL_MEM_FREE (content);
        }
      else
        {
          POCL_MSG_WARN (
              "Could not figure out CPU cache size, using bogus value\n");
          device->global_mem_cache_size = 1 << 20;
        }
    }

  /* global_mem_size */
  if (pocl_read_file (MEMINFO, &content, &filesize) == 0)
    {
      printf ("content 11: %s  FSIZE: %lu \n", content, filesize);
      char *tmp = content;
      unsigned long memsize_kb;
      size_t i;

      while (*tmp && (*tmp != '\n'))
        ++tmp;
      *tmp = 0;
      printf ("content: %s \n", content);
      tmp = content;
      while (*tmp && (*tmp != 0x20))
        ++tmp;
      while (*tmp && (*tmp == 0x20))
        ++tmp;
      printf ("TMP: %s \n", tmp);
      int items = sscanf (tmp, "%lu kB", &memsize_kb);
      printf ("MEMSIZE: %lu   ITEMS: %i\n", memsize_kb, items);

      assert (items == 1);

      device->global_mem_size = memsize_kb * 1024;
      POCL_MEM_FREE (content);
    }
  else
    {
      POCL_MSG_WARN ("Cannot get memory size\n");
      device->global_mem_size = 256 << 20;
    }

  /* max_compute_units */
  if (pocl_read_file (CPUS, &content, &filesize) == 0)
    {
      long start, end;
      int items = sscanf (content, "%lu-%lu", &start, &end);
      assert (items == 2);
      device->max_compute_units = (unsigned)end + 1;
      POCL_MEM_FREE (content);
    }
  else
    {
      POCL_MSG_WARN ("Cannot get logical CPU number\n");
      device->max_compute_units = 1;
    }

  return 0;
}
コード例 #12
0
ファイル: pocl_build.c プロジェクト: pocl/pocl
/* options must be non-NULL.
 * modded_options[size] + link_options are preallocated outputs
 */
static cl_int
process_options (const char *options, char *modded_options, char *link_options,
                 cl_program program, int compiling, int linking,
                 int *create_library, unsigned *flush_denorms,
                 int *requires_correctly_rounded_sqrt_div,
                 int *spir_build, size_t size)
{
  cl_int error;
  char *token = NULL;
  char *saveptr = NULL;

  *create_library = 0;
  *flush_denorms = 0;
  *requires_correctly_rounded_sqrt_div = 0;
  *spir_build = 0;
  int enable_link_options = 0;
  link_options[0] = 0;
  modded_options[0] = 0;
  int ret_error = (linking ? (compiling ? CL_INVALID_BUILD_OPTIONS
                                        : CL_INVALID_LINKER_OPTIONS)
                           : CL_INVALID_COMPILER_OPTIONS);

  assert (options);
  assert (modded_options);
  assert (compiling || linking);

  size_t i = 1; /* terminating char */
  size_t needed = 0;
  char *temp_options = (char*) malloc (strlen (options) + 1);
  strcpy (temp_options, options);

  token = strtok_r (temp_options, " ", &saveptr);
  while (token != NULL)
    {
      /* check if parameter is supported compiler parameter */
      if (strncmp (token, "-cl", 3) == 0 || strncmp (token, "-w", 2) == 0
          || strncmp (token, "-Werror", 7) == 0)
        {
          if (strstr (cl_program_link_options, token))
            {
              /* when linking, only a subset of -cl* options are valid,
               * and only with -enable-link-options */
              if (linking && (!compiling))
                {
                  if (!enable_link_options)
                    {
                      APPEND_TO_MAIN_BUILD_LOG (
                          "Not compiling but link options were not enabled, "
                          "therefore %s is an invalid option\n",
                          token);
                      error = ret_error;
                      goto ERROR;
                    }
                  strcat (link_options, token);
                }
              if (strstr (token, "-cl-denorms-are-zero"))
                {
                  *flush_denorms = 1;
                }
              if (strstr (token, "-cl-fp32-correctly-rounded-divide-sqrt"))
                {
                  *requires_correctly_rounded_sqrt_div = 1;
                }
            }
          if (strstr (cl_parameters, token))
            {
              /* the LLVM API call pushes the parameters directly to the
                 frontend without using -Xclang */
            }
          else if (strstr (cl_parameters_supported_after_clang_3_9, token))
            {
#ifndef LLVM_OLDER_THAN_3_9
/* the LLVM API call pushes the parameters directly to the
 * frontend without using -Xclang*/
#else
              APPEND_TO_MAIN_BUILD_LOG (
                  "This build option is supported after clang3.9: %s\n",
                  token);
              token = strtok_r (NULL, " ", &saveptr);
              continue;
#endif
            }
          else if (strstr (cl_parameters_not_yet_supported_by_clang, token))
            {
              APPEND_TO_MAIN_BUILD_LOG (
                  "This build option is not yet supported by clang: %s\n",
                  token);
              token = strtok_r (NULL, " ", &saveptr);
              continue;
            }
          else
            {
              APPEND_TO_MAIN_BUILD_LOG("Invalid build option: %s\n", token);
              error = ret_error;
              goto ERROR;
            }
        }
      else if (strncmp (token, "-g", 2) == 0)
        {
#ifndef LLVM_OLDER_THAN_3_8
          token = "-dwarf-column-info -debug-info-kind=limited " \
	    "-dwarf-version=4 -debugger-tuning=gdb";
#endif
        }
      else if (strncmp (token, "-D", 2) == 0 || strncmp (token, "-I", 2) == 0)
        {
          APPEND_TOKEN();
          /* if there is a space in between, then next token is part
             of the option */
          if (strlen (token) == 2)
            token = strtok_r (NULL, " ", &saveptr);
          else
            {
              token = strtok_r (NULL, " ", &saveptr);
              continue;
            }
        }
      else if (strncmp (token, "-x", 2) == 0 && strlen (token) == 2)
        {
          /* only "-x spir" is valid for the "-x" option */
          token = strtok_r (NULL, " ", &saveptr);
          if (!token || strncmp (token, "spir", 4) != 0)
            {
              APPEND_TO_MAIN_BUILD_LOG (
                  "Invalid parameter to -x build option\n");
              error = ret_error;
              goto ERROR;
            }
          /* "-x spir" is not valid if we are building from source */
          else if (program->source)
            {
              APPEND_TO_MAIN_BUILD_LOG (
                  "\"-x spir\" is not valid when building from source\n");
              error = ret_error;
              goto ERROR;
            }
          else
            *spir_build = 1;
          token = strtok_r (NULL, " ", &saveptr);
          continue;
        }
      else if (strncmp (token, "-spir-std=1.2", 13) == 0)
        {
          /* "-spir-std=" flags are not valid when building from source */
          if (program->source)
            {
              APPEND_TO_MAIN_BUILD_LOG ("\"-spir-std=\" flag is not valid "
                                        "when building from source\n");
              error = ret_error;
              goto ERROR;
            }
          else
            *spir_build = 1;
          token = strtok_r (NULL, " ", &saveptr);
          continue;
        }
      else if (strncmp (token, "-create-library", 15) == 0)
        {
          if (!linking)
            {
              APPEND_TO_MAIN_BUILD_LOG (
                  "\"-create-library\" flag is only valid when linking\n");
              error = ret_error;
              goto ERROR;
            }
          *create_library = 1;
          token = strtok_r (NULL, " ", &saveptr);
          continue;
        }
      else if (strncmp (token, "-enable-link-options", 20) == 0)
        {
          if (!linking)
            {
              APPEND_TO_MAIN_BUILD_LOG ("\"-enable-link-options\" flag is "
                                        "only valid when linking\n");
              error = ret_error;
              goto ERROR;
            }
          if (!(*create_library))
            {
              APPEND_TO_MAIN_BUILD_LOG ("\"-enable-link-options\" flag is "
                                        "only valid when -create-library "
                                        "option was given\n");
              error = ret_error;
              goto ERROR;
            }
          enable_link_options = 1;
          token = strtok_r (NULL, " ", &saveptr);
          continue;
        }
      else
        {
          APPEND_TO_MAIN_BUILD_LOG ("Invalid build option: %s\n", token);
          error = ret_error;
          goto ERROR;
        }
      APPEND_TOKEN ();
      token = strtok_r (NULL, " ", &saveptr);
    }

  error = CL_SUCCESS;

  /* remove trailing whitespace */
  i = strlen (modded_options);
  if ((i > 0) && (modded_options[i - 1] == ' '))
    modded_options[i - 1] = 0;
ERROR:
  POCL_MEM_FREE (temp_options);
  return error;
}
コード例 #13
0
ファイル: pocl_util.c プロジェクト: larsmans/pocl
cl_int pocl_create_command (_cl_command_node **cmd,
                            cl_command_queue command_queue,
                            cl_command_type command_type, cl_event *event_p,
                            cl_int num_events, const cl_event *wait_list)
{
  int i;
  int err;
  cl_event *event = NULL;
  /* the provided waiting list will be cloned, because the calling program
   * might recycle the array for a different command.
   */
  cl_event *event_wl = NULL;
  /* Additionally, if the command queue is non-empty and in-order, we want to
   * add the previous command to the waiting list: double-bang to ensure that
   * add_prev_command will be 1 in this case, and 0 otherwise
   */
  cl_int add_prev_command = !!(
    !(command_queue->properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
    && command_queue->root != NULL);

  if ((wait_list == NULL && num_events != 0) ||
      (wait_list != NULL && num_events == 0))
    return CL_INVALID_EVENT_WAIT_LIST;

  for (i = 0; i < num_events; ++i)
    {
      if (wait_list[i] == NULL)
        return CL_INVALID_EVENT_WAIT_LIST;
    }

  *cmd = pocl_mem_manager_new_command ();

  if (*cmd == NULL)
    return CL_OUT_OF_HOST_MEMORY;

  if (num_events || add_prev_command)
    {
      event_wl = (cl_event*)malloc((num_events + add_prev_command)*sizeof(cl_event));
      if (event_wl == NULL)
        return CL_OUT_OF_HOST_MEMORY;
    }

  /* if user does not provide event pointer, create event anyway */
  event = &((*cmd)->event);
  err = pocl_create_event(event, command_queue->context, command_queue, command_type);
  if (err != CL_SUCCESS)
    {
      POCL_MEM_FREE(event_wl);
      POCL_MEM_FREE(*cmd);
      return err;
    }
  if (event_p)
    *event_p = *event;
  else
    (*event)->implicit_event = 1;

  /* clone the event list */
  for (i = 0; i < num_events; ++i)
    {
      event_wl[i] = wait_list[i];
    }

  if (add_prev_command)
    {
      // find the previous command
      _cl_command_node *prev_command;
      for (prev_command = command_queue->root; prev_command->next != NULL;
           prev_command = prev_command->next){}
      //printf("create_command: prev_com=%d prev_com->event = %d \n",prev_command, prev_command->event);
      event_wl[i] = prev_command->event;
    }
#if 0
  for (i = 0; i < num_events + add_prev_command; ++i)
    {
      printf("create-command: event_wl[%i]=%p\n", i, event_wl[i]);
    }
#endif
  (*cmd)->event_wait_list = event_wl;
  (*cmd)->num_events_in_wait_list = num_events + add_prev_command;
  (*cmd)->type = command_type;
  (*cmd)->next = NULL;
  (*cmd)->device = command_queue->device;

  //printf("create_command (end): event=%d new_event=%d cmd->event=%d cmd=%d\n", event, new_event, (*cmd)->event, *cmd);


  return CL_SUCCESS;
}
コード例 #14
0
ファイル: clFinish.c プロジェクト: Hyunsu-Lee/gpulib
static void exec_commands (_cl_command_node *node_list)
{
  int i;
  cl_event *event = NULL;
  _cl_command_node *node;
  cl_command_queue command_queue = NULL;
  event_callback_item* cb_ptr;
  
  LL_FOREACH (node_list, node)
    {
      event = &(node->event);
      /* Command queue is needed for POCL_UPDATE_EVENT macros */
      if (node->event)
        command_queue = node->event->queue;

      if (node->device->ops->compile_submitted_kernels)
        node->device->ops->compile_submitted_kernels (node);

      switch (node->type)
        {
        case CL_COMMAND_READ_BUFFER:
          POCL_UPDATE_EVENT_RUNNING(event, command_queue);
          node->device->ops->read
            (node->device->data, 
             node->command.read.host_ptr, 
             node->command.read.device_ptr,
             node->command.read.offset,
             node->command.read.cb); 
          POCL_UPDATE_EVENT_COMPLETE(event, command_queue);
          POname(clReleaseMemObject) (node->command.read.buffer);
          break;
        case CL_COMMAND_WRITE_BUFFER:
          POCL_UPDATE_EVENT_RUNNING(event, command_queue);
          node->device->ops->write
            (node->device->data, 
             node->command.write.host_ptr, 
             node->command.write.device_ptr,
             node->command.write.offset,
             node->command.write.cb);
          POCL_UPDATE_EVENT_COMPLETE(event, command_queue);
          POname(clReleaseMemObject) (node->command.write.buffer);
          break;
        case CL_COMMAND_COPY_BUFFER:
          POCL_UPDATE_EVENT_RUNNING(event, command_queue);
          node->device->ops->copy
            (node->command.copy.data, 
             node->command.copy.src_ptr,
             node->command.copy.src_offset,
             node->command.copy.dst_ptr,
             node->command.copy.dst_offset,
             node->command.copy.cb);
          POCL_UPDATE_EVENT_COMPLETE(event, command_queue);
          POname(clReleaseMemObject) (node->command.copy.src_buffer);
          POname(clReleaseMemObject) (node->command.copy.dst_buffer);
          break;
        case CL_COMMAND_MAP_IMAGE:
        case CL_COMMAND_MAP_BUFFER: 
          POCL_UPDATE_EVENT_RUNNING(event, command_queue);            
          pocl_map_mem_cmd (node->device, node->command.map.buffer, 
                            node->command.map.mapping);
          POCL_UPDATE_EVENT_COMPLETE(event, command_queue);
          break;
        case CL_COMMAND_WRITE_IMAGE:
          POCL_UPDATE_EVENT_RUNNING(event, command_queue); 
          node->device->ops->write_rect 
            (node->device->data, node->command.rw_image.host_ptr,
             node->command.rw_image.device_ptr, node->command.rw_image.origin,
             node->command.rw_image.origin, node->command.rw_image.region, 
             node->command.rw_image.rowpitch, 
             node->command.rw_image.slicepitch,
             node->command.rw_image.rowpitch,
             node->command.rw_image.slicepitch);
          POCL_UPDATE_EVENT_COMPLETE(event, command_queue);
          break;
        case CL_COMMAND_READ_IMAGE:
          POCL_UPDATE_EVENT_RUNNING(event, command_queue); 
          node->device->ops->read_rect 
            (node->device->data, node->command.rw_image.host_ptr,
             node->command.rw_image.device_ptr, node->command.rw_image.origin,
             node->command.rw_image.origin, node->command.rw_image.region, 
             node->command.rw_image.rowpitch, 
             node->command.rw_image.slicepitch,
             node->command.rw_image.rowpitch,
             node->command.rw_image.slicepitch);
          POCL_UPDATE_EVENT_COMPLETE(event, command_queue);
          break;
        case CL_COMMAND_UNMAP_MEM_OBJECT:
          POCL_UPDATE_EVENT_RUNNING(event, command_queue);
          if ((node->command.unmap.memobj)->flags & 
              (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR))
            {
              /* TODO: should we ensure the device global region is updated from
                 the host memory? How does the specs define it,
                 can the host_ptr be assumed to point to the host and the
                 device accessible memory or just point there until the
                 kernel(s) get executed or similar? */
              /* Assume the region is automatically up to date. */
            } else 
            {
              /* TODO: fixme. The offset computation must be done at the device 
                 driver. */
              if (node->device->ops->unmap_mem != NULL)        
                node->device->ops->unmap_mem
                  (node->device->data, 
                   (node->command.unmap.mapping)->host_ptr, 
                   (node->command.unmap.memobj)->device_ptrs[node->device->dev_id].mem_ptr, 
                   (node->command.unmap.mapping)->size);
            }
          DL_DELETE((node->command.unmap.memobj)->mappings, 
                    node->command.unmap.mapping);
          (node->command.unmap.memobj)->map_count--;
          POCL_UPDATE_EVENT_COMPLETE(event, command_queue);
          break;
        case CL_COMMAND_NDRANGE_KERNEL:
          assert (*event == node->event);
          POCL_UPDATE_EVENT_RUNNING(event, command_queue);
          node->device->ops->run(node->command.run.data, node);
          POCL_UPDATE_EVENT_COMPLETE(event, command_queue);
          for (i = 0; i < node->command.run.arg_buffer_count; ++i)
            {
              cl_mem buf = node->command.run.arg_buffers[i];
              if (buf == NULL) continue;
              /*printf ("### releasing arg %d - the buffer %x of kernel %s\n", i, 
                buf,  node->command.run.kernel->function_name); */
              POname(clReleaseMemObject) (buf);
            }
          POCL_MEM_FREE(node->command.run.arg_buffers);
          POCL_MEM_FREE(node->command.run.tmp_dir);
          for (i = 0; i < node->command.run.kernel->num_args + 
                 node->command.run.kernel->num_locals; ++i)
            {
              pocl_aligned_free (node->command.run.arguments[i].value);
              node->command.run.arguments[i].value = NULL;
            }
          POCL_MEM_FREE(node->command.run.arguments);
      
          POname(clReleaseKernel)(node->command.run.kernel);
          break;
        case CL_COMMAND_NATIVE_KERNEL:
          POCL_UPDATE_EVENT_RUNNING(event, command_queue);
          node->device->ops->run_native(node->command.native.data, node);
          POCL_UPDATE_EVENT_COMPLETE(event, command_queue);
          for (i = 0; i < node->command.native.num_mem_objects; ++i)
            {
              cl_mem buf = node->command.native.mem_list[i];
              if (buf == NULL) continue;
              POname(clReleaseMemObject) (buf);
            }
          POCL_MEM_FREE(node->command.native.mem_list);
          POCL_MEM_FREE(node->command.native.args);
	      break;
        case CL_COMMAND_FILL_IMAGE:
          POCL_UPDATE_EVENT_RUNNING(event, command_queue);
          node->device->ops->fill_rect 
            (node->command.fill_image.data, 
             node->command.fill_image.device_ptr,
             node->command.fill_image.buffer_origin,
             node->command.fill_image.region,
             node->command.fill_image.rowpitch, 
             node->command.fill_image.slicepitch,
             node->command.fill_image.fill_pixel,
             node->command.fill_image.pixel_size);
          POCL_MEM_FREE(node->command.fill_image.fill_pixel);
          POCL_UPDATE_EVENT_COMPLETE(event, command_queue);
          break;
        case CL_COMMAND_MARKER:
          POCL_UPDATE_EVENT_RUNNING(event, command_queue);
          POCL_UPDATE_EVENT_COMPLETE(event, command_queue);
          break;
        default:
          POCL_ABORT_UNIMPLEMENTED("clFinish: Unknown command");
          break;
        }   

        if (event)
          {
            /* event callback handling 
               just call functions in the same order they were added */
            for (cb_ptr = (*event)->callback_list; cb_ptr; cb_ptr = cb_ptr->next)
              {
                cb_ptr->callback_function ((*event), cb_ptr->trigger_status, 
                                           cb_ptr->user_data);
              }
            if ((*event)->implicit_event)
              POname(clReleaseEvent) (*event);
          }
    }
コード例 #15
0
ファイル: pocl_topology.c プロジェクト: franz/pocl
int
pocl_topology_detect_device_info (cl_device_id device)
{
  device->global_mem_cacheline_size = HOST_CPU_CACHELINE_SIZE;
  device->global_mem_cache_type
      = 0x2; // CL_READ_WRITE_CACHE, without including all of CL/cl.h

  /* global mem cache size */

  char *content;
  uint64_t filesize;

  if (pocl_read_file (L3_CACHE_SIZE, &content, &filesize) == 0)
    {
      long val = atol (content);
      device->global_mem_cache_size = val * 1024;
      POCL_MEM_FREE (content);
    }
  else
    {
      if (pocl_read_file (L2_CACHE_SIZE, &content, &filesize) == 0)
        {
          long val = atol (content);
          device->global_mem_cache_size = val * 1024;
          POCL_MEM_FREE (content);
        }
      else
        {
          POCL_MSG_WARN (
              "Could not figure out CPU cache size, using bogus value\n");
          device->global_mem_cache_size = 1 << 20;
        }
    }

  /* global_mem_size */
  if (pocl_read_file (MEMINFO, &content, &filesize) == 0)
    {
      char *tmp = content;
      unsigned long memsize_kb;
      size_t i;

      while (*tmp && (*tmp != '\n'))
        ++tmp;
      *tmp = 0;
      tmp = content;
      while (*tmp && (*tmp != 0x20))
        ++tmp;
      while (*tmp && (*tmp == 0x20))
        ++tmp;
      int items = sscanf (tmp, "%lu kB", &memsize_kb);

      assert (items == 1);

      device->global_mem_size = memsize_kb * 1024;
      POCL_MEM_FREE (content);
    }
  else
    {
      POCL_MSG_WARN ("Cannot get memory size\n");
      device->global_mem_size = 256 << 20;
    }

  /* max_compute_units */
  pocl_read_file (CPUS, &content, &filesize);
  /* files in /sys report file size larger than can be actually read,
     so pocl_read_file() returns an error (even if it correctly
     reads the file). */
  if (content && content[0] != 0)
    {
      unsigned long start, end;
      int items = sscanf (content, "%lu-%lu", &start, &end);
      assert (items == 2);
      device->max_compute_units = (unsigned)end + 1;
      POCL_MEM_FREE (content);
    }
  else
    {
      POCL_MSG_WARN ("Cannot get logical CPU number\n");
      device->max_compute_units = 1;
    }

  return 0;
}
コード例 #16
0
/* creates either a program with binaries, or an empty program. The latter
 * is useful for clLinkProgram() which needs an empty program to put the
 * compiled results in.
 */
cl_program
create_program_skeleton (cl_context context, cl_uint num_devices,
                         const cl_device_id *device_list,
                         const size_t *lengths, const unsigned char **binaries,
                         cl_int *binary_status, cl_int *errcode_ret,
                         int allow_empty_binaries)
{
  cl_program program;
  unsigned i,j;
  int errcode, is_spirv_opencl;
  cl_device_id *unique_devlist = NULL;

  POCL_GOTO_ERROR_COND((context == NULL), CL_INVALID_CONTEXT);

  POCL_GOTO_ERROR_COND((device_list == NULL), CL_INVALID_VALUE);

  POCL_GOTO_ERROR_COND((num_devices == 0), CL_INVALID_VALUE);

  if (!allow_empty_binaries)
    {
      POCL_GOTO_ERROR_COND ((lengths == NULL), CL_INVALID_VALUE);

      for (i = 0; i < num_devices; ++i)
        {
          POCL_GOTO_ERROR_ON ((lengths[i] == 0 || binaries[i] == NULL),
                              CL_INVALID_VALUE,
                              "%i-th binary is NULL or its length==0\n", i);
        }
    }

  // check for duplicates in device_list[].
  for (i = 0; i < context->num_devices; i++)
    {
      int count = 0;
      for (j = 0; j < num_devices; j++)
        {
          count += context->devices[i] == device_list[j];
        }
      // duplicate devices
      POCL_GOTO_ERROR_ON((count > 1), CL_INVALID_DEVICE,
        "device %s specified multiple times\n", context->devices[i]->long_name);
    }

  // convert subdevices to devices and remove duplicates
  cl_uint real_num_devices = 0;
  unique_devlist = pocl_unique_device_list(device_list, num_devices, &real_num_devices);
  num_devices = real_num_devices;
  device_list = unique_devlist;

  // check for invalid devices in device_list[].
  for (i = 0; i < num_devices; i++)
    {
      int found = 0;
      for (j = 0; j < context->num_devices; j++)
        {
          found |= context->devices[j] == device_list[i];
        }
      POCL_GOTO_ERROR_ON((!found), CL_INVALID_DEVICE,
        "device not found in the device list of the context\n");
    }

  if ((program = (cl_program) calloc (1, sizeof (struct _cl_program))) == NULL)
    {
      errcode = CL_OUT_OF_HOST_MEMORY;
      goto ERROR;
    }
  
  POCL_INIT_OBJECT(program);

  if ((program->binary_sizes =
       (size_t*) calloc (num_devices, sizeof(size_t))) == NULL ||
      (program->binaries = (unsigned char**)
       calloc (num_devices, sizeof(unsigned char*))) == NULL ||
      (program->pocl_binaries = (unsigned char**)
       calloc (num_devices, sizeof(unsigned char*))) == NULL ||
      (program->pocl_binary_sizes =
             (size_t*) calloc (num_devices, sizeof(size_t))) == NULL ||
      (program->build_log = (char**)
       calloc (num_devices, sizeof(char*))) == NULL ||
      ((program->llvm_irs =
        (void**) calloc (num_devices, sizeof(void*))) == NULL) ||
      ((program->build_hash = (SHA1_digest_t*)
        calloc (num_devices, sizeof(SHA1_digest_t))) == NULL))
    {
      errcode = CL_OUT_OF_HOST_MEMORY;
      goto ERROR_CLEAN_PROGRAM_AND_BINARIES;
    }

  program->context = context;
  program->num_devices = num_devices;
  program->devices = unique_devlist;
  program->build_status = CL_BUILD_NONE;
  program->binary_type = CL_PROGRAM_BINARY_TYPE_NONE;
  char program_bc_path[POCL_FILENAME_LENGTH];

  if (allow_empty_binaries && (lengths == NULL) && (binaries == NULL))
    goto SUCCESS;

  for (i = 0; i < num_devices; ++i)
    {
      /* LLVM IR */
      if (!strncmp((const char *)binaries[i], "BC", 2))
        {
          program->binary_sizes[i] = lengths[i];
          program->binaries[i] = (unsigned char*) malloc(lengths[i]);
          memcpy (program->binaries[i], binaries[i], lengths[i]);
          if (binary_status != NULL)
            binary_status[i] = CL_SUCCESS;
        }
      /* SPIR-V binary needs to be converted, and requires
       * linking of the converted BC */
#ifdef OCS_AVAILABLE
      else if (bitcode_is_spirv ((const char *)binaries[i], lengths[i], &is_spirv_opencl))
        {
          if (is_spirv_opencl == 0) {
            // SPIR-V but not OpenCL-type.
            POCL_GOTO_ERROR_ON (
                1, CL_BUILD_PROGRAM_FAILURE,
                "SPIR-V binary provided, but is not using Kernel mode."
                "Pocl can't process this binary.\n");
          }

          int no_spir
              = strstr (device_list[i]->extensions, "cl_khr_spir") == NULL;
          POCL_GOTO_ERROR_ON (
              no_spir, CL_BUILD_PROGRAM_FAILURE,
              "SPIR binary provided, but device has no SPIR support");
#ifdef ENABLE_SPIRV
          POCL_MSG_PRINT_LLVM (
              "SPIR-V binary detected, converting to LLVM SPIR\n");
          char program_bc_spirv[POCL_FILENAME_LENGTH];
          char program_bc_temp[POCL_FILENAME_LENGTH];
          pocl_cache_write_spirv (program_bc_spirv, (const char *)binaries[i],
                                  (uint64_t)lengths[i]);
          pocl_cache_tempname (program_bc_temp, ".bc", NULL);

          char *args[] = { LLVM_SPIRV,       "-r", "-o", program_bc_temp,
                           program_bc_spirv, NULL };

          errcode = pocl_run_command (args);
          assert (errcode == 0);
          /* load LLVM SPIR binary. */
          uint64_t fsize;
          char *content;
          pocl_read_file (program_bc_temp, &content, &fsize);
          program->binary_sizes[i] = fsize;
          program->binaries[i] = (unsigned char *)content;
          pocl_remove (program_bc_temp);
#else
          POCL_GOTO_ERROR_ON (
              1, CL_BUILD_PROGRAM_FAILURE,
              "SPIR binary provided, but this pocl has no SPIR-V support."
              "SPIR-V support requires llvm-spirv converter binary.\n");
#endif
        }
#endif
      /* Poclcc binary */
      else if (pocl_binary_check_binary(device_list[i], binaries[i]))
        {
          program->pocl_binary_sizes[i] = lengths[i];
          program->pocl_binaries[i] = (unsigned char*) malloc (lengths[i]);
          memcpy (program->pocl_binaries[i], binaries[i], lengths[i]);

          pocl_binary_set_program_buildhash (program, i, binaries[i]);
          int error = pocl_cache_create_program_cachedir
            (program, i, NULL, 0, program_bc_path);
          POCL_GOTO_ERROR_ON((error != 0), CL_BUILD_PROGRAM_FAILURE,
                             "Could not create program cachedir");
          POCL_GOTO_ERROR_ON(pocl_binary_deserialize (program, i),
                             CL_INVALID_BINARY,
                             "Could not unpack a pocl binary\n");
          /* read program.bc, can be useful later */
          if (pocl_exists (program_bc_path))
            {
              pocl_read_file (program_bc_path,
                              (char **)(&program->binaries[i]),
                              (uint64_t *)(&program->binary_sizes[i]));
            }

          if (binary_status != NULL)
            binary_status[i] = CL_SUCCESS;
        }
      /* Unknown binary */
      else
        {
          POCL_MSG_WARN ("Could not recognize binary\n");
          if (binary_status != NULL)
            binary_status[i] = CL_INVALID_BINARY;
          errcode = CL_INVALID_BINARY;
          goto ERROR_CLEAN_PROGRAM_AND_BINARIES;
        }
    }

SUCCESS:
  POCL_RETAIN_OBJECT(context);

  if (errcode_ret != NULL)
    *errcode_ret = CL_SUCCESS;
  return program;

ERROR_CLEAN_PROGRAM_AND_BINARIES:
  if (program->binaries)
    for (i = 0; i < num_devices; ++i)
      POCL_MEM_FREE(program->binaries[i]);
  POCL_MEM_FREE(program->binaries);
  POCL_MEM_FREE(program->binary_sizes);
  if (program->pocl_binaries)
    for (i = 0; i < num_devices; ++i)
      POCL_MEM_FREE(program->pocl_binaries[i]);
  POCL_MEM_FREE(program->pocl_binaries);
  POCL_MEM_FREE(program->pocl_binary_sizes);
/*ERROR_CLEAN_PROGRAM:*/
  POCL_MEM_FREE(program);
ERROR:
  POCL_MEM_FREE(unique_devlist);
    if(errcode_ret != NULL)
      {
        *errcode_ret = errcode;
      }
    return NULL;
}
コード例 #17
0
ファイル: pocl-hsa.c プロジェクト: larsmans/pocl
void
pocl_hsa_run
(void *data,
 _cl_command_node* cmd)
{
  struct data *d;
  struct pocl_argument *al;
  unsigned i;
  cl_kernel kernel = cmd->command.run.kernel;
  struct pocl_context *pc = &cmd->command.run.pc;
  hsa_signal_value_t initial_value = 1;
#if 0
  /* Not yet supported by the reference library. */
  hsa_kernel_dispatch_packet_t kernel_packet;
#else
  hsa_dispatch_packet_t kernel_packet;
#endif
  hsa_signal_t kernel_completion_signal = 0;
  hsa_region_t region;
  int error;
  amdgpu_args_t *args;
  /* 32b word offset in the kernel arguments buffer we can push the next
     argument to. */
  int args_offset = 0;

  assert (data != NULL);
  d = (struct data *) data;

  d->current_kernel = kernel;

  memset (&kernel_packet, 0, sizeof (hsa_dispatch_packet_t));

#if 0
  /* TODO: not yet supported by the open source runtime implementation.
     Assume the HSA Full profile so we can simply use host malloc().
   */
  hsa_agent_iterate_regions(kernel_agent, pocl_hsa_get_kernarg, &region);

  if (hsa_memory_allocate(region, sizeof(amdgpu_args_t),
                          (void**)&args) !=
      HSA_STATUS_SUCCESS)
    {
      assert (0 && "hsa_memory_allocate() failed.");
    }
#else
  args = (amdgpu_args_t*)malloc(sizeof(amdgpu_args_t));
#endif

  kernel_packet.kernarg_address = (uint64_t)args;

  /* Process the kernel arguments. Convert the opaque buffer
     pointers to real device pointers, allocate dynamic local
     memory buffers, etc. */
  for (i = 0; i < kernel->num_args; ++i)
    {
      al = &(cmd->command.run.arguments[i]);
      if (kernel->arg_info[i].is_local)
        {
          POCL_ABORT_UNIMPLEMENTED("pocl-hsa: local buffers not implemented.");
#if 0
          arguments[i] = malloc (sizeof (void *));
          *(void **)(arguments[i]) = pocl_hsa_malloc(data, 0, al->size, NULL);
#endif
        }
      else if (kernel->arg_info[i].type == POCL_ARG_TYPE_POINTER)
        {
          if (args_offset + 1 >= MAX_KERNEL_ARG_WORDS)
            POCL_ABORT("pocl-hsa: too many kernel arguments!");
          /* Assuming the pointers are 64b (or actually the same as in
             host) due to HSA. TODO: the 32b profile. */
          if (al->value == NULL)
            {
              args->kernel_args[args_offset] = 0;
              args->kernel_args[args_offset + 1] = 0;
            }
          else
            {
              *(uint64_t*)&args->kernel_args[args_offset] =
                (uint64_t)(*(cl_mem *) (al->value))->
                device_ptrs[cmd->device->dev_id].mem_ptr;
            }
          args_offset += 2;

#if 0
          /* It's legal to pass a NULL pointer to clSetKernelArguments. In
             that case we must pass the same NULL forward to the kernel.
             Otherwise, the user must have created a buffer with per device
             pointers stored in the cl_mem. */
          if (al->value == NULL)
            {
              arguments[i] = malloc (sizeof (void *));
              *(void **)arguments[i] = NULL;
            }
          else
            arguments[i] =
              &((*(cl_mem *) (al->value))->device_ptrs[cmd->device->dev_id].mem_ptr);
#endif
        }
      else if (kernel->arg_info[i].type == POCL_ARG_TYPE_IMAGE)
        {
          POCL_ABORT_UNIMPLEMENTED("hsa: image arguments not implemented.");
#if 0
          dev_image_t di;
          fill_dev_image_t (&di, al, cmd->device);

          void* devptr = pocl_hsa_malloc (data, 0, sizeof(dev_image_t), NULL);
          arguments[i] = malloc (sizeof (void *));
          *(void **)(arguments[i]) = devptr;
          pocl_hsa_write (data, &di, devptr, 0, sizeof(dev_image_t));
#endif
        }
      else if (kernel->arg_info[i].type == POCL_ARG_TYPE_SAMPLER)
        {
          POCL_ABORT_UNIMPLEMENTED("hsa: sampler arguments not implemented.");
#if 0
          dev_sampler_t ds;
          arguments[i] = malloc (sizeof (void *));
          *(void **)(arguments[i]) = pocl_hsa_malloc
            (data, 0, sizeof(dev_sampler_t), NULL);
          pocl_hsa_write (data, &ds, *(void**)arguments[i], 0,
                            sizeof(dev_sampler_t));
#endif
        }
      else
        {
          if (args_offset >= MAX_KERNEL_ARG_WORDS)
            POCL_ABORT("pocl-hsa: too many kernel arguments!");

          /* Assuming the scalar fits to a 32b slot. TODO! */
          assert (al->size <= 4);
          args->kernel_args[args_offset] = *(uint32_t*)al->value;
          ++args_offset;
        }
    }

  for (i = kernel->num_args;
       i < kernel->num_args + kernel->num_locals;
       ++i)
    {
      POCL_ABORT_UNIMPLEMENTED("hsa: automatic local buffers not implemented.");
#if 0
      al = &(cmd->command.run.arguments[i]);
      arguments[i] = malloc (sizeof (void *));
      *(void **)(arguments[i]) = pocl_hsa_malloc (data, 0, al->size, NULL);
#endif
    }


  args->workgroup_size_x = kernel_packet.workgroup_size_x = cmd->command.run.local_x;
  args->workgroup_size_y = kernel_packet.workgroup_size_y = cmd->command.run.local_y;
  args->workgroup_size_z = kernel_packet.workgroup_size_z = cmd->command.run.local_z;

  kernel_packet.grid_size_x = pc->num_groups[0] * cmd->command.run.local_x;
  kernel_packet.grid_size_y = pc->num_groups[1] * cmd->command.run.local_y;
  kernel_packet.grid_size_z = pc->num_groups[2] * cmd->command.run.local_z;

  /* AMDGPU specific OpenCL argument data. */

  args->wgs_x = pc->num_groups[0];
  args->wgs_y = pc->num_groups[1];
  args->wgs_z = pc->num_groups[2];

  kernel_packet.dimensions = 1;
  if (cmd->command.run.local_y > 1) kernel_packet.dimensions = 2;
  if (cmd->command.run.local_z > 1) kernel_packet.dimensions = 3;

  kernel_packet.header.type = HSA_PACKET_TYPE_DISPATCH;
  kernel_packet.header.acquire_fence_scope = HSA_FENCE_SCOPE_SYSTEM;
  kernel_packet.header.release_fence_scope = HSA_FENCE_SCOPE_SYSTEM;
  kernel_packet.header.barrier = 1;

  kernel_packet.kernel_object_address =
    *(hsa_amd_code_t*)cmd->command.run.device_data[1];

  error =  hsa_signal_create(initial_value, 0, NULL, &kernel_completion_signal);
  assert (error == HSA_STATUS_SUCCESS);

  kernel_packet.completion_signal = kernel_completion_signal;

  {
    /* Launch the kernel by allocating a slot in the queue, writing the
       command to it, signaling the update with a door bell and finally,
       block waiting until finish signalled with the completion_signal. */
    const uint32_t queue_mask = d->queue->size - 1;
    uint64_t queue_index = hsa_queue_load_write_index_relaxed(d->queue);
    hsa_signal_value_t sigval;
    ((hsa_dispatch_packet_t*)(d->queue->base_address))[queue_index & queue_mask] =
      kernel_packet;
    hsa_queue_store_write_index_relaxed(d->queue, queue_index + 1);
    hsa_signal_store_relaxed(d->queue->doorbell_signal, queue_index);

    sigval = hsa_signal_wait_acquire(kernel_completion_signal, HSA_EQ, 0,
                                     (uint64_t)(-1), HSA_WAIT_EXPECTANCY_UNKNOWN);
  }

  for (i = 0; i < kernel->num_args; ++i)
    {
      if (kernel->arg_info[i].is_local)
        {
#if 0
          pocl_hsa_free (data, 0, *(void **)(arguments[i]));
          POCL_MEM_FREE(arguments[i]);
#endif
        }
      else if (kernel->arg_info[i].type == POCL_ARG_TYPE_IMAGE)
        {
#if 0
          pocl_hsa_free (data, 0, *(void **)(arguments[i]));
          POCL_MEM_FREE(arguments[i]);
#endif
        }
#if 0
      else if (kernel->arg_info[i].type == POCL_ARG_TYPE_SAMPLER ||
               (kernel->arg_info[i].type == POCL_ARG_TYPE_POINTER &&
                *(void**)args->kernel_args[i] == NULL))
        {
          POCL_MEM_FREE(arguments[i]);
        }
#endif
    }
  for (i = kernel->num_args;
       i < kernel->num_args + kernel->num_locals;
       ++i)
    {
#if 0
      pocl_hsa_free(data, 0, *(void **)(arguments[i]));
      POCL_MEM_FREE(arguments[i]);
#endif
    }
  free(args);
}
コード例 #18
0
ファイル: basic.c プロジェクト: zwang4/dividend
void pocl_basic_free_ptr (cl_device_id device, void* mem_ptr)
{
  /* TODO we should somehow figure out the size argument
   * and call pocl_free_global_mem */
  POCL_MEM_FREE(mem_ptr);
}
コード例 #19
0
ファイル: basic.c プロジェクト: zwang4/dividend
void
pocl_basic_run 
(void *data, 
 _cl_command_node* cmd)
{
  struct data *d;
  struct pocl_argument *al;
  size_t x, y, z;
  unsigned i;
  cl_kernel kernel = cmd->command.run.kernel;
  struct pocl_context *pc = &cmd->command.run.pc;

  assert (data != NULL);
  d = (struct data *) data;

  d->current_kernel = kernel;

  void **arguments = (void**)malloc(
      sizeof(void*) * (kernel->num_args + kernel->num_locals)
    );

  /* Process the kernel arguments. Convert the opaque buffer
     pointers to real device pointers, allocate dynamic local 
     memory buffers, etc. */
  for (i = 0; i < kernel->num_args; ++i)
    {
      al = &(cmd->command.run.arguments[i]);
      if (kernel->arg_info[i].is_local)
        {
          arguments[i] = malloc (sizeof (void *));
          *(void **)(arguments[i]) = pocl_memalign_alloc(MAX_EXTENDED_ALIGNMENT, al->size);
        }
      else if (kernel->arg_info[i].type == POCL_ARG_TYPE_POINTER)
        {
          /* It's legal to pass a NULL pointer to clSetKernelArguments. In 
             that case we must pass the same NULL forward to the kernel.
             Otherwise, the user must have created a buffer with per device
             pointers stored in the cl_mem. */
          if (al->value == NULL)
            {
              arguments[i] = malloc (sizeof (void *));
              *(void **)arguments[i] = NULL;
            }
          else
            arguments[i] = &((*(cl_mem *) (al->value))->device_ptrs[cmd->device->dev_id].mem_ptr);
        }
      else if (kernel->arg_info[i].type == POCL_ARG_TYPE_IMAGE)
        {
          dev_image_t di;
          fill_dev_image_t (&di, al, cmd->device);

          void* devptr = pocl_memalign_alloc(MAX_EXTENDED_ALIGNMENT,  sizeof(dev_image_t));
          arguments[i] = malloc (sizeof (void *));
          *(void **)(arguments[i]) = devptr; 
          pocl_basic_write (data, &di, devptr, 0, sizeof(dev_image_t));
        }
      else if (kernel->arg_info[i].type == POCL_ARG_TYPE_SAMPLER)
        {
          dev_sampler_t ds;
          fill_dev_sampler_t(&ds, al);
          
          void* devptr = pocl_memalign_alloc(MAX_EXTENDED_ALIGNMENT, sizeof(dev_sampler_t));
          arguments[i] = malloc (sizeof (void *));
          *(void **)(arguments[i]) = devptr;
          pocl_basic_write (data, &ds, devptr, 0, sizeof(dev_sampler_t));
        }
      else
        {
          arguments[i] = al->value;
        }
    }
  for (i = kernel->num_args;
       i < kernel->num_args + kernel->num_locals;
       ++i)
    {
      al = &(cmd->command.run.arguments[i]);
      arguments[i] = malloc (sizeof (void *));
      *(void **)(arguments[i]) = pocl_memalign_alloc(MAX_EXTENDED_ALIGNMENT, al->size);
    }

  for (z = 0; z < pc->num_groups[2]; ++z)
    {
      for (y = 0; y < pc->num_groups[1]; ++y)
        {
          for (x = 0; x < pc->num_groups[0]; ++x)
            {
              pc->group_id[0] = x;
              pc->group_id[1] = y;
              pc->group_id[2] = z;

              cmd->command.run.wg (arguments, pc);

            }
        }
    }
  for (i = 0; i < kernel->num_args; ++i)
    {
      if (kernel->arg_info[i].is_local)
        {
          POCL_MEM_FREE(*(void **)(arguments[i]));
          POCL_MEM_FREE(arguments[i]);
        }
      else if (kernel->arg_info[i].type == POCL_ARG_TYPE_IMAGE ||
                kernel->arg_info[i].type == POCL_ARG_TYPE_SAMPLER)
        {
          POCL_MEM_FREE(*(void **)(arguments[i]));
          POCL_MEM_FREE(arguments[i]);
        }
      else if (kernel->arg_info[i].type == POCL_ARG_TYPE_POINTER && *(void**)arguments[i] == NULL)
        {
          POCL_MEM_FREE(arguments[i]);
        }
    }
  for (i = kernel->num_args;
       i < kernel->num_args + kernel->num_locals;
       ++i)
    {
      POCL_MEM_FREE(*(void **)(arguments[i]));
      POCL_MEM_FREE(arguments[i]);
    }
  free(arguments);
}
コード例 #20
0
ファイル: pocl_util.c プロジェクト: Finomnis/pocl_hpx
cl_int pocl_create_command (_cl_command_node **cmd,
                            cl_command_queue command_queue, 
                            cl_command_type command_type, cl_event *event_p, 
                            cl_int num_events, const cl_event *wait_list)
{
  int i;
  int err;
  cl_event *event = NULL;

  if ((wait_list == NULL && num_events != 0) ||
      (wait_list != NULL && num_events == 0))
    return CL_INVALID_EVENT_WAIT_LIST;
  
  for (i = 0; i < num_events; ++i)
    {
      if (wait_list[i] == NULL)
        return CL_INVALID_EVENT_WAIT_LIST;
    }
  
  *cmd = pocl_mem_manager_new_command ();

  if (*cmd == NULL)
    return CL_OUT_OF_HOST_MEMORY;
  
  /* if user does not provide event pointer, create event anyway */
  event = &((*cmd)->event);
  err = pocl_create_event(event, command_queue, command_type);
  if (err != CL_SUCCESS)
    {
      POCL_MEM_FREE(*cmd);
      return err;
    }
  if (event_p)
    *event_p = *event;
  else
    (*event)->implicit_event = 1;
  
  /* if in-order command queue and queue is not empty, add event from 
     previous command to new commands event_waitlist */
  if (!(command_queue->properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) 
      && command_queue->root != NULL)
    {
      _cl_command_node *prev_command;
      for (prev_command = command_queue->root; prev_command->next != NULL;
           prev_command = prev_command->next){}
      //printf("create_command: prev_com=%d prev_com->event = %d \n",prev_command, prev_command->event);
      cl_event *new_wl = (cl_event*)malloc ((num_events +1)*sizeof (cl_event));
      for (i = 0; i < num_events; ++i)
        {
          new_wl[i] = wait_list[i];
        }
      new_wl[i] = prev_command->event;
      (*cmd)->event_wait_list = new_wl;
      (*cmd)->num_events_in_wait_list = num_events + 1;
      for (i = 0; i < num_events + 1; ++i)
        {
          //printf("create-command: new_wl[%i]=%d\n", i, new_wl[i]);
        }
    }
  else
    {
      (*cmd)->event_wait_list = wait_list;  
      (*cmd)->num_events_in_wait_list = num_events;
    }
  (*cmd)->type = command_type;
  (*cmd)->next = NULL;
  (*cmd)->device = command_queue->device;

  //printf("create_command (end): event=%d new_event=%d cmd->event=%d cmd=%d\n", event, new_event, (*cmd)->event, *cmd);
  

  return CL_SUCCESS;
}