Exemple #1
0
/*
 * Print every OpenCL platform (by vendor string) to stdout and, indented
 * beneath each platform, the name of every device it exposes.
 *
 * Takes no arguments and returns nothing.  Failures of OpenCL calls and of
 * the two allocations are routed through CALL_CL_GUARDED / CHECK_SYS_ERROR,
 * which are defined elsewhere.
 */
void print_platforms_devices()
{
  // get number of platforms
  cl_uint plat_count;
  CALL_CL_GUARDED(clGetPlatformIDs, (0, NULL, &plat_count));

  // allocate memory, get list of platforms
  cl_platform_id *platforms =
    (cl_platform_id *) malloc(plat_count*sizeof(cl_platform_id));
  CHECK_SYS_ERROR(!platforms, "allocating platform array");

  CALL_CL_GUARDED(clGetPlatformIDs, (plat_count, platforms, NULL));

  // iterate over platforms
  for (cl_uint i = 0; i < plat_count; ++i)
  {
    // get platform vendor name
    char vendor[MAX_NAME_LEN];
    CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR,
          sizeof(vendor), vendor, NULL));
    // cl_uint is unsigned, so it is printed with %u rather than %d
    printf("platform %u: vendor '%s'\n", i, vendor);

    // get number of devices in platform
    cl_uint dev_count;
    CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL,
          0, NULL, &dev_count));

    cl_device_id *devices =
      (cl_device_id *) malloc(dev_count*sizeof(cl_device_id));
    CHECK_SYS_ERROR(!devices, "allocating device array");

    // get list of devices in platform
    CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL,
          dev_count, devices, NULL));

    // iterate over devices
    for (cl_uint j = 0; j < dev_count; ++j)
    {
      // separate name from the platform buffer so nothing is shadowed
      char dev_label[MAX_NAME_LEN];
      CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME,
            sizeof(dev_label), dev_label, NULL));
      printf("  device %u: '%s'\n", j, dev_label);
    }

    free(devices);
  }

  free(platforms);
}
	bool BinaryFile::Truncate(const string& filename)
	{
		DWORD error = GetLastError();

		HANDLE hFile = CreateFile(filename.Data(), GENERIC_WRITE, 0, 0, TRUNCATE_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);		
		error = GetLastError();
		if (error == ERROR_FILE_NOT_FOUND || error == ERROR_SUCCESS)
			return true;				
		SetLastError(error);
		CHECK_SYS_ERROR(L"Error can't truncate binary file " + filename);

		CloseHandle(hFile);
		CHECK_SYS_ERROR(L"Failed to truncate a file " + filename);
		return true;
	}
	bool BinaryFile::Save(const string& filename, const Buffer& buffer)
	{
		DWORD error = GetLastError();

		HANDLE hFile = CreateFile(filename.Data(), GENERIC_WRITE, 0, 0,CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
		CHECK_SYS_ERROR(L"Error in binary file, open file for saving " + filename);

		DWORD read;
		WriteFile(hFile, (LPCVOID)buffer.StartPointer(), (DWORD)buffer.GetPosition(), &read, 0);
		CHECK_SYS_ERROR(L"Error in binary file, can't write data to file " + filename);

		if (read != buffer.GetPosition())
			throw OSException(L"Error in binary file, written data is less than should be " + filename);

		CloseHandle(hFile);
		CHECK_SYS_ERROR(L"Saving binary file failed " + filename);
		return true;
	}
Exemple #4
0
//TODO move this to the routine opencl.printAllPlatform()
/*
 * List all OpenCL platforms (vendor string) and, below each one, the names
 * of its devices on stdout.
 *
 * Takes no arguments and returns nothing.  OpenCL and allocation failures
 * are routed through CALL_CL_GUARDED / CHECK_SYS_ERROR, defined elsewhere.
 */
void print_platforms_devices()
{
  cl_uint plat_count;

  CALL_CL_GUARDED(clGetPlatformIDs, (0, NULL, &plat_count));

  cl_platform_id *platforms = 
    (cl_platform_id *) malloc(plat_count*sizeof(cl_platform_id));
  CHECK_SYS_ERROR(!platforms, "allocating platform array");

  CALL_CL_GUARDED(clGetPlatformIDs, (plat_count, platforms, NULL));
  cl_uint i;
  for (i = 0; i < plat_count; ++i)
  {
    char vendor[100];
    CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR, 
          sizeof(vendor), vendor, NULL));
    // cl_uint is unsigned, so it is printed with %u rather than %d
    printf("plat %u: vendor '%s'\n", i, vendor);

    cl_uint dev_count;
    CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL,
          0, NULL, &dev_count));

    cl_device_id *devices = 
      (cl_device_id *) malloc(dev_count*sizeof(cl_device_id));
    CHECK_SYS_ERROR(!devices, "allocating device array");

    CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL,
          dev_count, devices, NULL));

    cl_uint j;
    for (j = 0; j < dev_count; ++j)
    {
      // separate name from the platform buffer so nothing is shadowed
      char dev_label[100];
      CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME,
            sizeof(dev_label), dev_label, NULL));
      printf("  dev %u '%s'\n", j, dev_label);
    }

    free(devices);
  }

  free(platforms);
}
	bool BinaryFile::Load(const string& filename, Buffer& buffer)
	{
		DWORD error = GetLastError();

		HANDLE hFile = CreateFile(filename.Data(), GENERIC_READ, FILE_SHARE_READ|FILE_SHARE_WRITE, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
		CHECK_SYS_ERROR(L"Error in binary file, can't load it " + filename);

		int size = GetFileSize(hFile, 0);
		buffer.SetSize(size);

		DWORD read;
		ReadFile(hFile, buffer.StartPointer(), size, &read, 0);
		CHECK_SYS_ERROR(L"Error in binary file, can't read data " + filename);

		if (read != size)
			throw OSException(L"Error in binary file, read data less than file contains, possible bad staff happenes " + filename);

		CloseHandle(hFile);
		CHECK_SYS_ERROR(L"Binary file load failed " + filename);
		return true;
	}
	bool BinaryFile::Append(const string& filename, const Buffer& buffer)
	{
		DWORD error = GetLastError();

		HANDLE hFile = CreateFile(filename.Data(), GENERIC_WRITE, 0, 0, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
		CHECK_SYS_ERROR(L"Error in binary file, can't open file for appending it " + filename);

		DWORD offset = GetFileSize(hFile, 0);
		SetFilePointer(hFile, offset, 0, FILE_BEGIN);

		DWORD read;

		WriteFile(hFile, (LPCVOID)buffer.StartPointer(), (DWORD)buffer.GetPosition(), &read, 0);
		CHECK_SYS_ERROR(L"Error in binary file, can't write data to file " + filename);

		if (read != buffer.GetPosition())
			throw OSException(L"Error in binary file, written data is less than should be in " + filename);

		CloseHandle(hFile);
		CHECK_SYS_ERROR(L"Failed to append a file " + filename);
		return true;
	}
Exemple #7
0
/*
 * Build the OpenCL source text `knl` in context `ctx` using the compiler
 * `options`, and return the kernel called `knl_name` from the result.
 *
 * When the build fails, the build log of the first queried device is
 * written to stderr and the process is aborted.  The intermediate program
 * object is released before the kernel is returned.
 */
cl_kernel kernel_from_string(cl_context ctx,
    char const *knl, char const *knl_name, char const *options)
{
  cl_int status;

  // one source string, passed with its explicit length
  size_t src_len = strlen(knl);
  cl_program prog = clCreateProgramWithSource(ctx, 1, &knl, &src_len, &status);
  CHECK_CL_ERROR(status, "clCreateProgramWithSource");

  // compile for all devices of the context
  status = clBuildProgram(prog, 0, NULL, options, NULL, NULL);

  if (status == CL_SUCCESS)
  {
    CHECK_CL_ERROR(status, "clBuildProgram");
  }
  else
  {
    // compilation failed: fetch the log and report it

    cl_device_id build_dev;
    CALL_CL_GUARDED(clGetProgramInfo, (prog, CL_PROGRAM_DEVICES,
          sizeof(build_dev), &build_dev, NULL));

    // first ask how large the log is ...
    size_t log_bytes;
    CALL_CL_GUARDED(clGetProgramBuildInfo, (prog, build_dev,
          CL_PROGRAM_BUILD_LOG, 0, NULL, &log_bytes));

    char *build_log = (char *) malloc(log_bytes);
    CHECK_SYS_ERROR(!build_log, "kernel_from_string: allocate log");

    char dev_label[MAX_NAME_LEN];
    CALL_CL_GUARDED(clGetDeviceInfo, (build_dev, CL_DEVICE_NAME,
          sizeof(dev_label), dev_label, NULL));

    // ... then retrieve its text
    CALL_CL_GUARDED(clGetProgramBuildInfo, (prog, build_dev,
          CL_PROGRAM_BUILD_LOG, log_bytes, build_log, NULL));
    fprintf(stderr, "*** build of '%s' on '%s' failed:\n%s\n*** (end of error)\n",
        knl_name, dev_label, build_log);
    abort();
  }

  // extract the requested kernel; the program object is no longer needed
  cl_kernel result = clCreateKernel(prog, knl_name, &status);
  CHECK_CL_ERROR(status, "clCreateKernel");

  CALL_CL_GUARDED(clReleaseProgram, (prog));

  return result;
}
Exemple #8
0
//TODO remove this at some point (deprecated)
/*
 * Compile the OpenCL source `knl` in `ctx` with build `options` and hand
 * back the kernel named `knl_name`.
 *
 * A failed build is reported through THError together with the build log
 * and device name.  The program object is released before returning.
 */
cl_kernel kernel_from_string(cl_context ctx, 
    char const *knl, char const *knl_name, char const *options)
{
  cl_int status;
  size_t source_length = strlen(knl);

  cl_program built = clCreateProgramWithSource(ctx, 1, &knl, &source_length,
      &status);
  CHECK_CL_ERROR(status, "clCreateProgramWithSource");

  status = clBuildProgram(built, 0, NULL, options, NULL, NULL);

  if (status == CL_SUCCESS)
  {
    CHECK_CL_ERROR(status, "clBuildProgram");
  }
  else
  {
    // The build did not succeed: collect the compiler output.

    cl_device_id target;
    CALL_CL_GUARDED(clGetProgramInfo, (built, CL_PROGRAM_DEVICES,
          sizeof(target), &target, NULL));

    // query the log length, then allocate room for it
    size_t log_len;
    CALL_CL_GUARDED(clGetProgramBuildInfo, (built, target,
          CL_PROGRAM_BUILD_LOG, 0, NULL, &log_len));

    char *log_text = malloc(log_len);
    CHECK_SYS_ERROR(!log_text, "kernel_from_string: allocate log");

    char target_name[100];
    CALL_CL_GUARDED(clGetDeviceInfo, (target, CL_DEVICE_NAME,
          sizeof(target_name), target_name, NULL));

    CALL_CL_GUARDED(clGetProgramBuildInfo, (built, target,
          CL_PROGRAM_BUILD_LOG, log_len, log_text, NULL));
    THError("*** build of '%s' on '%s' failed:\n%s\n*** (end of error)\n",
            knl_name, target_name, log_text);
  }

  cl_kernel result = clCreateKernel(built, knl_name, &status);
  CHECK_CL_ERROR(status, "clCreateKernel");

  CALL_CL_GUARDED(clReleaseProgram, (built));

  return result;
}
Exemple #9
0
/*
 * Read the complete contents of `filename` into a newly malloc'ed,
 * NUL-terminated buffer and return it.  The caller owns the buffer and
 * must free() it.
 *
 * Every failing step is reported through CHECK_SYS_ERROR (defined
 * elsewhere).
 */
char *read_file(const char *filename)
{
  // Binary mode: the size obtained via ftell() must equal the number of
  // bytes fread() delivers, which text mode does not guarantee on
  // platforms that translate line endings.
  FILE *f = fopen(filename, "rb");
  CHECK_SYS_ERROR(!f, "read_file: opening file");

  CHECK_SYS_ERROR(fseek(f, 0, SEEK_END) < 0, "read_file: seeking to end");

  // ftell() reports failure as -1; catch that before using it as a size
  long end_pos = ftell(f);
  CHECK_SYS_ERROR(end_pos < 0, "read_file: determining file size");
  size_t size = (size_t) end_pos;

  CHECK_SYS_ERROR(fseek(f, 0, SEEK_SET) != 0, 
      "read_file: seeking to start");

  // one extra byte for the terminating NUL
  char *result = (char *) malloc(size+1);
  CHECK_SYS_ERROR(!result, "read_file: allocating file contents");
  CHECK_SYS_ERROR(fread(result, 1, size, f) < size,
      "read_file: reading file contents");

  CHECK_SYS_ERROR(fclose(f), "read_file: closing file");
  result[size] = '\0';

  return result;
}
Exemple #10
0
/*
 * Slurp the whole file `filename` into a freshly malloc'ed buffer, append a
 * terminating NUL and return the buffer.  Ownership passes to the caller,
 * who must free() it.
 *
 * Every failing step is reported through CHECK_SYS_ERROR (defined
 * elsewhere).
 */
char *read_file(const char *filename)
{
  // Binary mode keeps the ftell() size and the fread() byte count in
  // agreement on platforms that translate line endings in text mode.
  FILE *f = fopen(filename, "rb");
  CHECK_SYS_ERROR(!f, "read_file: opening file");

  // figure out file size
  CHECK_SYS_ERROR(fseek(f, 0, SEEK_END) < 0, "read_file: seeking to end");

  // ftell() returns a long and signals failure with -1; check it before
  // converting to size_t, where -1 would become a huge value
  long end_pos = ftell(f);
  CHECK_SYS_ERROR(end_pos < 0, "read_file: determining file size");
  size_t size = (size_t) end_pos;

  CHECK_SYS_ERROR(fseek(f, 0, SEEK_SET) != 0,
      "read_file: seeking to start");

  // allocate memory (plus one byte for the NUL), slurp in entire file
  char *result = (char *) malloc(size+1);
  CHECK_SYS_ERROR(!result, "read_file: allocating file contents");
  CHECK_SYS_ERROR(fread(result, 1, size, f) < size,
      "read_file: reading file contents");

  // close, return
  CHECK_SYS_ERROR(fclose(f), "read_file: closing file");
  result[size] = '\0';

  return result;
}
Exemple #11
0
/*
 * Create an OpenCL context and command queue on a device chosen by name.
 *
 * plat_name        substring to look for in the platform vendor string,
 *                  or NULL to accept any platform
 * dev_name         substring to look for in the device name, or NULL to
 *                  accept any device
 * idx              number of matching devices to skip before picking one
 * ctx, queue       receive the created context and command queue
 * enable_profiling non-zero requests CL_QUEUE_PROFILING_ENABLE on the queue
 *
 * If no device matches, a message is written to stderr and the process is
 * aborted.
 */
void create_context_on(const char *plat_name, const char*dev_name, cl_uint idx,
    cl_context *ctx, cl_command_queue *queue, int enable_profiling)
{
  cl_uint n_platforms;

  CALL_CL_GUARDED(clGetPlatformIDs, (0, NULL, &n_platforms));

  cl_platform_id *platforms = 
    (cl_platform_id *) malloc(n_platforms*sizeof(cl_platform_id));
  CHECK_SYS_ERROR(!platforms, "allocating platform array");

  CALL_CL_GUARDED(clGetPlatformIDs, (n_platforms, platforms, NULL));

  for (cl_uint p = 0; p < n_platforms; ++p)
  {
    char vendor[100];
    CALL_CL_GUARDED(clGetPlatformInfo, (platforms[p], CL_PLATFORM_VENDOR, 
          sizeof(vendor), vendor, NULL));

    // skip platforms whose vendor string does not contain the filter
    if (plat_name && !strstr(vendor, plat_name))
      continue;

    cl_uint n_devices;
    CALL_CL_GUARDED(clGetDeviceIDs, (platforms[p], CL_DEVICE_TYPE_ALL,
          0, NULL, &n_devices));

    cl_device_id *devices = 
      (cl_device_id *) malloc(n_devices*sizeof(cl_device_id));
    CHECK_SYS_ERROR(!devices, "allocating device array");

    CALL_CL_GUARDED(clGetDeviceIDs, (platforms[p], CL_DEVICE_TYPE_ALL,
          n_devices, devices, NULL));

    for (cl_uint d = 0; d < n_devices; ++d)
    {
      char dev_label[100];
      CALL_CL_GUARDED(clGetDeviceInfo, (devices[d], CL_DEVICE_NAME,
            sizeof(dev_label), dev_label, NULL));

      // skip devices whose name does not contain the filter
      if (dev_name && !strstr(dev_label, dev_name))
        continue;

      // a match, but the caller asked for a later one
      if (idx != 0)
      {
        --idx;
        continue;
      }

      // copy the handles out before the lookup arrays are released
      cl_platform_id chosen_plat = platforms[p];
      cl_device_id chosen_dev = devices[d];

      free(devices);
      free(platforms);

      cl_context_properties ctx_props[3] = { 
        CL_CONTEXT_PLATFORM, (cl_context_properties) chosen_plat, 0 };

      cl_int status;
      *ctx = clCreateContext(
          ctx_props, 1, &chosen_dev, NULL, NULL, &status);
      CHECK_CL_ERROR(status, "clCreateContext");

      cl_command_queue_properties queue_props = 0;
      if (enable_profiling)
        queue_props |= CL_QUEUE_PROFILING_ENABLE;

      *queue = clCreateCommandQueue(*ctx, chosen_dev, queue_props, &status);
      CHECK_CL_ERROR(status, "clCreateCommandQueue");

      return;
    }

    free(devices);
  }

  free(platforms);

  fputs("create_context_on: specified device not found.\n", stderr);
  abort();
}
Exemple #12
0
/*
 * Decode word by word while growing a prefix taken from the reference `ref`.
 *
 * For every position up to the length of `ref`, a search restricted to the
 * current prefix is created (source side for REF_SOURCE, target side for
 * REF_TARGET), `features` are decoded into a temporary lattice, and the
 * best hypothesis is requested.  When one exists, the lattice is written to
 * the path obtained by formatting `lattice_tmpl` with the current prefix
 * length, and the next reference symbol is appended to the prefix.  When
 * none exists, the beam is doubled and the same position is retried.
 *
 * For any other `ref_type` no decoding takes place.
 *
 * search        search object the prefix searches are derived from; its
 *               decoder's beam_pruning is overwritten on every iteration
 * features      features handed to decode()
 * lattice       only its nbest / nnode settings are read
 * ref           reference symbols, measured with symlen()
 * ref_type      which side of the model the prefix constrains
 * lattice_tmpl  printf-style template taking one int (the prefix length)
 *
 * Returns the final prefix length.
 */
int cat_decode_word(search_t *search, const features_t *features, lattice_t *lattice,
               symbol_t *ref, reference_t ref_type, const char *lattice_tmpl)
{
  TRACE(1, "Decoding CAT word...\n");
  int prefix_len = 0;
  const int ref_len = symlen(ref);

  float beam = search->decoder->beam_pruning;

  search_create_emission_cache(search);

  symbol_t *prefix = (symbol_t *)malloc((ref_len+1)*sizeof(symbol_t));
  REQUIRE(prefix != NULL, "Couldn't allocate prefix\n");
  if (search->decoder->grammar->start != VOCAB_NONE) {
    prefix[prefix_len++] = search->decoder->grammar->start;
  }
  prefix[prefix_len] = VOCAB_NONE;

  if (ref_type == REF_SOURCE || ref_type == REF_TARGET) {
    for (; prefix_len < ref_len; prefix_len++) {
      {
        char *prefix_str = NULL;
        vocab_symbols_to_string(prefix, (ref_type == REF_SOURCE)?search->decoder->vocab->in:search->decoder->vocab->out, &prefix_str);
        TRACE(1, "next prefix: %s\n", prefix_str);
        free(prefix_str);
      }

      search_t *prefix_search = NULL;
      if (ref_type == REF_SOURCE) {
        prefix_search = search_create_from_prefix(search, prefix, NULL);
      }
      else if (ref_type == REF_TARGET) {
        prefix_search = search_create_from_prefix(search, NULL, prefix);
      }
      else {
        REQUIRE(ref_type > REF_NONE && ref_type < REF_MAX, "Invalid reference type\n");
      }
      // store the (possibly widened) beam back into the shared decoder
      search->decoder->beam_pruning = beam;

      CHECK(prefix_search->decoder->grammar->list_initial->num_elements > 0, "Empty prefix grammar. Possible lack of coverture\n");
      fprintf(stderr, "n initials = %d, n_states = %d\n", prefix_search->decoder->grammar->list_initial->num_elements, prefix_search->decoder->grammar->num_states);
      grammar_write_dot(prefix_search->decoder->grammar, stderr);
      lattice_t *prefix_lattice = lattice_create(lattice->nbest, lattice->nnode, prefix_search->decoder);

      clock_t tim = clock();
      decode(prefix_search, features, prefix_lattice);
      clock_t tim2 = clock();
      // Convert to float BEFORE dividing by CLOCKS_PER_SEC; dividing first
      // is integer division for an integer clock_t and drops sub-second time.
      TRACE(1, "iter %d tim %f\n", prefix_len, (((float) (tim2 - tim) / CLOCKS_PER_SEC) / prefix_search->n_frames) / 0.01);

      //Calculate best hypothesis
      {
        symbol_t *best_ext_hyp = NULL;
        lattice_best_hyp(prefix_lattice, &best_ext_hyp);

        if (best_ext_hyp != NULL) {
          // write lattice
          {
            char path[MAX_LINE];
            // bounded formatting: the template comes from the caller, so
            // the result must not be allowed to overrun `path`
            snprintf(path, sizeof(path), lattice_tmpl, prefix_len);
            FILE *lattice_file = smart_fopen(path, "w");
            CHECK_SYS_ERROR(lattice_file != NULL, "Couldn't create word graph file '%s'\n", path);
            lattice_write(prefix_lattice, lattice_file, path);
            smart_fclose(lattice_file);
          }

          // add one word to the prefix
          prefix[prefix_len] = ref[prefix_len];
          prefix[prefix_len + 1] = VOCAB_NONE;

          {
            char *sentence_str = NULL;
            extended_vocab_symbols_to_string(best_ext_hyp, prefix_lattice->decoder->vocab, &sentence_str);
            TRACE(1, "%s\n", sentence_str);
            free(sentence_str);
          }
          free(best_ext_hyp);
        } else {
          TRACE(1, "Sentence not recognized. Increasing beam search\n");
          // undo the loop increment so the same position is retried with
          // a beam twice as wide
          prefix_len--;
          beam *= 2;
        }

      }


      lattice_delete(prefix_lattice);
      search_delete(prefix_search);

      fflush(stdout);

    }
  }

  free(prefix);

  return prefix_len;
}
Exemple #13
0
/*
 * Create an OpenCL context and command queue on a device selected by name
 * or interactively.
 *
 * plat_name        substring to look for in the platform vendor string,
 *                  NULL to accept any platform, or the CHOOSE_INTERACTIVELY
 *                  pointer to show a menu on stdout and read the choice
 *                  with read_a_line()
 * dev_name         same three possibilities, applied to the device name
 * idx              number of matching devices to skip before picking one
 * ctx, queue       receive the created context and command queue
 * enable_profiling non-zero requests CL_QUEUE_PROFILING_ENABLE on the queue
 *
 * When CL_HELPER_FORCE_INTERACTIVE is defined both menus are always shown.
 * When OPENCL_SHARE_WITH_OPENGL is non-zero the context is created for
 * sharing with the current OpenGL context; otherwise a plain context on the
 * selected device is created.
 *
 * If no device matches, a message is written to stderr and the process is
 * aborted.
 */
void create_context_on(const char *plat_name, const char*dev_name, cl_uint idx,
    cl_context *ctx, cl_command_queue *queue, int enable_profiling)
{
  char dev_sel_buf[MAX_NAME_LEN];
  char platform_sel_buf[MAX_NAME_LEN];

  // get number of platforms
  cl_uint plat_count;
  CALL_CL_GUARDED(clGetPlatformIDs, (0, NULL, &plat_count));

  // allocate memory, get list of platform handles
  cl_platform_id *platforms =
    (cl_platform_id *) malloc(plat_count*sizeof(cl_platform_id));
  CHECK_SYS_ERROR(!platforms, "allocating platform array");
  CALL_CL_GUARDED(clGetPlatformIDs, (plat_count, platforms, NULL));

  // print menu, if requested
#ifndef CL_HELPER_FORCE_INTERACTIVE
  if (plat_name == CHOOSE_INTERACTIVELY) // yes, we want exactly that pointer
#endif
  {
    puts("Choose platform:");
    for (cl_uint i = 0; i < plat_count; ++i)
    {
      char buf[MAX_NAME_LEN];
      CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR,
            sizeof(buf), buf, NULL));
      // cl_uint is unsigned, hence %u
      printf("[%u] %s\n", i, buf);
    }

    printf("Enter choice: ");
    fflush(stdout);

    char *sel = read_a_line();
    if (!sel)
    {
      fprintf(stderr, "error reading line from stdin");
      abort();
    }

    // clamp the entered number into the valid index range
    int sel_int = MIN(MAX(0, atoi(sel)), (int) plat_count-1);
    free(sel);

    // from here on, match on the vendor string of the chosen platform
    CALL_CL_GUARDED(clGetPlatformInfo, (platforms[sel_int], CL_PLATFORM_VENDOR,
          sizeof(platform_sel_buf), platform_sel_buf, NULL));
    plat_name = platform_sel_buf;
  }

  // iterate over platforms
  for (cl_uint i = 0; i < plat_count; ++i)
  {
    // get platform name
    char buf[MAX_NAME_LEN];
    CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR,
          sizeof(buf), buf, NULL));

    // does it match?
    if (!plat_name || strstr(buf, plat_name))
    {
      // get number of devices in platform
      cl_uint dev_count;
      CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL,
            0, NULL, &dev_count));

      // allocate memory, get list of device handles in platform
      cl_device_id *devices =
        (cl_device_id *) malloc(dev_count*sizeof(cl_device_id));
      CHECK_SYS_ERROR(!devices, "allocating device array");

      CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL,
            dev_count, devices, NULL));

      // {{{ print device menu, if requested
#ifndef CL_HELPER_FORCE_INTERACTIVE
      if (dev_name == CHOOSE_INTERACTIVELY) // yes, we want exactly that pointer
#endif
      {
        puts("Choose device:");
        for (cl_uint j = 0; j < dev_count; ++j)
        {
          char buf[MAX_NAME_LEN];
          CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME,
                sizeof(buf), buf, NULL));
          printf("[%u] %s\n", j, buf);
        }

        printf("Enter choice: ");
        fflush(stdout);

        char *sel = read_a_line();
        if (!sel)
        {
          fprintf(stderr, "error reading line from stdin");
          abort();
        }

        // clamp the entered number into the valid index range
        int int_sel = MIN(MAX(0, atoi(sel)), (int) dev_count-1);
        free(sel);

        // from here on, match on the name of the chosen device
        CALL_CL_GUARDED(clGetDeviceInfo, (devices[int_sel], CL_DEVICE_NAME,
              sizeof(dev_sel_buf), dev_sel_buf, NULL));
        dev_name = dev_sel_buf;
      }

      // }}}

      // iterate over devices
      for (cl_uint j = 0; j < dev_count; ++j)
      {
        // get device name
        char buf[MAX_NAME_LEN];
        CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME,
              sizeof(buf), buf, NULL));

        // does it match?
        if (!dev_name || strstr(buf, dev_name))
        {
          if (idx == 0)
          {
            // copy the handles out before the lookup arrays are released
            cl_platform_id plat = platforms[i];
            cl_device_id dev = devices[j];

            free(devices);
            free(platforms);

            cl_int status;

            // create a context
#if OPENCL_SHARE_WITH_OPENGL
  #if __APPLE__
            CGLContextObj gl_context = CGLGetCurrentContext();
            CGLShareGroupObj share_group = CGLGetShareGroup(gl_context);

            cl_context_properties properties[] = {
              CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE,
              (cl_context_properties)share_group, 0
            };
            // request and check the status instead of discarding it
            *ctx = clCreateContext(properties, 0, 0, 0, 0, &status);
            CHECK_CL_ERROR(status, "clCreateContext");
            // replace the selected device by the one driving the current
            // virtual screen
            clGetGLContextInfoAPPLE(*ctx, gl_context,
                                    CL_CGL_DEVICE_FOR_CURRENT_VIRTUAL_SCREEN_APPLE, sizeof(dev),
                                    &dev, NULL);
  #elif WIN32
            cl_context_properties cps[] = {
              CL_GL_CONTEXT_KHR, (cl_context_properties) wglGetCurrentContext(), CL_WGL_HDC_KHR, (cl_context_properties) wglGetCurrentDC(), CL_CONTEXT_PLATFORM, (cl_context_properties) plat, 0};

            //Probably won't work because &dev should correspond to glContext
            *ctx = clCreateContext(cps, 1, &dev, NULL, NULL, &status);
            CHECK_CL_ERROR(status, "clCreateContext");
  #else
            // Linux
            cl_context_properties cps[] = {
              CL_GL_CONTEXT_KHR, ( cl_context_properties) glXGetCurrentContext(), CL_GLX_DISPLAY_KHR, (cl_context_properties) glXGetCurrentDisplay(), CL_CONTEXT_PLATFORM, (cl_context_properties) plat, 0 };
            //Probably won't work because &dev should correspond to glContext
            *ctx = clCreateContext(cps, 1, &dev, NULL, NULL, &status);
            CHECK_CL_ERROR(status, "clCreateContext");
  #endif

#else
            // No GL sharing: plain context on the selected device.  The
            // context must exist before the command queue below is created.
            cl_context_properties cps[3] = {
              CL_CONTEXT_PLATFORM, (cl_context_properties) plat, 0 };
            *ctx = clCreateContext(cps, 1, &dev, NULL, NULL, &status);
            CHECK_CL_ERROR(status, "clCreateContext");
#endif

            // create a command queue (once, for every configuration)
            cl_command_queue_properties qprops = 0;
            if (enable_profiling)
              qprops |= CL_QUEUE_PROFILING_ENABLE;

            *queue = clCreateCommandQueue(*ctx, dev, qprops, &status);
            CHECK_CL_ERROR(status, "clCreateCommandQueue");

            return;
          }
          else
            --idx;
        }
      }

      free(devices);
    }
  }

  free(platforms);

  fputs("create_context_on: specified device not found.\n", stderr);
  abort();
}
Exemple #14
0
/*
 * Driver: sets up an OpenCL context, builds the "fd_update" kernel from the
 * file mg-kernel-ver<VERSION>.cl, allocates and initialises the host fields,
 * and runs up to `ncycles` calls of mgv(), recording residual and error
 * norms after each one.  Stops early once the residual norm has dropped
 * below rtol times its initial value.
 *
 * Returns 0 on normal completion; set-up failures abort the process.
 */
int main()
{
  int enable_profiling = 0;
  #ifdef DO_TIMING
      enable_profiling = 1;
  #endif

  //print_platforms_devices();
  cl_context ctx;
  cl_command_queue queue;
  create_context_on("NVIDIA", NULL, 0, &ctx, &queue, enable_profiling);

  // --------------------------------------------------------------------------
  // load kernels
  // --------------------------------------------------------------------------
  // read the cl file

  char buf[100];
  snprintf(buf, sizeof(buf), "mg-kernel-ver%d.cl", VERSION);
  char *knl_text = read_file(buf);
  //get work group dimensions and gflop info.
  int wg_dims , wg_x, wg_y, wg_z, z_div, fetch_per_pt, flops_per_pt;
  if (sscanf(knl_text, "// workgroup: (%d,%d,%d) z_div:%d fetch_per_pt:%d flops_per_pt:%d", 
        &wg_x, &wg_y, &wg_z, &z_div, &fetch_per_pt, &flops_per_pt) == 6)
  {
    wg_dims = 3;
  }
  else if (sscanf(knl_text, "// workgroup: (%d,%d) fetch_per_pt:%d flops_per_pt:%d",
        &wg_x, &wg_y, &fetch_per_pt, &flops_per_pt) == 4)
  {
    wg_dims = 2;
    wg_z = -1;
    z_div = -1;
  }
  else
  {
    perror("reading workgroup spec");
    abort();
  }

  // points at a string literal, so it is declared const
  #ifdef USE_DOUBLE
  const char *compile_opt = "-DFTYPE=double";
  #else
  const char *compile_opt = "-DFTYPE=float";
  #endif

  // creation of the kernel
  cl_kernel poisson_knl = kernel_from_string(ctx, knl_text, "fd_update", compile_opt);
  free(knl_text); // my compiler complains about this one.  OJO!!
  // --------------------------------------------------------------------------
  // set up grid
  // --------------------------------------------------------------------------
  const unsigned points = POINTS;
  const ftype minus_bdry = -1, plus_bdry = 1;

  // We're dividing into (points-1) intervals.
  ftype dx = (plus_bdry-minus_bdry)/(points-1);

  // --------------------------------------------------------------------------
  // allocate and initialize CPU memory
  // --------------------------------------------------------------------------
  int use_alignment;
  unsigned dim_other = points; //if order 2 then 1 point extra on each side
  #ifdef USE_ALIGNMENT
  // adjusts dimension so that the next row starts in a number divisible by 16
  unsigned dim_x = ((dim_other + 15) / 16) * 16; 
  unsigned field_start = 0;
  use_alignment = 1; 
  #else
  unsigned dim_x = dim_other;
  unsigned field_start = 0;// this one puts me right at the beginning
  use_alignment = 0;
  #endif
  // --------Allocate forcing uexact, r and u vectors -------------------------
  const size_t field_size = 0+dim_x*dim_x*dim_x;  // extra large to fit the 2^n constrain in GPU
  ftype *f = malloc(field_size*sizeof(ftype));
  CHECK_SYS_ERROR(!f, "allocating f");
  ftype *u = malloc (field_size*sizeof(ftype));
  CHECK_SYS_ERROR(!u, "allocating u");  
  ftype *uexact = malloc (field_size*sizeof(ftype));
  CHECK_SYS_ERROR(!uexact, "allocating uexact");
  ftype *r = malloc(field_size * sizeof(ftype));
  CHECK_SYS_ERROR(!r, "allocating residual r");

  // --------------------------------------------------------------------------
  // initialize
  // --------------------------------------------------------------------------
  // zero out (necessary to initialize everything bec. I measure norms)
  for (size_t i = 0; i < field_size; ++i){
    f[i] = 0;
    u[i] = 0;
    uexact[i] = 0;
    r[i] = 0;
  }
  // set up the forcing field
  init_f (points, f, dx, field_start, dim_x, dim_other, minus_bdry);
  // Initialize u with initial boundary conditions
  init_u ( points, u , minus_bdry, plus_bdry, dx, field_start, dim_x, dim_other);
  // Initialize the exact solution
  init_uexact(points, u, uexact, dx, field_size, field_start, dim_x, dim_other);

  // --------------------------------------------------------------------------
  // Setup the v-cycles
  // --------------------------------------------------------------------------

  unsigned n1, n2, n3, ncycles;
  n1 = 50;
  n2 = 60;
  n3 = 1;
  ncycles = 2;
  // Slot 0 holds the values before the first cycle and slots 1..ncycles the
  // values after each cycle, so ncycles+1 entries are required.
  ftype *sweeps = malloc ((ncycles + 1)*sizeof(ftype));
  CHECK_SYS_ERROR(!sweeps, "allocating sweeps");
  ftype *rnorm = malloc ((ncycles + 1)*sizeof(ftype));
  CHECK_SYS_ERROR(!rnorm, "allocating rnorm");
  ftype *enorm = malloc ((ncycles + 1)*sizeof(ftype));
  CHECK_SYS_ERROR(!enorm, "allocating enorm");
  ftype rtol = 1.0e-05;

  // Find the norm of the residual (choose your method)
  sweeps[0] =0;
  resid (r, f, u, dx, field_size, field_start, dim_x, dim_other);
  rnorm[0] = norm( r , field_size) * dx;
  U_error(u, uexact, r, field_size);
  enorm[0] = norm( r, field_size ) * dx;

  for(unsigned icycle = 1; icycle <= ncycles; icycle++){
    mgv(f, u, dx, n1, n2, n3, field_size, points, use_alignment, dim_x, ctx, queue, poisson_knl, wg_dims , wg_x, wg_y, wg_z, z_div, fetch_per_pt, flops_per_pt);  //update u through a v-cycle 
    sweeps[icycle] = sweeps[icycle -1] + (4 * (n1 + n2)/3);
    resid (r, f, u, dx, field_size, field_start, dim_x, dim_other);
    rnorm[icycle] = norm( r, field_size ) * dx;
    U_error(u, uexact, r, field_size);
    enorm[icycle] = norm( r, field_size ) * dx;
    //cfacts = (rnorm(icycle)/rnorm(icycle - 1))^(1 / (n1 + n2)) not necessary
    //disp something here if I want to.
    //printf("norm of the cycle %f", enorm[icycle]);
    // converged: residual reduced by the requested factor
    if(rnorm[icycle] <= rtol * rnorm[0])
      break;
  }
#ifdef DO_TIMING
  printf(" ftype:%d ver:%d align:%d pts:%u\tgflops:%.1f\tmcells:%.1f\tgbytes:%.1f [/sec]\tout_gflops:%.6f\n", (int) sizeof(ftype), VERSION, use_alignment, points, gflops_performed/seconds_taken, mcells_updated/seconds_taken, gbytes_accessed/seconds_taken, gflops_performed/tot_secs);
#endif
  // --------------------------------------------------------------------------
  // clean up
  // --------------------------------------------------------------------------
  CALL_CL_GUARDED(clReleaseKernel, (poisson_knl));
  CALL_CL_GUARDED(clReleaseCommandQueue, (queue));
  CALL_CL_GUARDED(clReleaseContext, (ctx));

  // release host memory
  free(sweeps);
  free(rnorm);
  free(enorm);
  free(f);
  free(u);
  free(uexact);
  free(r);

  return 0;
}