예제 #1
0
파일: opencl.c 프로젝트: YongHaoWu/wine-hub
cl_int WINAPI wine_clGetProgramInfo(cl_program program, cl_program_info param_name,
                                    size_t param_value_size, void * param_value, size_t * param_value_size_ret)
{
    cl_int ret;
    TRACE("\n");
    ret = clGetProgramInfo(program, param_name, param_value_size, param_value, param_value_size_ret);
    return ret;
}
예제 #2
0
WebCLGetInfo WebCLProgram::getInfo(int param_name, ExceptionState& es) {
    cl_int err = 0;
    cl_uint uint_units = 0;
    char program_string[4096];
    RefPtr<WebCLContext> contextObj  = nullptr;
    RefPtr<WebCLDeviceList> deviceList =  nullptr;
    
    if (m_cl_program == NULL) {
        es.throwWebCLException(
                WebCLException::INVALID_PROGRAM,
                WebCLException::invalidProgramMessage);
        printf("Error: Invalid program object\n");
        return WebCLGetInfo();
    }

    switch(param_name) {   
        case WebCL::PROGRAM_NUM_DEVICES:
            err = clGetProgramInfo(m_cl_program, CL_PROGRAM_NUM_DEVICES, 
                                   sizeof(cl_uint), &uint_units, NULL);
            if (err == CL_SUCCESS)
                return WebCLGetInfo(static_cast<unsigned int>(uint_units));
            break;
        case WebCL::PROGRAM_SOURCE:
            err = clGetProgramInfo(m_cl_program, CL_PROGRAM_SOURCE, 
                                   sizeof(program_string), &program_string, 
                                   NULL);
            if (err == CL_SUCCESS)
                return WebCLGetInfo(String(program_string));
            break;
        case WebCL::PROGRAM_CONTEXT:
            return WebCLGetInfo(PassRefPtr<WebCLContext>(m_cl_context.get()));
            break;
        case WebCL::PROGRAM_DEVICES:
            return WebCLGetInfo(m_cl_context->getDevices());
            break;
        default:
            printf("Error: UNSUPPORTED program Info type = %d ",param_name);
            es.throwWebCLException(
                    WebCLException::INVALID_PROGRAM,
                    WebCLException::invalidProgramMessage);
            return WebCLGetInfo();
    }
    WebCLException::throwException(err, es);
    return WebCLGetInfo();
}
예제 #3
0
	void cl_printBinaries(cl_program program) {

		cl_uint program_num_devices;

		clGetProgramInfo( program,
			CL_PROGRAM_NUM_DEVICES,
			sizeof(cl_uint),
			&program_num_devices,
			NULL
			);

		printf("Number of devices: %d\n", program_num_devices);

		//size_t binaries_sizes[program_num_devices];
		size_t * binaries_sizes = (size_t *)malloc(sizeof(size_t)*program_num_devices);

		clGetProgramInfo( program,
			CL_PROGRAM_BINARY_SIZES,
			program_num_devices*sizeof(size_t),
			binaries_sizes,
			NULL
			);

		char** binaries = (char**)malloc(sizeof(char*)*program_num_devices);

		for (unsigned int i = 0; i < program_num_devices; i++)
			binaries[i] = (char*)malloc(sizeof(char)*(binaries_sizes[i]+1));

		clGetProgramInfo(program, CL_PROGRAM_BINARIES, program_num_devices*sizeof(size_t), binaries, NULL);

		for (unsigned int i = 0; i < program_num_devices; i++)
		{
			binaries[i][binaries_sizes[i]] = '\0';

			printf("Program %d\n", i);
			printf("%s\n", binaries[i]);

		}


		for (unsigned int i = 0; i < program_num_devices; i++)
			free(binaries[i]);

		free(binaries);
	}
예제 #4
0
void oclLogBinary(cl_program clProg, cl_device_id clDev)
{
  // Grab the number of devices associated with the program
  cl_uint num_devices;
  clGetProgramInfo(clProg, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &num_devices, NULL);

  // Grab the device ids
  cl_device_id* devices = (cl_device_id*) malloc(num_devices * sizeof(cl_device_id));
  clGetProgramInfo(clProg, CL_PROGRAM_DEVICES, num_devices * sizeof(cl_device_id), devices, 0);

  // Grab the sizes of the binaries
  size_t* binary_sizes = (size_t*)malloc(num_devices * sizeof(size_t));
  clGetProgramInfo(clProg, CL_PROGRAM_BINARY_SIZES, num_devices * sizeof(size_t), binary_sizes, NULL);

  // Now get the binaries
  char** ptx_code = (char**)malloc(num_devices * sizeof(char*));
  for( unsigned int i=0; i<num_devices; ++i)
  {
    ptx_code[i] = (char*)malloc(binary_sizes[i]);
  }
  clGetProgramInfo(clProg, CL_PROGRAM_BINARIES, 0, ptx_code, NULL);

  // Find the index of the device of interest
  unsigned int idx = 0;
  while((idx < num_devices) && (devices[idx] != clDev))
  {
    ++idx;
  }

  // If the index is associated, log the result
  if( idx < num_devices )
  {
    MITK_INFO<< "\n ---------------- \n Program Binary: \n -----------------------\n";
    MITK_INFO<< ptx_code[idx];
  }

  free( devices );
  free( binary_sizes );
  for(unsigned int i=0; i<num_devices; ++i)
  {
    free(ptx_code[i]);
  }
  free( ptx_code );
}
예제 #5
0
void cb(cl_program p,void* data)
{
    clRetainProgram(p);
    cl_device_id devid[1];
    clGetProgramInfo(p,CL_PROGRAM_DEVICES,sizeof(cl_device_id),(void*)devid,NULL);
    char bug[65536];
    clGetProgramBuildInfo(p,devid[0],CL_PROGRAM_BUILD_LOG,65536*sizeof(char),bug,NULL);
    clReleaseProgram(p);
    LOGE("Build log \n %s\n",bug);
}
예제 #6
0
파일: Program.cpp 프로젝트: k0zmo/clw
 bool programInfo(cl_program id, cl_program_info info, Value* buf, size_t length)
 {
     cl_int error = 0;
     if((error = clGetProgramInfo(id, info, 
             sizeof(Value) * length, buf, nullptr)) != CL_SUCCESS)
     {
         reportError("programInfo(): ", error);
         return false;
     }
     return true;
 }
예제 #7
0
bool CL_Program::GetCompiledBinaries(cl_uchar** ppBinaryArrayOutput) const
{
	CL_CPP_CONDITIONAL_RETURN_FALSE(m_uNumDevices == 0);
	CL_CPP_CONDITIONAL_RETURN_FALSE(!ppBinaryArrayOutput);

	//	Dynamically retreive the compiled source buffers.
	cl_int iErrorCode = clGetProgramInfo(m_Program, CL_PROGRAM_BINARIES, 0, ppBinaryArrayOutput, NULL);

	CL_CPP_CATCH_ERROR(iErrorCode);
	CL_CPP_FORCE_RETURN_BOOL_BY_ERROR(iErrorCode);
}
예제 #8
0
	bool save_binary(const string& clbin)
	{
		size_t size = 0;
		clGetProgramInfo(cpProgram, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL);

		if(!size)
			return false;

		vector<uint8_t> binary(size);
		uint8_t *bytes = &binary[0];

		clGetProgramInfo(cpProgram, CL_PROGRAM_BINARIES, sizeof(uint8_t*), &bytes, NULL);

		if(!path_write_binary(clbin, binary)) {
			opencl_error(string_printf("OpenCL failed to write cached binary %s.", clbin.c_str()));
			return false;
		}

		return true;
	}
예제 #9
0
파일: pclu.c 프로젝트: NatTuck/cakemark
void 
pclu_dump_binary(pclu_program* pgm, const char* path)
{
    int errcode;
    size_t bin_size;

    errcode = clGetProgramInfo(pgm->program, CL_PROGRAM_BINARY_SIZES,
            sizeof(size_t), &bin_size, 0);
    pclu_check_call("clGetProgramInfo(BIN_SIZE)", errcode);

    cl_uchar* binary = (cl_uchar*) malloc(bin_size);
    errcode = clGetProgramInfo(pgm->program, CL_PROGRAM_BINARIES, bin_size, &binary, 0);
    pclu_check_call("clGetProgramInfo(BINARIES)", errcode);

    FILE* bf = fopen(path, "w");
    fwrite((void*)binary, bin_size, 1, bf);
    fclose(bf);

    free(binary);
}
예제 #10
0
    void Program::getInfo(cl_program_info paramName, size_t paramValueSize, void *paramValue) const
    {
        cl_int err = 0;
        size_t written = 0;

        err = clGetProgramInfo(programHandle(), paramName, paramValueSize, paramValue, &written);

        if(err != CL_SUCCESS) {
            throw OpenCLException(err);
        }
    }
예제 #11
0
// ****************************************************************************
// Method:  oclGetProgBinary
//
// Purpose:
//   Get the binary (PTX) of the program associated with the device
//
// Arguments:
//       cpProgram    OpenCL program
//       cdDevice     device of interest
//       binary       returned code
//       length       length of returned code
//
// Copyright 1993-2013 NVIDIA Corporation
//
// ****************************************************************************
inline void
oclGetProgBinary (cl_program cpProgram, cl_device_id cdDevice, char** binary, size_t* length)
{
    // Grab the number of devices associated witht the program
    cl_uint num_devices;
    clGetProgramInfo(cpProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &num_devices, NULL);

    // Grab the device ids
    cl_device_id* devices = (cl_device_id*) malloc(num_devices * sizeof(cl_device_id));
    clGetProgramInfo(cpProgram, CL_PROGRAM_DEVICES, num_devices * sizeof(cl_device_id), devices, 0);

    // Grab the sizes of the binaries
    size_t* binary_sizes = (size_t*)malloc(num_devices * sizeof(size_t));
    clGetProgramInfo(cpProgram, CL_PROGRAM_BINARY_SIZES, num_devices * sizeof(size_t), binary_sizes, NULL);

    // Now get the binaries
    char** ptx_code = (char**) malloc(num_devices * sizeof(char*));
    for( unsigned int i=0; i<num_devices; ++i) {
        ptx_code[i]= (char*)malloc(binary_sizes[i]);
    }
    clGetProgramInfo(cpProgram, CL_PROGRAM_BINARIES, 0, ptx_code, NULL);

    // Find the index of the device of interest
    unsigned int idx = 0;
    while( idx<num_devices && devices[idx] != cdDevice ) ++idx;

    // If it is associated prepare the result
    if( idx < num_devices )
    {
        *binary = ptx_code[idx];
        *length = binary_sizes[idx];
    }

    // Cleanup
    free( devices );
    free( binary_sizes );
    for( unsigned int i=0; i<num_devices; ++i) {
        if( i != idx ) free(ptx_code[i]);
    }
    free( ptx_code );
}
예제 #12
0
파일: Program.cpp 프로젝트: k0zmo/clw
 Value programInfo(cl_program id, cl_program_info info)
 {
     Value value;
     cl_int error = 0;
     if((error = clGetProgramInfo(id, info, 
             sizeof(Value), &value, nullptr)) != CL_SUCCESS)
     {
         reportError("programInfo(): ", error);
         return Value(0);
     }
     return value;
 }
예제 #13
0
struct _cl_version clGetProgramVersion(cl_program program)
{
    struct _cl_version version;
    version.major = 0;
    version.minor = 0;
    cl_context context = NULL;
    cl_int flag =  clGetProgramInfo(program, CL_PROGRAM_CONTEXT,
                                    sizeof(cl_context), &context, NULL);
    if(flag != CL_SUCCESS)
        return version;
    return clGetContextVersion(context);
}
예제 #14
0
SEXP getProgramInfo(SEXP sProgram, SEXP sProgramInfo){
    Rcpp::XPtr<cl_program> program(sProgram);
    std::string programInfo = Rcpp::as<std::string>(sProgramInfo);
    char cBuffer[1024];
    
    if(programInfo == "CL_PROGRAM_SOURCE"){
        std::cout << "get Program source\n";
        clGetProgramInfo(*program, CL_PROGRAM_SOURCE,	sizeof(cBuffer), cBuffer, NULL);
    }
    std::string retVal = cBuffer;
    return Rcpp::wrap(retVal);
}
예제 #15
0
cl_uint CL_Program::GetReferenceCount() const
{
	CL_CPP_CONDITIONAL_RETURN_VALUE(!m_Program, 0);

	//	Dynamically retrieve the current reference count for this kernel.
	cl_uint uRefCount = 0;
	cl_int iErrorCode = clGetProgramInfo(m_Program, CL_PROGRAM_REFERENCE_COUNT, sizeof(cl_uint), &uRefCount, NULL);

	CL_CPP_CATCH_ERROR(iErrorCode);

	return uRefCount;
}
예제 #16
0
bool CL_Program::CreateFromBinaries(cl_uint uNumBinaries, const cl_uchar** ppCompiledBinaries, const size_t* pBinaryLengths, cl_int** ppBinaryStatusOutputs)
{
	CL_CPP_CONDITIONAL_RETURN_FALSE(m_Program);
	CL_CPP_CONDITIONAL_RETURN_FALSE(!m_pContextRef);
	CL_CPP_CONDITIONAL_RETURN_FALSE(!m_pContextRef->IsValid());

	const cl_device_id* pDeviceList = m_pContextRef->GetDeviceRef()->GetDeviceList();
	cl_context Context = m_pContextRef->GetContext();
	cl_int* pBinaryStatus = NULL;
	cl_int iErrorCode =	CL_SUCCESS;

	if(ppBinaryStatusOutputs && uNumBinaries > 0)
		pBinaryStatus = (cl_int*) CL_Alloc(sizeof(cl_int) * uNumBinaries);

	//	Create a program from the given compiled source buffers.
	m_Program = clCreateProgramWithBinary(Context, uNumBinaries, pDeviceList, pBinaryLengths, ppCompiledBinaries, pBinaryStatus, &iErrorCode);

	if(iErrorCode != CL_SUCCESS)
		CL_Free(pBinaryStatus);
	else
		*ppBinaryStatusOutputs = pBinaryStatus;


	CL_CPP_CATCH_ERROR(iErrorCode);
	CL_CPP_ON_ERROR_RETURN_FALSE(iErrorCode);

	//	Cache the properties for this program.
	
	clGetProgramInfo(m_Program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &m_uNumDevices, NULL);

	if(m_uNumDevices > 0)
	{
		m_pBinarySizes = (size_t*) CL_Alloc(sizeof(size_t) * m_uNumDevices);

		clGetProgramInfo(m_Program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t) * m_uNumDevices, m_pBinarySizes, NULL);
	}

	return true;
}
예제 #17
0
    size_t Program::getInfoSize(cl_program_info paramName) const
    {
        size_t ret;
        cl_int err = 0;

        err = clGetProgramInfo(programHandle(), paramName, 0, nullptr, &ret);

        if(err != CL_SUCCESS) {
            throw OpenCLException(err);
        }

        return ret;
    }
예제 #18
0
static cl_kernel
create_kernel (UfoResourcesPrivate *priv,
               cl_program program,
               const gchar *kernel_name,
               GError **error)
{
    cl_kernel kernel;
    gchar *name;
    cl_int errcode = CL_SUCCESS;

    if (kernel_name == NULL) {
        gchar *source;
        gsize size;

        UFO_RESOURCES_CHECK_CLERR (clGetProgramInfo (program, CL_PROGRAM_SOURCE, 0, NULL, &size));
        source = g_malloc0 (size);
        UFO_RESOURCES_CHECK_CLERR (clGetProgramInfo (program, CL_PROGRAM_SOURCE, size, source, NULL));
        name = get_first_kernel_name (source);
        g_free (source);
    }
    else {
        name = g_strdup (kernel_name);
    }

    kernel = clCreateKernel (program, name, &errcode);
    g_free (name);

    if (kernel == NULL || errcode != CL_SUCCESS) {
        g_set_error (error,
                     UFO_RESOURCES_ERROR,
                     UFO_RESOURCES_ERROR_CREATE_KERNEL,
                     "Failed to create kernel `%s`: %s", kernel_name, ufo_resources_clerr (errcode));
        return NULL;
    }

    priv->kernels = g_list_append (priv->kernels, kernel);
    return kernel;
}
예제 #19
0
cl_kernel
bfam_cl_kernel_from_string(cl_context ctx, char const *knl,
                           char const *knl_name, char const *options)
{
  // create an OpenCL program (may have multiple kernels)
  size_t sizes[] = { strlen(knl) };

  cl_int status;
  cl_program program = clCreateProgramWithSource(ctx, 1, &knl, sizes, &status);
  BFAM_CL_CHECK(status, "clCreateProgramWithSource");

  // build it
  status = clBuildProgram(program, 0, NULL, options, NULL, NULL);

  if (status != CL_SUCCESS)
  {
    // build failed, get build log and print it

    cl_device_id dev;
    BFAM_CL_SAFE_CALL(clGetProgramInfo(program, CL_PROGRAM_DEVICES,
          sizeof(dev), &dev, NULL));

    size_t log_size;
    BFAM_CL_SAFE_CALL(clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG,
          0, NULL, &log_size));

    char *log = (char *) bfam_malloc(log_size);

    char devname[MAX_NAME_LEN];
    BFAM_CL_SAFE_CALL(clGetDeviceInfo(dev, CL_DEVICE_NAME,
          sizeof(devname), devname, NULL));

    BFAM_CL_SAFE_CALL(clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG,
          log_size, log, NULL));
    BFAM_LERROR("*** build of '%s' on '%s' failed:\n%s\n*** (end of error)\n",
        knl_name, devname, log);
    BFAM_ABORT("Building kernel from a string");
  }
  else
    BFAM_CL_CHECK(status, "clBuildProgram");

  // fish the kernel out of the program
  cl_kernel kernel = clCreateKernel(program, knl_name, &status);
  BFAM_CL_CHECK(status, "clCreateKernel");

  BFAM_CL_SAFE_CALL(clReleaseProgram(program));

  return kernel;
}
예제 #20
0
static cl_int dump_binaries(cl_program prog, const char *prefix) {
  cl_int errcode;
  cl_uint num_binaries, i;

  size_t *binary_sizes;
  unsigned char **binaries;

  if ((errcode = clGetProgramInfo(prog, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &num_binaries, NULL)) != CL_SUCCESS)
    return errcode;

  binary_sizes = malloc(sizeof(size_t) * num_binaries);
  binaries = malloc(sizeof(unsigned char *) * num_binaries);

  if ((errcode = clGetProgramInfo(prog, CL_PROGRAM_BINARY_SIZES, sizeof(size_t) * num_binaries, binary_sizes, NULL)) != CL_SUCCESS)
    return errcode;

  for (i = 0; i != num_binaries; ++i)
    binaries[i] = malloc(binary_sizes[i]);

  if ((errcode = clGetProgramInfo(prog, CL_PROGRAM_BINARIES, sizeof(unsigned char *) * num_binaries, binaries, NULL)) != CL_SUCCESS)
    return errcode;

  for (i = 0; i != num_binaries; ++i) {
    char name[64];
    sprintf(name, "%s.bin%lu", prefix, (unsigned long) i);

    FILE *f = fopen(name, "w");
    fwrite(binaries[i], binary_sizes[i], 1, f);
    fclose(f);
    free(binaries[i]);
  }

  free(binary_sizes);
  free(binaries);
  return CL_SUCCESS;
}
예제 #21
0
//Dumps a compiled kernel to a binary file
void dumpBinary(cl_program program,const char *kernelName)
{
	
	//Get number of devices
	cl_uint deviceCount = 0;
	status = clGetProgramInfo(program,CL_PROGRAM_NUM_DEVICES,sizeof(cl_uint),&deviceCount,NULL);
	if(status != CL_SUCCESS) exitOnError("Getting number of devices for the program(clGetProgramInfo)");
	
	//Get sizes of compiled binaries for said devices
	size_t *binSize = (size_t*)malloc(sizeof(size_t)*deviceCount);
	status = clGetProgramInfo(program,CL_PROGRAM_BINARY_SIZES,(sizeof(size_t)*deviceCount),binSize,NULL);
	if(status != CL_SUCCESS) exitOnError("Getting binary sizes for the program(clGetProgramInfo)");

	char **bin = ( char**)malloc(sizeof(char*)*deviceCount);
	for(cl_uint i = 0;i<deviceCount;i++) bin[i] = (char*)malloc(binSize[i]);

	//Retrieve compiled binaries
	status = clGetProgramInfo(program,CL_PROGRAM_BINARIES,(sizeof(size_t)*deviceCount),bin,NULL);
	if(status != CL_SUCCESS) exitOnError("Getting program binaries(clGetProgramInfo)");

	//Export binaries to files, appending CL_DEVICE_NAME to each filename
	char binFileName[MAX_NAME_LENGTH];
	for(cl_uint i = 0;i<deviceCount;i++)
	{
	  char deviceName[MAX_NAME_LENGTH];
	  status = clGetDeviceInfo(devices[i],CL_DEVICE_NAME,MAX_NAME_LENGTH,deviceName,NULL);
	  if (status != CL_SUCCESS) exitOnError("Cannot get device name for given device number");
	  printf("Binary image of kernel %s created for device %s.\n",kernelName,deviceName);
	  sprintf(binFileName,"%s_%s.elf",kernelName,deviceName);
	  std::fstream outBinFile(binFileName, (std::fstream::out | std::fstream::binary));
	  if(outBinFile.fail()) exitOnError("Cannot open binary file");
	  outBinFile.write(bin[i],binSize[i]);
	  outBinFile.close();
	}
      for(cl_uint i = 0;i<deviceCount;i++) free(bin[i]);
}
예제 #22
0
	void OpenCLProgram::getBinary()
	{
		cl_uint program_num_devices;
		cl_int err;
		err = clGetProgramInfo(clProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &program_num_devices, NULL);
		assert(err == CL_SUCCESS);
		
		if (program_num_devices == 0) {
			std::cerr << "no valid binary was found" << std::endl;
			return;
		}
		
		size_t binaries_sizes[program_num_devices];
		
		err = clGetProgramInfo(clProgram, CL_PROGRAM_BINARY_SIZES, program_num_devices*sizeof(size_t), binaries_sizes, NULL);
		assert(err = CL_SUCCESS);
		
		char **binaries = new char*[program_num_devices];
		
		for (size_t i = 0; i < program_num_devices; i++)
			binaries[i] = new char[binaries_sizes[i]+1];
		
		err = clGetProgramInfo(clProgram, CL_PROGRAM_BINARIES, program_num_devices*sizeof(size_t), binaries, NULL);
		assert(err = CL_SUCCESS);
		
		for (size_t i = 0; i < program_num_devices; i++) {
			binaries[i][binaries_sizes[i]] = '\0';
			std::cout << "Program " << i << ":" << std::endl;
			std::cout << binaries[i];
		}
		
		for (size_t i = 0; i < program_num_devices; i++)
			delete [] binaries[i];
		
		delete [] binaries;
	}
예제 #23
0
파일: Program.cpp 프로젝트: k0zmo/clw
 string Program::sourceCode() const
 {
     if(!_ctx || isNull())
         return string();
     size_t size;
     cl_int error;		
     if((error = clGetProgramInfo(_id, CL_PROGRAM_SOURCE,
             0, nullptr, &size)) != CL_SUCCESS)
     {
         detail::reportError("Program::sourceCode(): ", error);
         return string();
     }
     if(size == 0)
         return string();
     string buf;
     buf.resize(size);
     if(detail::programInfo(_id, CL_PROGRAM_SOURCE, 
             const_cast<char*>(buf.data()), size))
         return string();
     return buf;
 }
예제 #24
0
void call_kernel_2d(float *x1,float *x2,int n1,int n2,int n3,float *x3,char * cl_name) {

    FILE* programHandle;
    size_t programSize, KernelSourceSize;
    char *programBuffer, *KernelSource;

	int err, szA, szB, szC;
	szA = n1*n2;
	szB = n2*n3;
	szC = n1*n3;

	int DIM = 1;
	size_t global[DIM];
	size_t local[DIM];
	cl_device_id device_id;
	cl_context context;
	cl_command_queue commands;
	cl_program program;
	cl_kernel kernel;
	cl_uint nd;
	cl_mem input1, input2, output;

	err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
	context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
	commands = clCreateCommandQueue(context, device_id, 0, &err);
	input1 = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * szA, NULL, NULL);
	input2 = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * szB, NULL, NULL);
	output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * szC, NULL, NULL);
	err = clEnqueueWriteBuffer(commands, input1, CL_TRUE, 0, sizeof(float) * szA, x1, 0, NULL, NULL);
	err = clEnqueueWriteBuffer(commands, input2, CL_TRUE, 0, sizeof(float) * szB, x2, 0, NULL, NULL);
	//----------------------------------------------------------------------------
    // get size of kernel source
    programHandle = fopen(cl_name, "r");
    fseek(programHandle, 0, SEEK_END);
    programSize = ftell(programHandle);
    rewind(programHandle);

    programBuffer = (char*) malloc(programSize + 1);
    programBuffer[programSize] = '\0';
    fread(programBuffer, sizeof(char), programSize, programHandle);
    fclose(programHandle);

    // create program from buffer
    program = clCreateProgramWithSource(context,1,(const char**) &programBuffer,&programSize, NULL);
    free(programBuffer);

    // read kernel source back in from program to check
    clGetProgramInfo(program, CL_PROGRAM_SOURCE, 0, NULL, &KernelSourceSize);
    KernelSource = (char*) malloc(KernelSourceSize);
    clGetProgramInfo(program, CL_PROGRAM_SOURCE, KernelSourceSize, KernelSource, NULL);

    program = clCreateProgramWithSource(context, 1, (const char **) & KernelSource, NULL, &err);
    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    kernel = clCreateKernel(program, "mmul", &err);

	err  = clSetKernelArg(kernel, 0, sizeof(int), &n1);
	err |= clSetKernelArg(kernel, 1, sizeof(int), &n2);
	err |= clSetKernelArg(kernel, 2, sizeof(int), &n3);
	err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &input1);
	err |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &input2);
	err |= clSetKernelArg(kernel, 5, sizeof(cl_mem), &output);
	//err |= clSetKernelArg(kernel, 6, sizeof(float)*1024, NULL);

	global[0] = (size_t) n1; //global[1] = (size_t) n3;
	err = clEnqueueNDRangeKernel(commands, kernel, DIM, NULL, &global[0], NULL, 0, NULL, NULL); clFinish(commands);
	err = clEnqueueReadBuffer(commands, output, CL_TRUE, 0, sizeof(float) * szC, x3, 0, NULL, NULL );

    clReleaseMemObject(input1);
    clReleaseMemObject(input2);
    clReleaseMemObject(output);
    clReleaseProgram(program);
    clReleaseKernel(kernel);
    clReleaseCommandQueue(commands);
    clReleaseContext(context);

    //printf("nKernel source:\n\n %s \n", KernelSource);
    free(KernelSource);

}
예제 #25
0
int
main(void){
  cl_int err;
  cl_platform_id platforms[MAX_PLATFORMS];
  cl_uint nplatforms;
  cl_device_id devices[MAX_DEVICES + 1]; // + 1 for duplicate test
  cl_device_id device_id0;
  cl_uint num_devices;
  size_t i;
  size_t num_binaries;
  const unsigned char **binaries = NULL;
  size_t *binary_sizes = NULL;
  size_t num_bytes_copied;
  cl_int binary_statuses[MAX_BINARIES];
  cl_int binary_statuses2[MAX_BINARIES];
  cl_program program = NULL;
  cl_program program_with_binary = NULL;

  err = clGetPlatformIDs(MAX_PLATFORMS, platforms, &nplatforms);
  CHECK_OPENCL_ERROR_IN("clGetPlatformIDs");
  if (!nplatforms)
    return EXIT_FAILURE;
  
  err = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, MAX_DEVICES,
                      devices, &num_devices);
  CHECK_OPENCL_ERROR_IN("clGetDeviceIDs");

  cl_context context = clCreateContext(NULL, num_devices, devices, NULL, NULL, &err);
  CHECK_OPENCL_ERROR_IN("clCreateContext");

  size_t kernel_size = strlen(kernel);
  char* kernel_buffer = kernel;

  program = clCreateProgramWithSource(context, 1, (const char**)&kernel_buffer, 
				      &kernel_size, &err);
  CHECK_OPENCL_ERROR_IN("clCreateProgramWithSource");

  err = clBuildProgram(program, num_devices, devices, NULL, NULL, NULL);
  CHECK_OPENCL_ERROR_IN("clBuildProgram");
  
  err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, 0, 0, &num_binaries);
  CHECK_OPENCL_ERROR_IN("clGetProgramInfo");

  num_binaries = num_binaries/sizeof(size_t);
  binary_sizes = (size_t*)malloc(num_binaries * sizeof(size_t));
  binaries = (const unsigned char**)calloc(num_binaries, sizeof(unsigned char*));

  err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, 
			 num_binaries*sizeof(size_t), binary_sizes , 
			 &num_bytes_copied);
  CHECK_OPENCL_ERROR_IN("clGetProgramInfo");
  
  for (i = 0; i < num_binaries; ++i) 
    binaries[i] = (const unsigned char*) malloc(binary_sizes[i] *
						sizeof(const unsigned char));

  err = clGetProgramInfo(program, CL_PROGRAM_BINARIES, 
			 num_binaries*sizeof(char*), binaries, &num_bytes_copied);
  CHECK_OPENCL_ERROR_IN("clGetProgramInfo");
  
  cl_uint num = num_binaries < num_devices ? num_binaries : num_devices;
  if (num == 0)
    {
      err = !CL_SUCCESS;
      goto FREE_AND_EXIT;
    }
  
  program_with_binary = clCreateProgramWithBinary(context, num, devices, binary_sizes, 
						  binaries, binary_statuses, &err);
  CHECK_OPENCL_ERROR_IN("clCreateProgramWithBinary");

  for (i = 0; i < num; ++i) {
      cl_program_binary_type bin_type = 0;
      err = clGetProgramBuildInfo(program_with_binary, devices[i],
                                  CL_PROGRAM_BINARY_TYPE,
                                  sizeof(bin_type), (void *)&bin_type,
                                  NULL);
      CHECK_OPENCL_ERROR_IN("get program binary type");

      /* cl_program_binary_type */
      switch(bin_type) {
        case CL_PROGRAM_BINARY_TYPE_NONE: /*0x0*/
          fprintf(stderr, "program binary type: CL_PROGRAM_BINARY_TYPE_NONE\n");
        break;
        case CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT: /*0x1*/
          fprintf(stderr, "program binary type: CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT\n");
        break;
        case CL_PROGRAM_BINARY_TYPE_LIBRARY: /*0x2*/
          fprintf(stderr, "program binary type: CL_PROGRAM_BINARY_TYPE_LIBRARY\n");
        break;
        case CL_PROGRAM_BINARY_TYPE_EXECUTABLE: /*0x4*/
          fprintf(stderr, "program binary type: CL_PROGRAM_BINARY_TYPE_EXECUTABLE\n");
         break;
      }
  }
  err = clReleaseProgram(program_with_binary);
  CHECK_OPENCL_ERROR_IN("clReleaseProgram");

  for (i = 0; i < num; i++)
    {
      if (binary_statuses[i] != CL_SUCCESS)
        {
          err = !CL_SUCCESS;
          goto FREE_AND_EXIT;
        }
    }
    
  // negative test1: invalid device
  device_id0 = devices[0];
  devices[0] = NULL; // invalid device
  program_with_binary = clCreateProgramWithBinary(context, num, devices, binary_sizes, 
						  binaries, binary_statuses, &err);

  if (err != CL_INVALID_DEVICE || program_with_binary != NULL)
    {
      err = !CL_SUCCESS;
      goto FREE_AND_EXIT;
    }
  err = CL_SUCCESS;

  devices[0] = device_id0;
  for (i = 0; i < num_binaries; ++i) free((void*)binaries[i]);
  free(binary_sizes);
  free(binaries);
  
  // negative test2: duplicate device
  num_binaries = 2;
  devices[1] = devices[0]; // duplicate
  
  binary_sizes = (size_t*)malloc(num_binaries * sizeof(size_t));
  binaries = (const unsigned char**)calloc(num_binaries, sizeof(unsigned char*));
  
  err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, 1*sizeof(size_t), 
			 binary_sizes , &num_bytes_copied);
  CHECK_OPENCL_ERROR_IN("clGetProgramInfo");
  
  binary_sizes[1] = binary_sizes[0];
  
  binaries[0] = (const unsigned char*) malloc(binary_sizes[0] *
					      sizeof(const unsigned char));
  binaries[1] = (const unsigned char*) malloc(binary_sizes[1] *
					      sizeof(const unsigned char));
  
  err = clGetProgramInfo(program, CL_PROGRAM_BINARIES, 1 * sizeof(char*), 
			 binaries, &num_bytes_copied);
  CHECK_OPENCL_ERROR_IN("clGetProgramInfo");
  
  memcpy((void*)binaries[1], (void*)binaries[0], binary_sizes[0]);      
  program_with_binary = clCreateProgramWithBinary(context, 2, devices, binary_sizes, 
						  binaries, binary_statuses2, &err);
  if (err != CL_INVALID_DEVICE || program_with_binary != NULL)
    {
      err = !CL_SUCCESS;
      goto FREE_AND_EXIT;
    }
  err = CL_SUCCESS;

 FREE_AND_EXIT:  
  // Free resources
  for (i = 0; i < num_binaries; ++i) 
    if (binaries) 
      if(binaries[i]) 
	free((void*)binaries[i]);

  if (binary_sizes) 
    free(binary_sizes);
  if (binaries) 
    free(binaries);
  if (program)
    CHECK_CL_ERROR (clReleaseProgram (program));
  if (program_with_binary)
    CHECK_CL_ERROR (clReleaseProgram (program_with_binary));
  if (context)
    CHECK_CL_ERROR (clReleaseContext (context));

  CHECK_CL_ERROR (clUnloadCompiler ());

  return err == CL_SUCCESS ? EXIT_SUCCESS : EXIT_FAILURE;
}
예제 #26
0
int 
NBody::genBinaryImage()
{
    cl_int status = CL_SUCCESS;

    /*
     * Have a look at the available platforms and pick either
     * the AMD one if available or a reasonable default.
     */
    cl_uint numPlatforms;
    cl_platform_id platform = NULL;
    status = clGetPlatformIDs(0, NULL, &numPlatforms);
    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS,
                               "clGetPlatformIDs failed."))
    {
        return SDK_FAILURE;
    }
    if (0 < numPlatforms) 
    {
        cl_platform_id* platforms = new cl_platform_id[numPlatforms];
        status = clGetPlatformIDs(numPlatforms, platforms, NULL);
        if(!sampleCommon->checkVal(status,
                                   CL_SUCCESS,
                                   "clGetPlatformIDs failed."))
        {
            return SDK_FAILURE;
        }

        char platformName[100];
        for (unsigned i = 0; i < numPlatforms; ++i) 
        {
            status = clGetPlatformInfo(platforms[i],
                                       CL_PLATFORM_VENDOR,
                                       sizeof(platformName),
                                       platformName,
                                       NULL);

            if(!sampleCommon->checkVal(status,
                                       CL_SUCCESS,
                                       "clGetPlatformInfo failed."))
            {
                return SDK_FAILURE;
            }

            platform = platforms[i];
            if (!strcmp(platformName, "Advanced Micro Devices, Inc.")) 
            {
                break;
            }
        }
        std::cout << "Platform found : " << platformName << "\n";
        delete[] platforms;
    }

    if(NULL == platform)
    {
        sampleCommon->error("NULL platform found so Exiting Application.");
        return SDK_FAILURE;
    }

    /*
     * If we could find our platform, use it. Otherwise use just available platform.
     */
    cl_context_properties cps[5] = 
    {
        CL_CONTEXT_PLATFORM, 
        (cl_context_properties)platform, 
        CL_CONTEXT_OFFLINE_DEVICES_AMD,
        (cl_context_properties)1,
        0
    };

    context = clCreateContextFromType(cps,
                                      CL_DEVICE_TYPE_ALL,
                                      NULL,
                                      NULL,
                                      &status);

    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS,
                               "clCreateContextFromType failed."))
    {
        return SDK_FAILURE;
    }

    /* create a CL program using the kernel source */
    streamsdk::SDKFile kernelFile;
    std::string kernelPath = sampleCommon->getPath();
    kernelPath.append("NBody_Kernels.cl");
    if(!kernelFile.open(kernelPath.c_str()))
    {
        std::cout << "Failed to load kernel file : " << kernelPath << std::endl;
        return SDK_FAILURE;
    }
    const char * source = kernelFile.source().c_str();
    size_t sourceSize[] = {strlen(source)};
    program = clCreateProgramWithSource(context,
                                        1,
                                        &source,
                                        sourceSize,
                                        &status);
    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS,
                               "clCreateProgramWithSource failed."))
    {
        return SDK_FAILURE;
    }
    
    std::string flagsStr = std::string("");

    // Get additional options
    if(isComplierFlagsSpecified())
    {
        streamsdk::SDKFile flagsFile;
        std::string flagsPath = sampleCommon->getPath();
        flagsPath.append(flags.c_str());
        if(!flagsFile.open(flagsPath.c_str()))
        {
            std::cout << "Failed to load flags file: " << flagsPath << std::endl;
            return SDK_FAILURE;
        }
        flagsFile.replaceNewlineWithSpaces();
        const char * flags = flagsFile.source().c_str();
        flagsStr.append(flags);
    }

    if(flagsStr.size() != 0)
        std::cout << "Build Options are : " << flagsStr.c_str() << std::endl;


    /* create a cl program executable for all the devices specified */
    status = clBuildProgram(program,
                            0,
                            NULL,
                            flagsStr.c_str(),
                            NULL,
                            NULL);
    sampleCommon->checkVal(status,
                        CL_SUCCESS,
                        "clBuildProgram failed.");

    size_t numDevices;
    status = clGetProgramInfo(program, 
                           CL_PROGRAM_NUM_DEVICES,
                           sizeof(numDevices),
                           &numDevices,
                           NULL );
    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS,
                               "clGetProgramInfo(CL_PROGRAM_NUM_DEVICES) failed."))
    {
        return SDK_FAILURE;
    }

    std::cout << "Number of devices found : " << numDevices << "\n\n";
    devices = (cl_device_id *)malloc( sizeof(cl_device_id) * numDevices );
    if(devices == NULL)
    {
        sampleCommon->error("Failed to allocate host memory.(devices)");
        return SDK_FAILURE;
    }
    /* grab the handles to all of the devices in the program. */
    status = clGetProgramInfo(program, 
                              CL_PROGRAM_DEVICES, 
                              sizeof(cl_device_id) * numDevices,
                              devices,
                              NULL );
    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS,
                               "clGetProgramInfo(CL_PROGRAM_DEVICES) failed."))
    {
        return SDK_FAILURE;
    }


    /* figure out the sizes of each of the binaries. */
    size_t *binarySizes = (size_t*)malloc( sizeof(size_t) * numDevices );
    if(devices == NULL)
    {
        sampleCommon->error("Failed to allocate host memory.(binarySizes)");
        return SDK_FAILURE;
    }
    
    status = clGetProgramInfo(program, 
                              CL_PROGRAM_BINARY_SIZES,
                              sizeof(size_t) * numDevices, 
                              binarySizes, NULL);
    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS,
                               "clGetProgramInfo(CL_PROGRAM_BINARY_SIZES) failed."))
    {
        return SDK_FAILURE;
    }

    size_t i = 0;
    /* copy over all of the generated binaries. */
    char **binaries = (char **)malloc( sizeof(char *) * numDevices );
    if(binaries == NULL)
    {
        sampleCommon->error("Failed to allocate host memory.(binaries)");
        return SDK_FAILURE;
    }

    for(i = 0; i < numDevices; i++)
    {
        if(binarySizes[i] != 0)
        {
            binaries[i] = (char *)malloc( sizeof(char) * binarySizes[i]);
            if(binaries[i] == NULL)
            {
                sampleCommon->error("Failed to allocate host memory.(binaries[i])");
                return SDK_FAILURE;
            }
        }
        else
        {
            binaries[i] = NULL;
        }
    }
    status = clGetProgramInfo(program, 
                              CL_PROGRAM_BINARIES,
                              sizeof(char *) * numDevices, 
                              binaries, 
                              NULL);
    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS,
                               "clGetProgramInfo(CL_PROGRAM_BINARIES) failed."))
    {
        return SDK_FAILURE;
    }

    /* dump out each binary into its own separate file. */
    for(i = 0; i < numDevices; i++)
    {
        char fileName[100];
        sprintf(fileName, "%s.%d", dumpBinary.c_str(), (int)i);
        if(binarySizes[i] != 0)
        {
            char deviceName[1024];
            status = clGetDeviceInfo(devices[i], 
                                     CL_DEVICE_NAME, 
                                     sizeof(deviceName),
                                     deviceName, 
                                     NULL);
            if(!sampleCommon->checkVal(status,
                                       CL_SUCCESS,
                                       "clGetDeviceInfo(CL_DEVICE_NAME) failed."))
            {
                return SDK_FAILURE;
            }

            printf( "%s binary kernel: %s\n", deviceName, fileName);
            streamsdk::SDKFile BinaryFile;
            if(!BinaryFile.writeBinaryToFile(fileName, 
                                             binaries[i], 
                                             binarySizes[i]))
            {
                std::cout << "Failed to load kernel file : " << fileName << std::endl;
                return SDK_FAILURE;
            }
        }
        else
        {
            printf("Skipping %s since there is no binary data to write!\n",
                    fileName);
        }
    }

    // Release all resouces and memory
    for(i = 0; i < numDevices; i++)
    {
        if(binaries[i] != NULL)
        {
            free(binaries[i]);
            binaries[i] = NULL;
        }
    }

    if(binaries != NULL)
    {
        free(binaries);
        binaries = NULL;
    }

    if(binarySizes != NULL)
    {
        free(binarySizes);
        binarySizes = NULL;
    }

    if(devices != NULL)
    {
        free(devices);
        devices = NULL;
    }

    status = clReleaseProgram(program);
    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS,
                               "clReleaseProgram failed."))
    {
        return SDK_FAILURE;
    }

    status = clReleaseContext(context);
    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS,
                               "clReleaseContext failed."))
    {
        return SDK_FAILURE;
    }

    return SDK_SUCCESS;
}
예제 #27
0
// Main function
// *********************************************************************
int main(int argc, char **argv)
{
    
        
    if (!device_comm_queue)
        ocl_initialize();

    const char* program_source;
    
    program_source = load_source_file(argv[1]);

    
    cl_program ocl_program = clCreateProgramWithSource(device_context, 1,
        (const char**)&program_source, NULL, NULL);
    printf("********** program created.\n");


    
    // Build the program (OpenCL JIT compilation)
    char options[100];
    const char* flags = "-g -w -I\"";
    const char* OMHOME = getenv("OPENMODELICAHOME");
    const char* OMINCL = "/include/omc\"";
    const char* OMBIN = "/bin\"";
    
    if ( OMHOME != NULL )
    {
        strcpy(options, flags);
        strcat(options, OMHOME);
        strcat(options, OMINCL);
        strcat(options, " -I\"");
        strcat(options, OMHOME);
        strcat(options, OMBIN);
        printf("Building OpenCL code with flags %s\n",options);
        cl_int err;
        err = clBuildProgram(ocl_program, 0, NULL, options, NULL, NULL);
        ocl_error_check(OCL_BUILD_PROGRAM, err);
    
        size_t size;
        clGetProgramBuildInfo(ocl_program, ocl_device, CL_PROGRAM_BUILD_LOG,        // Get build log size
                                  0, NULL, &size);
        char * log = (char*)malloc(size);
        clGetProgramBuildInfo(ocl_program,ocl_device,CL_PROGRAM_BUILD_LOG,size,log, NULL);
        printf("\t\tCL_PROGRAM_BUILD_LOG:  \t%s\n", log);
        free(log);
        
        if(err){
            printf("Errors detected in compilation of OpenCL code:\n");
            exit(1);
        }
        else
            printf("Program built successfuly.\n");
        
        //if no error create the binary
        clGetProgramInfo(ocl_program, CL_PROGRAM_BINARY_SIZES, 
        sizeof(size_t), &size, NULL);
        unsigned char * binary = (unsigned char*)malloc(size);
        printf("Size of program binary :\t%d\n",size);
        clGetProgramInfo(ocl_program, CL_PROGRAM_BINARIES, sizeof(size_t), &binary, NULL);
        printf("Program binary retrived.\n");        
        
        const char* binary_ext = ".bin";
        char* binary_name = strcat(argv[1],binary_ext);
        printf("binary file name %s\n", binary_name);
        FILE * cache;
        cache = fopen(binary_name, "wb");
        fwrite(binary, sizeof(char), size, cache);
        fclose(cache);
        //free(binary);
        
        
        
        err = 0;
        cl_program newprogram = clCreateProgramWithBinary(device_context, 1, &ocl_device, &size, (const unsigned char **)&binary, NULL, &err);
        if(!err)
            printf("Program created from binary\n");
        else{
            switch (err){
                case CL_INVALID_CONTEXT:
                    printf("Error building program:\n");
                    printf("CL_INVALID_CONTEXT \n");
                    break;
                case CL_INVALID_VALUE:
                    printf("Error building program:\n");
                    printf("CL_INVALID_VALUE \n");
                    break;
                case CL_INVALID_DEVICE:
                    printf("Error building program:\n");
                    printf("CL_INVALID_DEVICE \n");
                    break;
                case CL_INVALID_BINARY:
                    printf("Error building program:\n");
                    printf("CL_INVALID_BINARY \n");
                    break;
                case CL_OUT_OF_HOST_MEMORY:
                    printf("Error building program:\n");
                    printf("CL_OUT_OF_HOST_MEMORY \n");
                    break;
            }
        }
                
        
            
            
        return 0;

   }
   else
   {
       printf("Couldn't find OPENMODELICAHOME!\n");
       exit(1);
   }

    

    
    ocl_clean_up();

    
    return 0;
}
예제 #28
0
파일: libclsmm.c 프로젝트: cdavro/cp2k
// Kernel launch
static int launch_clsmm_dnt_largeDB_16_23_23_12_23_96_2_3_12_10 (void *param_stack, int stack_size, void *stream, int m_max, int n_max, int k_max, void *a_data, void *b_data, void *c_data){
  int shared_size = 0;
//{'name': 'clsmm_dnt_largeDB_16_23_23_12_23_96_2_3_12_10', 'tile_n': 3, 'tile_m': 2, 'm': 23, 'n': 23, 'threads': 96, 'w': 10, 'v': 12, 'minblocks': 12, 'k': 23, 'grouping': 16}
  int careful = (stack_size / 16);
  int nruns = stack_size - careful * 16;

  int i;
  cl_kernel opencl_kernel = NULL;
  cl_program opencl_program = NULL;

  // local queue pointer and device + context value 
  acc_opencl_stream_type *opencl_stream = (acc_opencl_stream_type *) stream;
  acc_opencl_dev_type     opencl_device = (*opencl_stream).device;
  cl_context              opencl_ctx    = opencl_device.ctx;
  cl_device_id            opencl_dev    = opencl_device.device_id;
  cl_command_queue        opencl_queue  = (*opencl_stream).queue;

  // get or create kernel
  if (multiply_kernel) {
    opencl_kernel = multiply_kernel;
  } else {
    // read kernel code
    if (verbose_print) fprintf(stdout,"reading multiplication kernel ...\n");

    size_t  *lengths = (size_t *) malloc(sizeof(size_t) * 2); // 2 - two files, each with different size
    char   **strings = (char **) malloc(sizeof(char *) * 2);  // 2 - two files, each with different lenght
    read_file_at_path(&(strings[0]), &(lengths[0]), "LIBSMM_CL_KERNEL_PATH", "clsmm_common.cl");
    read_file_at_path(&(strings[1]), &(lengths[1]), "LIBSMM_CL_KERNEL_PATH", "clsmm_dnt_largeDB2.cl");

    // get kernel code, build program and kernel
    if (verbose_print) fprintf(stdout,"building multiplication kernel ...\n");
    opencl_program = clCreateProgramWithSource( // cl_program
                       opencl_ctx,              // cl_context   context
                       (cl_uint) 2,             // cl_uint      count
                       (const char **) strings, // const char   **strings
                       lengths,                 // const size_t *lengths
                       &cl_error);              // cl_int       *errcode_ret
    if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clCreateProgramWithSource %d\n", (int) cl_error);

    free(lengths); free(strings[0]); free(strings[1]); free(strings);

    if (cl_error == CL_SUCCESS && verbose_src){
      fprintf(stdout, "\n@@@@@@@@@ SOURCE-DATA: @@@@@@@@@\n");
      size_t src_sz;
      cl_error = clGetProgramInfo(opencl_program, CL_PROGRAM_SOURCE, (size_t) 0, NULL, &src_sz);
      if (cl_error != CL_SUCCESS) fprintf(stdout, "Error 1 (print source) %d\n", (int) cl_error);
      char *src = (char *) malloc(src_sz);
      src[src_sz - 1] = '\0';
      cl_error = clGetProgramInfo(opencl_program, CL_PROGRAM_SOURCE, src_sz, src, NULL);
      if (cl_error != CL_SUCCESS) fprintf(stdout, "Error 2 (print source) %d\n", (int) cl_error);
      fprintf(stdout, "%s", src);
      free(src);
      fprintf(stdout, "@@@@@@@@@ END SOURCE-DATA, SIZE=%zu @@@@@@@@@\n", src_sz);
    }

    cl_error = clBuildProgram(                       // cl_int
                 opencl_program,                     // cl_program                     program
                 (cl_uint) 1,                        // cl_uint                        num_devices
                 (const cl_device_id *) &opencl_dev, // const cl_device_id             *device_list
                 BUILD_OPTIONS,                      // const char                     *options
                 NULL,                               // void (CL_CALLBACK* pfn_notify) (cl_program program, void *user_data)
                 NULL);                              // void                           *user_data
    if (cl_error != CL_SUCCESS){
      fprintf(stdout, "\n@@@@@@@@@ BUILD-DATA, ERROR=%d: @@@@@@@@@\n", (int) cl_error);
      size_t bld_sz;
      cl_error = clGetProgramBuildInfo(opencl_program, opencl_dev, CL_PROGRAM_BUILD_LOG, (size_t) 0, NULL, &bld_sz);
      if (cl_error != CL_SUCCESS) fprintf(stdout, "Error 1 (print source) %d\n", (int) cl_error);
      char *bld = (char *) malloc(bld_sz);
      cl_error = clGetProgramBuildInfo(opencl_program, opencl_dev, CL_PROGRAM_BUILD_LOG, bld_sz, bld, NULL);
      if (cl_error != CL_SUCCESS) fprintf(stdout, "Error 2 (print source) %d\n", (int) cl_error);
      bld[bld_sz - 1] = '\0';
      fprintf(stdout, "%s", bld);
      free(bld);
      fprintf(stdout, "@@@@@@@@@ END BUILD-DATA, SIZE=%zu @@@@@@@@@\n", bld_sz);
    }

    if ((cl_error == CL_SUCCESS) && (verbose_ptx)) {
      fprintf(stdout, "\n@@@@@@@@@ PTX-DATA: @@@@@@@@@\n");
      size_t ptx_sz;
      cl_error = clGetProgramInfo(opencl_program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &ptx_sz, NULL);
      if (cl_error != CL_SUCCESS) fprintf(stdout,"Error 1 (print ptx) %d\n", (int) cl_error);
      unsigned char *ptx = (unsigned char *) malloc(ptx_sz);
      cl_error = clGetProgramInfo(opencl_program, CL_PROGRAM_BINARIES, ptx_sz, &ptx, NULL);
      if (cl_error != CL_SUCCESS) fprintf(stdout,"Error 2 (print ptx) %d\n", (int) cl_error);
      ptx[ptx_sz - 1] = '\0';
      fprintf(stdout, "%s", ptx);
      free(ptx);
      fprintf(stdout, "@@@@@@@@@ END PTX-DATA, SIZE=%zu: @@@@@@@@@\n", ptx_sz);
    }
  
    opencl_kernel = clCreateKernel(                                    // cl_kernel
                      opencl_program,                                  // cl_program program
                      "clsmm_dnt_largeDB2_16_23_23_12_23_96_3_2_12_8", // const char *kernel_name
                      &cl_error);                                      // cl_int     *errcode_ret
    if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clCreateKernel %d\n", (int) cl_error);

    // keep for later usage
    multiply_kernel = opencl_kernel;
  }

  // set kernel parameters
  if (verbose_print) fprintf(stdout,"set multiplication kernel parameters ...\n");
  cl_error = clSetKernelArg(opencl_kernel, (cl_uint) 0, sizeof(cl_mem), param_stack);
  if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clSetKernelArg(0) %d\n", (int) cl_error);
  cl_error = clSetKernelArg(opencl_kernel, (cl_uint) 1, sizeof(int), &careful);
  if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clSetKernelArg(1) %d\n", (int) cl_error);
  cl_error = clSetKernelArg(opencl_kernel, (cl_uint) 2, sizeof(int), &nruns);
  if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clSetKernelArg(2) %d\n", (int) cl_error);
  cl_error = clSetKernelArg(opencl_kernel, (cl_uint) 3, sizeof(cl_mem), a_data);
  if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clSetKernelArg(3) %d\n", (int) cl_error);
  cl_error = clSetKernelArg(opencl_kernel, (cl_uint) 4, sizeof(cl_mem), b_data);
  if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clSetKernelArg(4) %d\n", (int) cl_error);
  cl_error = clSetKernelArg(opencl_kernel, (cl_uint) 5, sizeof(cl_mem), c_data);
  if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clSetKernelArg(5) %d\n", (int) cl_error);

  // set kernel sizes and submit kernel
  if (verbose_print) fprintf(stdout,"set multiplication kernel sizes ...\n");
  size_t num_groups = {((stack_size + 16 - 1) / 16)};
  size_t work_items = {96};
  size_t global_work_size[1] = {num_groups * work_items};
  size_t local_work_size[1] = {work_items};

  if (verbose_print) fprintf(stdout,"calling multiplication kernel ...\n");
  cl_error = clEnqueueNDRangeKernel( // cl_int
               opencl_queue,         // cl_command_queue command_queue
               opencl_kernel,        // cl_kernel        kernel
               (cl_uint) 1,          // cl_uint          work_dim
               NULL,                 // const size_t     *global_work_offset
               global_work_size,     // const size_t     *global_work_size
               local_work_size,      // const size_t     *local_work_size
               (cl_uint) 0,          // cl_uint          num_events_in_wait_list
               NULL,                 // const cl_event   *event_wait_list
               NULL);                // cl_event         *event
  if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clEnqueueNDRangeKernel %d\n", (int) cl_error);

  return 0;
}
예제 #29
0
파일: libclsmm.c 프로젝트: cdavro/cp2k
// Transpose kernel switch and launch
//
// NOTE: All arrays are device buffers - no access from host side.
//
int libclsmm_transpose_d (void *trs_stack, int offset, int nblks, void *buffer, int m, int n, void *stream){
  int idx = 0;
  int missing = 0; //false
  cl_kernel opencl_kernel = NULL;
  cl_program opencl_program = NULL;

  // local queue pointer and device + context value 
  acc_opencl_stream_type *opencl_stream = (acc_opencl_stream_type *) stream;
  acc_opencl_dev_type     opencl_device = (*opencl_stream).device;
  cl_context              opencl_ctx    = opencl_device.ctx;
  cl_device_id            opencl_dev    = opencl_device.device_id;
  cl_command_queue        opencl_queue  = (*opencl_stream).queue;

  switch(m){
    case 23: idx = 0; break;
    default: missing = 1;
  }

  idx *= 1;
  switch(n){
    case 23: idx += 0; break;
    default: missing = 1;
  }

  // If there is no kernel for these blocks, we don't need to transpose them.
  if (missing) return 0;

  if (verbose_print) fprintf(stdout, "Transpose %d blocks.\n", nblks);

  switch(idx){
    case 0:
      // get or create kernel
      if (transpose_kernel) {
        opencl_kernel = transpose_kernel;
      } else {
        // read kernel code
        if (verbose_print) fprintf(stdout,"reading transpose kernel ...\n");

        size_t  *lengths = (size_t *) malloc(sizeof(size_t) * 2); // 2 - two files, each with different size
        char   **strings = (char **) malloc(sizeof(char *) * 2);  // 2 - two files, each with different lenght
        read_file_at_path(&(strings[0]), &(lengths[0]), "LIBSMM_CL_KERNEL_PATH", "clsmm_common.cl");
        read_file_at_path(&(strings[1]), &(lengths[1]), "LIBSMM_CL_KERNEL_PATH", "clsmm_transpose.cl");

        // get kernel code, build program and kernel
        if (verbose_print) fprintf(stdout,"building transpose kernel ...\n");
        opencl_program = clCreateProgramWithSource( // cl_program
                           opencl_ctx,              // cl_context   context
                           (cl_uint) 2,             // cl_uint      count
                           (const char **) strings, // const char   **strings
                           lengths,                 // const size_t *lengths
                           &cl_error);              // cl_int       *errcode_ret
        if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clCreateProgramWithSource %d\n", (int) cl_error);

        free(lengths); free(strings[0]); free(strings[1]); free(strings);

        if (cl_error == CL_SUCCESS && verbose_src){
          fprintf(stdout, "\n@@@@@@@@@ SOURCE-DATA: @@@@@@@@@\n");
          size_t src_sz;
          cl_error = clGetProgramInfo(opencl_program, CL_PROGRAM_SOURCE, (size_t) 0, NULL, &src_sz);
          if (cl_error != CL_SUCCESS) fprintf(stdout, "Error 1 (print source) %d\n", (int) cl_error);
          char *src = (char *) malloc(src_sz);
          src[src_sz - 1] = '\0';
          cl_error = clGetProgramInfo(opencl_program, CL_PROGRAM_SOURCE, src_sz, src, NULL);
          if (cl_error != CL_SUCCESS) fprintf(stdout, "Error 2 (print source) %d\n", (int) cl_error);
          fprintf(stdout, "%s", src);
          free(src);
          fprintf(stdout, "@@@@@@@@@ END SOURCE-DATA, SIZE=%zu @@@@@@@@@\n", src_sz);
        }

        cl_error = clBuildProgram(                       // cl_int
                     opencl_program,                     // cl_program                     program
                     (cl_uint) 1,                        // cl_uint                        num_devices
                     (const cl_device_id *) &opencl_dev, // const cl_device_id             *device_list
                     BUILD_OPTIONS,                      // const char                     *options
                     NULL,                               // void (CL_CALLBACK* pfn_notify) (cl_program program, void *user_data)
                     NULL);                              // void                           *user_data
        if (cl_error != CL_SUCCESS){
          fprintf(stdout, "\n@@@@@@@@@ BUILD-DATA, ERROR=%d: @@@@@@@@@\n", (int) cl_error);
          size_t bld_sz;
          cl_error = clGetProgramBuildInfo(opencl_program, opencl_dev, CL_PROGRAM_BUILD_LOG, (size_t) 0, NULL, &bld_sz);
          if (cl_error != CL_SUCCESS) fprintf(stdout, "Error 1 (print source) %d\n", (int) cl_error);
          char *bld = (char *) malloc(bld_sz);
          cl_error = clGetProgramBuildInfo(opencl_program, opencl_dev, CL_PROGRAM_BUILD_LOG, bld_sz, bld, NULL);
          if (cl_error != CL_SUCCESS) fprintf(stdout, "Error 2 (print source) %d\n", (int) cl_error);
          bld[bld_sz - 1] = '\0';
          fprintf(stdout, "%s", bld);
          free(bld);
          fprintf(stdout, "@@@@@@@@@ END BUILD-DATA, SIZE=%zu @@@@@@@@@\n", bld_sz);
        }
  
        if ((cl_error == CL_SUCCESS) && (verbose_ptx)) {
          fprintf(stdout, "\n@@@@@@@@@ PTX-DATA: @@@@@@@@@\n");
          size_t ptx_sz;
          cl_error = clGetProgramInfo(opencl_program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &ptx_sz, NULL);
          if (cl_error != CL_SUCCESS) fprintf(stdout,"Error 1 (print ptx) %d\n", (int) cl_error);
          unsigned char *ptx = (unsigned char *) malloc(ptx_sz);
          cl_error = clGetProgramInfo(opencl_program, CL_PROGRAM_BINARIES, ptx_sz, &ptx, NULL);
          if (cl_error != CL_SUCCESS) fprintf(stdout,"Error 2 (print ptx) %d\n", (int) cl_error);
          ptx[ptx_sz - 1] = '\0';
          fprintf(stdout, "%s", ptx);
          free(ptx);
          fprintf(stdout, "@@@@@@@@@ END PTX-DATA, SIZE=%zu: @@@@@@@@@\n", ptx_sz);
        }
  
        opencl_kernel = clCreateKernel(        // cl_kernel
                          opencl_program,      // cl_program program
                          "transpose_23_23_d", // const char *kernel_name
                          &cl_error);          // cl_int     *errcode_ret
        if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clCreateKernel %d\n", (int) cl_error);
  
        // keep for later usage
        transpose_kernel = opencl_kernel;
      }
  
      // set kernel parameters
      if (verbose_print) fprintf(stdout,"set transpose kernel parameters ...\n");
      cl_error = clSetKernelArg(opencl_kernel, 0, sizeof(cl_mem), trs_stack);
      if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clSetKernelArg(0) %d\n", (int) cl_error);
      cl_error = clSetKernelArg(opencl_kernel, 1, sizeof(int), &offset);
      if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clSetKernelArg(1) %d\n", (int) cl_error);
      cl_error = clSetKernelArg(opencl_kernel, 2, sizeof(cl_mem), buffer);
      if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clSetKernelArg(2) %d\n", (int) cl_error);
      cl_error = clSetKernelArg(opencl_kernel, 3, (23 * 23 * sizeof(double)), NULL); // 23x23 buffer in (local) device memory
      if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clSetKernelArg(3) %d\n", (int) cl_error);

      // set kernel size and submit kernel
      if (verbose_print) fprintf(stdout,"set transpose kernel sizes ...\n");
      size_t work_items = {23};
      size_t global_work_size[1] = {nblks * work_items};
      size_t local_work_size[1] = {work_items};

      if (verbose_print) fprintf(stdout,"calling transpose kernel ...\n");
      cl_error = clEnqueueNDRangeKernel( // cl_int
                   opencl_queue,         // cl_command_queue command_queue
                   opencl_kernel,        // cl_kernel        kernel
                   (cl_uint) 1,          // cl_uint          work_dim
                   NULL,                 // const size_t     *global_work_offset
                   global_work_size,     // const size_t     *global_work_size
                   local_work_size,      // const size_t     *local_work_size
                   (cl_uint) 0,          // cl_uint          num_events_in_wait_list
                   NULL,                 // const cl_event   *event_wait_list
                   NULL);                // cl_event         *event
      if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clEnqueueNDRangeKernel %d\n", (int) cl_error);

      return 0;
    break;
    // If there is no kernel for these blocks, we don't need to transpose them.
    default: return 0;
  }

}
예제 #30
0
/* dpoIKernel compileKernel (in AString source, in AString kernelName, [optional] in AString options); */
NS_IMETHODIMP dpoCContext::CompileKernel(const nsAString & source, const nsAString & kernelName, const nsAString & options, dpoIKernel **_retval)
{
	cl_program program;
	cl_kernel kernel;
	cl_int err_code, err_code2;
	cl_uint numDevices;
	cl_device_id *devices = NULL;
	size_t actual;
	char *sourceStr, *optionsStr, *kernelNameStr;
	nsCOMPtr<dpoCKernel> ret;
	nsresult result;

	sourceStr = ToNewUTF8String(source);
	DEBUG_LOG_STATUS("CompileKernel", "Source: " << sourceStr);
	program = clCreateProgramWithSource(context, 1, (const char**)&sourceStr, NULL, &err_code);
	nsMemory::Free(sourceStr);
	if (err_code != CL_SUCCESS) {
		DEBUG_LOG_ERROR("CompileKernel", err_code);
		return NS_ERROR_ILLEGAL_VALUE;
	}

	optionsStr = ToNewUTF8String(options);
	err_code = clBuildProgram(program, 0, NULL, optionsStr, NULL, NULL);
	nsMemory::Free(optionsStr);
	if (err_code != CL_SUCCESS) {
		DEBUG_LOG_ERROR("CompileKernel", err_code);
	}
		
	err_code2 = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &numDevices, NULL);
	if (err_code2 != CL_SUCCESS) {
		DEBUG_LOG_ERROR("CompileKernel", err_code2);
		goto FAIL;
	} 

	devices = (cl_device_id *) nsMemory::Alloc(numDevices * sizeof(cl_device_id));
	err_code2 = clGetProgramInfo(program, CL_PROGRAM_DEVICES, numDevices * sizeof(cl_device_id), devices, NULL);
	if (err_code2 != CL_SUCCESS) {
		DEBUG_LOG_ERROR("CompileKernel", err_code);
		goto FAIL;
	} 
	err_code2 = clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, &actual);
	if (actual > buildLogSize) {
		if (buildLog != NULL) {
			nsMemory::Free(buildLog);
		}
		buildLog = (char *) nsMemory::Alloc(actual * sizeof(char));
		if (buildLog == NULL) {
			DEBUG_LOG_STATUS("CompileKernel", "Cannot allocate buildLog");
			buildLogSize = 0;
			goto DONE;
		}
		buildLogSize = actual;
		err_code2 = clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, &actual);
	}
			
	if (err_code2 != CL_SUCCESS) {
		DEBUG_LOG_ERROR("CompileKernel", err_code);
		goto FAIL;
	}

	DEBUG_LOG_STATUS("CompileKernel", "buildLog: " << buildLog);
	goto DONE;

FAIL:
	if (buildLog != NULL) {
		nsMemory::Free(buildLog);
		buildLog = NULL;
		buildLogSize = 0;
	}

DONE:
	if (devices != NULL) {
		nsMemory::Free(devices);
	}
	
	kernelNameStr = ToNewUTF8String(kernelName);
	kernel = clCreateKernel(program, kernelNameStr, &err_code);
	nsMemory::Free( kernelNameStr);
	clReleaseProgram(program);
	if (err_code != CL_SUCCESS) {
		DEBUG_LOG_ERROR("CompileKernel", err_code);
		return NS_ERROR_NOT_AVAILABLE;
	}

	ret = new dpoCKernel(this);
	if (ret == NULL) {
		clReleaseKernel(kernel);
		DEBUG_LOG_STATUS("CompileKernel", "Cannot create new dpoCKernel object");
		return NS_ERROR_OUT_OF_MEMORY;
	}

	/* all kernels share the single buffer for the failure code */
	result = ret->InitKernel(cmdQueue, kernel, kernelFailureMem);
	if (NS_FAILED(result)) {
		clReleaseKernel(kernel);
		return result;
	}

	ret.forget((dpoCKernel **)_retval);
	
	return NS_OK;
}