cl_int WINAPI wine_clGetProgramInfo(cl_program program, cl_program_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int ret; TRACE("\n"); ret = clGetProgramInfo(program, param_name, param_value_size, param_value, param_value_size_ret); return ret; }
WebCLGetInfo WebCLProgram::getInfo(int param_name, ExceptionState& es) { cl_int err = 0; cl_uint uint_units = 0; char program_string[4096]; RefPtr<WebCLContext> contextObj = nullptr; RefPtr<WebCLDeviceList> deviceList = nullptr; if (m_cl_program == NULL) { es.throwWebCLException( WebCLException::INVALID_PROGRAM, WebCLException::invalidProgramMessage); printf("Error: Invalid program object\n"); return WebCLGetInfo(); } switch(param_name) { case WebCL::PROGRAM_NUM_DEVICES: err = clGetProgramInfo(m_cl_program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &uint_units, NULL); if (err == CL_SUCCESS) return WebCLGetInfo(static_cast<unsigned int>(uint_units)); break; case WebCL::PROGRAM_SOURCE: err = clGetProgramInfo(m_cl_program, CL_PROGRAM_SOURCE, sizeof(program_string), &program_string, NULL); if (err == CL_SUCCESS) return WebCLGetInfo(String(program_string)); break; case WebCL::PROGRAM_CONTEXT: return WebCLGetInfo(PassRefPtr<WebCLContext>(m_cl_context.get())); break; case WebCL::PROGRAM_DEVICES: return WebCLGetInfo(m_cl_context->getDevices()); break; default: printf("Error: UNSUPPORTED program Info type = %d ",param_name); es.throwWebCLException( WebCLException::INVALID_PROGRAM, WebCLException::invalidProgramMessage); return WebCLGetInfo(); } WebCLException::throwException(err, es); return WebCLGetInfo(); }
void cl_printBinaries(cl_program program) { cl_uint program_num_devices; clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &program_num_devices, NULL ); printf("Number of devices: %d\n", program_num_devices); //size_t binaries_sizes[program_num_devices]; size_t * binaries_sizes = (size_t *)malloc(sizeof(size_t)*program_num_devices); clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, program_num_devices*sizeof(size_t), binaries_sizes, NULL ); char** binaries = (char**)malloc(sizeof(char*)*program_num_devices); for (unsigned int i = 0; i < program_num_devices; i++) binaries[i] = (char*)malloc(sizeof(char)*(binaries_sizes[i]+1)); clGetProgramInfo(program, CL_PROGRAM_BINARIES, program_num_devices*sizeof(size_t), binaries, NULL); for (unsigned int i = 0; i < program_num_devices; i++) { binaries[i][binaries_sizes[i]] = '\0'; printf("Program %d\n", i); printf("%s\n", binaries[i]); } for (unsigned int i = 0; i < program_num_devices; i++) free(binaries[i]); free(binaries); }
void oclLogBinary(cl_program clProg, cl_device_id clDev) { // Grab the number of devices associated with the program cl_uint num_devices; clGetProgramInfo(clProg, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &num_devices, NULL); // Grab the device ids cl_device_id* devices = (cl_device_id*) malloc(num_devices * sizeof(cl_device_id)); clGetProgramInfo(clProg, CL_PROGRAM_DEVICES, num_devices * sizeof(cl_device_id), devices, 0); // Grab the sizes of the binaries size_t* binary_sizes = (size_t*)malloc(num_devices * sizeof(size_t)); clGetProgramInfo(clProg, CL_PROGRAM_BINARY_SIZES, num_devices * sizeof(size_t), binary_sizes, NULL); // Now get the binaries char** ptx_code = (char**)malloc(num_devices * sizeof(char*)); for( unsigned int i=0; i<num_devices; ++i) { ptx_code[i] = (char*)malloc(binary_sizes[i]); } clGetProgramInfo(clProg, CL_PROGRAM_BINARIES, 0, ptx_code, NULL); // Find the index of the device of interest unsigned int idx = 0; while((idx < num_devices) && (devices[idx] != clDev)) { ++idx; } // If the index is associated, log the result if( idx < num_devices ) { MITK_INFO<< "\n ---------------- \n Program Binary: \n -----------------------\n"; MITK_INFO<< ptx_code[idx]; } free( devices ); free( binary_sizes ); for(unsigned int i=0; i<num_devices; ++i) { free(ptx_code[i]); } free( ptx_code ); }
void cb(cl_program p,void* data) { clRetainProgram(p); cl_device_id devid[1]; clGetProgramInfo(p,CL_PROGRAM_DEVICES,sizeof(cl_device_id),(void*)devid,NULL); char bug[65536]; clGetProgramBuildInfo(p,devid[0],CL_PROGRAM_BUILD_LOG,65536*sizeof(char),bug,NULL); clReleaseProgram(p); LOGE("Build log \n %s\n",bug); }
bool programInfo(cl_program id, cl_program_info info, Value* buf, size_t length) { cl_int error = 0; if((error = clGetProgramInfo(id, info, sizeof(Value) * length, buf, nullptr)) != CL_SUCCESS) { reportError("programInfo(): ", error); return false; } return true; }
bool CL_Program::GetCompiledBinaries(cl_uchar** ppBinaryArrayOutput) const { CL_CPP_CONDITIONAL_RETURN_FALSE(m_uNumDevices == 0); CL_CPP_CONDITIONAL_RETURN_FALSE(!ppBinaryArrayOutput); // Dynamically retreive the compiled source buffers. cl_int iErrorCode = clGetProgramInfo(m_Program, CL_PROGRAM_BINARIES, 0, ppBinaryArrayOutput, NULL); CL_CPP_CATCH_ERROR(iErrorCode); CL_CPP_FORCE_RETURN_BOOL_BY_ERROR(iErrorCode); }
bool save_binary(const string& clbin) { size_t size = 0; clGetProgramInfo(cpProgram, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL); if(!size) return false; vector<uint8_t> binary(size); uint8_t *bytes = &binary[0]; clGetProgramInfo(cpProgram, CL_PROGRAM_BINARIES, sizeof(uint8_t*), &bytes, NULL); if(!path_write_binary(clbin, binary)) { opencl_error(string_printf("OpenCL failed to write cached binary %s.", clbin.c_str())); return false; } return true; }
void pclu_dump_binary(pclu_program* pgm, const char* path) { int errcode; size_t bin_size; errcode = clGetProgramInfo(pgm->program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &bin_size, 0); pclu_check_call("clGetProgramInfo(BIN_SIZE)", errcode); cl_uchar* binary = (cl_uchar*) malloc(bin_size); errcode = clGetProgramInfo(pgm->program, CL_PROGRAM_BINARIES, bin_size, &binary, 0); pclu_check_call("clGetProgramInfo(BINARIES)", errcode); FILE* bf = fopen(path, "w"); fwrite((void*)binary, bin_size, 1, bf); fclose(bf); free(binary); }
void Program::getInfo(cl_program_info paramName, size_t paramValueSize, void *paramValue) const { cl_int err = 0; size_t written = 0; err = clGetProgramInfo(programHandle(), paramName, paramValueSize, paramValue, &written); if(err != CL_SUCCESS) { throw OpenCLException(err); } }
// **************************************************************************** // Method: oclGetProgBinary // // Purpose: // Get the binary (PTX) of the program associated with the device // // Arguments: // cpProgram OpenCL program // cdDevice device of interest // binary returned code // length length of returned code // // Copyright 1993-2013 NVIDIA Corporation // // **************************************************************************** inline void oclGetProgBinary (cl_program cpProgram, cl_device_id cdDevice, char** binary, size_t* length) { // Grab the number of devices associated witht the program cl_uint num_devices; clGetProgramInfo(cpProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &num_devices, NULL); // Grab the device ids cl_device_id* devices = (cl_device_id*) malloc(num_devices * sizeof(cl_device_id)); clGetProgramInfo(cpProgram, CL_PROGRAM_DEVICES, num_devices * sizeof(cl_device_id), devices, 0); // Grab the sizes of the binaries size_t* binary_sizes = (size_t*)malloc(num_devices * sizeof(size_t)); clGetProgramInfo(cpProgram, CL_PROGRAM_BINARY_SIZES, num_devices * sizeof(size_t), binary_sizes, NULL); // Now get the binaries char** ptx_code = (char**) malloc(num_devices * sizeof(char*)); for( unsigned int i=0; i<num_devices; ++i) { ptx_code[i]= (char*)malloc(binary_sizes[i]); } clGetProgramInfo(cpProgram, CL_PROGRAM_BINARIES, 0, ptx_code, NULL); // Find the index of the device of interest unsigned int idx = 0; while( idx<num_devices && devices[idx] != cdDevice ) ++idx; // If it is associated prepare the result if( idx < num_devices ) { *binary = ptx_code[idx]; *length = binary_sizes[idx]; } // Cleanup free( devices ); free( binary_sizes ); for( unsigned int i=0; i<num_devices; ++i) { if( i != idx ) free(ptx_code[i]); } free( ptx_code ); }
Value programInfo(cl_program id, cl_program_info info) { Value value; cl_int error = 0; if((error = clGetProgramInfo(id, info, sizeof(Value), &value, nullptr)) != CL_SUCCESS) { reportError("programInfo(): ", error); return Value(0); } return value; }
struct _cl_version clGetProgramVersion(cl_program program) { struct _cl_version version; version.major = 0; version.minor = 0; cl_context context = NULL; cl_int flag = clGetProgramInfo(program, CL_PROGRAM_CONTEXT, sizeof(cl_context), &context, NULL); if(flag != CL_SUCCESS) return version; return clGetContextVersion(context); }
SEXP getProgramInfo(SEXP sProgram, SEXP sProgramInfo){ Rcpp::XPtr<cl_program> program(sProgram); std::string programInfo = Rcpp::as<std::string>(sProgramInfo); char cBuffer[1024]; if(programInfo == "CL_PROGRAM_SOURCE"){ std::cout << "get Program source\n"; clGetProgramInfo(*program, CL_PROGRAM_SOURCE, sizeof(cBuffer), cBuffer, NULL); } std::string retVal = cBuffer; return Rcpp::wrap(retVal); }
cl_uint CL_Program::GetReferenceCount() const { CL_CPP_CONDITIONAL_RETURN_VALUE(!m_Program, 0); // Dynamically retrieve the current reference count for this kernel. cl_uint uRefCount = 0; cl_int iErrorCode = clGetProgramInfo(m_Program, CL_PROGRAM_REFERENCE_COUNT, sizeof(cl_uint), &uRefCount, NULL); CL_CPP_CATCH_ERROR(iErrorCode); return uRefCount; }
bool CL_Program::CreateFromBinaries(cl_uint uNumBinaries, const cl_uchar** ppCompiledBinaries, const size_t* pBinaryLengths, cl_int** ppBinaryStatusOutputs) { CL_CPP_CONDITIONAL_RETURN_FALSE(m_Program); CL_CPP_CONDITIONAL_RETURN_FALSE(!m_pContextRef); CL_CPP_CONDITIONAL_RETURN_FALSE(!m_pContextRef->IsValid()); const cl_device_id* pDeviceList = m_pContextRef->GetDeviceRef()->GetDeviceList(); cl_context Context = m_pContextRef->GetContext(); cl_int* pBinaryStatus = NULL; cl_int iErrorCode = CL_SUCCESS; if(ppBinaryStatusOutputs && uNumBinaries > 0) pBinaryStatus = (cl_int*) CL_Alloc(sizeof(cl_int) * uNumBinaries); // Create a program from the given compiled source buffers. m_Program = clCreateProgramWithBinary(Context, uNumBinaries, pDeviceList, pBinaryLengths, ppCompiledBinaries, pBinaryStatus, &iErrorCode); if(iErrorCode != CL_SUCCESS) CL_Free(pBinaryStatus); else *ppBinaryStatusOutputs = pBinaryStatus; CL_CPP_CATCH_ERROR(iErrorCode); CL_CPP_ON_ERROR_RETURN_FALSE(iErrorCode); // Cache the properties for this program. clGetProgramInfo(m_Program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &m_uNumDevices, NULL); if(m_uNumDevices > 0) { m_pBinarySizes = (size_t*) CL_Alloc(sizeof(size_t) * m_uNumDevices); clGetProgramInfo(m_Program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t) * m_uNumDevices, m_pBinarySizes, NULL); } return true; }
size_t Program::getInfoSize(cl_program_info paramName) const { size_t ret; cl_int err = 0; err = clGetProgramInfo(programHandle(), paramName, 0, nullptr, &ret); if(err != CL_SUCCESS) { throw OpenCLException(err); } return ret; }
static cl_kernel create_kernel (UfoResourcesPrivate *priv, cl_program program, const gchar *kernel_name, GError **error) { cl_kernel kernel; gchar *name; cl_int errcode = CL_SUCCESS; if (kernel_name == NULL) { gchar *source; gsize size; UFO_RESOURCES_CHECK_CLERR (clGetProgramInfo (program, CL_PROGRAM_SOURCE, 0, NULL, &size)); source = g_malloc0 (size); UFO_RESOURCES_CHECK_CLERR (clGetProgramInfo (program, CL_PROGRAM_SOURCE, size, source, NULL)); name = get_first_kernel_name (source); g_free (source); } else { name = g_strdup (kernel_name); } kernel = clCreateKernel (program, name, &errcode); g_free (name); if (kernel == NULL || errcode != CL_SUCCESS) { g_set_error (error, UFO_RESOURCES_ERROR, UFO_RESOURCES_ERROR_CREATE_KERNEL, "Failed to create kernel `%s`: %s", kernel_name, ufo_resources_clerr (errcode)); return NULL; } priv->kernels = g_list_append (priv->kernels, kernel); return kernel; }
cl_kernel bfam_cl_kernel_from_string(cl_context ctx, char const *knl, char const *knl_name, char const *options) { // create an OpenCL program (may have multiple kernels) size_t sizes[] = { strlen(knl) }; cl_int status; cl_program program = clCreateProgramWithSource(ctx, 1, &knl, sizes, &status); BFAM_CL_CHECK(status, "clCreateProgramWithSource"); // build it status = clBuildProgram(program, 0, NULL, options, NULL, NULL); if (status != CL_SUCCESS) { // build failed, get build log and print it cl_device_id dev; BFAM_CL_SAFE_CALL(clGetProgramInfo(program, CL_PROGRAM_DEVICES, sizeof(dev), &dev, NULL)); size_t log_size; BFAM_CL_SAFE_CALL(clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size)); char *log = (char *) bfam_malloc(log_size); char devname[MAX_NAME_LEN]; BFAM_CL_SAFE_CALL(clGetDeviceInfo(dev, CL_DEVICE_NAME, sizeof(devname), devname, NULL)); BFAM_CL_SAFE_CALL(clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG, log_size, log, NULL)); BFAM_LERROR("*** build of '%s' on '%s' failed:\n%s\n*** (end of error)\n", knl_name, devname, log); BFAM_ABORT("Building kernel from a string"); } else BFAM_CL_CHECK(status, "clBuildProgram"); // fish the kernel out of the program cl_kernel kernel = clCreateKernel(program, knl_name, &status); BFAM_CL_CHECK(status, "clCreateKernel"); BFAM_CL_SAFE_CALL(clReleaseProgram(program)); return kernel; }
static cl_int dump_binaries(cl_program prog, const char *prefix) { cl_int errcode; cl_uint num_binaries, i; size_t *binary_sizes; unsigned char **binaries; if ((errcode = clGetProgramInfo(prog, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &num_binaries, NULL)) != CL_SUCCESS) return errcode; binary_sizes = malloc(sizeof(size_t) * num_binaries); binaries = malloc(sizeof(unsigned char *) * num_binaries); if ((errcode = clGetProgramInfo(prog, CL_PROGRAM_BINARY_SIZES, sizeof(size_t) * num_binaries, binary_sizes, NULL)) != CL_SUCCESS) return errcode; for (i = 0; i != num_binaries; ++i) binaries[i] = malloc(binary_sizes[i]); if ((errcode = clGetProgramInfo(prog, CL_PROGRAM_BINARIES, sizeof(unsigned char *) * num_binaries, binaries, NULL)) != CL_SUCCESS) return errcode; for (i = 0; i != num_binaries; ++i) { char name[64]; sprintf(name, "%s.bin%lu", prefix, (unsigned long) i); FILE *f = fopen(name, "w"); fwrite(binaries[i], binary_sizes[i], 1, f); fclose(f); free(binaries[i]); } free(binary_sizes); free(binaries); return CL_SUCCESS; }
//Dumps a compiled kernel to a binary file void dumpBinary(cl_program program,const char *kernelName) { //Get number of devices cl_uint deviceCount = 0; status = clGetProgramInfo(program,CL_PROGRAM_NUM_DEVICES,sizeof(cl_uint),&deviceCount,NULL); if(status != CL_SUCCESS) exitOnError("Getting number of devices for the program(clGetProgramInfo)"); //Get sizes of compiled binaries for said devices size_t *binSize = (size_t*)malloc(sizeof(size_t)*deviceCount); status = clGetProgramInfo(program,CL_PROGRAM_BINARY_SIZES,(sizeof(size_t)*deviceCount),binSize,NULL); if(status != CL_SUCCESS) exitOnError("Getting binary sizes for the program(clGetProgramInfo)"); char **bin = ( char**)malloc(sizeof(char*)*deviceCount); for(cl_uint i = 0;i<deviceCount;i++) bin[i] = (char*)malloc(binSize[i]); //Retrieve compiled binaries status = clGetProgramInfo(program,CL_PROGRAM_BINARIES,(sizeof(size_t)*deviceCount),bin,NULL); if(status != CL_SUCCESS) exitOnError("Getting program binaries(clGetProgramInfo)"); //Export binaries to files, appending CL_DEVICE_NAME to each filename char binFileName[MAX_NAME_LENGTH]; for(cl_uint i = 0;i<deviceCount;i++) { char deviceName[MAX_NAME_LENGTH]; status = clGetDeviceInfo(devices[i],CL_DEVICE_NAME,MAX_NAME_LENGTH,deviceName,NULL); if (status != CL_SUCCESS) exitOnError("Cannot get device name for given device number"); printf("Binary image of kernel %s created for device %s.\n",kernelName,deviceName); sprintf(binFileName,"%s_%s.elf",kernelName,deviceName); std::fstream outBinFile(binFileName, (std::fstream::out | std::fstream::binary)); if(outBinFile.fail()) exitOnError("Cannot open binary file"); outBinFile.write(bin[i],binSize[i]); outBinFile.close(); } for(cl_uint i = 0;i<deviceCount;i++) free(bin[i]); }
void OpenCLProgram::getBinary() { cl_uint program_num_devices; cl_int err; err = clGetProgramInfo(clProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &program_num_devices, NULL); assert(err == CL_SUCCESS); if (program_num_devices == 0) { std::cerr << "no valid binary was found" << std::endl; return; } size_t binaries_sizes[program_num_devices]; err = clGetProgramInfo(clProgram, CL_PROGRAM_BINARY_SIZES, program_num_devices*sizeof(size_t), binaries_sizes, NULL); assert(err = CL_SUCCESS); char **binaries = new char*[program_num_devices]; for (size_t i = 0; i < program_num_devices; i++) binaries[i] = new char[binaries_sizes[i]+1]; err = clGetProgramInfo(clProgram, CL_PROGRAM_BINARIES, program_num_devices*sizeof(size_t), binaries, NULL); assert(err = CL_SUCCESS); for (size_t i = 0; i < program_num_devices; i++) { binaries[i][binaries_sizes[i]] = '\0'; std::cout << "Program " << i << ":" << std::endl; std::cout << binaries[i]; } for (size_t i = 0; i < program_num_devices; i++) delete [] binaries[i]; delete [] binaries; }
string Program::sourceCode() const { if(!_ctx || isNull()) return string(); size_t size; cl_int error; if((error = clGetProgramInfo(_id, CL_PROGRAM_SOURCE, 0, nullptr, &size)) != CL_SUCCESS) { detail::reportError("Program::sourceCode(): ", error); return string(); } if(size == 0) return string(); string buf; buf.resize(size); if(detail::programInfo(_id, CL_PROGRAM_SOURCE, const_cast<char*>(buf.data()), size)) return string(); return buf; }
void call_kernel_2d(float *x1,float *x2,int n1,int n2,int n3,float *x3,char * cl_name) { FILE* programHandle; size_t programSize, KernelSourceSize; char *programBuffer, *KernelSource; int err, szA, szB, szC; szA = n1*n2; szB = n2*n3; szC = n1*n3; int DIM = 1; size_t global[DIM]; size_t local[DIM]; cl_device_id device_id; cl_context context; cl_command_queue commands; cl_program program; cl_kernel kernel; cl_uint nd; cl_mem input1, input2, output; err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL); context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); commands = clCreateCommandQueue(context, device_id, 0, &err); input1 = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * szA, NULL, NULL); input2 = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * szB, NULL, NULL); output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * szC, NULL, NULL); err = clEnqueueWriteBuffer(commands, input1, CL_TRUE, 0, sizeof(float) * szA, x1, 0, NULL, NULL); err = clEnqueueWriteBuffer(commands, input2, CL_TRUE, 0, sizeof(float) * szB, x2, 0, NULL, NULL); //---------------------------------------------------------------------------- // get size of kernel source programHandle = fopen(cl_name, "r"); fseek(programHandle, 0, SEEK_END); programSize = ftell(programHandle); rewind(programHandle); programBuffer = (char*) malloc(programSize + 1); programBuffer[programSize] = '\0'; fread(programBuffer, sizeof(char), programSize, programHandle); fclose(programHandle); // create program from buffer program = clCreateProgramWithSource(context,1,(const char**) &programBuffer,&programSize, NULL); free(programBuffer); // read kernel source back in from program to check clGetProgramInfo(program, CL_PROGRAM_SOURCE, 0, NULL, &KernelSourceSize); KernelSource = (char*) malloc(KernelSourceSize); clGetProgramInfo(program, CL_PROGRAM_SOURCE, KernelSourceSize, KernelSource, NULL); program = clCreateProgramWithSource(context, 1, (const char **) & KernelSource, NULL, &err); err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); kernel = clCreateKernel(program, "mmul", &err); err = clSetKernelArg(kernel, 0, sizeof(int), &n1); err |= clSetKernelArg(kernel, 1, sizeof(int), &n2); err |= clSetKernelArg(kernel, 2, sizeof(int), &n3); err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &input1); err |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &input2); err |= clSetKernelArg(kernel, 5, sizeof(cl_mem), &output); //err |= clSetKernelArg(kernel, 6, sizeof(float)*1024, NULL); global[0] = (size_t) n1; //global[1] = (size_t) n3; err = clEnqueueNDRangeKernel(commands, kernel, DIM, NULL, &global[0], NULL, 0, NULL, NULL); clFinish(commands); err = clEnqueueReadBuffer(commands, output, CL_TRUE, 0, sizeof(float) * szC, x3, 0, NULL, NULL ); clReleaseMemObject(input1); clReleaseMemObject(input2); clReleaseMemObject(output); clReleaseProgram(program); clReleaseKernel(kernel); clReleaseCommandQueue(commands); clReleaseContext(context); //printf("nKernel source:\n\n %s \n", KernelSource); free(KernelSource); }
int main(void){ cl_int err; cl_platform_id platforms[MAX_PLATFORMS]; cl_uint nplatforms; cl_device_id devices[MAX_DEVICES + 1]; // + 1 for duplicate test cl_device_id device_id0; cl_uint num_devices; size_t i; size_t num_binaries; const unsigned char **binaries = NULL; size_t *binary_sizes = NULL; size_t num_bytes_copied; cl_int binary_statuses[MAX_BINARIES]; cl_int binary_statuses2[MAX_BINARIES]; cl_program program = NULL; cl_program program_with_binary = NULL; err = clGetPlatformIDs(MAX_PLATFORMS, platforms, &nplatforms); CHECK_OPENCL_ERROR_IN("clGetPlatformIDs"); if (!nplatforms) return EXIT_FAILURE; err = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, MAX_DEVICES, devices, &num_devices); CHECK_OPENCL_ERROR_IN("clGetDeviceIDs"); cl_context context = clCreateContext(NULL, num_devices, devices, NULL, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateContext"); size_t kernel_size = strlen(kernel); char* kernel_buffer = kernel; program = clCreateProgramWithSource(context, 1, (const char**)&kernel_buffer, &kernel_size, &err); CHECK_OPENCL_ERROR_IN("clCreateProgramWithSource"); err = clBuildProgram(program, num_devices, devices, NULL, NULL, NULL); CHECK_OPENCL_ERROR_IN("clBuildProgram"); err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, 0, 0, &num_binaries); CHECK_OPENCL_ERROR_IN("clGetProgramInfo"); num_binaries = num_binaries/sizeof(size_t); binary_sizes = (size_t*)malloc(num_binaries * sizeof(size_t)); binaries = (const unsigned char**)calloc(num_binaries, sizeof(unsigned char*)); err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, num_binaries*sizeof(size_t), binary_sizes , &num_bytes_copied); CHECK_OPENCL_ERROR_IN("clGetProgramInfo"); for (i = 0; i < num_binaries; ++i) binaries[i] = (const unsigned char*) malloc(binary_sizes[i] * sizeof(const unsigned char)); err = clGetProgramInfo(program, CL_PROGRAM_BINARIES, num_binaries*sizeof(char*), binaries, &num_bytes_copied); CHECK_OPENCL_ERROR_IN("clGetProgramInfo"); cl_uint num = num_binaries < num_devices ? num_binaries : num_devices; if (num == 0) { err = !CL_SUCCESS; goto FREE_AND_EXIT; } program_with_binary = clCreateProgramWithBinary(context, num, devices, binary_sizes, binaries, binary_statuses, &err); CHECK_OPENCL_ERROR_IN("clCreateProgramWithBinary"); for (i = 0; i < num; ++i) { cl_program_binary_type bin_type = 0; err = clGetProgramBuildInfo(program_with_binary, devices[i], CL_PROGRAM_BINARY_TYPE, sizeof(bin_type), (void *)&bin_type, NULL); CHECK_OPENCL_ERROR_IN("get program binary type"); /* cl_program_binary_type */ switch(bin_type) { case CL_PROGRAM_BINARY_TYPE_NONE: /*0x0*/ fprintf(stderr, "program binary type: CL_PROGRAM_BINARY_TYPE_NONE\n"); break; case CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT: /*0x1*/ fprintf(stderr, "program binary type: CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT\n"); break; case CL_PROGRAM_BINARY_TYPE_LIBRARY: /*0x2*/ fprintf(stderr, "program binary type: CL_PROGRAM_BINARY_TYPE_LIBRARY\n"); break; case CL_PROGRAM_BINARY_TYPE_EXECUTABLE: /*0x4*/ fprintf(stderr, "program binary type: CL_PROGRAM_BINARY_TYPE_EXECUTABLE\n"); break; } } err = clReleaseProgram(program_with_binary); CHECK_OPENCL_ERROR_IN("clReleaseProgram"); for (i = 0; i < num; i++) { if (binary_statuses[i] != CL_SUCCESS) { err = !CL_SUCCESS; goto FREE_AND_EXIT; } } // negative test1: invalid device device_id0 = devices[0]; devices[0] = NULL; // invalid device program_with_binary = clCreateProgramWithBinary(context, num, devices, binary_sizes, binaries, binary_statuses, &err); if (err != CL_INVALID_DEVICE || program_with_binary != NULL) { err = !CL_SUCCESS; goto FREE_AND_EXIT; } err = CL_SUCCESS; devices[0] = device_id0; for (i = 0; i < num_binaries; ++i) free((void*)binaries[i]); free(binary_sizes); free(binaries); // negative test2: duplicate device num_binaries = 2; devices[1] = devices[0]; // duplicate binary_sizes = (size_t*)malloc(num_binaries * sizeof(size_t)); binaries = (const unsigned char**)calloc(num_binaries, sizeof(unsigned char*)); err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, 1*sizeof(size_t), binary_sizes , &num_bytes_copied); CHECK_OPENCL_ERROR_IN("clGetProgramInfo"); binary_sizes[1] = binary_sizes[0]; binaries[0] = (const unsigned char*) malloc(binary_sizes[0] * sizeof(const unsigned char)); binaries[1] = (const unsigned char*) malloc(binary_sizes[1] * sizeof(const unsigned char)); err = clGetProgramInfo(program, CL_PROGRAM_BINARIES, 1 * sizeof(char*), binaries, &num_bytes_copied); CHECK_OPENCL_ERROR_IN("clGetProgramInfo"); memcpy((void*)binaries[1], (void*)binaries[0], binary_sizes[0]); program_with_binary = clCreateProgramWithBinary(context, 2, devices, binary_sizes, binaries, binary_statuses2, &err); if (err != CL_INVALID_DEVICE || program_with_binary != NULL) { err = !CL_SUCCESS; goto FREE_AND_EXIT; } err = CL_SUCCESS; FREE_AND_EXIT: // Free resources for (i = 0; i < num_binaries; ++i) if (binaries) if(binaries[i]) free((void*)binaries[i]); if (binary_sizes) free(binary_sizes); if (binaries) free(binaries); if (program) CHECK_CL_ERROR (clReleaseProgram (program)); if (program_with_binary) CHECK_CL_ERROR (clReleaseProgram (program_with_binary)); if (context) CHECK_CL_ERROR (clReleaseContext (context)); CHECK_CL_ERROR (clUnloadCompiler ()); return err == CL_SUCCESS ? EXIT_SUCCESS : EXIT_FAILURE; }
int NBody::genBinaryImage() { cl_int status = CL_SUCCESS; /* * Have a look at the available platforms and pick either * the AMD one if available or a reasonable default. */ cl_uint numPlatforms; cl_platform_id platform = NULL; status = clGetPlatformIDs(0, NULL, &numPlatforms); if(!sampleCommon->checkVal(status, CL_SUCCESS, "clGetPlatformIDs failed.")) { return SDK_FAILURE; } if (0 < numPlatforms) { cl_platform_id* platforms = new cl_platform_id[numPlatforms]; status = clGetPlatformIDs(numPlatforms, platforms, NULL); if(!sampleCommon->checkVal(status, CL_SUCCESS, "clGetPlatformIDs failed.")) { return SDK_FAILURE; } char platformName[100]; for (unsigned i = 0; i < numPlatforms; ++i) { status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(platformName), platformName, NULL); if(!sampleCommon->checkVal(status, CL_SUCCESS, "clGetPlatformInfo failed.")) { return SDK_FAILURE; } platform = platforms[i]; if (!strcmp(platformName, "Advanced Micro Devices, Inc.")) { break; } } std::cout << "Platform found : " << platformName << "\n"; delete[] platforms; } if(NULL == platform) { sampleCommon->error("NULL platform found so Exiting Application."); return SDK_FAILURE; } /* * If we could find our platform, use it. Otherwise use just available platform. */ cl_context_properties cps[5] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, CL_CONTEXT_OFFLINE_DEVICES_AMD, (cl_context_properties)1, 0 }; context = clCreateContextFromType(cps, CL_DEVICE_TYPE_ALL, NULL, NULL, &status); if(!sampleCommon->checkVal(status, CL_SUCCESS, "clCreateContextFromType failed.")) { return SDK_FAILURE; } /* create a CL program using the kernel source */ streamsdk::SDKFile kernelFile; std::string kernelPath = sampleCommon->getPath(); kernelPath.append("NBody_Kernels.cl"); if(!kernelFile.open(kernelPath.c_str())) { std::cout << "Failed to load kernel file : " << kernelPath << std::endl; return SDK_FAILURE; } const char * source = kernelFile.source().c_str(); size_t sourceSize[] = {strlen(source)}; program = clCreateProgramWithSource(context, 1, &source, sourceSize, &status); if(!sampleCommon->checkVal(status, CL_SUCCESS, "clCreateProgramWithSource failed.")) { return SDK_FAILURE; } std::string flagsStr = std::string(""); // Get additional options if(isComplierFlagsSpecified()) { streamsdk::SDKFile flagsFile; std::string flagsPath = sampleCommon->getPath(); flagsPath.append(flags.c_str()); if(!flagsFile.open(flagsPath.c_str())) { std::cout << "Failed to load flags file: " << flagsPath << std::endl; return SDK_FAILURE; } flagsFile.replaceNewlineWithSpaces(); const char * flags = flagsFile.source().c_str(); flagsStr.append(flags); } if(flagsStr.size() != 0) std::cout << "Build Options are : " << flagsStr.c_str() << std::endl; /* create a cl program executable for all the devices specified */ status = clBuildProgram(program, 0, NULL, flagsStr.c_str(), NULL, NULL); sampleCommon->checkVal(status, CL_SUCCESS, "clBuildProgram failed."); size_t numDevices; status = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, sizeof(numDevices), &numDevices, NULL ); if(!sampleCommon->checkVal(status, CL_SUCCESS, "clGetProgramInfo(CL_PROGRAM_NUM_DEVICES) failed.")) { return SDK_FAILURE; } std::cout << "Number of devices found : " << numDevices << "\n\n"; devices = (cl_device_id *)malloc( sizeof(cl_device_id) * numDevices ); if(devices == NULL) { sampleCommon->error("Failed to allocate host memory.(devices)"); return SDK_FAILURE; } /* grab the handles to all of the devices in the program. */ status = clGetProgramInfo(program, CL_PROGRAM_DEVICES, sizeof(cl_device_id) * numDevices, devices, NULL ); if(!sampleCommon->checkVal(status, CL_SUCCESS, "clGetProgramInfo(CL_PROGRAM_DEVICES) failed.")) { return SDK_FAILURE; } /* figure out the sizes of each of the binaries. */ size_t *binarySizes = (size_t*)malloc( sizeof(size_t) * numDevices ); if(devices == NULL) { sampleCommon->error("Failed to allocate host memory.(binarySizes)"); return SDK_FAILURE; } status = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t) * numDevices, binarySizes, NULL); if(!sampleCommon->checkVal(status, CL_SUCCESS, "clGetProgramInfo(CL_PROGRAM_BINARY_SIZES) failed.")) { return SDK_FAILURE; } size_t i = 0; /* copy over all of the generated binaries. */ char **binaries = (char **)malloc( sizeof(char *) * numDevices ); if(binaries == NULL) { sampleCommon->error("Failed to allocate host memory.(binaries)"); return SDK_FAILURE; } for(i = 0; i < numDevices; i++) { if(binarySizes[i] != 0) { binaries[i] = (char *)malloc( sizeof(char) * binarySizes[i]); if(binaries[i] == NULL) { sampleCommon->error("Failed to allocate host memory.(binaries[i])"); return SDK_FAILURE; } } else { binaries[i] = NULL; } } status = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(char *) * numDevices, binaries, NULL); if(!sampleCommon->checkVal(status, CL_SUCCESS, "clGetProgramInfo(CL_PROGRAM_BINARIES) failed.")) { return SDK_FAILURE; } /* dump out each binary into its own separate file. */ for(i = 0; i < numDevices; i++) { char fileName[100]; sprintf(fileName, "%s.%d", dumpBinary.c_str(), (int)i); if(binarySizes[i] != 0) { char deviceName[1024]; status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL); if(!sampleCommon->checkVal(status, CL_SUCCESS, "clGetDeviceInfo(CL_DEVICE_NAME) failed.")) { return SDK_FAILURE; } printf( "%s binary kernel: %s\n", deviceName, fileName); streamsdk::SDKFile BinaryFile; if(!BinaryFile.writeBinaryToFile(fileName, binaries[i], binarySizes[i])) { std::cout << "Failed to load kernel file : " << fileName << std::endl; return SDK_FAILURE; } } else { printf("Skipping %s since there is no binary data to write!\n", fileName); } } // Release all resouces and memory for(i = 0; i < numDevices; i++) { if(binaries[i] != NULL) { free(binaries[i]); binaries[i] = NULL; } } if(binaries != NULL) { free(binaries); binaries = NULL; } if(binarySizes != NULL) { free(binarySizes); binarySizes = NULL; } if(devices != NULL) { free(devices); devices = NULL; } status = clReleaseProgram(program); if(!sampleCommon->checkVal(status, CL_SUCCESS, "clReleaseProgram failed.")) { return SDK_FAILURE; } status = clReleaseContext(context); if(!sampleCommon->checkVal(status, CL_SUCCESS, "clReleaseContext failed.")) { return SDK_FAILURE; } return SDK_SUCCESS; }
// Main function // ********************************************************************* int main(int argc, char **argv) { if (!device_comm_queue) ocl_initialize(); const char* program_source; program_source = load_source_file(argv[1]); cl_program ocl_program = clCreateProgramWithSource(device_context, 1, (const char**)&program_source, NULL, NULL); printf("********** program created.\n"); // Build the program (OpenCL JIT compilation) char options[100]; const char* flags = "-g -w -I\""; const char* OMHOME = getenv("OPENMODELICAHOME"); const char* OMINCL = "/include/omc\""; const char* OMBIN = "/bin\""; if ( OMHOME != NULL ) { strcpy(options, flags); strcat(options, OMHOME); strcat(options, OMINCL); strcat(options, " -I\""); strcat(options, OMHOME); strcat(options, OMBIN); printf("Building OpenCL code with flags %s\n",options); cl_int err; err = clBuildProgram(ocl_program, 0, NULL, options, NULL, NULL); ocl_error_check(OCL_BUILD_PROGRAM, err); size_t size; clGetProgramBuildInfo(ocl_program, ocl_device, CL_PROGRAM_BUILD_LOG, // Get build log size 0, NULL, &size); char * log = (char*)malloc(size); clGetProgramBuildInfo(ocl_program,ocl_device,CL_PROGRAM_BUILD_LOG,size,log, NULL); printf("\t\tCL_PROGRAM_BUILD_LOG: \t%s\n", log); free(log); if(err){ printf("Errors detected in compilation of OpenCL code:\n"); exit(1); } else printf("Program built successfuly.\n"); //if no error create the binary clGetProgramInfo(ocl_program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL); unsigned char * binary = (unsigned char*)malloc(size); printf("Size of program binary :\t%d\n",size); clGetProgramInfo(ocl_program, CL_PROGRAM_BINARIES, sizeof(size_t), &binary, NULL); printf("Program binary retrived.\n"); const char* binary_ext = ".bin"; char* binary_name = strcat(argv[1],binary_ext); printf("binary file name %s\n", binary_name); FILE * cache; cache = fopen(binary_name, "wb"); fwrite(binary, sizeof(char), size, cache); fclose(cache); //free(binary); err = 0; cl_program newprogram = clCreateProgramWithBinary(device_context, 1, &ocl_device, &size, (const unsigned char **)&binary, NULL, &err); if(!err) printf("Program created from binary\n"); else{ switch (err){ case CL_INVALID_CONTEXT: printf("Error building program:\n"); printf("CL_INVALID_CONTEXT \n"); break; case CL_INVALID_VALUE: printf("Error building program:\n"); printf("CL_INVALID_VALUE \n"); break; case CL_INVALID_DEVICE: printf("Error building program:\n"); printf("CL_INVALID_DEVICE \n"); break; case CL_INVALID_BINARY: printf("Error building program:\n"); printf("CL_INVALID_BINARY \n"); break; case CL_OUT_OF_HOST_MEMORY: printf("Error building program:\n"); printf("CL_OUT_OF_HOST_MEMORY \n"); break; } } return 0; } else { printf("Couldn't find OPENMODELICAHOME!\n"); exit(1); } ocl_clean_up(); return 0; }
// Kernel launch static int launch_clsmm_dnt_largeDB_16_23_23_12_23_96_2_3_12_10 (void *param_stack, int stack_size, void *stream, int m_max, int n_max, int k_max, void *a_data, void *b_data, void *c_data){ int shared_size = 0; //{'name': 'clsmm_dnt_largeDB_16_23_23_12_23_96_2_3_12_10', 'tile_n': 3, 'tile_m': 2, 'm': 23, 'n': 23, 'threads': 96, 'w': 10, 'v': 12, 'minblocks': 12, 'k': 23, 'grouping': 16} int careful = (stack_size / 16); int nruns = stack_size - careful * 16; int i; cl_kernel opencl_kernel = NULL; cl_program opencl_program = NULL; // local queue pointer and device + context value acc_opencl_stream_type *opencl_stream = (acc_opencl_stream_type *) stream; acc_opencl_dev_type opencl_device = (*opencl_stream).device; cl_context opencl_ctx = opencl_device.ctx; cl_device_id opencl_dev = opencl_device.device_id; cl_command_queue opencl_queue = (*opencl_stream).queue; // get or create kernel if (multiply_kernel) { opencl_kernel = multiply_kernel; } else { // read kernel code if (verbose_print) fprintf(stdout,"reading multiplication kernel ...\n"); size_t *lengths = (size_t *) malloc(sizeof(size_t) * 2); // 2 - two files, each with different size char **strings = (char **) malloc(sizeof(char *) * 2); // 2 - two files, each with different lenght read_file_at_path(&(strings[0]), &(lengths[0]), "LIBSMM_CL_KERNEL_PATH", "clsmm_common.cl"); read_file_at_path(&(strings[1]), &(lengths[1]), "LIBSMM_CL_KERNEL_PATH", "clsmm_dnt_largeDB2.cl"); // get kernel code, build program and kernel if (verbose_print) fprintf(stdout,"building multiplication kernel ...\n"); opencl_program = clCreateProgramWithSource( // cl_program opencl_ctx, // cl_context context (cl_uint) 2, // cl_uint count (const char **) strings, // const char **strings lengths, // const size_t *lengths &cl_error); // cl_int *errcode_ret if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clCreateProgramWithSource %d\n", (int) cl_error); free(lengths); free(strings[0]); free(strings[1]); free(strings); if (cl_error == CL_SUCCESS && verbose_src){ fprintf(stdout, "\n@@@@@@@@@ SOURCE-DATA: @@@@@@@@@\n"); size_t src_sz; cl_error = clGetProgramInfo(opencl_program, CL_PROGRAM_SOURCE, (size_t) 0, NULL, &src_sz); if (cl_error != CL_SUCCESS) fprintf(stdout, "Error 1 (print source) %d\n", (int) cl_error); char *src = (char *) malloc(src_sz); src[src_sz - 1] = '\0'; cl_error = clGetProgramInfo(opencl_program, CL_PROGRAM_SOURCE, src_sz, src, NULL); if (cl_error != CL_SUCCESS) fprintf(stdout, "Error 2 (print source) %d\n", (int) cl_error); fprintf(stdout, "%s", src); free(src); fprintf(stdout, "@@@@@@@@@ END SOURCE-DATA, SIZE=%zu @@@@@@@@@\n", src_sz); } cl_error = clBuildProgram( // cl_int opencl_program, // cl_program program (cl_uint) 1, // cl_uint num_devices (const cl_device_id *) &opencl_dev, // const cl_device_id *device_list BUILD_OPTIONS, // const char *options NULL, // void (CL_CALLBACK* pfn_notify) (cl_program program, void *user_data) NULL); // void *user_data if (cl_error != CL_SUCCESS){ fprintf(stdout, "\n@@@@@@@@@ BUILD-DATA, ERROR=%d: @@@@@@@@@\n", (int) cl_error); size_t bld_sz; cl_error = clGetProgramBuildInfo(opencl_program, opencl_dev, CL_PROGRAM_BUILD_LOG, (size_t) 0, NULL, &bld_sz); if (cl_error != CL_SUCCESS) fprintf(stdout, "Error 1 (print source) %d\n", (int) cl_error); char *bld = (char *) malloc(bld_sz); cl_error = clGetProgramBuildInfo(opencl_program, opencl_dev, CL_PROGRAM_BUILD_LOG, bld_sz, bld, NULL); if (cl_error != CL_SUCCESS) fprintf(stdout, "Error 2 (print source) %d\n", (int) cl_error); bld[bld_sz - 1] = '\0'; fprintf(stdout, "%s", bld); free(bld); fprintf(stdout, "@@@@@@@@@ END BUILD-DATA, SIZE=%zu @@@@@@@@@\n", bld_sz); } if ((cl_error == CL_SUCCESS) && (verbose_ptx)) { fprintf(stdout, "\n@@@@@@@@@ PTX-DATA: @@@@@@@@@\n"); size_t ptx_sz; cl_error = clGetProgramInfo(opencl_program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &ptx_sz, NULL); if (cl_error != CL_SUCCESS) fprintf(stdout,"Error 1 (print ptx) %d\n", (int) cl_error); unsigned char *ptx = (unsigned char *) malloc(ptx_sz); cl_error = clGetProgramInfo(opencl_program, CL_PROGRAM_BINARIES, ptx_sz, &ptx, NULL); if (cl_error != CL_SUCCESS) fprintf(stdout,"Error 2 (print ptx) %d\n", (int) cl_error); ptx[ptx_sz - 1] = '\0'; fprintf(stdout, "%s", ptx); free(ptx); fprintf(stdout, "@@@@@@@@@ END PTX-DATA, SIZE=%zu: @@@@@@@@@\n", ptx_sz); } opencl_kernel = clCreateKernel( // cl_kernel opencl_program, // cl_program program "clsmm_dnt_largeDB2_16_23_23_12_23_96_3_2_12_8", // const char *kernel_name &cl_error); // cl_int *errcode_ret if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clCreateKernel %d\n", (int) cl_error); // keep for later usage multiply_kernel = opencl_kernel; } // set kernel parameters if (verbose_print) fprintf(stdout,"set multiplication kernel parameters ...\n"); cl_error = clSetKernelArg(opencl_kernel, (cl_uint) 0, sizeof(cl_mem), param_stack); if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clSetKernelArg(0) %d\n", (int) cl_error); cl_error = clSetKernelArg(opencl_kernel, (cl_uint) 1, sizeof(int), &careful); if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clSetKernelArg(1) %d\n", (int) cl_error); cl_error = clSetKernelArg(opencl_kernel, (cl_uint) 2, sizeof(int), &nruns); if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clSetKernelArg(2) %d\n", (int) cl_error); cl_error = clSetKernelArg(opencl_kernel, (cl_uint) 3, sizeof(cl_mem), a_data); if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clSetKernelArg(3) %d\n", (int) cl_error); cl_error = clSetKernelArg(opencl_kernel, (cl_uint) 4, sizeof(cl_mem), b_data); if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clSetKernelArg(4) %d\n", (int) cl_error); cl_error = clSetKernelArg(opencl_kernel, (cl_uint) 5, sizeof(cl_mem), c_data); if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clSetKernelArg(5) %d\n", (int) cl_error); // set kernel sizes and submit kernel if (verbose_print) fprintf(stdout,"set multiplication kernel sizes ...\n"); size_t num_groups = {((stack_size + 16 - 1) / 16)}; size_t work_items = {96}; size_t global_work_size[1] = {num_groups * work_items}; size_t local_work_size[1] = {work_items}; if (verbose_print) fprintf(stdout,"calling multiplication kernel ...\n"); cl_error = clEnqueueNDRangeKernel( // cl_int opencl_queue, // cl_command_queue command_queue opencl_kernel, // cl_kernel kernel (cl_uint) 1, // cl_uint work_dim NULL, // const size_t *global_work_offset global_work_size, // const size_t *global_work_size local_work_size, // const size_t *local_work_size (cl_uint) 0, // cl_uint num_events_in_wait_list NULL, // const cl_event *event_wait_list NULL); // cl_event *event if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clEnqueueNDRangeKernel %d\n", (int) cl_error); return 0; }
// Transpose kernel switch and launch // // NOTE: All arrays are device buffers - no access from host side. // int libclsmm_transpose_d (void *trs_stack, int offset, int nblks, void *buffer, int m, int n, void *stream){ int idx = 0; int missing = 0; //false cl_kernel opencl_kernel = NULL; cl_program opencl_program = NULL; // local queue pointer and device + context value acc_opencl_stream_type *opencl_stream = (acc_opencl_stream_type *) stream; acc_opencl_dev_type opencl_device = (*opencl_stream).device; cl_context opencl_ctx = opencl_device.ctx; cl_device_id opencl_dev = opencl_device.device_id; cl_command_queue opencl_queue = (*opencl_stream).queue; switch(m){ case 23: idx = 0; break; default: missing = 1; } idx *= 1; switch(n){ case 23: idx += 0; break; default: missing = 1; } // If there is no kernel for these blocks, we don't need to transpose them. if (missing) return 0; if (verbose_print) fprintf(stdout, "Transpose %d blocks.\n", nblks); switch(idx){ case 0: // get or create kernel if (transpose_kernel) { opencl_kernel = transpose_kernel; } else { // read kernel code if (verbose_print) fprintf(stdout,"reading transpose kernel ...\n"); size_t *lengths = (size_t *) malloc(sizeof(size_t) * 2); // 2 - two files, each with different size char **strings = (char **) malloc(sizeof(char *) * 2); // 2 - two files, each with different lenght read_file_at_path(&(strings[0]), &(lengths[0]), "LIBSMM_CL_KERNEL_PATH", "clsmm_common.cl"); read_file_at_path(&(strings[1]), &(lengths[1]), "LIBSMM_CL_KERNEL_PATH", "clsmm_transpose.cl"); // get kernel code, build program and kernel if (verbose_print) fprintf(stdout,"building transpose kernel ...\n"); opencl_program = clCreateProgramWithSource( // cl_program opencl_ctx, // cl_context context (cl_uint) 2, // cl_uint count (const char **) strings, // const char **strings lengths, // const size_t *lengths &cl_error); // cl_int *errcode_ret if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clCreateProgramWithSource %d\n", (int) cl_error); free(lengths); free(strings[0]); free(strings[1]); free(strings); if (cl_error == CL_SUCCESS && verbose_src){ fprintf(stdout, "\n@@@@@@@@@ SOURCE-DATA: @@@@@@@@@\n"); size_t src_sz; cl_error = clGetProgramInfo(opencl_program, CL_PROGRAM_SOURCE, (size_t) 0, NULL, &src_sz); if (cl_error != CL_SUCCESS) fprintf(stdout, "Error 1 (print source) %d\n", (int) cl_error); char *src = (char *) malloc(src_sz); src[src_sz - 1] = '\0'; cl_error = clGetProgramInfo(opencl_program, CL_PROGRAM_SOURCE, src_sz, src, NULL); if (cl_error != CL_SUCCESS) fprintf(stdout, "Error 2 (print source) %d\n", (int) cl_error); fprintf(stdout, "%s", src); free(src); fprintf(stdout, "@@@@@@@@@ END SOURCE-DATA, SIZE=%zu @@@@@@@@@\n", src_sz); } cl_error = clBuildProgram( // cl_int opencl_program, // cl_program program (cl_uint) 1, // cl_uint num_devices (const cl_device_id *) &opencl_dev, // const cl_device_id *device_list BUILD_OPTIONS, // const char *options NULL, // void (CL_CALLBACK* pfn_notify) (cl_program program, void *user_data) NULL); // void *user_data if (cl_error != CL_SUCCESS){ fprintf(stdout, "\n@@@@@@@@@ BUILD-DATA, ERROR=%d: @@@@@@@@@\n", (int) cl_error); size_t bld_sz; cl_error = clGetProgramBuildInfo(opencl_program, opencl_dev, CL_PROGRAM_BUILD_LOG, (size_t) 0, NULL, &bld_sz); if (cl_error != CL_SUCCESS) fprintf(stdout, "Error 1 (print source) %d\n", (int) cl_error); char *bld = (char *) malloc(bld_sz); cl_error = clGetProgramBuildInfo(opencl_program, opencl_dev, CL_PROGRAM_BUILD_LOG, bld_sz, bld, NULL); if (cl_error != CL_SUCCESS) fprintf(stdout, "Error 2 (print source) %d\n", (int) cl_error); bld[bld_sz - 1] = '\0'; fprintf(stdout, "%s", bld); free(bld); fprintf(stdout, "@@@@@@@@@ END BUILD-DATA, SIZE=%zu @@@@@@@@@\n", bld_sz); } if ((cl_error == CL_SUCCESS) && (verbose_ptx)) { fprintf(stdout, "\n@@@@@@@@@ PTX-DATA: @@@@@@@@@\n"); size_t ptx_sz; cl_error = clGetProgramInfo(opencl_program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &ptx_sz, NULL); if (cl_error != CL_SUCCESS) fprintf(stdout,"Error 1 (print ptx) %d\n", (int) cl_error); unsigned char *ptx = (unsigned char *) malloc(ptx_sz); cl_error = clGetProgramInfo(opencl_program, CL_PROGRAM_BINARIES, ptx_sz, &ptx, NULL); if (cl_error != CL_SUCCESS) fprintf(stdout,"Error 2 (print ptx) %d\n", (int) cl_error); ptx[ptx_sz - 1] = '\0'; fprintf(stdout, "%s", ptx); free(ptx); fprintf(stdout, "@@@@@@@@@ END PTX-DATA, SIZE=%zu: @@@@@@@@@\n", ptx_sz); } opencl_kernel = clCreateKernel( // cl_kernel opencl_program, // cl_program program "transpose_23_23_d", // const char *kernel_name &cl_error); // cl_int *errcode_ret if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clCreateKernel %d\n", (int) cl_error); // keep for later usage transpose_kernel = opencl_kernel; } // set kernel parameters if (verbose_print) fprintf(stdout,"set transpose kernel parameters ...\n"); cl_error = clSetKernelArg(opencl_kernel, 0, sizeof(cl_mem), trs_stack); if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clSetKernelArg(0) %d\n", (int) cl_error); cl_error = clSetKernelArg(opencl_kernel, 1, sizeof(int), &offset); if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clSetKernelArg(1) %d\n", (int) cl_error); cl_error = clSetKernelArg(opencl_kernel, 2, sizeof(cl_mem), buffer); if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clSetKernelArg(2) %d\n", (int) cl_error); cl_error = clSetKernelArg(opencl_kernel, 3, (23 * 23 * sizeof(double)), NULL); // 23x23 buffer in (local) device memory if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clSetKernelArg(3) %d\n", (int) cl_error); // set kernel size and submit kernel if (verbose_print) fprintf(stdout,"set transpose kernel sizes ...\n"); size_t work_items = {23}; size_t global_work_size[1] = {nblks * work_items}; size_t local_work_size[1] = {work_items}; if (verbose_print) fprintf(stdout,"calling transpose kernel ...\n"); cl_error = clEnqueueNDRangeKernel( // cl_int opencl_queue, // cl_command_queue command_queue opencl_kernel, // cl_kernel kernel (cl_uint) 1, // cl_uint work_dim NULL, // const size_t *global_work_offset global_work_size, // const size_t *global_work_size local_work_size, // const size_t *local_work_size (cl_uint) 0, // cl_uint num_events_in_wait_list NULL, // const cl_event *event_wait_list NULL); // cl_event *event if (cl_error != CL_SUCCESS) fprintf(stdout,"Error in: clEnqueueNDRangeKernel %d\n", (int) cl_error); return 0; break; // If there is no kernel for these blocks, we don't need to transpose them. default: return 0; } }
/* dpoIKernel compileKernel (in AString source, in AString kernelName, [optional] in AString options); */ NS_IMETHODIMP dpoCContext::CompileKernel(const nsAString & source, const nsAString & kernelName, const nsAString & options, dpoIKernel **_retval) { cl_program program; cl_kernel kernel; cl_int err_code, err_code2; cl_uint numDevices; cl_device_id *devices = NULL; size_t actual; char *sourceStr, *optionsStr, *kernelNameStr; nsCOMPtr<dpoCKernel> ret; nsresult result; sourceStr = ToNewUTF8String(source); DEBUG_LOG_STATUS("CompileKernel", "Source: " << sourceStr); program = clCreateProgramWithSource(context, 1, (const char**)&sourceStr, NULL, &err_code); nsMemory::Free(sourceStr); if (err_code != CL_SUCCESS) { DEBUG_LOG_ERROR("CompileKernel", err_code); return NS_ERROR_ILLEGAL_VALUE; } optionsStr = ToNewUTF8String(options); err_code = clBuildProgram(program, 0, NULL, optionsStr, NULL, NULL); nsMemory::Free(optionsStr); if (err_code != CL_SUCCESS) { DEBUG_LOG_ERROR("CompileKernel", err_code); } err_code2 = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &numDevices, NULL); if (err_code2 != CL_SUCCESS) { DEBUG_LOG_ERROR("CompileKernel", err_code2); goto FAIL; } devices = (cl_device_id *) nsMemory::Alloc(numDevices * sizeof(cl_device_id)); err_code2 = clGetProgramInfo(program, CL_PROGRAM_DEVICES, numDevices * sizeof(cl_device_id), devices, NULL); if (err_code2 != CL_SUCCESS) { DEBUG_LOG_ERROR("CompileKernel", err_code); goto FAIL; } err_code2 = clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, &actual); if (actual > buildLogSize) { if (buildLog != NULL) { nsMemory::Free(buildLog); } buildLog = (char *) nsMemory::Alloc(actual * sizeof(char)); if (buildLog == NULL) { DEBUG_LOG_STATUS("CompileKernel", "Cannot allocate buildLog"); buildLogSize = 0; goto DONE; } buildLogSize = actual; err_code2 = clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, &actual); } if (err_code2 != CL_SUCCESS) { DEBUG_LOG_ERROR("CompileKernel", err_code); goto FAIL; } DEBUG_LOG_STATUS("CompileKernel", "buildLog: " << buildLog); goto DONE; FAIL: if (buildLog != NULL) { nsMemory::Free(buildLog); buildLog = NULL; buildLogSize = 0; } DONE: if (devices != NULL) { nsMemory::Free(devices); } kernelNameStr = ToNewUTF8String(kernelName); kernel = clCreateKernel(program, kernelNameStr, &err_code); nsMemory::Free( kernelNameStr); clReleaseProgram(program); if (err_code != CL_SUCCESS) { DEBUG_LOG_ERROR("CompileKernel", err_code); return NS_ERROR_NOT_AVAILABLE; } ret = new dpoCKernel(this); if (ret == NULL) { clReleaseKernel(kernel); DEBUG_LOG_STATUS("CompileKernel", "Cannot create new dpoCKernel object"); return NS_ERROR_OUT_OF_MEMORY; } /* all kernels share the single buffer for the failure code */ result = ret->InitKernel(cmdQueue, kernel, kernelFailureMem); if (NS_FAILED(result)) { clReleaseKernel(kernel); return result; } ret.forget((dpoCKernel **)_retval); return NS_OK; }