/* * init_opencl_context_and_shmem * * We can have performance gain using asynchronous DMA transfer when data * chunk it moved to OpenCL device from host machine, however, it requires * preparations to ensure the memory region to be copied to/from is pinned * on RAM; not swapped out. OpenCL provides an interface to map a certain * host address area as pinned buffer object, even though its size is * restricted to CL_DEVICE_MAX_MEM_ALLOC_SIZE parameter. Usually, it is * much less than size of shared memory to be assigned to PG-Strom, around * 500MB - 2GB in typical GPU/MIC device. So, we need to split a flat * continuous memory into several 'zones' to pin it using OpenCL interface. * Because it is a job of OpenCL intermediation server to collect properties * of devices, and this server shall be launched post initialization stage, * we also have to acquire and pin the shared memory region in the context * of OpenCL intermediation server, not postmaster itself. */ static void init_opencl_context_and_shmem(void) { Size zone_length = LONG_MAX; cl_int i, rc; /* * Create an OpenCL context */ opencl_context = clCreateContext(NULL, opencl_num_devices, opencl_devices, NULL, NULL, &rc); if (rc != CL_SUCCESS) elog(ERROR, "clCreateContext failed: %s", opencl_strerror(rc)); /* * Create an OpenCL command queue for each device */ for (i=0; i < opencl_num_devices; i++) { const pgstrom_device_info *dev_info = pgstrom_get_device_info(i); opencl_cmdq[i] = clCreateCommandQueue(opencl_context, opencl_devices[i], CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE, &rc); if (rc != CL_SUCCESS) elog(ERROR, "clCreateCommandQueue failed: %s", opencl_strerror(rc)); if (zone_length > dev_info->dev_max_mem_alloc_size) zone_length = (dev_info->dev_max_mem_alloc_size & ~((1UL << 20) - 1)); } /* Lock shared memory of PG-Strom's private area */ pgstrom_setup_shmem(zone_length, on_shmem_zone_callback); /* Lock shared memory of shared buffer area */ if (!on_shmem_zone_callback(BufferBlocks, NBuffers * (Size) BLCKSZ, "buffer", false)) { Size total_size = NBuffers * (Size) BLCKSZ; Size offset; Assert((zone_length & (BLCKSZ - 1)) == 0); for (offset = 0; offset < total_size; offset += zone_length) { on_shmem_zone_callback(BufferBlocks + offset, Min(zone_length, total_size - offset), "buffer", true); } } }
Datum pgstrom_opencl_device_info(PG_FUNCTION_ARGS) { FuncCallContext *fncxt; Datum values[4]; bool isnull[4]; HeapTuple tuple; uint32 dindex; uint32 pindex; const pgstrom_device_info *dinfo; const char *key; const char *value; char buf[256]; int ofs = 0; if (SRF_IS_FIRSTCALL()) { TupleDesc tupdesc; MemoryContext oldcxt; fncxt = SRF_FIRSTCALL_INIT(); oldcxt = MemoryContextSwitchTo(fncxt->multi_call_memory_ctx); tupdesc = CreateTemplateTupleDesc(4, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "dnum", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "pnum", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "property", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 4, "value", TEXTOID, -1, 0); fncxt->tuple_desc = BlessTupleDesc(tupdesc); fncxt->user_fctx = 0; MemoryContextSwitchTo(oldcxt); } fncxt = SRF_PERCALL_SETUP(); dindex = fncxt->call_cntr / 55; pindex = fncxt->call_cntr % 55; if (dindex == pgstrom_get_device_nums()) SRF_RETURN_DONE(fncxt); dinfo = pgstrom_get_device_info(dindex); Assert(dinfo != NULL); switch (pindex) { case 0: key = "platform index"; value = psprintf("%u", dinfo->pl_info->pl_index); break; case 1: key = "platform profile"; value = dinfo->pl_info->pl_profile; break; case 2: key = "platform version"; value = dinfo->pl_info->pl_version; break; case 3: key = "platform name"; value = dinfo->pl_info->pl_name; break; case 4: key = "platform vendor"; value = dinfo->pl_info->pl_vendor; break; case 5: key = "platform extensions"; value = dinfo->pl_info->pl_extensions; break; case 6: key = "address bits"; value = psprintf("%u", dinfo->dev_address_bits); break; case 7: key = "device available"; value = dinfo->dev_available ? "yes" : "no"; break; case 8: key = "compiler available"; value = dinfo->dev_compiler_available ? "yes" : "no"; break; case 9: key = "double fp config"; value = fp_config_to_cstring(dinfo->dev_double_fp_config); break; case 10: key = "little endian"; value = dinfo->dev_endian_little ? "yes" : "no"; break; case 11: key = "error correction support"; value = dinfo->dev_error_correction_support ? "yes" : "no"; break; case 12: key = "execution capabilities"; if (dinfo->dev_execution_capabilities & CL_EXEC_KERNEL) ofs += sprintf(buf + ofs, "OpenCL"); if (dinfo->dev_execution_capabilities & CL_EXEC_NATIVE_KERNEL) ofs += sprintf(buf + ofs, "%sNative", ofs > 0 ? ", " : ""); value = buf; break; case 13: key = "device extensions"; value = dinfo->dev_device_extensions; break; case 14: key = "global mem cache size"; value = psprintf("%lu", dinfo->dev_global_mem_cache_size); break; case 15: key = "global mem cache type"; switch (dinfo->dev_global_mem_cache_type) { case CL_NONE: value = "none"; break; case CL_READ_ONLY_CACHE: value = "read only"; break; case CL_READ_WRITE_CACHE: value = "read write"; break; default: value = "???"; break; } break; case 16: key = "global mem cacheline size"; value = psprintf("%u", dinfo->dev_global_mem_cacheline_size); break; case 17: key = "global mem size"; value = psprintf("%lu", dinfo->dev_global_mem_size); break; case 18: key = "host unified memory"; value = dinfo->dev_host_unified_memory ? "yes" : "no"; break; case 19: key = "local mem size"; value = psprintf("%lu", dinfo->dev_local_mem_size); break; case 20: key = "local mem type"; switch (dinfo->dev_local_mem_type) { case CL_LOCAL: value = "local"; break; case CL_GLOBAL: value = "global"; break; case CL_NONE: value = "none"; break; default: value = "???"; break; } break; case 21: key = "max clock frequency"; value = psprintf("%u", dinfo->dev_max_clock_frequency); break; case 22: key = "max compute units"; value = psprintf("%u", dinfo->dev_max_compute_units); break; case 23: key = "max constant args"; value = psprintf("%u", dinfo->dev_max_constant_args); break; case 24: key = "max constant buffer size"; value = psprintf("%lu", dinfo->dev_max_constant_buffer_size); break; case 25: key = "max mem alloc size"; value = psprintf("%lu", dinfo->dev_max_mem_alloc_size); break; case 26: key = "max parameter size"; value = psprintf("%lu", dinfo->dev_max_parameter_size); break; case 27: key = "max samplers"; value = psprintf("%u", dinfo->dev_max_samplers); break; case 28: key = "max work group size"; value = psprintf("%zu", dinfo->dev_max_work_group_size); break; case 29: key = "max work group dimensions"; value = psprintf("%u", dinfo->dev_max_work_item_dimensions); break; case 30: key = "max work item sizes"; value = psprintf("{%zu, %zu, %zu}", dinfo->dev_max_work_item_sizes[0], dinfo->dev_max_work_item_sizes[1], dinfo->dev_max_work_item_sizes[2]); break; case 31: key = "mem base address align"; value = psprintf("%u", dinfo->dev_mem_base_addr_align); break; case 32: key = "device name"; value = dinfo->dev_name; break; case 33: key = "native vector width (char)"; value = psprintf("%u", dinfo->dev_native_vector_width_char); break; case 34: key = "native vector width (short)"; value = psprintf("%u", dinfo->dev_native_vector_width_short); break; case 35: key = "native vector width (int)"; value = psprintf("%u", dinfo->dev_native_vector_width_int); break; case 36: key = "native vector width (long)"; value = psprintf("%u", dinfo->dev_native_vector_width_long); break; case 37: key = "native vector width (float)"; value = psprintf("%u", dinfo->dev_native_vector_width_float); break; case 38: key = "native vector width (double)"; value = psprintf("%u", dinfo->dev_native_vector_width_double); break; case 39: key = "opencl c version"; value = dinfo->dev_opencl_c_version; break; case 40: key = "preferred vector width (char)"; value = psprintf("%u", dinfo->dev_preferred_vector_width_char); break; case 41: key = "preferred vector width (short)"; value = psprintf("%u", dinfo->dev_preferred_vector_width_short); break; case 42: key = "preferred vector width (int)"; value = psprintf("%u", dinfo->dev_preferred_vector_width_int); break; case 43: key = "preferred vector width (long)"; value = psprintf("%u", dinfo->dev_preferred_vector_width_long); break; case 44: key = "preferred vector width (float)"; value = psprintf("%u", dinfo->dev_preferred_vector_width_float); break; case 45: key = "preferred vector width (double)"; value = psprintf("%u", dinfo->dev_preferred_vector_width_double); break; case 46: key = "device profile"; value = dinfo->dev_profile; break; case 47: key = "profiling timer resolution"; value = psprintf("%zu", dinfo->dev_profiling_timer_resolution); break; case 48: key = "command queue properties"; if (dinfo->dev_queue_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) ofs += sprintf(buf, "%sout of order", ofs > 0 ? ", " : ""); if (dinfo->dev_queue_properties & CL_QUEUE_PROFILING_ENABLE) ofs += sprintf(buf, "%sprofiling", ofs > 0 ? ", " : ""); value = buf; break; case 49: key = "single fp config"; value = fp_config_to_cstring(dinfo->dev_single_fp_config); break; case 50: key = "device type"; if (dinfo->dev_type & CL_DEVICE_TYPE_CPU) ofs += sprintf(buf, "%scpu", ofs > 0 ? ", " : ""); if (dinfo->dev_type & CL_DEVICE_TYPE_GPU) ofs += sprintf(buf, "%sgpu", ofs > 0 ? ", " : ""); if (dinfo->dev_type & CL_DEVICE_TYPE_ACCELERATOR) ofs += sprintf(buf, "%saccelerator", ofs > 0 ? ", " : ""); if (dinfo->dev_type & CL_DEVICE_TYPE_DEFAULT) ofs += sprintf(buf, "%sdefault", ofs > 0 ? ", " : ""); if (dinfo->dev_type & CL_DEVICE_TYPE_CUSTOM) ofs += sprintf(buf, "%scustom", ofs > 0 ? ", " : ""); value = buf; break; case 51: key = "device vendor"; value = dinfo->dev_vendor; break; case 52: key = "device vendor id"; value = psprintf("%u", dinfo->dev_vendor_id); break; case 53: key = "device version"; value = dinfo->dev_version; break; case 54: key = "driver version"; value = dinfo->driver_version; break; default: elog(ERROR, "unexpected property index"); break; } memset(isnull, 0, sizeof(isnull)); values[0] = Int32GetDatum(dindex); values[1] = Int32GetDatum(pindex); values[2] = CStringGetTextDatum(key); values[3] = CStringGetTextDatum(value); tuple = heap_form_tuple(fncxt->tuple_desc, values, isnull); SRF_RETURN_NEXT(fncxt, HeapTupleGetDatum(tuple)); }