// Serialize constructor
MessageResourcesQueryACK() : Message(PROTOCOL_VERSION, 112, 0)
{
    cpu("not clear");
    cpufrequency("not clear");
    ram("not clear");
    gpu("not clear");
    video_ram("not clear");
}
static size_t get_task_max_work_group_size()
{
    size_t max_available;

    if (use_local(source_in_use))
        max_available = get_local_memory_size(ocl_gpu_id) /
            (sizeof(sha512_password) + sizeof(sha512_ctx) +
             sizeof(sha512_buffers)) - 1;
    else if (gpu(source_in_use))
        max_available = get_local_memory_size(ocl_gpu_id) /
            sizeof(sha512_password);
    else
        max_available = get_max_work_group_size(ocl_gpu_id);

    if (max_available > get_current_work_group_size(ocl_gpu_id, crypt_kernel))
        return get_current_work_group_size(ocl_gpu_id, crypt_kernel);

    return max_available;
}
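/*
 * Worked example (hypothetical sizes, for illustration only): with
 * 32 KiB of local memory and sizeof(sha512_password) + sizeof(sha512_ctx)
 * + sizeof(sha512_buffers) totalling 512 bytes, the LOCAL variant yields
 * 32768 / 512 - 1 = 63 work-items per group, which is then capped against
 * the kernel's own reported work-group limit.
 */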
/* ------- Crypt function ------- */
static void crypt_all(int count)
{
    int i;

    //Send data to the device.
    HANDLE_CLERROR(clEnqueueWriteBuffer(queue[ocl_gpu_id], salt_buffer, CL_FALSE, 0,
        sizeof(sha512_salt), salt, 0, NULL, NULL),
        "failed in clEnqueueWriteBuffer salt_buffer");

    if (new_keys)
        HANDLE_CLERROR(clEnqueueWriteBuffer(queue[ocl_gpu_id], pass_buffer, CL_FALSE, 0,
            sizeof(sha512_password) * global_work_size, plaintext, 0, NULL, NULL),
            "failed in clEnqueueWriteBuffer pass_buffer");

    //Enqueue the kernel(s).
    if (gpu(source_in_use) || use_local(source_in_use)) {
        HANDLE_CLERROR(clEnqueueNDRangeKernel(queue[ocl_gpu_id], prepare_kernel, 1, NULL,
            &global_work_size, &local_work_size, 0, NULL, NULL),
            "failed in clEnqueueNDRangeKernel I");

        //Run the main loop in chunks of HASH_LOOPS rounds.
        for (i = 0; i < (salt->rounds / HASH_LOOPS); i++) {
            HANDLE_CLERROR(clEnqueueNDRangeKernel(queue[ocl_gpu_id], crypt_kernel, 1, NULL,
                &global_work_size, &local_work_size, 0, NULL, profilingEvent),
                "failed in clEnqueueNDRangeKernel");
        }
        HANDLE_CLERROR(clEnqueueNDRangeKernel(queue[ocl_gpu_id], final_kernel, 1, NULL,
            &global_work_size, &local_work_size, 0, NULL, NULL),
            "failed in clEnqueueNDRangeKernel II");
    } else
        HANDLE_CLERROR(clEnqueueNDRangeKernel(queue[ocl_gpu_id], crypt_kernel, 1, NULL,
            &global_work_size, &local_work_size, 0, NULL, profilingEvent),
            "failed in clEnqueueNDRangeKernel");

    //Read the hashes back (asynchronously; clFinish below synchronizes).
    HANDLE_CLERROR(clEnqueueReadBuffer(queue[ocl_gpu_id], hash_buffer, CL_FALSE, 0,
        sizeof(sha512_hash) * global_work_size, calculated_hash, 0, NULL, NULL),
        "failed in reading data back");

    //Wait for all enqueued work to finish.
    HANDLE_CLERROR(clFinish(queue[ocl_gpu_id]), "failed in clFinish");
    new_keys = 0;
}
bool GrGLProgramBuilder::compileAndAttachShaders(GrGLSLShaderBuilder& shader,
                                                 GrGLuint programId,
                                                 GrGLenum type,
                                                 SkTDArray<GrGLuint>* shaderIds,
                                                 const SkSL::Program::Settings& settings,
                                                 SkSL::Program::Inputs* outInputs) {
    SkSL::String glsl;
    std::unique_ptr<SkSL::Program> program = GrSkSLtoGLSL(gpu()->glContext(), type,
                                                          shader.fCompilerStrings.begin(),
                                                          shader.fCompilerStringLengths.begin(),
                                                          shader.fCompilerStrings.count(),
                                                          settings,
                                                          &glsl);
    *outInputs = program->fInputs;
    return this->compileAndAttachShaders(glsl.c_str(), glsl.size(), programId, type, shaderIds,
                                         settings, *outInputs);
}
/// Creates a shared OpenCL/OpenGL context for the currently active
/// OpenGL context.
///
/// Once created, the shared context can be used to create OpenCL memory
/// objects which can interact with OpenGL memory objects (e.g. VBOs).
///
/// \throws unsupported_extension_error if no CL-GL sharing capable devices
/// are found.
inline context opengl_create_shared_context()
{
    // name of the OpenGL sharing extension for the system
#if defined(__APPLE__)
    const char *cl_gl_sharing_extension = "cl_APPLE_gl_sharing";
#else
    const char *cl_gl_sharing_extension = "cl_khr_gl_sharing";
#endif

#if defined(__APPLE__)
    // get OpenGL share group
    CGLContextObj cgl_current_context = CGLGetCurrentContext();
    CGLShareGroupObj cgl_share_group = CGLGetShareGroup(cgl_current_context);

    cl_context_properties properties[] = {
        CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE,
        (cl_context_properties) cgl_share_group,
        0
    };

    cl_int error = 0;
    cl_context cl_gl_context = clCreateContext(properties, 0, 0, 0, 0, &error);
    if(!cl_gl_context){
        BOOST_THROW_EXCEPTION(opencl_error(error));
    }

    return context(cl_gl_context, false);
#else
    typedef cl_int(*GetGLContextInfoKHRFunction)(
        const cl_context_properties*, cl_gl_context_info, size_t, void *, size_t *
    );

    std::vector<platform> platforms = system::platforms();
    for(size_t i = 0; i < platforms.size(); i++){
        const platform &platform = platforms[i];

        // load clGetGLContextInfoKHR() extension function
        GetGLContextInfoKHRFunction GetGLContextInfoKHR =
            reinterpret_cast<GetGLContextInfoKHRFunction>(
                reinterpret_cast<size_t>(
                    platform.get_extension_function_address("clGetGLContextInfoKHR")
                )
            );
        if(!GetGLContextInfoKHR){
            continue;
        }

        // create context properties listing the platform and current OpenGL display
        cl_context_properties properties[] = {
            CL_CONTEXT_PLATFORM, (cl_context_properties) platform.id(),
        #if defined(__linux__)
            CL_GL_CONTEXT_KHR, (cl_context_properties) glXGetCurrentContext(),
            CL_GLX_DISPLAY_KHR, (cl_context_properties) glXGetCurrentDisplay(),
        #elif defined(WIN32)
            CL_GL_CONTEXT_KHR, (cl_context_properties) wglGetCurrentContext(),
            CL_WGL_HDC_KHR, (cl_context_properties) wglGetCurrentDC(),
        #endif
            0
        };

        // lookup current OpenCL device for current OpenGL context
        cl_device_id gpu_id;
        cl_int ret = GetGLContextInfoKHR(
            properties,
            CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR,
            sizeof(cl_device_id),
            &gpu_id,
            0
        );
        if(ret != CL_SUCCESS){
            continue;
        }

        // create device object for the GPU and ensure it supports CL-GL sharing
        device gpu(gpu_id, false);
        if(!gpu.supports_extension(cl_gl_sharing_extension)){
            continue;
        }

        // return CL-GL sharing context
        return context(gpu, properties);
    }
#endif

    // no CL-GL sharing capable devices found
    BOOST_THROW_EXCEPTION(
        unsupported_extension_error(cl_gl_sharing_extension)
    );
}
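// Usage sketch for the function above (hedged example, not part of the
// library source): it assumes an OpenGL context is already current on
// this thread and that `vbo` is an existing, hypothetical GL buffer id.
// All Boost.Compute calls used here (opengl_create_shared_context,
// opengl_buffer, opengl_enqueue_acquire_buffer/release_buffer) come from
// <boost/compute/interop/opengl.hpp>.
#include <boost/compute/system.hpp>
#include <boost/compute/context.hpp>
#include <boost/compute/command_queue.hpp>
#include <boost/compute/interop/opengl.hpp>

namespace compute = boost::compute;

void share_vbo_with_opencl(GLuint vbo)
{
    // create the shared CL-GL context for the current OpenGL context
    compute::context ctx = compute::opengl_create_shared_context();

    // use the first device in the shared context
    compute::device gpu = ctx.get_device();
    compute::command_queue queue(ctx, gpu);

    // wrap the existing OpenGL VBO as an OpenCL buffer
    compute::opengl_buffer cl_vbo(ctx, vbo);

    // acquire the buffer before touching it from OpenCL, release after
    compute::opengl_enqueue_acquire_buffer(cl_vbo, queue);
    // ... enqueue kernels operating on cl_vbo ...
    compute::opengl_enqueue_release_buffer(cl_vbo, queue);
    queue.finish();
}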
int GrGLFragmentOnlyShaderBuilder::addTexCoordSets(int count) {
    int firstFreeCoordSet = fTexCoordSetCnt;
    fTexCoordSetCnt += count;
    SkASSERT(gpu()->glCaps().maxFixedFunctionTextureCoords() >= fTexCoordSetCnt);
    return firstFreeCoordSet;
}
int main(int argc, char **argv)
{
    //ClImage image("./checker.png");
    ClImage image;
    if(image.open("./test.png") == false)
        return 1;

    ClImage results(image.width(), image.height());
    size_t nSize = image.Size();
    unsigned char *out_img = new unsigned char[nSize];

    TRACE("image nSize = %zu\n", nSize);
    TRACE("image size input = (%zu, %zu)\n", image.width(), image.height());

    PERF_START("ocl-copy");

    ClHost host(CL_DEVICE_TYPE_GPU);
    ClDevice gpu(&host);

    ClBuffer iImage = gpu.image(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, image);
    ClBuffer oImage = gpu.image(CL_MEM_WRITE_ONLY, NULL, image.width(), image.height());
    ClBuffer sampler = gpu.sampler(CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST);

    if(gpu.open("./hello-img.cl", "copyimg") == false)
        return 1;

    gpu.arg(iImage);
    gpu.arg(oImage);
    gpu.arg(sampler);

#define WORKDIM 2
    // full-image origin/region, needed by gpu.read() below in every build
    const size_t origin[3] = {0, 0, 0};
    const size_t region[3] = {image.width(), image.height(), 1};
#ifndef _WITH_DIRECT
    gpu.write(0, iImage, image.width(), image.height());
#endif //_WITH_DIRECT

    size_t dev_local = gpu.getWorkGroupInfo();
#ifdef _INTEL
    TRACE("before dev_local = %zu\n", dev_local);
    if(argc > 1) {
        TRACE("argv = %s\n", argv[1]);
        if(atoi(argv[1]) < 1)
            dev_local = 9;
        else
            dev_local = atoi(argv[1]);
    }
    TRACE("after dev_local = %zu\n", dev_local);
#else
    TRACE("before dev_local = %zu\n", dev_local);
    if(argc > 1) {
        TRACE("argv = %s\n", argv[1]);
        if(atoi(argv[1]) < 1)
            dev_local = (dev_local/2) - 1;
        else
            dev_local = atoi(argv[1]);
    }
    TRACE("after dev_local = %zu\n", dev_local);
#endif //_INTEL

    size_t local[WORKDIM] = {dev_local, dev_local};
    size_t global[WORKDIM] = {gpu.roundup(local[0], image.width()),
                              gpu.roundup(local[1], image.height())};
    TRACE("run (%zu,%zu)\n", global[0], global[1]);

    gpu.run(WORKDIM, global, local);
    gpu.read(1, out_img, origin, region);
    //worker(ostr, nsize[0]);

    results.store("results.png", out_img);
    results.close();
    image.close();

    PERF_END("ocl-copy");

    delete[] out_img;  // allocated with new[], so release with delete[]
    TRACE("%s\n", "eof");
    return 0;
}
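// Worked example (hypothetical numbers): with dev_local = 16 and a
// 100x60 image, roundup() presumably pads each dimension up to the next
// multiple of the local size, giving a 112x64 global range; the kernel
// is then expected to bounds-check work-items against the real image size.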
GrGLProgram* GrGLProgramBuilder::finalize() {
    TRACE_EVENT0("skia", TRACE_FUNC);

    // verify we can get a program id
    GrGLuint programID;
    GL_CALL_RET(programID, CreateProgram());
    if (0 == programID) {
        this->cleanupFragmentProcessors();
        return nullptr;
    }

    if (this->gpu()->glCaps().programBinarySupport() &&
        this->gpu()->getContext()->contextPriv().getPersistentCache()) {
        GL_CALL(ProgramParameteri(programID, GR_GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GR_GL_TRUE));
    }

    this->finalizeShaders();

    // compile shaders and bind attributes / uniforms
    const GrPrimitiveProcessor& primProc = this->primitiveProcessor();
    SkSL::Program::Settings settings;
    settings.fCaps = this->gpu()->glCaps().shaderCaps();
    settings.fFlipY = this->pipeline().proxy()->origin() != kTopLeft_GrSurfaceOrigin;
    settings.fSharpenTextures = this->gpu()->getContext()->contextPriv().sharpenMipmappedTextures();
    SkSL::Program::Inputs inputs;
    SkTDArray<GrGLuint> shadersToDelete;
    bool cached = fGpu->glCaps().programBinarySupport() && nullptr != fCached.get();
    if (cached) {
        this->bindProgramResourceLocations(programID);
        // cache hit, just hand the binary to GL
        const uint8_t* bytes = fCached->bytes();
        size_t offset = 0;
        memcpy(&inputs, bytes + offset, sizeof(inputs));
        offset += sizeof(inputs);
        int binaryFormat;
        memcpy(&binaryFormat, bytes + offset, sizeof(binaryFormat));
        offset += sizeof(binaryFormat);
        GrGLClearErr(this->gpu()->glInterface());
        GR_GL_CALL_NOERRCHECK(this->gpu()->glInterface(),
                              ProgramBinary(programID, binaryFormat, (void*) (bytes + offset),
                                            fCached->size() - offset));
        if (GR_GL_GET_ERROR(this->gpu()->glInterface()) == GR_GL_NO_ERROR) {
            if (inputs.fRTHeight) {
                this->addRTHeightUniform(SKSL_RTHEIGHT_NAME);
            }
            cached = this->checkLinkStatus(programID);
        } else {
            cached = false;
        }
    }
    if (!cached) {
        // cache miss, compile shaders
        if (fFS.fForceHighPrecision) {
            settings.fForceHighPrecision = true;
        }
        SkSL::String glsl;
        std::unique_ptr<SkSL::Program> fs = GrSkSLtoGLSL(gpu()->glContext(),
                                                         GR_GL_FRAGMENT_SHADER,
                                                         fFS.fCompilerStrings.begin(),
                                                         fFS.fCompilerStringLengths.begin(),
                                                         fFS.fCompilerStrings.count(),
                                                         settings,
                                                         &glsl);
        inputs = fs->fInputs;
        if (inputs.fRTHeight) {
            this->addRTHeightUniform(SKSL_RTHEIGHT_NAME);
        }
        if (!this->compileAndAttachShaders(glsl.c_str(), glsl.size(), programID,
                                           GR_GL_FRAGMENT_SHADER, &shadersToDelete, settings,
                                           inputs)) {
            this->cleanupProgram(programID, shadersToDelete);
            return nullptr;
        }

        std::unique_ptr<SkSL::Program> vs = GrSkSLtoGLSL(gpu()->glContext(),
                                                         GR_GL_VERTEX_SHADER,
                                                         fVS.fCompilerStrings.begin(),
                                                         fVS.fCompilerStringLengths.begin(),
                                                         fVS.fCompilerStrings.count(),
                                                         settings,
                                                         &glsl);
        if (!this->compileAndAttachShaders(glsl.c_str(), glsl.size(), programID,
                                           GR_GL_VERTEX_SHADER, &shadersToDelete, settings,
                                           inputs)) {
            this->cleanupProgram(programID, shadersToDelete);
            return nullptr;
        }

        // NVPR actually requires a vertex shader to compile
        bool useNvpr = primProc.isPathRendering();
        if (!useNvpr) {
            int vaCount = primProc.numAttribs();
            for (int i = 0; i < vaCount; i++) {
                GL_CALL(BindAttribLocation(programID, i, primProc.getAttrib(i).fName));
            }
        }

        if (primProc.willUseGeoShader()) {
            std::unique_ptr<SkSL::Program> gs;
            gs = GrSkSLtoGLSL(gpu()->glContext(),
                              GR_GL_GEOMETRY_SHADER,
                              fGS.fCompilerStrings.begin(),
                              fGS.fCompilerStringLengths.begin(),
                              fGS.fCompilerStrings.count(),
                              settings,
                              &glsl);
            if (!this->compileAndAttachShaders(glsl.c_str(), glsl.size(), programID,
                                               GR_GL_GEOMETRY_SHADER, &shadersToDelete, settings,
                                               inputs)) {
                this->cleanupProgram(programID, shadersToDelete);
                return nullptr;
            }
        }
        this->bindProgramResourceLocations(programID);

        GL_CALL(LinkProgram(programID));
    }
    // Calling GetProgramiv is expensive in Chromium. Assume success in release builds.
    bool checkLinked = kChromium_GrGLDriver != fGpu->ctxInfo().driver();
#ifdef SK_DEBUG
    checkLinked = true;
#endif
    if (checkLinked) {
        if (!this->checkLinkStatus(programID)) {
            SkDebugf("VS:\n");
            GrGLPrintShader(fGpu->glContext(), GR_GL_VERTEX_SHADER,
                            fVS.fCompilerStrings.begin(), fVS.fCompilerStringLengths.begin(),
                            fVS.fCompilerStrings.count(), settings);
            if (primProc.willUseGeoShader()) {
                SkDebugf("\nGS:\n");
                GrGLPrintShader(fGpu->glContext(), GR_GL_GEOMETRY_SHADER,
                                fGS.fCompilerStrings.begin(),
                                fGS.fCompilerStringLengths.begin(),
                                fGS.fCompilerStrings.count(), settings);
            }
            SkDebugf("\nFS:\n");
            GrGLPrintShader(fGpu->glContext(), GR_GL_FRAGMENT_SHADER,
                            fFS.fCompilerStrings.begin(), fFS.fCompilerStringLengths.begin(),
                            fFS.fCompilerStrings.count(), settings);
            SkDEBUGFAIL("");
            return nullptr;
        }
    }
    this->resolveProgramResourceLocations(programID);
    this->cleanupShaders(shadersToDelete);
    if (!cached && this->gpu()->getContext()->contextPriv().getPersistentCache() &&
        fGpu->glCaps().programBinarySupport()) {
        GrGLsizei length = 0;
        GL_CALL(GetProgramiv(programID, GR_GL_PROGRAM_BINARY_LENGTH, &length));
        if (length > 0) {
            // store shader in cache; blob layout: inputs | binaryFormat | binary
            sk_sp<SkData> key = SkData::MakeWithoutCopy(desc()->asKey(), desc()->keyLength());
            GrGLenum binaryFormat;
            std::unique_ptr<char[]> binary(new char[length]);
            GL_CALL(GetProgramBinary(programID, length, &length, &binaryFormat, binary.get()));
            size_t dataLength = sizeof(inputs) + sizeof(binaryFormat) + length;
            std::unique_ptr<uint8_t[]> data(new uint8_t[dataLength]);
            size_t offset = 0;
            memcpy(data.get() + offset, &inputs, sizeof(inputs));
            offset += sizeof(inputs);
            memcpy(data.get() + offset, &binaryFormat, sizeof(binaryFormat));
            offset += sizeof(binaryFormat);
            memcpy(data.get() + offset, binary.get(), length);
            this->gpu()->getContext()->contextPriv().getPersistentCache()->store(
                    *key, *SkData::MakeWithoutCopy(data.get(), dataLength));
        }
    }
    return this->createProgram(programID);
}
/* ------- Initialization ------- */
static void init(struct fmt_main * self)
{
    char * tmp_value;
    char * task = "$JOHN/cryptsha512_kernel_DEFAULT.cl";
    uint64_t startTime, runtime;

    opencl_init_dev(ocl_gpu_id, platform_id);
    startTime = (unsigned long) time(NULL);
    source_in_use = device_info[ocl_gpu_id];

    if ((tmp_value = getenv("_TYPE")))
        source_in_use = atoi(tmp_value);

    if ((tmp_value = getenv("_FAST")))
        fast_mode = TRUE;

    if (use_local(source_in_use))
        task = "$JOHN/cryptsha512_kernel_LOCAL.cl";
    else if (gpu(source_in_use)) {
        fprintf(stderr, "Building the kernel, this could take a while\n");
        task = "$JOHN/cryptsha512_kernel_GPU.cl";
    }
    fflush(stdout);
    opencl_build_kernel(task, ocl_gpu_id);

    if ((runtime = (unsigned long) (time(NULL) - startTime)) > 2UL)
        fprintf(stderr, "Elapsed time: %lu seconds\n", runtime);
    fflush(stdout);

    // create kernel(s) to execute
    crypt_kernel = clCreateKernel(program[ocl_gpu_id], "kernel_crypt", &ret_code);
    HANDLE_CLERROR(ret_code, "Error creating kernel. Double-check kernel name?");

    if (gpu(source_in_use) || use_local(source_in_use)) {
        prepare_kernel = clCreateKernel(program[ocl_gpu_id], "kernel_prepare", &ret_code);
        HANDLE_CLERROR(ret_code, "Error creating kernel_prepare. Double-check kernel name?");
        final_kernel = clCreateKernel(program[ocl_gpu_id], "kernel_final", &ret_code);
        HANDLE_CLERROR(ret_code, "Error creating kernel_final. Double-check kernel name?");
    }
    global_work_size = get_task_max_size();
    local_work_size = get_default_workgroup();

    if (source_in_use != device_info[ocl_gpu_id])
        fprintf(stderr, "Selected runtime id %d, source (%s)\n", source_in_use, task);

    if ((tmp_value = cfg_get_param(SECTION_OPTIONS, SUBSECTION_OPENCL, LWS_CONFIG)))
        local_work_size = atoi(tmp_value);

    if ((tmp_value = getenv("LWS")))
        local_work_size = atoi(tmp_value);

    //Check if local_work_size is a valid number.
    if (local_work_size > get_task_max_work_group_size()) {
        local_work_size = 0; //Force find a valid number.
    }
    self->params.max_keys_per_crypt = global_work_size;

    if (!local_work_size) {
        local_work_size = get_task_max_work_group_size();
        create_clobj(global_work_size, self);
        find_best_workgroup(self);
        release_clobj();
    }

    if ((tmp_value = cfg_get_param(SECTION_OPTIONS, SUBSECTION_OPENCL, GWS_CONFIG)))
        global_work_size = atoi(tmp_value);

    if ((tmp_value = getenv("GWS")))
        global_work_size = atoi(tmp_value);

    //Check if a valid multiple is used.
    global_work_size = get_multiple(global_work_size, local_work_size);

    if (global_work_size)
        create_clobj(global_work_size, self);
    else {
        //user chose to die of boredom
        global_work_size = get_task_max_size();
        find_best_gws(self);
    }
    fprintf(stderr, "Local work size (LWS) %d, global work size (GWS) %zd\n",
            (int) local_work_size, global_work_size);
    self->params.max_keys_per_crypt = global_work_size;
}
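/*
 * get_multiple() is called above but not shown in this excerpt. Judging
 * from its uses (making GWS a valid multiple of LWS, and computing the
 * leftover rounds below salt->rounds), it appears to round a value down
 * to the nearest multiple of a base. A minimal sketch under that
 * assumption (hypothetical, not the original definition):
 */
static size_t get_multiple(size_t value, size_t base)
{
    //round value down to the largest multiple of base (0 base passes through)
    return base ? (value / base) * base : value;
}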
//Do the proper test using different sizes.
static cl_ulong gws_test(size_t num, struct fmt_main * self)
{
    cl_event myEvent;
    cl_int ret_code;
    cl_uint *tmpbuffer;
    cl_ulong startTime, endTime, runtime;
    int i, loops;

    //Prepare buffers.
    create_clobj(num, self);

    tmpbuffer = mem_alloc(sizeof(sha512_hash) * num);

    if (tmpbuffer == NULL) {
        fprintf(stderr, "Malloc failure in find_best_gws\n");
        exit(EXIT_FAILURE);
    }

    queue_prof = clCreateCommandQueue(context[ocl_gpu_id], devices[ocl_gpu_id],
            CL_QUEUE_PROFILING_ENABLE, &ret_code);
    HANDLE_CLERROR(ret_code, "Failed in clCreateCommandQueue");

    // Set salt.
    set_salt(get_salt("$6$saltstring$"));
    salt->initial = salt->rounds - get_multiple(salt->rounds, HASH_LOOPS);

    // Set keys
    for (i = 0; i < num; i++) {
        set_key("aaabaabaaa", i);
    }

    //** Get execution time **//
    HANDLE_CLERROR(clEnqueueWriteBuffer(queue_prof, salt_buffer, CL_FALSE, 0,
            sizeof(sha512_salt), salt, 0, NULL, &myEvent),
            "Failed in clEnqueueWriteBuffer");
    HANDLE_CLERROR(clFinish(queue_prof), "Failed in clFinish");
    HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT,
            sizeof(cl_ulong), &startTime, NULL),
            "Failed in clGetEventProfilingInfo I");
    HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END,
            sizeof(cl_ulong), &endTime, NULL),
            "Failed in clGetEventProfilingInfo II");
    HANDLE_CLERROR(clReleaseEvent(myEvent), "Failed in clReleaseEvent");
    runtime = endTime - startTime;

    //** Get execution time **//
    HANDLE_CLERROR(clEnqueueWriteBuffer(queue_prof, pass_buffer, CL_FALSE, 0,
            sizeof(sha512_password) * num, plaintext, 0, NULL, &myEvent),
            "Failed in clEnqueueWriteBuffer");
    HANDLE_CLERROR(clFinish(queue_prof), "Failed in clFinish");
    HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT,
            sizeof(cl_ulong), &startTime, NULL),
            "Failed in clGetEventProfilingInfo I");
    HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END,
            sizeof(cl_ulong), &endTime, NULL),
            "Failed in clGetEventProfilingInfo II");
    HANDLE_CLERROR(clReleaseEvent(myEvent), "Failed in clReleaseEvent");
    runtime += endTime - startTime;

    //** Get execution time **//
    if (gpu(source_in_use) || use_local(source_in_use)) {
        ret_code = clEnqueueNDRangeKernel(queue_prof, prepare_kernel,
                1, NULL, &num, &local_work_size, 0, NULL, &myEvent);
        HANDLE_CLERROR(clFinish(queue_prof), "Failed in clFinish");
        HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT,
                sizeof(cl_ulong), &startTime, NULL),
                "Failed in clGetEventProfilingInfo I");
        HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END,
                sizeof(cl_ulong), &endTime, NULL),
                "Failed in clGetEventProfilingInfo II");
        HANDLE_CLERROR(clReleaseEvent(myEvent), "Failed in clReleaseEvent");
        runtime += endTime - startTime;
    }
    loops = gpu(source_in_use) || use_local(source_in_use) ?
        (salt->rounds / HASH_LOOPS) : 1;

    //** Get execution time **//
    for (i = 0; i < loops; i++) {
        ret_code = clEnqueueNDRangeKernel(queue_prof, crypt_kernel,
                1, NULL, &num, &local_work_size, 0, NULL, &myEvent);
        HANDLE_CLERROR(clFinish(queue_prof), "Failed in clFinish");
        HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT,
                sizeof(cl_ulong), &startTime, NULL),
                "Failed in clGetEventProfilingInfo I");
        HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END,
                sizeof(cl_ulong), &endTime, NULL),
                "Failed in clGetEventProfilingInfo II");
        HANDLE_CLERROR(clReleaseEvent(myEvent), "Failed in clReleaseEvent");
        runtime += endTime - startTime;
    }

    //** Get execution time **//
    HANDLE_CLERROR(clEnqueueReadBuffer(queue_prof, hash_buffer, CL_FALSE, 0,
            sizeof(sha512_hash) * num, tmpbuffer, 0, NULL, &myEvent),
            "Failed in clEnqueueReadBuffer");
    HANDLE_CLERROR(clFinish(queue_prof), "Failed in clFinish");
    HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT,
            sizeof(cl_ulong), &startTime, NULL),
            "Failed in clGetEventProfilingInfo I");
    HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END,
            sizeof(cl_ulong), &endTime, NULL),
            "Failed in clGetEventProfilingInfo II");
    HANDLE_CLERROR(clReleaseEvent(myEvent), "Failed in clReleaseEvent");
    runtime += endTime - startTime;

    MEM_FREE(tmpbuffer);
    HANDLE_CLERROR(clReleaseCommandQueue(queue_prof),
            "Failed in clReleaseCommandQueue");
    release_clobj();

    if (ret_code != CL_SUCCESS) {
        if (ret_code != CL_INVALID_WORK_GROUP_SIZE)
            fprintf(stderr, "Error %d\n", ret_code);
        return 0;
    }
    return runtime;
}
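/*
 * The repeated "Get execution time" blocks above all perform the same
 * submit-to-end measurement on one event. A hypothetical helper like the
 * sketch below (not part of the original source; it assumes the same
 * HANDLE_CLERROR macro) could factor that pattern out:
 */
static cl_ulong profile_event_ns(cl_event event)
{
    cl_ulong submit, end;

    //query the queued command's submit and end timestamps
    HANDLE_CLERROR(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_SUBMIT,
            sizeof(cl_ulong), &submit, NULL),
            "Failed in clGetEventProfilingInfo I");
    HANDLE_CLERROR(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
            sizeof(cl_ulong), &end, NULL),
            "Failed in clGetEventProfilingInfo II");
    HANDLE_CLERROR(clReleaseEvent(event), "Failed in clReleaseEvent");

    //elapsed device time in nanoseconds
    return end - submit;
}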
/* ------- Create and destroy necessary objects ------- */
static void create_clobj(int gws, struct fmt_main * self)
{
    self->params.min_keys_per_crypt = self->params.max_keys_per_crypt = gws;

    pinned_saved_keys = clCreateBuffer(context[ocl_gpu_id],
            CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
            sizeof(sha512_password) * gws, NULL, &ret_code);
    HANDLE_CLERROR(ret_code, "Error creating page-locked memory pinned_saved_keys");

    plaintext = (sha512_password *) clEnqueueMapBuffer(queue[ocl_gpu_id],
            pinned_saved_keys, CL_TRUE, CL_MAP_WRITE | CL_MAP_READ, 0,
            sizeof(sha512_password) * gws, 0, NULL, NULL, &ret_code);
    HANDLE_CLERROR(ret_code, "Error mapping page-locked memory saved_plain");

    pinned_partial_hashes = clCreateBuffer(context[ocl_gpu_id],
            CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
            sizeof(sha512_hash) * gws, NULL, &ret_code);
    HANDLE_CLERROR(ret_code, "Error creating page-locked memory pinned_partial_hashes");

    calculated_hash = (sha512_hash *) clEnqueueMapBuffer(queue[ocl_gpu_id],
            pinned_partial_hashes, CL_TRUE, CL_MAP_READ, 0,
            sizeof(sha512_hash) * gws, 0, NULL, NULL, &ret_code);
    HANDLE_CLERROR(ret_code, "Error mapping page-locked memory out_hashes");

    // create arguments (buffers)
    salt_buffer = clCreateBuffer(context[ocl_gpu_id], CL_MEM_READ_ONLY,
            sizeof(sha512_salt), NULL, &ret_code);
    HANDLE_CLERROR(ret_code, "Error creating salt_buffer out argument");

    pass_buffer = clCreateBuffer(context[ocl_gpu_id], CL_MEM_READ_ONLY,
            sizeof(sha512_password) * gws, NULL, &ret_code);
    HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_keys");

    hash_buffer = clCreateBuffer(context[ocl_gpu_id], CL_MEM_WRITE_ONLY,
            sizeof(sha512_hash) * gws, NULL, &ret_code);
    HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_out");

    work_buffer = clCreateBuffer(context[ocl_gpu_id], CL_MEM_READ_WRITE,
            sizeof(sha512_buffers) * gws, NULL, &ret_code);
    HANDLE_CLERROR(ret_code, "Error creating buffer argument work_area");

    //Set kernel arguments
    HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 0, sizeof(cl_mem),
            (void *) &salt_buffer), "Error setting argument 0");
    HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 1, sizeof(cl_mem),
            (void *) &pass_buffer), "Error setting argument 1");
    HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 2, sizeof(cl_mem),
            (void *) &hash_buffer), "Error setting argument 2");

    if (gpu(source_in_use) || use_local(source_in_use)) {
        //Set prepare kernel arguments
        HANDLE_CLERROR(clSetKernelArg(prepare_kernel, 0, sizeof(cl_mem),
                (void *) &salt_buffer), "Error setting argument 0");
        HANDLE_CLERROR(clSetKernelArg(prepare_kernel, 1, sizeof(cl_mem),
                (void *) &pass_buffer), "Error setting argument 1");
        HANDLE_CLERROR(clSetKernelArg(prepare_kernel, 2, sizeof(cl_mem),
                (void *) &work_buffer), "Error setting argument 2");

        //Fast working memory.
        HANDLE_CLERROR(clSetKernelArg(prepare_kernel, 3,
                sizeof(sha512_password) * local_work_size, NULL),
                "Error setting argument 3");

        if (use_local(source_in_use)) {
            HANDLE_CLERROR(clSetKernelArg(prepare_kernel, 4,
                    sizeof(sha512_buffers) * local_work_size, NULL),
                    "Error setting argument 4");
            HANDLE_CLERROR(clSetKernelArg(prepare_kernel, 5,
                    sizeof(sha512_ctx) * local_work_size, NULL),
                    "Error setting argument 5");
        }

        //Set crypt kernel arguments
        HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 3, sizeof(cl_mem),
                (void *) &work_buffer), "Error setting argument crypt_kernel (3)");

        if (use_local(source_in_use)) {
            //Fast working memory.
            HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 4,
                    sizeof(sha512_buffers) * local_work_size, NULL),
                    "Error setting argument 4");
            HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 5,
                    sizeof(sha512_ctx) * local_work_size, NULL),
                    "Error setting argument 5");
        }

        //Set final kernel arguments
        HANDLE_CLERROR(clSetKernelArg(final_kernel, 0, sizeof(cl_mem),
                (void *) &salt_buffer), "Error setting argument 0");
        HANDLE_CLERROR(clSetKernelArg(final_kernel, 1, sizeof(cl_mem),
                (void *) &pass_buffer), "Error setting argument 1");
        HANDLE_CLERROR(clSetKernelArg(final_kernel, 2, sizeof(cl_mem),
                (void *) &hash_buffer), "Error setting argument 2");
        HANDLE_CLERROR(clSetKernelArg(final_kernel, 3, sizeof(cl_mem),
                (void *) &work_buffer), "Error setting argument final_kernel (3)");

        if (use_local(source_in_use)) {
            //Fast working memory.
            HANDLE_CLERROR(clSetKernelArg(final_kernel, 4,
                    sizeof(sha512_buffers) * local_work_size, NULL),
                    "Error setting argument 4");
            HANDLE_CLERROR(clSetKernelArg(final_kernel, 5,
                    sizeof(sha512_ctx) * local_work_size, NULL),
                    "Error setting argument 5");
        }
    }
    memset(plaintext, '\0', sizeof(sha512_password) * gws);
    global_work_size = gws;
}
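/*
 * release_clobj() is called elsewhere in this excerpt but its body is not
 * shown. A plausible counterpart to create_clobj() above, sketched here
 * (hedged, not the original source), unmaps the pinned host regions and
 * releases every buffer created above:
 */
static void release_clobj(void)
{
    //unmap the pinned host-visible regions before releasing their buffers
    HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], pinned_partial_hashes,
            calculated_hash, 0, NULL, NULL), "Error unmapping out_hashes");
    HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], pinned_saved_keys,
            plaintext, 0, NULL, NULL), "Error unmapping saved_plain");
    HANDLE_CLERROR(clFinish(queue[ocl_gpu_id]), "Error waiting for unmaps");

    //release the kernel-argument buffers and the pinned buffers
    HANDLE_CLERROR(clReleaseMemObject(salt_buffer), "Error releasing salt_buffer");
    HANDLE_CLERROR(clReleaseMemObject(pass_buffer), "Error releasing pass_buffer");
    HANDLE_CLERROR(clReleaseMemObject(hash_buffer), "Error releasing hash_buffer");
    HANDLE_CLERROR(clReleaseMemObject(work_buffer), "Error releasing work_buffer");
    HANDLE_CLERROR(clReleaseMemObject(pinned_saved_keys),
            "Error releasing pinned_saved_keys");
    HANDLE_CLERROR(clReleaseMemObject(pinned_partial_hashes),
            "Error releasing pinned_partial_hashes");
}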