// Serialize Constructor
 MessageResourcesQueryACK()
     : Message( PROTOCOL_VERSION , 112 , 0 )
 {
     cpu( "not clear" );
     cpufrequency( "not clear" );
     ram( "not clear" );
     gpu( "not clear" );
     video_ram( "not clear" );
 }
Example #2
static size_t get_task_max_work_group_size(){
    size_t max_available;

    if (use_local(source_in_use))
        max_available = get_local_memory_size(ocl_gpu_id) /
                (sizeof(sha512_password) + sizeof(sha512_ctx) +
                 sizeof(sha512_buffers)) - 1;
    else if (gpu(source_in_use))
        max_available = get_local_memory_size(ocl_gpu_id) /
                sizeof(sha512_password);
    else
        max_available = get_max_work_group_size(ocl_gpu_id);

    if (max_available > get_current_work_group_size(ocl_gpu_id, crypt_kernel))
        return get_current_work_group_size(ocl_gpu_id, crypt_kernel);

    return max_available;
}
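To make the sizing logic concrete: the USE_LOCAL branch divides the device's reported local memory by the combined per-work-item footprint, and the result is then clamped to the kernel's own limit from get_current_work_group_size(). Purely as an illustration (the struct sizes are assumptions, not taken from this code), with 32 KiB of local memory and sizeof(sha512_password) + sizeof(sha512_ctx) + sizeof(sha512_buffers) totalling 512 bytes, that branch would allow 32768 / 512 - 1 = 63 work-items per group.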
Example #3
/* ------- Crypt function ------- */
static void crypt_all(int count) {
    int i;

    //Send data to device.
    HANDLE_CLERROR(clEnqueueWriteBuffer(queue[ocl_gpu_id], salt_buffer, CL_FALSE, 0,
            sizeof(sha512_salt), salt, 0, NULL, NULL),
            "failed in clEnqueueWriteBuffer salt_buffer");

    if (new_keys)
        HANDLE_CLERROR(clEnqueueWriteBuffer(queue[ocl_gpu_id], pass_buffer, CL_FALSE, 0,
                sizeof(sha512_password) * global_work_size, plaintext, 0, NULL, NULL),
                "failed in clEnqueueWriteBuffer pass_buffer");

    //Enqueue the kernel
    if (gpu(source_in_use) || use_local(source_in_use)) {
        HANDLE_CLERROR(clEnqueueNDRangeKernel(queue[ocl_gpu_id], prepare_kernel, 1, NULL,
            &global_work_size, &local_work_size, 0, NULL, NULL),
            "failed in clEnqueueNDRangeKernel I");

        for (i = 0; i < (salt->rounds / HASH_LOOPS); i++) {
            HANDLE_CLERROR(clEnqueueNDRangeKernel(queue[ocl_gpu_id], crypt_kernel, 1, NULL,
                &global_work_size, &local_work_size, 0, NULL, profilingEvent),
                "failed in clEnqueueNDRangeKernel");
        }
        HANDLE_CLERROR(clEnqueueNDRangeKernel(queue[ocl_gpu_id], final_kernel, 1, NULL,
            &global_work_size, &local_work_size, 0, NULL, NULL),
            "failed in clEnqueueNDRangeKernel II");
    } else
        HANDLE_CLERROR(clEnqueueNDRangeKernel(queue[ocl_gpu_id], crypt_kernel, 1, NULL,
            &global_work_size, &local_work_size, 0, NULL, profilingEvent),
            "failed in clEnqueueNDRangeKernel");

    //Read back hashes
    HANDLE_CLERROR(clEnqueueReadBuffer(queue[ocl_gpu_id], hash_buffer, CL_FALSE, 0,
            sizeof(sha512_hash) * global_work_size, calculated_hash, 0, NULL, NULL),
            "failed in reading data back");

    //Do the work
    HANDLE_CLERROR(clFinish(queue[ocl_gpu_id]), "failed in clFinish");
    new_keys = 0;
}
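The snippets above and below rely on a HANDLE_CLERROR macro to abort when an OpenCL call fails; its real definition lives elsewhere in the project. A minimal stand-in with the same shape (the name CHECK_CL and the exit-on-error behaviour are assumptions for illustration) could look like:

#include <stdio.h>
#include <stdlib.h>
#include <CL/cl.h>

/* Hypothetical stand-in for HANDLE_CLERROR: print a message and abort
 * whenever an OpenCL call does not return CL_SUCCESS. */
#define CHECK_CL(cl_call, message)                                   \
    do {                                                             \
        cl_int _err = (cl_call);                                     \
        if (_err != CL_SUCCESS) {                                    \
            fprintf(stderr, "OpenCL error %d: %s\n", _err, message); \
            exit(EXIT_FAILURE);                                      \
        }                                                            \
    } while (0)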
Example #4
bool GrGLProgramBuilder::compileAndAttachShaders(GrGLSLShaderBuilder& shader,
                                                 GrGLuint programId,
                                                 GrGLenum type,
                                                 SkTDArray<GrGLuint>* shaderIds,
                                                 const SkSL::Program::Settings& settings,
                                                 SkSL::Program::Inputs* outInputs) {
    SkSL::String glsl;
    std::unique_ptr<SkSL::Program> program = GrSkSLtoGLSL(gpu()->glContext(), type,
                                                 shader.fCompilerStrings.begin(),
                                                 shader.fCompilerStringLengths.begin(),
                                                 shader.fCompilerStrings.count(),
                                                 settings,
                                                 &glsl);
    *outInputs = program->fInputs;
    return this->compileAndAttachShaders(glsl.c_str(),
                                         glsl.size(),
                                         programId,
                                         type,
                                         shaderIds,
                                         settings,
                                         *outInputs);
}
Example #5
/// Creates a shared OpenCL/OpenGL context for the currently active
/// OpenGL context.
///
/// Once created, the shared context can be used to create OpenCL memory
/// objects which can interact with OpenGL memory objects (e.g. VBOs).
///
/// \throws unsupported_extension_error if no CL-GL sharing capable devices
///         are found.
inline context opengl_create_shared_context()
{
    // name of the OpenGL sharing extension for the system
#if defined(__APPLE__)
    const char *cl_gl_sharing_extension = "cl_APPLE_gl_sharing";
#else
    const char *cl_gl_sharing_extension = "cl_khr_gl_sharing";
#endif

#if defined(__APPLE__)
    // get OpenGL share group
    CGLContextObj cgl_current_context = CGLGetCurrentContext();
    CGLShareGroupObj cgl_share_group = CGLGetShareGroup(cgl_current_context);

    cl_context_properties properties[] = {
        CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE,
        (cl_context_properties) cgl_share_group,
        0
    };

    cl_int error = 0;
    cl_context cl_gl_context = clCreateContext(properties, 0, 0, 0, 0, &error);
    if(!cl_gl_context){
        BOOST_THROW_EXCEPTION(opencl_error(error));
    }

    return context(cl_gl_context, false);
#else
    typedef cl_int(*GetGLContextInfoKHRFunction)(
        const cl_context_properties*, cl_gl_context_info, size_t, void *, size_t *
    );

    std::vector<platform> platforms = system::platforms();
    for(size_t i = 0; i < platforms.size(); i++){
        const platform &platform = platforms[i];

        // load clGetGLContextInfoKHR() extension function
        GetGLContextInfoKHRFunction GetGLContextInfoKHR =
            reinterpret_cast<GetGLContextInfoKHRFunction>(
                reinterpret_cast<size_t>(
                    platform.get_extension_function_address("clGetGLContextInfoKHR")
                )
            );
        if(!GetGLContextInfoKHR){
            continue;
        }

        // create context properties listing the platform and current OpenGL display
        cl_context_properties properties[] = {
            CL_CONTEXT_PLATFORM, (cl_context_properties) platform.id(),
        #if defined(__linux__)
            CL_GL_CONTEXT_KHR, (cl_context_properties) glXGetCurrentContext(),
            CL_GLX_DISPLAY_KHR, (cl_context_properties) glXGetCurrentDisplay(),
        #elif defined(WIN32)
            CL_GL_CONTEXT_KHR, (cl_context_properties) wglGetCurrentContext(),
            CL_WGL_HDC_KHR, (cl_context_properties) wglGetCurrentDC(), 
        #endif
            0
        };

        // lookup current OpenCL device for current OpenGL context
        cl_device_id gpu_id;
        cl_int ret = GetGLContextInfoKHR(
            properties,
            CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR,
            sizeof(cl_device_id),
            &gpu_id,
            0
        );
        if(ret != CL_SUCCESS){
            continue;
        }

        // create device object for the GPU and ensure it supports CL-GL sharing
        device gpu(gpu_id, false);
        if(!gpu.supports_extension(cl_gl_sharing_extension)){
            continue;
        }

        // return CL-GL sharing context
        return context(gpu, properties);
    }
#endif

    // no CL-GL sharing capable devices found
    BOOST_THROW_EXCEPTION(
        unsupported_extension_error(cl_gl_sharing_extension)
    );
}
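A minimal usage sketch for the function above, assuming an OpenGL context is already current on the calling thread (the include paths and the follow-up calls are standard Boost.Compute, not taken from this snippet):

#include <boost/compute/command_queue.hpp>
#include <boost/compute/context.hpp>
#include <boost/compute/device.hpp>
#include <boost/compute/interop/opengl.hpp>

namespace compute = boost::compute;

void setup_cl_gl_interop()
{
    // create an OpenCL context that shares the current OpenGL context
    compute::context ctx = compute::opengl_create_shared_context();

    // the device chosen for the shared context, and a command queue on it
    compute::device gpu = ctx.get_device();
    compute::command_queue queue(ctx, gpu);

    // from here, compute::opengl_buffer objects can wrap GL buffers and be
    // used by kernels enqueued on this queue
}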
Example #6
int GrGLFragmentOnlyShaderBuilder::addTexCoordSets(int count) {
    int firstFreeCoordSet = fTexCoordSetCnt;
    fTexCoordSetCnt += count;
    SkASSERT(gpu()->glCaps().maxFixedFunctionTextureCoords() >= fTexCoordSetCnt);
    return firstFreeCoordSet;
}
Example #7
int main(int argc, char **argv) 
{
	//ClImage image("./checker.png");
	ClImage image;

	if(image.open("./test.png") == false)
		return 1;

	ClImage results(image.width(), image.height());
	size_t nSize = image.Size();
	unsigned char *out_img = new unsigned char[nSize];

	TRACE("image nSize = %zu\n", nSize);
	TRACE("image size input = (%zu, %zu)\n", image.width(), image.height());

	PERF_START("ocl-copy");
	ClHost host(CL_DEVICE_TYPE_GPU);
	ClDevice gpu(&host);

	ClBuffer iImage = gpu.image(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, image);
	ClBuffer oImage = gpu.image(CL_MEM_WRITE_ONLY, NULL, image.width(), image.height());
	ClBuffer sampler = gpu.sampler(CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST);

	if(gpu.open("./hello-img.cl", "copyimg") == false)
		return 1;

	gpu.arg(iImage);
	gpu.arg(oImage);
	gpu.arg(sampler);

#define WORKDIM 2

	// origin/region are needed unconditionally by gpu.read() below
	const size_t origin[3] = {0, 0, 0};
	const size_t region[3] = {image.width(), image.height(), 1};

#ifndef _WITH_DIRECT
	gpu.write(0, iImage, image.width(), image.height());
#endif //_WITH_DIRECT

	size_t dev_local = gpu.getWorkGroupInfo();
#ifdef _INTEL
	TRACE("before dev_local = %zu\n", dev_local);
	if(argc > 1) {
		TRACE("argv = %s\n", argv[1]);
		if(atoi(argv[1]) < 1)
			dev_local = 9;
		else
			dev_local = atoi(argv[1]);
	}
	TRACE("after dev_local = %zu\n", dev_local);
#else
	TRACE("before dev_local = %d\n", dev_local);
	if(argc > 1) {
		TRACE("argv = %s\n", argv[1]);
		if(atoi(argv[1]) < 1)
			dev_local = (dev_local/2) - 1;
		else
			dev_local = atoi(argv[1]);
	}
	TRACE("after dev_local = %d\n", dev_local);
#endif //_INTEL

	size_t local[WORKDIM] = {dev_local, dev_local};
	size_t global[WORKDIM] = {gpu.roundup(local[0], image.width()), gpu.roundup(local[1], image.height())};

	TRACE("run (%zu,%zu)\n", global[0], global[1]);
	gpu.run(WORKDIM, global, local);
	gpu.read(1, out_img, origin, region);

	//worker(ostr, nsize[0]);
	results.store("results.png", out_img);
	results.close();
	image.close();

	PERF_END("ocl-copy");
	delete[] out_img;
	TRACE("%s\n", "eof");
	return 0;
}
Example #8
GrGLProgram* GrGLProgramBuilder::finalize() {
    TRACE_EVENT0("skia", TRACE_FUNC);

    // verify we can get a program id
    GrGLuint programID;
    GL_CALL_RET(programID, CreateProgram());
    if (0 == programID) {
        this->cleanupFragmentProcessors();
        return nullptr;
    }

    if (this->gpu()->glCaps().programBinarySupport() &&
        this->gpu()->getContext()->contextPriv().getPersistentCache()) {
        GL_CALL(ProgramParameteri(programID, GR_GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GR_GL_TRUE));
    }

    this->finalizeShaders();

    // compile shaders and bind attributes / uniforms
    const GrPrimitiveProcessor& primProc = this->primitiveProcessor();
    SkSL::Program::Settings settings;
    settings.fCaps = this->gpu()->glCaps().shaderCaps();
    settings.fFlipY = this->pipeline().proxy()->origin() != kTopLeft_GrSurfaceOrigin;
    settings.fSharpenTextures = this->gpu()->getContext()->contextPriv().sharpenMipmappedTextures();
    SkSL::Program::Inputs inputs;
    SkTDArray<GrGLuint> shadersToDelete;
    bool cached = fGpu->glCaps().programBinarySupport() && nullptr != fCached.get();
    if (cached) {
        this->bindProgramResourceLocations(programID);
        // cache hit, just hand the binary to GL
        const uint8_t* bytes = fCached->bytes();
        size_t offset = 0;
        memcpy(&inputs, bytes + offset, sizeof(inputs));
        offset += sizeof(inputs);
        int binaryFormat;
        memcpy(&binaryFormat, bytes + offset, sizeof(binaryFormat));
        offset += sizeof(binaryFormat);
        GrGLClearErr(this->gpu()->glInterface());
        GR_GL_CALL_NOERRCHECK(this->gpu()->glInterface(),
                              ProgramBinary(programID, binaryFormat, (void*) (bytes + offset),
                                            fCached->size() - offset));
        if (GR_GL_GET_ERROR(this->gpu()->glInterface()) == GR_GL_NO_ERROR) {
            if (inputs.fRTHeight) {
                this->addRTHeightUniform(SKSL_RTHEIGHT_NAME);
            }
            cached = this->checkLinkStatus(programID);
        } else {
            cached = false;
        }
    }
    if (!cached) {
        // cache miss, compile shaders
        if (fFS.fForceHighPrecision) {
            settings.fForceHighPrecision = true;
        }
        SkSL::String glsl;
        std::unique_ptr<SkSL::Program> fs = GrSkSLtoGLSL(gpu()->glContext(),
                                                         GR_GL_FRAGMENT_SHADER,
                                                         fFS.fCompilerStrings.begin(),
                                                         fFS.fCompilerStringLengths.begin(),
                                                         fFS.fCompilerStrings.count(),
                                                         settings,
                                                         &glsl);
        inputs = fs->fInputs;
        if (inputs.fRTHeight) {
            this->addRTHeightUniform(SKSL_RTHEIGHT_NAME);
        }
        if (!this->compileAndAttachShaders(glsl.c_str(), glsl.size(), programID,
                                           GR_GL_FRAGMENT_SHADER, &shadersToDelete, settings,
                                           inputs)) {
            this->cleanupProgram(programID, shadersToDelete);
            return nullptr;
        }

        std::unique_ptr<SkSL::Program> vs = GrSkSLtoGLSL(gpu()->glContext(),
                                                         GR_GL_VERTEX_SHADER,
                                                         fVS.fCompilerStrings.begin(),
                                                         fVS.fCompilerStringLengths.begin(),
                                                         fVS.fCompilerStrings.count(),
                                                         settings,
                                                         &glsl);
        if (!this->compileAndAttachShaders(glsl.c_str(), glsl.size(), programID,
                                           GR_GL_VERTEX_SHADER, &shadersToDelete, settings,
                                           inputs)) {
            this->cleanupProgram(programID, shadersToDelete);
            return nullptr;
        }

        // NVPR actually requires a vertex shader to compile
        bool useNvpr = primProc.isPathRendering();
        if (!useNvpr) {
            int vaCount = primProc.numAttribs();
            for (int i = 0; i < vaCount; i++) {
                GL_CALL(BindAttribLocation(programID, i, primProc.getAttrib(i).fName));
            }
        }

        if (primProc.willUseGeoShader()) {
            std::unique_ptr<SkSL::Program> gs;
            gs = GrSkSLtoGLSL(gpu()->glContext(),
                              GR_GL_GEOMETRY_SHADER,
                              fGS.fCompilerStrings.begin(),
                              fGS.fCompilerStringLengths.begin(),
                              fGS.fCompilerStrings.count(),
                              settings,
                              &glsl);
            if (!this->compileAndAttachShaders(glsl.c_str(), glsl.size(), programID,
                                               GR_GL_GEOMETRY_SHADER, &shadersToDelete, settings,
                                               inputs)) {
                this->cleanupProgram(programID, shadersToDelete);
                return nullptr;
            }

        }
        this->bindProgramResourceLocations(programID);

        GL_CALL(LinkProgram(programID));
    }
    // Calling GetProgramiv is expensive in Chromium. Assume success in release builds.
    bool checkLinked = kChromium_GrGLDriver != fGpu->ctxInfo().driver();
#ifdef SK_DEBUG
    checkLinked = true;
#endif
    if (checkLinked) {
        if (!this->checkLinkStatus(programID)) {
            SkDebugf("VS:\n");
            GrGLPrintShader(fGpu->glContext(), GR_GL_VERTEX_SHADER, fVS.fCompilerStrings.begin(),
                            fVS.fCompilerStringLengths.begin(), fVS.fCompilerStrings.count(),
                            settings);
            if (primProc.willUseGeoShader()) {
                SkDebugf("\nGS:\n");
                GrGLPrintShader(fGpu->glContext(), GR_GL_GEOMETRY_SHADER,
                                fGS.fCompilerStrings.begin(), fGS.fCompilerStringLengths.begin(),
                                fGS.fCompilerStrings.count(), settings);
            }
            SkDebugf("\nFS:\n");
            GrGLPrintShader(fGpu->glContext(), GR_GL_FRAGMENT_SHADER, fFS.fCompilerStrings.begin(),
                            fFS.fCompilerStringLengths.begin(), fFS.fCompilerStrings.count(),
                            settings);
            SkDEBUGFAIL("");
            return nullptr;
        }
    }
    this->resolveProgramResourceLocations(programID);

    this->cleanupShaders(shadersToDelete);
    if (!cached && this->gpu()->getContext()->contextPriv().getPersistentCache() &&
        fGpu->glCaps().programBinarySupport()) {
        GrGLsizei length = 0;
        GL_CALL(GetProgramiv(programID, GR_GL_PROGRAM_BINARY_LENGTH, &length));
        if (length > 0) {
            // store shader in cache
            sk_sp<SkData> key = SkData::MakeWithoutCopy(desc()->asKey(), desc()->keyLength());
            GrGLenum binaryFormat;
            std::unique_ptr<char[]> binary(new char[length]);
            GL_CALL(GetProgramBinary(programID, length, &length, &binaryFormat, binary.get()));
            size_t dataLength = sizeof(inputs) + sizeof(binaryFormat) + length;
            std::unique_ptr<uint8_t[]> data(new uint8_t[dataLength]);
            size_t offset = 0;
            memcpy(data.get() + offset, &inputs, sizeof(inputs));
            offset += sizeof(inputs);
            memcpy(data.get() + offset, &binaryFormat, sizeof(binaryFormat));
            offset += sizeof(binaryFormat);
            memcpy(data.get() + offset, binary.get(), length);
            this->gpu()->getContext()->contextPriv().getPersistentCache()->store(
                                            *key, *SkData::MakeWithoutCopy(data.get(), dataLength));
        }
    }
    return this->createProgram(programID);
}
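For reference, the persistent-cache blob written in the final branch above uses exactly the layout that the cache-hit branch at the top of finalize() expects: the raw bytes of SkSL::Program::Inputs, then the GrGLenum binary format, then the driver-provided program binary, so both paths walk the same offsets.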
Example #9
/* ------- Initialization  ------- */
static void init(struct fmt_main * self) {
    char * tmp_value;
    char * task = "$JOHN/cryptsha512_kernel_DEFAULT.cl";
    uint64_t startTime, runtime;

    opencl_init_dev(ocl_gpu_id, platform_id);
    startTime = (unsigned long) time(NULL);
    source_in_use = device_info[ocl_gpu_id];

    if ((tmp_value = getenv("_TYPE")))
        source_in_use = atoi(tmp_value);

    if ((tmp_value = getenv("_FAST")))
        fast_mode = TRUE;

    if (use_local(source_in_use))
            task = "$JOHN/cryptsha512_kernel_LOCAL.cl";
    else if (gpu(source_in_use)) {
        fprintf(stderr, "Building the kernel, this could take a while\n");
        task = "$JOHN/cryptsha512_kernel_GPU.cl";
    }
    fflush(stdout);
    opencl_build_kernel(task, ocl_gpu_id);

    if ((runtime = (unsigned long) (time(NULL) - startTime)) > 2UL)
        fprintf(stderr, "Elapsed time: %lu seconds\n", runtime);
    fflush(stdout);

    // create kernel(s) to execute
    crypt_kernel = clCreateKernel(program[ocl_gpu_id], "kernel_crypt", &ret_code);
    HANDLE_CLERROR(ret_code, "Error creating kernel. Double-check kernel name?");

    if (gpu(source_in_use) || use_local(source_in_use)) {
        prepare_kernel = clCreateKernel(program[ocl_gpu_id], "kernel_prepare", &ret_code);
        HANDLE_CLERROR(ret_code, "Error creating kernel_prepare. Double-check kernel name?");
        final_kernel = clCreateKernel(program[ocl_gpu_id], "kernel_final", &ret_code);
        HANDLE_CLERROR(ret_code, "Error creating kernel_final. Double-check kernel name?");
    }
    global_work_size = get_task_max_size();
    local_work_size = get_default_workgroup();

    if (source_in_use != device_info[ocl_gpu_id])
        fprintf(stderr, "Selected runtime id %d, source (%s)\n", source_in_use, task);

    if ((tmp_value = cfg_get_param(SECTION_OPTIONS,
                                   SUBSECTION_OPENCL, LWS_CONFIG)))
        local_work_size = atoi(tmp_value);

    if ((tmp_value = getenv("LWS")))
        local_work_size = atoi(tmp_value);

    //Check if local_work_size is a valid number.
    if (local_work_size > get_task_max_work_group_size()){
        local_work_size = 0; //Force find a valid number.
    }
    self->params.max_keys_per_crypt = global_work_size;

    if (!local_work_size) {
        local_work_size = get_task_max_work_group_size();
        create_clobj(global_work_size, self);
        find_best_workgroup(self);
        release_clobj();
    }

    if ((tmp_value = cfg_get_param(SECTION_OPTIONS,
                                   SUBSECTION_OPENCL, GWS_CONFIG)))
        global_work_size = atoi(tmp_value);

    if ((tmp_value = getenv("GWS")))
        global_work_size = atoi(tmp_value);

    //Check if a valid multiple is used.
    global_work_size = get_multiple(global_work_size, local_work_size);

    if (global_work_size)
        create_clobj(global_work_size, self);

    else {
        //user chose to die of boredom
        global_work_size = get_task_max_size();
        find_best_gws(self);
    }
    fprintf(stderr, "Local work size (LWS) %d, global work size (GWS) %zd\n",
           (int) local_work_size, global_work_size);
    self->params.max_keys_per_crypt = global_work_size;
}
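Since init() consults the LWS and GWS environment variables (and the corresponding config entries) before auto-tuning, the work sizes can be forced at run time; a hypothetical invocation such as LWS=64 GWS=8192 ./john --test --format=<this OpenCL format> (binary name and option spelling assumed) would bypass find_best_workgroup() and find_best_gws() as long as the values pass the validity checks above.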
Example #10
//Do the proper test using different sizes.
static cl_ulong gws_test(size_t num, struct fmt_main * self) {

    cl_event myEvent;
    cl_int ret_code;
    cl_uint *tmpbuffer;
    cl_ulong startTime, endTime, runtime;
    int i, loops;

    //Prepare buffers.
    create_clobj(num, self);

    tmpbuffer = mem_alloc(sizeof(sha512_hash) * num);

    if (tmpbuffer == NULL) {
        fprintf(stderr, "Malloc failure in find_best_gws\n");
        exit(EXIT_FAILURE);
    }

    queue_prof = clCreateCommandQueue(context[ocl_gpu_id], devices[ocl_gpu_id],
            CL_QUEUE_PROFILING_ENABLE, &ret_code);
    HANDLE_CLERROR(ret_code, "Failed in clCreateCommandQueue");

    // Set salt.
    set_salt(get_salt("$6$saltstring$"));
    salt->initial = salt->rounds - get_multiple(salt->rounds, HASH_LOOPS);

    // Set keys
    for (i = 0; i < num; i++) {
        set_key("aaabaabaaa", i);
    }
    //** Get execution time **//
    HANDLE_CLERROR(clEnqueueWriteBuffer(queue_prof, salt_buffer, CL_FALSE, 0,
            sizeof(sha512_salt), salt, 0, NULL, &myEvent),
            "Failed in clEnqueueWriteBuffer");

    HANDLE_CLERROR(clFinish(queue_prof), "Failed in clFinish");
    HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT,
            sizeof(cl_ulong), &startTime, NULL),
            "Failed in clGetEventProfilingInfo I");
    HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END,
            sizeof(cl_ulong), &endTime, NULL),
            "Failed in clGetEventProfilingInfo II");
    HANDLE_CLERROR(clReleaseEvent(myEvent), "Failed in clReleaseEvent");
    runtime = endTime - startTime;

    //** Get execution time **//
    HANDLE_CLERROR(clEnqueueWriteBuffer(queue_prof, pass_buffer, CL_FALSE, 0,
            sizeof(sha512_password) * num, plaintext, 0, NULL, &myEvent),
            "Failed in clEnqueueWriteBuffer");

    HANDLE_CLERROR(clFinish(queue_prof), "Failed in clFinish");
    HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT,
            sizeof(cl_ulong), &startTime, NULL),
            "Failed in clGetEventProfilingInfo I");
    HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END,
            sizeof(cl_ulong), &endTime, NULL),
            "Failed in clGetEventProfilingInfo II");
    HANDLE_CLERROR(clReleaseEvent(myEvent), "Failed in clReleaseEvent");
    runtime += endTime - startTime;

    //** Get execution time **//
    if (gpu(source_in_use) || use_local(source_in_use)) {
        ret_code = clEnqueueNDRangeKernel(queue_prof, prepare_kernel,
            1, NULL, &num, &local_work_size, 0, NULL, &myEvent);

        HANDLE_CLERROR(clFinish(queue_prof), "Failed in clFinish");
        HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT,
            sizeof(cl_ulong), &startTime, NULL),
            "Failed in clGetEventProfilingInfo I");
        HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END,
            sizeof(cl_ulong), &endTime, NULL),
            "Failed in clGetEventProfilingInfo II");
        HANDLE_CLERROR(clReleaseEvent(myEvent), "Failed in clReleaseEvent");
        runtime += endTime - startTime;
    }

    loops = gpu(source_in_use) || use_local(source_in_use) ? (salt->rounds / HASH_LOOPS) : 1;

    //** Get execution time **//
    for (i = 0; i < loops; i++)
    {
        ret_code = clEnqueueNDRangeKernel(queue_prof, crypt_kernel,
               1, NULL, &num, &local_work_size, 0, NULL, &myEvent);

        HANDLE_CLERROR(clFinish(queue_prof), "Failed in clFinish");
        HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT,
            sizeof(cl_ulong), &startTime, NULL),
            "Failed in clGetEventProfilingInfo I");
        HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END,
            sizeof(cl_ulong), &endTime, NULL),
            "Failed in clGetEventProfilingInfo II");
        HANDLE_CLERROR(clReleaseEvent(myEvent), "Failed in clReleaseEvent");
        runtime += endTime - startTime;
    }

    //** Get execution time **//
    HANDLE_CLERROR(clEnqueueReadBuffer(queue_prof, hash_buffer, CL_FALSE, 0,
            sizeof(sha512_hash) * num, tmpbuffer, 0, NULL, &myEvent),
            "Failed in clEnqueueReadBuffer");

    HANDLE_CLERROR(clFinish(queue_prof), "Failed in clFinish");
    HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT,
            sizeof(cl_ulong), &startTime, NULL),
            "Failed in clGetEventProfilingInfo I");
    HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END,
            sizeof(cl_ulong), &endTime, NULL),
            "Failed in clGetEventProfilingInfo II");
    HANDLE_CLERROR(clReleaseEvent(myEvent), "Failed in clReleaseEvent");
    runtime += endTime - startTime;

    MEM_FREE(tmpbuffer);
    HANDLE_CLERROR(clReleaseCommandQueue(queue_prof),
            "Failed in clReleaseCommandQueue");
    release_clobj();

    if (ret_code != CL_SUCCESS) {
        if (ret_code != CL_INVALID_WORK_GROUP_SIZE)
            fprintf(stderr, "Error %d\n", ret_code);
        return 0;
    }
    return runtime;
}
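In short, gws_test() returns the total device-side time, measured from CL_PROFILING_COMMAND_SUBMIT to CL_PROFILING_COMMAND_END and accumulated over the salt and key transfers, every kernel launch, and the final result read-back, for the candidate global work size num; it returns 0 when a kernel launch fails (for example with CL_INVALID_WORK_GROUP_SIZE) so the caller can discard that configuration.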
Example #11
/* ------- Create and destroy necessary objects ------- */
static void create_clobj(int gws, struct fmt_main * self) {
    self->params.min_keys_per_crypt = self->params.max_keys_per_crypt = gws;

    pinned_saved_keys = clCreateBuffer(context[ocl_gpu_id],
            CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
            sizeof(sha512_password) * gws, NULL, &ret_code);
    HANDLE_CLERROR(ret_code, "Error creating page-locked memory pinned_saved_keys");

    plaintext = (sha512_password *) clEnqueueMapBuffer(queue[ocl_gpu_id],
            pinned_saved_keys, CL_TRUE, CL_MAP_WRITE | CL_MAP_READ, 0,
            sizeof(sha512_password) * gws, 0, NULL, NULL, &ret_code);
    HANDLE_CLERROR(ret_code, "Error mapping page-locked memory saved_plain");

    pinned_partial_hashes = clCreateBuffer(context[ocl_gpu_id],
            CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
            sizeof(sha512_hash) * gws, NULL, &ret_code);
    HANDLE_CLERROR(ret_code, "Error creating page-locked memory pinned_partial_hashes");

    calculated_hash = (sha512_hash *) clEnqueueMapBuffer(queue[ocl_gpu_id],
            pinned_partial_hashes, CL_TRUE, CL_MAP_READ, 0,
            sizeof(sha512_hash) * gws, 0, NULL, NULL, &ret_code);
    HANDLE_CLERROR(ret_code, "Error mapping page-locked memory out_hashes");

    // create arguments (buffers)
    salt_buffer = clCreateBuffer(context[ocl_gpu_id], CL_MEM_READ_ONLY,
            sizeof(sha512_salt), NULL, &ret_code);
    HANDLE_CLERROR(ret_code, "Error creating salt_buffer out argument");

    pass_buffer = clCreateBuffer(context[ocl_gpu_id], CL_MEM_READ_ONLY,
            sizeof(sha512_password) * gws, NULL, &ret_code);
    HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_keys");

    hash_buffer = clCreateBuffer(context[ocl_gpu_id], CL_MEM_WRITE_ONLY,
            sizeof(sha512_hash) * gws, NULL, &ret_code);
    HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_out");

    work_buffer = clCreateBuffer(context[ocl_gpu_id], CL_MEM_READ_WRITE,
            sizeof(sha512_buffers) * gws, NULL, &ret_code);
    HANDLE_CLERROR(ret_code, "Error creating buffer argument work_area");

    //Set kernel arguments
    HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 0, sizeof(cl_mem),
            (void *) &salt_buffer), "Error setting argument 0");
    HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 1, sizeof(cl_mem),
            (void *) &pass_buffer), "Error setting argument 1");
    HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 2, sizeof(cl_mem),
            (void *) &hash_buffer), "Error setting argument 2");

    if (gpu(source_in_use) || use_local(source_in_use)) {
        //Set prepare kernel arguments
        HANDLE_CLERROR(clSetKernelArg(prepare_kernel, 0, sizeof(cl_mem),
            (void *) &salt_buffer), "Error setting argument 0");
        HANDLE_CLERROR(clSetKernelArg(prepare_kernel, 1, sizeof(cl_mem),
            (void *) &pass_buffer), "Error setting argument 1");
        HANDLE_CLERROR(clSetKernelArg(prepare_kernel, 2, sizeof(cl_mem),
            (void *) &work_buffer), "Error setting argument 2");

        //Fast working memory.
        HANDLE_CLERROR(clSetKernelArg(prepare_kernel, 3,
            sizeof(sha512_password) * local_work_size,
            NULL), "Error setting argument 3");

        if (use_local(source_in_use)) {
            HANDLE_CLERROR(clSetKernelArg(prepare_kernel, 4,
                sizeof(sha512_buffers) * local_work_size,
                NULL), "Error setting argument 4");
            HANDLE_CLERROR(clSetKernelArg(prepare_kernel, 5,
                sizeof(sha512_ctx) * local_work_size,
                NULL), "Error setting argument 5");
        }
        //Set crypt kernel arguments
        HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 3, sizeof(cl_mem),
            (void *) &work_buffer), "Error setting argument crypt_kernel (3)");

        if (use_local(source_in_use)) {
            //Fast working memory.
            HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 4,
                sizeof(sha512_buffers) * local_work_size,
                NULL), "Error setting argument 4");
            HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 5,
                sizeof(sha512_ctx) * local_work_size,
                NULL), "Error setting argument 5");
        }
        //Set final kernel arguments
        HANDLE_CLERROR(clSetKernelArg(final_kernel, 0, sizeof(cl_mem),
                (void *) &salt_buffer), "Error setting argument 0");
        HANDLE_CLERROR(clSetKernelArg(final_kernel, 1, sizeof(cl_mem),
                (void *) &pass_buffer), "Error setting argument 1");
        HANDLE_CLERROR(clSetKernelArg(final_kernel, 2, sizeof(cl_mem),
                (void *) &hash_buffer), "Error setting argument 2");
        HANDLE_CLERROR(clSetKernelArg(final_kernel, 3, sizeof(cl_mem),
            (void *) &work_buffer), "Error setting argument crypt_kernel (3)");

        if (use_local(source_in_use)) {
            //Fast working memory.
            HANDLE_CLERROR(clSetKernelArg(final_kernel, 4,
                sizeof(sha512_buffers) * local_work_size,
                NULL), "Error setting argument 4");
            HANDLE_CLERROR(clSetKernelArg(final_kernel, 5,
                sizeof(sha512_ctx) * local_work_size,
                NULL), "Error setting argument 5");
        }
    }
    memset(plaintext, '\0', sizeof(sha512_password) * gws);
    global_work_size = gws;
}
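Note the staging pattern used here: pinned_saved_keys and pinned_partial_hashes are created with CL_MEM_ALLOC_HOST_PTR and mapped with clEnqueueMapBuffer, which gives the host the page-locked pointers plaintext and calculated_hash, while the separate pass_buffer and hash_buffer device buffers are what the kernels actually read and write; crypt_all() shuttles data between the two with clEnqueueWriteBuffer and clEnqueueReadBuffer.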