Ejemplo n.º 1
0
void MFNHashTypePlainOpenCL::copyDeviceFoundPasswordsToHost() {
    trace_printf("MFNHashTypePlainOpenCL::copyDeviceFoundPasswordsToHost()\n");

    cl_int errorCode;
    
    errorCode = clEnqueueReadBuffer (this->OpenCL->getCommandQueue(),
        this->DeviceSuccessAddress,
        CL_TRUE /* blocking write */,
        0 /* offset */,
        this->activeHashesProcessed.size() /* bytes to copy */,
        (void *)this->HostSuccessAddress,
        NULL, NULL, NULL /* event list stuff */);
    if (errorCode != CL_SUCCESS) {
        printf("Error: %s\n", print_cl_errstring(errorCode));
        exit(1);
    }

    errorCode = clEnqueueReadBuffer (this->OpenCL->getCommandQueue(),
        this->DeviceFoundPasswordsAddress,
        CL_TRUE /* blocking write */,
        0 /* offset */,
        this->passwordLength * this->activeHashesProcessed.size() /* bytes to copy */,
        (void *)this->HostFoundPasswordsAddress,
        NULL, NULL, NULL /* event list stuff */);
    if (errorCode != CL_SUCCESS) {
        printf("Error: %s\n", print_cl_errstring(errorCode));
        exit(1);
    }
}
Ejemplo n.º 2
0
cl_bool there_was_an_error(cl_int err){
    if (err != CL_SUCCESS){
        printf("%s\n", print_cl_errstring(err));
        return 1;
    }
    return 0;
}
Ejemplo n.º 3
0
inline void check_error(cl_int err) {
  if (err != CL_SUCCESS) {
    std::cerr << "ERROR: "
    << " (" << err << ") " << print_cl_errstring(err) << std::endl;
    exit(EXIT_FAILURE);
  }
}
Ejemplo n.º 4
0
Archivo: utils.c Proyecto: 8l/insieme
int errorCodeOut(const cl_int error,const char* ownMessage) {

	if (error != CL_SUCCESS) {
		printf("%s\n%s\n", ownMessage, print_cl_errstring(error));
		return -1;
	}
	return 0;
}
void MFNHashTypeSaltedOpenCL_MD5_PS::launchKernel() {
    trace_printf("MFNHashTypeSaltedOpenCL_MD5_PS::launchKernel()\n");
    cl_event kernelLaunchEvent;
    cl_int errorCode;
    size_t numberWorkgroups;
    size_t numberWorkitems;

    numberWorkgroups = this->GPUBlocks * this->GPUThreads;
    numberWorkitems = this->GPUThreads;

    
    klaunch_printf("T %d: Platform/Device: %d/%d\n", this->threadId, this->openCLPlatformId, this->gpuDeviceId);
    klaunch_printf("T %d: Workgroups/Workitems: %d/%d\n", this->threadId, numberWorkgroups, numberWorkitems);

    // Copy the per-step value to the kernel
    errorCode = clSetKernelArg (this->HashKernel, 14, sizeof(cl_uint), &this->perStep);
    errorCode |= clSetKernelArg (this->HashKernel, 20, sizeof(cl_uint), &this->saltStartOffset);
    if (errorCode != CL_SUCCESS) {
        printf("Error 1: %s\n", print_cl_errstring(errorCode));
        exit(1);
    }

    errorCode = clEnqueueNDRangeKernel(this->OpenCL->getCommandQueue(),
            this->HashKernel,
            1 /* numDims */,
            NULL /* offset */,
            &numberWorkgroups,
            &numberWorkitems,
            NULL, NULL,
            &kernelLaunchEvent);
    
    if (errorCode != CL_SUCCESS) {
        printf("Error 2: %s\n", print_cl_errstring(errorCode));
        exit(1);
    }

    if (clWaitForEvents(1, &kernelLaunchEvent) != CL_SUCCESS) {
        printf("\nError on wait for event!\n");
        fflush(stdout);
    };
    // Release the event to prevent memory leaks.
    clReleaseEvent(kernelLaunchEvent);
}
void MFNHashTypeSaltedOpenCL_MD5_PS::copySaltConstantsToDevice() {
    trace_printf("MFNHashTypeSaltedOpenCL_MD5_PS::copySaltConstantsToDevice()\n");
    
    cl_int errorCode = 0;
    
    // Salted hash data
    uint64_t localNumberSaltValues = this->numberSaltsCopiedToDevice;

    errorCode |= clSetKernelArg (this->HashKernel, 17, sizeof(cl_mem), &this->DeviceSaltLengthsAddress);
    errorCode |= clSetKernelArg (this->HashKernel, 18, sizeof(cl_mem), &this->DeviceSaltValuesAddress);
    errorCode |= clSetKernelArg (this->HashKernel, 19, sizeof(cl_ulong), &localNumberSaltValues);

    if (errorCode != CL_SUCCESS) {
        printf("Thread %d, dev %d: OpenCL error 5: %s. Exiting.\n",
                this->threadId, this->gpuDeviceId, print_cl_errstring(errorCode));
        exit(1);
    }
}
Ejemplo n.º 7
0
void MFNHashTypePlainOpenCL::setupDevice() {
    trace_printf("CHHashTypeVPlainCUDA::setupDevice()\n");
    char buildOptions[1024];
    cl_int errorCode;

    // Set the OpenCL platform & device
    trace_printf("Thread %d setting OpenCL platform/device to %d, %d\n",
            this->threadId, this->openCLPlatformId, this->gpuDeviceId);
    this->OpenCL->selectPlatformById(this->openCLPlatformId);
    this->OpenCL->selectDeviceById(this->gpuDeviceId);
    
    /**
     * Handle generating the kernels.  This involves building with the specified
     * password length, vector width, and BFI_INT status.
     */

    if (MultiforcerGlobalClassFactory.getCommandlinedataClass()->GetUseBfiInt()) {
        // BFI_INT patching - pass BITALIGN to kernel
        sprintf(buildOptions, "-D PASSWORD_LENGTH=%d -D VECTOR_WIDTH=%d -D BITALIGN=1",
            this->passwordLength, this->VectorWidth);
    } else {
        // No BFI_INT patching.
        sprintf(buildOptions, "-D PASSWORD_LENGTH=%d -D VECTOR_WIDTH=%d",
                this->passwordLength, this->VectorWidth);
    }
    this->OpenCL->buildProgramFromManySourcesConcat(this->getHashFileNames(), buildOptions);

    // If the BFI_INT patching is being used, patch the generated binary.
    if (MultiforcerGlobalClassFactory.getCommandlinedataClass()->GetUseBfiInt()) {
        this->OpenCL->doAMDBFIPatch();
    }

    this->HashProgram = this->OpenCL->getProgram();
    this->HashKernel = clCreateKernel (this->HashProgram, this->getHashKernelName().c_str(), &errorCode);

    if (errorCode != CL_SUCCESS) {
        printf("Error: %s\n", print_cl_errstring(errorCode));
        exit(1);
    }

}
void MFNHashTypePlainOpenCL_MD5::copyConstantDataToDevice() {
    trace_printf("MFNHashTypePlainOpenCL_MD5::copyConstantDataToDevice()\n");

    cl_int errorCode;

    
    // Begin copying constant data to the device.
    
    errorCode = clEnqueueWriteBuffer (this->OpenCL->getCommandQueue(),
            this->DeviceForwardCharsetAddress,
            CL_TRUE /* blocking write */,
            0 /* offset */,
            this->charsetForwardLookup.size() /* bytes to copy */,
            (void *)&this->charsetForwardLookup[0],
            NULL, NULL, NULL /* event list stuff */);
    if (errorCode != CL_SUCCESS) {
        printf("Error: %s\n", print_cl_errstring(errorCode));
        exit(1);
    }

    errorCode = clEnqueueWriteBuffer (this->OpenCL->getCommandQueue(),
            this->DeviceReverseCharsetAddress,
            CL_TRUE /* blocking write */,
            0 /* offset */,
            this->charsetReverseLookup.size() /* bytes to copy */,
            (void *)&this->charsetReverseLookup[0],
            NULL, NULL, NULL /* event list stuff */);
    if (errorCode != CL_SUCCESS) {
        printf("Error: %s\n", print_cl_errstring(errorCode));
        exit(1);
    }

    errorCode = clEnqueueWriteBuffer (this->OpenCL->getCommandQueue(),
            this->DeviceCharsetLengthsAddress,
            CL_TRUE /* blocking write */,
            0 /* offset */,
            this->charsetLengths.size() /* bytes to copy */,
            (void *)&this->charsetLengths[0],
            NULL, NULL, NULL /* event list stuff */);
    if (errorCode != CL_SUCCESS) {
        printf("Error: %s\n", print_cl_errstring(errorCode));
        exit(1);
    }

    errorCode = clEnqueueWriteBuffer (this->OpenCL->getCommandQueue(),
            this->DeviceBitmap8kb_Address,
            CL_TRUE /* blocking write */,
            0 /* offset */,
            8192 /* bytes to copy */,
            (void *)&this->sharedBitmap8kb_a[0],
            NULL, NULL, NULL /* event list stuff */);
    if (errorCode != CL_SUCCESS) {
        printf("Error: %s\n", print_cl_errstring(errorCode));
        exit(1);
    }
    
    errorCode = clEnqueueWriteBuffer (this->OpenCL->getCommandQueue(),
            this->DeviceBitmap16kb_Address,
            CL_TRUE /* blocking write */,
            0 /* offset */,
            16384 /* bytes to copy */,
            (void *)&this->sharedBitmap16kb_a[0],
            NULL, NULL, NULL /* event list stuff */);
    if (errorCode != CL_SUCCESS) {
        printf("Error: %s\n", print_cl_errstring(errorCode));
        exit(1);
    }

    errorCode = clEnqueueWriteBuffer (this->OpenCL->getCommandQueue(),
            this->DeviceBitmap32kb_Address,
            CL_TRUE /* blocking write */,
            0 /* offset */,
            32768 /* bytes to copy */,
            (void *)&this->sharedBitmap32kb_a[0],
            NULL, NULL, NULL /* event list stuff */);
    if (errorCode != CL_SUCCESS) {
        printf("Error: %s\n", print_cl_errstring(errorCode));
        exit(1);
    }

    errorCode = clEnqueueWriteBuffer (this->OpenCL->getCommandQueue(),
            this->DeviceBitmap256kb_a_Address,
            CL_TRUE /* blocking write */,
            0 /* offset */,
            256*1024 /* bytes to copy */,
            (void *)&this->globalBitmap256kb_a[0],
            NULL, NULL, NULL /* event list stuff */);
    if (errorCode != CL_SUCCESS) {
        printf("Error: %s\n", print_cl_errstring(errorCode));
        exit(1);
    }
    
    // Copy the values into a variable that can be accessed as a pointer.
    uint64_t localNumberHashes = this->activeHashesProcessed.size();
    uint64_t localNumberThreads = this->TotalKernelWidth;
    
    errorCode = 0;
    errorCode |= clSetKernelArg (this->HashKernel,  0, sizeof(cl_mem), &this->DeviceForwardCharsetAddress);
    errorCode |= clSetKernelArg (this->HashKernel,  1, sizeof(cl_mem), &this->DeviceReverseCharsetAddress);
    errorCode |= clSetKernelArg (this->HashKernel,  2, sizeof(cl_mem), &this->DeviceCharsetLengthsAddress);
    if (this->sharedBitmapSize == 8) {
        errorCode |= clSetKernelArg (this->HashKernel,  3, sizeof(cl_mem), &this->DeviceBitmap8kb_Address);
    } else if (this->sharedBitmapSize == 16) {
        errorCode |= clSetKernelArg (this->HashKernel,  3, sizeof(cl_mem), &this->DeviceBitmap16kb_Address);
    } else if (this->sharedBitmapSize == 32) {
        errorCode |= clSetKernelArg (this->HashKernel,  3, sizeof(cl_mem), &this->DeviceBitmap32kb_Address);
    } else {
        printf("Error: Invalid shared bitmap size!  Must be 8, 16, 32\n");
        exit(1);
    }
    
    errorCode |= clSetKernelArg (this->HashKernel,  4, sizeof(cl_ulong), &localNumberHashes);
    errorCode |= clSetKernelArg (this->HashKernel,  5, sizeof(cl_mem), &this->DeviceHashlistAddress);
    errorCode |= clSetKernelArg (this->HashKernel,  6, sizeof(cl_mem), &this->DeviceFoundPasswordsAddress);
    errorCode |= clSetKernelArg (this->HashKernel,  7, sizeof(cl_mem), &this->DeviceSuccessAddress);

    errorCode |= clSetKernelArg (this->HashKernel,  8, sizeof(cl_mem), &this->DeviceBitmap128mb_a_Address);
    errorCode |= clSetKernelArg (this->HashKernel,  9, sizeof(cl_mem), &this->DeviceBitmap128mb_b_Address);
    errorCode |= clSetKernelArg (this->HashKernel, 10, sizeof(cl_mem), &this->DeviceBitmap128mb_c_Address);
    errorCode |= clSetKernelArg (this->HashKernel, 11, sizeof(cl_mem), &this->DeviceBitmap128mb_d_Address);

    errorCode |= clSetKernelArg (this->HashKernel, 12, sizeof(cl_mem), &this->DeviceStartPointAddress);
    errorCode |= clSetKernelArg (this->HashKernel, 13, sizeof(cl_ulong), &localNumberThreads);
    
    errorCode |= clSetKernelArg (this->HashKernel, 15, sizeof(cl_mem), &this->DeviceStartPasswords32Address);
    errorCode |= clSetKernelArg (this->HashKernel, 16, sizeof(cl_mem), &this->DeviceBitmap256kb_a_Address);
    

    if (errorCode != CL_SUCCESS) {
        printf("Thread %d, dev %d: OpenCL error 5: %s. Exiting.\n",
                this->threadId, this->gpuDeviceId, print_cl_errstring(errorCode));
        exit(1);
    }
}
Ejemplo n.º 9
0
void getGPUUnitSupportedImageFormats(cl_context context){
    
    cl_image_format supported_image_formats[1000];
    cl_uint supported_image_format_list_size;
    
    //collect supported image formats
    cl_int status = clGetSupportedImageFormats(
                                               context,
                                               CL_MEM_READ_ONLY,
                                               CL_MEM_OBJECT_IMAGE3D,
                                               sizeof(supported_image_formats) / sizeof(supported_image_formats[0]),
                                               supported_image_formats,
                                               &supported_image_format_list_size);
    if (status != CL_SUCCESS) {
        printf("%s\n", print_cl_errstring(status));
		exit(1);        
    }
    for (int i = 0; i < supported_image_format_list_size; i++) {
        printf("Supported image format: ");
        switch (supported_image_formats[i].image_channel_order) {
            case CL_R:
                printf("CL_R");
                break;
            case CL_A:
                printf("CL_A");
                break;
            case CL_INTENSITY:
                printf("CL_INTENSITY");
                break;
            case CL_LUMINANCE:
                printf("CL_LUMINANCE");
                break;
            case CL_RG:
                printf("CL_RG");
                break;
            case CL_RA:
                printf("CL_RA");
                break;
            case CL_RGB:
                printf("CL_RGB");
                break;
            case CL_RGBA:
                printf("CL_RGBA");
                break;
            case CL_ARGB:
                printf("CL_ARGB");
                break;
            case CL_BGRA:
                printf("CL_BGRA");
                break;
            default:
                printf("Unknown");
                break;
        }
        printf(", ");
        switch (supported_image_formats[i].image_channel_data_type) {
            case CL_UNORM_INT8:
                printf("CL_UNORM_INT8\n");
                break;
            case CL_UNORM_INT16:
                printf("CL_UNORM_INT16\n");
                break;
            case CL_SNORM_INT8:
                printf("CL_SNORM_INT8\n");
                break;
            case CL_SNORM_INT16:
                printf("CL_SNORM_INT16\n");
                break;
            case CL_HALF_FLOAT:
                printf("CL_HALF_FLOAT\n");
                break;
            case CL_FLOAT:
                printf("CL_FLOAT\n");
                break;
            case CL_UNORM_SHORT_565:
                printf("CL_UNORM_SHORT_565\n");
                break;
            case CL_UNORM_SHORT_555:
                printf("CL_UNORM_SHORT_555\n");
                break;
            case CL_UNORM_INT_101010:
                printf("CL_UNORM_INT_101010\n");
                break;
            case CL_SIGNED_INT8:
                printf("CL_SIGNED_INT8\n");
                break;
            case CL_UNSIGNED_INT8:
                printf("CL_UNSIGNED_INT8\n");
                break;
            case CL_SIGNED_INT16:
                printf("CL_SIGNED_INT16\n");
                break;
            case CL_SIGNED_INT32:
                printf("CL_SIGNED_INT32\n");
                break;
            case CL_UNSIGNED_INT16:
                printf("CL_UNSIGNED_INT16\n");
                break;
            case CL_UNSIGNED_INT32:
                printf("CL_UNSIGNED_INT32\n");
                break;
            default:
                printf("Unknown\n");
                break;
        }
    }
    
}
Ejemplo n.º 10
0
void MFNHashTypePlainOpenCL::allocateThreadAndDeviceMemory() {
    trace_printf("MFNHashTypePlainOpenCL::allocateThreadAndDeviceMemory()\n");

    /**
     * Error variable - stores the result of the various mallocs & such.
     */
    cl_int errorCode;
    /*
     * Malloc the device hashlist space.  This is the number of available hashes
     * times the hash length in bytes.  The data will be copied later.
     */
    memalloc_printf("Attempting to openclMalloc %d bytes for device hashlist for thread %d.\n",
            this->activeHashesProcessed.size() * this->hashLengthBytes, this->threadId);
    this->DeviceHashlistAddress =
            clCreateBuffer (this->OpenCL->getContext(),
            CL_MEM_READ_ONLY,
            this->activeHashesProcessed.size() * this->hashLengthBytes,
            NULL,
            &errorCode);
    if (errorCode != CL_SUCCESS) {
        printf("Unable to allocate %d bytes for device hashlist!  Exiting!\n",
                this->activeHashesProcessed.size() * this->hashLengthBytes);
        printf("Error: %s\n", print_cl_errstring(errorCode));
        exit(1);
    }

    /*
     * Allocate the host/device space for the success list (flags for found passwords).
     * This is a byte per password.  To avoid atomic write issues, each password
     * gets a full addressible byte, and the GPU handles the dependencies between
     * multiple threads trying to set a flag in the same segment of memory.
     *
     * On the host, it will be allocated as mapped memory if we are using zerocopy.
     *
     * As this region of memory is frequently copied back to the host, mapping it
     * improves performance.  In theory.
     */
    memalloc_printf("Attempting to cudaHostAlloc %d bytes for HostSuccess\n",
            this->activeHashesProcessed.size());
    this->HostSuccessAddress = new uint8_t [this->activeHashesProcessed.size()];
    memset(this->HostSuccessAddress, 0, this->activeHashesProcessed.size());

    // Allocate memory for the reported flags.
    this->HostSuccessReportedAddress = new uint8_t [this->activeHashesProcessed.size()];
    memset(this->HostSuccessReportedAddress, 0, this->activeHashesProcessed.size());

    // Allocate device memory for the "reported" flags, and copy in the zeroed 
    // host memory for this region.
    this->DeviceSuccessAddress =
            clCreateBuffer (this->OpenCL->getContext(),
            CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
            this->activeHashesProcessed.size(),
            this->HostSuccessAddress,
            &errorCode);
    
    if (errorCode != CL_SUCCESS) {
        printf("Unable to allocate %d bytes for device successlist!  Exiting!\n",
                this->activeHashesProcessed.size());
        printf("Error: %s\n", print_cl_errstring(errorCode));
        exit(1);
    }

    /*
     * Allocate memory for the found passwords.  As this is commonly copied
     * back and forth, it should be made zero copy if requested.
     *
     * This requires (number hashes * passwordLength) bytes of data.
     */

    this->HostFoundPasswordsAddress = new uint8_t [this->passwordLength * 
            this->activeHashesProcessed.size()];
    // Clear the host found password space.
    memset(this->HostFoundPasswordsAddress, 0,
            this->passwordLength * this->activeHashesProcessed.size());

    this->DeviceFoundPasswordsAddress =
            clCreateBuffer (this->OpenCL->getContext(),
            CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
            this->passwordLength * this->activeHashesProcessed.size(),
            this->HostFoundPasswordsAddress,
            &errorCode);
    
    if (errorCode != CL_SUCCESS) {
        printf("Unable to allocate %d bytes for device passwordlist!  Exiting!\n",
                this->passwordLength * this->activeHashesProcessed.size());
        printf("Error: %s\n", print_cl_errstring(errorCode));
        exit(1);
    }


    /**
     * Allocate space for host and device start positions.  To improve performance,
     * this space is now aligned for improved coalescing performance.  All the
     * position 0 elements are together, followed by all the position 1 elements,
     * etc.
     *
     * This memory can be allocated as write combined, as it is not read by
     * the host ever - only written.  Since it is regularly transferred to the
     * GPU, this should help improve performance.
     */

    this->HostStartPointAddress = new uint8_t [this->TotalKernelWidth * 
            this->passwordLength];

    this->DeviceStartPointAddress =
            clCreateBuffer (this->OpenCL->getContext(),
            CL_MEM_READ_ONLY,
            this->TotalKernelWidth * this->passwordLength,
            NULL,
            &errorCode);
    if (errorCode != CL_SUCCESS) {
        printf("Unable to allocate %d bytes for device start points!  Exiting!\n",
                this->TotalKernelWidth * this->passwordLength);
        printf("Error: %s\n", print_cl_errstring(errorCode));
        exit(1);
    }
    
    this->DeviceStartPasswords32Address =
            clCreateBuffer (this->OpenCL->getContext(),
            CL_MEM_READ_ONLY,
            this->TotalKernelWidth * this->passwordLengthWords,
            NULL,
            &errorCode);
    if (errorCode != CL_SUCCESS) {
        printf("Unable to allocate %d bytes for device start passwords!  Exiting!\n",
                this->TotalKernelWidth * this->passwordLengthWords);
        printf("Error: %s\n", print_cl_errstring(errorCode));
        exit(1);
    }

    /**
     * Allocate memory for the things that are considered constant in CUDA
     * and not stored in global memory.  For OpenCL, these are stored in a 
     * constant-tagged chunk of global memory (or something) and therefore
     * need to have space allocated in global memory.
     */
    
    this->DeviceBitmap8kb_a_Address =
            clCreateBuffer (this->OpenCL->getContext(),
            CL_MEM_READ_ONLY,
            8192,
            NULL,
            &errorCode);
    if (errorCode == CL_SUCCESS) {
        memalloc_printf("Successfully allocated 8kb Bitmap A\n");
    } else {
        printf("Unable to allocate 8kb bitmap A\n");
        printf("Error: %s\n", print_cl_errstring(errorCode));
        exit(1);
    }

    this->DeviceForwardCharsetAddress =
            clCreateBuffer (this->OpenCL->getContext(),
            CL_MEM_READ_ONLY,
            MFN_HASH_TYPE_PLAIN_CUDA_MD5_MAX_CHARSET_LENGTH * this->passwordLength,
            NULL,
            &errorCode);
    if (errorCode != CL_SUCCESS) {
        printf("Unable to allocate forward charset\n");
        printf("Error: %s\n", print_cl_errstring(errorCode));
        exit(1);
    }
    
    this->DeviceReverseCharsetAddress =
            clCreateBuffer (this->OpenCL->getContext(),
            CL_MEM_READ_ONLY,
            MFN_HASH_TYPE_PLAIN_CUDA_MD5_MAX_CHARSET_LENGTH * this->passwordLength,
            NULL,
            &errorCode);
    if (errorCode != CL_SUCCESS) {
        printf("Unable to allocate reverse charset\n");
        printf("Error: %s\n", print_cl_errstring(errorCode));
        exit(1);
    }
    
    this->DeviceCharsetLengthsAddress =
            clCreateBuffer (this->OpenCL->getContext(),
            CL_MEM_READ_ONLY,
            this->passwordLength,
            NULL,
            &errorCode);
    if (errorCode != CL_SUCCESS) {
        printf("Unable to allocate charset lengths\n");
        printf("Error: %s\n", print_cl_errstring(errorCode));
        exit(1);
    }
    
    
    
    /**
     * Finally, attempt to allocate space for the giant device bitmaps.  There
     * are 4x128MB bitmaps, and any number can be allocated.  If they are not
     * fully allocated, their address is set to null as a indicator to the device
     * that there is no data present.  Attempt to allocate as many as possible.
     *
     * This will be accessed regularly, so should probably not be zero copy.
     * Also, I'm not sure how mapping host memory into multiple threads would
     * work.  Typically, if the GPU doesn't have enough RAM for the full
     * set of bitmaps, it's a laptop, and therefore may be short on host RAM
     * for the pinned access.
     *
     */
    this->DeviceBitmap128mb_a_Address =
            clCreateBuffer (this->OpenCL->getContext(),
            CL_MEM_READ_ONLY,
            128 * 1024 * 1024,
            NULL,
            &errorCode);
    if (errorCode == CL_SUCCESS) {
        memalloc_printf("Successfully allocated Bitmap A\n");
    } else {
        memalloc_printf("Unable to allocate 128MB bitmap A\n");
        this->DeviceBitmap128mb_a_Address = 0;
    }
    
    this->DeviceBitmap128mb_b_Address =
            clCreateBuffer (this->OpenCL->getContext(),
            CL_MEM_READ_ONLY,
            128 * 1024 * 1024,
            NULL,
            &errorCode);
    if (errorCode == CL_SUCCESS) {
        memalloc_printf("Successfully allocated Bitmap B\n");
    } else {
        memalloc_printf("Unable to allocate 128MB bitmap B\n");
        this->DeviceBitmap128mb_b_Address = 0;
    }
    
    this->DeviceBitmap128mb_c_Address =
            clCreateBuffer (this->OpenCL->getContext(),
            CL_MEM_READ_ONLY,
            128 * 1024 * 1024,
            NULL,
            &errorCode);
    if (errorCode == CL_SUCCESS) {
        memalloc_printf("Successfully allocated Bitmap C\n");
    } else {
        memalloc_printf("Unable to allocate 128MB bitmap C\n");
        this->DeviceBitmap128mb_c_Address = 0;
    }
    
    this->DeviceBitmap128mb_d_Address =
            clCreateBuffer (this->OpenCL->getContext(),
            CL_MEM_READ_ONLY,
            128 * 1024 * 1024,
            NULL,
            &errorCode);
    if (errorCode == CL_SUCCESS) {
        memalloc_printf("Successfully allocated Bitmap D\n");
    } else {
        memalloc_printf("Unable to allocate 128MB bitmap D\n");
        this->DeviceBitmap128mb_d_Address = 0;
    }
    


    memalloc_printf("Thread %d memory allocated successfully\n", this->threadId);
}
Ejemplo n.º 11
0
void MFNHashTypePlainOpenCL::copyDataToDevice() {
    trace_printf("MFNHashTypePlainOpenCL::copyDataToDevice()\n");
    
    cl_int errorCode;
    
    
    
    // Copy all the various elements of data to the device, forming them as needed.
    errorCode = clEnqueueWriteBuffer (this->OpenCL->getCommandQueue(),
            this->DeviceHashlistAddress,
            CL_TRUE /* blocking write */,
            0 /* offset */,
            this->activeHashesProcessedDeviceformat.size() /* bytes to copy */,
            (void *)&this->activeHashesProcessedDeviceformat[0],
            NULL, NULL, NULL /* event list stuff */);
    if (errorCode != CL_SUCCESS) {
        printf("Error: %s\n", print_cl_errstring(errorCode));
        exit(1);
    }
    
    // Device bitmaps: Copy all relevant bitmaps to the device.
    // Only copy bitmaps that are created.
    if (this->DeviceBitmap128mb_a_Address) {
        memalloc_printf("Thread %d: Copying bitmap A\n", this->threadId);
        errorCode = clEnqueueWriteBuffer (this->OpenCL->getCommandQueue(),
                this->DeviceBitmap128mb_a_Address,
                CL_TRUE /* blocking write */,
                0 /* offset */,
                this->globalBitmap128mb_a.size() /* bytes to copy */,
                (void *)&this->globalBitmap128mb_a[0],
                NULL, NULL, NULL /* event list stuff */);
        if (errorCode != CL_SUCCESS) {
            printf("Error: %s\n", print_cl_errstring(errorCode));
            exit(1);
        }
    }

    if (this->DeviceBitmap128mb_b_Address) {
        memalloc_printf("Thread %d: Copying bitmap B\n", this->threadId);
        errorCode = clEnqueueWriteBuffer (this->OpenCL->getCommandQueue(),
                this->DeviceBitmap128mb_b_Address,
                CL_TRUE /* blocking write */,
                0 /* offset */,
                this->globalBitmap128mb_b.size() /* bytes to copy */,
                (void *)&this->globalBitmap128mb_b[0],
                NULL, NULL, NULL /* event list stuff */);
        if (errorCode != CL_SUCCESS) {
            printf("Error: %s\n", print_cl_errstring(errorCode));
            exit(1);
        }
    }
    
    if (this->DeviceBitmap128mb_c_Address) {
        memalloc_printf("Thread %d: Copying bitmap C\n", this->threadId);
        errorCode = clEnqueueWriteBuffer (this->OpenCL->getCommandQueue(),
                this->DeviceBitmap128mb_c_Address,
                CL_TRUE /* blocking write */,
                0 /* offset */,
                this->globalBitmap128mb_c.size() /* bytes to copy */,
                (void *)&this->globalBitmap128mb_c[0],
                NULL, NULL, NULL /* event list stuff */);
        if (errorCode != CL_SUCCESS) {
            printf("Error: %s\n", print_cl_errstring(errorCode));
            exit(1);
        }
    }
    
    if (this->DeviceBitmap128mb_d_Address) {
        memalloc_printf("Thread %d: Copying bitmap D\n", this->threadId);
        errorCode = clEnqueueWriteBuffer (this->OpenCL->getCommandQueue(),
                this->DeviceBitmap128mb_d_Address,
                CL_TRUE /* blocking write */,
                0 /* offset */,
                this->globalBitmap128mb_d.size() /* bytes to copy */,
                (void *)&this->globalBitmap128mb_d[0],
                NULL, NULL, NULL /* event list stuff */);
        if (errorCode != CL_SUCCESS) {
            printf("Error: %s\n", print_cl_errstring(errorCode));
            exit(1);
        }
    }

    // Other data to the device - charset, etc.
}
Ejemplo n.º 12
0
// main() for simple buffer and sub-buffer example
//
int main(int argc, char** argv)
{
    
    std::cout << "Simple Image Processing Example" << std::endl;
    
    
    // First, select an OpenCL platform to run on.
    errNum = clGetPlatformIDs(0, NULL, &numPlatforms);
    checkErr(
             (errNum != CL_SUCCESS) ?
             errNum : (numPlatforms <= 0 ? -1 : CL_SUCCESS),
             "clGetPlatformIDs");
    platformIDs = (cl_platform_id *)alloca(sizeof(cl_platform_id) * numPlatforms);
    std::cout << "Number of platforms: \t" << numPlatforms << std::endl;
    errNum = clGetPlatformIDs(numPlatforms, platformIDs, NULL);
    checkErr(
             (errNum != CL_SUCCESS) ?
             errNum : (numPlatforms <= 0 ? -1 : CL_SUCCESS),
             "clGetPlatformIDs");
    std::ifstream srcFile("gaussian_filter.cl");
    
    checkErr(srcFile.is_open() ? CL_SUCCESS : -1, "reading simple.cl");
    
    std::string srcProg(
                        std::istreambuf_iterator<char>(srcFile),
                        (std::istreambuf_iterator<char>()));
    const char * src = srcProg.c_str();
    size_t length = srcProg.length();
    deviceIDs = NULL;
    DisplayPlatformInfo(
                        platformIDs[PLATFORM_INDEX],
                        CL_PLATFORM_VENDOR,
                        "CL_PLATFORM_VENDOR");
    errNum = clGetDeviceIDs(
                            platformIDs[PLATFORM_INDEX],
                            CL_DEVICE_TYPE_ALL,
                            0,
                            NULL,
                            &numDevices);
    if (errNum != CL_SUCCESS && errNum != CL_DEVICE_NOT_FOUND){
        checkErr(errNum, "clGetDeviceIDs");
    }
    
    deviceIDs = (cl_device_id *)alloca(sizeof(cl_device_id) * numDevices);
    errNum = clGetDeviceIDs(
                            platformIDs[PLATFORM_INDEX],
                            CL_DEVICE_TYPE_ALL,
                            numDevices,
                            &deviceIDs[0],
                            NULL);
    checkErr(errNum, "clGetDeviceIDs");
    
    cl_context_properties contextProperties[] =
    {
        CL_CONTEXT_PLATFORM,
        (cl_context_properties)platformIDs[PLATFORM_INDEX],
        0
    };
    
    context = clCreateContext(
                              contextProperties,
                              numDevices,
                              deviceIDs,
                              NULL,
                              NULL,
                              &errNum);
    
    checkErr(errNum, "clCreateContext");
    // Create program from source
    program = clCreateProgramWithSource(
                                        context,
                                        1,
                                        &src,
                                        &length,
                                        &errNum);
    checkErr(errNum, "clCreateProgramWithSource");
    
    // Build program
    errNum = clBuildProgram(
                            program,
                            numDevices,
                            deviceIDs,
                            "-I.",
                            NULL,
                            NULL);

    if (errNum != CL_SUCCESS){
        // Determine the reason for the error
        char buildLog[16384];
        clGetProgramBuildInfo(
                              program,
                              deviceIDs[0],
                              CL_PROGRAM_BUILD_LOG,
                              sizeof(buildLog),
                              buildLog,
                              NULL);
        std::cerr << "Error in OpenCL C source: " << std::endl;
        std::cerr << buildLog;
        checkErr(errNum, "clBuildProgram");
    }
    
    // Create a command commands
	//
	if(!(commands = clCreateCommandQueue(context, deviceIDs[0], 0, &errNum))) {
        std::cout << "Failed to create a command commands!" << std::endl;
        cleanKill(EXIT_FAILURE);
    }
    
    cl_kernel kernel = clCreateKernel(program, "gaussian_filter", &errNum);
    checkErr(errNum, "clCreateKernel(gaussian_filter)");

    if(!doesGPUSupportImageObjects){
        cleanKill(EXIT_FAILURE);
    }
    
    inputImage = LoadImage(context, (char*)"rgba.png", width, height);
        
    cl_image_format format; 
    format.image_channel_order = CL_RGBA; 
    format.image_channel_data_type = CL_UNORM_INT8;
    
    outputImage = clCreateImage2D(context, 
                             CL_MEM_WRITE_ONLY, 
                             &format, 
                             width, 
                             height,
                             0, 
                             NULL, 
                             &errNum);
    
    if(there_was_an_error(errNum)){
        std::cout << "Output Image Buffer creation error!" << std::endl;
        cleanKill(EXIT_FAILURE);
    }    
    
	if (!inputImage || !outputImage ){
        std::cout << "Failed to allocate device memory!" << std::endl;
        cleanKill(EXIT_FAILURE);
	}
    
    char *buffer = new char [width * height * 4];
    size_t origin[3] = { 0, 0, 0 };
    size_t region[3] = { width, height, 1};

    sampler = clCreateSampler(context,
                              CL_FALSE, // Non-normalized coordinates 
                              CL_ADDRESS_CLAMP_TO_EDGE, 
                              CL_FILTER_NEAREST, 
                              &errNum);
    
    if(there_was_an_error(errNum)){
        std::cout << "Error creating CL sampler object." << std::endl;
        cleanKill(EXIT_FAILURE);
    }
    
    // Set the kernel arguments
    errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage);
    errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage);
    errNum |= clSetKernelArg(kernel, 2, sizeof(cl_sampler), &sampler);
    errNum |= clSetKernelArg(kernel, 3, sizeof(cl_int), &width);
    errNum |= clSetKernelArg(kernel, 4, sizeof(cl_int), &height);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Error setting kernel arguments." << std::endl;
        std::cerr << print_cl_errstring(errNum) << std::endl;
        cleanKill(EXIT_FAILURE);
    }
    
    //errNum = clGetKernelWorkGroupInfo(kernel, deviceIDs, CL_KERNEL_WORK_GROUP_SIZE, sizeof(unsigned short)* height*width*4, &local, NULL);
    
//	if (errNum != CL_SUCCESS)
//	{
//        cout << print_cl_errstring(err) << endl;
//        if(err == CL_INVALID_VALUE){
//            cout << "if param_name is not valid, or if size in bytes specified by param_value_size "
//            << "is less than the size of return type as described in the table above and "
//            << "param_value is not NULL." << endl;
//        }
//		cout << "Error: Failed to retrieve kernel work group info!" << err << endl;
//		cleanKill(EXIT_FAILURE);
//	}
    
    std::cout << "Max work group size is " << CL_DEVICE_MAX_WORK_GROUP_SIZE << std::endl;
    std::cout << "Max work item size is " << CL_DEVICE_MAX_WORK_ITEM_SIZES << std::endl;
    
    size_t localWorkSize[2];
    size_t globalWorkSize[2];
    
    localWorkSize[0] = 1;
    localWorkSize[1] = localWorkSize[0];
    globalWorkSize[0] = width*height;
    globalWorkSize[1] = globalWorkSize[0];
    
    //CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and number of work-items specified by global_work_size is not evenly divisable by size of work-group given by local_work_size
    
    
    
        //size_t globalWorkSize[2] =  { RoundUp(localWorkSize[0], width), RoundUp(localWorkSize[1], height)};

//    size_t globalWorkSize[1] = {sizeof(unsigned short)* height * width};
//	size_t localWorkSize[1] = {64};
    
    // Queue the kernel up for execution
    errNum = clEnqueueNDRangeKernel(commands, kernel, 2, NULL,
                                    globalWorkSize, localWorkSize,
                                    0, NULL, NULL);
    
    if (errNum != CL_SUCCESS){
        std::cerr << "Error queuing kernel for execution." << std::endl;
        std::cerr << print_cl_errstring(errNum) << std::endl;
        cleanKill(EXIT_FAILURE);
    }
    
    // Wait for the command commands to get serviced before reading back results
	//
	clFinish(commands);
    
    // Read back computed data
    errNum = clEnqueueReadImage(commands, outputImage,
                                     CL_TRUE, origin, region, 0, 0, buffer, 0, NULL, NULL);
    
    SaveImage((char*)"outRGBA.png", (char*)buffer, width, height);

    std::cout << "Program completed successfully" << std::endl;        
    return 0;     
}
Ejemplo n.º 13
0
cl_program OclHost::setUpProgram(char const * const oclSwScore,
                                 std::string buildOptions) {

    //char const * additional_options_nv = " -cl-nv-verbose -cl-fast-relaxed-math";
    //char * additional_options = 0;

    size_t program_length = strlen(oclSwScore);
    if (strcasestr(clPlatformName, "NVIDIA") != 0) {
        buildOptions += " -D __NVIDIA__ -cl-nv-verbose -cl-fast-relaxed-math";
    }
    cl_int ciErrNum = 0;
    //Log.Message("Source: %s\n===========================", oclSwScore);
    // create the program
    cl_program cpProgram = clCreateProgramWithSource(oclGpuContext, 1,
                           (const char **) &oclSwScore, &program_length, &ciErrNum);
//	checkClError("Unable to build program.", ciErrNum);
    if (ciErrNum == CL_SUCCESS) {

        // build the program
        Log.Verbose("Build Options: %s", buildOptions.c_str());
        ciErrNum = clBuildProgram(cpProgram, 0, NULL, buildOptions.c_str(),
                                  NULL, NULL);
        if (ciErrNum != CL_SUCCESS)
            Log.Error("Build failed: %s", print_cl_errstring(ciErrNum));

        //checkClError("Unable to build program (clBuildProgram).", ciErrNum);
        //clUnloadCompiler();
        char cBuildLog[10240];

        clGetProgramBuildInfo(cpProgram, oclDevice, CL_PROGRAM_BUILD_OPTIONS,
                              sizeof(cBuildLog), cBuildLog, NULL);
        Log.Verbose("Build options: %s", cBuildLog);

        cl_build_status status;
        clGetProgramBuildInfo(cpProgram, oclDevice, CL_PROGRAM_BUILD_STATUS,
                              sizeof(status), &status, NULL);

        if (status != CL_BUILD_SUCCESS) {
            Log.Message("Build status: %s", print_cl_buildstatus(status));
            clGetProgramBuildInfo(cpProgram, oclDevice, CL_PROGRAM_BUILD_LOG,
                                  sizeof(cBuildLog), cBuildLog, NULL);

            Log.Message("Build log:");

            char * pBuildLog = strtok(cBuildLog, "\n");
            while (pBuildLog != NULL) {
                if (strlen(pBuildLog) > 1) {
                    Log.Message("%s", pBuildLog);
                }
                pBuildLog = strtok(NULL, "\n");
            }
        }

        checkClError("Unable to build program end.", ciErrNum);
        return cpProgram;
    } else {
        Log.Error("Unable to load OpenCl kernel source. Error: %d", ciErrNum);
    }

    return 0;
}
Ejemplo n.º 14
0
void OclHost::checkClError(char const * msg, cl_int ciErrNum) {
    if (ciErrNum != CL_SUCCESS) {
        Log.Error("%s\nError: %s (%d)", msg, print_cl_errstring(ciErrNum), ciErrNum);
        throw;
    }
}