// func for find the specific 'search' pattern in a data buffer int find_data_in_buffer(u8* pInBuffer, uint32_t dwSizeOfBuffer, char* pszSearchString, char* pszMaskString, char* pszReplaceString, uint32_t dwReplaceOffset) { u8* pCurrBuffPtr = NULL; u8* pSearchPattern = NULL; u8* pReplacePattern = NULL; u8* pMaskPattern = NULL; uint32_t* pdwSearchBlock = NULL; uint32_t* pdwDataBlock = NULL; uint32_t* pdwMaskBlock = NULL; uint32_t dwSizeOfSearchString = 0; uint32_t dwSizeOfMaskString = 0; uint32_t dwSizeOfReplaceString = 0; uint32_t dwSizeOfSearchData = 0; uint32_t dwSizeOfReplaceData = 0; uint32_t num_blocks = 0; uint32_t matches_found = 0; uint32_t i = 0; uint32_t j = 0; uint32_t dwPadd = 0; int code_matches_found = 0; int retval = -1; // validate input params if ( (pInBuffer == NULL) || (dwSizeOfBuffer == 0) || (pszSearchString == NULL) || (pszMaskString == NULL) || (pszReplaceString == NULL) ) { printf("\nERROR! Invalid input parameters, exiting!\n"); goto exit; } __try { // get the lengths of the 'search' and 'replace' strings dwSizeOfSearchString = strlen(pszSearchString); dwSizeOfMaskString = strlen(pszMaskString); dwSizeOfReplaceString = strlen(pszReplaceString); if ( (dwSizeOfSearchString == 0) || (dwSizeOfMaskString == 0) || ((dwSizeOfReplaceString < 1) && g_bPatchingEnabled == TRUE) ) { printf("\nERROR! Invalid input parameters, exiting!\n"); __leave; } // search and data masks MUST be same size if ( dwSizeOfSearchString != dwSizeOfMaskString ) { printf("\nERROR! Search/Replace patterns are not the same size, exiting!!\n"); __leave; } // calculate the actual size of the searchstring // converted to binary data (ie divide stringsize / 2) dwSizeOfSearchData = dwSizeOfSearchString / 2; dwSizeOfReplaceData = dwSizeOfReplaceString / 2; num_blocks = dwSizeOfSearchData / DATA_PATTERN_ALIGNMENT; // if our data is 'mis-aligned', tack on the extra 'search' byte(s) (CC) // and 'mask' byte(s) (00) needed to align at 32-bits, // and bump up the 'num_blocks' by 1 if ( (dwSizeOfSearchData % DATA_PATTERN_ALIGNMENT > 0) ) { dwPadd = (DATA_PATTERN_ALIGNMENT - (dwSizeOfSearchData % DATA_PATTERN_ALIGNMENT)); for (i = 0; i < dwPadd; i++) { strcat_s(pszSearchString, MAX_HEXSTRINGS_LENGTH, "CC"); strcat_s(pszMaskString, MAX_HEXSTRINGS_LENGTH, "00"); } num_blocks+=1; } // convert the ASCII string to binary buffers pSearchPattern = _x_to_u8_buffer(pszSearchString); pMaskPattern = _x_to_u8_buffer(pszMaskString); pReplacePattern = _x_to_u8_buffer(pszReplaceString); if ( (pReplacePattern == NULL) || (pMaskPattern == NULL) || (pReplacePattern == NULL) ) { printf("\nERROR! Failed to convert strings to binary data, exiting!\n"); retval = -1; __leave; } // iterate through the buffer, searching for the pattern // (search range is our 'buffer size' - 'search string len') pCurrBuffPtr = pInBuffer; for (i = 0; i < (dwSizeOfBuffer - dwSizeOfSearchData); i++ ) { // iterate through the 32-bit 'chunks', AND // off the searc/data with the mask, and XOR // them to see if block(s) match matches_found = 0; for (j = 0; j < num_blocks; j++) { pdwSearchBlock = (uint32_t*)(pSearchPattern+(sizeof(uint32_t)*j)); pdwDataBlock = (uint32_t*)(pCurrBuffPtr+(sizeof(uint32_t)*j)); pdwMaskBlock = (uint32_t*)(pMaskPattern+(sizeof(uint32_t)*j)); if ( ((*pdwSearchBlock ^ *pdwDataBlock) & *pdwMaskBlock) == 0 ) matches_found++; else break; } // if we found all matches, then // break out, success!! if (matches_found == num_blocks) { code_matches_found++; if ( (g_bMultiPatchEnabled == FALSE) && (code_matches_found > 1) ) { printf("!ERROR! Multiple matches found!, exiting!\n"); retval = -1; __leave; } // if 'patching' is enabled, then copy patch to buffer, at ptr+offset // (verify that 'replace offset' is not beyond the buffer size!) if (g_bPatchingEnabled == TRUE) { if ( (dwReplaceOffset+i) > dwSizeOfBuffer ) { printf("!ERROR! Replace Offset is out of range!!!, exiting!\n"); retval = -1; __leave; } memcpy((pCurrBuffPtr+dwReplaceOffset), pReplacePattern, dwSizeOfReplaceData); printf("----PATCHED AT:%.8X\n", (i+dwReplaceOffset)); } else { printf("----FOUND MATCH AT:%.8X\n", (i+dwReplaceOffset)); } // status success retval = STATUS_SUCCESS; } pCurrBuffPtr++; } } // end __try{} __except(EXCEPTION_EXECUTE_HANDLER) { printf("\n!!ERROR!! find_data_in_buffer threw exception!!\n"); retval = -1; } exit: // free any alloc'd memory if (pSearchPattern != NULL) free(pSearchPattern); // free any alloc'd memory if (pMaskPattern != NULL) free(pMaskPattern); // free any alloc'd memory if (pReplacePattern != NULL) free(pReplacePattern); return retval; }
static void _fill_property(keyset_t *ks, s8 *prop, s8 *value) { if(strcmp(prop, "type") == 0) { if(strcmp(value, "SELF") == 0) ks->type = KEYTYPE_SELF; else if(strcmp(value, "RVK") == 0) ks->type = KEYTYPE_RVK; else if(strcmp(value, "PKG") == 0) ks->type = KEYTYPE_PKG; else if(strcmp(value, "SPP") == 0) ks->type = KEYTYPE_SPP; else if(strcmp(value, "OTHER") == 0) ks->type = KEYTYPE_OTHER; else printf("[*] Error: Unknown type '%s'.\n", value); } else if(strcmp(prop, "revision") == 0) ks->key_revision = (u16)_x_to_u64(value); else if(strcmp(prop, "version") == 0) ks->version = _x_to_u64(value); else if(strcmp(prop, "self_type") == 0) { if(strcmp(value, "LV0") == 0) ks->self_type = SELF_TYPE_LV0; else if(strcmp(value, "LV1") == 0) ks->self_type = SELF_TYPE_LV1; else if(strcmp(value, "LV2") == 0) ks->self_type = SELF_TYPE_LV2; else if(strcmp(value, "APP") == 0) ks->self_type = SELF_TYPE_APP; else if(strcmp(value, "ISO") == 0) ks->self_type = SELF_TYPE_ISO; else if(strcmp(value, "LDR") == 0) ks->self_type = SELF_TYPE_LDR; else if(strcmp(value, "UNK_7") == 0) ks->self_type = SELF_TYPE_UNK_7; else if(strcmp(value, "NPDRM") == 0) ks->self_type = SELF_TYPE_NPDRM; else printf("[*] Error: unknown SELF type '%s'.\n", value); } else if(strcmp(prop, "erk") == 0 || strcmp(prop, "key") == 0) { ks->erk = _x_to_u8_buffer(value); ks->erklen = strlen(value) / 2; } else if(strcmp(prop, "riv") == 0) { ks->riv = _x_to_u8_buffer(value); ks->rivlen = strlen(value) / 2; } else if(strcmp(prop, "pub") == 0) ks->pub = _x_to_u8_buffer(value); else if(strcmp(prop, "priv") == 0) ks->priv = _x_to_u8_buffer(value); else if(strcmp(prop, "ctype") == 0) ks->ctype = (u8)_x_to_u64(value); else printf("[*] Error: Unknown keyfile property '%s'.\n", prop); }
int main(int argc, char **argv){ printf("Check OpenCL environtment\n"); cl_platform_id platid; cl_device_id devid; cl_int res; size_t param; /* Query OpenCL, get some information about the returned device */ clGetPlatformIDs(1u, &platid, NULL); clGetDeviceIDs(platid, CL_DEVICE_TYPE_ALL, 1, &devid, NULL); cl_char vendor_name[1024] = {0}; cl_char device_name[1024] = {0}; clGetDeviceInfo(devid, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, NULL); clGetDeviceInfo(devid, CL_DEVICE_NAME, sizeof(device_name), device_name, NULL); printf("Connecting to OpenCL device:\t%s %s\n", vendor_name, device_name); clGetDeviceInfo(devid, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), ¶m, NULL); printf("CL_DEVICE_MAX_COMPUTE_UNITS\t%d\n", param); clGetDeviceInfo(devid, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), ¶m, NULL); printf("CL_DEVICE_MAX_WORK_GROUP_SIZE\t%u\n", param); clGetDeviceInfo(devid, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), ¶m, NULL); printf("CL_DEVICE_LOCAL_MEM_SIZE\t%ub\n", param); /* Check if kernel source exists, we compile argv[1] passed kernel */ if(argv[1] == NULL) { printf("\nUsage: %s kernel_source.cl kernel_function\n", argv[0]); exit(1); } char *kernel_source; if(load_program_source(argv[1], &kernel_source)) return 1; printf("Building from OpenCL source: \t%s\n", argv[1]); printf("Compile/query OpenCL_program:\t%s\n", argv[2]); /* Create context and kernel program */ cl_context context = clCreateContext(0, 1, &devid, NULL, NULL, NULL); cl_program pro = clCreateProgramWithSource(context, 1, (const char **)&kernel_source, NULL, NULL); res = clBuildProgram(pro, 1, &devid, "-cl-fast-relaxed-math", NULL, NULL); if(res != CL_SUCCESS){ printf("clBuildProgram failed: %d\n", res); char buf[0x10000]; clGetProgramBuildInfo(pro, devid, CL_PROGRAM_BUILD_LOG, 0x10000, buf, NULL); printf("\n%s\n", buf); return(-1); } cl_kernel kernelobj = clCreateKernel(pro, argv[2], &res); check_return(res); /* Get the maximum work-group size for executing the kernel on the device */ size_t global, local; res = clGetKernelWorkGroupInfo(kernelobj, devid, CL_KERNEL_WORK_GROUP_SIZE, sizeof(int), &local, NULL); check_return(res); printf("CL_KERNEL_WORK_GROUP_SIZE\t%u\n", local); res = clGetKernelWorkGroupInfo(kernelobj, devid, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(cl_ulong), ¶m, NULL); check_return(res); printf("CL_KERNEL_LOCAL_MEM_SIZE\t%ub\n", param); cl_command_queue cmd_queue = clCreateCommandQueue(context, devid, CL_QUEUE_PROFILING_ENABLE, NULL); if(cmd_queue == NULL) { printf("Compute device setup failed\n"); return(-1); } local = 4; int n = 2 * local; //num_group * local workgroup size global = n; int num_groups= global / local, allocated_local= sizeof(data) * local + sizeof(debug) * local; data *DP __attribute__ ((aligned(16))); DP = calloc(n, sizeof(data) *1); debug *dbg __attribute__ ((aligned(16))); dbg = calloc(n, sizeof(debug)); printf("global:%d, local:%d, (should be):%d groups\n", global, local, num_groups); printf("structs size: %db, %db, %db\n", sizeof(data), sizeof(Elliptic_Curve), sizeof(inv256)); printf("sets:%d, total of %db needed, allocated _local: %db\n", n, n * sizeof(cl_uint4) *5 *4, allocated_local); cl_mem cl_DP, cl_EC, cl_INV, DEBUG; cl_DP = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR, n * sizeof(data), NULL, &res); check_return(res); cl_EC = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY, 1 * sizeof(Elliptic_Curve), NULL, &res); check_return(res); //_constant address space cl_INV= clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY, 1 * sizeof(u8) * 0x80, NULL, &res); check_return(res); DEBUG = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR | CL_MEM_WRITE_ONLY, n * sizeof(debug), NULL, &res); check_return(res); Elliptic_Curve EC; /* Curve domain parameters, (test vectors) ------------------------------------------------------------------------------------- p: c1c627e1638fdc8e24299bb041e4e23af4bb5427 is prime a: c1c627e1638fdc8e24299bb041e4e23af4bb5424 divisor g = 62980 b: 877a6d84155a1de374b72d9f9d93b36bb563b2ab divisor g = 227169643 Gx: 010aff82b3ac72569ae645af3b527be133442131 divisor g = 32209245 Gy: 46b8ec1e6d71e5ecb549614887d57a287df573cc divisor g = 972 precomputed_per_curve_constants: U: c1c627e1638fdc8e24299bb041e4e23af4bb5425 V: 3e39d81e9c702371dbd6644fbe1b1dc50b44abd9 already prepared mod p to test: a: 07189f858e3f723890a66ec1079388ebd2ed509c b: 6043379beb0dade6eed1e9d6de64f4a0c50639d4 gx: 5ef84aacf4f0ea6752f572d0741f40049f354dca gy: 418c695435af6b3d4d7cbb72967395016ef67239 resulting point: P.x: 01718f862ebe9423bd661a65355aa1c86ba330f8 program MUST got this point !! P.y: 557e8ed53ffbfe2c990a121967b340f62e0e4fe2 taken mod p: P.x: 41da1a8f74ff8d3f1ce20ef3e9d8865c96014fe3 P.y: 73ca143c9badedf2d9d3c7573307115ccfe04f13 */ u8 *t; t = _x_to_u8_buffer("c1c627e1638fdc8e24299bb041e4e23af4bb5427"); memcpy(EC.p, t, 20); t = _x_to_u8_buffer("07189f858e3f723890a66ec1079388ebd2ed509c"); memcpy(EC.a, t, 20); t = _x_to_u8_buffer("6043379beb0dade6eed1e9d6de64f4a0c50639d4"); memcpy(EC.b, t, 20); t = _x_to_u8_buffer("5ef84aacf4f0ea6752f572d0741f40049f354dca"); memcpy(EC.Gx, t, 20); t = _x_to_u8_buffer("418c695435af6b3d4d7cbb72967395016ef67239"); memcpy(EC.Gy, t, 20); t = _x_to_u8_buffer("c1c627e1638fdc8e24299bb041e4e23af4bb5425"); memcpy(EC.U, t, 20); t = _x_to_u8_buffer("3e39d81e9c702371dbd6644fbe1b1dc50b44abd9"); memcpy(EC.V, t, 20); /* we need to map buffer now to load some k into data */ DP = clEnqueueMapBuffer(cmd_queue, cl_DP, CL_TRUE, CL_MAP_WRITE, 0, n * sizeof(data), 0, NULL, NULL, &res); check_return(res); t = _x_to_u8_buffer("00542d46e7b3daac8aeb81e533873aabd6d74bb710"); for(u8 i = 0; i < n; i++) memcpy(DP[i].k, t, 21); free(t); //d for(u8 i = 0; i < n; i++) bn_print("", DP[i].k, 21, 1); /* we can alter just a byte into a chosen k to verify that we'll get a different point! */ //DP[2].k[2] = 0x09; //no res = clEnqueueWriteBuffer(cmd_queue, cl_DP, CL_TRUE, 0, n * sizeof(data), &DP, 0, NULL, NULL); check_return(res); res = clEnqueueWriteBuffer(cmd_queue, cl_EC, CL_TRUE, 0, 1 * sizeof(Elliptic_Curve), &EC, 0, NULL, NULL); check_return(res); res = clEnqueueWriteBuffer(cmd_queue, cl_INV, CL_TRUE, 0, 1 * sizeof(u8) * 0x80, &inv256, 0, NULL, NULL); check_return(res); res = clSetKernelArg(kernelobj, 0, sizeof(cl_mem), &cl_DP); /* i/o buffer */ res|= clSetKernelArg(kernelobj, 1, sizeof(data) * local *1, NULL); //allocate space for __local in kernel (just this!) one * localsize res|= clSetKernelArg(kernelobj, 2, sizeof(cl_mem), &cl_EC); res|= clSetKernelArg(kernelobj, 3, sizeof(cl_mem), &cl_INV); res|= clSetKernelArg(kernelobj, 4, sizeof(debug) * local *1, NULL); //allocate space for __local in kernel (just this!) one * localsize res|= clSetKernelArg(kernelobj, 5, sizeof(cl_mem), &DEBUG); //this used to debug kernel output check_return(res); // printf("n:%d, total of %db needed, allocated _local: %db\n", n, n * sizeof(debug), allocated_local); cl_event NDRangeEvent; cl_ulong start, end; /* Execute NDrange */ res = clEnqueueNDRangeKernel(cmd_queue, kernelobj, 1, NULL, &global, &local, 0, NULL, &NDRangeEvent); check_return(res); // res = clEnqueueNDRangeKernel(cmd_queue, kernelobj, 1, NULL, &global, NULL, 0, NULL, &NDRangeEvent); check_return(res); printf("Read back, Mapping buffer:\t%db\n", n * sizeof(data)); DP = clEnqueueMapBuffer(cmd_queue, cl_DP, CL_TRUE, CL_MAP_READ, 0, n * sizeof(data), 0, NULL, NULL, &res); check_return(res); dbg =clEnqueueMapBuffer(cmd_queue, DEBUG, CL_TRUE, CL_MAP_READ, 0, n * sizeof(debug), 0, NULL, NULL, &res); check_return(res); /* using clEnqueueReadBuffer template */ // res = clEnqueueReadBuffer(cmd_queue, ST, CL_TRUE, 0, sets * sizeof(cl_uint8), dbg, 0, NULL, NULL); check_return(res); clFlush(cmd_queue); clFinish(cmd_queue); /* get NDRange execution time with internal ocl profiler */ res = clGetEventProfilingInfo(NDRangeEvent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL); res|= clGetEventProfilingInfo(NDRangeEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL); check_return(res); printf("kernel execution time:\t\t%.2f ms\n", (float) ((end - start) /1000000)); //relative to NDRange call printf("number of computes/sec:\t%.2f\n", (float) global *1000000 /((end - start))); printf("i,\tgid\tlid0\tlsize0\tgid0/lsz0,\tgsz0,\tn_gr0,\tlid5,\toffset\n"); for(int i = 0; i < n; i++) { // if(i %local == 0) { printf("%d \t", i); //printf("%u\t%u\t%u\t%u\t| %2u, %2u, %2u, %u\n", *p, *(p +1), *(p +2), *(p +3), *(p +4), *(p +5), *(p +6), *(p +7)); /* silence this doubled debug info printf("%u\t%u\t%u\t%u\t| %2u, %2u, %2u, %u\n", dbg[i].data[0], dbg[i].data[1], dbg[i].data[2], dbg[i].data[3], dbg[i].data[4], dbg[i].data[5], dbg[i].data[6], dbg[i].data[7]); */ //printf("%d %d\n", P[i].dig, P[i].c); bn_print("", DP[i].k, 21, 1); bn_print("", DP[i].rx, 20, 0); bn_print(" ", DP[i].ry, 20, 1); printf("%u(/%u) = %u*%u(/%u) +%u, offset:%u, stride:%u\n", DP[i].pad[0], DP[i].pad[1], DP[i].pad[2], DP[i].pad[3], DP[i].pad[4], DP[i].pad[5], DP[i].pad[6], DP[i].pad[7]); // } } /* Release OpenCL stuff, free the rest */ clReleaseMemObject(cl_DP); clReleaseMemObject(cl_EC); clReleaseMemObject(cl_INV); clReleaseMemObject(DEBUG); clReleaseKernel(kernelobj); clReleaseProgram(pro); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); free(kernel_source); puts("Done!"); return 0; }