// func for find the specific 'search' pattern in a data buffer
int find_data_in_buffer(u8* pInBuffer, uint32_t dwSizeOfBuffer, char* pszSearchString, char* pszMaskString, char* pszReplaceString, uint32_t dwReplaceOffset)
{		
	u8* pCurrBuffPtr = NULL;
	u8* pSearchPattern = NULL;
	u8* pReplacePattern = NULL;	
	u8* pMaskPattern = NULL;
	uint32_t* pdwSearchBlock = NULL;
	uint32_t* pdwDataBlock = NULL;
	uint32_t* pdwMaskBlock = NULL;
	uint32_t dwSizeOfSearchString = 0;
	uint32_t dwSizeOfMaskString = 0;
	uint32_t dwSizeOfReplaceString = 0;
	uint32_t dwSizeOfSearchData = 0;
	uint32_t dwSizeOfReplaceData = 0;	
	uint32_t num_blocks = 0;
	uint32_t matches_found = 0;	
	uint32_t i = 0;
	uint32_t j = 0;
	uint32_t dwPadd = 0;
	int code_matches_found = 0;
	int retval = -1;

	


	// validate input params
	if ( (pInBuffer == NULL) || (dwSizeOfBuffer == 0) || (pszSearchString == NULL) || (pszMaskString == NULL) || (pszReplaceString == NULL) ) {
		printf("\nERROR!  Invalid input parameters, exiting!\n");
		goto exit;
	}


	__try 
	{
		// get the lengths of the 'search' and 'replace' strings
		dwSizeOfSearchString = strlen(pszSearchString);
		dwSizeOfMaskString = strlen(pszMaskString);
		dwSizeOfReplaceString = strlen(pszReplaceString);
		if ( (dwSizeOfSearchString == 0) || (dwSizeOfMaskString == 0) || ((dwSizeOfReplaceString < 1) && g_bPatchingEnabled == TRUE) ) {
			printf("\nERROR!  Invalid input parameters, exiting!\n");
			__leave;
		}

		// search and data masks MUST be same size
		if ( dwSizeOfSearchString != dwSizeOfMaskString ) {
			printf("\nERROR!  Search/Replace patterns are not the same size, exiting!!\n");
			__leave;
		}

		// calculate the actual size of the searchstring
		// converted to binary data (ie divide stringsize / 2)
		dwSizeOfSearchData = dwSizeOfSearchString / 2;
		dwSizeOfReplaceData = dwSizeOfReplaceString / 2;
		num_blocks = dwSizeOfSearchData / DATA_PATTERN_ALIGNMENT;

		// if our data is 'mis-aligned', tack on the extra 'search' byte(s) (CC)
		// and 'mask' byte(s) (00) needed to align at 32-bits, 
		// and bump up the 'num_blocks' by 1
		if ( (dwSizeOfSearchData % DATA_PATTERN_ALIGNMENT > 0) ) {
			dwPadd = (DATA_PATTERN_ALIGNMENT - (dwSizeOfSearchData % DATA_PATTERN_ALIGNMENT));
			for (i = 0; i < dwPadd; i++) {
				strcat_s(pszSearchString, MAX_HEXSTRINGS_LENGTH, "CC");
				strcat_s(pszMaskString, MAX_HEXSTRINGS_LENGTH, "00");
			}
			num_blocks+=1;
		}

		// convert the ASCII string to binary buffers
		pSearchPattern = _x_to_u8_buffer(pszSearchString);
		pMaskPattern = _x_to_u8_buffer(pszMaskString);
		pReplacePattern = _x_to_u8_buffer(pszReplaceString);
		if ( (pReplacePattern == NULL) || (pMaskPattern == NULL) || (pReplacePattern == NULL) ) {
			printf("\nERROR!  Failed to convert strings to binary data, exiting!\n");
			retval = -1;
			__leave;
		}

		// iterate through the buffer, searching for the pattern
		// (search range is our 'buffer size' - 'search string len')
		pCurrBuffPtr = pInBuffer;		
		for (i = 0; i < (dwSizeOfBuffer - dwSizeOfSearchData); i++ )
		{
			// iterate through the 32-bit 'chunks', AND
			// off the searc/data with the mask, and XOR
			// them to see if block(s) match
			matches_found = 0;
			for (j = 0; j < num_blocks; j++) {
				pdwSearchBlock = (uint32_t*)(pSearchPattern+(sizeof(uint32_t)*j));
				pdwDataBlock = (uint32_t*)(pCurrBuffPtr+(sizeof(uint32_t)*j));
				pdwMaskBlock = (uint32_t*)(pMaskPattern+(sizeof(uint32_t)*j));				
				if ( ((*pdwSearchBlock ^ *pdwDataBlock) & *pdwMaskBlock) == 0 ) 			
					matches_found++;
				else
					break;
			}			
			// if we found all matches, then 
			// break out, success!!
			if (matches_found == num_blocks)
			{	
				code_matches_found++;										
				if ( (g_bMultiPatchEnabled == FALSE) && (code_matches_found > 1) ) {
					printf("!ERROR! Multiple matches found!, exiting!\n");					
					retval = -1;
					__leave;
				}
				// if 'patching' is enabled, then copy patch to buffer, at ptr+offset
				// (verify that 'replace offset' is not beyond the buffer size!)				
				if (g_bPatchingEnabled == TRUE) {
					if ( (dwReplaceOffset+i) > dwSizeOfBuffer ) {
						printf("!ERROR! Replace Offset is out of range!!!, exiting!\n");					
						retval = -1;
						__leave;
					}
					memcpy((pCurrBuffPtr+dwReplaceOffset), pReplacePattern, dwSizeOfReplaceData);
					printf("----PATCHED AT:%.8X\n", (i+dwReplaceOffset));			
				} 
				else {
					printf("----FOUND MATCH AT:%.8X\n", (i+dwReplaceOffset));						
				}
				// status success
				retval = STATUS_SUCCESS;
			}
			pCurrBuffPtr++;
		}		
	} // end __try{}

	__except(EXCEPTION_EXECUTE_HANDLER)
	{
		printf("\n!!ERROR!! find_data_in_buffer threw exception!!\n");
		retval = -1;
	}	

exit:
	// free any alloc'd memory
	if (pSearchPattern != NULL)
		free(pSearchPattern);

	// free any alloc'd memory
	if (pMaskPattern != NULL)
		free(pMaskPattern);

	// free any alloc'd memory
	if (pReplacePattern != NULL)
		free(pReplacePattern);
	
	return retval;
}
Beispiel #2
0
static void _fill_property(keyset_t *ks, s8 *prop, s8 *value)
{
	if(strcmp(prop, "type") == 0)
	{
		if(strcmp(value, "SELF") == 0)
			ks->type = KEYTYPE_SELF;
		else if(strcmp(value, "RVK") == 0)
			ks->type = KEYTYPE_RVK;
		else if(strcmp(value, "PKG") == 0)
			ks->type = KEYTYPE_PKG;
		else if(strcmp(value, "SPP") == 0)
			ks->type = KEYTYPE_SPP;
		else if(strcmp(value, "OTHER") == 0)
			ks->type = KEYTYPE_OTHER;
		else
			printf("[*] Error: Unknown type '%s'.\n", value);
	}
	else if(strcmp(prop, "revision") == 0)
		ks->key_revision = (u16)_x_to_u64(value);
	else if(strcmp(prop, "version") == 0)
		ks->version = _x_to_u64(value);
	else if(strcmp(prop, "self_type") == 0)
	{
		if(strcmp(value, "LV0") == 0)
			ks->self_type = SELF_TYPE_LV0;
		else if(strcmp(value, "LV1") == 0)
			ks->self_type = SELF_TYPE_LV1;
		else if(strcmp(value, "LV2") == 0)
			ks->self_type = SELF_TYPE_LV2;
		else if(strcmp(value, "APP") == 0)
			ks->self_type = SELF_TYPE_APP;
		else if(strcmp(value, "ISO") == 0)
			ks->self_type = SELF_TYPE_ISO;
		else if(strcmp(value, "LDR") == 0)
			ks->self_type = SELF_TYPE_LDR;
		else if(strcmp(value, "UNK_7") == 0)
			ks->self_type = SELF_TYPE_UNK_7;
		else if(strcmp(value, "NPDRM") == 0)
			ks->self_type = SELF_TYPE_NPDRM;
		else
			printf("[*] Error: unknown SELF type '%s'.\n", value);
	}
	else if(strcmp(prop, "erk") == 0 || strcmp(prop, "key") == 0)
	{
		ks->erk = _x_to_u8_buffer(value);
		ks->erklen = strlen(value) / 2;
	}
	else if(strcmp(prop, "riv") == 0)
	{
		ks->riv = _x_to_u8_buffer(value);
		ks->rivlen = strlen(value) / 2;
	}
	else if(strcmp(prop, "pub") == 0)
		ks->pub = _x_to_u8_buffer(value);
	else if(strcmp(prop, "priv") == 0)
		ks->priv = _x_to_u8_buffer(value);
	else if(strcmp(prop, "ctype") == 0)
		ks->ctype = (u8)_x_to_u64(value);
	else
		printf("[*] Error: Unknown keyfile property '%s'.\n", prop);
}
Beispiel #3
0
int main(int argc, char **argv){
	
	printf("Check OpenCL environtment\n");

	cl_platform_id platid;
	cl_device_id devid;
	cl_int res;
	size_t param;
	
	/* Query OpenCL, get some information about the returned device */
	clGetPlatformIDs(1u, &platid, NULL);
	clGetDeviceIDs(platid, CL_DEVICE_TYPE_ALL, 1, &devid, NULL);

	cl_char vendor_name[1024] = {0};
	cl_char device_name[1024] = {0};
	clGetDeviceInfo(devid, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, NULL);
	clGetDeviceInfo(devid, CL_DEVICE_NAME,   sizeof(device_name), device_name, NULL);
	printf("Connecting to OpenCL device:\t%s %s\n", vendor_name, device_name);
	
	clGetDeviceInfo(devid, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &param, NULL);
	printf("CL_DEVICE_MAX_COMPUTE_UNITS\t%d\n", param);
	
	clGetDeviceInfo(devid, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &param, NULL);
	printf("CL_DEVICE_MAX_WORK_GROUP_SIZE\t%u\n", param);

	clGetDeviceInfo(devid, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &param, NULL);
	printf("CL_DEVICE_LOCAL_MEM_SIZE\t%ub\n", param);

	/* Check if kernel source exists, we compile argv[1] passed kernel */
	if(argv[1] == NULL) { printf("\nUsage: %s kernel_source.cl kernel_function\n", argv[0]); exit(1); }

	char *kernel_source;
	if(load_program_source(argv[1], &kernel_source)) return 1;
	
	printf("Building from OpenCL source: \t%s\n", argv[1]);
	printf("Compile/query OpenCL_program:\t%s\n", argv[2]);
	
	/* Create context and kernel program */
	cl_context context = 	clCreateContext(0, 1, &devid, NULL, NULL, NULL);
	cl_program pro = 	clCreateProgramWithSource(context, 1, (const char **)&kernel_source, NULL, NULL);
	res = 			clBuildProgram(pro, 1, &devid, "-cl-fast-relaxed-math", NULL, NULL);

	if(res != CL_SUCCESS){
		printf("clBuildProgram failed: %d\n", res); char buf[0x10000];
		clGetProgramBuildInfo(pro, devid, CL_PROGRAM_BUILD_LOG, 0x10000, buf, NULL);
		printf("\n%s\n", buf); return(-1); }

	cl_kernel kernelobj = clCreateKernel(pro, argv[2], &res); 	check_return(res);
	
	/* Get the maximum work-group size for executing the kernel on the device */
	size_t global, local;
	res = clGetKernelWorkGroupInfo(kernelobj, devid, CL_KERNEL_WORK_GROUP_SIZE, sizeof(int), &local, NULL);		check_return(res);
	printf("CL_KERNEL_WORK_GROUP_SIZE\t%u\n", local);
	
	res = clGetKernelWorkGroupInfo(kernelobj, devid, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(cl_ulong), &param, NULL);	check_return(res);
	printf("CL_KERNEL_LOCAL_MEM_SIZE\t%ub\n", param);
	
	cl_command_queue cmd_queue = clCreateCommandQueue(context, devid, CL_QUEUE_PROFILING_ENABLE, NULL);
	if(cmd_queue == NULL) { printf("Compute device setup failed\n"); return(-1); }

	local = 4;
	int n = 2 * local;	//num_group * local workgroup size 
	global = n;
	
	int	num_groups=		global / local,
		allocated_local=	sizeof(data) * local + 
					sizeof(debug) * local;

	data *DP __attribute__ ((aligned(16)));
	DP = calloc(n, sizeof(data) *1);

	debug *dbg __attribute__ ((aligned(16)));
	dbg = calloc(n, sizeof(debug));
	
	printf("global:%d, local:%d, (should be):%d groups\n", global, local, num_groups);
	printf("structs size: %db, %db, %db\n", sizeof(data), sizeof(Elliptic_Curve), sizeof(inv256));
	printf("sets:%d, total of %db needed, allocated _local: %db\n", n, n * sizeof(cl_uint4) *5 *4, allocated_local);

	cl_mem	cl_DP, cl_EC, cl_INV, DEBUG;
	cl_DP = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR, n * sizeof(data), NULL, &res);					check_return(res);				
	cl_EC = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY,  1 * sizeof(Elliptic_Curve), NULL, &res);	check_return(res);	//_constant address space
	cl_INV= clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY,  1 * sizeof(u8) * 0x80, NULL, &res);		check_return(res);
	DEBUG = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR | CL_MEM_WRITE_ONLY, n * sizeof(debug), NULL, &res);		check_return(res);
	
	Elliptic_Curve EC;
	/*	
		Curve domain parameters, (test vectors)
		-------------------------------------------------------------------------------------
		p:	c1c627e1638fdc8e24299bb041e4e23af4bb5427		is prime
		a:	c1c627e1638fdc8e24299bb041e4e23af4bb5424		divisor g = 62980
		b:	877a6d84155a1de374b72d9f9d93b36bb563b2ab		divisor g = 227169643
		Gx: 	010aff82b3ac72569ae645af3b527be133442131		divisor g = 32209245
		Gy: 	46b8ec1e6d71e5ecb549614887d57a287df573cc		divisor g = 972	
		precomputed_per_curve_constants:
		U:	c1c627e1638fdc8e24299bb041e4e23af4bb5425
		V:	3e39d81e9c702371dbd6644fbe1b1dc50b44abd9
		
		already prepared mod p to test:
		a:      07189f858e3f723890a66ec1079388ebd2ed509c
		b:      6043379beb0dade6eed1e9d6de64f4a0c50639d4
		gx:     5ef84aacf4f0ea6752f572d0741f40049f354dca
		gy:     418c695435af6b3d4d7cbb72967395016ef67239
		resulting point:
		P.x:    01718f862ebe9423bd661a65355aa1c86ba330f8		program MUST got this point !!
		P.y:    557e8ed53ffbfe2c990a121967b340f62e0e4fe2
		taken mod p:
		P.x:    41da1a8f74ff8d3f1ce20ef3e9d8865c96014fe3		
		P.y:    73ca143c9badedf2d9d3c7573307115ccfe04f13
	*/	
	u8 *t;
	t = _x_to_u8_buffer("c1c627e1638fdc8e24299bb041e4e23af4bb5427");	memcpy(EC.p, t, 20);
	t = _x_to_u8_buffer("07189f858e3f723890a66ec1079388ebd2ed509c");	memcpy(EC.a, t, 20);
	t = _x_to_u8_buffer("6043379beb0dade6eed1e9d6de64f4a0c50639d4");	memcpy(EC.b, t, 20);
	t = _x_to_u8_buffer("5ef84aacf4f0ea6752f572d0741f40049f354dca");	memcpy(EC.Gx, t, 20);
	t = _x_to_u8_buffer("418c695435af6b3d4d7cbb72967395016ef67239");	memcpy(EC.Gy, t, 20);
	
	t = _x_to_u8_buffer("c1c627e1638fdc8e24299bb041e4e23af4bb5425");	memcpy(EC.U, t, 20);
	t = _x_to_u8_buffer("3e39d81e9c702371dbd6644fbe1b1dc50b44abd9");	memcpy(EC.V, t, 20);

	/* we need to map buffer now to load some k into data */
	DP = clEnqueueMapBuffer(cmd_queue, cl_DP, CL_TRUE, CL_MAP_WRITE, 0, n * sizeof(data),  0, NULL, NULL, &res);	check_return(res);

	t = _x_to_u8_buffer("00542d46e7b3daac8aeb81e533873aabd6d74bb710");
	for(u8 i = 0; i < n; i++) memcpy(DP[i].k, t, 21);
	
	free(t);
//d	for(u8 i = 0; i < n; i++) bn_print("", DP[i].k, 21, 1);

	/* we can alter just a byte into a chosen k to verify that we'll get a different point! */
	//DP[2].k[2] = 0x09;
	
//no	res = clEnqueueWriteBuffer(cmd_queue, cl_DP,  CL_TRUE, 0, n * sizeof(data), &DP, 0, NULL, NULL);	check_return(res);

	res = clEnqueueWriteBuffer(cmd_queue, cl_EC,  CL_TRUE, 0, 1 * sizeof(Elliptic_Curve), &EC, 0, NULL, NULL);	check_return(res);
	res = clEnqueueWriteBuffer(cmd_queue, cl_INV, CL_TRUE, 0, 1 * sizeof(u8) * 0x80, &inv256, 0, NULL, NULL);	check_return(res);

	res = clSetKernelArg(kernelobj, 0, sizeof(cl_mem), &cl_DP);		/* i/o buffer */
	res|= clSetKernelArg(kernelobj, 1, sizeof(data) * local *1, NULL);	//allocate space for __local in kernel (just this!) one * localsize
	res|= clSetKernelArg(kernelobj, 2, sizeof(cl_mem), &cl_EC);
	res|= clSetKernelArg(kernelobj, 3, sizeof(cl_mem), &cl_INV);	
	res|= clSetKernelArg(kernelobj, 4, sizeof(debug) * local *1, NULL);	//allocate space for __local in kernel (just this!) one * localsize
	res|= clSetKernelArg(kernelobj, 5, sizeof(cl_mem), &DEBUG);		//this used to debug kernel output
	check_return(res);

//	printf("n:%d, total of %db needed, allocated _local: %db\n", n, n * sizeof(debug), allocated_local);	
	
	cl_event NDRangeEvent;
	cl_ulong start, end;
	
	/* Execute NDrange */	
	res = clEnqueueNDRangeKernel(cmd_queue, kernelobj, 1, NULL, &global, &local, 0, NULL, &NDRangeEvent);		check_return(res);
//	res = clEnqueueNDRangeKernel(cmd_queue, kernelobj, 1, NULL, &global, NULL, 0, NULL, &NDRangeEvent);		check_return(res);
	
	printf("Read back, Mapping buffer:\t%db\n", n * sizeof(data));

	DP = clEnqueueMapBuffer(cmd_queue, cl_DP, CL_TRUE, CL_MAP_READ, 0, n * sizeof(data),  0, NULL, NULL, &res);	check_return(res);
	dbg =clEnqueueMapBuffer(cmd_queue, DEBUG, CL_TRUE, CL_MAP_READ, 0, n * sizeof(debug), 0, NULL, NULL, &res);	check_return(res);
	
	/* using clEnqueueReadBuffer template */
//	res = clEnqueueReadBuffer(cmd_queue, ST, CL_TRUE, 0, sets * sizeof(cl_uint8), dbg, 0, NULL, NULL);			check_return(res);
		
	clFlush(cmd_queue);
	clFinish(cmd_queue);

	/* get NDRange execution time with internal ocl profiler */
	res = clGetEventProfilingInfo(NDRangeEvent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL);
	res|= clGetEventProfilingInfo(NDRangeEvent, CL_PROFILING_COMMAND_END,   sizeof(cl_ulong), &end,   NULL);
	check_return(res);
	printf("kernel execution time:\t\t%.2f ms\n", (float) ((end - start) /1000000));			//relative to NDRange call
	printf("number of computes/sec:\t%.2f\n", (float) global *1000000 /((end - start)));

	
	printf("i,\tgid\tlid0\tlsize0\tgid0/lsz0,\tgsz0,\tn_gr0,\tlid5,\toffset\n");
	for(int i = 0; i < n; i++) {		
//		if(i %local == 0) {
			printf("%d \t", i);
			//printf("%u\t%u\t%u\t%u\t| %2u, %2u, %2u, %u\n", *p, *(p +1), *(p +2), *(p +3), *(p +4), *(p +5), *(p +6), *(p +7));
			
			/* silence this doubled debug info
			printf("%u\t%u\t%u\t%u\t| %2u, %2u, %2u, %u\n", 
				dbg[i].data[0], dbg[i].data[1], dbg[i].data[2], dbg[i].data[3],
				dbg[i].data[4], dbg[i].data[5], dbg[i].data[6], dbg[i].data[7]);
			*/	
			//printf("%d %d\n", P[i].dig, P[i].c);
			bn_print("", DP[i].k, 21, 1);
			bn_print("", DP[i].rx, 20, 0); bn_print(" ", DP[i].ry, 20, 1);
			
			printf("%u(/%u) = %u*%u(/%u) +%u, offset:%u, stride:%u\n", 
				DP[i].pad[0], DP[i].pad[1], DP[i].pad[2], DP[i].pad[3],
				DP[i].pad[4], DP[i].pad[5], DP[i].pad[6], DP[i].pad[7]);
//		}
	}
	
	/* Release OpenCL stuff, free the rest */
	clReleaseMemObject(cl_DP);
	clReleaseMemObject(cl_EC);
	clReleaseMemObject(cl_INV);
	clReleaseMemObject(DEBUG);
	clReleaseKernel(kernelobj);
	clReleaseProgram(pro);
	clReleaseCommandQueue(cmd_queue);
	clReleaseContext(context);
	
	free(kernel_source);
	
	puts("Done!");
	return 0;
}