/*
 * Decrypts pkey->crypto into pkey->plain, one SeedBlockSize block at a time,
 * with a key and IV derived from `password`.
 *
 * pkey     - private key record; salt, itercount, crypto and crypto_len are
 *            read, plain is written.
 * password - NUL-terminated password (its length is taken with strlen).
 */
void NPKIDecrypt (NPKIPrivateKey *pkey, const char* password)
{
    // Scratch buffers. `derived` is the JV_PBKDF1 output:
    // bytes 0-15 become the SEED key, bytes 16-19 seed the IV.
    uint8_t derived[20] = {0};
    uint8_t iv_digest[20] = {0};
    uint8_t iv_seed[20] = {0};
    uint8_t iv[16] = {0};
    uint8_t seedkey[20] = {0};
    uint32_t roundkey[32] = {0};
    uint32_t offset = 0;

    // SEED key. (Original note: password length is limited to 64 characters.)
    JV_PBKDF1(derived, (uint8_t*)password, strlen(password), pkey->salt, sizeof(pkey->salt), pkey->itercount);
    memcpy(seedkey, derived, 16);

    // SEED IV: first 16 bytes of JV_SHA1 over the last 4 derived bytes.
    memcpy(iv_seed, derived + 16, 4);
    JV_SHA1(iv_digest, iv_seed, 4);
    memcpy(iv, iv_digest, 16);

#ifdef _DEBUG_DEEP
    puts("\n== SEED Key ==");
    DumpBinary(seedkey, 16);
    puts("\n== IV ==");
    DumpBinary(iv, 16);
#endif

    JV_SeedRoundKey(roundkey, seedkey);

    //-- Section to parallelize: start (OpenMP on CPU, OpenCL on GPU) --//

    // The very first block is chained from the IV...
    if (offset < pkey->crypto_len) {
        JV_SEED_CBC128_Decrypt_OneBlock(pkey->crypto, pkey->plain, roundkey, iv);
        offset += SeedBlockSize;
    }

    // ...and every later block from the ciphertext block just before it.
    while (offset < pkey->crypto_len) {
        JV_SEED_CBC128_Decrypt_OneBlock(pkey->crypto + offset, pkey->plain + offset, roundkey, pkey->crypto + (offset - SeedBlockSize));
        offset += SeedBlockSize;
    }

    //-- Section to parallelize: end --//
}
int main (int argc, char* argv[]) { // Joveler if (argc != 2) exit(1); FILE *fp = NULL; const char PrivateKeyPath[] = "signPri.key"; // File Name // This file is saved with PKCS#8 file format, and the key is encrypted with PKCS#5's PBKDF1 uint8_t *keybuf = NULL; uint8_t salt[8] = {0}, dkey[20] = {0}, div[20] = {0}, buf[20] = {0}, iv[16] = {0}, key[20] = {0}; uint16_t itercount; long bufsize = 0; // SEED_KEY_SCHEDULE keyschedule; uint32_t roundkey[32] = {0}; fp = fopen(PrivateKeyPath, "rb"); fseek(fp, 0, SEEK_END); bufsize = ftell(fp); fseek(fp, 0, SEEK_SET); keybuf = (uint8_t *)calloc(bufsize, sizeof(uint8_t)); for (int i = 0; !feof(fp); i++) // Read All Data! fread((void*) (keybuf + i), sizeof(uint8_t), 1, fp); fclose(fp); /* salt와 iteration count가 필요하다. salt는 공인인증서를 발급할때마다 랜덤하게 생성되는것으로, 블특정다수의 사전(Dictionary) 공격을 방지하는 역할을 한다.(21-28바이트 사이의 8바이트를 사용함) iteration count는 비밀키 생성을 위해 해쉬함수를 몇번 반복할 것인가를 나타낸다. (31-32바이트 사이의 2바이트를 사용함) */ // char* JV_PBKDF1(char* dkey, const char password[], const size_t pwlen, const char salt[], const size_t saltlen, const uint32_t itercount) // Get Salt from PrivateKeyBuf memcpy((void*) salt, (void*) (keybuf+20), 8); // Get itercount itercount = keybuf[30] * 16 + keybuf[31]; // 길이는 최대 92자리까지 제한한다 // 그런데 아직 구현이 안 되었지... JV_PBKDF1(dkey, (uint8_t*)argv[1], strlen(argv[1]), salt, sizeof(salt), itercount); // Get dkey 값 memcpy(key, dkey, 16); // Got a SEED Key! memcpy(buf, dkey, 4); // Got a SEED Key! 
JV_SHA1(div, buf, 4); memcpy(iv, div, 16); uint8_t pbPlain[LIST_SIZE * LIST_SIZE] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F}; uint8_t pbKey[16] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; uint8_t pbCrypto[LIST_SIZE * LIST_SIZE] = {0x1B, 0x58, 0x93, 0x2D, 0x88, 0x0E, 0x34, 0x7A, 0xFB, 0xD5, 0x7A, 0x11, 0x09, 0xA8, 0x74, 0x00}; JV_SeedRoundKey(roundkey, pbKey); //joveler end // Create the two input vectors int i; double start, end; // Load the kernel source code into the array source_str char macro_cl[] = "-I ./seedCL.h -D uint32_t=uint -D uint8_t=uchar";//OpenCL 커널에서의 매크로 처리. seed.h를 Include 하며, 타입명을 OpenCL 전용으로 치환한다. FILE *fp_cl;//OpenCL 커널 파일의 파일 스트림 char *source_str;//소스 코드를 커널 파일에서 열어 저장한다. size_t source_size;//소스코드의 크기 fp_cl = fopen("vector_add_kernel.cl", "r");//OpenCL 커널파일의 파일스트림을 할당한다. if (!fp_cl) { fprintf(stderr, "Failed to load kernel.\n"); exit(1); }//커널 파일 열기에 실패한 경우. source_str = (char*)malloc(MAX_SOURCE_SIZE);//소스 코드의 최대 가용 크기만큼의 공간을 미리 할당해 둔다. source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp_cl);//소스코드 크기를 저장하고, 소스코드를 읽어 소스코드를 위에서 할당한 공간에 저장한다. fclose( fp ); // Get platform and device information cl_platform_id platform_id = NULL;//플랫폼 id가 저장될 공간 cl_device_id device_id = NULL;//디바이스 id가 저장될 공간 cl_uint ret_num_devices;//디바이스 수가 저장될 공간 cl_uint ret_num_platforms;//플랫폼 수가 저장될 공간. cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms); ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices); // OpenCL 컨텍스트 생성 cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret); // 컨텍스트를 위한 명령 큐 생성 cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret); // 각 배열을 디바이스에 쓸 메모리 버퍼 생성. 
이름짓기 관행은 host변수명_mem_obj cl_mem pbPlain_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(pbPlain), NULL, &ret); cl_mem roundkey_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(roundkey), NULL, &ret); cl_mem pbCrypto_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(pbCrypto), NULL, &ret); cl_mem cryptolen_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cryptolen), NULL, &ret); cl_mem iv_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(iv), NULL, &ret); cl_mem is_big_endian_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_bool), NULL, &ret); cl_mem SS0_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(SS0), NULL, &ret); cl_mem SS1_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(SS1), NULL, &ret); cl_mem SS2_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(SS2), NULL, &ret); cl_mem SS3_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(SS3), NULL, &ret); // 쓰기 버퍼에 각 배열을 복사해 둔다 cl_bool is_big_endian = CL_FALSE; size_t cryptolen = LIST_SIZE * LIST_SIZE; // 임시값. 
실제로는 상수가 아니라 변수이다 ret = clEnqueueWriteBuffer(command_queue, pbCrypto_mem_obj, CL_TRUE, 0, sizeof(pbCrypto), pbCrypto, 0, NULL, NULL); ret = clEnqueueWriteBuffer(command_queue, roundkey_mem_obj, CL_TRUE, 0, sizeof(roundkey), roundkey, 0, NULL, NULL); ret = clEnqueueWriteBuffer(command_queue, cryptolen_mem_obj, CL_TRUE, 0, sizeof(size_t), cryptolen, 0, NULL, NULL); ret = clEnqueueWriteBuffer(command_queue, iv_mem_obj, CL_TRUE, 0, sizeof(iv), iv, 0, NULL, NULL); ret = clEnqueueWriteBuffer(command_queue, is_big_endian_mem_obj, CL_TRUE, 0, sizeof(cl_bool), is_big_endian, 0, NULL, NULL); ret = clEnqueueWriteBuffer(command_queue, SS0_mem_obj, CL_TRUE, 0, sizeof(SS0), SS0, 0, NULL, NULL); ret = clEnqueueWriteBuffer(command_queue, SS1_mem_obj, CL_TRUE, 0, sizeof(SS1), SS1, 0, NULL, NULL); ret = clEnqueueWriteBuffer(command_queue, SS2_mem_obj, CL_TRUE, 0, sizeof(SS2), SS2, 0, NULL, NULL); ret = clEnqueueWriteBuffer(command_queue, SS3_mem_obj, CL_TRUE, 0, sizeof(SS3), SS3, 0, NULL, NULL); // 커널 소스코드로부터 OpenCL 프로그램을 생성 cl_program program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret); //생성된 프로그램을 빌드한다. ret = clBuildProgram(program, 1, &device_id, macro_cl, NULL, NULL); //프로그램에서 host로부터 호출할 커널을 찾아 할당해 둔다. 
cl_kernel kernel = clCreateKernel(program, "JV_CRYPTO_cbc128_decrypt", &ret); /* __kernel void JV_CRYPTO_cbc128_decrypt(__global uint8_t *in, __global uint8_t *out, size_t len, __global const void *key, __global uint8_t ivec[SeedBlockSize], __global bool is_big_endian, __global uint32_t* SS0, __global uint32_t* SS1, __global uint32_t* SS2, __global uint32_t* SS3) */ // 커널의 인자를 설정한다 ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&pbCrypto_mem_obj); // in : cypher ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&pbPlain_mem_obj); // out : plain binary ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&cryptolen_mem_obj); ret = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&roundkey_mem_obj); ret = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&iv_mem_obj); ret = clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *)&is_big_endian_mem_obj); // false는 어떻게 밀어넣나? ret = clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *)&SS0_mem_obj); ret = clSetKernelArg(kernel, 7, sizeof(cl_mem), (void *)&SS1_mem_obj); ret = clSetKernelArg(kernel, 8, sizeof(cl_mem), (void *)&SS2_mem_obj); ret = clSetKernelArg(kernel, 9, sizeof(cl_mem), (void *)&SS3_mem_obj); // 커널을 실행한다. 
start = omp_get_wtime(); size_t global_item_size = LIST_SIZE; // Process the entire lists size_t local_item_size = 64; // Divide work items into groups of 64 ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_item_size, &local_item_size, 0, NULL, NULL); end = omp_get_wtime(); // Read the memory buffer C on the device to the local variable C int *C = (int*)malloc(sizeof(int)*LIST_SIZE); ret = clEnqueueReadBuffer(command_queue, pbPlain_mem_obj, CL_TRUE, 0, sizeof(pbPlain), pbPlain, 0, NULL, NULL); // Display the result to the screen /*for(i = 0; i < LIST_SIZE; i++) printf("%d + %d = %d\n", A[i], B[i], C[i]);*/ printf("time elapsed: %lf ms", (end-start)*1000); // Clean up ret = clFlush(command_queue); ret = clFinish(command_queue); ret = clReleaseKernel(kernel); ret = clReleaseProgram(program); ret = clReleaseMemObject(pbCrypto_mem_obj); ret = clReleaseMemObject(pbPlain_mem_obj); ret = clReleaseMemObject(cryptolen_mem_obj); ret = clReleaseMemObject(roundkey_mem_obj); ret = clReleaseMemObject(is_big_endian_mem_obj); ret = clReleaseMemObject(SS0_mem_obj); ret = clReleaseMemObject(SS1_mem_obj); ret = clReleaseMemObject(SS2_mem_obj); ret = clReleaseMemObject(SS3_mem_obj); ret = clReleaseCommandQueue(command_queue); ret = clReleaseContext(context); return 0; }
/*
 * Derives the SEED round key and IV from `password` and enqueues the OpenCL
 * decryption kernel over pkey->crypto.
 *
 * pkey         - private key record; salt, itercount, crypto and crypto_len
 *                are read.
 * password     - NUL-terminated password (its length is taken with strlen).
 * kernel       - kernel taking (in, out, roundkey, iv) as arguments 0-3.
 * context      - context the device buffers are created in.
 * commandQueue - queue the kernel (and buffer updates) are enqueued on.
 *
 * The device buffers inBuf, outBuf, roundKeyBuf and ivBuf are file-level
 * objects created on first use and kept for later calls. Because the round
 * key and IV depend on the password, they are re-uploaded whenever their
 * buffer already exists; otherwise a later call would run with the key
 * material of the first password.
 *
 * NOTE(review): inBuf is filled only when it is created, so this assumes
 * every call uses the same pkey->crypto - confirm against the callers.
 * NOTE(review): the read-back of outBuf into pkey->plain is disabled in the
 * original (see the comment near the end), so this function does not update
 * pkey->plain.
 *
 * Any OpenCL failure prints a message and terminates with exit(1).
 */
void NPKIDecryptOpenCL (NPKIPrivateKey *pkey, const char* password, cl_kernel kernel, cl_context context, cl_command_queue commandQueue)
{
    // dkey, div and buf are temporaries.
    uint8_t dkey[20] = {0}, div[20] = {0}, buf[20] = {0}, iv[16] = {0}, seedkey[20] = {0};
    uint32_t roundkey[32] = {0};
    cl_int errNum;
    size_t globalWorkSize[WORK_DIM];
    size_t localWorkSize[WORK_DIM];

    // SEED key. (Original note: password length is limited to 64 characters.)
    JV_PBKDF1(dkey, (uint8_t*)password, strlen(password), pkey->salt, sizeof(pkey->salt), pkey->itercount);
    memcpy(seedkey, dkey, 16);

    // SEED IV: first 16 bytes of JV_SHA1 over the last 4 derived bytes.
    memcpy(buf, dkey+16, 4);
    JV_SHA1(div, buf, 4);
    memcpy(iv, div, 16);

#ifdef _DEBUG_DEEP
    puts("\n== SEED Key ==");
    DumpBinary(seedkey, 16);
    puts("\n== IV ==");
    DumpBinary(iv, 16);
#endif

    JV_SeedRoundKey(roundkey, seedkey);

    // Cipher-text input buffer (created once).
    if (inBuf == NULL) {
        inBuf = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, pkey->crypto_len * sizeof(uint8_t), pkey->crypto, &errNum);
        if (errNum != CL_SUCCESS) {
            printf("Failed to create inBuf cl_mem buffer.\n\n");
            memBufPrintErr(errNum);
            exit(1);
        } else {
            printf("Successed to create inBuf cl_mem buffer.\n\n");
        }
    }

    // Plain-text output buffer (created once).
    if (outBuf == NULL) {
        outBuf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, pkey->crypto_len * sizeof(uint8_t), NULL, &errNum);
        if (errNum != CL_SUCCESS) {
            printf("Failed to create outBuf cl_mem buffer.\n\n");
            memBufPrintErr(errNum);
            exit(1);
        } else {
            printf("Successed to create outBuf cl_mem buffer.\n\n");
        }
    }

    // Round key: create with the current contents, or refresh an existing
    // buffer so the kernel sees the key derived from *this* password.
    if (roundKeyBuf == NULL) {
        roundKeyBuf = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(roundkey), roundkey, &errNum);
        if (errNum != CL_SUCCESS) {
            printf("Failed to create roundKeyBuf cl_mem buffer.\n\n");
            memBufPrintErr(errNum);
            exit(1);
        } else {
            printf("Successed to create roundKeyBuf cl_mem buffer.\n\n");
        }
    } else {
        // Blocking write: `roundkey` lives on this function's stack.
        errNum = clEnqueueWriteBuffer(commandQueue, roundKeyBuf, CL_TRUE, 0, sizeof(roundkey), roundkey, 0, NULL, NULL);
        if (errNum != CL_SUCCESS) {
            printf("Failed to update roundKeyBuf cl_mem buffer. (error %d)\n\n", (int)errNum);
            exit(1);
        }
    }

    // IV: same create-or-refresh treatment as the round key.
    if (ivBuf == NULL) {
        ivBuf = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(iv), iv, &errNum);
        if (errNum != CL_SUCCESS) {
            printf("Failed to create ivBuf cl_mem buffer.\n\n");
            memBufPrintErr(errNum);
            exit(1);
        } else {
            printf("Successed to create ivBuf cl_mem buffer.\n\n");
        }
    } else {
        errNum = clEnqueueWriteBuffer(commandQueue, ivBuf, CL_TRUE, 0, sizeof(iv), iv, 0, NULL, NULL);
        if (errNum != CL_SUCCESS) {
            printf("Failed to update ivBuf cl_mem buffer. (error %d)\n\n", (int)errNum);
            exit(1);
        }
    }

    // Bind the kernel arguments.
    errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inBuf);
    errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &outBuf);
    errNum |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &roundKeyBuf);
    errNum |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &ivBuf);
    if (errNum != CL_SUCCESS) {
        printf("Failed to set OpenCL kernel arguments.");
        exit(1);
    }

    // Launch geometry: one work-item per SeedBlockSize block, work-groups
    // of size 1. Any dimension beyond the first is given extent 1 so no
    // entry is left uninitialised when WORK_DIM > 1.
    for (size_t dim = 0; dim < WORK_DIM; dim++) {
        globalWorkSize[dim] = 1;
        localWorkSize[dim] = 1;
    }
    globalWorkSize[0] = pkey->crypto_len / SeedBlockSize;
    localWorkSize[0] = 1;

    // Enqueue the kernel.
    errNum = clEnqueueNDRangeKernel(commandQueue, kernel, WORK_DIM, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);
    if (errNum != CL_SUCCESS) {
        printf("Failed to run OpenCL kernel.");
        switch (errNum) {
        case CL_INVALID_PROGRAM_EXECUTABLE:
            printf("CL_INVALID_PROGRAM_EXECUTABLE \n");
            break;
        case CL_INVALID_COMMAND_QUEUE:
            printf("CL_INVALID_COMMAND_QUEUE \n");
            break;
        case CL_INVALID_KERNEL:
            printf("CL_INVALID_KERNEL \n");
            break;
        case CL_INVALID_CONTEXT:
            printf("CL_INVALID_CONTEXT \n");
            break;
        case CL_INVALID_KERNEL_ARGS:
            printf("CL_INVALID_KERNEL_ARGS \n");
            break;
        case CL_INVALID_WORK_DIMENSION:
            printf("CL_INVALID_WORK_DIMENSION \n");
            break;
        case CL_INVALID_WORK_GROUP_SIZE:
            printf("CL_INVALID_WORK_GROUP_SIZE \n");
            break;
        case CL_INVALID_WORK_ITEM_SIZE:
            printf("CL_INVALID_WORK_ITEM_SIZE \n");
            break;
        case CL_INVALID_GLOBAL_OFFSET:
            printf("CL_INVALID_GLOBAL_OFFSET \n");
            break;
        case CL_OUT_OF_RESOURCES:
            printf("CL_OUT_OF_RESOURCES \n");
            break;
        case CL_MEM_OBJECT_ALLOCATION_FAILURE:
            printf("CL_MEM_OBJECT_ALLOCATION_FAILURE \n");
            break;
        case CL_INVALID_EVENT_WAIT_LIST:
            printf("CL_INVALID_EVENT_WAIT_LIST \n");
            break;
        case CL_OUT_OF_HOST_MEMORY:
            printf("CL_OUT_OF_HOST_MEMORY \n");
            break;
        default:
            // Codes not listed above are reported numerically.
            printf("OpenCL error %d \n", (int)errNum);
            break;
        }
        exit(1);
    }

    // Reading the plain text back from the device is disabled in the
    // original; kept here for reference:
    /*errNum = clEnqueueReadBuffer(commandQueue, outBuf, CL_TRUE, 0, pkey->crypto_len * sizeof(uint8_t), pkey->plain, 0, NULL, NULL);
    if(errNum != CL_SUCCESS){
        printf("Failed to copy plain string from OpenCL device to OpenCL host.");
        exit(1);
    }*/

    printf("iter\n");
}