void MainWindow::on_pushButton_2_clicked() { QString ctext = ui->cipherText2->toPlainText(); QString key = ui->keyText->text(); QString ptext = ""; QChar kernel[5][5]; ctext = ctext.replace('j','i'); NormalizeString(&ctext); NormalizeString(&key); BuildKernel(key, kernel); int ctextLen = ctext.length(); for(int i=0; i<ctextLen; i++) { QChar token1 = ctext[i]; i++; QChar token2; if(i < ctextLen) token2 = ctext[i]; else token2 = 'q'; int x1,y1,x2,y2; FindInKernel(kernel, token1,&x1,&y1); FindInKernel(kernel, token2,&x2,&y2); if(x1 == x2) { y1 = (y1+4) % 5; y2 = (y2+4) % 5; ptext += kernel[y1][x1]; ptext += kernel[y2][x2]; } else if(y1 == y2) { x1 = (x1+4) % 5; x2 = (x2+4) % 5; ptext += kernel[y1][x1]; ptext += kernel[y2][x2]; } else { ptext += kernel[y1][x2]; ptext += kernel[y2][x1]; } } //RemoveXForDuplicates(&ptext); ui->plainText2->setPlainText(ptext); }
/// Assembles the source for one reduction kernel variant and builds it.
///
/// Three preprocessor definitions (T, blockSize, nIsPow2) are placed in front
/// of the text returned by ReadSource(mSource[0]); the result is handed to
/// BuildKernel under the name "reduce<whichKernel>".
///
/// @param whichKernel  Numeric suffix appended to "reduce" to form the kernel name.
/// @param blockSize    Value substituted for the blockSize macro.
/// @param isPowOf2     Value substituted for the nIsPow2 macro.
/// @return The last entry of mKernels after the build (presumably the kernel
///         BuildKernel just appended -- confirm against BuildKernel).
cl_kernel CRoutine_Sum_NVidia::BuildReductionKernel(int whichKernel, int blockSize, int isPowOf2)
{
    // Configuration macros first, then the shared kernel source.
    stringstream program;
    program << "#define T float" << '\n'
            << "#define blockSize " << blockSize << '\n'
            << "#define nIsPow2 " << isPowOf2 << '\n';
    program << ReadSource(mSource[0]);

    stringstream entry_point;
    entry_point << "reduce" << whichKernel;

    BuildKernel(program.str(), entry_point.str());

    return mKernels[mKernels.size() - 1];
}
void MainWindow::on_pushButton_clicked() { QString ptext = ui->plainText->toPlainText(); QString key = ui->keyText->text(); QString ctext = ""; QChar kernel[5][5]; NormalizeString(&ptext); FixDuplicateChar(&ptext); NormalizeString(&key); BuildKernel(key, kernel); int ptextLen = ptext.length(); for(int i=0; i<ptextLen; i++) { QChar token1 = ptext[i]; i++; QChar token2; if(i < ptextLen) token2 = ptext[i]; else token2 = 'q'; int x1,y1,x2,y2; FindInKernel(kernel, token1,&x1,&y1); FindInKernel(kernel, token2,&x2,&y2); if(x1 == x2) { y1 = (y1+1) % 5; y2 = (y2+1) % 5; ctext += kernel[y1][x1]; ctext += kernel[y2][x2]; } else if(y1 == y2) { x1 = (x1+1) % 5; x2 = (x2+1) % 5; ctext += kernel[y1][x1]; ctext += kernel[y2][x2]; } else { ctext += kernel[y1][x2]; ctext += kernel[y2][x1]; } } ui->cipherText->setPlainText(ctext); }
void CRoutine_FTtoV2::Init() { // Read the kernel, compile it string source = ReadSource(mSource[0]); BuildKernel(source, "ft_to_vis2", mSource[0]); }
// Performs one damped least-squares IK step, processing the constraints in
// ascending priority order.
//
// For every integer priority between the smallest and the largest value found
// in cons_desc, the constraints carrying that priority are accumulated into a
// weighted normal-equation system (sum of w * J^T * J and w * J^T * err),
// damping terms are added, the system is solved with an LU decomposition and
// the resulting step is applied through cKinTree::ApplyStep. Before moving on
// to the next priority, N is right-multiplied by BuildKernel(J), so later
// levels are solved in the reduced coordinates spanned by N's columns
// (presumably the null space of the constraints handled so far -- confirm
// against BuildKernel).
//
// cons_desc  : one constraint per row; the eConsDescPriority and
//              eConsDescWeight columns are read here.
// prob       : supplies mClampDist (forwarded to BuildErr) and mDamp.
// joint_desc : one joint per row; passed non-const to cKinTree::ApplyStep.
// out_pose   : pose the error and Jacobian are evaluated at; passed non-const
//              to cKinTree::ApplyStep.
void cIKSolver::StepHybrid(const Eigen::MatrixXd& cons_desc, const tProblem& prob, Eigen::MatrixXd& joint_desc, Eigen::VectorXd& out_pose)
{
	const int num_dof = cKinTree::GetNumDof(joint_desc);
	const int num_joints = static_cast<int>(joint_desc.rows());
	const int num_cons = static_cast<int>(cons_desc.rows());

	Eigen::VectorXd err;
	Eigen::MatrixXd J;
	// Full-size scratch buffers; each priority level works on the top-left
	// corner sized to the current number of columns of N.
	Eigen::MatrixXd J_weighted_buff = Eigen::MatrixXd::Zero(num_dof, num_dof);
	Eigen::VectorXd Jt_err_weighted_buff = Eigen::VectorXd::Zero(num_dof);
	// Maps the reduced solve variables back to the full degrees of freedom.
	// Starts as identity and shrinks (fewer columns) after each priority level.
	Eigen::MatrixXd N = Eigen::MatrixXd::Identity(num_dof, num_dof);

	Eigen::VectorXi chain_joints(num_joints); // keeps track of joints in Ik chain

	double clamp_dist = prob.mClampDist;
	double damp = prob.mDamp;

	// Find the range of priorities present. With no constraints the range is
	// empty (min > max) and the main loop below does not execute.
	int min_priority = std::numeric_limits<int>::max();
	int max_priority = std::numeric_limits<int>::min();
	for (int c = 0; c < num_cons; ++c)
	{
		int curr_priority = static_cast<int>(cons_desc(c, eConsDescPriority));
		min_priority = std::min(min_priority, curr_priority);
		max_priority = std::max(max_priority, curr_priority);
	}

	// NOTE(review): every integer in [min_priority, max_priority] is visited,
	// including values no constraint uses; widely spaced priorities make this
	// loop long.
	for (int p = min_priority; p <= max_priority; ++p)
	{
		int curr_num_dof = static_cast<int>(N.cols());
		// Views into the scratch buffers sized for the current reduced system.
		auto J_weighted = J_weighted_buff.block(0, 0, curr_num_dof, curr_num_dof);
		auto Jt_err_weighted = Jt_err_weighted_buff.block(0, 0, curr_num_dof, 1);

		J_weighted.setZero();
		Jt_err_weighted.setZero();

		chain_joints.setZero();

		// Accumulate the normal equations over all constraints of priority p.
		int num_valid_cons = 0;
		for (int c = 0; c < num_cons; ++c)
		{
			const tConsDesc& curr_cons = cons_desc.row(c);
			int curr_priority = static_cast<int>(curr_cons(eConsDescPriority));

			if (curr_priority == p)
			{
				++num_valid_cons;
				err = BuildErr(joint_desc, out_pose, curr_cons, clamp_dist);
				J = BuildJacob(joint_desc, out_pose, curr_cons);

#if !defined(DISABLE_LINK_SCALE)
				for (int i = 0; i < num_joints; ++i)
				{
					// use entries in the jacobian to figure out if a joint is on the
					// link chain from the root to the constrained end effectors
					// this ignores the root which should not have any scaling
					int scale_idx = gPosDims + num_joints + i;
					// NOTE(review): theta_idx is computed but never used.
					int theta_idx = gPosDims + i;
					double scaling = J.col(scale_idx).squaredNorm();
					if (scaling > 0)
					{
						chain_joints(i) = 1;
					}
				}
#endif
				// Express the Jacobian in the reduced coordinates. The chain
				// detection above must see the full Jacobian, so it runs first.
				J = J * N;

				double weight = curr_cons(eConsDescWeight);
				J_weighted += weight * J.transpose() * J;
				Jt_err_weighted += weight * J.transpose() * err;
			}
		}

		// Priority values without any constraint are skipped entirely.
		if (num_valid_cons > 0)
		{
			// apply damping
			// a little more tricky with the null space
			// NOTE(review): N_block is created but never used; the damping term
			// below is built from the full N. Confirm whether the block was
			// meant to be used instead.
			auto N_block = N.block(0, 0, gPosDims + num_joints, N.cols());
			J_weighted += damp * N.transpose() * N;

#if !defined(DISABLE_LINK_SCALE)
			// damp link scaling according to stiffness
			for (int i = 0; i < num_joints; ++i)
			{
				// only scale links that are part of the IK chain
				if (chain_joints(i) == 1)
				{
					int idx = gPosDims + num_joints + i;
					// Row of N belonging to this joint's scale entry, copied
					// into a column vector.
					const Eigen::VectorXd& N_row = N.row(idx);
					// Difference between 1 and the joint's current scale value.
					double d_scale = 1.f - joint_desc(i, cKinTree::eJointDescScale);
					double link_stiffness = joint_desc(i, cKinTree::eJointDescLinkStiffness);
					J_weighted += link_stiffness * N_row * N_row.transpose();
					Jt_err_weighted += link_stiffness * d_scale * N_row;
				}
			}
#endif

			// Solve the reduced system, then map the solution back to the full
			// degrees of freedom before applying it.
			Eigen::VectorXd y = J_weighted.lu().solve(Jt_err_weighted);
			Eigen::VectorXd x = N * y;

			cKinTree::ApplyStep(joint_desc, x, out_pose);

			bool is_last = p == max_priority;
			if (!is_last)
			{
				// Gather the constraints of this priority into one matrix so
				// their combined Jacobian can be evaluated after the step.
				Eigen::MatrixXd cons_mat = Eigen::MatrixXd(num_valid_cons, cons_desc.cols());
				int r = 0;
				for (int c = 0; c < num_cons; ++c)
				{
					const tConsDesc& curr_cons = cons_desc.row(c);
					int curr_priority = static_cast<int>(curr_cons(eConsDescPriority));

					if (curr_priority == p)
					{
						cons_mat.row(r) = curr_cons;
						++r;
					}
				}

				J = BuildJacob(joint_desc, out_pose, cons_mat);
				J = J * N;

				Eigen::MatrixXd curr_N = BuildKernel(J);
				// An empty result leaves no columns to solve in, so the
				// remaining priority levels are abandoned.
				if (curr_N.cols() == 0)
				{
					break;
				}
				N = N * curr_N;
			}
		}
	}
}
/// Initialize the Chi2 routine. Note, this internally allocates some memory for computing a parallel sum. void CRoutine_Square::Init() { // Read the kernel, compile it string source = ReadSource(mSource[0]); BuildKernel(source, "square", mSource[0]); }
extern "C" void mixbenchGPU(cl_device_id dev_id, double *c, long size, bool block_strided, bool host_allocated, size_t workgroupsize, unsigned int elements_per_wi, unsigned int fusion_degree) { const char *benchtype; if(block_strided) benchtype = "Workgroup"; else benchtype = "NDRange"; printf("Workitem stride: %s\n", benchtype); const char *buffer_allocation = host_allocated ? "Host allocated" : "Device allocated"; printf("Buffer allocation: %s\n", buffer_allocation); // Set context properties cl_platform_id p_id; OCL_SAFE_CALL( clGetDeviceInfo(dev_id, CL_DEVICE_PLATFORM, sizeof(p_id), &p_id, NULL) ); size_t length; OCL_SAFE_CALL( clGetDeviceInfo(dev_id, CL_DEVICE_EXTENSIONS, 0, NULL, &length) ); char *extensions = (char*)alloca(length); OCL_SAFE_CALL( clGetDeviceInfo(dev_id, CL_DEVICE_EXTENSIONS, length, extensions, NULL) ); bool enable_dp = strstr(extensions, "cl_khr_fp64") != NULL; cl_context_properties ctxProps[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)p_id, 0 }; cl_int errno; // Create context cl_context context = clCreateContext(ctxProps, 1, &dev_id, NULL, NULL, &errno); OCL_SAFE_CALL(errno); cl_mem_flags buf_flags = CL_MEM_READ_WRITE; if( host_allocated ) buf_flags |= CL_MEM_ALLOC_HOST_PTR; cl_mem c_buffer = clCreateBuffer(context, buf_flags, size*sizeof(double), NULL, &errno); OCL_SAFE_CALL(errno); // Create command queue cl_command_queue cmd_queue = clCreateCommandQueue(context, dev_id, CL_QUEUE_PROFILING_ENABLE, &errno); OCL_SAFE_CALL(errno); // Set data on device memory cl_int *mapped_data = (cl_int*)clEnqueueMapBuffer(cmd_queue, c_buffer, CL_TRUE, CL_MAP_WRITE, 0, size*sizeof(double), 0, NULL, NULL, &errno); OCL_SAFE_CALL(errno); for(int i=0; i<size; i++) mapped_data[i] = 0; clEnqueueUnmapMemObject(cmd_queue, c_buffer, mapped_data, 0, NULL, NULL); // Load source, create program and all kernels printf("Loading kernel source file...\n"); const char c_param_format_str[] = "-cl-std=CL1.1 -cl-mad-enable -Dclass_T=%s -Dblockdim=" SIZE_T_FORMAT " 
-DCOMPUTE_ITERATIONS=%d -DELEMENTS_PER_THREAD=%d -DFUSION_DEGREE=%d %s %s"; const char *c_empty = ""; const char *c_striding = block_strided ? "-DBLOCK_STRIDED" : c_empty; const char *c_enable_dp = "-DENABLE_DP"; char c_build_params[256]; const char *c_kernel_source = {ReadFile("mix_kernels_ro.cl")}; printf("Precompilation of kernels... "); sprintf(c_build_params, c_param_format_str, "short", workgroupsize, 0, 1, 1, c_striding, c_empty); cl_kernel kernel_warmup = BuildKernel(context, dev_id, c_kernel_source, c_build_params); show_progress_init(compute_iterations_len); cl_kernel kernels[kdt_double+1][compute_iterations_len]; for(int i=0; i<compute_iterations_len; i++) { show_progress_step(0, '\\'); sprintf(c_build_params, c_param_format_str, "float", workgroupsize, compute_iterations[i], elements_per_wi, fusion_degree, c_striding, c_empty); //printf("%s\n",c_build_params); kernels[kdt_float][i] = BuildKernel(context, dev_id, c_kernel_source, c_build_params); show_progress_step(0, '|'); sprintf(c_build_params, c_param_format_str, "int", workgroupsize, compute_iterations[i], elements_per_wi, fusion_degree, c_striding, c_empty); //printf("%s\n",c_build_params); kernels[kdt_int][i] = BuildKernel(context, dev_id, c_kernel_source, c_build_params); if( enable_dp ) { show_progress_step(0, '/'); sprintf(c_build_params, c_param_format_str, "double", workgroupsize, compute_iterations[i], elements_per_wi, fusion_degree, c_striding, c_enable_dp); //printf("%s\n",c_build_params); kernels[kdt_double][i] = BuildKernel(context, dev_id, c_kernel_source, c_build_params); } else kernels[kdt_double][i] = 0; show_progress_step(1, '>'); } show_progress_done(); free((char*)c_kernel_source); runbench_warmup(cmd_queue, kernel_warmup, c_buffer, size, workgroupsize); // Synchronize in order to wait for memory operations to finish OCL_SAFE_CALL( clFinish(cmd_queue) ); printf("---------------------------------------------------------- CSV data 
----------------------------------------------------------\n"); printf("Experiment ID, Single Precision ops,,,, Double precision ops,,,, Integer operations,,, \n"); printf("Compute iters, Flops/byte, ex.time, GFLOPS, GB/sec, Flops/byte, ex.time, GFLOPS, GB/sec, Iops/byte, ex.time, GIOPS, GB/sec\n"); for(int i=0; i<compute_iterations_len; i++) runbench(compute_iterations, i, cmd_queue, kernels, c_buffer, size, workgroupsize, elements_per_wi, fusion_degree); printf("------------------------------------------------------------------------------------------------------------------------------\n"); // Copy results back to host memory OCL_SAFE_CALL( clEnqueueReadBuffer(cmd_queue, c_buffer, CL_TRUE, 0, size*sizeof(double), c, 0, NULL, NULL) ); // Release kernels and program ReleaseKernelNProgram(kernel_warmup); for(int i=0; i<compute_iterations_len; i++) { ReleaseKernelNProgram(kernels[kdt_float][i]); ReleaseKernelNProgram(kernels[kdt_int][i]); if( enable_dp ) ReleaseKernelNProgram(kernels[kdt_double][i]); } // Release buffer OCL_SAFE_CALL( clReleaseMemObject(c_buffer) ); }