/**
 * @brief	Central dispatcher of the documentation code generator.
 *
 * The signal is tested against each known signal form, in a fixed order.
 * The first form that matches decides which generator is invoked; the call
 * is traced through printGCCall before the generator runs.
 *
 * @param	sig			The signal expression to compile.
 * @param	priority	The environment priority of the expression.
 * @return	<string>	The LaTeX code translation of the signal.
 * @throws	faustexception when the signal matches none of the known forms.
 */
string DocCompiler::generateCode (Tree sig, int priority)
{
	int		num;
	double	real;
	Tree	c, selector, x, y, z, u, label, ff, largs, type, name, file;

	// Signals carrying user data are handled by the extended generator.
	if (getUserData(sig)) {
		printGCCall(sig,"generateXtended");
		return generateXtended(sig, priority);
	}

	// Numeric constants.
	if (isSigInt(sig, &num)) {
		printGCCall(sig,"generateNumber");
		return generateNumber(sig, docT(num));
	}
	if (isSigReal(sig, &real)) {
		printGCCall(sig,"generateNumber");
		return generateNumber(sig, docT(real));
	}

	// Inputs and outputs: the index handed to the generator is num+1.
	if (isSigInput(sig, &num)) {
		printGCCall(sig,"generateInput");
		return generateInput(sig, docT(num+1));
	}
	if (isSigOutput(sig, &num, x)) {
		printGCCall(sig,"generateOutput");
		return generateOutput(sig, docT(num+1), CS(x, priority));
	}

	// Delays, prefix, iota, binary operations.
	if (isSigFixDelay(sig, x, y)) {
		printGCCall(sig,"generateFixDelay");
		return generateFixDelay(sig, x, y, priority);
	}
	if (isSigPrefix(sig, x, y)) {
		printGCCall(sig,"generatePrefix");
		return generatePrefix(sig, x, y, priority);
	}
	if (isSigIota(sig, x)) {
		printGCCall(sig,"generateIota");
		return generateIota(sig, x);
	}
	if (isSigBinOp(sig, &num, x, y)) {
		printGCCall(sig,"generateBinOp");
		return generateBinOp(sig, num, x, y, priority);
	}

	// Foreign functions, constants and variables.
	if (isSigFFun(sig, ff, largs)) {
		printGCCall(sig,"generateFFun");
		return generateFFun(sig, ff, largs, priority);
	}
	if (isSigFConst(sig, type, name, file)) {
		printGCCall(sig,"generateFConst");
		return generateFConst(sig, tree2str(file), tree2str(name));
	}
	if (isSigFVar(sig, type, name, file)) {
		printGCCall(sig,"generateFVar");
		return generateFVar(sig, tree2str(file), tree2str(name));
	}

	// new special tables for documentation purposes
	if (isSigDocConstantTbl(sig, x, y)) {
		printGCCall(sig,"generateDocConstantTbl");
		return generateDocConstantTbl(sig, x, y);
	}
	if (isSigDocWriteTbl(sig, x, y, z, u)) {
		printGCCall(sig,"generateDocWriteTbl");
		return generateDocWriteTbl(sig, x, y, z, u);
	}
	if (isSigDocAccessTbl(sig, x, y)) {
		printGCCall(sig, "generateDocAccessTbl");
		return generateDocAccessTbl(sig, x, y);
	}

	// Selectors.
	if (isSigSelect2(sig, selector, x, y)) {
		printGCCall(sig,"generateSelect2");
		return generateSelect2(sig, selector, x, y, priority);
	}
	if (isSigSelect3(sig, selector, x, y, z)) {
		printGCCall(sig,"generateSelect3");
		return generateSelect3(sig, selector, x, y, z, priority);
	}

	// Recursive projections and casts.
	if (isProj(sig, &num, x)) {
		printGCCall(sig,"generateRecProj");
		return generateRecProj(sig, x, num, priority);
	}
	if (isSigIntCast(sig, x)) {
		printGCCall(sig,"generateIntCast");
		return generateIntCast(sig, x, priority);
	}
	if (isSigFloatCast(sig, x)) {
		printGCCall(sig,"generateFloatCast");
		return generateFloatCast(sig, x, priority);
	}

	// User interface elements.
	if (isSigButton(sig, label)) {
		printGCCall(sig,"generateButton");
		return generateButton(sig, label);
	}
	if (isSigCheckbox(sig, label)) {
		printGCCall(sig,"generateCheckbox");
		return generateCheckbox(sig, label);
	}
	if (isSigVSlider(sig, label, c, x, y, z)) {
		printGCCall(sig,"generateVSlider");
		return generateVSlider(sig, label, c, x, y, z);
	}
	if (isSigHSlider(sig, label, c, x, y, z)) {
		printGCCall(sig,"generateHSlider");
		return generateHSlider(sig, label, c, x, y, z);
	}
	if (isSigNumEntry(sig, label, c, x, y, z)) {
		printGCCall(sig,"generateNumEntry");
		return generateNumEntry(sig, label, c, x, y, z);
	}

	// Bargraphs are not rendered themselves: only the compiled form of the
	// displayed signal z is returned (the dedicated generators are disabled).
	if (isSigVBargraph(sig, label, x, y, z)) {
		printGCCall(sig,"generateVBargraph");
		return CS(z, priority);
		//generateVBargraph (sig, label, x, y, CS(z, priority));
	}
	if (isSigHBargraph(sig, label, x, y, z)) {
		printGCCall(sig,"generateHBargraph");
		return CS(z, priority);
		//generateHBargraph (sig, label, x, y, CS(z, priority));
	}

	// Attach and enable.
	if (isSigAttach(sig, x, y)) {
		printGCCall(sig,"generateAttach");
		return generateAttach(sig, x, y, priority);
	}
	if (isSigEnable(sig, x, y)) {
		printGCCall(sig,"generateEnable");
		return generateEnable(sig, x, y, priority);
	}

	// No form matched: report the offending signal.
	{
		stringstream msg;
		msg << "ERROR in d signal, unrecognized signal : " << *sig << endl;
		throw faustexception(msg.str());
	}

	// Never reached: every path above either returns or throws.
	faustassert(0);
	return "error in generate code";
}
/*
 * Entry point of the login demo.
 *
 * The interactive login/password prompts and the match()/welcome()/goodbye()
 * check are commented out; the only live action is the call to
 * generateInput(pw).
 *
 * Returns 0.
 */
int main()
{
    char name[128];
    /* Zero-filled so generateInput never sees indeterminate bytes. */
    char pw[128] = {0}; /* passwords are eight characters - double this */
    const char *good = "Welcome to The Machine!\n";
    const char *evil = "Invalid identity, exiting!\n";

    /* Only the disabled code below refers to these; keep them for when it
     * is re-enabled, and silence unused-variable warnings meanwhile. */
    (void)name;
    (void)good;
    (void)evil;

    /* NOTE(review): the two disabled prompt lines below look garbled
     * ("******"); restore them from history before re-enabling. */
    //printf("login: "******"%s", name);
    //printf("password: "******"%s", pw);

    /*if (match(name, pw) == 0)
        welcome(good);
    else
        goodbye(evil);*/

    generateInput(pw);
    return 0;
}
/**
 * Demo driver.
 *
 * Prints every value returned by generateInput(), then calls
 * changeBit(bitmapArray, value, 1) for each of them, prints the first ten
 * entries of generateBit(bitmapArray) and finally the raw tick count of the
 * measured interval. The interval covers the changeBit loop, the generateBit
 * call and the printing of those ten entries.
 *
 * @return always 0.
 */
extern "C" int main()
{
    auto input = generateInput();
    for (const auto value : input)
    {
        std::cout << value << std::endl;
    }

    const auto begin = std::chrono::high_resolution_clock::now();

    for (const auto value : input)
    {
        changeBit(bitmapArray, value, 1);
    }

    auto output = generateBit(bitmapArray);
    int printed = 0;
    while (printed < 10)
    {
        std::cout << output[printed] << " ";
        printed++;
    }

    const auto finish = std::chrono::high_resolution_clock::now();
    const auto elapsed = finish - begin;
    std::cout << elapsed.count();

    return 0;
}
/* * main execution routine * Basically it consists of three parts: * - generating the inputs * - running OpenCL kernel * - reading results of processing */ int _tmain(int argc, TCHAR* argv[]) { cl_int err; ocl_args_d_t ocl; cl_device_type deviceType = CL_DEVICE_TYPE_GPU; LARGE_INTEGER perfFrequency; LARGE_INTEGER performanceCountNDRangeStart; LARGE_INTEGER performanceCountNDRangeStop; cl_uint arrayWidth = 1024; cl_uint arrayHeight = 1024; //initialize Open CL objects (context, queue, etc.) if (CL_SUCCESS != SetupOpenCL(&ocl, deviceType)) { return -1; } // allocate working buffers. // the buffer should be aligned with 4K page and size should fit 64-byte cached line cl_uint optimizedSize = ((sizeof(cl_int) * arrayWidth * arrayHeight - 1) / 64 + 1) * 64; cl_int* inputA = (cl_int*)_aligned_malloc(optimizedSize, 4096); cl_int* inputB = (cl_int*)_aligned_malloc(optimizedSize, 4096); cl_int* outputC = (cl_int*)_aligned_malloc(optimizedSize, 4096); if (NULL == inputA || NULL == inputB || NULL == outputC) { LogError("Error: _aligned_malloc failed to allocate buffers.\n"); return -1; } //random input generateInput(inputA, arrayWidth, arrayHeight); generateInput(inputB, arrayWidth, arrayHeight); // Create OpenCL buffers from host memory // These buffers will be used later by the OpenCL kernel if (CL_SUCCESS != CreateBufferArguments(&ocl, inputA, inputB, outputC, arrayWidth, arrayHeight)) { return -1; } // Create and build the OpenCL program if (CL_SUCCESS != CreateAndBuildProgram(&ocl)) { return -1; } // Program consists of kernels. // Each kernel can be called (enqueued) from the host part of OpenCL application. // To call the kernel, you need to create it from existing program. ocl.kernel = clCreateKernel(ocl.program, "Add", &err); if (CL_SUCCESS != err) { LogError("Error: clCreateKernel returned %s\n", TranslateOpenCLError(err)); return -1; } // Passing arguments into OpenCL kernel. 
if (CL_SUCCESS != SetKernelArguments(&ocl)) { return -1; } // Regularly you wish to use OpenCL in your application to achieve greater performance results // that are hard to achieve in other ways. // To understand those performance benefits you may want to measure time your application spent in OpenCL kernel execution. // The recommended way to obtain this time is to measure interval between two moments: // - just before clEnqueueNDRangeKernel is called, and // - just after clFinish is called // clFinish is necessary to measure entire time spending in the kernel, measuring just clEnqueueNDRangeKernel is not enough, // because this call doesn't guarantees that kernel is finished. // clEnqueueNDRangeKernel is just enqueue new command in OpenCL command queue and doesn't wait until it ends. // clFinish waits until all commands in command queue are finished, that suits your need to measure time. bool queueProfilingEnable = true; if (queueProfilingEnable) QueryPerformanceCounter(&performanceCountNDRangeStart); // Execute (enqueue) the kernel if (CL_SUCCESS != ExecuteAddKernel(&ocl, arrayWidth, arrayHeight)) { return -1; } if (queueProfilingEnable) QueryPerformanceCounter(&performanceCountNDRangeStop); // The last part of this function: getting processed results back. // use map-unmap sequence to update original memory area with output buffer. ReadAndVerify(&ocl, arrayWidth, arrayHeight, inputA, inputB); // retrieve performance counter frequency if (queueProfilingEnable) { QueryPerformanceFrequency(&perfFrequency); LogInfo("NDRange performance counter time %f ms.\n", 1000.0f*(float)(performanceCountNDRangeStop.QuadPart - performanceCountNDRangeStart.QuadPart) / (float)perfFrequency.QuadPart); } _aligned_free(inputA); _aligned_free(inputB); _aligned_free(outputC); #if defined(_DEBUG) getchar(); #endif return 0; }
int main(int argc, char *argv[]) { init_rpc(argv[1]); cl_platform_id platform; cl_device_id device; cl_context context; cl_command_queue queue; cl_program program; cl_kernel kernel; cl_mem buffer; size_t i; int scale = 8; // scale should be higher than 8 size_t array_size = powl(2, scale) * 4; cl_int *input = (cl_int *) malloc(sizeof(cl_int) * array_size); cl_int *output = (cl_int *) malloc(sizeof(cl_int) * array_size); cl_int dir = 1; cl_int no_stages = 0; cl_int temp; generateInput(input, array_size); //ExecuteSortReference(input, array_size, dir); for (temp = array_size; temp > 2; temp >>= 1) { no_stages++; } size_t local_work_size[1]; size_t global_work_size[1]; const char *source = load_program_source("BitonicSort.cl"); size_t source_len = strlen(source);; cl_uint err = 0; char *flags = "-cl-fast-relaxed-math"; clGetPlatformIDs(1, &platform, NULL); printf("platform %p err %d\n", platform, err); clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &device, &err); printf("device %p err %d\n", device, err); context = clCreateContext(0, 1, &device, NULL, NULL, &err); printf("context %p err %d\n", context, err); queue = clCreateCommandQueue(context, device, 0, &err); printf("queue %p err %d\n", queue, err); program = clCreateProgramWithSource(context, 1, &source, &source_len, &err); printf("program %p err %d\n", program, err); err = clBuildProgram(program, 0, NULL, flags, NULL, NULL); printf("err %d\n", err); kernel = clCreateKernel(program, "BitonicSort", NULL); printf("kernel %p\n", kernel); buffer = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_int) * array_size, input, &err); printf("buffer %p err %d\n", buffer, err); err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&buffer); printf("err %d\n", err); err = clSetKernelArg(kernel, 3, sizeof(cl_int), (void*)&dir); printf("err %d\n", err); cl_int stage, pass_stage; for (stage = 0; stage < no_stages; stage++) { err = clSetKernelArg(kernel, 1, sizeof(cl_int), (void*)&stage); printf("err %d\n", err); for 
(pass_stage = stage; pass_stage >= 0; pass_stage--) { err = clSetKernelArg(kernel, 2, sizeof(cl_int), (void*)&pass_stage); printf("err %d\n", err); size_t gsz = array_size/(2*4); global_work_size[0] = pass_stage ? gsz : gsz << 1; local_work_size[0] = 128; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); printf("err %d\n", err); } } clFinish(queue); err = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sizeof(cl_int) * array_size, output, 0, NULL, NULL); printf("err %d\n", err); for (i = 0; i < array_size; i++) { printf("%i %i\n", i, output[i]); } clReleaseMemObject(buffer); clReleaseProgram(program); clReleaseKernel(kernel); clReleaseCommandQueue(queue); }