int main(int argc, char* argv[]) { using namespace asmjit; Performance perf; uint32_t kNumRepeats = 10; uint32_t kNumIterations = 10000; JitRuntime runtime; X86Assembler a(&runtime); X86Compiler c(&runtime); uint32_t r, i; // -------------------------------------------------------------------------- // [Bench - Opcode] // -------------------------------------------------------------------------- perf.reset(); for (r = 0; r < kNumRepeats; r++) { perf.start(); for (i = 0; i < kNumIterations; i++) { asmgen::opcode(a); void *p = a.make(); runtime.release(p); a.reset(); } perf.end(); } printf("Opcode | Time: %-6u [ms] | Speed: %-9u [inst/s]\n", perf.best, instPerMs(perf.best, kNumIterations, asmgen::kGenOpCodeInstCount)); // -------------------------------------------------------------------------- // [Bench - Blend] // -------------------------------------------------------------------------- perf.reset(); for (r = 0; r < kNumRepeats; r++) { perf.start(); for (i = 0; i < kNumIterations; i++) { asmgen::blend(c); void* p = c.make(); runtime.release(p); c.reset(); } perf.end(); } printf("Blend | Time: %-6u [ms] | Speed: %-9u [inst/s]\n", perf.best, instPerMs(perf.best, kNumIterations, asmgen::kGenBlendInstCount)); return 0; }
void run() { Performance p; printf("Allocating buffers\n"); text_buf = alloc_shared_buffer<char> (LENGTH_OF_TEXT, &text_c); pattern_buf= alloc_shared_buffer<char> (LENGTH_OF_PATTERN, &pattern_c); result_buf= alloc_shared_buffer<char> (1, &result_c); result_c[0] = 0; init_string( text_c, pattern_c ); printf("Initializing kernels\n"); size_t task_dim = 1; clKernelSet kernel_set (device, context, program); // kernel_set.addKernel ("text_processor", 1, &task_dim, text_buf, LENGTH_OF_TEXT); // kernel_set.addKernel ("word_processor", 1, &task_dim); kernel_set.addKernel ("word_processor", 1, &task_dim, text_buf, LENGTH_OF_TEXT); kernel_set.addKernel ("matching", 1, &task_dim, pattern_buf, LENGTH_OF_PATTERN, result_buf); printf("Launching the kernel...\n"); p.start(); kernel_set.launch(); printf(" start waiting.... \n"); kernel_set.finish(); p.stop(); printf(" done Execution (OpenCL Channel) time = (%u,%u), result = %d\n", p.report_sec(), p.report_usec(),result_c[0]); test_in_cpu( text_c, pattern_c ); return; }
// CPU version of the pattern search; prints the elapsed time and match count.
//
// A position `n` is a candidate when it is a word start, i.e. `n == 0` or the
// preceding character is a space. A candidate is counted when the first
// `LENGTH_OF_PATTERN - 1` characters of `pattern` equal the characters of
// `text` starting at `n`.
//
// text    - buffer to search; assumed to hold LENGTH_OF_TEXT chars, which is
//           how run() allocates it.
// pattern - buffer with the pattern; its first LENGTH_OF_PATTERN - 1 chars are
//           compared.
//
// NOTE(review): when LENGTH_OF_PATTERN - 1 == 0 every position is counted,
// because `m` starts at 0 whether or not `n` is a word start. That behavior is
// kept as in the original - confirm whether it is intended.
void test_in_cpu( char* text, char* pattern )
{
  Performance p;
  p.start();

  int count = 0;
  for( int n = 0; n < LENGTH_OF_TEXT - 1; n++ ) {
    int m = 0;
    if( n == 0 || text[n-1] == ' ' ) {
      // Stop at the end of the text buffer as well as at the end of the
      // pattern: without the `n + m` bound, candidates close to the end of the
      // text would read past the LENGTH_OF_TEXT characters of `text`.
      for( ; m < LENGTH_OF_PATTERN - 1 && n + m < LENGTH_OF_TEXT; m++ ) {
        if( pattern[m] != text[n+m] )
          break;
      }
    }
    // The whole pattern was matched only if the inner loop ran to completion.
    if( m == LENGTH_OF_PATTERN - 1 )
      count++;
  }

  p.stop();
  printf(" done Execution (CPU) time = (%u,%u), result = %d\n", p.report_sec(), p.report_usec(),count);
}
static void benchX86(uint32_t arch, uint32_t callConv) { using namespace asmjit; Performance perf; TestRuntime runtime(arch, callConv); X86Assembler a(&runtime, arch); X86Compiler c; uint32_t r, i; const char* archName = arch == kArchX86 ? "X86" : "X64"; // -------------------------------------------------------------------------- // [Bench - Opcode] // -------------------------------------------------------------------------- size_t asmOutputSize = 0; size_t cmpOutputSize = 0; perf.reset(); for (r = 0; r < kNumRepeats; r++) { asmOutputSize = 0; perf.start(); for (i = 0; i < kNumIterations; i++) { asmgen::opcode(a); void *p = a.make(); runtime.release(p); asmOutputSize += a.getCodeSize(); a.reset(); } perf.end(); } printf("%-12s (%s) | Time: %-6u [ms] | Speed: %7.3f [MB/s]\n", "X86Assembler", archName, perf.best, mbps(perf.best, asmOutputSize)); // -------------------------------------------------------------------------- // [Bench - Blend] // -------------------------------------------------------------------------- perf.reset(); for (r = 0; r < kNumRepeats; r++) { cmpOutputSize = 0; perf.start(); for (i = 0; i < kNumIterations; i++) { c.attach(&a); asmgen::blend(c); c.finalize(); void* p = a.make(); runtime.release(p); cmpOutputSize += a.getCodeSize(); a.reset(); } perf.end(); } printf("%-12s (%s) | Time: %-6u [ms] | Speed: %7.3f [MB/s]\n", "X86Compiler", archName, perf.best, mbps(perf.best, cmpOutputSize)); }