Beispiel #1
0
int main(int argc, char* argv[]) {
  using namespace asmjit;

  Performance perf;
  uint32_t kNumRepeats = 10;
  uint32_t kNumIterations = 10000;

  JitRuntime runtime;
  X86Assembler a(&runtime);
  X86Compiler c(&runtime);

  uint32_t r, i;

  // --------------------------------------------------------------------------
  // [Bench - Opcode]
  // --------------------------------------------------------------------------

  perf.reset();
  for (r = 0; r < kNumRepeats; r++) {
    perf.start();
    for (i = 0; i < kNumIterations; i++) {
      asmgen::opcode(a);

      void *p = a.make();
      runtime.release(p);

      a.reset();
    }
    perf.end();
  }

  printf("Opcode   | Time: %-6u [ms] | Speed: %-9u [inst/s]\n",
    perf.best, instPerMs(perf.best, kNumIterations, asmgen::kGenOpCodeInstCount));

  // --------------------------------------------------------------------------
  // [Bench - Blend]
  // --------------------------------------------------------------------------

  perf.reset();
  for (r = 0; r < kNumRepeats; r++) {
    perf.start();
    for (i = 0; i < kNumIterations; i++) {
      asmgen::blend(c);

      void* p = c.make();
      runtime.release(p);

      c.reset();
    }
    perf.end();
  }

  printf("Blend    | Time: %-6u [ms] | Speed: %-9u [inst/s]\n",
    perf.best, instPerMs(perf.best, kNumIterations, asmgen::kGenBlendInstCount));

  return 0;
}
void run() {
	Performance p;

	printf("Allocating buffers\n");

	text_buf = alloc_shared_buffer<char> (LENGTH_OF_TEXT, &text_c);
	pattern_buf= alloc_shared_buffer<char> (LENGTH_OF_PATTERN, &pattern_c);
	result_buf= alloc_shared_buffer<char> (1, &result_c);
	result_c[0] = 0;

	init_string( text_c, pattern_c );

	printf("Initializing kernels\n");
	size_t task_dim = 1;
	clKernelSet kernel_set (device, context, program);
//	kernel_set.addKernel ("text_processor", 1, &task_dim, text_buf, LENGTH_OF_TEXT);
//	kernel_set.addKernel ("word_processor", 1, &task_dim);
	kernel_set.addKernel ("word_processor", 1, &task_dim, text_buf, LENGTH_OF_TEXT);
	kernel_set.addKernel ("matching", 1, &task_dim, pattern_buf, LENGTH_OF_PATTERN, result_buf);

	printf("Launching the kernel...\n");
	p.start();
	kernel_set.launch();

	printf(" start waiting.... \n");
	kernel_set.finish();


	p.stop();
	printf(" done Execution (OpenCL Channel) time = (%u,%u), result = %d\n", p.report_sec(), p.report_usec(),result_c[0]);

	test_in_cpu( text_c, pattern_c );

	return;
}
void test_in_cpu( char* text, char* pattern )
{
	Performance p;
	p.start();

	bool match = false;
	int count = 0;
	for( int n = 0 ; n < LENGTH_OF_TEXT - 1; n ++ ) { 
		int m = 0;
		if( n == 0 || text[n-1] == ' ' ) {
			for( ; m < LENGTH_OF_PATTERN - 1; m ++ ) {
				if( pattern[m] != text[n+m] )
					break;
			}
		}
		if( m == LENGTH_OF_PATTERN - 1 )
			count ++;
	}

	p.stop();

	printf(" done Execution (CPU) time = (%u,%u), result = %d\n", p.report_sec(), p.report_usec(),count);
}
Beispiel #4
0
static void benchX86(uint32_t arch, uint32_t callConv) {
  using namespace asmjit;

  Performance perf;
  TestRuntime runtime(arch, callConv);

  X86Assembler a(&runtime, arch);
  X86Compiler c;

  uint32_t r, i;

  const char* archName = arch == kArchX86 ? "X86" : "X64";

  // --------------------------------------------------------------------------
  // [Bench - Opcode]
  // --------------------------------------------------------------------------

  size_t asmOutputSize = 0;
  size_t cmpOutputSize = 0;

  perf.reset();
  for (r = 0; r < kNumRepeats; r++) {
    asmOutputSize = 0;
    perf.start();
    for (i = 0; i < kNumIterations; i++) {
      asmgen::opcode(a);

      void *p = a.make();
      runtime.release(p);

      asmOutputSize += a.getCodeSize();
      a.reset();
    }
    perf.end();
  }

  printf("%-12s (%s) | Time: %-6u [ms] | Speed: %7.3f [MB/s]\n",
    "X86Assembler", archName, perf.best, mbps(perf.best, asmOutputSize));

  // --------------------------------------------------------------------------
  // [Bench - Blend]
  // --------------------------------------------------------------------------

  perf.reset();
  for (r = 0; r < kNumRepeats; r++) {
    cmpOutputSize = 0;
    perf.start();
    for (i = 0; i < kNumIterations; i++) {
      c.attach(&a);
      asmgen::blend(c);
      c.finalize();

      void* p = a.make();
      runtime.release(p);

      cmpOutputSize += a.getCodeSize();
      a.reset();
    }
    perf.end();
  }

  printf("%-12s (%s) | Time: %-6u [ms] | Speed: %7.3f [MB/s]\n",
    "X86Compiler", archName, perf.best, mbps(perf.best, cmpOutputSize));
}