Exemple #1
0
/*
 * Benchmark driver: for each matrix size in `size`, multiplies two random
 * square matrices with my_dgemm and with cblas_dgemm, checks that both
 * results agree within `epsilon`, and prints one CSV line
 * "n, test time, test mflops, base time, base mflops".
 *
 * Returns 0 on success, 1 if a matrix allocation fails.
 */
int main(void) {
	const int size[NSTEPS] = {500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000};  // specification of array sizes
	static const double epsilon = 0.00001; // maximum floating point error allowed between doubles to be considered equal
	const double flops_per_iteration = 2.0;

	int i;  // index variable for looping
	double time[2]; // elapsed time. 0 is test case, 1 is base case
	int mflops[2]; // calculated mflops.  0 is test case, 1 is base case
	double* a;
	double* b;
	double* ctest;
	double* cbase;
	stopwatch* sw = stopwatch_new();

	for (i = 0; i < NSTEPS; i++) {
		int n = size[i];
		int n2 = n * n;

		a = (double*) malloc(n2*sizeof(double));
		b = (double*) malloc(n2*sizeof(double));
		ctest = (double*) malloc(n2*sizeof(double));
		cbase = (double*) malloc(n2*sizeof(double));

		/* The largest step needs four 5000x5000 double matrices, so an
		 * allocation failure is realistic; bail out instead of crashing. */
		if (a == NULL || b == NULL || ctest == NULL || cbase == NULL) {
			fprintf(stderr, "out of memory allocating %d x %d matrices\n", n, n);
			free(a);
			free(b);
			free(ctest);
			free(cbase);
			stopwatch_delete(sw);
			return 1;
		}

		rand_square_double_matrix(i+10, n, a);
		rand_square_double_matrix(i+11, n, b);
		zero_square_double_matrix(n, ctest);
		zero_square_double_matrix(n, cbase);

		/* Implementation under test. */
		stopwatch_restart(sw);
		my_dgemm(n, a, b, ctest);
		stopwatch_stop(sw);
		time[0] = stopwatch_time(sw);

		/* Reference implementation: cbase = 1.0 * a * b + 1.0 * cbase. */
		stopwatch_restart(sw);
		cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, n, n, 1.0, a, n, b, n, 1.0, cbase, n);
		stopwatch_stop(sw);

		assert_equal(n, ctest, cbase, epsilon);
		/* Read after the comparison, as in the original ordering; the watch
		 * was stopped above (presumably the reading is unaffected - confirm). */
		time[1] = stopwatch_time(sw);
		mflops[0] = calc_mflops(flops_per_iteration, n, time[0]);
		mflops[1] = calc_mflops(flops_per_iteration, n, time[1]);

		printf("%d, %5.2f, %d, %5.2f, %d\n", n, time[0], mflops[0], time[1], mflops[1]);

		free(a);
		free(b);
		free(ctest);
		free(cbase);
	}
	stopwatch_delete(sw);
	return 0;
}
Exemple #2
0
/*
 * Loads a vector via vector_load_from_file(), counts its inversions while
 * timing the call, and prints the count together with the elapsed time.
 *
 * Returns 0 on success, 1 if the vector could not be loaded.
 */
int main (int argc, char **argv)
{
    int error = 0;
    Vector *vector = vector_new ();
    
    if (vector_load_from_file (vector, argc, argv))
    {
        Stopwatch *stopwatch = stopwatch_new ();
        uint64_t inversions = 0;

        stopwatch_start (stopwatch);
        
        inversions = count_inversions (vector);
        
        stopwatch_stop (stopwatch);
        
        /* uint64_t is not necessarily unsigned long long, so cast to match
         * the %llu conversion exactly.
         * NOTE(review): the types of stopwatch->elapsed / ->usec are not
         * visible here; confirm they match %llu / %u. */
        printf ("%llu inversions of %zu numbers in %llu.%03u ms.\n",
                (unsigned long long) inversions, vector->length,
                stopwatch->elapsed, stopwatch->usec);
        
        stopwatch_free (stopwatch);
    }
    else
    {
        error = 1;
    }
    
    vector_free (vector);
    
    return error;
}
Exemple #3
0
/*
 * Runs the benchmark operation named by data->command once for `work`,
 * then records the start time and elapsed time in the slot selected by
 * work->progress and advances work->progress.
 *
 * Dies if work->progress is already greater than 1 or if the command name
 * is not recognised.
 */
static void handle_work(struct worker_info *data, struct work *work)
{
	struct benchmark_config *cfg = data->config;
	struct benchmark_operations *ops = &cfg->ops;
	const char *cmd = data->command;
	unsigned long start, elapsed;

	if (work->progress > 1)
		die("something wrong happened");

	start = stopwatch_start();

	/* Dispatch order matters: prefix matches are tested in the same
	 * sequence as before. */
	if (strstartswith(cmd, "putlist")) {
		ops->putlist_test(data->db, cmd, cfg->num,
				cfg->vsiz, cfg->batch, work->seed);
	} else if (strcmp(cmd, "fwmkeys") == 0) {
		ops->fwmkeys_test(data->db, cfg->num, work->seed);
	} else if (strcmp(cmd, "range") == 0 ||
		   strcmp(cmd, "range_atomic") == 0) {
		ops->range_test(data->db, cmd, cfg->num,
				cfg->vsiz, cfg->batch, work->seed);
	} else if (strcmp(cmd, "rangeout_atomic") == 0) {
		ops->rangeout_test(data->db, cmd, cfg->num,
				cfg->vsiz, cfg->batch, work->seed);
	} else if (strstartswith(cmd, "getlist")) {
		ops->getlist_test(data->db, cmd, cfg->num,
				cfg->vsiz, cfg->batch, work->seed);
	} else if (strcmp(cmd, "fwmkeys-getlist") == 0) {
		/* combined commands: key lookup first, then the list operation */
		ops->fwmkeys_test(data->db, cfg->num, work->seed);
		ops->getlist_test(data->db, "getlist", cfg->num,
				cfg->vsiz, cfg->batch, work->seed);
	} else if (strcmp(cmd, "fwmkeys-getlist_atomic") == 0) {
		ops->fwmkeys_test(data->db, cfg->num, work->seed);
		ops->getlist_test(data->db, "getlist_atomic", cfg->num,
				cfg->vsiz, cfg->batch, work->seed);
	} else if (strstartswith(cmd, "outlist")) {
		ops->outlist_test(data->db, cmd, cfg->num,
				cfg->batch, work->seed);
	} else if (strcmp(cmd, "fwmkeys-outlist") == 0) {
		ops->fwmkeys_test(data->db, cfg->num, work->seed);
		ops->outlist_test(data->db, "outlist", cfg->num,
				cfg->batch, work->seed);
	} else if (strcmp(cmd, "fwmkeys-outlist_atomic") == 0) {
		ops->fwmkeys_test(data->db, cfg->num, work->seed);
		ops->outlist_test(data->db, "outlist_atomic", cfg->num,
				cfg->batch, work->seed);
	} else if (strcmp(cmd, "put") == 0) {
		ops->put_test(data->db, cfg->num, cfg->vsiz, work->seed);
	} else if (strcmp(cmd, "get") == 0) {
		ops->get_test(data->db, cfg->num, cfg->vsiz, work->seed);
	} else if (strcmp(cmd, "nop") == 0) {
		/* nothing to run; only the timing overhead is recorded */
	} else {
		die("Invalid command %s", cmd);
	}

	elapsed = stopwatch_stop(start);

	work->start[work->progress] = start;
	work->elapsed[work->progress] = elapsed;
	work->progress++;
}
Exemple #4
0
/*
 * N-queens CPU benchmark driver.
 *
 * Parses "-s board_size" (default 18), times nqueen_cpu(), reports the
 * result in human-readable form on stderr and as a JSON object on stdout.
 * Exits with EXIT_FAILURE on a missing or unknown option.
 */
int main(int argc, char *argv[]){
  stopwatch sw;
  long long us = 0;
  int size = 18;
  int status = 1;
  int option_index = 0;
  int opt;

  if (argc < 2) {
    fprintf(stderr, "Usage: %s [-s board_size]\n", argv[0]);
    exit(EXIT_FAILURE);
  }

  for (;;) {
    opt = getopt_long(argc, argv, "s:", long_options, &option_index);
    if (opt == -1) {
      break;
    }
    if (opt != 's') {
      /* anything other than -s is a usage error */
      fprintf(stderr, "Usage: %s [-s board_size]\n", argv[0]);
      exit(EXIT_FAILURE);
    }
    size = atoi(optarg);
  }

  stopwatch_start(&sw);
  long long solutions = nqueen_cpu(size, &us);
  stopwatch_stop(&sw);

  fprintf(stderr,
          "The number of solutions is %lld, the number of unique solutions is "
          "%lld and the total time it took is %lf seconds\n",
          solutions, us, get_interval_by_sec(&sw));
  printf("{ \"status\": %d, \"options\": \"-s %d\", \"time\": %f, \"output\": \"[%lld, %lld]\" }\n",
         status, size, get_interval_by_sec(&sw), solutions, us);
  return 0;
}
Exemple #5
0
/**
 * QR-based solver with Givens rotations.
 *
 * Loads a matrix from stdin, computes its eigenvalues with qr_iterative()
 * (timed by a stopwatch) and prints them as a bracketed, comma-separated
 * list. An empty matrix prints "[]".
 *
 * @param[in] argc ARGument Counter (unused)
 * @param[in] argv ARGument Vector (unused)
 * @retval EXIT_SUCCESS Normal termination of the program
 * @retval EXIT_FAILURE Some error occurred
 */
int main(const int argc, char * const argv[]) {
    st_matrix_t M = st_matrix_load(stdin);
    const unsigned int size = st_matrix_size(M);
    double *eigenvalues;
    unsigned int i;
    stopwatch_t stopwatch = stopwatch_create("QR_solver");

    (void) argc;
    (void) argv;

    /* Allocates resources */
    SAFE_MALLOC(eigenvalues, double *, size * sizeof(double));


    /* Computes eigenvalues */
    stopwatch_start(stopwatch, 0, "Compute eigenvalues");
    qr_iterative(M, eigenvalues, NULL);
    stopwatch_stop(stopwatch, 0);


    /* Prints results.
     * The separator is emitted before every element but the first, so the
     * loop bound is simply `size`; the former `size - 1` bound wrapped
     * around for size == 0 (unsigned) and read past the array. */
    printf("Eigenvalues:\n[");
    for (i = 0; i < size; ++i) {
        if (i > 0) {
            printf(", ");
        }
        printf("%g", eigenvalues[i]);
    }
    printf("]\n");


    /* Frees memory */
    st_matrix_delete(&M);
    free(eigenvalues);
    stopwatch_delete(&stopwatch);

    return EXIT_SUCCESS;
}
/*
 * Stops the stopwatch T and releases its memory.
 * A null pointer is accepted and ignored.
 */
void
stopwatch_destroy (struct stopwatch_t* T)
{
    if (!T)
        return;

    stopwatch_stop (T);
    free (T);
}
Exemple #7
0
/*
 * Lua test-suite runner.
 *
 * Builds the Lua snippet that discovers the test files by substituting the
 * test directory (argv[1], or "./lua" when not exactly one argument is
 * given) for the '$' placeholder in test_proto, exports FRAMES / CHANNELS /
 * RATE through BMO_* environment variables, then runs the tests on a Lua
 * DSP object and reports the elapsed time.
 *
 * Returns the value of bmo_runtests(), or 1 if setup fails.
 */
int main(int argc, char **argv)
{
	bmo_verbosity(BMO_MESSAGE_DEBUG);
	int ret = 0;
	const char * test_directory = NULL;
	char * test_string = NULL;
	const char * replacement = NULL;
	char env_buf[ENV_BUF_LEN] = {'\0'};
	const char * test_proto =
		"return runtests(find_test_files({'$'}, 1, \"*.lua\"))";

	test_directory = (argc != 2) ? "./lua" : argv[1];

	/* The result is one byte shorter than proto + directory ('$' is
	 * dropped), so this size always leaves room for the terminator.
	 * calloc zero-fills, which keeps the buffer terminated below. */
	test_string = calloc(
		strlen(test_directory) + strlen(test_proto) + 1,
		sizeof(char)
	);
	if (!test_string) {
		/* explicit failure: an assert() would vanish under NDEBUG */
		bmo_err("couldn't allocate string.");
		return 1;
	}

	/* test_proto is a literal containing '$', so this is a true invariant */
	replacement = strchr(test_proto, '$');
	assert(replacement);
	memcpy(test_string, test_proto, (size_t)(replacement - test_proto));
	strcat(test_string, test_directory);
	strcat(test_string, replacement + 1);

	// some tests rely on sanity testing values by comparing against environment
	// variables
	snprintf(env_buf, ENV_BUF_LEN, "%d", FRAMES);
	setenv("BMO_FRAMES", env_buf, 1);
	snprintf(env_buf, ENV_BUF_LEN, "%d", CHANNELS);
	setenv("BMO_CHANNELS", env_buf, 1);
	snprintf(env_buf, ENV_BUF_LEN, "%d", RATE);
	setenv("BMO_RATE", env_buf, 1);

	stopwatch_start();
	BMO_dsp_obj_t * dsp = bmo_dsp_lua_new(0, CHANNELS, FRAMES, RATE, NULL, 0);
	if (!dsp) {
		bmo_err("couldn't create Lua DSP object.\n");
		free(test_string);
		return 1;
	}
	dsp->_init(dsp, 0);
	ret = bmo_runtests(dsp, "testingunit.lua", test_string, NULL);
	bmo_info("tests ran in %fs\n", stopwatch_stop());
	if (ret != 0) {
		bmo_err("test failure:%d\n", ret);
	}
	dsp->_close(dsp, 0);
	free(dsp);
	free(test_string);

	return ret;
}
Exemple #8
0
/*
 * Times fib(n) for the n given as the first argument and prints the
 * elapsed time and the result as a JSON object.
 * Without an argument only a usage line is printed.
 */
int main(int argc, char* argv[]) {
    stopwatch sw;
    int n;
    int x;

    if (argc < 2) {
        printf("usage: %s x\n", argv[0]);
        return 0;
    }

    n = atoi(argv[1]);

    stopwatch_start(&sw);
    x = fib(n);
    stopwatch_stop(&sw);

    printf("{ \"time\": %f, \"output\": %d }\n", get_interval_by_sec(&sw), x);
    return 0;
}
Exemple #9
0
/*
 * Returns a newly allocated matrix holding the product a * b; the
 * multiplication itself is bracketed by stopwatch_start()/stopwatch_stop().
 * Terminates the program with status 1 if a->columns differs from b->rows.
 */
matrix_t *multiply_matrix(matrix_t *a, matrix_t *b)
{
	matrix_t *product;

	if (a->columns == b->rows) {
		product = allocate_matrix(a->rows, b->columns);

		stopwatch_start();
		__multiply_matrix(product, a, b);
		stopwatch_stop();

		return product;
	}

	/* dimension mismatch */
	fprintf(stderr, "multiply_matrix(): cannot multiply\n");
	exit(1);
}
Exemple #10
0
void benchmark(struct benchmark_config *config)
{
	int i;
	struct worker_info *producers;
	struct worker_info *consumers;
	struct work_queue queue_to_producer;
	struct work_queue queue_to_consumer;
	struct work_queue trash_queue;
	unsigned long long start, elapsed;

	work_queue_init(&queue_to_producer);
	work_queue_init(&queue_to_consumer);
	work_queue_init(&trash_queue);

	producers = create_workers(config, config->producer_thnum,
				config->producer, &queue_to_producer,
				&queue_to_consumer);
	consumers = create_workers(config, config->consumer_thnum,
				config->consumer, &queue_to_consumer,
				&trash_queue);
	start = stopwatch_start();

	for (i = 0; i < config->num_works; i++) {
		struct work *work = xmalloc(sizeof(*work));

		memset(work, 0, sizeof(*work));
		work->seed = config->seed_offset + i;
		work_queue_push(&queue_to_producer, work);
	}
	work_queue_close(&queue_to_producer);

	join_workers(producers, config->producer_thnum);
	work_queue_close(&queue_to_consumer);

	join_workers(consumers, config->consumer_thnum);
	work_queue_close(&trash_queue);

	elapsed = stopwatch_stop(start);

	collect_results(config, &trash_queue, start, elapsed);

	destroy_workers(consumers, config->consumer_thnum);
	destroy_workers(producers, config->producer_thnum);

	work_queue_destroy(&queue_to_producer);
	work_queue_destroy(&queue_to_consumer);
	work_queue_destroy(&trash_queue);
}
Exemple #11
0
/*
 * Times `count` successive calls of lav3_normalize() on a freshly reset
 * vector and prints one dictionary-style record with the id, the count,
 * the input vector, the resulting vector and the elapsed time.
 */
static void test_lav3_normalize(unsigned int count) {
	lav3 V;
	unsigned int i;	/* same type as count: no signed/unsigned comparison */
	stopwatch watch;
	test_reset_vectors(&V, NULL);
	printf("    {'id' : 'lav3_normalize',\n");
	printf("     'count' : %u,\n", count);
	printf("     'V' : "); print_lav3_tuple(&V); printf(",\n");
	stopwatch_start(&watch);
	for (i = 0; i < count; i++)
		lav3_normalize(&V);
	stopwatch_stop(&watch);
	/* key/value separator added so the record matches the other entries */
	printf("     'result_V' : "); print_lav3_tuple(&V); printf(",\n");
	printf("     'time' : "); stopwatch_print(&watch); printf("\n    },\n");
}
Exemple #12
0
/*
 * Times numprime(n) for the input size given as the first argument and
 * prints the elapsed time and the result as a JSON object.
 * Returns 1 (after a usage line) when the argument is missing.
 */
int main(int argc, char **argv) {
	struct stopwatch sw;
	int n;
	int x;

	if (argc < 2) {
		printf("usage: %s <input size>\n", argv[0]);
		return 1;
	}

	n = atoi(argv[1]);

	stopwatch_start(&sw);
	x = numprime(n);
	stopwatch_stop(&sw);

	printf("{ \"time\": %f, \"result\": %d }\n", get_interval_by_sec(&sw), x);

	return 0;
}
Exemple #13
0
/*******************************************************************************
 * Optical gate (photo-interrupter) handling
 *
 * Polls the optical input. When its value changes to 1, the stopwatch is
 * started if it is not running and still at its initial time (t == t0);
 * otherwise it is stopped. Always returns 0.
*******************************************************************************/
int opto_idle()
{
    char opto = OPTO_read();

    /* react to changes only */
    if (opto == last_opto) {
        return 0;
    }
    last_opto = opto;

    /* only the transition to 1 triggers an action */
    if (opto != 1) {
        return 0;
    }

    if (!isrunning && (t == t0)) {
        stopwatch_start();
    }
    else {
        stopwatch_stop();
    }

    return 0;
}
Exemple #14
0
/*
 * Times `count` successive calls of lav3_dot() on two freshly reset
 * vectors, accumulating the results, and prints one dictionary-style
 * record with the id, the count, both inputs, the accumulated dot product
 * and the elapsed time.
 */
static void test_lav3_dot(unsigned int count) {
	lav3 V1;
	lav3 V2;
	las dot_res;
	unsigned int i;	/* same type as count: no signed/unsigned comparison */
	stopwatch watch;
	dot_res = 0.;
	printf("    {'id' : 'lav3_dot',\n");
	test_reset_vectors(&V1, &V2);
	printf("     'count' : %u,\n", count);
	printf("     'V1' : "); print_lav3_tuple(&V1); printf(",\n");
	printf("     'V2' : "); print_lav3_tuple(&V2); printf(",\n");
	stopwatch_start(&watch);
	for (i = 0; i < count; i++)
		dot_res += lav3_dot(&V1,&V2);
	stopwatch_stop(&watch);
	printf("     'result_dot' : "); print_las(dot_res); printf(",\n");
	printf("     'time' : "); stopwatch_print(&watch); printf("\n    },\n");
}
Exemple #15
0
/*
 * Asks which transpose implementation to run (0 = original, 1 = optimized),
 * times it on a TEST_DIM x TEST_DIM matrix of random ints and prints the
 * elapsed milliseconds.
 *
 * Returns 0 on success, -1 on invalid input or allocation failure.
 */
int main(void) {

  int *s,*d;
  char msg[255];
  unsigned int fselect;
  Stopwatch sw;
  void (*transfuncs[2])(int *, int *, int) = {transpose, transpose_O};


  printf(" Choose transpose (0=original, 1=optimized)\n");

  /* %u matches the unsigned variable; an unparsable answer is rejected
   * instead of indexing transfuncs with an uninitialised value. */
  if (scanf("%u", &fselect) != 1 || fselect > 1) {
    puts("Invalid transpose function");
    return(-1);
  }

  // initialize data
  s = random_ints(((TEST_DIM) * (TEST_DIM)), RANDOM_M_SEED, 4);
  d = (int *)malloc(sizeof(int) * ((TEST_DIM) * (TEST_DIM)));
  sw = new_stopwatch();
  if (d == NULL || s == NULL || sw == NULL) {
    /* release whatever was obtained before bailing out */
    free(s);
    free(d);
    if (sw != NULL) {
      destroy_stopwatch(sw);
    }
    return -1;
  }

  // performance test given transpose
  stopwatch_start(sw);
  transfuncs[fselect](d,s,TEST_DIM);
  stopwatch_stop(sw);

  /* NOTE(review): %li assumes stopwatch_milliseconds() returns long - confirm */
  snprintf(msg, 255, "%s transform: %li ms\n", 
      (fselect) ? "Optimized" : "Original", stopwatch_milliseconds(sw));
  puts(msg);

  // superfluous memory deallocation activities
  free(s);
  free(d);
  destroy_stopwatch(sw);

  return 0;
}
Exemple #16
0
/*******************************************************************************
 * Keyboard handling
 *
 * Polls the 4x4 keypad and reacts to a change of the decoded key:
 *   '*'  resets the stopwatch,
 *   '#'  stops it when running, or starts it when it is not running and
 *        still at its initial time (t == t0).
 * Always returns 0.
*******************************************************************************/
int keyboard_idle()
{
  char ch = key_decode(read_word_keyboard_4x4());

  /* react to changes only */
  if (ch == last_ch)
  {
    return 0;
  }
  last_ch = ch;

  switch (ch)
  {
    case '*':
      stopwatch_reset();
      break;

    case '#':
      if (isrunning) {
          stopwatch_stop();
      }
      else if (!isrunning && (t == t0)) {
          stopwatch_start();
      }
      break;

    default:
      /* no key (0) or a key without a function */
      break;
  }

  return 0;
}
/*
 * LU decomposition benchmark driver (OpenCL).
 *
 * Options: -i input_file reads the matrix from a file, -s matrix_size
 * creates one with create_matrix(), -v verifies the result with
 * lud_verify(). The kernels are loaded from ./lud_kernel.cl, built with
 * -DBLOCK_SIZE, and run block by block (diagonal, perimeter, internal);
 * the transfer + compute time is printed in milliseconds.
 *
 * Returns 0 on success and -1 on an OpenCL or file error; argument and
 * matrix-creation errors exit with EXIT_FAILURE.
 */
int
main ( int argc, char *argv[] )
{
	printf("WG size of kernel = %d X %d\n", BLOCK_SIZE, BLOCK_SIZE);
	int matrix_dim = 32; /* default matrix_dim */
	int opt, option_index=0;
	func_ret_t ret;
	const char *input_file = NULL;
	float *m, *mm;
	stopwatch sw;

	while ((opt = getopt_long(argc, argv, "::vs:i:",
	                          long_options, &option_index)) != -1 ) {
		switch(opt){
		case 'i':
			input_file = optarg;
			break;
		case 'v':
			do_verify = 1;
			break;
		case 's':
			matrix_dim = atoi(optarg);
			printf("Generate input matrix internally, size =%d\n", matrix_dim);
			break;
		case '?':
			fprintf(stderr, "invalid option\n");
			break;
		case ':':
			fprintf(stderr, "missing argument\n");
			break;
		default:
			fprintf(stderr, "Usage: %s [-v] [-s matrix_size|-i input_file]\n",
			        argv[0]);
			exit(EXIT_FAILURE);
		}
	}

	/* reject stray positional arguments and the no-option case */
	if ( (optind < argc) || (optind == 1)) {
		fprintf(stderr, "Usage: %s [-v] [-s matrix_size|-i input_file]\n", argv[0]);
		exit(EXIT_FAILURE);
	}

	if (input_file) {
		printf("Reading matrix from file %s\n", input_file);
		ret = create_matrix_from_file(&m, input_file, &matrix_dim);
		if (ret != RET_SUCCESS) {
			m = NULL;
			fprintf(stderr, "error create matrix from file %s\n", input_file);
			exit(EXIT_FAILURE);
		}
	}
	else if (matrix_dim) {
		printf("Creating matrix internally size=%d\n", matrix_dim);
		ret = create_matrix(&m, matrix_dim);
		if (ret != RET_SUCCESS) {
			m = NULL;
			fprintf(stderr, "error create matrix internally size=%d\n", matrix_dim);
			exit(EXIT_FAILURE);
		}
	}
	else {
		printf("No input file specified!\n");
		exit(EXIT_FAILURE);
	}

	if (do_verify){
		printf("Before LUD\n");
		/* keep a copy of the input for the verification step */
		matrix_duplicate(m, &mm, matrix_dim);
	}

	/* Load the kernel source into a zero-filled buffer. */
	int sourcesize = 1024*1024;
	char * source = (char *)calloc(sourcesize, sizeof(char));
	if(!source) { printf("ERROR: calloc(%d) failed\n", sourcesize); return -1; }

	char * kernel_lud_diag   = "lud_diagonal";
	char * kernel_lud_peri   = "lud_perimeter";
	char * kernel_lud_inter  = "lud_internal";
	const char * kernel_path = "./lud_kernel.cl";
	FILE * fp = fopen(kernel_path, "rb");
	if(!fp) { printf("ERROR: unable to open '%s'\n", kernel_path); return -1; }
	/* Read at most sourcesize-1 bytes so the buffer always stays
	 * NUL-terminated for clCreateProgramWithSource (lengths == NULL). */
	size_t nread = fread(source, 1, (size_t)sourcesize - 1, fp);
	if(ferror(fp)) { printf("ERROR: unable to read '%s'\n", kernel_path); fclose(fp); return -1; }
	source[nread] = '\0';
	fclose(fp);

	// Use 1: GPU  0: CPU
	int use_gpu = 1;
	// OpenCL initialization
	if(initialize(use_gpu)) return -1;
	// compile kernel
	cl_int err = 0;
	const char * slist[2] = { source, 0 };
	cl_program prog = clCreateProgramWithSource(context, 1, slist, NULL, &err);
	if(err != CL_SUCCESS) { printf("ERROR: clCreateProgramWithSource() => %d\n", err); return -1; }
	char clOptions[110];
	sprintf(clOptions," ");
#ifdef BLOCK_SIZE
	sprintf(clOptions + strlen(clOptions), " -DBLOCK_SIZE=%d", BLOCK_SIZE);
#endif

	err = clBuildProgram(prog, 0, NULL, clOptions, NULL, NULL);
	if(err != CL_SUCCESS) { printf("ERROR: clBuildProgram() => %d\n", err); return -1; }

	/* Check every kernel creation; previously only the last error code
	 * survived to the check. */
	cl_kernel diagnal;
	cl_kernel perimeter;
	cl_kernel internal;
	diagnal   = clCreateKernel(prog, kernel_lud_diag, &err);
	if(err != CL_SUCCESS) { printf("ERROR: clCreateKernel() 0 => %d\n", err); return -1; }
	perimeter = clCreateKernel(prog, kernel_lud_peri, &err);
	if(err != CL_SUCCESS) { printf("ERROR: clCreateKernel() 0 => %d\n", err); return -1; }
	internal  = clCreateKernel(prog, kernel_lud_inter, &err);
	if(err != CL_SUCCESS) { printf("ERROR: clCreateKernel() 0 => %d\n", err); return -1; }
	clReleaseProgram(prog);

	cl_mem d_m;
	d_m = clCreateBuffer(context, CL_MEM_READ_WRITE, matrix_dim*matrix_dim * sizeof(float), NULL, &err );
	if(err != CL_SUCCESS) { printf("ERROR: clCreateBuffer d_m (size:%d) => %d\n", matrix_dim*matrix_dim, err); return -1;}

	/* beginning of timing point */
	stopwatch_start(&sw);
	err = clEnqueueWriteBuffer(cmd_queue, d_m, 1, 0, matrix_dim*matrix_dim*sizeof(float), m, 0, 0, 0);
	if(err != CL_SUCCESS) { printf("ERROR: clEnqueueWriteBuffer d_m (size:%d) => %d\n", matrix_dim*matrix_dim, err); return -1; }

	/* One iteration per block column, i being the block offset: diagonal
	 * block, then the perimeter blocks, then the remaining internal blocks. */
	int i=0;
	for (i=0; i < matrix_dim-BLOCK_SIZE; i += BLOCK_SIZE) {

		clSetKernelArg(diagnal, 0, sizeof(void *), (void*) &d_m);
		clSetKernelArg(diagnal, 1, sizeof(float) * BLOCK_SIZE * BLOCK_SIZE, (void*)NULL );
		clSetKernelArg(diagnal, 2, sizeof(cl_int), (void*) &matrix_dim);
		clSetKernelArg(diagnal, 3, sizeof(cl_int), (void*) &i);

		size_t global_work1[3]  = {BLOCK_SIZE, 1, 1};
		size_t local_work1[3]  = {BLOCK_SIZE, 1, 1};

		err = clEnqueueNDRangeKernel(cmd_queue, diagnal, 2, NULL, global_work1, local_work1, 0, 0, 0);
		if(err != CL_SUCCESS) { printf("ERROR:  diagnal clEnqueueNDRangeKernel()=>%d failed\n", err); return -1; }

		clSetKernelArg(perimeter, 0, sizeof(void *), (void*) &d_m);
		clSetKernelArg(perimeter, 1, sizeof(float) * BLOCK_SIZE * BLOCK_SIZE, (void*)NULL );
		clSetKernelArg(perimeter, 2, sizeof(float) * BLOCK_SIZE * BLOCK_SIZE, (void*)NULL );
		clSetKernelArg(perimeter, 3, sizeof(float) * BLOCK_SIZE * BLOCK_SIZE, (void*)NULL );
		clSetKernelArg(perimeter, 4, sizeof(cl_int), (void*) &matrix_dim);
		clSetKernelArg(perimeter, 5, sizeof(cl_int), (void*) &i);

		size_t global_work2[3] = {BLOCK_SIZE * 2 * ((matrix_dim-i)/BLOCK_SIZE-1), 1, 1};
		size_t local_work2[3]  = {BLOCK_SIZE * 2, 1, 1};

		err = clEnqueueNDRangeKernel(cmd_queue, perimeter, 2, NULL, global_work2, local_work2, 0, 0, 0);
		if(err != CL_SUCCESS) { printf("ERROR:  perimeter clEnqueueNDRangeKernel()=>%d failed\n", err); return -1; }

		clSetKernelArg(internal, 0, sizeof(void *), (void*) &d_m);
		clSetKernelArg(internal, 1, sizeof(float) * BLOCK_SIZE * BLOCK_SIZE, (void*)NULL );
		clSetKernelArg(internal, 2, sizeof(float) * BLOCK_SIZE * BLOCK_SIZE, (void*)NULL );
		clSetKernelArg(internal, 3, sizeof(cl_int), (void*) &matrix_dim);
		clSetKernelArg(internal, 4, sizeof(cl_int), (void*) &i);

		size_t global_work3[3] = {BLOCK_SIZE * ((matrix_dim-i)/BLOCK_SIZE-1), BLOCK_SIZE * ((matrix_dim-i)/BLOCK_SIZE-1), 1};
		size_t local_work3[3] = {BLOCK_SIZE, BLOCK_SIZE, 1};

		err = clEnqueueNDRangeKernel(cmd_queue, internal, 2, NULL, global_work3, local_work3, 0, 0, 0);
		if(err != CL_SUCCESS) { printf("ERROR:  internal clEnqueueNDRangeKernel()=>%d failed\n", err); return -1; }
	}

	/* final diagonal block, at the offset where the loop stopped */
	clSetKernelArg(diagnal, 0, sizeof(void *), (void*) &d_m);
	clSetKernelArg(diagnal, 1, sizeof(float) * BLOCK_SIZE * BLOCK_SIZE, (void*)NULL );
	clSetKernelArg(diagnal, 2, sizeof(cl_int), (void*) &matrix_dim);
	clSetKernelArg(diagnal, 3, sizeof(cl_int), (void*) &i);

	size_t global_work1[3]  = {BLOCK_SIZE, 1, 1};
	size_t local_work1[3]  = {BLOCK_SIZE, 1, 1};
	err = clEnqueueNDRangeKernel(cmd_queue, diagnal, 2, NULL, global_work1, local_work1, 0, 0, 0);
	if(err != CL_SUCCESS) { printf("ERROR:  diagnal clEnqueueNDRangeKernel()=>%d failed\n", err); return -1; }

	err = clEnqueueReadBuffer(cmd_queue, d_m, 1, 0, matrix_dim*matrix_dim*sizeof(float), m, 0, 0, 0);
	if(err != CL_SUCCESS) { printf("ERROR: clEnqueueReadBuffer  d_m (size:%d) => %d\n", matrix_dim*matrix_dim, err); return -1; }
	clFinish(cmd_queue);
	/* end of timing point */
	stopwatch_stop(&sw);
	printf("Time consumed(ms): %lf\n", 1000*get_interval_by_sec(&sw));

	clReleaseMemObject(d_m);

	if (do_verify){
		printf("After LUD\n");
		printf(">>>Verify<<<<\n");
		lud_verify(mm, m, matrix_dim);
		free(mm);
	}

	free(m);

	if(shutdown()) return -1;

	return 0;
}
/*
 * Hands timing over from one stopwatch to another: stops watchToStop,
 * then starts watchToStart.
 */
void stopwatch_switch(stopwatch_t * watchToStop,
                      stopwatch_t * watchToStart)
{
	// TODO: use a single clock_gettime call?
	stopwatch_stop(watchToStop);
	stopwatch_start(watchToStart);
}
Exemple #19
0
/*
 * LU decomposition benchmark driver (OpenCL, with ocd timers).
 *
 * Reads the matrix named by -i, optionally verifies the result (-v),
 * builds the kernels from lud_kernel.cl with -D BLOCK_SIZE and runs them
 * block by block (diagonal, perimeter, internal). Each transfer and kernel
 * launch is wrapped in START_TIMER/END_TIMER; the overall time is printed
 * in milliseconds.
 *
 * Returns EXIT_SUCCESS on success; argument, file and build errors
 * terminate with EXIT_FAILURE.
 */
int
main ( int argc, char *argv[] )
{
    int matrix_dim = 32; /* default matrix_dim */
    int opt, option_index=0;
    func_ret_t ret;
    const char *input_file = NULL;
    float *m, *mm;
    stopwatch sw;

    cl_device_id clDevice;
    cl_context clContext;
    cl_command_queue clCommands;
    cl_program clProgram;
    cl_kernel clKernel_diagonal;
    cl_kernel clKernel_perimeter;
    cl_kernel clKernel_internal;
    cl_int dev_type;

    cl_int errcode;

    FILE *kernelFile;
    char *kernelSource;
    size_t kernelLength;
    long kernelFileSize;

    cl_mem d_m;

    ocd_init(&argc, &argv, NULL);
    ocd_options opts = ocd_get_options();
    platform_id = opts.platform_id;
    device_id = opts.device_id;


    while ((opt = getopt_long(argc, argv, "::vs:i:",
                              long_options, &option_index)) != -1 ) {
        switch(opt) {
        case 'i':
            input_file = optarg;
            break;
        case 'v':
            do_verify = 1;
            break;
        case 's':
            /* internal generation is rejected here; this case always exits */
            matrix_dim = atoi(optarg);
            fprintf(stderr, "Currently not supported, use -i instead\n");
            fprintf(stderr, "Usage: %s [-v] [-s matrix_size|-i input_file|-p platform|-d device]\n", argv[0]);
            exit(EXIT_FAILURE);
        case '?':
            fprintf(stderr, "invalid option\n");
            break;
        case ':':
            fprintf(stderr, "missing argument\n");
            break;
        default:
            fprintf(stderr, "Usage: %s [-v] [-s matrix_size|-i input_file|-p platform|-d device]\n",
                    argv[0]);
            exit(EXIT_FAILURE);
        }
    }

    /* reject stray positional arguments and the no-option case */
    if ( (optind < argc) || (optind == 1)) {
        fprintf(stderr, "Usage: %s [-v] [-s matrix_size|-i input_file|-p platform|-d device]\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    if (input_file) {
        printf("Reading matrix from file %s\n", input_file);
        ret = create_matrix_from_file(&m, input_file, &matrix_dim);
        if (ret != RET_SUCCESS) {
            m = NULL;
            fprintf(stderr, "error create matrix from file %s\n", input_file);
            exit(EXIT_FAILURE);
        }
    } else {
        printf("No input file specified!\n");
        exit(EXIT_FAILURE);
    }

    if (do_verify) {
        printf("Before LUD\n");
        print_matrix(m, matrix_dim);
        /* keep a copy of the input for the verification step */
        matrix_duplicate(m, &mm, matrix_dim);
    }

#ifdef USEGPU
    dev_type = CL_DEVICE_TYPE_GPU;
#elif defined(USE_AFPGA)
    dev_type = CL_DEVICE_TYPE_ACCELERATOR;
#else
    dev_type = CL_DEVICE_TYPE_CPU;
#endif


    clDevice = GetDevice(platform_id, device_id,dev_type);
    size_t max_worksize[3];
    errcode = clGetDeviceInfo(clDevice, CL_DEVICE_MAX_WORK_ITEM_SIZES,sizeof(size_t)*3, &max_worksize, NULL);
    CHECKERR(errcode);
    /* halve BLOCK_SIZE until BLOCK_SIZE^2 no longer exceeds the device's
     * first-dimension work-item limit */
    while(BLOCK_SIZE*BLOCK_SIZE>max_worksize[0])
        BLOCK_SIZE = BLOCK_SIZE/2;

    clContext = clCreateContext(NULL, 1, &clDevice, NULL, NULL, &errcode);
    CHECKERR(errcode);

    clCommands = clCreateCommandQueue(clContext, clDevice, CL_QUEUE_PROFILING_ENABLE, &errcode);
    CHECKERR(errcode);

    /* Load the kernel source; every step is checked so that a missing or
     * unreadable file is reported instead of crashing. */
    kernelFile = fopen("lud_kernel.cl", "r");
    if (kernelFile == NULL) {
        fprintf(stderr, "error: unable to open lud_kernel.cl\n");
        exit(EXIT_FAILURE);
    }
    fseek(kernelFile, 0, SEEK_END);
    kernelFileSize = ftell(kernelFile);
    if (kernelFileSize < 0) {
        fprintf(stderr, "error: unable to determine size of lud_kernel.cl\n");
        fclose(kernelFile);
        exit(EXIT_FAILURE);
    }
    kernelLength = (size_t) kernelFileSize;
    kernelSource = (char *) malloc(sizeof(char)*kernelLength);
    if (kernelSource == NULL && kernelLength > 0) {
        fprintf(stderr, "error: out of memory reading lud_kernel.cl\n");
        fclose(kernelFile);
        exit(EXIT_FAILURE);
    }
    rewind(kernelFile);
    /* byte-wise read: the explicit length is passed to OpenCL below, so the
     * buffer needs no terminator; a short read shrinks that length */
    kernelLength = fread((void *) kernelSource, 1, kernelLength, kernelFile);
    fclose(kernelFile);

    clProgram = clCreateProgramWithSource(clContext, 1, (const char **) &kernelSource, &kernelLength, &errcode);
    CHECKERR(errcode);

    free(kernelSource);
    char arg[100];
    sprintf(arg,"-D BLOCK_SIZE=%d", (int)BLOCK_SIZE);
    errcode = clBuildProgram(clProgram, 1, &clDevice, arg, NULL, NULL);
    if (errcode == CL_BUILD_PROGRAM_FAILURE)
    {
        char *log;
        size_t logLength;
        errcode = clGetProgramBuildInfo(clProgram, clDevice, CL_PROGRAM_BUILD_LOG, 0, NULL, &logLength);
        log = (char *) malloc(sizeof(char)*logLength);
        if (log != NULL) {
            errcode = clGetProgramBuildInfo(clProgram, clDevice, CL_PROGRAM_BUILD_LOG, logLength, (void *) log, NULL);
            fprintf(stderr, "Kernel build error! Log:\n%s", log);
            free(log);
        } else {
            fprintf(stderr, "Kernel build error! (no memory for build log)\n");
        }
        /* a failed build is a failure, not a successful run */
        return EXIT_FAILURE;
    }
    CHECKERR(errcode);

    clKernel_diagonal = clCreateKernel(clProgram, "lud_diagonal", &errcode);
    CHECKERR(errcode);
    clKernel_perimeter = clCreateKernel(clProgram, "lud_perimeter", &errcode);
    CHECKERR(errcode);
    clKernel_internal = clCreateKernel(clProgram, "lud_internal", &errcode);
    CHECKERR(errcode);

    d_m = clCreateBuffer(clContext, CL_MEM_READ_WRITE, matrix_dim*matrix_dim*sizeof(float), NULL, &errcode);
    CHECKERR(errcode);

    /* beginning of timing point */
    stopwatch_start(&sw);

    errcode = clEnqueueWriteBuffer(clCommands, d_m, CL_TRUE, 0, matrix_dim*matrix_dim*sizeof(float), (void *) m, 0, NULL, &ocdTempEvent);

    clFinish(clCommands);
    START_TIMER(ocdTempEvent, OCD_TIMER_H2D, "Matrix Copy", ocdTempTimer)
    END_TIMER(ocdTempTimer)
    CHECKERR(errcode);

    /* One iteration per block column, i being the block offset: diagonal
     * block, then the perimeter blocks, then the remaining internal blocks.
     * With START_POWER defined the whole sweep is repeated 1000 times. */
    int i=0;
    size_t localWorkSize[2];
    size_t globalWorkSize[2];
#ifdef START_POWER
    for( int iter = 0; iter < 1000; iter++)
#endif
        for (i=0; i < matrix_dim-BLOCK_SIZE; i += BLOCK_SIZE) {
            errcode = clSetKernelArg(clKernel_diagonal, 0, sizeof(cl_mem), (void *) &d_m);
            errcode |= clSetKernelArg(clKernel_diagonal, 1, sizeof(int), (void *) &matrix_dim);
            errcode |= clSetKernelArg(clKernel_diagonal, 2, sizeof(int), (void *) &i);
            CHECKERR(errcode);

            localWorkSize[0] = BLOCK_SIZE;
            globalWorkSize[0] = BLOCK_SIZE;

            errcode = clEnqueueNDRangeKernel(clCommands, clKernel_diagonal, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, &ocdTempEvent);
            clFinish(clCommands);
            START_TIMER(ocdTempEvent, OCD_TIMER_KERNEL, "Diagonal Kernels", ocdTempTimer)
            END_TIMER(ocdTempTimer)
            CHECKERR(errcode);
            errcode = clSetKernelArg(clKernel_perimeter, 0, sizeof(cl_mem), (void *) &d_m);
            errcode |= clSetKernelArg(clKernel_perimeter, 1, sizeof(int), (void *) &matrix_dim);
            errcode |= clSetKernelArg(clKernel_perimeter, 2, sizeof(int), (void *) &i);
            CHECKERR(errcode);
            localWorkSize[0] = BLOCK_SIZE*2;
            globalWorkSize[0] = ((matrix_dim-i)/BLOCK_SIZE-1)*localWorkSize[0];

            errcode = clEnqueueNDRangeKernel(clCommands, clKernel_perimeter, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, &ocdTempEvent);
            clFinish(clCommands);
            START_TIMER(ocdTempEvent, OCD_TIMER_KERNEL, "Perimeter Kernel", ocdTempTimer)
            CHECKERR(errcode);
            END_TIMER(ocdTempTimer)
            errcode = clSetKernelArg(clKernel_internal, 0, sizeof(cl_mem), (void *) &d_m);
            errcode |= clSetKernelArg(clKernel_internal, 1, sizeof(int), (void *) &matrix_dim);
            errcode |= clSetKernelArg(clKernel_internal, 2, sizeof(int), (void *) &i);
            CHECKERR(errcode);
            localWorkSize[0] = BLOCK_SIZE;
            localWorkSize[1] = BLOCK_SIZE;
            globalWorkSize[0] = ((matrix_dim-i)/BLOCK_SIZE-1)*localWorkSize[0];
            globalWorkSize[1] = ((matrix_dim-i)/BLOCK_SIZE-1)*localWorkSize[1];

            errcode = clEnqueueNDRangeKernel(clCommands, clKernel_internal, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, &ocdTempEvent);
            clFinish(clCommands);
            START_TIMER(ocdTempEvent, OCD_TIMER_KERNEL, "Internal Kernel", ocdTempTimer)
            END_TIMER(ocdTempTimer)
            CHECKERR(errcode);
        }

    /* final diagonal block, at the offset where the loop stopped */
    errcode = clSetKernelArg(clKernel_diagonal, 0, sizeof(cl_mem), (void *) &d_m);
    errcode |= clSetKernelArg(clKernel_diagonal, 1, sizeof(int), (void *) &matrix_dim);
    errcode |= clSetKernelArg(clKernel_diagonal, 2, sizeof(int), (void *) &i);
    CHECKERR(errcode);
    localWorkSize[0] = BLOCK_SIZE;
    globalWorkSize[0] = BLOCK_SIZE;

    errcode = clEnqueueNDRangeKernel(clCommands, clKernel_diagonal, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, &ocdTempEvent);
    clFinish(clCommands);
    START_TIMER(ocdTempEvent, OCD_TIMER_KERNEL, "Diagonal Kernels", ocdTempTimer)
    CHECKERR(errcode);

    END_TIMER(ocdTempTimer)

    errcode = clEnqueueReadBuffer(clCommands, d_m, CL_TRUE, 0, matrix_dim*matrix_dim*sizeof(float), (void *) m, 0, NULL, &ocdTempEvent);
    clFinish(clCommands);
    START_TIMER(ocdTempEvent, OCD_TIMER_D2H, "Matrix copy", ocdTempTimer)
    END_TIMER(ocdTempTimer)
    /* end of timing point */
    stopwatch_stop(&sw);
    printf("Time consumed(ms): %lf\n", 1000*get_interval_by_sec(&sw));

    clReleaseMemObject(d_m);

    if (do_verify) {
        printf("After LUD\n");
        print_matrix(m, matrix_dim);
        printf(">>>Verify<<<<\n");
        printf("matrix_dim: %d\n",matrix_dim);
        lud_verify(mm, m, matrix_dim);
        free(mm);
    }

    clReleaseKernel(clKernel_diagonal);
    clReleaseKernel(clKernel_perimeter);
    clReleaseKernel(clKernel_internal);
    clReleaseProgram(clProgram);
    clReleaseCommandQueue(clCommands);
    clReleaseContext(clContext);

    free(m);
    ocd_finalize();
    return EXIT_SUCCESS;
}				/* ----------  end of function main  ---------- */
Exemple #20
0
int main(int argc, char** argv)
{

  ocd_init(&argc, &argv, NULL);
  ocd_initCL();

  std::cerr << "N-Queen solver for OpenCL\n";
  std::cerr << "Ping-Che Chen\n\n";
  if(argc < 2) {
    std::cerr << "Usage: " << argv[0] << " [options] N\n";
    std::cerr << "\tN: board size (1 ~ 32)\n";
    std::cerr << "\t-cpu: use CPU (multi-threaded on Windows)\n";
    std::cerr << "\t-prof: enable profiler\n";
    std::cerr << "\t-threads #: set number of threads to #\n";
    std::cerr << "\t-blocksize #: set size of thread blocks to #\n";
    std::cerr << "\t-local: use local memory for arrays (default: off)\n";
    std::cerr << "\t-noatomics: do not use global atomics\n";
    std::cerr << "\t-novec: do not use vectorization\n";
    std::cerr << "\t-vec4: use 4D vectors instead of 2D (only when vectorized- default: off)\n";
    return 0;
  }

  // handle options
  bool force_cpu = false;
  bool profiling = false;
  int threads = 0;
  int block_size = 0;
  bool local = false;//default OFF (was true)
  bool noatomics = false;
  bool novec = false;
  bool use_vec4 = false;

  int start = 1;
  while(start < argc - 1) {
    if(std::strcmp(argv[start], "-cpu") == 0) {
      force_cpu = true;
    }
    else if(std::strcmp(argv[start], "-threads") == 0 && start < argc - 2) {
      threads = std::atoi(argv[start + 1]);
      start++;
    }
    else if(std::strcmp(argv[start], "-blocksize") == 0 && start < argc - 2) {
      block_size = std::atoi(argv[start + 1]);
      start++;
    }
    else if(std::strcmp(argv[start], "-local") == 0) {
      local = true;
    }
    else if(std::strcmp(argv[start], "-noatomics") == 0) {
      noatomics = true;
    }
    else if(std::strcmp(argv[start], "-novec") == 0) {
      novec = true;
    }
    else if(std::strcmp(argv[start], "-vec4") == 0) {
      use_vec4 = true;
    }
    else {
      std::cerr << "Unknown option " << argv[start] << "\n";
    }

    start ++;
  }

  int board_size = std::atoi(argv[start]);
  if(board_size < 1 || board_size > 32) {
    std::cerr << "Inalid board size (only 1 ~ 32 allowed)\n";
    return 0;
  }

  stopwatch sw;
  long long solutions = 0;
  long long unique_solutions = 0;
  if(force_cpu) {
    stopwatch_start(&sw);
    solutions = nqueen_cpu(board_size, &unique_solutions);
    stopwatch_stop(&sw);
  }
  else {
    stopwatch_start(&sw);
    cl_int err;

    // show device list
    size_t num_devices;

    num_devices=1;//In OpenDwarfs we only work with one device at a time.
    std::vector<cl_device_id> devices(num_devices / sizeof(cl_device_id));

    devices.clear();
    devices.resize(1);
    devices[0] = device_id;
    try {
      NQueenSolver nqueen(context, devices, profiling, threads, block_size, local, noatomics, novec, use_vec4);
      for(int i = 0; i < devices.size(); i++) {
	size_t name_length;
	err = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 0, 0, &name_length);
	if(err == CL_SUCCESS) {
	  std::string name;
	  name.resize(name_length + 1);
	  clGetDeviceInfo(devices[i], CL_DEVICE_NAME, name_length, &name[0], &name_length);
	  name[name_length] = 0;
	  std::cerr << "Device " << i << ": " << name.c_str() << "\n";
	  std::cerr << "\tUsing " << nqueen.GetThreads(i) << " threads\n";
	  std::cerr << "\tBlock size = " << nqueen.GetBlockSize(i) << " threads\n";
	  if(nqueen.AtomicsEnabled(i)) {
	    std::cerr << "\tUsing global atomics\n";
	  }

	  if(nqueen.VectorizationEnabled(i)) {
	    std::cerr << "\tUsing vectorization\n";

	    if(use_vec4) {
	      std::cerr << "\tUse 4D vectors\n";
	    }
	    else {
	      std::cerr << "\tUse 2D vectors\n";
	    }
	  }
	}
      }

      //start_time = std::clock();
      solutions = nqueen.Compute(board_size, &unique_solutions);
      //end_time = std::clock();

    }
    catch(CLError x)
      {
	if(x.GetErrorNo() == 1) {
	  std::cerr << "1 OpenCL kernel execution failed\n";
	}
	if(x.GetErrorNo() == 2) {
	  std::cerr << "2 OpenCL kernel execution failed\n";
	}
	if(x.GetErrorNo() == 3) {
	  std::cerr << "3 OpenCL kernel execution failed\n";
	}
	else {
	  std::cerr << x << "\n";
	}
      }
    stopwatch_stop(&sw);
    clReleaseContext(context);
  }

  std::cerr << "Solution took " << get_interval_by_sec(&sw) << " seconds to complete\n";
  std::cerr << board_size << "-queen has " << solutions << " solutions (" << unique_solutions << " unique)\n";

  printf("{ \"status\": %d, \"options\": \"-s %d\", \"time\": %f }\n", 1, board_size, get_interval_by_sec(&sw));

  ocd_finalize();
  return 0;
}
Exemple #21
0
/*
 * Reads a square matrix of comma-separated values from a text file (one
 * matrix row per line), runs lud_base() on it while timing the call, and
 * optionally writes the resulting matrix to an output file in the same
 * comma-separated layout.
 *
 * Options: -s size (default 32), -i input_path (required), -o output_path,
 *          -v (sets do_verify), -d (debug trace on stderr).
 *
 * Exits with EXIT_FAILURE on bad options, a non-positive size, allocation
 * failure, or when the input/output file cannot be opened.
 */
int
main ( int argc, char *argv[] )
{
    int matrix_dim = 32; /* default matrix_dim */
    int opt, option_index=0, error=0;
    char *input_path = NULL;
    char *output_path = NULL;
    FILE *file;
    double *m;
    stopwatch sw;
    int i,j;
    int debug = 0;
    int dropped = 0;     /* set when the file holds values outside the matrix */
    size_t linesiz=0;
    char* linebuf=NULL;
    char* token;
    const char comma[2] = ",";

    while ((opt = getopt_long(argc, argv, ":dvs:i:o:",
                              long_options, &option_index)) != -1 ) {
        switch(opt){
        case 'v':
            do_verify = 1;
            break;
        case 's':
            matrix_dim = atoi(optarg);
            break;
        case 'i':
            input_path = optarg;
            break;
        case 'o':
            output_path = optarg;
            break;
        case '?':
            fprintf(stderr, "invalid option\n");
            error=1;
            break;
        case 'd':
            debug=1;
            break;
        case ':':
            fprintf(stderr, "missing argument\n");
            error=1;
            break;
        default:
            error=1;
        }
    }

    /* A non-positive dimension would make every index below meaningless. */
    if ((optind < argc) || (optind == 1) || input_path == NULL || matrix_dim <= 0 || error) {
        fprintf(stderr, "Usage: %s -s size -i input_path [-v] [-d] [-o output_path]\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    file = fopen(input_path, "r");
    if (file == NULL) {
        fprintf(stderr, "Invalid input file path: %s\n", input_path);
        exit(EXIT_FAILURE);
    }

    // Read matrix from file
    if (debug) {
        fprintf(stderr, "Reading data from file %s\n", input_path);
    }
    /* Zero-initialised so a short input file never leaves indeterminate cells. */
    m = (double *)calloc((size_t)matrix_dim * (size_t)matrix_dim, sizeof(double));
    if (m == NULL) {
        fprintf(stderr, "Could not allocate a %d x %d matrix\n", matrix_dim, matrix_dim);
        fclose(file);
        exit(EXIT_FAILURE);
    }

    j = 0;
    /* getline() reuses and grows linebuf across iterations; it is freed once below. */
    while (getline(&linebuf, &linesiz, file) != -1) {
        if (debug) {
            fprintf(stderr, "Read line: %s\n", linebuf);
        }
        /* get the first number */
        i = 0;
        token = strtok(linebuf, comma);

        /* walk through other numbers */
        while( token != NULL ) {
            if (debug) {
                fprintf(stderr, "Read token: %s\n", token);
            }
            /* Never write outside the matrix: extra rows/columns (or a
             * trailing blank line) used to overflow the allocation. */
            if (j < matrix_dim && i < matrix_dim) {
                m[j*matrix_dim + i] = atof(token);
            } else {
                dropped = 1;
            }
            token = strtok(NULL, comma);
            i = i + 1;
        }

        j = j + 1;
    }
    free(linebuf);
    linebuf=NULL;
    fclose(file);

    if (dropped) {
        fprintf(stderr, "Warning: %s contains values outside a %d x %d matrix; they were ignored\n",
                input_path, matrix_dim, matrix_dim);
    }

    if (debug) {
        fprintf(stderr, "Computing LUD\n");
    }
    stopwatch_start(&sw);
    lud_base(m, matrix_dim);
    stopwatch_stop(&sw);

    if (output_path) {
        if (debug) {
            fprintf(stderr, "Saving result in %s\n", output_path);
        }
        file = fopen(output_path, "w");

        if (file == NULL) {
            fprintf(stderr, "Invalid output file path: %s\n", output_path);
            free(m);
            exit(EXIT_FAILURE);
        }

        for (j = 0; j < matrix_dim; ++j) {
            for (i = 0; i < matrix_dim; ++i) {
                /* 21 fractional digits, the precision used by the original writer */
                fprintf(file, "%.*f", 21, m[j*matrix_dim+i]);
                if (i < matrix_dim-1) {
                    fprintf(file, ",");
                }
            }
            fprintf(file, "\n");
        }
        fclose(file);
    }

    free(m);
    printf("{ \"status\": %d, \"options\": \"-s %d\", \"time\": %f }\n", 1, matrix_dim, get_interval_by_sec(&sw));
    return EXIT_SUCCESS;
}				/* ----------  end of function main  ---------- */
Exemple #22
0
/*
 * vecsum: a microbenchmark that measures how fast HDFS data can be read in
 * several different ways.  It creates a file of floating-point 'doubles' and
 * sums them repeatedly; on some CPUs the summation uses assembly
 * optimizations (SSE, etc).
 *
 * Returns 0 on success and a non-zero value on any failure.
 */
int main(void)
{
    struct stopwatch *watch = NULL;
    struct libhdfs_data *ldata = NULL;
    struct local_data *cdata = NULL;
    struct options *opts = NULL;
    int ret = 1;

    /* Validate the compile-time chunk sizes, stopping at the first bad one. */
    if (check_byte_size(VECSUM_CHUNK_SIZE, "VECSUM_CHUNK_SIZE"))
        goto done;
    if (check_byte_size(ZCR_READ_CHUNK_SIZE, "ZCR_READ_CHUNK_SIZE"))
        goto done;
    if (check_byte_size(NORMAL_READ_CHUNK_SIZE, "NORMAL_READ_CHUNK_SIZE"))
        goto done;

    opts = options_create();
    if (opts == NULL)
        goto done;

    /* Exactly one of cdata / ldata is created, depending on the mode. */
    if (opts->ty != VECSUM_LOCAL) {
        ldata = libhdfs_data_create(opts);
        if (ldata == NULL)
            goto done;
    } else {
        cdata = local_data_create(opts);
        if (cdata == NULL)
            goto done;
    }

    watch = stopwatch_create();
    if (watch == NULL)
        goto done;

    /* Run the selected benchmark; an unrecognised mode leaves ret non-zero. */
    if (opts->ty == VECSUM_LOCAL) {
        vecsum_local(cdata, opts);
        ret = 0;
    } else if (opts->ty == VECSUM_LIBHDFS) {
        ret = vecsum_libhdfs(ldata, opts);
    } else if (opts->ty == VECSUM_ZCR) {
        ret = vecsum_zcr(ldata, opts);
    }
    if (ret != 0) {
        fprintf(stderr, "vecsum failed with error %d\n", ret);
    }

done:
    fprintf(stderr, "cleaning up...\n");
    /* The stopwatch is only stopped (and the byte count reported) on success. */
    if (ret == 0 && watch != NULL) {
        long long length = vecsum_length(opts, ldata);
        if (length >= 0) {
            stopwatch_stop(watch, length * opts->passes);
        }
    }
    if (cdata != NULL)
        local_data_free(cdata);
    if (ldata != NULL)
        libhdfs_data_free(ldata);
    if (opts != NULL)
        options_free(opts);
    return ret;
}
Exemple #23
0
/*
 * Calls stopwatch_stop() on the stopwatch stored in the given profiler.
 *
 * profiler is dereferenced unconditionally, so it must not be NULL.
 */
void profiler_exit(PROFILER* profiler)
{
	stopwatch_stop(profiler->stopwatch);
}
/*
 * Records a lap on stopwatch w.  In this implementation a lap is simply
 * forwarded to stopwatch_stop(); no additional bookkeeping is done here.
 */
void stopwatch_lap(stopwatch * w) {
	stopwatch_stop(w);
}
Exemple #25
0
/*
 * List-ranking benchmark driver.
 *
 * For each of NTRIALS trials a random linked list of N nodes is generated,
 * ranked sequentially (reference) and in parallel (timed); the parallel
 * result is compared against the reference with memcmp().  Timing statistics
 * are written to stdout, or to the file given with -o.
 *
 * Options: -t trials (default 1), -n list length (default 1000000),
 *          -o output file, -h usage.
 *
 * Returns EXIT_SUCCESS, or exits with EXIT_FAILURE on bad arguments,
 * allocation failure, or when the output file cannot be opened.
 */
int
main (int argc, char* argv[])
{
  int option_char;
  int NTRIALS = 1;
  int N = 1000000;
  char *filename = NULL;

  while ((option_char = getopt(argc, argv, "t:n:o:h")) != -1) {
    switch (option_char) {
      case 't':
        NTRIALS = atoi(optarg);
        break;
      case 'n':
        N = atoi(optarg);
        break;
      case 'o':
        filename = optarg;
        break;
      case 'h':
        fprintf(stderr, "%s\n", USAGE);
        exit(EXIT_SUCCESS);
        break;
      default:
        fprintf(stderr, "%s\n", USAGE);
        exit(EXIT_FAILURE);
    }
  }

  /* Non-positive counts would turn into huge or empty allocations below. */
  if (NTRIALS <= 0 || N <= 0) {
    fprintf(stderr, "%s\n", USAGE);
    exit(EXIT_FAILURE);
  }

  long double* T = (long double*) malloc(NTRIALS * sizeof(long double));

  long *next = (long*) malloc(N * sizeof(long));
  long *par_rank = (long*) malloc(N * sizeof(long));
  long *seq_rank = (long*) malloc(N * sizeof(long));
  size_t ncorrect = 0;
  int status = EXIT_SUCCESS;

  if (T == NULL || next == NULL || par_rank == NULL || seq_rank == NULL) {
    fprintf(stderr, "Out of memory (N = %d, trials = %d)\n", N, NTRIALS);
    status = EXIT_FAILURE;
    goto cleanup;
  }

  for(int i = 0; i < NTRIALS; i++){

    initRandomList(next, N);

    long head = seqFindHead(next, N);

    /* Sequential ranking is the reference; it is not timed. */
    seqListRanks(head, next, seq_rank, N);

    stopwatch_t watch = stopwatch_start();
    parallelListRanks(head, next, par_rank, N);
    T[i] = stopwatch_stop(watch);

    if( memcmp(seq_rank, par_rank, N * sizeof(long)) == 0)
      ncorrect++;
  }

  if (!filename)
    printStats(stdout, T, ncorrect, NTRIALS, N);
  else{
    FILE *fp;
    fp = fopen(filename, "w");
    if (fp == NULL) {
      /* Previously a NULL stream was passed on to printStats/fclose. */
      perror(filename);
      status = EXIT_FAILURE;
      goto cleanup;
    }
    printStats(fp, T, ncorrect, NTRIALS, N);
    fclose(fp);
  }

cleanup:
  free(next);
  free(par_rank);
  free(seq_rank);
  free(T);

  return status;
}
Exemple #26
0
/*
 * LU-decomposition driver for the lud_oacc() implementation.
 *
 * The matrix is either loaded from a file (-i) or created internally with
 * the requested size (-s, default 32).  -x / -y set the grid dimensions
 * passed to lud_oacc(); -v enables verification against a copy of the
 * original matrix.  The time spent in lud_oacc() is printed in milliseconds.
 */
int
main ( int argc, char *argv[] )
{
  const char *input_file = NULL;
  int matrix_dim = 32; /* default size */
  int grid_x = 0;
  int grid_y = 0;
  int option_index = 0;
  int opt;
  func_ret_t ret;
  float *m, *mm;
  stopwatch sw;

  for (;;) {
    opt = getopt_long(argc, argv, "::vs:i:x:y:",
                      long_options, &option_index);
    if (opt == -1) {
      break;
    }

    if (opt == 'i') {
      input_file = optarg;
    } else if (opt == 'v') {
      do_verify = 1;
    } else if (opt == 's') {
      matrix_dim = atoi(optarg);
    } else if (opt == 'x') {
      grid_x = atoi(optarg);
    } else if (opt == 'y') {
      grid_y = atoi(optarg);
    } else if (opt == '?') {
      /* Reported but not fatal: parsing continues with the next option. */
      fprintf(stderr, "invalid option\n");
    } else if (opt == ':') {
      fprintf(stderr, "missing argument\n");
    } else {
      fprintf(stderr, "1Usage: %s [-v] [-s matrix_size|-i input_file]\n",
              argv[0]);
      exit(EXIT_FAILURE);
    }
  }

  /* Neither an input file nor a non-zero size: nothing to decompose. */
  if (!input_file && !matrix_dim) {
    printf("No input file specified!\n");
    exit(EXIT_FAILURE);
  }

  if (input_file) {
    ret = create_matrix_from_file(&m, input_file, &matrix_dim);
    if (ret != RET_SUCCESS) {
      fprintf(stderr, "error create matrix from file %s\n", input_file);
      exit(EXIT_FAILURE);
    }
  } else {
    ret = create_matrix(&m, matrix_dim);
    if (ret != RET_SUCCESS) {
      fprintf(stderr, "error create matrix internally size=%d\n", matrix_dim);
      exit(EXIT_FAILURE);
    }
  }

  /* Keep an untouched copy so the result can be checked afterwards. */
  if (do_verify) {
    matrix_duplicate(m, &mm, matrix_dim);
  }

  wul();

  /* Only the decomposition itself is timed. */
  stopwatch_start(&sw);
  lud_oacc(m, matrix_dim, grid_x, grid_y);
  stopwatch_stop(&sw);
  printf("Time consumed(ms): %lf\n", 1000*get_interval_by_sec(&sw));

  if (do_verify) {
    printf("After LUD\n");
    printf(">>>Verify<<<<\n");
    lud_verify(mm, m, matrix_dim);
    free(mm);
  }

  free(m);

  return EXIT_SUCCESS;
}				/* ----------  end of function main  ---------- */
Exemple #27
0
/*
 * Micro-benchmark driver for array_setl().
 *
 * Three buffers of s_size bytes are filled with the repeating pattern
 * 'a'..'z', the first two bytes are overwritten with '/' and ';', and the
 * last 16 bytes of s and s2 are zeroed.  array_setl(s, s2[0], slen/4) is then
 * timed, and its return value, the first ten overlapping 32-bit words of s,
 * and the timer are printed.
 */
int
main (int argc, char **argv)
{
  enum { TAIL_ZEROS = 16 };
  char *s, *s2, *s3;
  size_t slen;
  long s_size = 1234567;
  long k;
  int i;

  s = (char *)malloc(s_size * sizeof(char));
  assert(s);
  s2 = (char *)malloc(s_size * sizeof(char));
  assert(s2);
  s3 = (char *)malloc(s_size * sizeof(char));
  assert(s3);

  /* Same non-zero letter at the same offset in all three buffers. */
  for (k = 0; k < s_size; k++) {
    char letter = (char)((k % 26) + 'a');
    s[k] = letter;
    s2[k] = letter;
    s3[k] = letter;
  }
  s[0] = '/'; s2[0] = '/'; s3[0] = '/'; //have a different zero'th elem, so that reverse search traverses the full array
  s[1] = ';'; s2[1] = ';'; s3[1] = ';';

  /* Zero the last TAIL_ZEROS bytes of s and s2 (s3 is left unterminated). */
  slen = s_size;
  for (k = 1; k <= TAIL_ZEROS; k++) {
    s[slen - k] = '\0';
    s2[slen - k] = '\0';
  }

  struct time array_setl_timer;
  stopwatch_reset(&array_setl_timer);
  stopwatch_run(&array_setl_timer);
  int set1l = array_setl(s, s2[0], slen/4);
  stopwatch_stop(&array_setl_timer);

  printf("output = %d\n", set1l);
  printf("first 10 elems:");
  for (i = 0; i < 10; i++) {
    /* Assemble the word byte-by-byte through a union: casting &s[i] to
     * uint32_t* is a misaligned, aliasing-violating read for most i. */
    union { char bytes[sizeof(uint32_t)]; uint32_t word; } view;
    size_t b;
    for (b = 0; b < sizeof view.bytes; b++) {
      view.bytes[b] = s[i + b];
    }
    printf(" %x", (unsigned int)view.word);
  }
  printf("\n");
  stopwatch_print(&array_setl_timer);

  free(s);
  free(s2);
  free(s3);
  return 0;
}
Exemple #28
0
/*
 * LU-decomposition driver for lud_base() on a randomly generated matrix.
 *
 * Options: -s matrix_size (default 32), -v (verify against a saved copy).
 * For matrix_dim == 1024 the result is additionally compared, value by
 * value, against 100 expected entries; any mismatch dumps the received
 * values and exits with status 1.  The elapsed time of lud_base() is
 * printed as a JSON line on stdout.
 */
int
main ( int argc, char *argv[] )
{
    int matrix_dim = 32; /* default matrix_dim */
    int option_index = 0;
    int error = 0;
    int opt;
    int i, k;
    func_ret_t ret;
    double *m, *mm;
    stopwatch sw;

    for (;;) {
        opt = getopt_long(argc, argv, ":vs:i:",
                          long_options, &option_index);
        if (opt == -1) {
            break;
        }

        if (opt == 'v') {
            do_verify = 1;
        } else if (opt == 's') {
            matrix_dim = atoi(optarg);
        } else {
            /* Everything else is an error; only '?' and ':' get a message. */
            if (opt == '?') {
                fprintf(stderr, "invalid option\n");
            } else if (opt == ':') {
                fprintf(stderr, "missing argument\n");
            }
            error = 1;
        }
    }

    if (error || (optind == 1) || (optind < argc)) {
        fprintf(stderr, "Usage: %s [-v] [-s matrix_size]\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    if (matrix_dim <= 1) {
        fprintf(stderr, "No input file or valid matrix size specified!\n");
        exit(EXIT_FAILURE);
    }

    fprintf(stderr, "Generating matrix of size %d x %d\n", matrix_dim, matrix_dim);
    ret = create_matrix_from_random(&m, matrix_dim);
    if (ret != RET_SUCCESS) {
        fprintf(stderr, "error could not generate random matrix of size %d x %d!\n", matrix_dim, matrix_dim);
        exit(EXIT_FAILURE);
    }

    /* Keep a pristine copy for lud_verify() before m is overwritten. */
    if (do_verify) {
        matrix_duplicate(m, &mm, matrix_dim);
    }

    stopwatch_start(&sw);
    lud_base(m, matrix_dim);
    stopwatch_stop(&sw);

    if (matrix_dim != 1024) {
        fprintf(stderr, "WARNING: No self-checking step for dimension '%d'\n", matrix_dim);
    } else {
        for (i = 0; i < 100; ++i) {
            if (m[expected_row_indices[i]*matrix_dim + expected_col_indices[i]] == expected_values[i]) {
                continue;
            }

            /* First mismatch: report it, dump all sampled values, abort. */
            fprintf(stderr, "ERROR: value at index (%d,%d) = '%.*f' is different from the expected value '%.*f'\n",
                expected_row_indices[i],
                expected_col_indices[i],
                /* 21 digits are printed so the value can be read back exactly */
                21, m[expected_row_indices[i]*matrix_dim + expected_col_indices[i]],
                21, expected_values[i]
            );
            fprintf(stderr, "Received values:\n");
            for (k = 0; k < 100; ++k) {
                fprintf(stderr, "%.*f, ", 21, m[expected_row_indices[k]*matrix_dim + expected_col_indices[k]]);
            }
            fprintf(stderr, "\n");
            exit(1);
        }
    }

    if (do_verify) {
        fprintf(stderr, ">>>Verify<<<<\n");
        lud_verify(mm, m, matrix_dim);
        free(mm);
    }

    free(m);
    printf("{ \"status\": %d, \"options\": \"-s %d\", \"time\": %f }\n", 1, matrix_dim, get_interval_by_sec(&sw));
    return EXIT_SUCCESS;
}				/* ----------  end of function main  ---------- */
Exemple #29
0
/*
 * Sparse matrix-vector multiply (CSR) OpenCL benchmark driver.
 *
 * Builds a 5-point Laplacian CSR matrix with random values, random x and y
 * vectors, uploads them to the device, runs the "csr" kernel from
 * spmv_csr_kernel.cl once, reads y back and compares it against a host
 * computation of y = y + A*x.
 *
 * Options: -v print every output row, -c use CPU.
 * Returns 0; exits with a failure status when the kernel source cannot be
 * loaded or built, or on allocation failure.
 */
int main(int argc, char** argv)
{
    cl_int err;
    int usegpu = USEGPU;
    int do_verify = 0;
    int opt, option_index=0;

    size_t global_size;
    size_t local_size;

    cl_device_id device_id;
    cl_context context;
    cl_command_queue commands;
    cl_program program;
    cl_kernel kernel;

    stopwatch sw;

    cl_mem csr_ap;
    cl_mem csr_aj;
    cl_mem csr_ax;
    cl_mem x_loc;
    cl_mem y_loc;

    FILE *kernelFile;
    char *kernelSource;
    long kernelFileSize;
    size_t kernelLength;

    float *output;


    ocd_init(&argc, &argv, NULL);
    ocd_options opts = ocd_get_options();
    platform_id = opts.platform_id;
    n_device = opts.device_id;

    while ((opt = getopt_long(argc, argv, "::vc::",
                            long_options, &option_index)) != -1 ) {
      switch(opt){
        case 'v':
          fprintf(stderr, "verify\n");
          do_verify = 1;
          break;
        case 'c':
          fprintf(stderr, "using cpu\n");
          usegpu = 0;
          break;
        default:
          fprintf(stderr, "Usage: %s [-v Warning: lots of output] [-c use CPU]\n",
                  argv[0]);
          exit(EXIT_FAILURE);
      }
    }

    /* Fill input set with random float values */
    int i;

    csr_matrix csr;
    csr = laplacian_5pt(512);
    int k = 0;
    for(k = 0; k < csr.num_nonzeros; k++){
        csr.Ax[k] = 1.0 - 2.0 * (rand() / (RAND_MAX + 1.0));
    }

    //The other arrays
    float * x_host = float_new_array(csr.num_cols);
    float * y_host = float_new_array(csr.num_rows);

    unsigned int ii;
    for(ii = 0; ii < csr.num_cols; ii++){
        x_host[ii] = rand() / (RAND_MAX + 1.0);
    }
    for(ii = 0; ii < csr.num_rows; ii++){
        y_host[ii] = rand() / (RAND_MAX + 2.0);
    }

    /* The row-pointer array has one entry more than there are rows. */
    const size_t ap_bytes = sizeof(unsigned int) * ((size_t) csr.num_rows + 1);

    /* Retrieve an OpenCL platform */
    device_id = GetDevice(platform_id, n_device);

    /* Create a compute context */
    context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
    CHKERR(err, "Failed to create a compute context!");

    /* Create a command queue */
    commands = clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &err);
    CHKERR(err, "Failed to create a command queue!");

    /* Load kernel source */
    kernelFile = fopen("spmv_csr_kernel.cl", "r");
    if (kernelFile == NULL) {
        fprintf(stderr, "Failed to open kernel source file spmv_csr_kernel.cl\n");
        exit(EXIT_FAILURE);
    }
    fseek(kernelFile, 0, SEEK_END);
    kernelFileSize = ftell(kernelFile);
    if (kernelFileSize <= 0) {
        fprintf(stderr, "Kernel source file spmv_csr_kernel.cl is empty or unreadable\n");
        fclose(kernelFile);
        exit(EXIT_FAILURE);
    }
    kernelSource = (char *) malloc(sizeof(char)*(size_t) kernelFileSize);
    if (kernelSource == NULL) {
        fprintf(stderr, "Out of memory while loading kernel source\n");
        fclose(kernelFile);
        exit(EXIT_FAILURE);
    }
    rewind(kernelFile);
    /* Use the number of bytes actually read as the source length: in text
     * mode it can be smaller than the size reported by ftell(). */
    kernelLength = fread((void *) kernelSource, 1, (size_t) kernelFileSize, kernelFile);
    fclose(kernelFile);
    if (kernelLength == 0) {
        fprintf(stderr, "Failed to read kernel source file spmv_csr_kernel.cl\n");
        free(kernelSource);
        exit(EXIT_FAILURE);
    }

    /* Create the compute program from the source buffer */
    program = clCreateProgramWithSource(context, 1, (const char **) &kernelSource, &kernelLength, &err);
    CHKERR(err, "Failed to create a compute program!");

    /* Free kernel source */
    free(kernelSource);

    /* Build the program executable */
    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (err == CL_BUILD_PROGRAM_FAILURE)
    {
        /* Keep the build error: the info queries below overwrite err. */
        cl_int buildErr = err;
        char *buildLog;
        size_t logLen = 0;
        err = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &logLen);
        buildLog = (char *) malloc(sizeof(char)*logLen + 1);
        if (buildLog != NULL) {
            buildLog[0] = '\0';
            err = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, logLen, (void *) buildLog, NULL);
            buildLog[logLen] = '\0';
            fprintf(stderr, "CL Error %d: Failed to build program! Log:\n%s", buildErr, buildLog);
            free(buildLog);
        } else {
            fprintf(stderr, "CL Error %d: Failed to build program!\n", buildErr);
        }
        exit(1);
    }
    CHKERR(err, "Failed to build program!");

    /* Create the compute kernel in the program we wish to run */
    kernel = clCreateKernel(program, "csr", &err);
    CHKERR(err, "Failed to create a compute kernel!");

    /* Create the input and output arrays in device memory for our calculation */
    csr_ap = clCreateBuffer(context, CL_MEM_READ_ONLY, ap_bytes, NULL, &err);
    CHKERR(err, "Failed to allocate device memory!");
    csr_aj = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned int)*csr.num_nonzeros, NULL, &err);
    CHKERR(err, "Failed to allocate device memory!");
    csr_ax = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float)*csr.num_nonzeros, NULL, &err);
    CHKERR(err, "Failed to allocate device memory!");
    x_loc = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float)*csr.num_cols, NULL, &err);
    CHKERR(err, "Failed to allocate device memory!");
    /* y is the result vector read back after the kernel runs, so the kernel
     * must be allowed to write to it (it was CL_MEM_READ_ONLY before). */
    y_loc = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float)*csr.num_rows, NULL, &err);
    CHKERR(err, "Failed to allocate device memory!");

    /* beginning of timing point */
    stopwatch_start(&sw);

    /* Write our data set into the input array in device memory */
    err = clEnqueueWriteBuffer(commands, csr_ap, CL_TRUE, 0, ap_bytes, csr.Ap, 0, NULL, &ocdTempEvent);
    clFinish(commands);
    START_TIMER(ocdTempEvent, OCD_TIMER_H2D, "CSR Data Copy", ocdTempTimer)
    END_TIMER(ocdTempTimer)
    CHKERR(err, "Failed to write to source array!");
    err = clEnqueueWriteBuffer(commands, csr_aj, CL_TRUE, 0, sizeof(unsigned int)*csr.num_nonzeros, csr.Aj, 0, NULL, &ocdTempEvent);
    clFinish(commands);
    START_TIMER(ocdTempEvent, OCD_TIMER_H2D, "CSR Data Copy", ocdTempTimer)
    END_TIMER(ocdTempTimer)
    CHKERR(err, "Failed to write to source array!");
    err = clEnqueueWriteBuffer(commands, csr_ax, CL_TRUE, 0, sizeof(float)*csr.num_nonzeros, csr.Ax, 0, NULL, &ocdTempEvent);
    clFinish(commands);
    START_TIMER(ocdTempEvent, OCD_TIMER_H2D, "CSR Data Copy", ocdTempTimer)
    END_TIMER(ocdTempTimer)
    CHKERR(err, "Failed to write to source array!");
    err = clEnqueueWriteBuffer(commands, x_loc, CL_TRUE, 0, sizeof(float)*csr.num_cols, x_host, 0, NULL, &ocdTempEvent);
    clFinish(commands);
    START_TIMER(ocdTempEvent, OCD_TIMER_H2D, "CSR Data Copy", ocdTempTimer)
    END_TIMER(ocdTempTimer)
    CHKERR(err, "Failed to write to source array!");
    err = clEnqueueWriteBuffer(commands, y_loc, CL_TRUE, 0, sizeof(float)*csr.num_rows, y_host, 0, NULL, &ocdTempEvent);
    clFinish(commands);
    START_TIMER(ocdTempEvent, OCD_TIMER_H2D, "CSR Data Copy", ocdTempTimer)
    CHKERR(err, "Failed to write to source array!");
    END_TIMER(ocdTempTimer)

    /* Set the arguments to our compute kernel */
    err = clSetKernelArg(kernel, 0, sizeof(unsigned int), &csr.num_rows);
    err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &csr_ap);
    err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &csr_aj);
    err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &csr_ax);
    err |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &x_loc);
    err |= clSetKernelArg(kernel, 5, sizeof(cl_mem), &y_loc);
    CHKERR(err, "Failed to set kernel arguments!");

    /* Get the maximum work group size for executing the kernel on the device */
    err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), (void *) &local_size, NULL);
    CHKERR(err, "Failed to retrieve kernel work group info!");

    /* Execute the kernel over the entire range of our 1d input data set */
    /* using the maximum number of work group items for this device */
    global_size = csr.num_rows;
    err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global_size, &local_size, 0, NULL, &ocdTempEvent);
    clFinish(commands);
    START_TIMER(ocdTempEvent, OCD_TIMER_KERNEL, "CSR Kernel", ocdTempTimer)
    END_TIMER(ocdTempTimer)
    CHKERR(err, "Failed to execute kernel!");

    /* Host copy of the result; heap-allocated because one float per row is
     * too large to place safely on the stack. */
    output = (float *) malloc(sizeof(float) * csr.num_rows);
    if (output == NULL) {
        fprintf(stderr, "Out of memory while allocating the output array\n");
        exit(EXIT_FAILURE);
    }

    /* Read back the results from the device to verify the output */
    err = clEnqueueReadBuffer(commands, y_loc, CL_TRUE, 0, sizeof(float)*csr.num_rows, output, 0, NULL, &ocdTempEvent);
    clFinish(commands);
    START_TIMER(ocdTempEvent, OCD_TIMER_D2H, "CSR Data Copy", ocdTempTimer)
    END_TIMER(ocdTempTimer)
    CHKERR(err, "Failed to read output array!");

    /* end of timing point */
    stopwatch_stop(&sw);
    printf("Time consumed(ms): %lf Gflops: %f \n", 1000*get_interval_by_sec(&sw), (2.0 * (double) csr.num_nonzeros / get_interval_by_sec(&sw)) / 1e9);

    /* Validate our results */
    if(do_verify){
        for (i = 0; i < csr.num_rows; i++){
            printf("row: %d	output: %f \n", i, output[i]);
        }
    }

    /* Reference result on the host: y_host = y_host + A * x_host */
    int row = 0;
    float sum = 0;
    int row_start = 0;
    int row_end = 0;
    for(row =0; row < csr.num_rows; row++){
        sum = y_host[row];

        row_start = csr.Ap[row];
        row_end   = csr.Ap[row+1];

        unsigned int jj = 0;
        for (jj = row_start; jj < row_end; jj++){
            sum += csr.Ax[jj] * x_host[csr.Aj[jj]];
        }
        y_host[row] = sum;
    }
    for (i = 0; i < csr.num_rows; i++){
        /* Compare the magnitude of the difference; subtracting magnitudes
         * missed sign flips and any case where the device value is larger. */
        if(fabsf(y_host[i] - output[i]) > .001)
             printf("Possible error, difference greater then .001 at row %d \n", i);
    }

    /* Print a brief summary detailing the results */
    ocd_finalize();

    /* Shutdown and cleanup */
    free(output);
    clReleaseMemObject(csr_ap);
    clReleaseMemObject(csr_aj);
    clReleaseMemObject(csr_ax);
    clReleaseMemObject(x_loc);
    clReleaseMemObject(y_loc);
    clReleaseProgram(program);
    clReleaseKernel(kernel);
    clReleaseCommandQueue(commands);
    clReleaseContext(context);
    return 0;
}
// Interrupt service routine for the transponder response pin (port B, PB3).
//
// Clears the pending pin interrupt, stops the global transponder stopwatch,
// and increments the response flag of the pulse currently selected by
// g_transponder_pulse_count.
// NOTE(review): g_transponder_pulse_count is used as an index without a
// bounds check here - confirm the caller keeps it within the flags array.
void transponder_response_isr(void)
{
	// Acknowledge the interrupt source first.
	GPIOPinIntClear(GPIO_PORTB_BASE, UUT_TRANSPONDER_RESPONSE_PIN_PB3);
	stopwatch_stop(&g_transponder_stopwatch);
	g_transponder_response_flags[g_transponder_pulse_count]++;
}