예제 #1
0
fp_t array_std(const fp_t* a, const int size)
{
	const fp_t mean = array_mean(a, size);
	fp_t acc = 0;
	for (int i = 0; i < size; i++) {
		acc += pow(a[i] - mean, 2);
	}
	return sqrt(acc);
}
예제 #2
0
파일: main.c 프로젝트: abarcz/jor-parallel
/** call: ./main <matrix_dimension> <number_of_tests> <use_gpu>*/
int main(int argc, char* argv[])
{
	cuda_identify();

	if (argc != 4) {
		printf("program must be called with arguments: matrix_dimension tests_number use_gpu(0/1)\n");
		exit(1);
	}
	const int M = atoi(argv[1]);
	printf("Using matrix dimension: %d\n", M);
	const int tests = atoi(argv[2]);
	const bool cpu = !atoi(argv[3]);

	// always use the same seed to get the same matrices during tests
	srand(0);

	#ifdef DOUBLE
		const fp_t min_diff = 0.00000001;	//for double, fails with 8192 and floats on both cpu and gpu
	#else
		const fp_t min_diff = 0.000001;
	#endif
	const fp_t alpha = 0.9;
	const int max_iter = 50;

	fp_t* exec_times = malloc(tests * sizeof(fp_t));
	fp_t* all_rmse = malloc(tests * sizeof(fp_t));
	for (int k = 0; k < tests; k++) {

		const DataSet dataset = generate_dataset(M);

		Matrix* last_x = aligned_vector(M, true);
		Matrix* x = aligned_vector(M, true);
		for (int i = 0; i < M; i++) {
		}

		int iterations = 0;

		// solve Ax = b
		const fp_t start_time = omp_get_wtime();

		fp_t sum = 0;
		int j = 0;
		int i = 0;
		const Matrix* A = dataset.A;
		const Matrix* b = dataset.b;
		assert(x != last_x);

		if (cpu) {
			//#pragma omp parallel shared(last_x, x, iterations) private(i, j, sum)
			while ((matrix_diff(x, last_x) > min_diff) && (max_iter < 0 || iterations < max_iter)) {
				//fp_t st_time0 = omp_get_wtime();
				//#pragma omp single
				{
					swap(last_x, x);
				}

				// A, M, alpha and b are constant, so they cannot be declared as shared
				//#pragma omp for schedule(dynamic)
				for (i = 0; i < M; i++) {
					sum = 0;

					//#pragma omp simd aligned(A, last_x: 16) reduction(+:sum) linear(j)
					for (j = 0; j < M; j++) {
						sum += A->elements[i * M + j] * last_x->elements[j];
					}

					sum -= A->elements[i * M + i] * last_x->elements[i];	// opt: outside the loop for sse optimizer
					x->elements[i] = (1 - alpha) * last_x->elements[i] + alpha * (b->elements[i] - sum) / A->elements[i * M + i];
				}

				//#pragma omp single nowait
				{
					iterations++;
				}
				//printf("%dus spent\n", (int)((omp_get_wtime() - st_time0) * 1000000));
			}
		} else {
			Matrix* d_A = device_matrix_from(A);
			#ifndef DOUBLE
				#ifdef TEXTURE
					texbind(d_A->elements, d_A->size * sizeof(fp_t));
				#endif
			#endif
			cudaMemcpy(d_A->elements, A->elements, A->size * sizeof(fp_t), cudaMemcpyHostToDevice);

			Matrix* d_b = device_matrix_from(b);
			cudaMemcpy(d_b->elements, b->elements, b->size * sizeof(fp_t), cudaMemcpyHostToDevice);

			Matrix* d_last_x = device_matrix_from(last_x);
			Matrix* d_c = device_matrix_from(b);
			Matrix* d_x = device_matrix_from(x);
			cudaMemcpy(d_x->elements, x->elements, x->size * sizeof(fp_t), cudaMemcpyHostToDevice);
			cudaMemcpy(d_last_x->elements, last_x->elements, last_x->size * sizeof(fp_t), cudaMemcpyHostToDevice);

			fp_t x_diff = 2 * min_diff;
			fp_t* d_x_diff;
			cudaMalloc((void**)&d_x_diff, sizeof(fp_t));

			//fp_t stime;
			while ((x_diff > min_diff) && (max_iter < 0 || iterations < max_iter)) {
				//stime = omp_get_wtime();
				cuda_multiply(*d_A, *d_last_x, *d_c);
				//print_cuda_elapsed(stime);

				//stime = omp_get_wtime();
				cuda_reduce(*d_A, *d_b, *d_c, d_x, d_last_x, alpha); //performs swap
				//print_cuda_elapsed(stime);

				//stime = omp_get_wtime();
				cuda_diff(*d_x, *d_last_x, d_x_diff);
				//print_cuda_elapsed(stime);

				iterations++;
				//cudaMemcpyFromSymbol(&x_diff, "d_x_diff", sizeof(x_diff), 0, cudaMemcpyDeviceToHost);
				//stime = omp_get_wtime();
				cudaMemcpy(&x_diff, d_x_diff, sizeof(fp_t), cudaMemcpyDeviceToHost);
				//print_cuda_elapsed(stime);
			}
			// copy last_x instead, as it was swapped
			cudaMemcpy(x->elements, d_last_x->elements, x->size * sizeof(fp_t), cudaMemcpyDeviceToHost);

			#ifndef DOUBLE
				#ifdef TEXTURE
					texunbind();
				#endif
			#endif
			cudaFree(d_A->elements);
			cudaFree(d_b->elements);
			cudaFree(d_last_x->elements);
			cudaFree(d_c->elements);
			cudaFree(d_x->elements);
			cudaFree(d_x_diff);

			free(d_A);
			free(d_b);
			free(d_c);
			free(d_last_x);
			free(d_x);
		}
		const fp_t end_time = omp_get_wtime();
		const fp_t seconds_spent = end_time - start_time;
		exec_times[k] = seconds_spent;

		if (verbose) {
			printf("x: ");
			print_matrix(x);
			printf("expected_x: ");
			print_matrix(dataset.x);
			//print_matrix(dataset.A);
			//print_matrix(dataset.b);
		}
		Matrix* bx = aligned_vector(M, false);
		for (int i = 0; i < M; i++) {
			for (int j = 0; j < M; j++) {
				bx->elements[i] += A->elements[i * M + j] * x->elements[j];
			}
		}
		if (verbose) {
			printf("resulting b: ");
			print_matrix(bx);
		}
		all_rmse[k] = rmse(bx, b);
		printf("RMSE: %0.10f\n", all_rmse[k]);
		printf("iterations: %d\nseconds: %0.10f\n", iterations, seconds_spent);

		assert(x != last_x);

		free(bx->elements);
		free(x->elements);
		free(last_x->elements);
		free(dataset.x->elements);
		free(dataset.A->elements);
		free(dataset.b->elements);

		free(bx);
		free(x);
		free(last_x);
		free(dataset.x);
		free(dataset.A);
		free(dataset.b);
	}
	printf("Time: mean %0.10f std %0.10f\n", array_mean(exec_times, tests), array_std(exec_times, tests));
	printf("RMSE: mean %0.10f std %0.10f\n", array_mean(all_rmse, tests), array_std(all_rmse, tests));
	free(all_rmse);
	free(exec_times);

	return 0;
}
예제 #3
0
int main(int argc, char **argv) {
 FILE *fp;
 surfspline_desc sspline;
 int err, npoints, itempart=0;
 int binary_output=FALSE, matlab_output=FALSE, mtv_output=FALSE, interpolate_only=FALSE;
 float fbuf, external_value=0.0;
 DATATYPE f, x, y, xmin, xmax, ymin, ymax, xstep, ystep;
 array index;
 pid_t pid;
 char outname[L_tmpnam], inname[L_tmpnam], **inargs, *infile;

 /*{{{  Read command line args*/
 for (inargs=argv+1; inargs-argv<argc && **inargs=='-'; inargs++) {
  switch(inargs[0][1]) {
   case 'B':
    binary_output=TRUE;
    break;
   case 'I':
    if (inargs[0][2]!='\0') {
     itempart=atoi(*inargs+2);
    }
    break;
   case 'i':
    interpolate_only=TRUE;
    if (inargs[0][2]!='\0') {
     external_value=atof(*inargs+2);
    }
    break;
   case 'M':
    matlab_output=TRUE;
    break;
   case 'm':
    mtv_output=TRUE;
    break;
   default:
    fprintf(stderr, "%s: Ignoring unknown option %s\n", argv[0], *inargs);
    continue;
  }
 }
 if (argc-(inargs-argv)!=3) {
  fprintf(stderr, "Usage: %s array_filename degree npoints\n"
          "Options:\n"
          " -Iitem: Use item number item as z-axis (2+item'th column); default: 0\n"
          " -i[value]: Interpolate only; set external values to value (default: 0.0)\n"
          " -B: Binary output (gnuplot floats)\n"
          " -M: Matlab output (x, y vectors and z matrix)\n"
          " -m: Plotmtv output\n"
  	  , argv[0]);
  return -1;
 }
 infile= *inargs++;
 if ((fp=fopen(infile, "r"))==NULL) {
  fprintf(stderr, "Can't open %s\n", infile);
  return -2;
 }
 array_undump(fp, &sspline.inpoints);
 fclose(fp);
 if (sspline.inpoints.message==ARRAY_ERROR) {
  fprintf(stderr, "Error in array_undump\n");
  return -3;
 }
 if (sspline.inpoints.nr_of_elements<3+itempart) {
  fprintf(stderr, "Not enough columns in array %s\n", *(inargs-1));
  return -3;
 }
 sspline.degree=atoi(*inargs++);;
 npoints=atoi(*inargs++);
 /*}}}  */

 /*{{{  Get the index of qhull point pairs by running qhull*/
 tmpnam(outname); tmpnam(inname);
 if ((pid=fork())==0) {	/* I'm the child */
#define LINEBUF_SIZE 128
  char linebuf[LINEBUF_SIZE];
  array tmp_array;

  /*{{{  Dump xy positions to tmp file inname*/
  tmp_array.nr_of_elements=2;
  tmp_array.nr_of_vectors=sspline.inpoints.nr_of_vectors;
  tmp_array.element_skip=1;
  if (array_allocate(&tmp_array)==NULL) {
   fprintf(stderr, "Error allocating tmp_array\n");
   return -4;
  }
  array_reset(&sspline.inpoints);
  do {
   array_write(&tmp_array, array_scan(&sspline.inpoints));
   array_write(&tmp_array, array_scan(&sspline.inpoints));
   array_nextvector(&sspline.inpoints);
  } while (tmp_array.message!=ARRAY_ENDOFSCAN);
  if ((fp=fopen(inname, "w"))==NULL) {
   fprintf(stderr, "Can't open %s\n", inname);
   return -5;
  }
  array_dump(fp, &tmp_array, ARRAY_ASCII);
  fclose(fp);
  array_free(&tmp_array);
  /*}}}  */

  snprintf(linebuf, LINEBUF_SIZE, "qhull C0 i b <%s >%s", inname, outname);
  execl("/bin/sh", "sh", "-c", linebuf, 0);
#undef LINEBUF_SIZE
 } else {
  wait(NULL);
 }
 unlink(inname);
 if ((fp=fopen(outname, "r"))==NULL) {
  fprintf(stderr, "Can't open %s\n", outname);
  return -6;
 }
 array_undump(fp, &index);
 fclose(fp);
 unlink(outname);
 /*}}}  */

 /*{{{  Find min, max and mean coordinates*/
 array_transpose(&sspline.inpoints);
 array_reset(&sspline.inpoints);
 xmin=array_min(&sspline.inpoints);
 ymin=array_min(&sspline.inpoints);
 array_reset(&sspline.inpoints);
 xmax=array_max(&sspline.inpoints);
 ymax=array_max(&sspline.inpoints);
 array_reset(&sspline.inpoints);
 xmean=array_mean(&sspline.inpoints);
 ymean=array_mean(&sspline.inpoints);
 xstep=(xmax-xmin)/npoints;
 ystep=(ymax-ymin)/npoints;
 array_transpose(&sspline.inpoints);
 array_reset(&sspline.inpoints);
 /*}}}  */

 /*{{{  Copy itempart to 3rd location if necessary*/
 /* Note: For this behavior it is essential that array_surfspline
  * will accept vectors of any size >=3 and only process the first three
  * elements ! */
 if (itempart>0) {
  DATATYPE hold;
  do {
   sspline.inpoints.current_element=2+itempart;
   hold=READ_ELEMENT(&sspline.inpoints);
   sspline.inpoints.current_element=2;
   WRITE_ELEMENT(&sspline.inpoints, hold);
   array_nextvector(&sspline.inpoints);
  } while (sspline.inpoints.message!=ARRAY_ENDOFSCAN);
 }
 /*}}}  */

 /*{{{  Do surface spline*/
 if ((err=array_surfspline(&sspline))!=0) {
  fprintf(stderr, "Error %d in array_surfspline\n", err);
  return err;
 }
 xmin-=xstep; xmax+=2*xstep;
 ymin-=ystep; ymax+=2*ystep;

 if (binary_output) {
  /*{{{  Gnuplot binary output*/
  int n_xval=(xmax-xmin)/xstep+1, n;	/* Number of x values */

  fbuf=n_xval;
  fwrite(&fbuf, sizeof(float), 1, stdout);
  for (fbuf=xmin, n=0; n<n_xval; fbuf+=xstep, n++) {	/* x values */
   fwrite(&fbuf, sizeof(float), 1, stdout);
  }
  for (y=ymin; y<=ymax; y+=ystep) {
   fbuf=y; fwrite(&fbuf, sizeof(float), 1, stdout);
   for (x=xmin, n=0; n<n_xval; x+=xstep, n++) {
    if (interpolate_only && !is_inside(&index, &sspline.inpoints, x, y)) {
     f=external_value;
    } else {
     f=array_fsurfspline(&sspline, x, y);
    }
    fbuf=f; fwrite(&fbuf, sizeof(float), 1, stdout);
   }
  }
  /*}}}  */
 } else if (matlab_output) {
  /*{{{  Matlab output*/
  array xm, ym, zm;
  xm.nr_of_elements=ym.nr_of_elements=1;
  xm.nr_of_vectors=zm.nr_of_elements=(xmax-xmin)/xstep+1;
  ym.nr_of_vectors=zm.nr_of_vectors=(ymax-ymin)/ystep+1;
  xm.element_skip=ym.element_skip=zm.element_skip=1;
  array_allocate(&xm); array_allocate(&ym); array_allocate(&zm);
  if (xm.message==ARRAY_ERROR || ym.message==ARRAY_ERROR || zm.message==ARRAY_ERROR) {
   fprintf(stderr, "Error allocating output arrays\n");
   return -7;
  }
  x=xmin; do {
   array_write(&xm, x);
   x+=xstep;
  } while (xm.message!=ARRAY_ENDOFSCAN);
  y=ymin; do {
   array_write(&ym, y);
   x=xmin; do {
    if (interpolate_only && !is_inside(&index, &sspline.inpoints, x, y)) {
     f=external_value;
    } else {
     f=array_fsurfspline(&sspline, x, y);
    }
    array_write(&zm, f);
    x+=xstep;
   } while (zm.message==ARRAY_CONTINUE);
   y+=ystep;
  } while (zm.message!=ARRAY_ENDOFSCAN);
  array_dump(stdout, &xm, ARRAY_MATLAB);
  array_dump(stdout, &ym, ARRAY_MATLAB);
  array_dump(stdout, &zm, ARRAY_MATLAB);
  array_free(&xm); array_free(&ym); array_free(&zm);
  /*}}}  */
 } else if (mtv_output) {
  /*{{{  Plotmtv output*/
  array zm;
  DATATYPE zmin=FLT_MAX, zmax= -FLT_MAX;
  zm.nr_of_elements=(xmax-xmin)/xstep+1;
  zm.nr_of_vectors=(ymax-ymin)/ystep+1;
  zm.element_skip=1;
  array_allocate(&zm);
  if (zm.message==ARRAY_ERROR) {
   fprintf(stderr, "Error allocating output arrays\n");
   return -7;
  }
  y=ymin; do {
   x=xmin; do {
    if (interpolate_only && !is_inside(&index, &sspline.inpoints, x, y)) {
     f=external_value;
    } else {
     f=array_fsurfspline(&sspline, x, y);
    }
    array_write(&zm, f);
    if (f<zmin) zmin=f;
    if (f>zmax) zmax=f;
    x+=xstep;
   } while (zm.message==ARRAY_CONTINUE);
   y+=ystep;
  } while (zm.message!=ARRAY_ENDOFSCAN);
  /*{{{  Print file header*/
  printf(
  "# Output of Spline_Gridder (C) Bernd Feige 1995\n\n"
  "$ DATA=CONTOUR\n\n"
  "%% toplabel   = \"Spline_Gridder output\"\n"
  "%% subtitle   = \"File: %s\"\n\n"
  "%% interp     = 0\n"
  "%% contfill   = on\n"
  "%% meshplot   = off\n\n"
  "%% xmin = %g  xmax = %g\n"
  "%% ymin = %g  ymax = %g\n"
  "%% zmin = %g  zmax = %g\n"
  "%% nx   = %d\n"
  "%% ny   = %d\n"
  , infile, 
  xmin, xmax, 
  ymin, ymax, 
  zmin, zmax, 
  zm.nr_of_elements, 
  zm.nr_of_vectors);
  /*}}}  */
  array_dump(stdout, &zm, ARRAY_MATLAB);
  array_free(&zm);
  /*}}}  */
 } else {
  /*{{{  Gnuplot output*/
  for (x=xmin; x<=xmax; x+=xstep) {
   for (y=ymin; y<=ymax; y+=ystep) {
    if (interpolate_only && !is_inside(&index, &sspline.inpoints, x, y)) {
     f=external_value;
    } else {
     f=array_fsurfspline(&sspline, x, y);
    }
    printf("%g %g %g\n", x, y, f);
   }
   printf("\n");
  }
  /*}}}  */
 }
 /*}}}  */

 return 0;
}
예제 #4
0
//
//  test_distance_utils.cpp
//  TimeKit
//
//  Created by DB on 10/24/14.
//  Copyright (c) 2014 DB. All rights reserved.
//

#include "catch.hpp"
#include "testing_utils.hpp"

#include "distance_utils.hpp"
#include "array_utils.h"

TEST_CASE( "z-normalization", "[distance_utils]" ) {
	
	SECTION("double") {
		typedef double data_t;
		
		unsigned int len = 6;
		data_t x[] = {.7, 0, -.3, 11, -.6, 4};
		znormalize(x, len);
		data_t mean = array_mean(x, len);
		
		REQUIRE(rnd(mean) == 0);
	}
}