/**
 * Population standard deviation of the first `size` values of `a`.
 *
 * The mean is obtained from array_mean(); the result is
 * sqrt(sum((a[i] - mean)^2) / size). The previous implementation omitted the
 * division by `size` and therefore returned the root of the *sum* of squared
 * deviations, which grows with the number of samples.
 *
 * @param a     values to summarise (read only)
 * @param size  number of values in `a`
 * @return the standard deviation, or 0 when `a` is null or `size` <= 0
 */
fp_t array_std(const fp_t* a, const int size) {
    if (!a || size <= 0) {
        return 0;
    }

    const fp_t mean = array_mean(a, size);
    fp_t acc = 0;
    for (int i = 0; i < size; i++) {
        const fp_t dev = a[i] - mean;
        acc += dev * dev;
    }
    return sqrt(acc / size);
}
/** call: ./main <matrix_dimension> <number_of_tests> <use_gpu>*/ int main(int argc, char* argv[]) { cuda_identify(); if (argc != 4) { printf("program must be called with arguments: matrix_dimension tests_number use_gpu(0/1)\n"); exit(1); } const int M = atoi(argv[1]); printf("Using matrix dimension: %d\n", M); const int tests = atoi(argv[2]); const bool cpu = !atoi(argv[3]); // always use the same seed to get the same matrices during tests srand(0); #ifdef DOUBLE const fp_t min_diff = 0.00000001; //for double, fails with 8192 and floats on both cpu and gpu #else const fp_t min_diff = 0.000001; #endif const fp_t alpha = 0.9; const int max_iter = 50; fp_t* exec_times = malloc(tests * sizeof(fp_t)); fp_t* all_rmse = malloc(tests * sizeof(fp_t)); for (int k = 0; k < tests; k++) { const DataSet dataset = generate_dataset(M); Matrix* last_x = aligned_vector(M, true); Matrix* x = aligned_vector(M, true); for (int i = 0; i < M; i++) { } int iterations = 0; // solve Ax = b const fp_t start_time = omp_get_wtime(); fp_t sum = 0; int j = 0; int i = 0; const Matrix* A = dataset.A; const Matrix* b = dataset.b; assert(x != last_x); if (cpu) { //#pragma omp parallel shared(last_x, x, iterations) private(i, j, sum) while ((matrix_diff(x, last_x) > min_diff) && (max_iter < 0 || iterations < max_iter)) { //fp_t st_time0 = omp_get_wtime(); //#pragma omp single { swap(last_x, x); } // A, M, alpha and b are constant, so they cannot be declared as shared //#pragma omp for schedule(dynamic) for (i = 0; i < M; i++) { sum = 0; //#pragma omp simd aligned(A, last_x: 16) reduction(+:sum) linear(j) for (j = 0; j < M; j++) { sum += A->elements[i * M + j] * last_x->elements[j]; } sum -= A->elements[i * M + i] * last_x->elements[i]; // opt: outside the loop for sse optimizer x->elements[i] = (1 - alpha) * last_x->elements[i] + alpha * (b->elements[i] - sum) / A->elements[i * M + i]; } //#pragma omp single nowait { iterations++; } //printf("%dus spent\n", (int)((omp_get_wtime() - st_time0) * 
1000000)); } } else { Matrix* d_A = device_matrix_from(A); #ifndef DOUBLE #ifdef TEXTURE texbind(d_A->elements, d_A->size * sizeof(fp_t)); #endif #endif cudaMemcpy(d_A->elements, A->elements, A->size * sizeof(fp_t), cudaMemcpyHostToDevice); Matrix* d_b = device_matrix_from(b); cudaMemcpy(d_b->elements, b->elements, b->size * sizeof(fp_t), cudaMemcpyHostToDevice); Matrix* d_last_x = device_matrix_from(last_x); Matrix* d_c = device_matrix_from(b); Matrix* d_x = device_matrix_from(x); cudaMemcpy(d_x->elements, x->elements, x->size * sizeof(fp_t), cudaMemcpyHostToDevice); cudaMemcpy(d_last_x->elements, last_x->elements, last_x->size * sizeof(fp_t), cudaMemcpyHostToDevice); fp_t x_diff = 2 * min_diff; fp_t* d_x_diff; cudaMalloc((void**)&d_x_diff, sizeof(fp_t)); //fp_t stime; while ((x_diff > min_diff) && (max_iter < 0 || iterations < max_iter)) { //stime = omp_get_wtime(); cuda_multiply(*d_A, *d_last_x, *d_c); //print_cuda_elapsed(stime); //stime = omp_get_wtime(); cuda_reduce(*d_A, *d_b, *d_c, d_x, d_last_x, alpha); //performs swap //print_cuda_elapsed(stime); //stime = omp_get_wtime(); cuda_diff(*d_x, *d_last_x, d_x_diff); //print_cuda_elapsed(stime); iterations++; //cudaMemcpyFromSymbol(&x_diff, "d_x_diff", sizeof(x_diff), 0, cudaMemcpyDeviceToHost); //stime = omp_get_wtime(); cudaMemcpy(&x_diff, d_x_diff, sizeof(fp_t), cudaMemcpyDeviceToHost); //print_cuda_elapsed(stime); } // copy last_x instead, as it was swapped cudaMemcpy(x->elements, d_last_x->elements, x->size * sizeof(fp_t), cudaMemcpyDeviceToHost); #ifndef DOUBLE #ifdef TEXTURE texunbind(); #endif #endif cudaFree(d_A->elements); cudaFree(d_b->elements); cudaFree(d_last_x->elements); cudaFree(d_c->elements); cudaFree(d_x->elements); cudaFree(d_x_diff); free(d_A); free(d_b); free(d_c); free(d_last_x); free(d_x); } const fp_t end_time = omp_get_wtime(); const fp_t seconds_spent = end_time - start_time; exec_times[k] = seconds_spent; if (verbose) { printf("x: "); print_matrix(x); printf("expected_x: "); 
print_matrix(dataset.x); //print_matrix(dataset.A); //print_matrix(dataset.b); } Matrix* bx = aligned_vector(M, false); for (int i = 0; i < M; i++) { for (int j = 0; j < M; j++) { bx->elements[i] += A->elements[i * M + j] * x->elements[j]; } } if (verbose) { printf("resulting b: "); print_matrix(bx); } all_rmse[k] = rmse(bx, b); printf("RMSE: %0.10f\n", all_rmse[k]); printf("iterations: %d\nseconds: %0.10f\n", iterations, seconds_spent); assert(x != last_x); free(bx->elements); free(x->elements); free(last_x->elements); free(dataset.x->elements); free(dataset.A->elements); free(dataset.b->elements); free(bx); free(x); free(last_x); free(dataset.x); free(dataset.A); free(dataset.b); } printf("Time: mean %0.10f std %0.10f\n", array_mean(exec_times, tests), array_std(exec_times, tests)); printf("RMSE: mean %0.10f std %0.10f\n", array_mean(all_rmse, tests), array_std(all_rmse, tests)); free(all_rmse); free(exec_times); return 0; }
int main(int argc, char **argv) { FILE *fp; surfspline_desc sspline; int err, npoints, itempart=0; int binary_output=FALSE, matlab_output=FALSE, mtv_output=FALSE, interpolate_only=FALSE; float fbuf, external_value=0.0; DATATYPE f, x, y, xmin, xmax, ymin, ymax, xstep, ystep; array index; pid_t pid; char outname[L_tmpnam], inname[L_tmpnam], **inargs, *infile; /*{{{ Read command line args*/ for (inargs=argv+1; inargs-argv<argc && **inargs=='-'; inargs++) { switch(inargs[0][1]) { case 'B': binary_output=TRUE; break; case 'I': if (inargs[0][2]!='\0') { itempart=atoi(*inargs+2); } break; case 'i': interpolate_only=TRUE; if (inargs[0][2]!='\0') { external_value=atof(*inargs+2); } break; case 'M': matlab_output=TRUE; break; case 'm': mtv_output=TRUE; break; default: fprintf(stderr, "%s: Ignoring unknown option %s\n", argv[0], *inargs); continue; } } if (argc-(inargs-argv)!=3) { fprintf(stderr, "Usage: %s array_filename degree npoints\n" "Options:\n" " -Iitem: Use item number item as z-axis (2+item'th column); default: 0\n" " -i[value]: Interpolate only; set external values to value (default: 0.0)\n" " -B: Binary output (gnuplot floats)\n" " -M: Matlab output (x, y vectors and z matrix)\n" " -m: Plotmtv output\n" , argv[0]); return -1; } infile= *inargs++; if ((fp=fopen(infile, "r"))==NULL) { fprintf(stderr, "Can't open %s\n", infile); return -2; } array_undump(fp, &sspline.inpoints); fclose(fp); if (sspline.inpoints.message==ARRAY_ERROR) { fprintf(stderr, "Error in array_undump\n"); return -3; } if (sspline.inpoints.nr_of_elements<3+itempart) { fprintf(stderr, "Not enough columns in array %s\n", *(inargs-1)); return -3; } sspline.degree=atoi(*inargs++);; npoints=atoi(*inargs++); /*}}} */ /*{{{ Get the index of qhull point pairs by running qhull*/ tmpnam(outname); tmpnam(inname); if ((pid=fork())==0) { /* I'm the child */ #define LINEBUF_SIZE 128 char linebuf[LINEBUF_SIZE]; array tmp_array; /*{{{ Dump xy positions to tmp file inname*/ tmp_array.nr_of_elements=2; 
tmp_array.nr_of_vectors=sspline.inpoints.nr_of_vectors; tmp_array.element_skip=1; if (array_allocate(&tmp_array)==NULL) { fprintf(stderr, "Error allocating tmp_array\n"); return -4; } array_reset(&sspline.inpoints); do { array_write(&tmp_array, array_scan(&sspline.inpoints)); array_write(&tmp_array, array_scan(&sspline.inpoints)); array_nextvector(&sspline.inpoints); } while (tmp_array.message!=ARRAY_ENDOFSCAN); if ((fp=fopen(inname, "w"))==NULL) { fprintf(stderr, "Can't open %s\n", inname); return -5; } array_dump(fp, &tmp_array, ARRAY_ASCII); fclose(fp); array_free(&tmp_array); /*}}} */ snprintf(linebuf, LINEBUF_SIZE, "qhull C0 i b <%s >%s", inname, outname); execl("/bin/sh", "sh", "-c", linebuf, 0); #undef LINEBUF_SIZE } else { wait(NULL); } unlink(inname); if ((fp=fopen(outname, "r"))==NULL) { fprintf(stderr, "Can't open %s\n", outname); return -6; } array_undump(fp, &index); fclose(fp); unlink(outname); /*}}} */ /*{{{ Find min, max and mean coordinates*/ array_transpose(&sspline.inpoints); array_reset(&sspline.inpoints); xmin=array_min(&sspline.inpoints); ymin=array_min(&sspline.inpoints); array_reset(&sspline.inpoints); xmax=array_max(&sspline.inpoints); ymax=array_max(&sspline.inpoints); array_reset(&sspline.inpoints); xmean=array_mean(&sspline.inpoints); ymean=array_mean(&sspline.inpoints); xstep=(xmax-xmin)/npoints; ystep=(ymax-ymin)/npoints; array_transpose(&sspline.inpoints); array_reset(&sspline.inpoints); /*}}} */ /*{{{ Copy itempart to 3rd location if necessary*/ /* Note: For this behavior it is essential that array_surfspline * will accept vectors of any size >=3 and only process the first three * elements ! 
*/ if (itempart>0) { DATATYPE hold; do { sspline.inpoints.current_element=2+itempart; hold=READ_ELEMENT(&sspline.inpoints); sspline.inpoints.current_element=2; WRITE_ELEMENT(&sspline.inpoints, hold); array_nextvector(&sspline.inpoints); } while (sspline.inpoints.message!=ARRAY_ENDOFSCAN); } /*}}} */ /*{{{ Do surface spline*/ if ((err=array_surfspline(&sspline))!=0) { fprintf(stderr, "Error %d in array_surfspline\n", err); return err; } xmin-=xstep; xmax+=2*xstep; ymin-=ystep; ymax+=2*ystep; if (binary_output) { /*{{{ Gnuplot binary output*/ int n_xval=(xmax-xmin)/xstep+1, n; /* Number of x values */ fbuf=n_xval; fwrite(&fbuf, sizeof(float), 1, stdout); for (fbuf=xmin, n=0; n<n_xval; fbuf+=xstep, n++) { /* x values */ fwrite(&fbuf, sizeof(float), 1, stdout); } for (y=ymin; y<=ymax; y+=ystep) { fbuf=y; fwrite(&fbuf, sizeof(float), 1, stdout); for (x=xmin, n=0; n<n_xval; x+=xstep, n++) { if (interpolate_only && !is_inside(&index, &sspline.inpoints, x, y)) { f=external_value; } else { f=array_fsurfspline(&sspline, x, y); } fbuf=f; fwrite(&fbuf, sizeof(float), 1, stdout); } } /*}}} */ } else if (matlab_output) { /*{{{ Matlab output*/ array xm, ym, zm; xm.nr_of_elements=ym.nr_of_elements=1; xm.nr_of_vectors=zm.nr_of_elements=(xmax-xmin)/xstep+1; ym.nr_of_vectors=zm.nr_of_vectors=(ymax-ymin)/ystep+1; xm.element_skip=ym.element_skip=zm.element_skip=1; array_allocate(&xm); array_allocate(&ym); array_allocate(&zm); if (xm.message==ARRAY_ERROR || ym.message==ARRAY_ERROR || zm.message==ARRAY_ERROR) { fprintf(stderr, "Error allocating output arrays\n"); return -7; } x=xmin; do { array_write(&xm, x); x+=xstep; } while (xm.message!=ARRAY_ENDOFSCAN); y=ymin; do { array_write(&ym, y); x=xmin; do { if (interpolate_only && !is_inside(&index, &sspline.inpoints, x, y)) { f=external_value; } else { f=array_fsurfspline(&sspline, x, y); } array_write(&zm, f); x+=xstep; } while (zm.message==ARRAY_CONTINUE); y+=ystep; } while (zm.message!=ARRAY_ENDOFSCAN); array_dump(stdout, &xm, 
ARRAY_MATLAB); array_dump(stdout, &ym, ARRAY_MATLAB); array_dump(stdout, &zm, ARRAY_MATLAB); array_free(&xm); array_free(&ym); array_free(&zm); /*}}} */ } else if (mtv_output) { /*{{{ Plotmtv output*/ array zm; DATATYPE zmin=FLT_MAX, zmax= -FLT_MAX; zm.nr_of_elements=(xmax-xmin)/xstep+1; zm.nr_of_vectors=(ymax-ymin)/ystep+1; zm.element_skip=1; array_allocate(&zm); if (zm.message==ARRAY_ERROR) { fprintf(stderr, "Error allocating output arrays\n"); return -7; } y=ymin; do { x=xmin; do { if (interpolate_only && !is_inside(&index, &sspline.inpoints, x, y)) { f=external_value; } else { f=array_fsurfspline(&sspline, x, y); } array_write(&zm, f); if (f<zmin) zmin=f; if (f>zmax) zmax=f; x+=xstep; } while (zm.message==ARRAY_CONTINUE); y+=ystep; } while (zm.message!=ARRAY_ENDOFSCAN); /*{{{ Print file header*/ printf( "# Output of Spline_Gridder (C) Bernd Feige 1995\n\n" "$ DATA=CONTOUR\n\n" "%% toplabel = \"Spline_Gridder output\"\n" "%% subtitle = \"File: %s\"\n\n" "%% interp = 0\n" "%% contfill = on\n" "%% meshplot = off\n\n" "%% xmin = %g xmax = %g\n" "%% ymin = %g ymax = %g\n" "%% zmin = %g zmax = %g\n" "%% nx = %d\n" "%% ny = %d\n" , infile, xmin, xmax, ymin, ymax, zmin, zmax, zm.nr_of_elements, zm.nr_of_vectors); /*}}} */ array_dump(stdout, &zm, ARRAY_MATLAB); array_free(&zm); /*}}} */ } else { /*{{{ Gnuplot output*/ for (x=xmin; x<=xmax; x+=xstep) { for (y=ymin; y<=ymax; y+=ystep) { if (interpolate_only && !is_inside(&index, &sspline.inpoints, x, y)) { f=external_value; } else { f=array_fsurfspline(&sspline, x, y); } printf("%g %g %g\n", x, y, f); } printf("\n"); } /*}}} */ } /*}}} */ return 0; }
// // test_distance_utils.cpp // TimeKit // // Created by DB on 10/24/14. // Copyright (c) 2014 DB. All rights reserved. // #include "catch.hpp" #include "testing_utils.hpp" #include "distance_utils.hpp" #include "array_utils.h" TEST_CASE( "z-normalization", "[distance_utils]" ) { SECTION("double") { typedef double data_t; unsigned int len = 6; data_t x[] = {.7, 0, -.3, 11, -.6, 4}; znormalize(x, len); data_t mean = array_mean(x, len); REQUIRE(rnd(mean) == 0); } }