/**
 * Time repeated multiplications of randomly filled matrices.
 *
 * Allocates A (m x k), B (k x n) and C (m x n) with random_matrix(), calls
 * local_mm() `iterations` times back-to-back and prints the total and the
 * per-iteration time measured with MPI_Wtime().
 *
 * m, n, k     matrix dimensions passed to random_matrix() and local_mm()
 * iterations  number of back-to-back multiplications to time
 **/
void random_multiply (int m, int n, int k, int iterations)
{
  int iter;
  double *A, *B, *C;
  double t_start, t_elapsed;
  double per_iteration;

  printf ("Timing Matrix Multiply m=%d n=%d k=%d iterations=%d....",
          m, n, k, iterations);

  /* Allocate matrices */
  A = random_matrix (m, k);
  B = random_matrix (k, n);
  C = random_matrix (m, n);

  t_start = MPI_Wtime (); /* Start timer */

  /* perform several matrix multiplies back-to-back */
  for (iter = 0; iter < iterations; iter++) {
    /* Both scalar arguments are 1.0 (presumably alpha and beta, so each
     * call accumulates into C -- confirm against local_mm). */
    local_mm (m, n, k, 1.0, A, m, B, k, 1.0, C, m);
  } /* iter */

  t_elapsed = MPI_Wtime () - t_start; /* Stop timer */

  /* deallocate memory */
  deallocate_matrix (A);
  deallocate_matrix (B);
  deallocate_matrix (C);

  /* Do not divide by zero when no iteration was requested */
  per_iteration = (iterations > 0) ? t_elapsed / iterations : 0.0;

  printf ("total_time=%lf, per_iteration=%lf\n", t_elapsed, per_iteration);
}
/*
 * Time mms->trials calls of local_mm_mms() on random operands and print one
 * comma-separated result row: implementation label, the tuning fields of
 * `mms`, the problem size and the mean time per trial.
 */
void random_multiply(mat_mul_specs * mms)
{
    /* Operands are created in the same order as before: A, B, then C */
    double *lhs = random_matrix(mms->m, mms->k);
    double *rhs = random_matrix(mms->k, mms->n);
    double *out = random_matrix(mms->m, mms->n);

    double began = MPI_Wtime();

    /* Run the multiplications back-to-back */
    int trial = 0;
    while (trial < mms->trials) {
        local_mm_mms(mms->m, mms->n, mms->k, 1.0, lhs, mms->m, rhs, mms->k,
                     1.0, out, mms->m, mms);
        trial++;
    }

    double elapsed = MPI_Wtime() - began;

    deallocate_matrix(lhs);
    deallocate_matrix(rhs);
    deallocate_matrix(out);

    /* Label stays empty when the type matches none of the known kinds */
    const char *label = "";
    if (mms->type == NAIVE) {
        label = "naive, ";
    } else if (mms->type == OPENMP) {
        label = "openmp, ";
    } else if (mms->type == MKL) {
        label = "mkl, ";
    }
    printf("%s", label);

    printf("%d, %d, %d, %d, %d, %d, %d, %d, %d, %lf\n",
           mms->threads, mms->cbl, mms->cop, mms->bm, mms->bn, mms->bk,
           mms->m, mms->n, mms->k, elapsed / mms->trials);
}
/*
 * Time 10 calls of gemm_ongpu() on random m x k and k x n inputs and print
 * the elapsed time and the resulting GFLOPS figure.
 *
 * TA, TB   transpose flags forwarded to gemm_ongpu(); they also select the
 *          leading dimensions lda / ldb
 * m, k, n  problem dimensions
 */
void time_ongpu(int TA, int TB, int m, int k, int n)
{
    int iter = 10;
    float *a = random_matrix(m,k);
    float *b = random_matrix(k,n);

    int lda = (!TA)?k:m;
    int ldb = (!TB)?n:k;

    float *c = random_matrix(m,n);

    /* Buffers produced by cuda_make_array() from the host matrices */
    float *a_cl = cuda_make_array(a, m*k);
    float *b_cl = cuda_make_array(b, k*n);
    float *c_cl = cuda_make_array(c, m*n);

    int i;
    clock_t start = clock(), end;
    for(i = 0; i<iter; ++i){
        gemm_ongpu(TA,TB,m,n,k,1,a_cl,lda,b_cl,ldb,1,c_cl,n);
        /* cudaThreadSynchronize() is deprecated; this is its replacement */
        cudaDeviceSynchronize();
    }
    /* Stop the clock right after the timed loop, before any bookkeeping */
    end = clock();

    double flop = ((double)m)*n*(2.*k + 2.)*iter;
    double gflop = flop/pow(10., 9);
    double seconds = sec(end-start);
    printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s, %lf GFLOPS\n",m,k,k,n, TA, TB, seconds, gflop/seconds);

    cuda_free(a_cl);
    cuda_free(b_cl);
    cuda_free(c_cl);
    free(a);
    free(b);
    free(c);
}
/*
 * Multiply two random matrices with MULT() and SIMD_MULT(), print both
 * timings and report whether the results match.
 *
 * argv[1] (optional): new value for `limit`, accepted only when it is > 10.
 * Returns 0 on success and 1 when a buffer could not be obtained.
 */
int main(int argc, char *argv[]){
    if (argc > 1) {
        /* Parse the argument once instead of twice */
        int requested = atoi(argv[1]);
        if (requested > 10) limit = requested;
    }

    clock_t start, end;
    srand(clock());

    M = rand() % limit/2 + limit/2;
    N = rand() % limit/2 + limit/2;
    O = (rand() % limit/8 + 1 + limit/16) * 4;

    float* A = random_matrix(M, N, 10);
    float* B = random_matrix(N, O, 10);
    float* C = malloc(sizeof(float) * M * O);
    float* D;

    /* Stop early instead of handing NULL buffers to the multiplications */
    if (A == NULL || B == NULL || C == NULL) {
        fprintf(stderr, "matrix allocation failed\n");
        free(A);
        free(B);
        free(C);
        return 1;
    }

    printf("Generadas dos matrices aleatorias de (%zu x %zu) y (%zu x %zu)\n", M, N, N, O);

    if (print) {
        print_matrix(A, M, N);
        printf("\n");
        print_matrix(B, N, O);
        printf("\n");
    }

    start = clock();
    D = MULT(A, B, M, N, O);
    end = clock();
    if (D == NULL) {
        fprintf(stderr, "matrix allocation failed\n");
        free(A);
        free(B);
        free(C);
        return 1;
    }
    printf("RESULTADO FUERZA BRUTA: (%f)\n", ((double)(end - start))/CLOCKS_PER_SEC);
    if (print) print_matrix(D, M, O);

    start = clock();
    SIMD_MULT(A, B, C, M, N, O);
    end = clock();
    printf("RESULTADO SIMD: (%f)\n", ((double)(end - start))/CLOCKS_PER_SEC);
    if (print) print_matrix(C, M, O);

    if (equal_mtrx(C, D, M*O)) printf("Son iguales!\n");
    else printf("NO son iguales!\n");

    free(A);
    free(B);
    free(C);
    free(D);
    return 0;
}
void Call_Inverse( int n) { POLY ds; POLY M1[n][n], t_M1[n][n], product[n][n]; int k; printf("Please choose the test matrix: "); printf("1 random matrix.\n"); printf("2 input your own matrix.\n"); scanf("%d", &k ); printf("%d\n", k); if(k==1) random_matrix ( n, n, M1); if(k==2) read_matrix( n, n, M1 ); printf("the original matrix generated :\n"); print( n, n, M1); copy(n, n, M1, t_M1); ds = Inverse_Poly ( n, M1 ); printf(" The inverse matrix of the matrix is :\n" ); print(n, n, M1 ); printf(" The polynomial ds is :\n" ); Print_Poly( ds.d, ds.p ); printf("The product (without ds) of the original matrix"); printf(" and the inverse matrix is:\n"); Multiply(n, n, n, M1, t_M1, product); print(n, n, product); free_matrix(n, n, M1); free_matrix(n, n, t_M1); free_matrix(n, n, product); }
/*
 * Reduce a random n x n matrix to upper triangular form and print the
 * matrix before and after.  Element (row, col) is stored at
 * index row + col*n, the layout the original indexing used.
 *
 * Returns 0 on success, 1 when a zero pivot stops the elimination.
 */
int main(int argc, char* argv[]){
    printf("Factor a Matrix into its upper triangular portion\n");
    int n = 3;
    double A[n*n]; // initial matrix
    double U[n*n]; // to hold factored matrix

    random_matrix(A, n, n);
    print_matrix(A, n, n);

    // Start from a full copy so that every entry of U is defined
    for (int idx = 0; idx < n*n; idx++){
        U[idx] = A[idx];
    }

    // Eliminate below each pivot using the already reduced rows of U
    for (int piv = 0; piv < n - 1; piv++){
        double dnm = U[piv + piv*n];
        if (dnm == 0.0){
            fprintf(stderr, "zero pivot in column %d, cannot eliminate without pivoting\n", piv);
            return 1;
        }
        for (int row = piv + 1; row < n; row++){
            // The multiplier is constant along the row, so compute it once
            double factor = U[row + piv*n] / dnm;
            for (int col = piv; col < n; col++){
                U[row + col*n] -= factor * U[piv + col*n];
            }
        }
    }

    print_matrix(U, n, n);
    return 0;
}
/**
 * Check that multiplying a random n x n matrix by the identity, from either
 * side, reproduces the matrix.
 **/
void identity_test(int n)
{
  double *mat, *ident, *result;

  printf("identity_test n=%d............", n);

  /* Operands: random input, identity, and a result buffer from zeros_matrix */
  mat = random_matrix(n, n);
  ident = identity_matrix(n, n);
  result = zeros_matrix(n, n);

  /* result = mat * ident; the second scalar is 5.0 in this call */
  local_mm(n, n, n, 1.0, mat, n, ident, n, 5.0, result, n);
  verify_matrix(n, n, mat, result);

  /* Other order: result = ident * mat, second scalar 0.0 */
  local_mm(n, n, n, 1.0, ident, n, mat, n, 0.0, result, n);
  verify_matrix(n, n, mat, result);

  /* Release everything */
  deallocate_matrix(mat);
  deallocate_matrix(ident);
  deallocate_matrix(result);

  printf("passed\n");
}
/*
 * Check perturbate(): every entry of column j of Ys must lie within a
 * relative distance eta of the matching entry of ys.
 */
int main(void)
{
    init_prg();

    uint l = random_dim();
    uint n = random_dim();
    float *ys = random_vector(n);
    float eta = 0.001;
    float *Ys = random_matrix(n, l);

    perturbate(l, n, ys, eta, Ys);

    for (uint j = 1; j <= l; j++) {
        for (uint i = 1; i <= n; i++) {
            /* C's abs() takes an int, so the old abs(float) truncated the
             * relative error to 0 and the assertion could never fail.
             * Take the magnitude in floating point instead. */
            float rel = (M_IDX(Ys, n, i, j) - V_IDX(ys, i)) / V_IDX(ys, i);
            if (rel < 0) {
                rel = -rel;
            }
            assert(rel <= eta);
        }
    }

    free(Ys);
    free(ys);
    return 0;
}
void ChompOptimizer::perturbTrajectory() { //int mid_point = (free_vars_start_ + free_vars_end_) / 2; if(worst_collision_cost_state_ < 0) return; int mid_point = worst_collision_cost_state_; planning_models::RobotState *random_state(state_); random_state.getJointStateGroup(planning_group_)->setToRandomValues(); std::vector<double> vals; random_state.getJointStateGroup(planning_group_)->getGroupStateValues(vals); double* ptr = &vals[0]; Eigen::Map<Eigen::VectorXd> random_matrix(ptr, vals.size()); //Eigen::VectorXd random_matrix = vals; // convert the state into an increment random_matrix -= group_trajectory_.getTrajectoryPoint(mid_point).transpose(); // project the increment orthogonal to joint velocities group_trajectory_.getJointVelocities(mid_point, joint_state_velocities_); joint_state_velocities_.normalize(); random_matrix = (Eigen::MatrixXd::Identity(num_joints_, num_joints_) - joint_state_velocities_ * joint_state_velocities_.transpose()) * random_matrix; int mp_free_vars_index = mid_point - free_vars_start_; for(int i = 0; i < num_joints_; i++) { group_trajectory_.getFreeJointTrajectoryBlock(i) += joint_costs_[i].getQuadraticCostInverse().col(mp_free_vars_index) * random_state_(i); } }
double experiment(size_t NSUB, size_t NCOMP, size_t NVOX, int verbose){ gsl_matrix *estimated_a = gsl_matrix_alloc(NSUB, NCOMP); gsl_matrix *estimated_s = gsl_matrix_alloc(NCOMP, NVOX); gsl_matrix *estimated_x = gsl_matrix_alloc(NSUB, NVOX); gsl_matrix *true_a = gsl_matrix_alloc(NSUB, NCOMP); gsl_matrix *true_s = gsl_matrix_alloc(NCOMP, NVOX); gsl_matrix *true_x = gsl_matrix_alloc(NSUB, NVOX); gsl_matrix *cs = gsl_matrix_alloc(NCOMP, NCOMP); gsl_matrix *noise = gsl_matrix_alloc(NSUB, NVOX); // Random gaussian mixing matrix A random_matrix(true_a, 1.0, gsl_ran_gaussian); // Random logistic mixing matrix S random_matrix(true_s, 1.0, gsl_ran_logistic); // Random gaussian noise random_matrix(noise, 1, gsl_ran_gaussian); // matrix_apply_all(true_s, gsl_pow_3); // X = AS matrix_mmul(true_a, true_s, true_x); // add noise gsl_matrix_add(true_x, noise); double start, end; double cpu_time_used; start = omp_get_wtime(); // A,S <- ICA(X, NCOMP) ica(estimated_a, estimated_s, true_x, verbose); end = omp_get_wtime(); cpu_time_used = ((double) (end - start)); printf("\nTime used : %g\n", cpu_time_used); //Clean gsl_matrix_free(true_a); gsl_matrix_free(true_s); gsl_matrix_free(true_x); gsl_matrix_free(estimated_a); gsl_matrix_free(estimated_s); gsl_matrix_free(estimated_x); gsl_matrix_free(cs); return (cpu_time_used); }
/*
 * Factor a matrix with one row/column removed, re-insert that row/column
 * with c_linalg_cholesky_insert and compare l' * l with the full matrix.
 * Returns true when the 1-norm of the difference is below 1.e-8.
 */
bool
test_cholesky_insert (void)
{
	/* rand () % size1 is always in [0, size1); the former
	 * size1 * rand () / RAND_MAX could overflow int and could produce
	 * index == size1 when rand () returned RAND_MAX. */
	int			index = (int) (rand () % size1);
	c_matrix	*a;
	c_matrix	*b;
	c_matrix	*l;
	c_vector	*c;
	double		nrm;

	/* posdef symmetry matrix *a */
	{
		int			i;
		c_matrix	*a0 = random_matrix (size1, size1);
		a = c_matrix_transpose_dot_matrix (1., a0, a0);
		for (i = 0; i < size1; i++) c_matrix_set (a, i, i, c_matrix_get(a, i, i) + 0.1);
		c_matrix_free (a0);
	}

	/* l = a without row/column `index`, c = column `index` of a */
	l = c_matrix_alloc (size1 - 1, size1 - 1);
	c = c_vector_alloc (size1);
	{
		int		i, j, m, n;
		for (i = 0, m = 0; i < size1; i++) {
			c_vector_set (c, i, c_matrix_get (a, i, index));
			if (i == index) continue;
			for (j = 0, n = 0; j < size1; j++) {
				if (j == index) continue;
				c_matrix_set (l, m, n, c_matrix_get (a, i, j));
				n++;
			}
			m++;
		}
	}

	c_linalg_cholesky_decomp (l);
	c_linalg_cholesky_insert (l, index, c);
	c_vector_free (c);

	/* clear the strictly lower triangle before forming l' * l */
	{
		int		i, j;
		for (i = 0; i < size1; i++) {
			for (j = 0; j < i; j++) c_matrix_set (l, i, j, 0.);
		}
	}

	b = c_matrix_transpose_dot_matrix (1., l, l);
	c_matrix_free (l);
	c_matrix_sub (a, b);
	c_matrix_free (b);
	nrm = c_matrix_nrm (a, '1');
	c_matrix_free (a);

	return (nrm < 1.e-8);
}
/* * PURPOSE: The command line driven program does matrix creation, reading, writing, and other * miscellaneous operations. The program automatically creates a matrix and writes * that out called temp_mat (in binary do not use the cat command on it). You are * able to display any matrix by using the display command. You can create a new * blank matrix with the command create. To fill a matrix with random values use the * random command between a range of values. To get some experience with bit shifting * there is a command called shift. If you want to write and read in a matrix from * the filesystem use the respective read and write commands. To see memory operations * in action use the duplicate and equal commands. The others commands are sum and add. * To exit the program use the exit command. * INPUTS: No inputs needed * RETURN: Returns 0 on successful exectution otherwise -1 if there was an error **/ int main (int argc, char **argv) { srand(time(NULL)); char *line = NULL; Commands_t* cmd; Matrix_t *mats[10]; memset(&mats,0, sizeof(Matrix_t*) * 10); // IMPORTANT C FUNCTION TO LEARN Matrix_t *temp = NULL; // TODO ERROR CHECK if(!create_matrix (&temp,"temp_mat", 5, 5)) { return -1; } //TODO ERROR CHECK NEEDED if( (add_matrix_to_array(mats,temp, 10)) < 0) { return -1; } int mat_idx = find_matrix_given_name(mats,10,"temp_mat"); if (mat_idx < 0) { perror("PROGRAM FAILED TO INIT\n"); return -1; } random_matrix(mats[mat_idx], 10, 15); // TODO ERROR CHECK if(!write_matrix("temp_mat", mats[mat_idx])) { return -1; } line = readline("> "); while (strncmp(line,"exit", strlen("exit") + 1) != 0) { if (!parse_user_input(line,&cmd)) { printf("Failed at parsing command\n\n"); } if (cmd->num_cmds > 1) { run_commands(cmd,mats,10); } if (line) { free(line); } destroy_commands(&cmd); line = readline("> "); } free(line); destroy_remaining_heap_allocations(mats,10); return 0; }
/*
 * Time 32 calls of gemm_gpu() on random operands and print the elapsed
 * processor time.  The transpose flags choose each operand's shape and
 * leading dimension.
 */
void time_gpu_random_matrix(int TA, int TB, int m, int k, int n)
{
    /* Operand a: m x k, or k x m when TA is set */
    float *a = TA ? random_matrix(k,m) : random_matrix(m,k);
    int lda = TA ? m : k;

    /* Operand b: k x n, or n x k when TB is set */
    float *b = TB ? random_matrix(n,k) : random_matrix(k,n);
    int ldb = TB ? k : n;

    float *c = random_matrix(m,n);

    clock_t start = clock();
    int rep = 0;
    while (rep < 32) {
        gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n);
        ++rep;
    }
    clock_t end = clock();

    printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC);

    free(a);
    free(b);
    free(c);
}
/*
 * Demo: multiply two random matrices (created with size argument 4) with
 * the plain routine and with the Strassen routine, printing the operands,
 * both results and the time taken by the plain multiplication.
 */
int main()
{
    matrix_t lhs, rhs;
    matrix_t result;
    struct timeval t_begin, t_end;

    /* Operands and a result matrix from null_matrix() */
    random_matrix(&lhs, 4);
    random_matrix(&rhs, 4);
    null_matrix(&result, 4);

    print_matrix(lhs);
    printf("\n");
    print_matrix(rhs);
    printf("\n");
    print_matrix(result);

    /* Plain multiplication, timed */
    gettimeofday(&t_begin, 0);
    matrix_multiplication(lhs, rhs, result);
    gettimeofday(&t_end, 0);

    printf("Normal Multiplication\n");
    print_matrix(result);
    print_time_taken(t_begin, t_end);

    /* Reset the result, then repeat with Strassen */
    result = set_zero(result);
    result = matrix_multiplication_strassen(lhs, rhs, result, 2);

    printf("Strassen Multiplication\n");
    print_matrix(result);
}
int main() { for (size_t n = 0; n <= 50; ++n) { for (int i = 0; i < 1000; ++i) { square_matrix A = random_matrix(n); square_matrix A_rot = rotate_square_matrix_1(A); rotate_square_matrix_2(A); assert(A == A_rot); } std::cout << "passed random tests for matrices of size " << n << std::endl; } return 0; }
/*
 * Interactive driver for Hermite(): obtains an n x m POLY matrix (random or
 * typed in), computes its Hermite form together with the matrix p, then
 * prints p * (original), p itself and the determinant of p evaluated at
 * the point made by create1(1.1).
 *
 * n, m  number of rows and columns of the test matrix
 */
void Call_Hermite ( int n, int m )
{
  POLY a[n][m], a1[n][m];
  POLY p[n][n], t[n][m];
  dcmplx deter, dcmplx_p[n][n], x;
  int k = 0;

  printf("1 random matrix.\n");
  printf("2 input your own matrix.\n");
  printf("Please choose the test matrix:");

  /* Without a valid choice the matrix a would be used uninitialised */
  if (scanf("%d", &k ) != 1)
  {
    fprintf(stderr, "could not read the matrix choice\n");
    return;
  }
  printf("%d\n\n", k );
  if (k != 1 && k != 2)
  {
    fprintf(stderr, "invalid matrix choice %d\n", k);
    return;
  }

  if(k==1) random_matrix ( n, m, a);
  if(k==2) read_matrix( n, m, a );

  printf("the original matrix generated :\n");
  print(n,m,a);

  zero_matrix ( n, m, a1);
  I_matrix ( n, p);
  /* Keep a copy of the input for the check below */
  copy ( n, m, a, t);

  /* Eliminate_Col(n,m,a,p,0,0); */
  /* Eliminate_Row(n,m,a,q,0,0); */
  Hermite(n, m, a, p);
  printf("The hermite form of matrix a is :\n");
  print(n,m,a);

  /* now begin to test the result */
  Multiply ( n, n, m, p, t, a1 );
  printf("The calculated hermite form with p*a is:\n");
  print(n, m, a1);
  printf(" p is:\n");
  print(n, n, p);

  x=create1(1.1);
  evaluate_matrix(n, n, p, dcmplx_p, x);
  deter=determinant(n, dcmplx_p);
  printf("The determinant of the p is: ");
  writeln_dcmplx(deter);
}
/**
 * \brief perform a gauss experiment with n x n matrix
 *
 * Fills the global matrix `a` with a random n x 2n system, runs gauss() on
 * it and releases it again.  The matrix is displayed before and after when
 * n <= 10.
 *
 * \param n dimension of the matrix the inverse
 */
void	experiment(int n) {
	/* create a system to solve */
	a = random_matrix(n, 2 * n);
	if (a == NULL) {
		fprintf(stderr, "cannot allocate a %d x %d matrix\n", n, 2 * n);
		return;
	}

	/* display the matrix */
	if (n <= 10) {
		display_matrix(stdout, a, n, 2 * n);
	}

	/* perform the Gauss algorithm */
	gauss();

	/* display the matrix */
	if (n <= 10) {
		display_matrix(stdout, a, n, 2 * n);
	}

	free(a);
	/* the global must not keep pointing at released memory */
	a = NULL;
}
bool test_cholesky_1down (void) { int info; c_matrix *a; c_matrix *c; c_matrix *l; c_vector *u; double nrm; /* posdef symmetry matrix *a */ { int i; c_matrix *a0 = random_matrix (size1, size1); a = c_matrix_transpose_dot_matrix (1., a0, a0); c_matrix_free (a0); for (i = 0; i < size1; i++) c_matrix_set (a, i, i, c_matrix_get(a, i, i) + 1.); } l = c_matrix_alloc (a->size1, a->size2); c_matrix_memcpy (l, a); u = random_vector (size1); c_vector_scale (u, 0.1); { c_matrix *ut = c_matrix_view_array (u->size, 1, u->size, u->data); c = c_matrix_dot_matrix_transpose (1., ut, ut); c_matrix_free (ut); c_matrix_sub (a, c); c_matrix_free (c); c_linalg_cholesky_decomp (a); } c_linalg_cholesky_decomp (l); info = c_linalg_cholesky_1down (l, u); c_matrix_sub (a, l); c_matrix_free (l); nrm = c_matrix_nrm (a, '1'); c_matrix_free (a); return (info == 0 && nrm < 1.e-8); }
/*
 * Create one small matrix with each generator (random, identity, ones,
 * zeros, lower triangular), print each under a heading, then release them.
 */
void print_matrix_types()
{
  /* Created in the same order as before */
  double *rnd = random_matrix(6, 3);
  double *eye = identity_matrix(5, 5);
  double *all_ones = ones_matrix(4, 2);
  double *all_zeros = zeros_matrix(2, 4);
  double *lower = lowerTri_matrix(5, 5);

  printf("\n\t\tMatrix Types\n");

  printf("6x3 Random Matrix\n");
  print_matrix(6, 3, rnd);
  printf("\n\n");

  printf("5x5 Identity Matrix\n");
  print_matrix(5, 5, eye);
  printf("\n\n");

  printf("4x2 Ones Matrix\n");
  print_matrix(4, 2, all_ones);
  printf("\n\n");

  printf("2x4 Zeros Matrix\n");
  print_matrix(2, 4, all_zeros);
  printf("\n\n");

  printf("5x5 Lower Triangular Matrix\n");
  print_matrix(5, 5, lower);
  printf("\n\n");

  /* Release in the original order */
  deallocate_matrix(rnd);
  deallocate_matrix(all_ones);
  deallocate_matrix(all_zeros);
  deallocate_matrix(eye);
  deallocate_matrix(lower);
}
int main() { for (size_t m = 0; m <= 20; ++m) { for (size_t n = 0; n <= 20; ++n) { for (int i = 0; i < 1000; ++i) { matrix A = random_matrix(m,n); matrix A_zero = zero_when_necessary_1(A); zero_when_necessary_2(A); assert(A == A_zero); } std::cout << "passed random tests for matrices of size " << m << "×" << n << std::endl; } } return 0; }
/* check |x - (l' * l)^-1 * y| < 1.e-8 */
bool
test_cholesky_svx (void)
{
	c_matrix	*spd;
	c_vector	*expected;
	c_vector	*rhs;
	double		err;

	/* positive definite symmetric matrix: seed' * seed plus 0.1 on the diagonal */
	{
		int			d = 0;
		c_matrix	*seed = random_matrix (size1, size1);
		spd = c_matrix_transpose_dot_matrix (1., seed, seed);
		while (d < size1) {
			c_matrix_set (spd, d, d, c_matrix_get (spd, d, d) + 0.1);
			d++;
		}
		c_matrix_free (seed);
	}

	/* known solution and the right-hand side it produces */
	expected = random_vector (size1);
	rhs = c_matrix_dot_vector (1., spd, expected);

	/* factor, then call the solver on rhs */
	c_linalg_cholesky_decomp (spd);
	c_linalg_cholesky_svx (spd, rhs);
	c_matrix_free (spd);

	/* expected = - rhs + expected */
	c_vector_axpy (-1., rhs, expected);
	c_vector_free (rhs);

	err = c_vector_nrm (expected);
	c_vector_free (expected);

	return (err < 1.e-8);
}
/* check |a - l' * l| < 1.e-8 */ bool test_cholesky_decomp (void) { c_matrix *a; c_matrix *c; c_matrix *l; c_matrix *b; double nrm; { int i; c_matrix *a0 = random_matrix (size1, size1); a = c_matrix_transpose_dot_matrix (1., a0, a0); for (i = 0; i < size1; i++) c_matrix_set (a, i, i, c_matrix_get(a, i, i) + 0.1); c_matrix_free (a0); } /* c = chol(a) */ c = c_matrix_alloc (a->size1, a->size2); c_matrix_memcpy (c, a); c_linalg_cholesky_decomp (c); l = c_matrix_alloc (c->size1, c->size2); c_matrix_set_zero (l); c_matrix_upper_triangular_memcpy (l, c); c_matrix_free (c); /* b = l' * l */ b = c_matrix_transpose_dot_matrix (1., l, l); c_matrix_free (l); c_matrix_sub (a, b); c_matrix_free (b); nrm = c_matrix_nrm (a, '1'); c_matrix_free (a); return (nrm < 1.e-8); }
/*
 * Run gemm_gpu() and gemm_cpu() on the same random inputs (seeded with 0)
 * and print the squared difference of the two results divided by m*n.
 */
void test_gpu_accuracy(int TA, int TB, int m, int k, int n)
{
    srand(0);

    /* Operand shapes and leading dimensions follow the transpose flags */
    float *a = TA ? random_matrix(k,m) : random_matrix(m,k);
    int lda = TA ? m : k;
    float *b = TB ? random_matrix(n,k) : random_matrix(k,n);
    int ldb = TB ? k : n;

    /* Both outputs are allocated through random_matrix, then cleared */
    float *c = random_matrix(m,n);
    float *c_gpu = random_matrix(m,n);
    memset(c, 0, m*n*sizeof(float));
    memset(c_gpu, 0, m*n*sizeof(float));

    gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c_gpu,n);
    gemm_cpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n);

    /* Accumulate the squared element-wise differences */
    double sse = 0;
    int idx = 0;
    while (idx < m*n) {
        sse += pow(c[idx]-c_gpu[idx], 2);
        ++idx;
    }

    printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %g SSE\n",m,k,k,n, TA, TB, sse/(m*n));

    free(a);
    free(b);
    free(c);
    free(c_gpu);
}
int main(int argc, char* argv[]) { int rows, cols, size_I, size_R, niter = 10, iter, k; float *I, *J, q0sqr, sum, sum2, tmp, meanROI,varROI ; float Jc, G2, L, num, den, qsqr; int *iN,*iS,*jE,*jW; float *dN,*dS,*dW,*dE; int r1, r2, c1, c2; float cN,cS,cW,cE; float *c, D; float lambda; int i, j; int nthreads; if (argc == 10) { rows = atoi(argv[1]); //number of rows in the domain cols = atoi(argv[2]); //number of cols in the domain if ((rows%16!=0) || (cols%16!=0)){ fprintf(stderr, "rows and cols must be multiples of 16\n"); exit(1); } r1 = atoi(argv[3]); //y1 position of the speckle r2 = atoi(argv[4]); //y2 position of the speckle c1 = atoi(argv[5]); //x1 position of the speckle c2 = atoi(argv[6]); //x2 position of the speckle nthreads = atoi(argv[7]); // number of threads lambda = atof(argv[8]); //Lambda value niter = atoi(argv[9]); //number of iterations } else{ usage(argc, argv); } size_I = cols * rows; size_R = (r2-r1+1)*(c2-c1+1); I = (float *)malloc( size_I * sizeof(float) ); J = (float *)malloc( size_I * sizeof(float) ); c = (float *)malloc(sizeof(float)* size_I) ; iN = (int *)malloc(sizeof(unsigned int*) * rows) ; iS = (int *)malloc(sizeof(unsigned int*) * rows) ; jW = (int *)malloc(sizeof(unsigned int*) * cols) ; jE = (int *)malloc(sizeof(unsigned int*) * cols) ; dN = (float *)malloc(sizeof(float)* size_I) ; dS = (float *)malloc(sizeof(float)* size_I) ; dW = (float *)malloc(sizeof(float)* size_I) ; dE = (float *)malloc(sizeof(float)* size_I) ; for (int i=0; i< rows; i++) { iN[i] = i-1; iS[i] = i+1; } for (int j=0; j< cols; j++) { jW[j] = j-1; jE[j] = j+1; } iN[0] = 0; iS[rows-1] = rows-1; jW[0] = 0; jE[cols-1] = cols-1; printf("Randomizing the input matrix\n"); random_matrix(I, rows, cols); for (k = 0; k < size_I; k++ ) { J[k] = (float)exp(I[k]) ; } printf("Start the SRAD main loop\n"); for (iter=0; iter< niter; iter++){ sum=0; sum2=0; for (i=r1; i<=r2; i++) { for (j=c1; j<=c2; j++) { tmp = J[i * cols + j]; sum += tmp ; sum2 += tmp*tmp; } } meanROI = sum / 
size_R; varROI = (sum2 / size_R) - meanROI*meanROI; q0sqr = varROI / (meanROI*meanROI); #pragma omp parallel for shared(J, dN, dS, dW, dE, c, rows, cols, iN, iS, jW, jE) private(i, j, k, Jc, G2, L, num, den, qsqr) for (int i = 0 ; i < rows ; i++) { ____num_tasks[omp_get_thread_num()]++; { for (int j = 0; j < cols; j++) { k = i * cols + j; Jc = J[k]; // directional derivates dN[k] = J[iN[i] * cols + j] - Jc; dS[k] = J[iS[i] * cols + j] - Jc; dW[k] = J[i * cols + jW[j]] - Jc; dE[k] = J[i * cols + jE[j]] - Jc; G2 = (dN[k]*dN[k] + dS[k]*dS[k] + dW[k]*dW[k] + dE[k]*dE[k]) / (Jc*Jc); L = (dN[k] + dS[k] + dW[k] + dE[k]) / Jc; num = (0.5*G2) - ((1.0/16.0)*(L*L)) ; den = 1 + (.25*L); qsqr = num/(den*den); // diffusion coefficent (equ 33) den = (qsqr-q0sqr) / (q0sqr * (1+q0sqr)) ; c[k] = 1.0 / (1.0+den) ; // saturate diffusion coefficent if (c[k] < 0) {c[k] = 0;} else if (c[k] > 1) {c[k] = 1;} } } ; } #pragma omp parallel for shared(J, c, rows, cols, lambda) private(i, j, k, D, cS, cN, cW, cE) for (int i = 0; i < rows; i++) { ____num_tasks[omp_get_thread_num()]++; { for (int j = 0; j < cols; j++) { // current index k = i * cols + j; // diffusion coefficent cN = c[k]; cS = c[iS[i] * cols + j]; cW = c[k]; cE = c[i * cols + jE[j]]; // divergence (equ 58) D = cN * dN[k] + cS * dS[k] + cW * dW[k] + cE * dE[k]; // image update (equ 61) J[k] = J[k] + 0.25*lambda*D; #ifdef OUTPUT //printf("%.5f ", J[k]); #endif //output } #ifdef OUTPUT //printf("\n"); #endif //output } ; } } ; { int __i; assert(omp_get_max_threads() <= 32); for (__i = 0; __i < omp_get_max_threads(); __i++) { fprintf(stderr, "Thread %d: %d\n", __i, ____num_tasks[__i]); } } #ifdef OUTPUT for( int i = 0 ; i < rows ; i++){ for ( int j = 0 ; j < cols ; j++){ printf("%.5f ", J[i * cols + j]); } printf("\n"); } #endif printf("Computation Done\n"); free(I); free(J); free(iN); free(iS); free(jW); free(jE); free(dN); free(dS); free(dW); free(dE); free(c); return 0; }
void runTest( int argc, char** argv) { int rows, cols, size_I, size_R, niter = 10, iter; double *I, *J, lambda, q0sqr, sum, sum2, tmp, meanROI,varROI ; #ifdef CPU double Jc, G2, L, num, den, qsqr; int *iN,*iS,*jE,*jW, k; double *dN,*dS,*dW,*dE; double cN,cS,cW,cE,D; #endif #ifdef GPU double *J_cuda; double *C_cuda; double *E_C, *W_C, *N_C, *S_C; #endif unsigned int r1, r2, c1, c2; double *c; if (argc == 9) { rows = atoi(argv[1]); //number of rows in the domain cols = atoi(argv[2]); //number of cols in the domain if ((rows%16!=0) || (cols%16!=0)){ fprintf(stderr, "rows and cols must be multiples of 16\n"); exit(1); } r1 = atoi(argv[3]); //y1 position of the speckle r2 = atoi(argv[4]); //y2 position of the speckle c1 = atoi(argv[5]); //x1 position of the speckle c2 = atoi(argv[6]); //x2 position of the speckle lambda = atof(argv[7]); //Lambda value niter = atoi(argv[8]); //number of iterations } else{ usage(argc, argv); } size_I = cols * rows; size_R = (r2-r1+1)*(c2-c1+1); I = (double *)malloc( size_I * sizeof(double) ); J = (double *)malloc( size_I * sizeof(double) ); c = (double *)malloc(sizeof(double)* size_I) ; #ifdef CPU iN = (int *)malloc(sizeof(unsigned int*) * rows) ; iS = (int *)malloc(sizeof(unsigned int*) * rows) ; jW = (int *)malloc(sizeof(unsigned int*) * cols) ; jE = (int *)malloc(sizeof(unsigned int*) * cols) ; dN = (double *)malloc(sizeof(double)* size_I) ; dS = (double *)malloc(sizeof(double)* size_I) ; dW = (double *)malloc(sizeof(double)* size_I) ; dE = (double *)malloc(sizeof(double)* size_I) ; for (int i=0; i< rows; i++) { iN[i] = i-1; iS[i] = i+1; } for (int j=0; j< cols; j++) { jW[j] = j-1; jE[j] = j+1; } iN[0] = 0; iS[rows-1] = rows-1; jW[0] = 0; jE[cols-1] = cols-1; #endif #ifdef GPU printf("size_I = %d\n", size_I); //Allocate device memory //cudaMalloc((void**)& J_cuda, sizeof(double)* size_I); J_cuda = (double*)malloc(sizeof(double)*size_I); //cudaMalloc((void**)& C_cuda, sizeof(double)* size_I); C_cuda = 
(double*)malloc(sizeof(double)*size_I); //cudaMalloc((void**)& E_C, sizeof(double)* size_I); E_C = (double*)malloc(sizeof(double)*size_I); //cudaMalloc((void**)& W_C, sizeof(double)* size_I); W_C = (double*)malloc(sizeof(double)*size_I); //cudaMalloc((void**)& S_C, sizeof(double)* size_I); S_C = (double*)malloc(sizeof(double)*size_I); //cudaMalloc((void**)& N_C, sizeof(double)* size_I); N_C = (double*)malloc(sizeof(double)*size_I); #endif printf("Randomizing the input matrix\n"); //Generate a random matrix random_matrix(I, rows, cols); for (int k = 0; k < size_I; k++ ) { J[k] = exp(I[k]*1.0) ; } printf("Start the SRAD main loop\n"); for (iter=0; iter< niter; iter++){ sum=0; sum2=0; for (int i=r1; i<=r2; i++) { for (int j=c1; j<=c2; j++) { tmp = J[i * cols + j]; sum += tmp ; sum2 += tmp*tmp; } } meanROI = sum / (size_R * 1.0); varROI = (sum2 / (size_R*1.0)) - meanROI*meanROI; q0sqr = varROI / (1.0*(meanROI*meanROI)); #ifdef CPU for (int i = 0 ; i < rows ; i++) { for (int j = 0; j < cols; j++) { k = i * cols + j; Jc = J[k]; // directional derivates dN[k] = J[iN[i] * cols + j] - Jc; dS[k] = J[iS[i] * cols + j] - Jc; dW[k] = J[i * cols + jW[j]] - Jc; dE[k] = J[i * cols + jE[j]] - Jc; G2 = (dN[k]*dN[k] + dS[k]*dS[k] + dW[k]*dW[k] + dE[k]*dE[k]) / (Jc*Jc); L = (dN[k] + dS[k] + dW[k] + dE[k]) / Jc; num = (0.5*G2) - ((1.0/16.0)*(L*L)) ; den = 1.0 + (.25*L); qsqr = num/(den*den*1.0); // diffusion coefficent (equ 33) den = (qsqr-q0sqr) / (q0sqr * (1.0+q0sqr)) ; c[k] = 1.0 / (1.0+den) ; // saturate diffusion coefficent if (c[k] < 0) {c[k] = 0;} else if (c[k] > 1) {c[k] = 1;} } } for (int i = 0; i < rows; i++) { for (int j = 0; j < cols; j++) { // current index k = i * cols + j; // diffusion coefficent cN = c[k]; cS = c[iS[i] * cols + j]; cW = c[k]; cE = c[i * cols + jE[j]]; // divergence (equ 58) D = cN * dN[k] + cS * dS[k] + cW * dW[k] + cE * dE[k]; // image update (equ 61) J[k] = J[k] + 0.25*lambda*D; } } #endif // CPU #ifdef GPU //Currently the input size must be divided 
by 16 - the block size int block_x = cols/BLOCK_SIZE ; int block_y = rows/BLOCK_SIZE ; dim3 dimBlock(BLOCK_SIZE, BLOCK_SIZE); dim3 dimGrid(block_x , block_y); //Copy data from main memory to device memory //cudaMemcpy(J_cuda, J, sizeof(double) * size_I, cudaMemcpyHostToDevice); memcpy(J_cuda, J, sizeof(double) * size_I); //Run kernels //srad_cuda_1<<<dimGrid, dimBlock>>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, q0sqr); srad_cuda_1(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, q0sqr, dimGrid, dimBlock, 1, 0); //srad_cuda_2<<<dimGrid, dimBlock>>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, lambda, q0sqr); srad_cuda_2(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, lambda, q0sqr, dimGrid, dimBlock, 1, 0); //Copy data from device memory to main memory //cudaMemcpy(J, J_cuda, sizeof(double) * size_I, cudaMemcpyDeviceToHost); memcpy(J, J_cuda, sizeof(double) * size_I); #endif } //cudaThreadSynchronize(); #define OUTPUT #ifdef OUTPUT //Printing output printf("Printing Output:\n"); int passed = 1; FILE *gp = fopen("cuda/gold_output.txt", "r"); if (gp == NULL) { printf("Cannot open file.\n"); } double gold_J_val; for( int i = 0 ; i < rows ; i++){ for ( int j = 0 ; j < cols ; j++){ fscanf(gp, "%lf", &gold_J_val); //printf("%.8f ", J[i * cols + j]); if (fabs(gold_J_val - J[i * cols + j]) > EPSILON) { printf("Mismatch at %d: gold = %f, calc = %f.\n", i * cols + j, gold_J_val, J[i * cols + j]); passed = 0; break; } } if (passed == 0) break; //printf("\n"); } fclose(gp); if (passed == 1) printf("PASSED.\n"); else printf("FAILED.\n"); #endif printf("Computation Done\n"); free(I); free(J); #ifdef CPU free(iN); free(iS); free(jW); free(jE); free(dN); free(dS); free(dW); free(dE); #endif #ifdef GPU /*cudaFree(C_cuda); cudaFree(J_cuda); cudaFree(E_C); cudaFree(W_C); cudaFree(N_C); cudaFree(S_C);*/ free(C_cuda); free(J_cuda); free(E_C); free(W_C); free(N_C); free(S_C); #endif free(c); }
/** * Set command */ void command_set(const char* line) { char cmd[MAX_BUFFER]; char key[MAX_BUFFER]; char func[MAX_BUFFER]; char arg1[MAX_BUFFER]; char arg2[MAX_BUFFER]; int argc = sscanf(line, "%s %s = %s %s %s", cmd, key, func, arg1, arg2); if (argc < 3) { puts("invalid arguments"); return; } uint32_t* matrix = NULL; switch (argc) { case 3: if (strcasecmp(func, "identity") == 0) { matrix = identity_matrix(); } else { goto invalid; } break; case 4: if (strcasecmp(func, "random") == 0) { uint32_t seed = atoll(arg1); matrix = random_matrix(seed); } else if (strcasecmp(func, "uniform") == 0) { uint32_t value = atoll(arg1); matrix = uniform_matrix(value); } else if (strcasecmp(func, "cloned") == 0) { MATRIX_GUARD(arg1); matrix = cloned(m); } else if (strcasecmp(func, "reversed") == 0) { MATRIX_GUARD(arg1); matrix = reversed(m); } else if (strcasecmp(func, "transposed") == 0) { MATRIX_GUARD(arg1); matrix = transposed(m); } else { goto invalid; } break; case 5: if (strcasecmp(func, "sequence") == 0) { uint32_t start = atoll(arg1); uint32_t step = atoll(arg2); matrix = sequence_matrix(start, step); } else if (strcasecmp(func, "scalar#add") == 0) { MATRIX_GUARD(arg1); uint32_t value = atoll(arg2); matrix = scalar_add(m, value); } else if (strcasecmp(func, "scalar#mul") == 0) { MATRIX_GUARD(arg1); uint32_t value = atoll(arg2); matrix = scalar_mul(m, value); } else if (strcasecmp(func, "matrix#add") == 0) { MATRIX_GUARD_PAIR(arg1, arg2); matrix = matrix_add(m1, m2); } else if (strcasecmp(func, "matrix#mul") == 0) { MATRIX_GUARD_PAIR(arg1, arg2); matrix = matrix_mul(m1, m2); } else if (strcasecmp(func, "matrix#pow") == 0) { MATRIX_GUARD(arg1); uint32_t exponent = atoll(arg2); matrix = matrix_pow(m, exponent); } else { goto invalid; } break; } entry* e = find_entry(key); if (e == NULL) { e = add_entry(key); } else { free(e->matrix); } e->matrix = matrix; puts("ok"); return; invalid: puts("invalid arguments"); }
int main(int argc, char *argv[]) { struct thread_data *threads; struct thread_data *thread; int i, ret, ch; if (argc > 1) { if (strcmp(argv[1], "--help") == 0) { usage_error(argv[0]); } init_program_parameter(argc, argv); } program_parameter(argv[0]); create_matrix(&matrix_a); create_matrix(&matrix_b); create_matrix(&matrix_c); create_matrix(&matrix_d); random_matrix(matrix_a); random_matrix(matrix_b); nonmal_matrix_multipy(matrix_a, matrix_b, matrix_d); threads = (struct thread_data *)malloc(pthread_max * sizeof(struct thread_data)); if (threads == NULL) { unix_error("malloc threads failed"); } cpu_online = sysconf(_SC_NPROCESSORS_CONF); for(i = 0; i < pthread_max; i++) { thread = threads + i; thread->index = i; if ((ret = pthread_create(&thread->thread_id, NULL, thread_func, thread)) != 0) { posix_error(ret, "pthread_create failed"); } } for(i = 0; i < pthread_max; i++) { thread = threads + i; if ((ret = pthread_join(thread->thread_id, NULL)) != 0) { posix_error(ret, "pthread_join failed"); } } if (matrix_equal(matrix_c, matrix_d) == 0) { unix_error("runtime error"); } if (dump) { dump_matrix("matrix A", matrix_a); dump_matrix("matrix B", matrix_b); dump_matrix("matrix C", matrix_c); dump_matrix("matrix D", matrix_d); } statistics(threads); free_matrix(matrix_a); free_matrix(matrix_b); free_matrix(matrix_c); free_matrix(matrix_d); free(threads); return 0; }
/*
 * SRAD driver accelerated with OpenACC directives.
 *
 * Command line (8 arguments):
 *   rows cols y1 y2 x1 x2 lambda iterations
 * rows and cols must be multiples of 16; [y1..y2] x [x1..x2] is the speckle
 * region used to estimate q0sqr and must lie inside the image.
 *
 * The image is filled by random_matrix(), exponentiated into J, processed
 * by the diffusion loop (repeated `iterations` times only when the file is
 * compiled with ITERATION defined, otherwise once), then printed row by row.
 *
 * Returns 0 on success; exits with status 1 on invalid arguments or when a
 * buffer cannot be allocated.
 */
int main(int argc, char* argv[])
{
#ifdef __NVCUDA__
    acc_init( acc_device_nvcuda );
#endif
#ifdef __NVOPENCL__
    acc_init( acc_device_nvocl );
    acc_list_devices_spec( acc_device_nvocl );
#endif

    int rows, cols, size_I, size_R, niter = 10, iter, k;
    float *I, *J, q0sqr, sum, sum2, tmp, meanROI, varROI;
    float Jc, G2, L, num, den, qsqr;
    int *iN, *iS, *jE, *jW;
    float *dN, *dS, *dW, *dE;
    int r1, r2, c1, c2;
    float cN, cS, cW, cE;
    float *c, D;
    float lambda;
    int i, j;

    printf("%d \n", argc );

    if (argc == 9) {
        rows = atoi(argv[1]);   /* number of rows in the domain */
        cols = atoi(argv[2]);   /* number of cols in the domain */
        if ((rows % 16 != 0) || (cols % 16 != 0)) {
            fprintf(stderr, "rows and cols must be multiples of 16\n");
            exit(1);
        }
        r1 = atoi(argv[3]);     /* y1 position of the speckle */
        r2 = atoi(argv[4]);     /* y2 position of the speckle */
        c1 = atoi(argv[5]);     /* x1 position of the speckle */
        c2 = atoi(argv[6]);     /* x2 position of the speckle */
        lambda = atof(argv[7]); /* Lambda value */
        niter = atoi(argv[8]);  /* number of iterations */
    } else {
        usage(argc, argv);
        /* Never continue with uninitialised parameters, even if usage()
         * happens to return. */
        return 1;
    }

    /* The ROI is used to index J directly and its area is a divisor, so it
     * must be non-empty and inside the image (this also rejects rows/cols
     * that are zero or negative). */
    if (r1 < 0 || r2 < r1 || r2 >= rows || c1 < 0 || c2 < c1 || c2 >= cols) {
        fprintf(stderr, "speckle region must satisfy 0 <= y1 <= y2 < rows and 0 <= x1 <= x2 < cols\n");
        exit(1);
    }

    size_I = cols * rows;
    size_R = (r2 - r1 + 1) * (c2 - c1 + 1);

    I = (float *)malloc(size_I * sizeof(float));
    J = (float *)malloc(size_I * sizeof(float));
    c = (float *)malloc(size_I * sizeof(float));

    /* Neighbour index tables hold ints, so size them by the element type
     * (the pointer size was used before, which merely over-allocated). */
    iN = (int *)malloc(rows * sizeof(int));
    iS = (int *)malloc(rows * sizeof(int));
    jW = (int *)malloc(cols * sizeof(int));
    jE = (int *)malloc(cols * sizeof(int));

    dN = (float *)malloc(size_I * sizeof(float));
    dS = (float *)malloc(size_I * sizeof(float));
    dW = (float *)malloc(size_I * sizeof(float));
    dE = (float *)malloc(size_I * sizeof(float));

    if (!I || !J || !c || !iN || !iS || !jW || !jE || !dN || !dS || !dW || !dE) {
        fprintf(stderr, "unable to allocate memory\n");
        exit(1);
    }

    /* North/south neighbour rows, clamped at the image border.
     * NOTE(review): under the OpenACC specification a `create` clause on a
     * compute construct only lives for that construct, while the data region
     * below expects these tables (and J) to still be `present` - this seems
     * to rely on toolchain-specific behaviour; confirm for the target
     * compiler. */
    #pragma acc kernels create(iN[0:rows], iS[0:rows])
    #pragma acc loop independent
    for (int i = 0; i < rows; i++) {
        iN[i] = i - 1;
        iS[i] = i + 1;
        if (i == 0) iN[0] = 0;
        if (i == rows - 1) iS[rows - 1] = rows - 1;
    }

    /* West/east neighbour columns, clamped at the image border. */
    #pragma acc kernels create(jW[0:cols], jE[0:cols])
    #pragma acc loop independent
    for (int j = 0; j < cols; j++) {
        jW[j] = j - 1;
        jE[j] = j + 1;
        if (j == 0) jW[0] = 0;
        if (j == cols - 1) jE[cols - 1] = cols - 1;
    }

    printf("Randomizing the input matrix\n");
    random_matrix(I, rows, cols);

    #pragma acc kernels copyin(I[0:size_I]) create(J[0:size_I])
    #pragma acc loop independent
    for (k = 0; k < size_I; k++) {
        J[k] = (float)exp(I[k]);
    }

    printf("Start the SRAD main loop\n");

    #pragma acc data copyout(J[0:size_I]) \
        create(dN[0:size_I], dS[0:size_I], dW[0:size_I], dE[0:size_I], c[0:size_I]) \
        present(iN, iS, jW, jE)
    {
#ifdef ITERATION
    for (iter = 0; iter < niter; iter++) {
#endif
        /* Statistics of the speckle region. */
        sum = 0;
        sum2 = 0;
        #pragma acc kernels
        #pragma acc loop vector reduction(+:sum,+:sum2) independent
        for (i = r1; i <= r2; i++) {
            // #pragma acc loop vector reduction(+:sum,+:sum2) independent
            for (j = c1; j <= c2; j++) {
                tmp = J[i * cols + j];
                sum += tmp;
                sum2 += tmp * tmp;
            }
        }
        meanROI = sum / size_R;
        varROI = (sum2 / size_R) - meanROI * meanROI;
        q0sqr = varROI / (meanROI * meanROI);

        /* Pass 1: directional derivatives and diffusion coefficient. */
        #pragma acc kernels
        #pragma acc loop independent
        for (int i = 0; i < rows; i++) {
            for (int j = 0; j < cols; j++) {
                k = i * cols + j;
                Jc = J[k];

                // directional derivatives
                dN[k] = J[iN[i] * cols + j] - Jc;
                dS[k] = J[iS[i] * cols + j] - Jc;
                dW[k] = J[i * cols + jW[j]] - Jc;
                dE[k] = J[i * cols + jE[j]] - Jc;

                G2 = (dN[k]*dN[k] + dS[k]*dS[k]
                    + dW[k]*dW[k] + dE[k]*dE[k]) / (Jc*Jc);

                L = (dN[k] + dS[k] + dW[k] + dE[k]) / Jc;

                num = (0.5*G2) - ((1.0/16.0)*(L*L));
                den = 1 + (.25*L);
                qsqr = num/(den*den);

                // diffusion coefficient (equ 33)
                den = (qsqr-q0sqr) / (q0sqr * (1+q0sqr));
                c[k] = 1.0 / (1.0+den);

                // saturate diffusion coefficient
                if (c[k] < 0) {c[k] = 0;}
                else if (c[k] > 1) {c[k] = 1;}
            }
        }

        /* Pass 2: divergence and image update. */
        #pragma acc kernels
        #pragma acc loop independent
        for (int i = 0; i < rows; i++) {
            for (int j = 0; j < cols; j++) {
                // current index
                k = i * cols + j;

                // diffusion coefficient
                cN = c[k];
                cS = c[iS[i] * cols + j];
                cW = c[k];
                cE = c[i * cols + jE[j]];

                // divergence (equ 58)
                D = cN * dN[k] + cS * dS[k] + cW * dW[k] + cE * dE[k];

                // image update (equ 61)
                J[k] = J[k] + 0.25*lambda*D;
#ifdef OUTPUT
                //printf("%.5f ", J[k]);
#endif //output
            }
#ifdef OUTPUT
            //printf("\n");
#endif //output
        }
#ifdef ITERATION
    }
#endif
    } /* end pragma acc data */

    //#ifdef OUTPUT
    for (int i = 0; i < rows; i++) {
        for (int j = 0; j < cols; j++) {
            printf("%.5f ", J[i * cols + j]);
        }
        printf("\n");
    }
    //#endif

    printf("Computation Done\n");

    free(I);
    free(J);
    free(iN); free(iS); free(jW); free(jE);
    free(dN); free(dS); free(dW); free(dE);
    free(c);

    return 0;
}
/*
 * PURPOSE: run the command which the user entered
 * INPUTS:
 *  cmd       parsed command; cmd->cmds[0] is the command name and the
 *            remaining cmd->num_cmds - 1 strings are its arguments
 *  mats      the matrix list
 *  num_mats  the number of matrices in the list
 * RETURN: void
 *  If no errors occurred during the process then nothing is returned,
 *  otherwise an error message is printed.
 *
 * Supported commands (token count includes the command name):
 *  display <m>                 (2)    add <a> <b> <result>   (4)
 *  duplicate <src> <dst>       (3)    equal <a> <b>          (3)
 *  shift <m> <dir> <amount>    (4)    read <file>            (2)
 *  write <m>                   (2)    create <m> <rows> <cols> (4)
 *  random <m> <start> <end>    (4)
 *
 * add_matrix_to_array() signals failure with the sentinel value 999.
 **/
void run_commands (Commands_t* cmd, Matrix_t** mats, unsigned int num_mats) {

    /* Validate the incoming parameters before touching any of them. */
    if (!cmd || !cmd->cmds || cmd->num_cmds < 1 || !cmd->cmds[0]) {
        printf("commands array is null\n");
        return;
    }
    if (!mats || !(*mats)) {
        printf("matrix list is null\n");
        return;
    }

    const char* name = cmd->cmds[0];

    /* Parsing and calling of commands. The token count is always checked
     * before any argument is read. */
    if (strncmp(name, "display", strlen("display") + 1) == 0
        && cmd->num_cmds == 2) {
        /* find the requested matrix */
        int idx = find_matrix_given_name(mats, num_mats, cmd->cmds[1]);
        if (idx >= 0) {
            display_matrix(mats[idx]);
        }
        else {
            printf("Matrix (%s) doesn't exist\n", cmd->cmds[1]);
            return;
        }
    }
    else if (strncmp(name, "add", strlen("add") + 1) == 0
        && cmd->num_cmds == 4) {
        int mat1_idx = find_matrix_given_name(mats, num_mats, cmd->cmds[1]);
        int mat2_idx = find_matrix_given_name(mats, num_mats, cmd->cmds[2]);
        if (mat1_idx >= 0 && mat2_idx >= 0) {
            Matrix_t* c = NULL;
            if (!create_matrix(&c, cmd->cmds[3], mats[mat1_idx]->rows,
                    mats[mat1_idx]->cols)) {
                printf("Failure to create the result Matrix (%s)\n", cmd->cmds[3]);
                return;
            }

            if (add_matrix_to_array(mats, c, num_mats) == 999) {
                perror("PROGRAM FAILED TO ADD MATRIX TO ARRAY\n");
                return;
            }

            if (!add_matrices(mats[mat1_idx], mats[mat2_idx], c)) {
                printf("Failure to add %s with %s into %s\n",
                    mats[mat1_idx]->name, mats[mat2_idx]->name, c->name);
                return;
            }
        }
        else {
            /* Report a missing operand like every other command does
             * instead of silently doing nothing. */
            printf("Add Failed\n");
            return;
        }
    }
    else if (strncmp(name, "duplicate", strlen("duplicate") + 1) == 0
        && cmd->num_cmds == 3
        && strlen(cmd->cmds[1]) + 1 <= MATRIX_NAME_LEN) {
        int mat1_idx = find_matrix_given_name(mats, num_mats, cmd->cmds[1]);
        if (mat1_idx >= 0) {
            Matrix_t* dup_mat = NULL;
            if (!create_matrix(&dup_mat, cmd->cmds[2], mats[mat1_idx]->rows,
                    mats[mat1_idx]->cols)) {
                return;
            }
            if (!duplicate_matrix(mats[mat1_idx], dup_mat)) {
                perror("PROGRAM FAILED TO DUPLICATE MATRIX\n");
                return;
            }
            if (add_matrix_to_array(mats, dup_mat, num_mats) == 999) {
                perror("PROGRAM FAILED TO ADD MATRIX TO ARRAY\n");
                return;
            }
            printf("Duplication of %s into %s finished\n",
                mats[mat1_idx]->name, cmd->cmds[2]);
        }
        else {
            printf("Duplication Failed\n");
            return;
        }
    }
    else if (strncmp(name, "equal", strlen("equal") + 1) == 0
        && cmd->num_cmds == 3) {
        /* "equal <a> <b>" is three tokens; the previous check for two
         * tokens made the read of cmds[2] below out of bounds. */
        int mat1_idx = find_matrix_given_name(mats, num_mats, cmd->cmds[1]);
        int mat2_idx = find_matrix_given_name(mats, num_mats, cmd->cmds[2]);
        if (mat1_idx >= 0 && mat2_idx >= 0) {
            if (equal_matrices(mats[mat1_idx], mats[mat2_idx])) {
                printf("SAME DATA IN BOTH\n");
            }
            else {
                printf("DIFFERENT DATA IN BOTH\n");
            }
        }
        else {
            printf("Equal Failed\n");
            return;
        }
    }
    else if (strncmp(name, "shift", strlen("shift") + 1) == 0
        && cmd->num_cmds == 4) {
        int mat1_idx = find_matrix_given_name(mats, num_mats, cmd->cmds[1]);
        const int shift_value = atoi(cmd->cmds[3]);
        if (mat1_idx >= 0) {
            /* Only the first character of the direction argument is used. */
            if (!bitwise_shift_matrix(mats[mat1_idx], cmd->cmds[2][0], shift_value)) {
                perror("PROGRAM FAILED TO SHIFT MATRIX\n");
                return;
            }
            printf("Matrix (%s) has been shifted by %d\n",
                mats[mat1_idx]->name, shift_value);
        }
        else {
            printf("Matrix shift failed\n");
            return;
        }
    }
    else if (strncmp(name, "read", strlen("read") + 1) == 0
        && cmd->num_cmds == 2) {
        Matrix_t* new_matrix = NULL;
        if (!read_matrix(cmd->cmds[1], &new_matrix)) {
            printf("Read Failed\n");
            return;
        }

        if (add_matrix_to_array(mats, new_matrix, num_mats) == 999) {
            perror("PROGRAM FAILED TO ADD MATRIX TO ARRAY\n");
            return;
        }
        printf("Matrix (%s) is read from the filesystem\n", cmd->cmds[1]);
    }
    else if (strncmp(name, "write", strlen("write") + 1) == 0
        && cmd->num_cmds == 2) {
        int mat1_idx = find_matrix_given_name(mats, num_mats, cmd->cmds[1]);
        /* A failed lookup must not be used as an index into mats[]. */
        if (mat1_idx < 0) {
            printf("Matrix (%s) doesn't exist\n", cmd->cmds[1]);
            return;
        }
        if (!write_matrix(mats[mat1_idx]->name, mats[mat1_idx])) {
            printf("Write Failed\n");
            return;
        }
        else {
            printf("Matrix (%s) is wrote out to the filesystem\n",
                mats[mat1_idx]->name);
        }
    }
    else if (strncmp(name, "create", strlen("create") + 1) == 0
        && cmd->num_cmds == 4
        && strlen(cmd->cmds[1]) + 1 <= MATRIX_NAME_LEN) {
        Matrix_t* new_mat = NULL;
        const unsigned int rows = atoi(cmd->cmds[2]);
        const unsigned int cols = atoi(cmd->cmds[3]);

        if (!create_matrix(&new_mat, cmd->cmds[1], rows, cols)) {
            perror("PROGRAM FAILED TO ADD CREATE TO ARRAY\n");
            return;
        }
        if (add_matrix_to_array(mats, new_mat, num_mats) == 999) {
            perror("PROGRAM FAILED TO ADD MATRIX TO ARRAY\n");
            return;
        }
        printf("Created Matrix (%s,%u,%u)\n", new_mat->name, new_mat->rows,
            new_mat->cols);
    }
    else if (strncmp(name, "random", strlen("random") + 1) == 0
        && cmd->num_cmds == 4) {
        int mat1_idx = find_matrix_given_name(mats, num_mats, cmd->cmds[1]);
        const unsigned int start_range = atoi(cmd->cmds[2]);
        const unsigned int end_range = atoi(cmd->cmds[3]);
        /* A failed lookup must not be used as an index into mats[]. */
        if (mat1_idx < 0) {
            printf("Matrix (%s) doesn't exist\n", cmd->cmds[1]);
            return;
        }
        if (!random_matrix(mats[mat1_idx], start_range, end_range)) {
            perror("PROGRAM FAILED TO RANDOMIZE MATRIX\n");
            return;
        }
        printf("Matrix (%s) is randomized between %u %u\n",
            mats[mat1_idx]->name, start_range, end_range);
    }
    else {
        printf("Not a command in this application\n");
    }
}
long main() { long i, j; _pMalloc = 0x500000000; niter = 10; rows = 1024; cols = 1024; r1 = 200; //y1 position of the speckle r2 = 500; //y2 position of the speckle c1 = 1000; //x1 position of the speckle c2 = 800; //x2 position of the speckle lambda = 0.5; //Lambda value niter = 100; //number of iterations size_I = cols * rows; size_R = (r2 - r1 + 1) * (c2 - c1 + 1); I = (double *)malloc_sr(size_I * 8); J = (double *)malloc_sr(size_I * 8); c = (double *)malloc_sr(8 * size_I) ; iN = (long *)malloc_sr(8 * rows) ; iS = (long *)malloc_sr(8 * rows) ; jW = (long *)malloc_sr(8 * cols) ; jE = (long *)malloc_sr(8 * cols) ; dN = (double *)malloc_sr(8 * size_I) ; dS = (double *)malloc_sr(8 * size_I) ; dW = (double *)malloc_sr(8 * size_I) ; dE = (double *)malloc_sr(8 * size_I) ; for(i = 0; i < rows; i+=1) { iN[i] = i - 1; iS[i] = i + 1; } for(j = 0; j < cols; j+=1) { jW[j] = j - 1; jE[j] = j + 1; } iN[0] = 0; iS[rows - 1] = rows - 1; jW[0] = 0; jE[cols - 1] = cols - 1; random_matrix(I, rows, cols); for(k = 0; k < size_I; k+=1) { J[k] = exp(I[k]) ; } for(iter = 0; iter < niter; iter+=1) { sum = 0; sum2 = 0; for(i = r1; i <= r2; i = i + 1) { for(j = c1; j <= c2; j = j + 1) { tmp = J[i * cols + j]; sum += tmp ; sum2 += tmp * tmp; } } meanROI = sum / size_R; varROI = (sum2 / size_R) - meanROI * meanROI; q0sqr = varROI / (meanROI * meanROI); // #pragma omp parallel for shared(J, dN, dS, dW, dE, c, rows, cols, iN, iS, jW, jE) private(i, j, k, Jc, G2, L, num, den, qsqr) for(i = 0 ; i < rows ; i = i + 1) { for(j = 0; j < cols; j = j + 1) { k = i * cols + j; Jc = J[k]; // directional derivates dN[k] = J[iN[i] * cols + j] - Jc; dS[k] = J[iS[i] * cols + j] - Jc; dW[k] = J[i * cols + jW[j]] - Jc; dE[k] = J[i * cols + jE[j]] - Jc; G2 = (dN[k] * dN[k] + dS[k] * dS[k] + dW[k] * dW[k] + dE[k] * dE[k]) / (Jc * Jc); L = (dN[k] + dS[k] + dW[k] + dE[k]) / Jc; num = (0.5 * G2) - ((1.0 / 16.0) * (L * L)) ; den = 1 + (0.25 * L); qsqr = num / (den * den); // diffusion coefficent (equ 33) den = (qsqr 
- q0sqr) / (q0sqr * (1 + q0sqr)) ; c[k] = 1.0 / (1.0 + den) ; // saturate diffusion coefficent if(c[k] < 0) { c[k] = 0; } else if(c[k] > 1) { c[k] = 1; } } } // #pragma omp parallel for shared(J, c, rows, cols, lambda) private(i, j, k, D, cS, cN, cW, cE) for(i = 0; i < rows; i = i + 1) { for(j = 0; j < cols; j = j + 1) { // current index k = i * cols + j; // diffusion coefficent cN = c[k]; cS = c[iS[i] * cols + j]; cW = c[k]; cE = c[i * cols + jE[j]]; // divergence (equ 58) D = cN * dN[k] + cS * dS[k] + cW * dW[k] + cE * dE[k]; // image update (equ 61) J[k] = J[k] + 0.25 * lambda * D; } } } return 0; }