Example #1
0
/**
 * Time a run of back-to-back local matrix multiplies on random matrices.
 *
 * Three buffers are obtained from random_matrix() with dimensions (m, k),
 * (k, n) and (m, n); local_mm() is then invoked `iterations` times with both
 * scalar arguments set to 1.0. The total and per-iteration durations measured
 * with MPI_Wtime() are printed to stdout.
 **/
void
random_multiply (int m, int n, int k, int iterations)
{
  double *lhs, *rhs, *acc;
  double began, spent;
  int remaining;

  printf ("Timing Matrix Multiply m=%d n=%d k=%d iterations=%d....", m, n, k,
	  iterations);

  /* Operands are created before the clock starts so only local_mm is timed */
  lhs = random_matrix (m, k);
  rhs = random_matrix (k, n);
  acc = random_matrix (m, n);

  began = MPI_Wtime ();

  /* The same three buffers are handed to every call */
  for (remaining = iterations; remaining > 0; remaining--)
    local_mm (m, n, k, 1.0, lhs, m, rhs, k, 1.0, acc, m);

  spent = MPI_Wtime () - began;

  /* Release the operands before reporting */
  deallocate_matrix (lhs);
  deallocate_matrix (rhs);
  deallocate_matrix (acc);

  printf ("total_time=%lf, per_iteration=%lf\n", spent, spent / iterations);
}
Example #2
0
/*
 * Time mms->trials back-to-back local_mm_mms() calls on random operands and
 * print one comma-separated result row: an optional kernel label followed by
 * the configuration fields of *mms and the average seconds per trial.
 */
void random_multiply(mat_mul_specs * mms) {
  /* Operands are created before the timer starts */
  double *lhs = random_matrix(mms->m, mms->k);
  double *rhs = random_matrix(mms->k, mms->n);
  double *acc = random_matrix(mms->m, mms->n);

  double began = MPI_Wtime();

  /* Every trial issues the identical call on the same three buffers */
  int left = mms->trials;
  while (left > 0) {
    local_mm_mms(mms->m, mms->n, mms->k, 1.0, lhs, mms->m, rhs, mms->k, 1.0, acc, mms->m, mms);
    left--;
  }

  double spent = MPI_Wtime() - began;

  deallocate_matrix(lhs);
  deallocate_matrix(rhs);
  deallocate_matrix(acc);

  /* Unknown kernel types print no label at all */
  const char *label = (mms->type == NAIVE)  ? "naive, "
                    : (mms->type == OPENMP) ? "openmp, "
                    : (mms->type == MKL)    ? "mkl, "
                    : "";
  printf("%s", label);
  printf("%d, %d, %d, %d, %d, %d, %d, %d, %d, %lf\n", mms->threads, mms->cbl, mms->cop, mms->bm, mms->bn, mms->bk, mms->m, mms->n, mms->k, spent / mms->trials);
}
Example #3
0
/*
 * Time repeated gemm_ongpu() calls on random inputs and print the elapsed
 * time together with the achieved GFLOPS.
 *
 * TA, TB   flags forwarded to gemm_ongpu; they also select the leading
 *          dimensions passed for a and b.
 * m, k, n  sizes used for the host buffers (m*k, k*n and m*n floats).
 *
 * The host buffers are copied into GPU arrays with cuda_make_array() before
 * the timer starts. The device is synchronised after every call so the
 * measured interval covers the GPU work and not only the launch.
 */
void time_ongpu(int TA, int TB, int m, int k, int n)
{
    int iter = 10;
    float *a = random_matrix(m,k);
    float *b = random_matrix(k,n);

    int lda = (!TA)?k:m;
    int ldb = (!TB)?n:k;

    float *c = random_matrix(m,n);

    float *a_cl = cuda_make_array(a, m*k);
    float *b_cl = cuda_make_array(b, k*n);
    float *c_cl = cuda_make_array(c, m*n);

    int i;
    clock_t start = clock(), end;
    for(i = 0; i<iter; ++i){
        gemm_ongpu(TA,TB,m,n,k,1,a_cl,lda,b_cl,ldb,1,c_cl,n);
        /* cudaThreadSynchronize() is deprecated; this is its replacement */
        cudaDeviceSynchronize();
    }
    /* Stop the clock before doing any unrelated arithmetic */
    end = clock();

    /* FLOP estimate used by the original code: m*n*(2k + 2) per call */
    double flop = ((double)m)*n*(2.*k + 2.)*iter;
    double gflop = flop/pow(10., 9);
    double seconds = sec(end-start);
    printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s, %lf GFLOPS\n",m,k,k,n, TA, TB, seconds, gflop/seconds);
    cuda_free(a_cl);
    cuda_free(b_cl);
    cuda_free(c_cl);
    free(a);
    free(b);
    free(c);
}
Example #4
0
/*
 * Generate two random matrices, multiply them with MULT and with SIMD_MULT,
 * print the time each one took and report whether the two results agree.
 *
 * An optional first command-line argument replaces the file-level `limit`
 * when its integer value is greater than 10.
 * Returns 0 on success and 1 when the SIMD output buffer cannot be allocated.
 */
int main(int argc, char *argv[]){

		/* Parse the argument once instead of calling atoi() twice */
		if (argc > 1) {
			int requested = atoi(argv[1]);
			if (requested > 10) limit = requested;
		}

		clock_t start, end;
		srand(clock());

		/* Dimensions are drawn relative to limit; O is scaled by 4 */
		M = rand() % limit/2 + limit/2;
		N = rand() % limit/2 + limit/2;
		O = (rand() % limit/8 + 1 + limit/16) * 4;


		float* A = random_matrix(M, N, 10);
		float* B = random_matrix(N, O, 10);
		float* C = malloc(sizeof(float) * M * O);
		float* D;

		/* Without this check a failed allocation would reach SIMD_MULT as NULL */
		if (C == NULL) {
			fprintf(stderr, "malloc failed for the SIMD result matrix\n");
			free(A);
			free(B);
			return 1;
		}

		printf("Generadas dos matrices aleatorias de (%zu x %zu) y (%zu x %zu)\n", M, N, N, O);

		if (print) {
			print_matrix(A, M, N);
			printf("\n");
			print_matrix(B, N, O);
			printf("\n");
		}

		/* Reference product; MULT returns the result buffer */
		start = clock();
		D = MULT(A, B, M, N, O);
		end = clock();

		printf("RESULTADO FUERZA BRUTA: (%f)\n", ((double)(end - start))/CLOCKS_PER_SEC);
		if (print) print_matrix(D, M, O);



		/* SIMD product written into the caller-provided buffer C */
		start = clock();
		SIMD_MULT(A, B, C, M, N, O);
		end = clock();

		printf("RESULTADO SIMD: (%f)\n", ((double)(end - start))/CLOCKS_PER_SEC);
		if (print) print_matrix(C, M, O);

		if (equal_mtrx(C, D, M*O)) printf("Son iguales!\n");
		else printf("NO son iguales!\n");

		free(A);
		free(B);
		free(C);
		free(D);

		return 0;
}
Example #5
0
/*
 * Interactive driver for Inverse_Poly on an n x n POLY matrix.
 *
 * Asks whether to use a random matrix (1) or a user-supplied one (2), prints
 * the matrix, runs Inverse_Poly on it in place, prints the result and the
 * returned polynomial ds, then prints the product of the processed matrix
 * with a copy of the original taken beforehand.
 *
 * Any other menu choice, or unreadable input, aborts the call before the
 * matrices are touched; previously they were used uninitialised in that case.
 */
void Call_Inverse( int n)
{
  POLY ds;
  POLY M1[n][n], t_M1[n][n], product[n][n];
  int k = 0; 

  printf("Please choose the test matrix:  "); 
  printf("1 random matrix.\n");
  printf("2 input your own matrix.\n");
  if(scanf("%d", &k ) != 1)
  {
    printf("invalid choice\n");
    return;
  }
  printf("%d\n", k);

  if(k!=1 && k!=2)
  {
    printf("invalid choice\n");
    return;
  }

  if(k==1) random_matrix ( n, n, M1);
  if(k==2) read_matrix( n, n, M1 );
  printf("the original matrix generated :\n");
  print( n, n, M1);

  /* Keep the original; Inverse_Poly is given M1 and M1 is printed as the result */
  copy(n, n, M1, t_M1);
  ds = Inverse_Poly ( n, M1 );
  printf(" The inverse matrix of the matrix is :\n" );
  print(n, n, M1 );
  printf(" The polynomial ds is :\n" );
  Print_Poly( ds.d, ds.p );

  printf("The product (without ds) of the original matrix");
  printf(" and the inverse matrix is:\n");
 
  Multiply(n, n, n, M1, t_M1, product);
  print(n, n, product);
  
  free_matrix(n, n, M1);
  free_matrix(n, n, t_M1);
  free_matrix(n, n, product);
 
}
Example #6
0
/*
 * Reduce a random n x n matrix to upper-triangular form by Gaussian
 * elimination without pivoting and print both the input and the result.
 *
 * Element (row, col) is stored at index row + col*n, the same addressing the
 * original code used.
 * Returns 0 on success and 1 when a zero pivot is met.
 */
int main(int argc, char* argv[]){
  (void)argc;
  (void)argv;

  printf("Factor a Matrix into its upper triangular portion\n");

  int n = 3; 

  double A[n*n];                // initial matrix
 
  double U[n*n];                // to hold factored matrix

  random_matrix(A, n, n);

  print_matrix(A, n, n);

  // Start from a full copy so every entry of U is defined
  for (int idx = 0; idx < n*n; idx++){
    U[idx] = A[idx];
  }

  // Eliminate below each pivot using the rows already reduced in U,
  // not the untouched rows of A
  for (int piv = 0; piv < n - 1; piv++){
    double dnm = U[piv + piv*n];
    if (dnm == 0.0){
      fprintf(stderr, "zero pivot in column %d; elimination without pivoting cannot continue\n", piv);
      return 1;
    }
    for (int row = piv + 1; row < n; row++){
      double factor = U[row + piv*n] / dnm;
      for (int col = piv; col < n; col++){
        U[row + col*n] -= U[piv + col*n] * factor;
      }
      // Pin the eliminated entry to exactly zero, free of rounding residue
      U[row + piv*n] = 0.0;
    }
  }

  print_matrix(U, n, n);

  return 0;

}
/**
 * Check that multiplying by the identity leaves a matrix unchanged.
 *
 * A random n x n matrix is multiplied by an identity matrix on the right and
 * then on the left; after each product the output buffer is compared with
 * the random matrix through verify_matrix().
 **/
void identity_test(int n) {
  double *sample;
  double *eye;
  double *out;

  printf("identity_test n=%d............", n);

  /* Operands: random input, identity, and a zero-filled output buffer */
  sample = random_matrix(n, n);
  eye = identity_matrix(n, n);
  out = zeros_matrix(n, n);

  /* sample * eye, with scalars 1.0 and 5.0; out starts as all zeros */
  local_mm(n, n, n, 1.0, sample, n, eye, n, 5.0, out, n);
  verify_matrix(n, n, sample, out);

  /* eye * sample, with scalars 1.0 and 0.0 */
  local_mm(n, n, n, 1.0, eye, n, sample, n, 0.0, out, n);
  verify_matrix(n, n, sample, out);

  /* Release everything before reporting success */
  deallocate_matrix(sample);
  deallocate_matrix(eye);
  deallocate_matrix(out);

  printf("passed\n");
}
Example #8
0
/*
 * Check that perturbate() keeps every entry of Ys within a relative distance
 * eta of the matching entry of ys.
 *
 * The relative deviation is compared in floating point. The earlier code
 * passed it through the integer abs(), which truncates toward zero and so
 * accepted any deviation below 1.
 */
int main(void)
{
	init_prg();

	uint l = random_dim();
	uint n = random_dim();
	float *ys = random_vector(n);
	float eta = 0.001;

	float *Ys = random_matrix(n, l);

	perturbate(l, n, ys, eta, Ys);

	/* Indices run from 1 to the dimension inclusive, as in the original loops */
	for (uint j = 1; j <= l; j++) {
		for (uint i = 1; i <= n; i++) {
			float rel = (M_IDX(Ys, n, i, j) - V_IDX(ys, i))
				    / V_IDX(ys, i);
			/* Absolute value done by hand so no extra header is needed */
			assert((rel < 0 ? -rel : rel) <= eta);
		}
	}

	free(Ys);
	free(ys);

	return 0;
}
Example #9
0
// Perturbs the trajectory around the state index stored in
// worst_collision_cost_state_. Returns immediately when that index is negative.
void ChompOptimizer::perturbTrajectory()
{
  //int mid_point = (free_vars_start_ + free_vars_end_) / 2;
  if(worst_collision_cost_state_ < 0)
    return;
  int mid_point = worst_collision_cost_state_;
  // NOTE(review): random_state is declared as a pointer but accessed with '.'
  // on the following lines — confirm the intended type of state_.
  planning_models::RobotState *random_state(state_);
  random_state.getJointStateGroup(planning_group_)->setToRandomValues();
  std::vector<double> vals;
  random_state.getJointStateGroup(planning_group_)->getGroupStateValues(vals);
  // Map the vector's storage as an Eigen vector without copying
  double* ptr = &vals[0];
  Eigen::Map<Eigen::VectorXd> random_matrix(ptr, vals.size());
  //Eigen::VectorXd random_matrix = vals;

  // convert the state into an increment
  random_matrix -= group_trajectory_.getTrajectoryPoint(mid_point).transpose();

  // project the increment orthogonal to joint velocities
  group_trajectory_.getJointVelocities(mid_point, joint_state_velocities_);
  joint_state_velocities_.normalize();
  random_matrix = (Eigen::MatrixXd::Identity(num_joints_, num_joints_) - joint_state_velocities_
                   * joint_state_velocities_.transpose()) * random_matrix;

  // Add a scaled column of each joint's quadratic-cost inverse to its free block.
  // NOTE(review): the scale factor is random_state_(i), not the local
  // random_matrix computed above — confirm which one is intended.
  int mp_free_vars_index = mid_point - free_vars_start_;
  for(int i = 0; i < num_joints_; i++)
  {
    group_trajectory_.getFreeJointTrajectoryBlock(i)
      += joint_costs_[i].getQuadraticCostInverse().col(mp_free_vars_index) * random_state_(i);
  }
}
Example #10
0
/*
 * Run one timed ICA experiment on synthetic data.
 *
 * A random NSUB x NCOMP matrix (gsl_ran_gaussian) is multiplied by a random
 * NCOMP x NVOX matrix (gsl_ran_logistic), random NSUB x NVOX noise
 * (gsl_ran_gaussian) is added, and ica() is run on the sum.
 *
 * NSUB, NCOMP, NVOX  matrix dimensions as used in the allocations below.
 * verbose            forwarded unchanged to ica().
 *
 * Returns the time spent inside ica() as reported by omp_get_wtime(); the
 * same value is printed to stdout.
 */
double experiment(size_t NSUB, size_t NCOMP, size_t NVOX, int verbose){

  gsl_matrix *estimated_a = gsl_matrix_alloc(NSUB,  NCOMP);
  gsl_matrix *estimated_s = gsl_matrix_alloc(NCOMP, NVOX);
  gsl_matrix *estimated_x = gsl_matrix_alloc(NSUB,  NVOX);
  gsl_matrix *true_a      = gsl_matrix_alloc(NSUB,  NCOMP);
  gsl_matrix *true_s      = gsl_matrix_alloc(NCOMP, NVOX);
  gsl_matrix *true_x      = gsl_matrix_alloc(NSUB,  NVOX);
  gsl_matrix *cs          = gsl_matrix_alloc(NCOMP, NCOMP);
  gsl_matrix *noise       = gsl_matrix_alloc(NSUB,  NVOX);

  // Random gaussian mixing matrix A
  random_matrix(true_a, 1.0, gsl_ran_gaussian);
  // Random logistic source matrix S
  random_matrix(true_s, 1.0, gsl_ran_logistic);
  // Random gaussian noise
  random_matrix(noise, 1, gsl_ran_gaussian);
  // matrix_apply_all(true_s, gsl_pow_3);
  // X = AS
  matrix_mmul(true_a, true_s, true_x);
  // add noise
  gsl_matrix_add(true_x, noise);

  double start, end;
  double cpu_time_used;

  // Only the ica() call is timed
  start = omp_get_wtime();
  // A,S <- ICA(X, NCOMP)
  ica(estimated_a, estimated_s, true_x, verbose);
  end = omp_get_wtime();
  cpu_time_used = ((double) (end - start));
  printf("\nTime used : %g\n", cpu_time_used);

  //Clean
  gsl_matrix_free(true_a);
  gsl_matrix_free(true_s);
  gsl_matrix_free(true_x);
  gsl_matrix_free(estimated_a);
  gsl_matrix_free(estimated_s);
  gsl_matrix_free(estimated_x);
  gsl_matrix_free(cs);
  // noise was allocated above but previously never released
  gsl_matrix_free(noise);

  return (cpu_time_used);
}
Example #11
0
/*
 * Test c_linalg_cholesky_insert.
 *
 * Builds a matrix a = a0' * a0 with 0.1 added on the diagonal, removes a
 * randomly chosen row/column `index`, factors the reduced matrix, then
 * re-inserts column `index` of a with c_linalg_cholesky_insert. The strictly
 * lower triangle of the result is zeroed, l' * l is formed, and the test
 * passes when the '1' norm of a - l' * l is below 1.e-8.
 */
bool
test_cholesky_insert (void)
{
	/*
	 * Scale rand() in floating point: the former integer product
	 * size1 * rand () could overflow int, and rand () == RAND_MAX produced
	 * the out-of-range index size1. This form always yields 0 .. size1 - 1.
	 */
	int			index = (int) ((double) size1 * rand () / (RAND_MAX + 1.0));
	c_matrix	*a;
	c_matrix	*b;
	c_matrix	*l;
	c_vector	*c;
	double		nrm;

	/* posdef symmetry matrix *a */
	{
		int			i;
		c_matrix	*a0 = random_matrix (size1, size1);
		a = c_matrix_transpose_dot_matrix (1., a0, a0);
		for (i = 0; i < size1; i++) c_matrix_set (a, i, i, c_matrix_get(a, i, i) + 0.1);
		c_matrix_free (a0);
	}

	/* l = a with row and column `index` removed; c = column `index` of a */
	l = c_matrix_alloc (size1 - 1, size1 - 1);
	c = c_vector_alloc (size1);
	{
		int			i, j, m, n;
		for (i = 0, m = 0; i < size1; i++) {
			c_vector_set (c, i, c_matrix_get (a, i, index));
			if (i == index) continue;
			for (j = 0, n = 0; j < size1; j++) {
				if (j == index) continue;
				c_matrix_set (l, m, n, c_matrix_get (a, i, j));
				n++;
			}
			m++;
		}
	}

	c_linalg_cholesky_decomp (l);
	c_linalg_cholesky_insert (l, index, c);
	c_vector_free (c);
	/* clear the strictly lower triangle before forming l' * l */
	{
		int		i, j;
		for (i = 0; i < size1; i++) {
			for (j = 0; j < i; j++) c_matrix_set (l, i, j, 0.);
		}
	}
	b = c_matrix_transpose_dot_matrix (1., l, l);
	c_matrix_free (l);

	c_matrix_sub (a, b);
	c_matrix_free (b);

	nrm = c_matrix_nrm (a, '1');
	c_matrix_free (a);

	return (nrm < 1.e-8);
}
Example #12
0
	/*
	 * PURPOSE: The command line driven program does matrix creation, reading, writing, and other 
	 *		miscellaneous operations. The program automatically creates a matrix and writes 
	 *		that out called temp_mat (in binary do not use the cat command on it). You are 
	 *		able to display any matrix by using the display command. You can create a new 
	 *		blank matrix with the command create. To fill a matrix with random values use the 
	 *		random command between a range of values. To get some experience with bit shifting 
	 *		there is a command called shift. If you want to write and read in a matrix from 
	 *		the filesystem use the respective read and write commands. To see memory operations 
	 *		in action use the duplicate and equal commands. The others commands are sum and add. 
	 *		To exit the program use the exit command.  
	 * INPUTS: No inputs needed
	 * RETURN: Returns 0 on successful exectution otherwise -1 if there was an error 
	 **/
/*
 * Entry point of the interactive matrix shell.
 *
 * Creates a 5x5 matrix named "temp_mat", registers it in a 10-slot table,
 * fills it through random_matrix(…, 10, 15), writes it out, and then reads
 * commands with readline() until the user types "exit" or input ends.
 *
 * Returns 0 on normal exit and -1 when initialisation fails.
 */
int main (int argc, char **argv) {
	srand(time(NULL));		
	char *line = NULL;
	/* Starts as NULL so a failed parse can never leave a stale pointer */
	Commands_t* cmd = NULL;

	Matrix_t *mats[10];
	memset(&mats,0, sizeof(Matrix_t*) * 10); // IMPORTANT C FUNCTION TO LEARN

	Matrix_t *temp = NULL;
	
	if(!create_matrix (&temp,"temp_mat", 5, 5)) { 
		return -1; 
	}
 
	if( (add_matrix_to_array(mats,temp, 10)) < 0) {
		return -1;
	}

	int mat_idx = find_matrix_given_name(mats,10,"temp_mat");

	if (mat_idx < 0) {
		perror("PROGRAM FAILED TO INIT\n");
		return -1;
	}
	random_matrix(mats[mat_idx], 10, 15);
	
	if(!write_matrix("temp_mat", mats[mat_idx])) {
		return -1; 
	}

	line = readline("> ");
	/* readline() returns NULL at end of input; treat that like "exit" */
	while (line != NULL && strncmp(line,"exit", strlen("exit")  + 1) != 0) {
		
		if (!parse_user_input(line,&cmd)) {
			printf("Failed at parsing command\n\n");
		}
		else if (cmd != NULL && cmd->num_cmds > 1) {
			/* Only run when parsing succeeded and produced a command */
			run_commands(cmd,mats,10);
		}

		free(line);

		/* Release whatever the parser produced, if anything */
		if (cmd != NULL) {
			destroy_commands(&cmd);
		}
		cmd = NULL;

		line = readline("> ");
	}
	free(line);
	destroy_remaining_heap_allocations(mats,10);
	return 0;	
}
Example #13
0
/*
 * Time 32 consecutive gemm_gpu() calls on random host matrices and print the
 * elapsed processor time measured with clock().
 *
 * TA and TB pick the dimension order used when creating a and b as well as
 * the leading dimensions passed for them.
 */
void time_gpu_random_matrix(int TA, int TB, int m, int k, int n)
{
    /* Creation order (a, b, c) is kept so random draws come in the same sequence */
    float *a = TA ? random_matrix(k,m) : random_matrix(m,k);
    const int lda = TA ? m : k;
    float *b = TB ? random_matrix(n,k) : random_matrix(k,n);
    const int ldb = TB ? k : n;
    float *c = random_matrix(m,n);

    const clock_t began = clock();
    int pass = 0;
    while(pass < 32){
        gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n);
        ++pass;
    }
    const clock_t finished = clock();

    printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s\n",m,k,k,n, TA, TB, (float)(finished-began)/CLOCKS_PER_SEC);

    free(a);
    free(b);
    free(c);
}
Example #14
0
/*
 * Demo: create two random matrices and an empty one (size argument 4), print
 * them, multiply with the plain routine while timing it, then clear the
 * result and multiply again with the Strassen routine.
 */
int main()
{
    matrix_t lhs, rhs, product;
    struct timeval t_begin, t_end;

    /* Inputs and an empty result */
    random_matrix(&lhs, 4);
    random_matrix(&rhs, 4);
    null_matrix(&product, 4);

    print_matrix(lhs);
    printf("\n");
    print_matrix(rhs);
    printf("\n");
    print_matrix(product);

    /* Only the plain multiplication is timed */
    gettimeofday(&t_begin, 0);
    matrix_multiplication(lhs, rhs, product);
    gettimeofday(&t_end, 0);

    printf("Normal Multiplication\n");
    print_matrix(product);
    print_time_taken(t_begin, t_end);

    /* Reset the result, then repeat with Strassen (last argument 2) */
    product = set_zero(product);
    product = matrix_multiplication_strassen(lhs, rhs, product, 2);

    printf("Strassen Multiplication\n");
    print_matrix(product);
}
// Randomised cross-check of the two rotation routines: for every size from 0
// to 50, 1000 random matrices are rotated by both routines and the results
// must compare equal.
int main()
{
	const size_t largest = 50;
	const int trials = 1000;

	size_t n = 0;
	while (n <= largest)
	{
		int done = 0;
		while (done < trials)
		{
			square_matrix A = random_matrix(n);

			// Version 1 returns its result; version 2 is applied to A itself
			square_matrix A_rot = rotate_square_matrix_1(A);
			rotate_square_matrix_2(A);

			assert(A == A_rot);
			++done;
		}
		std::cout << "passed random tests for matrices of size " << n << std::endl;
		++n;
	}

	return 0;
}
Example #16
0
/*
 * Interactive driver for Hermite() on an n x m POLY matrix.
 *
 * Asks whether to use a random matrix (1) or a user-supplied one (2), runs
 * Hermite(n, m, a, p), prints a afterwards, then prints the product of p
 * with a copy of the original matrix, prints p, and finally evaluates p at
 * 1.1 and prints its determinant.
 *
 * Any other menu choice, or unreadable input, aborts the call before the
 * matrices are touched; previously they were used uninitialised in that case.
 *
 * NOTE(review): unlike Call_Inverse, nothing is released with free_matrix
 * here — confirm whether these POLY entries own heap storage.
 */
void Call_Hermite ( int n, int m )
{
    POLY a[n][m], a1[n][m];
    POLY p[n][n], t[n][m];
    dcmplx deter, dcmplx_p[n][n], x;
    int k = 0;

    printf("1 random matrix.\n");
    printf("2 input your own matrix.\n");
    printf("Please choose the test matrix:");
    if(scanf("%d", &k ) != 1)
    {
        printf("invalid choice\n");
        return;
    }
    printf("%d\n\n", k );

    if(k!=1 && k!=2)
    {
        printf("invalid choice\n");
        return;
    }

    if(k==1) random_matrix ( n, m, a);
    if(k==2) read_matrix( n, m, a );
    printf("the original matrix generated :\n");
    print(n,m,a); 
    
    /* a1 receives p*t below, p starts from I_matrix, t keeps the original a */
    zero_matrix ( n, m, a1);
    I_matrix (  n, p);
    copy ( n, m, a, t);

    /* Eliminate_Col(n,m,a,p,0,0); */
    /* Eliminate_Row(n,m,a,q,0,0); */
    Hermite(n, m, a, p);

    printf("The hermite form of matrix a is :\n");
    print(n,m,a);

    /* now begin to test the result */
    Multiply ( n,  n,  m, p, t, a1 ); 
 
    
    printf("The calculated hermite form with p*a is:\n");
    print(n, m, a1);
    printf(" p is:\n");
    print(n, n, p);

    /* Evaluate p at the sample point 1.1 and take its determinant */
    x=create1(1.1);
    evaluate_matrix(n, n, p, dcmplx_p, x);
    deter=determinant(n, dcmplx_p);
    printf("The determinant of the p is: ");
    writeln_dcmplx(deter);
    
}
Example #17
0
/**
 * \brief Run one Gauss experiment on an n x 2n system.
 *
 * The file-level matrix `a` is filled by random_matrix(), processed by
 * gauss() and freed again. For n <= 10 the matrix is printed to stdout
 * before and after the call to gauss().
 *
 * \param n	number of rows; the matrix has 2 * n columns
 */
void	experiment(int n) {
	const int	columns = 2 * n;
	const int	small_enough = (n <= 10);

	/* build the system */
	a = random_matrix(n, columns);

	if (small_enough) {
		display_matrix(stdout, a, n, columns);
	}

	/* gauss() takes no arguments; it works on the file-level state */
	gauss();

	if (small_enough) {
		display_matrix(stdout, a, n, columns);
	}

	free(a);
}
Example #18
0
/*
 * Test c_linalg_cholesky_1down.
 *
 * Builds a = a0' * a0 with 1. added on the diagonal and a random vector u
 * scaled by 0.1. The reference is the Cholesky factorisation of a - u * u'.
 * The candidate is the factorisation of a passed together with u to
 * c_linalg_cholesky_1down. The test passes when that call returns 0 and the
 * '1' norm of the difference between the two is below 1.e-8.
 */
bool
test_cholesky_1down (void)
{
	int			info;
	c_matrix	*a;
	c_matrix	*c;
	c_matrix	*l;
	c_vector	*u;
	double		nrm;

	/* posdef symmetry matrix *a */
	{
		int			i;
		c_matrix	*a0 = random_matrix (size1, size1);
		a = c_matrix_transpose_dot_matrix (1., a0, a0);
		c_matrix_free (a0);
		for (i = 0; i < size1; i++) c_matrix_set (a, i, i, c_matrix_get(a, i, i) + 1.);
	}

	/* l keeps a copy of a before a is modified below */
	l = c_matrix_alloc (a->size1, a->size2);
	c_matrix_memcpy (l, a);
	u = random_vector (size1);
	c_vector_scale (u, 0.1);
	/* reference: a <- chol (a - u * u') */
	{
		c_matrix	*ut = c_matrix_view_array (u->size, 1, u->size, u->data);
		c = c_matrix_dot_matrix_transpose (1., ut, ut);
		c_matrix_free (ut);
		c_matrix_sub (a, c);
		c_matrix_free (c);
		c_linalg_cholesky_decomp (a);
	}

	/* candidate: factor the copy, then apply the rank-one routine with u */
	c_linalg_cholesky_decomp (l);
	info = c_linalg_cholesky_1down (l, u);
	/* u is no longer needed; it was previously leaked */
	c_vector_free (u);
	c_matrix_sub (a, l);
	c_matrix_free (l);

	nrm = c_matrix_nrm (a, '1');
	c_matrix_free (a);

	return (info == 0 && nrm < 1.e-8);
}
Example #19
0
/*
 * Print one sample of each matrix constructor: a 6x3 random matrix, a 5x5
 * identity, a 4x2 matrix of ones, a 2x4 matrix of zeros and a 5x5 lower
 * triangular matrix. Every sample is released before returning.
 */
void print_matrix_types() {

  /* Build all samples up front, in the original construction order */
  double *sample_rand = random_matrix(6, 3);
  double *sample_eye = identity_matrix(5, 5);
  double *sample_ones = ones_matrix(4, 2);
  double *sample_zeros = zeros_matrix(2, 4);
  double *sample_tri = lowerTri_matrix(5, 5);

  printf("\n\t\tMatrix Types\n");

  /* Each section: title line, matrix, two blank lines */
  printf("6x3 Random Matrix\n");
  print_matrix(6, 3, sample_rand);
  printf("\n\n");

  printf("5x5 Identity Matrix\n");
  print_matrix(5, 5, sample_eye);
  printf("\n\n");

  printf("4x2 Ones Matrix\n");
  print_matrix(4, 2, sample_ones);
  printf("\n\n");

  printf("2x4 Zeros Matrix\n");
  print_matrix(2, 4, sample_zeros);
  printf("\n\n");

  printf("5x5 Lower Triangular Matrix\n");
  print_matrix(5, 5, sample_tri);
  printf("\n\n");

  /* Release in the same order as the original */
  deallocate_matrix(sample_rand);
  deallocate_matrix(sample_ones);
  deallocate_matrix(sample_zeros);
  deallocate_matrix(sample_eye);
  deallocate_matrix(sample_tri);

}
// Randomised cross-check of the two zeroing routines: for every m x n shape
// with m and n from 0 to 20, 1000 random matrices are processed by both
// routines and the results must compare equal.
int main()
{
	const size_t largest = 20;
	const int trials = 1000;

	size_t m = 0;
	while (m <= largest)
	{
		size_t n = 0;
		while (n <= largest)
		{
			int done = 0;
			while (done < trials)
			{
				matrix A = random_matrix(m,n);

				// Version 1 returns its result; version 2 is applied to A itself
				matrix A_zero = zero_when_necessary_1(A);
				zero_when_necessary_2(A);

				assert(A == A_zero);
				++done;
			}
			std::cout << "passed random tests for matrices of size "
				  << m << "×" << n << std::endl;
			++n;
		}
		++m;
	}

	return 0;
}
Example #21
0
/*
 * check |x - (l' * l)^-1 * y| < 1.e-8
 *
 * A matrix spd = seed' * seed with 0.1 added on the diagonal and a random
 * vector are multiplied to give the right-hand side; the system is then
 * solved through the Cholesky routines and the norm of the difference from
 * the original vector is compared against the tolerance.
 */
bool
test_cholesky_svx (void)
{
	c_matrix	*spd;
	c_vector	*expected;
	c_vector	*rhs;
	double		residual;
	int			d;

	/* spd = seed' * seed, then shift the diagonal by 0.1 */
	{
		c_matrix	*seed = random_matrix (size1, size1);
		spd = c_matrix_transpose_dot_matrix (1., seed, seed);
		c_matrix_free (seed);
	}
	for (d = 0; d < size1; d++) {
		c_matrix_set (spd, d, d, c_matrix_get (spd, d, d) + 0.1);
	}

	/* rhs = spd * expected */
	expected = random_vector (size1);
	rhs = c_matrix_dot_vector (1., spd, expected);

	/* factor spd, then hand the factor and rhs to the solver */
	c_linalg_cholesky_decomp (spd);
	c_linalg_cholesky_svx (spd, rhs);
	c_matrix_free (spd);

	/* expected = - rhs + expected */
	c_vector_axpy (-1., rhs, expected);
	c_vector_free (rhs);

	residual = c_vector_nrm (expected);
	c_vector_free (expected);

	return (residual < 1.e-8);
}
Example #22
0
/*
 * check |a - l' * l| < 1.e-8
 *
 * A matrix spd = seed' * seed with 0.1 added on the diagonal is copied and
 * factored; the upper triangle of the factor is copied into a zeroed matrix,
 * multiplied by its transpose and subtracted from spd. The '1' norm of the
 * remainder is compared against the tolerance.
 */
bool
test_cholesky_decomp (void)
{
	c_matrix	*spd;
	c_matrix	*work;
	c_matrix	*upper;
	c_matrix	*rebuilt;
	double		residual;
	int			d;

	/* spd = seed' * seed, then shift the diagonal by 0.1 */
	{
		c_matrix	*seed = random_matrix (size1, size1);
		spd = c_matrix_transpose_dot_matrix (1., seed, seed);
		c_matrix_free (seed);
	}
	for (d = 0; d < size1; d++) {
		c_matrix_set (spd, d, d, c_matrix_get (spd, d, d) + 0.1);
	}

	/* work = chol(spd), computed on a copy so spd stays intact */
	work = c_matrix_alloc (spd->size1, spd->size2);
	c_matrix_memcpy (work, spd);
	c_linalg_cholesky_decomp (work);

	/* upper = upper triangle of work, everything else zero */
	upper = c_matrix_alloc (work->size1, work->size2);
	c_matrix_set_zero (upper);
	c_matrix_upper_triangular_memcpy (upper, work);
	c_matrix_free (work);

	/* rebuilt = upper' * upper */
	rebuilt = c_matrix_transpose_dot_matrix (1., upper, upper);
	c_matrix_free (upper);

	/* spd <- spd - rebuilt */
	c_matrix_sub (spd, rebuilt);
	c_matrix_free (rebuilt);

	residual = c_matrix_nrm (spd, '1');
	c_matrix_free (spd);

	return (residual < 1.e-8);
}
Example #23
0
/*
 * Compare gemm_gpu() with gemm_cpu() on identical random inputs and print the
 * mean squared difference of the two outputs.
 *
 * The random generator is reseeded with 0 on every call. Both output buffers
 * are cleared to zero before the multiplies.
 */
void test_gpu_accuracy(int TA, int TB, int m, int k, int n)
{
    srand(0);

    /* Creation order (a, b, c, c_gpu) is kept so random draws match */
    float *a = TA ? random_matrix(k,m) : random_matrix(m,k);
    const int lda = TA ? m : k;
    float *b = TB ? random_matrix(n,k) : random_matrix(k,n);
    const int ldb = TB ? k : n;

    float *c = random_matrix(m,n);
    float *c_gpu = random_matrix(m,n);
    memset(c, 0, m*n*sizeof(float));
    memset(c_gpu, 0, m*n*sizeof(float));

    /* Same arguments for both back ends; only the output buffer differs */
    gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c_gpu,n);
    gemm_cpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n);

    /* Accumulate the squared element-wise differences */
    double sse = 0;
    int idx = 0;
    while(idx < m*n) {
        sse += pow(c[idx]-c_gpu[idx], 2);
        ++idx;
    }

    printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %g SSE\n",m,k,k,n, TA, TB, sse/(m*n));

    free(a);
    free(b);
    free(c);
    free(c_gpu);
}
Example #24
0
/*
 * SRAD driver, OpenMP version with per-thread task counting.
 *
 * Expects nine arguments after the program name: rows, cols, r1, r2, c1, c2,
 * nthreads, lambda, niter. It fills I through random_matrix(), sets
 * J = exp(I), and runs niter iterations. Each iteration computes ROI
 * statistics over rows r1..r2 and columns c1..c2, then runs two parallel
 * passes over the whole image: one fills the derivative arrays and the
 * coefficient c, the other updates J. After the loop the per-thread counters
 * in ____num_tasks are written to stderr.
 *
 * NOTE(review): nthreads is parsed but not used anywhere in this function.
 */
int main(int argc, char* argv[])
{   
	int rows, cols, size_I, size_R, niter = 10, iter, k;
    float *I, *J, q0sqr, sum, sum2, tmp, meanROI,varROI ;
	float Jc, G2, L, num, den, qsqr;
	int *iN,*iS,*jE,*jW;
	float *dN,*dS,*dW,*dE;
	int r1, r2, c1, c2;
	float cN,cS,cW,cE;
	float *c, D;
	float lambda;
	int i, j;
    int nthreads;

	if (argc == 10)
	{
		rows = atoi(argv[1]); //number of rows in the domain
		cols = atoi(argv[2]); //number of cols in the domain
		if ((rows%16!=0) || (cols%16!=0)){
			fprintf(stderr, "rows and cols must be multiples of 16\n");
			exit(1);
		}
		r1   = atoi(argv[3]); //y1 position of the speckle
		r2   = atoi(argv[4]); //y2 position of the speckle
		c1   = atoi(argv[5]); //x1 position of the speckle
		c2   = atoi(argv[6]); //x2 position of the speckle
		nthreads = atoi(argv[7]); // number of threads
		lambda = atof(argv[8]); //Lambda value
		niter = atoi(argv[9]); //number of iterations
	}
    else{
		// NOTE(review): if usage() returns, the variables above stay uninitialised — confirm it exits
		usage(argc, argv);
    }


	size_I = cols * rows;
    size_R = (r2-r1+1)*(c2-c1+1);   

	I = (float *)malloc( size_I * sizeof(float) );
    J = (float *)malloc( size_I * sizeof(float) );
	c  = (float *)malloc(sizeof(float)* size_I) ;

    // The index arrays hold int values, but each element is sized as sizeof(unsigned int*)
    iN = (int *)malloc(sizeof(unsigned int*) * rows) ;
    iS = (int *)malloc(sizeof(unsigned int*) * rows) ;
    jW = (int *)malloc(sizeof(unsigned int*) * cols) ;
    jE = (int *)malloc(sizeof(unsigned int*) * cols) ;    


	dN = (float *)malloc(sizeof(float)* size_I) ;
    dS = (float *)malloc(sizeof(float)* size_I) ;
    dW = (float *)malloc(sizeof(float)* size_I) ;
    dE = (float *)malloc(sizeof(float)* size_I) ;    
    

    // Neighbour index tables: previous/next row and column for every position
    for (int i=0; i< rows; i++) {
        iN[i] = i-1;
        iS[i] = i+1;
    }    
    for (int j=0; j< cols; j++) {
        jW[j] = j-1;
        jE[j] = j+1;
    }
    // Clamp the tables at the image border so edge cells reference themselves
    iN[0]    = 0;
    iS[rows-1] = rows-1;
    jW[0]    = 0;
    jE[cols-1] = cols-1;
	
	printf("Randomizing the input matrix\n");

    random_matrix(I, rows, cols);

    for (k = 0;  k < size_I; k++ ) {
     	J[k] = (float)exp(I[k]) ;
    }
   
	printf("Start the SRAD main loop\n");

for (iter=0; iter< niter; iter++){
		// Mean and variance of J over the ROI give q0sqr for this iteration
		sum=0; sum2=0;     
		for (i=r1; i<=r2; i++) {
            for (j=c1; j<=c2; j++) {
                tmp   = J[i * cols + j];
                sum  += tmp ;
                sum2 += tmp*tmp;
            }
        }
        meanROI = sum / size_R;
        varROI  = (sum2 / size_R) - meanROI*meanROI;
        q0sqr   = varROI / (meanROI*meanROI);
		

		// Pass 1: directional derivatives and diffusion coefficient for every cell.
		// The first statement of each outer iteration bumps the calling thread's counter.
#pragma omp parallel for shared(J, dN, dS, dW, dE, c, rows, cols, iN, iS, jW, jE) private(i, j, k, Jc, G2, L, num, den, qsqr)
		for (int i = 0 ; i < rows ; i++) { ____num_tasks[omp_get_thread_num()]++;
{
            for (int j = 0; j < cols; j++) { 
		
				k = i * cols + j;
				Jc = J[k];
 
				// directional derivatives
                dN[k] = J[iN[i] * cols + j] - Jc;
                dS[k] = J[iS[i] * cols + j] - Jc;
                dW[k] = J[i * cols + jW[j]] - Jc;
                dE[k] = J[i * cols + jE[j]] - Jc;
			
                G2 = (dN[k]*dN[k] + dS[k]*dS[k] 
                    + dW[k]*dW[k] + dE[k]*dE[k]) / (Jc*Jc);

   		        L = (dN[k] + dS[k] + dW[k] + dE[k]) / Jc;

				num  = (0.5*G2) - ((1.0/16.0)*(L*L)) ;
                den  = 1 + (.25*L);
                qsqr = num/(den*den);
 
                // diffusion coefficient (equ 33)
                den = (qsqr-q0sqr) / (q0sqr * (1+q0sqr)) ;
                c[k] = 1.0 / (1.0+den) ;
                
                // saturate diffusion coefficient
                if (c[k] < 0) {c[k] = 0;}
                else if (c[k] > 1) {c[k] = 1;}
   
		}
  
    } ; }

		// Pass 2: divergence from the coefficients of pass 1, then update J in place
#pragma omp parallel for shared(J, c, rows, cols, lambda) private(i, j, k, D, cS, cN, cW, cE)
		for (int i = 0; i < rows; i++) { ____num_tasks[omp_get_thread_num()]++;
{
            for (int j = 0; j < cols; j++) {        

                // current index
                k = i * cols + j;
                
                // diffusion coefficient; cN and cW both read c[k]
					cN = c[k];
					cS = c[iS[i] * cols + j];
					cW = c[k];
					cE = c[i * cols + jE[j]];

                // divergence (equ 58)
                D = cN * dN[k] + cS * dS[k] + cW * dW[k] + cE * dE[k];
                
                // image update (equ 61)
                J[k] = J[k] + 0.25*lambda*D;
                #ifdef OUTPUT
                //printf("%.5f ", J[k]); 
                #endif //output
            }
	            #ifdef OUTPUT
                //printf("\n"); 
                #endif //output
	     } ; }


	} ; {
    // Report how many outer-loop iterations each thread executed
    int __i;
    assert(omp_get_max_threads() <= 32);
    for (__i = 0; __i < omp_get_max_threads(); __i++) {
        fprintf(stderr, "Thread %d: %d\n", __i, ____num_tasks[__i]);
    }
}



#ifdef OUTPUT
	  for( int i = 0 ; i < rows ; i++){
		for ( int j = 0 ; j < cols ; j++){

         printf("%.5f ", J[i * cols + j]); 
    
		}
         printf("\n"); 
   }
#endif 

	printf("Computation Done\n");

	free(I);
	free(J);
	free(iN); free(iS); free(jW); free(jE);
    free(dN); free(dS); free(dW); free(dE);

	free(c);
	return 0;
}
Example #25
0
  void
runTest( int argc, char** argv)
{
  /*
   * SRAD driver with a CPU path (#ifdef CPU) and a host-emulated "GPU" path
   * (#ifdef GPU).
   *
   * Expects eight arguments after the program name: rows, cols, r1, r2, c1,
   * c2, lambda, niter. It fills I through random_matrix(), sets J = exp(I)
   * and runs niter iterations. Each iteration derives q0sqr from the ROI
   * rows r1..r2 and columns c1..c2 and then updates J on whichever path was
   * compiled in. Afterwards J is compared entry by entry with the values
   * read from cuda/gold_output.txt, and PASSED or FAILED is printed.
   *
   * In the GPU path all "device" buffers are ordinary malloc'ed host memory
   * and srad_cuda_1 / srad_cuda_2 are called as plain functions.
   */
  int rows, cols, size_I, size_R, niter = 10, iter;
  double *I, *J, lambda, q0sqr, sum, sum2, tmp, meanROI,varROI ;

#ifdef CPU
  double Jc, G2, L, num, den, qsqr;
  int *iN,*iS,*jE,*jW, k;
  double *dN,*dS,*dW,*dE;
  double cN,cS,cW,cE,D;
#endif

#ifdef GPU

  double *J_cuda;
  double *C_cuda;
  double *E_C, *W_C, *N_C, *S_C;

#endif

  unsigned int r1, r2, c1, c2;
  double *c;



  if (argc == 9)
  {
    rows = atoi(argv[1]);  //number of rows in the domain
    cols = atoi(argv[2]);  //number of cols in the domain
    if ((rows%16!=0) || (cols%16!=0)){
      fprintf(stderr, "rows and cols must be multiples of 16\n");
      exit(1);
    }
    r1   = atoi(argv[3]);  //y1 position of the speckle
    r2   = atoi(argv[4]);  //y2 position of the speckle
    c1   = atoi(argv[5]);  //x1 position of the speckle
    c2   = atoi(argv[6]);  //x2 position of the speckle
    lambda = atof(argv[7]); //Lambda value
    niter = atoi(argv[8]); //number of iterations

  }
  else{
    // NOTE(review): if usage() returns, the variables above stay uninitialised — confirm it exits
    usage(argc, argv);
  }



  size_I = cols * rows;
  size_R = (r2-r1+1)*(c2-c1+1);

  I = (double *)malloc( size_I * sizeof(double) );
  J = (double *)malloc( size_I * sizeof(double) );
  c  = (double *)malloc(sizeof(double)* size_I) ;


#ifdef CPU

  // Index tables hold int values, so size them with sizeof(int)
  iN = (int *)malloc(sizeof(int) * rows) ;
  iS = (int *)malloc(sizeof(int) * rows) ;
  jW = (int *)malloc(sizeof(int) * cols) ;
  jE = (int *)malloc(sizeof(int) * cols) ;


  dN = (double *)malloc(sizeof(double)* size_I) ;
  dS = (double *)malloc(sizeof(double)* size_I) ;
  dW = (double *)malloc(sizeof(double)* size_I) ;
  dE = (double *)malloc(sizeof(double)* size_I) ;


  // Neighbour index tables: previous/next row and column for every position
  for (int i=0; i< rows; i++) {
    iN[i] = i-1;
    iS[i] = i+1;
  }
  for (int j=0; j< cols; j++) {
    jW[j] = j-1;
    jE[j] = j+1;
  }
  // Clamp at the image border so edge cells reference themselves
  iN[0]    = 0;
  iS[rows-1] = rows-1;
  jW[0]    = 0;
  jE[cols-1] = cols-1;

#endif

#ifdef GPU
  printf("size_I = %d\n", size_I);
  //Allocate "device" memory (host malloc stands in for cudaMalloc)
  //cudaMalloc((void**)& J_cuda, sizeof(double)* size_I);
  J_cuda = (double*)malloc(sizeof(double)*size_I);
  //cudaMalloc((void**)& C_cuda, sizeof(double)* size_I);
  C_cuda = (double*)malloc(sizeof(double)*size_I);
  //cudaMalloc((void**)& E_C, sizeof(double)* size_I);
  E_C = (double*)malloc(sizeof(double)*size_I);
  //cudaMalloc((void**)& W_C, sizeof(double)* size_I);
  W_C = (double*)malloc(sizeof(double)*size_I);
  //cudaMalloc((void**)& S_C, sizeof(double)* size_I);
  S_C = (double*)malloc(sizeof(double)*size_I);
  //cudaMalloc((void**)& N_C, sizeof(double)* size_I);
  N_C = (double*)malloc(sizeof(double)*size_I);

#endif

  printf("Randomizing the input matrix\n");
  //Generate a random matrix
  random_matrix(I, rows, cols);

  for (int k = 0;  k < size_I; k++ ) {
    J[k] = exp(I[k]*1.0) ;
  }
  printf("Start the SRAD main loop\n");
  for (iter=0; iter< niter; iter++){
    // Mean and variance of J over the ROI give q0sqr for this iteration
    sum=0; sum2=0;
    for (int i=r1; i<=r2; i++) {
      for (int j=c1; j<=c2; j++) {
        tmp   = J[i * cols + j];
        sum  += tmp ;
        sum2 += tmp*tmp;
      }
    }
    meanROI = sum / (size_R * 1.0);
    varROI  = (sum2 / (size_R*1.0)) - meanROI*meanROI;
    q0sqr   = varROI / (1.0*(meanROI*meanROI));

#ifdef CPU

    // Pass 1: directional derivatives and diffusion coefficient for every cell
    for (int i = 0 ; i < rows ; i++) {
      for (int j = 0; j < cols; j++) {

        k = i * cols + j;
        Jc = J[k];

        // directional derivatives
        dN[k] = J[iN[i] * cols + j] - Jc;
        dS[k] = J[iS[i] * cols + j] - Jc;
        dW[k] = J[i * cols + jW[j]] - Jc;
        dE[k] = J[i * cols + jE[j]] - Jc;

        G2 = (dN[k]*dN[k] + dS[k]*dS[k]
            + dW[k]*dW[k] + dE[k]*dE[k]) / (Jc*Jc);

        L = (dN[k] + dS[k] + dW[k] + dE[k]) / Jc;

        num  = (0.5*G2) - ((1.0/16.0)*(L*L)) ;
        den  = 1.0 + (.25*L);
        qsqr = num/(den*den*1.0);

        // diffusion coefficient (equ 33)
        den = (qsqr-q0sqr) / (q0sqr * (1.0+q0sqr)) ;
        c[k] = 1.0 / (1.0+den) ;

        // saturate diffusion coefficient
        if (c[k] < 0) {c[k] = 0;}
        else if (c[k] > 1) {c[k] = 1;}
      }
    }
    // Pass 2: divergence from the coefficients of pass 1, then update J in place
    for (int i = 0; i < rows; i++) {
      for (int j = 0; j < cols; j++) {

        // current index
        k = i * cols + j;

        // diffusion coefficient; cN and cW both read c[k]
        cN = c[k];
        cS = c[iS[i] * cols + j];
        cW = c[k];
        cE = c[i * cols + jE[j]];

        // divergence (equ 58)
        D = cN * dN[k] + cS * dS[k] + cW * dW[k] + cE * dE[k];

        // image update (equ 61)
        J[k] = J[k] + 0.25*lambda*D;
      }
    }

#endif // CPU


#ifdef GPU

    //Currently the input size must be divided by 16 - the block size
    int block_x = cols/BLOCK_SIZE ;
    int block_y = rows/BLOCK_SIZE ;

    dim3 dimBlock(BLOCK_SIZE, BLOCK_SIZE);
    dim3 dimGrid(block_x , block_y);


    //Copy data from main memory to "device" memory
    //cudaMemcpy(J_cuda, J, sizeof(double) * size_I, cudaMemcpyHostToDevice);
    memcpy(J_cuda, J, sizeof(double) * size_I);
    //Run kernels (called as ordinary functions taking the grid/block shapes)
    //srad_cuda_1<<<dimGrid, dimBlock>>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, q0sqr);
    srad_cuda_1(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, q0sqr, dimGrid, dimBlock, 1, 0);
    //srad_cuda_2<<<dimGrid, dimBlock>>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, lambda, q0sqr);
    srad_cuda_2(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, rows, lambda, q0sqr, dimGrid, dimBlock, 1, 0);

    //Copy data from "device" memory to main memory
    //cudaMemcpy(J, J_cuda, sizeof(double) * size_I, cudaMemcpyDeviceToHost);
    memcpy(J, J_cuda, sizeof(double) * size_I);

#endif
  }

  //cudaThreadSynchronize();
#define OUTPUT
#ifdef OUTPUT
  //Compare the result with the gold file
  printf("Printing Output:\n");
  int passed = 1;
  FILE *gp = fopen("cuda/gold_output.txt", "r");
  if (gp == NULL) {
    // A NULL stream used to be passed on to fscanf/fclose; now the
    // comparison is skipped and the run counts as failed.
    printf("Cannot open file.\n");
    passed = 0;
  }
  else {
    double gold_J_val;
    for( int i = 0 ; i < rows && passed == 1 ; i++){
      for ( int j = 0 ; j < cols ; j++){
        // A short or malformed gold file must not be compared against stale data
        if (fscanf(gp, "%lf", &gold_J_val) != 1) {
          printf("Cannot read gold value at %d.\n", i * cols + j);
          passed = 0;
          break;
        }
        //printf("%.8f ", J[i * cols + j]);
        if (fabs(gold_J_val - J[i * cols + j]) > EPSILON) {
          printf("Mismatch at %d: gold = %f, calc = %f.\n",
              i * cols + j, gold_J_val, J[i * cols + j]);
          passed = 0;
          break;
        }
      }
      //printf("\n");
    }
    fclose(gp);
  }
  if (passed == 1)
    printf("PASSED.\n");
  else
    printf("FAILED.\n");
#endif

  printf("Computation Done\n");

  free(I);
  free(J);
#ifdef CPU
  free(iN); free(iS); free(jW); free(jE);
  free(dN); free(dS); free(dW); free(dE);
#endif
#ifdef GPU
  /*cudaFree(C_cuda);
    cudaFree(J_cuda);
    cudaFree(E_C);
    cudaFree(W_C);
    cudaFree(N_C);
    cudaFree(S_C);*/
  free(C_cuda);
  free(J_cuda);
  free(E_C);
  free(W_C);
  free(N_C);
  free(S_C);

#endif
  free(c);

}
Example #26
0
/**
 * Set command.
 *
 * Parses a line of the form
 *
 *     <cmd> <key> = <func> [arg1 [arg2]]
 *
 * builds the matrix selected by <func> and stores it under <key>. When an
 * entry for <key> already exists, the matrix it held is freed and replaced;
 * otherwise a new entry is added.
 *
 * Prints "ok" on success and "invalid arguments" when the line does not
 * parse or <func> is not one of the names accepted for the number of tokens
 * that were read.
 *
 * Every token is scanned with an explicit maximum field width, so an
 * over-long token can never be written past the MAX_BUFFER-sized buffers.
 */
void command_set(const char* line) {

    char cmd[MAX_BUFFER];
    char key[MAX_BUFFER];
    char func[MAX_BUFFER];
    char arg1[MAX_BUFFER];
    char arg2[MAX_BUFFER];

    if (line == NULL) {
        puts("invalid arguments");
        return;
    }

    /*
     * A bare "%s" stores an unbounded number of characters. Build the scan
     * format at run time so that each conversion is limited to
     * MAX_BUFFER - 1 characters, leaving room for the terminating NUL.
     */
    char scan_fmt[128];
    const int max_token = (int)(MAX_BUFFER) - 1;
    if (max_token < 1) {
        puts("invalid arguments");
        return;
    }
    snprintf(scan_fmt, sizeof(scan_fmt), "%%%ds %%%ds = %%%ds %%%ds %%%ds",
             max_token, max_token, max_token, max_token, max_token);

    /* argc counts the tokens converted: cmd, key, func and up to two args. */
    int argc = sscanf(line, scan_fmt, cmd, key, func, arg1, arg2);
    if (argc < 3) {
        puts("invalid arguments");
        return;
    }

    uint32_t* matrix = NULL;

    /*
     * MATRIX_GUARD / MATRIX_GUARD_PAIR are defined elsewhere; from their use
     * here they bring `m` (respectively `m1` and `m2`) into scope for the
     * named matrices. NOTE(review): presumably they also bail out when a
     * name is unknown - confirm against the macro definitions.
     */
    switch (argc) {
        /* <func> with no arguments */
        case 3:
            if (strcasecmp(func, "identity") == 0) {
                matrix = identity_matrix();
            } else {
                goto invalid;
            }
            break;

        /* <func> with one argument: a number or a matrix name */
        case 4:
            if (strcasecmp(func, "random") == 0) {
                uint32_t seed = atoll(arg1);
                matrix = random_matrix(seed);
            } else if (strcasecmp(func, "uniform") == 0) {
                uint32_t value = atoll(arg1);
                matrix = uniform_matrix(value);
            } else if (strcasecmp(func, "cloned") == 0) {
                MATRIX_GUARD(arg1);
                matrix = cloned(m);
            } else if (strcasecmp(func, "reversed") == 0) {
                MATRIX_GUARD(arg1);
                matrix = reversed(m);
            } else if (strcasecmp(func, "transposed") == 0) {
                MATRIX_GUARD(arg1);
                matrix = transposed(m);
            } else {
                goto invalid;
            }
            break;

        /* <func> with two arguments */
        case 5:
            if (strcasecmp(func, "sequence") == 0) {
                uint32_t start = atoll(arg1);
                uint32_t step = atoll(arg2);
                matrix = sequence_matrix(start, step);
            } else if (strcasecmp(func, "scalar#add") == 0) {
                MATRIX_GUARD(arg1);
                uint32_t value = atoll(arg2);
                matrix = scalar_add(m, value);
            } else if (strcasecmp(func, "scalar#mul") == 0) {
                MATRIX_GUARD(arg1);
                uint32_t value = atoll(arg2);
                matrix = scalar_mul(m, value);
            } else if (strcasecmp(func, "matrix#add") == 0) {
                MATRIX_GUARD_PAIR(arg1, arg2);
                matrix = matrix_add(m1, m2);
            } else if (strcasecmp(func, "matrix#mul") == 0) {
                MATRIX_GUARD_PAIR(arg1, arg2);
                matrix = matrix_mul(m1, m2);
            } else if (strcasecmp(func, "matrix#pow") == 0) {
                MATRIX_GUARD(arg1);
                uint32_t exponent = atoll(arg2);
                matrix = matrix_pow(m, exponent);
            } else {
                goto invalid;
            }
            break;
    }

    /*
     * The new matrix is fully built before the old one is released, so a
     * command whose source and destination are the same key still reads
     * valid data.
     */
    entry* e = find_entry(key);
    if (e == NULL) {
        e = add_entry(key);
    } else {
        free(e->matrix);
    }

    e->matrix = matrix;

    puts("ok");
    return;

invalid:
    puts("invalid arguments");
}
Example #27
0
/*
 * Multi-threaded matrix multiply self-check.
 *
 * Builds two random input matrices, computes a reference product into
 * matrix_d, runs pthread_max worker threads executing thread_func, then
 * compares matrix_c against matrix_d and reports statistics.
 */
int main(int argc, char *argv[])
{
	struct thread_data *pool;
	struct thread_data *worker;
	int idx, rc;

	/* Any command-line argument triggers option parsing; "--help" as the
	 * first argument goes through usage_error() first. */
	if (argc > 1)
	{
		if (!strcmp(argv[1], "--help"))
		{
			usage_error(argv[0]);
		}
		init_program_parameter(argc, argv);
	}

	program_parameter(argv[0]);

	/* A and B are the inputs, D receives the reference result. */
	create_matrix(&matrix_a);
	create_matrix(&matrix_b);
	create_matrix(&matrix_c);
	create_matrix(&matrix_d);
	random_matrix(matrix_a);
	random_matrix(matrix_b);

	nonmal_matrix_multipy(matrix_a, matrix_b, matrix_d);

	pool = (struct thread_data *)malloc(pthread_max * sizeof(struct thread_data));
	if (!pool)
	{
		unix_error("malloc threads failed");
	}

	/* NOTE(review): this stores the number of *configured* processors in a
	 * variable named cpu_online - confirm _SC_NPROCESSORS_ONLN is not the
	 * value intended. */
	cpu_online = sysconf(_SC_NPROCESSORS_CONF);

	/* Launch one worker per slot; each receives its own descriptor. */
	idx = 0;
	while (idx < pthread_max)
	{
		worker = &pool[idx];
		worker->index = idx;
		rc = pthread_create(&worker->thread_id, NULL, thread_func, worker);
		if (rc != 0)
		{
			posix_error(rc, "pthread_create failed");
		}
		idx++;
	}

	/* Wait for every worker before looking at the result. */
	idx = 0;
	while (idx < pthread_max)
	{
		worker = &pool[idx];
		rc = pthread_join(worker->thread_id, NULL);
		if (rc != 0)
		{
			posix_error(rc, "pthread_join failed");
		}
		idx++;
	}

	/* matrix_c must match the reference product in matrix_d. */
	if (!matrix_equal(matrix_c, matrix_d))
	{
		unix_error("runtime error");
	}
	if (dump)
	{
		dump_matrix("matrix A", matrix_a);
		dump_matrix("matrix B", matrix_b);
		dump_matrix("matrix C", matrix_c);
		dump_matrix("matrix D", matrix_d);
	}
	statistics(pool);

	free_matrix(matrix_a);
	free_matrix(matrix_b);
	free_matrix(matrix_c);
	free_matrix(matrix_d);
	free(pool);
	return 0;
}
Example #28
0
/*
 * SRAD driver, OpenACC variant.
 *
 * Arguments (all eight are required):
 *   argv[1] rows    number of rows in the domain    (multiple of 16)
 *   argv[2] cols    number of columns in the domain (multiple of 16)
 *   argv[3] r1      y1 position of the speckle
 *   argv[4] r2      y2 position of the speckle
 *   argv[5] c1      x1 position of the speckle
 *   argv[6] c2      x2 position of the speckle
 *   argv[7] lambda  lambda value
 *   argv[8] niter   number of iterations (only used when ITERATION is defined)
 *
 * The speckle region must lie inside the image and be non-empty:
 * 0 <= r1 <= r2 < rows and 0 <= c1 <= c2 < cols. Anything else is rejected,
 * because the region statistics are read straight out of J and divided by
 * the region size.
 *
 * Prints the resulting image J, one row per line, and returns 0.
 */
int main(int argc, char* argv[])
{
#ifdef __NVCUDA__
	acc_init( acc_device_nvcuda );
#endif
#ifdef __NVOPENCL__
	acc_init( acc_device_nvocl );
	acc_list_devices_spec( acc_device_nvocl );
#endif

	int rows, cols, size_I, size_R, niter = 10, iter, k;
	float *I, *J, q0sqr, sum, sum2, tmp, meanROI,varROI ;
	float Jc, G2, L, num, den, qsqr;
	int *iN,*iS,*jE,*jW;
	float *dN,*dS,*dW,*dE;
	int r1, r2, c1, c2;
	float cN,cS,cW,cE;
	float *c, D;
	float lambda;
	int i, j;
	printf("%d \n", argc );

	if (argc ==9 )
	{
		rows = atoi(argv[1]); //number of rows in the domain
		cols = atoi(argv[2]); //number of cols in the domain
		if ((rows%16!=0) || (cols%16!=0)){
			fprintf(stderr, "rows and cols must be multiples of 16\n");
			exit(1);
		}
		r1   = atoi(argv[3]); //y1 position of the speckle
		r2   = atoi(argv[4]); //y2 position of the speckle
		c1   = atoi(argv[5]); //x1 position of the speckle
		c2   = atoi(argv[6]); //x2 position of the speckle
		lambda = atof(argv[7]); //Lambda value
		niter = atoi(argv[8]); //number of iterations

		// The ROI is indexed directly into J and its size is used as a
		// divisor, so it has to be a non-empty window inside the image.
		// This also rejects rows <= 0 and cols <= 0.
		if (r1 < 0 || r1 > r2 || r2 >= rows ||
				c1 < 0 || c1 > c2 || c2 >= cols){
			fprintf(stderr, "speckle region must satisfy 0 <= r1 <= r2 < rows and 0 <= c1 <= c2 < cols\n");
			exit(1);
		}
	}
	else{
		usage(argc, argv);
		// Never continue with uninitialised dimensions, even if usage()
		// returns to the caller.
		exit(1);
	}


	size_I = cols * rows;
	size_R = (r2-r1+1)*(c2-c1+1);

	I = (float *)malloc( size_I * sizeof(float) );
	J = (float *)malloc( size_I * sizeof(float) );
	c  = (float *)malloc(sizeof(float)* size_I) ;

	// Neighbour index tables hold plain ints, one per row / column.
	iN = (int *)malloc(sizeof(int) * rows) ;
	iS = (int *)malloc(sizeof(int) * rows) ;
	jW = (int *)malloc(sizeof(int) * cols) ;
	jE = (int *)malloc(sizeof(int) * cols) ;


	dN = (float *)malloc(sizeof(float)* size_I) ;
	dS = (float *)malloc(sizeof(float)* size_I) ;
	dW = (float *)malloc(sizeof(float)* size_I) ;
	dE = (float *)malloc(sizeof(float)* size_I) ;

	if (!I || !J || !c || !iN || !iS || !jW || !jE
			|| !dN || !dS || !dW || !dE){
		fprintf(stderr, "unable to allocate memory\n");
		exit(1);
	}

	// North/south neighbour rows, clamped at the image border.
#pragma acc kernels create(iN[0:rows], iS[0:rows])
#pragma acc loop independent
	for (int i=0; i< rows; i++) {
		iN[i] = i-1;
		iS[i] = i+1;
		if (i == 0) iN[0] = 0;
		if (i == rows-1) iS[rows-1] = rows-1;
	}
	// West/east neighbour columns, clamped at the image border.
#pragma acc kernels create(jW[0:cols], jE[0:cols])
#pragma acc loop independent
	for (int j=0; j< cols; j++) {
		jW[j] = j-1;
		jE[j] = j+1;
		if (j == 0) jW[0] = 0;
		if (j == cols-1) jE[cols-1] = cols-1;
	}

	printf("Randomizing the input matrix\n");

	random_matrix(I, rows, cols);

#pragma acc kernels copyin(I[0:size_I]) create(J[0:size_I])
#pragma acc loop independent
	for (k = 0;  k < size_I; k++ ) {
		J[k] = (float)exp(I[k]) ;
	}

	printf("Start the SRAD main loop\n");

	// NOTE(review): iN/iS/jW/jE and J only appear in `create` clauses on the
	// kernels constructs above, while the data construct below lists the
	// index tables as `present` and J as `copyout`. Confirm that the target
	// OpenACC runtime keeps that data resident between the regions.
#pragma acc data copyout(J[0:size_I]) \
	create(dN[0:size_I], dS[0:size_I], dW[0:size_I], dE[0:size_I], c[0:size_I]) \
	present(iN, iS, jW, jE)
	{
#ifdef ITERATION

		for (iter=0; iter< niter; iter++){
#endif
			// Mean and variance of J over the speckle region give q0sqr.
			sum=0; sum2=0;
#pragma acc kernels
#pragma acc loop vector reduction(+:sum,+:sum2) independent
			for (i=r1; i<=r2; i++) {
				//    	#pragma acc loop vector reduction(+:sum,+:sum2) independent
				for (j=c1; j<=c2; j++) {
					tmp   = J[i * cols + j];
					sum  += tmp ;
					sum2 += tmp*tmp;
				}
			}
			meanROI = sum / size_R;
			varROI  = (sum2 / size_R) - meanROI*meanROI;
			q0sqr   = varROI / (meanROI*meanROI);


			// Pass 1: directional derivatives and diffusion coefficient.
#pragma acc kernels
#pragma acc loop independent
			for (int i = 0 ; i < rows ; i++) {
				for (int j = 0; j < cols; j++) {

					k = i * cols + j;
					Jc = J[k];

					// directional derivatives
					dN[k] = J[iN[i] * cols + j] - Jc;
					dS[k] = J[iS[i] * cols + j] - Jc;
					dW[k] = J[i * cols + jW[j]] - Jc;
					dE[k] = J[i * cols + jE[j]] - Jc;

					G2 = (dN[k]*dN[k] + dS[k]*dS[k]
							+ dW[k]*dW[k] + dE[k]*dE[k]) / (Jc*Jc);

					L = (dN[k] + dS[k] + dW[k] + dE[k]) / Jc;

					num  = (0.5*G2) - ((1.0/16.0)*(L*L)) ;
					den  = 1 + (.25*L);
					qsqr = num/(den*den);

					// diffusion coefficient (equ 33)
					den = (qsqr-q0sqr) / (q0sqr * (1+q0sqr)) ;
					c[k] = 1.0 / (1.0+den) ;

					// saturate diffusion coefficient to [0, 1]
					if (c[k] < 0) {c[k] = 0;}
					else if (c[k] > 1) {c[k] = 1;}

				}
			}

			// Pass 2: divergence and in-place image update.
#pragma acc kernels
#pragma acc loop independent
			for (int i = 0; i < rows; i++) {
				for (int j = 0; j < cols; j++) {

					// current index
					k = i * cols + j;

					// diffusion coefficient
					cN = c[k];
					cS = c[iS[i] * cols + j];
					cW = c[k];
					cE = c[i * cols + jE[j]];

					// divergence (equ 58)
					D = cN * dN[k] + cS * dS[k] + cW * dW[k] + cE * dE[k];

					// image update (equ 61)
					J[k] = J[k] + 0.25*lambda*D;
#ifdef OUTPUT
					//printf("%.5f ", J[k]);
#endif //output
				}
#ifdef OUTPUT
				//printf("\n");
#endif //output
			}

#ifdef ITERATION
		}
#endif

	} /* end pragma acc data */


	//#ifdef OUTPUT
	for( int i = 0 ; i < rows ; i++){
		for ( int j = 0 ; j < cols ; j++){

			printf("%.5f ", J[i * cols + j]);

		}
		printf("\n");
	}
	//#endif

	printf("Computation Done\n");

	free(I);
	free(J);
	free(iN); free(iS); free(jW); free(jE);
	free(dN); free(dS); free(dW); free(dE);

	free(c);
	return 0;
}
Example #29
0
/*
 * PURPOSE: run the command which the user entered
 * INPUTS:
 *  cmd       parsed command: cmd->cmds[0] is the command name and the
 *            remaining cmd->num_cmds - 1 entries are its arguments
 *  mats      the matrix list
 *  num_mats  the number of matrices in the list
 * RETURN: void
 *  Nothing is returned. On any error a message is printed and the function
 *  returns early.
 *
 * Recognised commands (token count includes the command name):
 *  display <mat>                    (2)
 *  add <mat1> <mat2> <result>       (4)
 *  duplicate <mat> <new name>       (3)
 *  equal <mat1> <mat2>              (3)
 *  shift <mat> <direction> <count>  (4)
 *  read <file>                      (2)
 *  write <mat>                      (2)
 *  create <name> <rows> <cols>      (4)
 *  random <mat> <start> <end>       (4)
 *
 * The token count is always checked before any argument is touched, and a
 * matrix index is only used after the lookup reported success (>= 0).
 **/
void run_commands (Commands_t* cmd, Matrix_t** mats, unsigned int num_mats) {
	if(!cmd){
		printf("commands array is null\n");
		return;
	}
	/* Check the list pointer itself before looking at its first slot. */
	if(!mats || !(*mats)){
		printf("matrix list is null\n");
		return;
	}
	/* Without a command name there is nothing to dispatch on. */
	if (cmd->num_cmds < 1 || !cmd->cmds[0]) {
		printf("Not a command in this application\n");
		return;
	}

	const char* name = cmd->cmds[0];

	/*Parsing and calling of commands*/
	if (cmd->num_cmds == 2 && strcmp(name, "display") == 0) {
		/*find the requested matrix*/
		int idx = find_matrix_given_name(mats,num_mats,cmd->cmds[1]);
		if (idx >= 0) {
			display_matrix (mats[idx]);
		}
		else {
			printf("Matrix (%s) doesn't exist\n", cmd->cmds[1]);
			return;
		}
	}
	else if (cmd->num_cmds == 4 && strcmp(name, "add") == 0) {
		int mat1_idx = find_matrix_given_name(mats,num_mats,cmd->cmds[1]);
		int mat2_idx = find_matrix_given_name(mats,num_mats,cmd->cmds[2]);
		/* When either operand is missing the command is silently ignored. */
		if (mat1_idx >= 0 && mat2_idx >= 0) {
			Matrix_t* c = NULL;
			if( !create_matrix (&c,cmd->cmds[3], mats[mat1_idx]->rows,
					mats[mat1_idx]->cols)) {
				printf("Failure to create the result Matrix (%s)\n", cmd->cmds[3]);
				return;
			}

			/* 999 is the value this code treats as failure of
			 * add_matrix_to_array.
			 * NOTE(review): `c` is not released on this path - confirm
			 * which routine is meant to destroy a Matrix_t. */
			if(add_matrix_to_array(mats,c, num_mats) == 999){
				perror("PROGRAM FAILED TO ADD MATRIX TO ARRAY\n");
				return;
			}

			if (! add_matrices(mats[mat1_idx], mats[mat2_idx],c) ) {
				printf("Failure to add %s with %s into %s\n", mats[mat1_idx]->name, mats[mat2_idx]->name,c->name);
				return;
			}
		}
	}
	else if (cmd->num_cmds == 3 && strcmp(name, "duplicate") == 0
		&& strlen(cmd->cmds[1]) + 1 <= MATRIX_NAME_LEN) {
		int mat1_idx = find_matrix_given_name(mats,num_mats,cmd->cmds[1]);
		if (mat1_idx >= 0 ) {
			Matrix_t* dup_mat = NULL;
			if( !create_matrix (&dup_mat,cmd->cmds[2], mats[mat1_idx]->rows,
					mats[mat1_idx]->cols)) {
				return;
			}
			if(!duplicate_matrix (mats[mat1_idx], dup_mat)){
				perror("PROGRAM FAILED TO DUPLICATE MATRIX\n");
				return;
			}
			if(add_matrix_to_array(mats,dup_mat,num_mats) == 999){
				perror("PROGRAM FAILED TO ADD MATRIX TO ARRAY\n");
				return;
			}
			printf ("Duplication of %s into %s finished\n", mats[mat1_idx]->name, cmd->cmds[2]);
		}
		else {
			printf("Duplication Failed\n");
			return;
		}
	}
	/* "equal" reads two matrix names, so it needs three tokens in total. */
	else if (cmd->num_cmds == 3 && strcmp(name, "equal") == 0) {
		int mat1_idx = find_matrix_given_name(mats,num_mats,cmd->cmds[1]);
		int mat2_idx = find_matrix_given_name(mats,num_mats,cmd->cmds[2]);
		if (mat1_idx >= 0 && mat2_idx >= 0) {
			if ( equal_matrices(mats[mat1_idx],mats[mat2_idx]) ) {
				printf("SAME DATA IN BOTH\n");
			}
			else {
				printf("DIFFERENT DATA IN BOTH\n");
			}
		}
		else {
			printf("Equal Failed\n");
			return;
		}
	}
	else if (cmd->num_cmds == 4 && strcmp(name, "shift") == 0) {
		int mat1_idx = find_matrix_given_name(mats,num_mats,cmd->cmds[1]);
		const int shift_value = atoi(cmd->cmds[3]);
		if (mat1_idx >= 0 ) {
			/* Only the first character of the direction token is passed on. */
			if(!bitwise_shift_matrix(mats[mat1_idx],cmd->cmds[2][0], shift_value)){
				perror("PROGRAM FAILED TO SHIFT MATRIX\n");
				return;
			}
			printf("Matrix (%s) has been shifted by %d\n", mats[mat1_idx]->name, shift_value);
		}
		else {
			printf("Matrix shift failed\n");
			return;
		}

	}
	else if (cmd->num_cmds == 2 && strcmp(name, "read") == 0) {
		Matrix_t* new_matrix = NULL;
		if(! read_matrix(cmd->cmds[1],&new_matrix)) {
			printf("Read Failed\n");
			return;
		}

		if(add_matrix_to_array(mats,new_matrix, num_mats) == 999){
			perror("PROGRAM FAILED TO ADD MATRIX TO ARRAY\n");
			return;
		}
		printf("Matrix (%s) is read from the filesystem\n", cmd->cmds[1]);
	}
	else if (cmd->num_cmds == 2 && strcmp(name, "write") == 0) {
		int mat1_idx = find_matrix_given_name(mats,num_mats,cmd->cmds[1]);
		/* An unknown matrix name must not be used as an index. */
		if (mat1_idx < 0) {
			printf("Write Failed\n");
			return;
		}
		if(! write_matrix(mats[mat1_idx]->name,mats[mat1_idx])) {
			printf("Write Failed\n");
			return;
		}
		else {
			printf("Matrix (%s) is wrote out to the filesystem\n", mats[mat1_idx]->name);
		}
	}
	/* The token count is tested first so cmds[1] is known to exist
	 * before strlen() reads it. */
	else if (cmd->num_cmds == 4 && strcmp(name, "create") == 0
		&& strlen(cmd->cmds[1]) + 1 <= MATRIX_NAME_LEN) {
		Matrix_t* new_mat = NULL;
		const unsigned int rows = atoi(cmd->cmds[2]);
		const unsigned int cols = atoi(cmd->cmds[3]);

		if(!create_matrix(&new_mat,cmd->cmds[1],rows, cols)){
			perror("PROGRAM FAILED TO ADD CREATE TO ARRAY\n");
			return;
		}
		if(add_matrix_to_array(mats,new_mat,num_mats) == 999){
			perror("PROGRAM FAILED TO ADD MATRIX TO ARRAY\n");
			return;
		}
		printf("Created Matrix (%s,%u,%u)\n", new_mat->name, new_mat->rows, new_mat->cols);
	}
	else if (cmd->num_cmds == 4 && strcmp(name, "random") == 0) {
		int mat1_idx = find_matrix_given_name(mats,num_mats,cmd->cmds[1]);
		const unsigned int start_range = atoi(cmd->cmds[2]);
		const unsigned int end_range = atoi(cmd->cmds[3]);
		/* An unknown matrix name must not be used as an index. */
		if (mat1_idx < 0) {
			printf("Matrix (%s) doesn't exist\n", cmd->cmds[1]);
			return;
		}
		if(!random_matrix(mats[mat1_idx],start_range, end_range)) {
			perror("PROGRAM FAILED TO RANDOMIZE MATRIX\n");
			return;
		}

		printf("Matrix (%s) is randomized between %u %u\n", mats[mat1_idx]->name, start_range, end_range);
	}
	else {
		printf("Not a command in this application\n");
	}

}
Example #30
0
File: srad.c Project: chetui/cc0
// SRAD benchmark driver with fixed parameters.
//
// Works on a rows x cols image: I is filled by random_matrix, J = exp(I),
// and niter diffusion iterations are applied to J in place. All state
// (sizes, arrays, scalars) lives in variables declared outside this function.
//
// The speckle region of interest is rows r1..r2 and columns c1..c2
// (inclusive). It must be non-empty: its mean and variance feed q0sqr, and an
// empty region makes q0sqr 0/0, which turns every value of J into NaN.
long main()
{
    long i, j;

    // NOTE(review): presumably the base address used by malloc_sr - confirm.
    _pMalloc = 0x500000000;

    rows = 1024;
    cols = 1024;
    r1   = 200; //y1 position of the speckle
    r2   = 500; //y2 position of the speckle
    c1   = 800; //x1 position of the speckle (must not exceed c2)
    c2   = 1000; //x2 position of the speckle
    lambda = 0.5; //Lambda value
    niter = 100; //number of iterations


    size_I = cols * rows;
    size_R = (r2 - r1 + 1) * (c2 - c1 + 1);

    // Every element is allocated as 8 bytes.
    I = (double *)malloc_sr(size_I * 8);
    J = (double *)malloc_sr(size_I * 8);
    c  = (double *)malloc_sr(8 * size_I) ;

    iN = (long *)malloc_sr(8 * rows) ;
    iS = (long *)malloc_sr(8 * rows) ;
    jW = (long *)malloc_sr(8 * cols) ;
    jE = (long *)malloc_sr(8 * cols) ;


    dN = (double *)malloc_sr(8 * size_I) ;
    dS = (double *)malloc_sr(8 * size_I) ;
    dW = (double *)malloc_sr(8 * size_I) ;
    dE = (double *)malloc_sr(8 * size_I) ;


    // Neighbour index tables; the border entries are clamped just below.
    for(i = 0; i < rows; i+=1)
    {
        iN[i] = i - 1;
        iS[i] = i + 1;
    }
    for(j = 0; j < cols; j+=1)
    {
        jW[j] = j - 1;
        jE[j] = j + 1;
    }
    iN[0]    = 0;
    iS[rows - 1] = rows - 1;
    jW[0]    = 0;
    jE[cols - 1] = cols - 1;


    random_matrix(I, rows, cols);

    for(k = 0;  k < size_I; k+=1)
    {
        J[k] = exp(I[k]) ;
    }



    for(iter = 0; iter < niter; iter+=1)
    {

        // Mean and variance of J over the speckle region give q0sqr.
        sum = 0;
        sum2 = 0;
        for(i = r1; i <= r2; i = i + 1)
        {
            for(j = c1; j <= c2; j = j + 1)
            {
                tmp   = J[i * cols + j];
                sum  += tmp ;
                sum2 += tmp * tmp;
            }
        }
        meanROI = sum / size_R;
        varROI  = (sum2 / size_R) - meanROI * meanROI;
        q0sqr   = varROI / (meanROI * meanROI);



        // Pass 1: directional derivatives and diffusion coefficient.
//         #pragma omp parallel for shared(J, dN, dS, dW, dE, c, rows, cols, iN, iS, jW, jE) private(i, j, k, Jc, G2, L, num, den, qsqr)
        for(i = 0 ; i < rows ; i = i + 1)
        {
            for(j = 0; j < cols; j = j + 1)
            {

                k = i * cols + j;
                Jc = J[k];

                // directional derivatives
                dN[k] = J[iN[i] * cols + j] - Jc;
                dS[k] = J[iS[i] * cols + j] - Jc;
                dW[k] = J[i * cols + jW[j]] - Jc;
                dE[k] = J[i * cols + jE[j]] - Jc;

                G2 = (dN[k] * dN[k] + dS[k] * dS[k] + dW[k] * dW[k] + dE[k] * dE[k]) / (Jc * Jc);

                L = (dN[k] + dS[k] + dW[k] + dE[k]) / Jc;

                num  = (0.5 * G2) - ((1.0 / 16.0) * (L * L)) ;
                den  = 1 + (0.25 * L);
                qsqr = num / (den * den);

                // diffusion coefficient (equ 33)
                den = (qsqr - q0sqr) / (q0sqr * (1 + q0sqr)) ;
                c[k] = 1.0 / (1.0 + den) ;

                // saturate diffusion coefficient to [0, 1]
                if(c[k] < 0)
                {
                    c[k] = 0;
                }
                else if(c[k] > 1)
                {
                    c[k] = 1;
                }

            }

        }

        // Pass 2: divergence and in-place image update.
//         #pragma omp parallel for shared(J, c, rows, cols, lambda) private(i, j, k, D, cS, cN, cW, cE)
        for(i = 0; i < rows; i = i + 1)
        {
            for(j = 0; j < cols; j = j + 1)
            {

                // current index
                k = i * cols + j;

                // diffusion coefficient
                cN = c[k];
                cS = c[iS[i] * cols + j];
                cW = c[k];
                cE = c[i * cols + jE[j]];

                // divergence (equ 58)
                D = cN * dN[k] + cS * dS[k] + cW * dW[k] + cE * dE[k];

                // image update (equ 61)
                J[k] = J[k] + 0.25 * lambda * D;

            }
        }
    }


    return 0;
}