/*
 * Benchmark driver for matmul_optimize().
 *
 * Allocates two input matrices, an output matrix and a reference product
 * (WIDTH*HEIGHT doubles each, 64-byte aligned), loads the inputs and the
 * reference through read_matrix(TEST_FILENAME, ...), times one call to
 * matmul_optimize() with timestamp_us() and prints the elapsed value.
 * " incorrect" is appended when compare_matrix() returns non-zero for the
 * computed product versus the reference.
 *
 * Returns 0 in every case (unchanged from the original); setup failures are
 * only reported on stdout.
 */
int main(int argc, char *argv[])
{
	/* Size arithmetic is done in size_t so WIDTH*HEIGHT cannot overflow int. */
	const size_t bytes = sizeof(double) * (size_t)(WIDTH) * (size_t)(HEIGHT);
	double* matA = _mm_malloc(bytes, 64);
	double* matB = _mm_malloc(bytes, 64);
	double* prod = _mm_malloc(bytes, 64);
	double* prod_ref = _mm_malloc(bytes, 64);
	int ok = 0;

	(void)argc;
	(void)argv;

	if (!matA || !matB || !prod || !prod_ref) {
		printf("Cannot allocate memory for the matrices\n");
	} else {
		int read_flag = read_matrix(TEST_FILENAME, prod_ref, matA, matB);
		if (read_flag == 1)
			printf("Cannot open test file\n");
		else if (read_flag == 2)
			printf("Error while reading data from test file");
		else if (read_flag == 3)
			printf("Error while closing the test file");
		/* Any non-zero flag aborts the run, even one without a message. */
		ok = (read_flag == 0);
	}

	if (ok) {
		uint64_t start = timestamp_us();
		matmul_optimize(prod, matA, matB); /* run the optimization functions. */
		uint64_t elapsed = timestamp_us() - start;

		/* uint64_t is not 'unsigned long' everywhere: print via %llu. */
		if (compare_matrix(prod, prod_ref)) {
			printf("%llu incorrect\n", (unsigned long long)elapsed);
		} else {
			printf("%llu\n", (unsigned long long)elapsed);
		}
	}

	/* Single release point, so the failure paths no longer leak. */
	if (prod_ref)
		_mm_free(prod_ref);
	if (prod)
		_mm_free(prod);
	if (matB)
		_mm_free(matB);
	if (matA)
		_mm_free(matA);
	return 0;
}
示例#2
0
int main(int argc, char** argv) {
	arg_size = 1024;
	if(argc > 1) arg_size = atoi(argv[1]);
	arg_cutoff_value = 64;
	if(argc > 2) arg_cutoff_value = atoi(argv[2]);

	if((arg_size & (arg_size - 1)) != 0 || (arg_size % 16) != 0) inncabs::error("Error: matrix size must be a power of 2 and a multiple of 16\n");
	REAL *A = alloc_matrix(arg_size);
	REAL *B = alloc_matrix(arg_size);
	REAL *C = alloc_matrix(arg_size);
	REAL *D = alloc_matrix(arg_size);

	std::stringstream ss;
	ss << "Strassen Algorithm (" << arg_size << " x " << arg_size 
		<< " matrix with cutoff " << arg_cutoff_value << ") ";

	init_matrix(arg_size, A, arg_size);
	init_matrix(arg_size, B, arg_size);
	OptimizedStrassenMultiply_seq(D, A, B, arg_size, arg_size, arg_size, arg_size, 1);

	inncabs::run_all(
		[&](const std::launch l) {
			OptimizedStrassenMultiply_par(l, C, A, B, arg_size, arg_size, arg_size, arg_size, 1);
			return 1;
		},
		[&](int result) {
			return compare_matrix(arg_size, C, arg_size, D, arg_size);
		},
		ss.str()
		);
}
/* This function checks if a matrix is orthogonal
 * returns -1 if it can't be checked
 * returns 0 if it isn't orthogonal
 * returns 1 if it is orthogonal                */
int is_orthogonal(struct matrix m)
{
    if(m.rows!=m.columns)
    {
        printf("This matrix isn't square\n");
        return -1;
    }

    struct matrix c=traspose(m);
    c=matrix_multiplication(m,c);
    return compare_matrix(c,identity_matrix(c.rows));
}
示例#4
0
/*
 * Multiplies two size x size matrices with mm() and again with mm_block(),
 * and prints "true" when compare_matrix() returns 1 for the two products.
 * The mm_fast() variant is currently disabled (see the commented-out code).
 */
void work(int size)
{
	matrix lhs, rhs, reference, fast_result, blocked;

	/* Storage for the operands and the products that are actually computed. */
	allocate_matrix(&lhs, size);
	allocate_matrix(&rhs, size);
	allocate_matrix(&reference, size);
	/* allocate_matrix(&fast_result, size); */
	allocate_matrix(&blocked, size);

	/* Fill the operands and clear the reference product. */
	init_matrix(lhs);
	init_matrix(rhs);
	init_matrix_zero(reference);

	/* Reference product from the sequential multiplication. */
	mm(lhs, rhs, reference);

	/*
	 * Disabled check of the mm_fast() variant:
	 *   init_matrix_zero(fast_result);
	 *   mm_fast(lhs, rhs, fast_result);
	 *   if (compare_matrix(reference, fast_result)==1)
	 *       printf("true");
	 *   free_matrix(fast_result);
	 */

	/* Product from mm_block(), checked against the reference. */
	init_matrix_zero(blocked);
	mm_block(lhs, rhs, blocked);

	if (compare_matrix(reference, blocked) == 1) {
		printf("true");
	}

	/* print_matrix(reference); */

	free_matrix(lhs);
	free_matrix(rhs);

	free_matrix(reference);
	free_matrix(blocked);
}
/*
 * Checks c_calc_matrix_turn() on a 3x3 matrix.
 *
 * The input is filled row by row with 1..9. The expected matrix is filled
 * with k + 2*size - 4*j - 2*i, which for size 3 gives
 *   7 4 1
 *   8 5 2
 *   9 6 3
 * (the formula is only meaningful for size_of_matrix == 3).
 * The test passes when compare_matrix() returns 0 for the turned input
 * versus the expected matrix.
 */
void CTestsTest::c_test_matrix_turn()
{
    int size_of_matrix=3;
    int i, j, k=0;

    // Row-pointer tables and rows for the input and the expected result.
    int **matrix = static_cast<int**>(malloc(size_of_matrix*sizeof(int*)));
    int **matrix_r = static_cast<int**>(malloc(size_of_matrix*sizeof(int*)));
    for(i=0; i<size_of_matrix; i++)
    {
        matrix[i] = static_cast<int*>(malloc(size_of_matrix*sizeof(int)));
        matrix_r[i] = static_cast<int*>(malloc(size_of_matrix*sizeof(int)));
    }

    for(i=0; i<size_of_matrix; i++)
        for(j=0; j<size_of_matrix; j++)
        {
            k++;
            matrix[i][j]=k;
            matrix_r[i][j]=k+size_of_matrix*2-4*j-2*i;
        }

    c_calc_matrix_turn(size_of_matrix, matrix);

    // QCOMPARE returns from the test function when the comparison fails, so
    // take the result first and release all memory before asserting on it.
    int mismatch = compare_matrix(matrix, matrix_r, size_of_matrix);

    for(i=0; i<size_of_matrix; i++)
    {
        free(matrix[i]);
        free(matrix_r[i]);
    }
    free(matrix);
    free(matrix_r);

    QCOMPARE(mismatch, 0);
}
/*
 * Benchmark driver for matconv_optimize().
 *
 * Allocates the output image, the reference image, the filter and a padded
 * input image (PAD_ZERO extra elements on each side, 64-byte aligned).
 * It loads the reference, filter and image through
 * read_matrix(TEST_FILENAME, ...), times one call to matconv_optimize() with
 * timestamp_us() and prints the elapsed value. " incorrect" is appended when
 * compare_matrix() returns non-zero for the computed image versus the
 * reference.
 *
 * Returns 0 in every case (unchanged from the original); setup failures are
 * only reported on stdout.
 */
int main(int argc, char *argv[])
{
	/* Size arithmetic is done in size_t so the products cannot overflow int. */
	const size_t pixels = (size_t)(WIMAGE) * (size_t)(HIMAGE);
	const size_t pad = (size_t)(PAD_ZERO);
	uint64_t* newimg = _mm_malloc(pixels*sizeof(uint64_t), 64);
	uint64_t* newimg_ref = _mm_malloc(pixels*sizeof(uint64_t), 64);
	uint16_t* filter = _mm_malloc((size_t)(WFILTER)*(size_t)(HFILTER)*sizeof(uint16_t), 64);
	/* image_base owns the allocation; image points just past the leading pad. */
	uint16_t* image_base = _mm_malloc((pixels+2*pad)*sizeof(uint16_t), 64);
	uint16_t* image = NULL;
	int ok = 0;

	(void)argc;
	(void)argv;

	if (!newimg || !newimg_ref || !filter || !image_base) {
		printf("Cannot allocate memory for the images\n");
	} else {
		/*
		 * Zero BOTH pads. The original cleared only the leading one, which
		 * left the trailing PAD_ZERO elements uninitialised.
		 */
		for (size_t i = 0; i < pad; i++) {
			image_base[i] = 0;
			image_base[pad + pixels + i] = 0;
		}
		image = image_base + pad;

		int read_flag = read_matrix(TEST_FILENAME, newimg_ref, filter, image);
		if (read_flag == 1)
			printf("Cannot open test file\n");
		else if (read_flag == 2)
			printf("Error while reading data from test file");
		else if (read_flag == 3)
			printf("Error while closing the test file");
		/* Any non-zero flag aborts the run, even one without a message. */
		ok = (read_flag == 0);
	}

	if (ok) {
		uint64_t start = timestamp_us();
		matconv_optimize(newimg, filter, image); /* run the optimization functions. */
		uint64_t elapsed = timestamp_us() - start;

		/* uint64_t is not 'unsigned long' everywhere: print via %llu. */
		if (compare_matrix(newimg, newimg_ref)) {
			printf("%llu incorrect\n", (unsigned long long)elapsed);
		} else {
			printf("%llu\n", (unsigned long long)elapsed);
		}
	}

	/* Single release point, so the failure paths no longer leak. */
	if (filter)
		_mm_free(filter);
	if (image_base)
		_mm_free(image_base);
	if (newimg)
		_mm_free(newimg);
	if (newimg_ref)
		_mm_free(newimg_ref);
	return 0;
}
示例#7
0
/*
 * Lace driver for the Strassen multiplication benchmark.
 *
 * Options:
 *   -w <n>   number of workers passed to lace_init (default 1)
 *   -q <n>   dqsize passed to lace_init (default 100000)
 *   -c       also compute the product with matrixmul() and compare
 *   -h       call usage()
 * The first non-option argument is the matrix size n, which must be a
 * positive power of two and a multiple of 16.
 *
 * Returns 1 for an invalid size, 0 otherwise. A missing size argument exits
 * with status 1 after usage().
 */
int main(int argc, char *argv[])
{
    int workers = 1;
    int dqsize = 100000;
    int verify = 0;

    /* getopt() returns int. Storing it in a char makes the '!= -1' test
     * always true on platforms where plain char is unsigned. */
    int c;
    /* 'h' takes no argument ("h:" made a bare -h a getopt error). */
    while ((c=getopt(argc, argv, "w:q:hc")) != -1) {
        switch (c) {
            case 'w':
                workers = atoi(optarg);
                break;
            case 'q':
                dqsize = atoi(optarg);
                break;
            case 'c':
                verify = 1;
                break;
            case 'h':
                usage(argv[0]);
                break;
            default:
                abort();
        }
    }

    if (optind == argc) {
        usage(argv[0]);
        exit(1);
    }

    int n = atoi(argv[optind]);

    /* Validate before starting Lace, so the error path does not return with
     * the runtime still initialised. Zero passes the bit trick and atoi()
     * yields 0 for non-numeric input, so non-positive sizes are rejected
     * explicitly. */
    if (n <= 0 || (n & (n - 1)) != 0 || (n % 16) != 0) {
        printf("%d: matrix size must be a power of 2"
                " and a multiple of %d\n", n, 16);
        return 1;
    }

    lace_init(workers, dqsize);
    lace_startup(0, 0, 0);

    REAL *A, *B, *C1, *C2;

    A = alloc_matrix(n);
    B = alloc_matrix(n);
    C1 = alloc_matrix(n);
    C2 = alloc_matrix(n);

    init_matrix(n, A, n);
    init_matrix(n, B, n);

    LACE_ME;

    /* Only the Strassen call itself is timed. */
    double t1=wctime();
    CALL(OptimizedStrassenMultiply, C2, A, B, n, n, n, n);
    double t2=wctime();

    /* With -c, 'verify' is reused to hold the comparison result; a non-zero
     * value is reported as a wrong result below. */
    if (verify) {
        matrixmul(n, A, n, B, n, C1, n);
        verify = compare_matrix(n, C1, n, C2, n);
    }

    if (verify)
        printf("WRONG RESULT!\n");
    else {
        printf("Time: %f\n", t2-t1);
    }

    lace_exit();

    return 0;
}
示例#8
0
/*
 * ADOL-C Hessian driver.
 *
 * Records func_eval() on tape 'tag' at the point returned by
 * get_initial_value(), then, depending on the build flags:
 *   _compare_with_full : computes the dense Hessian with hessian() and
 *                        reports the elapsed time
 *   edge_pushing       : computes the sparse Hessian with sparse_hess()
 *                        using options {PRE_ACC, COMPUT_GRAPH} and reports
 *                        the elapsed time and nnz
 *   both               : additionally calls compare_matrix() on the two results
 *   _PRINTOUT          : prints the computed entries
 */
int main(int argc, char *argv[]) {
  int n=get_num_ind();
  int i,j;
  struct timeval tv1,tv2;
  adouble *xad;
  adouble fad;
  double f;
  double *x;
  x=new double[n];
  xad=new adouble[n];
  get_initial_value(x);

  // Tape the function: <<= feeds the inputs in, >>= extracts the result.
  printf("evaluating the function...");
  trace_on(tag);
  for(i=0;i<n;i++)
  {
    xad[i] <<= x[i];
  }
  fad=func_eval(xad);
  fad >>= f;
  trace_off();
  printf("done!\n");
//  printf("function value  =<%10.20f>\n",f);
//  function(tag,1,n,x,&f);
//  printf("adolc func value=<%10.20f>\n",f);
//tape_doc(tag,1,n,x,&f);
#ifdef _compare_with_full
  double **H;
  H = myalloc2(n,n);
  printf("computing full hessain....");
  gettimeofday(&tv1,NULL);
  hessian(tag,n,x,H);
  // Stop the clock before printing so console I/O is not part of the measurement.
  gettimeofday(&tv2,NULL);
  printf("done\n");
  printf("Computing the full hessian cost %10.6f seconds\n",(tv2.tv_sec-tv1.tv_sec)+(double)(tv2.tv_usec-tv1.tv_usec)/1000000);
#ifdef _PRINTOUT
    for(i=0;i<n;i++){
      for(j=0;j<n;j++){
        printf("H[%d][%d]=<%10.10f>",i,j,H[i][j]);
      }
      printf("\n");
    }
    printf("\n");
#endif
#endif

#ifdef edge_pushing
  unsigned int    *rind  = NULL;
  unsigned int    *cind  = NULL;
  double *values = NULL;
  int nnz;
  int options[2];
  options[0]=PRE_ACC;
  options[1]=COMPUT_GRAPH;
  gettimeofday(&tv1,NULL);
//  edge_hess(tag, 1, n, x, &nnz, &rind, &cind, &values, options);
  sparse_hess(tag,n,0,x, &nnz, &rind, &cind, &values, options);
  gettimeofday(&tv2,NULL);
  printf("Sparse Hessian: edge pushing cost %10.6f seconds\n",(tv2.tv_sec-tv1.tv_sec)+(double)(tv2.tv_usec-tv1.tv_usec)/1000000);

#ifdef _PRINTOUT
  // Indices are unsigned int, so they are printed with %u.
  // cind is deliberately printed (and compared below) before rind, as in the original.
  for(i=0;i<nnz;i++){
    printf("<%u,%u>:<%10.10f>\n",cind[i],rind[i],values[i]);
//    printf("%d %d \n", rind[i], cind[i]);
  }
#endif
#endif

#ifdef _compare_with_full
#ifdef edge_pushing
  compare_matrix(n,H,nnz,cind,rind,values);
#endif
  myfree2(H);
#endif

#ifdef edge_pushing
  printf("nnz=%d\n", nnz);
  free(rind); rind=NULL;
  free(cind); cind=NULL;
  free(values); values=NULL;
#endif
  delete[] x;
  delete[] xad;
  return 0;
}
示例#9
0
/*
 * ADOL-C sparse-Hessian comparison driver.
 *
 * Records eval_func<adouble>() on tape 'tag' at the point returned by
 * get_initials() and prints the function value. Depending on the build
 * flags it then runs:
 *   COMPARE_WITH_FULL_HESS : dense hessian(), later compared with the sparse
 *                            result through compare_matrix()
 *   LIVARH / LIVARHACC     : edge_hess() with options {0,1} / {1,1}
 *   DIRECT / INDIRECT      : sparse_hess() with options {0,1} / {0,0}
 *   PRINT_RESULTS          : prints the computed entries
 * Each enabled driver reports its elapsed wall-clock time.
 */
int main(int argc, char *argv[]) {
  int n=NUM_IND;
  int i,j;
  struct timeval tv1,tv2;
  adouble *xad;
  adouble fad;
  double f;
  double *x;
  x=new double[n];
  xad=new adouble[n];
  get_initials(x, n);

  // Tape the function: <<= feeds the inputs in, >>= extracts the result.
//  printf("evaluating the function...");
  trace_on(tag);
  for(i=0;i<n;i++)
  {
    xad[i] <<= x[i];
  }
  fad=eval_func<adouble>(xad, n);
  fad >>= f;
  trace_off();
//  printf("done!\n");
  std::cout << "y = " << f << std::endl;
#ifdef COMPARE_WITH_FULL_HESS
  double **H;
  H = myalloc2(n,n);
  printf("computing full hessain....");
  gettimeofday(&tv1,NULL);
  hessian(tag,n,x,H);
  printf("done\n");
  gettimeofday(&tv2,NULL);
  printf("Computing the full hessian cost %10.6f seconds\n",(tv2.tv_sec-tv1.tv_sec)+(double)(tv2.tv_usec-tv1.tv_usec)/1000000);

#ifdef PRINT_RESULTS
    for(i=0;i<n;i++){
      for(j=0;j<n;j++){
        printf("H[%d][%d]=<%10.10f>",i,j,H[i][j]);
      }
      printf("\n");
    }
    printf("\n");
#endif
#endif

  unsigned int    *rind  = NULL;
  unsigned int    *cind  = NULL;
  double *values = NULL;
  // Start at 0: if none of the sparse drivers below is compiled in, the
  // print loop and compare_matrix() would otherwise read an indeterminate nnz.
  int nnz = 0;
  int options[2];

#ifdef LIVARH
  options[0]=0;
  options[1]=1;
  gettimeofday(&tv1,NULL);
  edge_hess(tag, 1, n, x, &nnz, &rind, &cind, &values, options);
  gettimeofday(&tv2,NULL);
  printf("Sparse Hessian: LivarH cost %10.6f seconds\n",(tv2.tv_sec-tv1.tv_sec)+(double)(tv2.tv_usec-tv1.tv_usec)/1000000);
#endif

#ifdef LIVARHACC
  options[0]=1;
  options[1]=1;
  gettimeofday(&tv1,NULL);
  edge_hess(tag, 1, n, x, &nnz, &rind, &cind, &values, options);
  gettimeofday(&tv2,NULL);
  printf("Sparse Hessian: LivarHACC cost %10.6f seconds\n",(tv2.tv_sec-tv1.tv_sec)+(double)(tv2.tv_usec-tv1.tv_usec)/1000000);
#endif

// Sparse ADOL-C drivers report the upper matrix, hence cind/rind are passed
// in swapped order to sparse_hess below.
#ifdef DIRECT
  options[0]=0;
  options[1]=1;
  gettimeofday(&tv1,NULL);
  sparse_hess(tag, n, 0, x, &nnz, &cind, &rind, &values, options);
  gettimeofday(&tv2,NULL);
  printf("Sparse Hessian: direct recovery cost %10.6f seconds\n",(tv2.tv_sec-tv1.tv_sec)+(double)(tv2.tv_usec-tv1.tv_usec)/1000000);
#endif

#ifdef INDIRECT
  options[0]=0;
  options[1]=0;
  gettimeofday(&tv1,NULL);
  sparse_hess(tag, n, 0, x, &nnz, &cind, &rind, &values, options);
  gettimeofday(&tv2,NULL);
  printf("Sparse Hessian: indirect recovery cost %10.6f seconds\n",(tv2.tv_sec-tv1.tv_sec)+(double)(tv2.tv_usec-tv1.tv_usec)/1000000);
#endif

#ifdef PRINT_RESULTS
  // Indices are unsigned int, so they are printed with %u.
  for(i=0;i<nnz;i++){
    printf("<%u,%u>:<%10.10f>\n",rind[i],cind[i],values[i]);
  }
#endif

#ifdef COMPARE_WITH_FULL_HESS
  compare_matrix(n,H,nnz,rind,cind,values);
  myfree2(H);
#endif

  // free(NULL) is a no-op, so this is safe even when no sparse driver ran.
  free(rind); rind=NULL;
  free(cind); cind=NULL;
  free(values); values=NULL;

  delete[] x;
  delete[] xad;
  return 0;
}