Beispiel #1
0
/*
 * Jacobi eigenvalue driver.
 *
 * Reads n3 square float matrices (n1 = n2 = n) from the "in" file, applies
 * niter sweeps of jacobi() over every off-diagonal pair (j,k) with j < k,
 * and writes the resulting diagonal of each matrix to "out".  When the
 * "eig" parameter is given, the matrix v that is handed to jacobi() is
 * written to that file as well, once per input matrix.
 */
int main(int argc, char* argv[])
{
    int j, k, n, n2, i3, n3, iter, niter;
    float **a, *e, **v, s2;
    sf_file mat, val, eig;

    sf_init(argc,argv);
    mat = sf_input("in");
    val = sf_output("out");

    if (SF_FLOAT != sf_gettype(mat)) sf_error("Need float input");
    if (!sf_histint(mat,"n1",&n)) sf_error("No n1= in input");
    if (!sf_histint(mat,"n2",&n2) || n2 != n) sf_error("Need n1=n2 in input");
    n3 = sf_leftsize(mat,2);

    /* the output holds one length-n vector per input matrix */
    sf_putint(val,"n2",1);

    if (!sf_getint("niter",&niter)) niter=10;

    a = sf_floatalloc2(n,n);
    e = sf_floatalloc(n);

    if (NULL != sf_getstring("eig")) {
	eig = sf_output("eig"); /* eigenvectors */
	v = sf_floatalloc2(n,n);
    } else {
	eig = NULL;
	v = NULL;
    }

    jacobi_init(n);

    for (i3=0; i3 < n3; i3++) {
	sf_floatread(a[0],n*n,mat);

	if (NULL != v) {
	    /* Start every matrix from the identity.  Initialising v only
	       once (before this loop) would make the second and later
	       matrices inherit whatever jacobi() left in v for the
	       previous one. */
	    for (j=0; j < n; j++) {
		for (k=0; k < n; k++) {
		    v[j][k] = (j==k)? 1.0:0.0;
		}
	    }
	}

	for (iter=0; iter < niter; iter++) {
	    /* s2 sums the values returned by jacobi() over one full sweep;
	       it is only reported, never used as a stopping criterion */
	    s2 = 0.;
	    for (j=0; j < n-1; j++) {
		for (k=j+1; k < n; k++) {
		    s2 += jacobi(a,j,k,v);
		}
	    }
	    sf_warning("iter=%d s2=%g",iter+1,s2);
	}

	/* the diagonal of the transformed matrix is the result vector */
	for (j=0; j < n; j++) {
	    e[j]=a[j][j];
	}

	sf_floatwrite(e,n, val);
	if (NULL != v) 	sf_floatwrite(v[0],n*n, eig);
    }

    exit(0);
}
Beispiel #2
0
// Jacobi solver kernels
/*
 * Wrapper around the jacobi_init kernel.
 *
 * Unpacks the fields of `chunk` and `settings` that the kernel takes as
 * separate arguments and forwards them together with rx and ry.  The call
 * is bracketed by the START_PROFILING / STOP_PROFILING macros, which
 * receive settings->kernel_profile and this function's name.
 */
void run_jacobi_init(Chunk* chunk, Settings* settings, double rx, double ry)
{
    START_PROFILING(settings->kernel_profile);

    jacobi_init(chunk->x,
                chunk->y,
                settings->halo_depth,
                settings->coefficient,
                rx,
                ry,
                chunk->density,
                chunk->energy,
                chunk->u0,
                chunk->u,
                chunk->kx,
                chunk->ky);

    STOP_PROFILING(settings->kernel_profile, __func__);
}
Beispiel #3
0
    /// Builds the dataflow graph for a block-decomposed Jacobi iteration,
    /// waits for the last sweep to finish and reports the elapsed time.
    ///
    /// blockList holds two buffers (index 0 and 1) of numBlocks x numBlocks
    /// futures, addressed as blockList[buffer][row][col].  Each sweep reads
    /// buffer `prev` and writes buffer `curr`; the two roles alternate.
    ///
    /// @param n               passed to jacobi_init and report_timing
    /// @param iterations      the sweep loop runs for i = 1 .. iterations-1,
    ///                        i.e. iterations-1 sweeps are scheduled
    /// @param block_size      passed to jacobi_init
    /// @param output_filename currently unused (grid output is disabled)
    ///
    /// NOTE(review): the index arithmetic below assumes numBlocks >= 2;
    /// smaller values make `numBlocks - 2` wrap around — confirm that
    /// jacobi_init guarantees this.
    void jacobi( size_t n , size_t iterations, size_t block_size, std::string output_filename) {
        hpx::util::high_resolution_timer t;

        vector< vector< vector< shared_future<block> > > > blockList(2);
        jacobi_init(blockList, n, block_size);

        const size_t numBlocks = blockList[0].size();
        const size_t last      = numBlocks - 1;

        // Buffer holding the most recent state.  The first sweep (i == 1)
        // reads buffer 1, so that is the current state if no sweep runs.
        size_t newest = 1;

        for(size_t i = 1; i < iterations; ++i) {
            const size_t prev = i%2;
            const size_t curr = (i+1)%2;

            // ---- column 0: bottom-left corner, left edge, top-left corner
            blockList[curr][0][0] = dataflow(
                    jacobi_BL, blockList[prev][0][0],
                               blockList[prev][0][1],
                               blockList[prev][1][0] );
            for(size_t row = 1; row < last; ++row) {
                blockList[curr][row][0] = dataflow(
                        jacobi_left, blockList[prev][row  ][0],
                                     blockList[prev][row  ][1],
                                     blockList[prev][row-1][0],
                                     blockList[prev][row+1][0] );
            }
            blockList[curr][last][0] = dataflow(
                    jacobi_TL, blockList[prev][last  ][0],
                               blockList[prev][last  ][1],
                               blockList[prev][last-1][0] );

            // ---- columns 1 .. last-1: bottom edge, interior, top edge
            for(size_t col = 1; col < last; ++col) {
                blockList[curr][0][col] = dataflow(
                        jacobi_bot, blockList[prev][0][col  ],
                                    blockList[prev][0][col-1],
                                    blockList[prev][0][col+1],
                                    blockList[prev][1][col  ] );
                for(size_t row = 1; row < last; ++row) {
                    // The result must land in the block the stencil is
                    // centred on, [row][col].  Writing it to [col][row]
                    // would transpose the interior on every sweep.
                    blockList[curr][row][col] = dataflow(
                            jacobi_op, blockList[prev][row  ][col  ],
                                       blockList[prev][row  ][col-1],
                                       blockList[prev][row  ][col+1],
                                       blockList[prev][row-1][col  ],
                                       blockList[prev][row+1][col  ]);
                }
                blockList[curr][last][col] = dataflow(
                        jacobi_top, blockList[prev][last  ][col  ],
                                    blockList[prev][last  ][col-1],
                                    blockList[prev][last  ][col+1],
                                    blockList[prev][last-1][col  ] );
            }

            // ---- column last: bottom-right corner, right edge, top-right corner
            blockList[curr][0][last] = dataflow(
                    jacobi_BR, blockList[prev][0][last  ],
                               blockList[prev][0][last-1],
                               blockList[prev][1][last  ]);
            for(size_t row = 1; row < last; ++row) {
                // NOTE(review): the right edge is updated with jacobi_left,
                // fed the left-hand neighbour where the left edge passes
                // its right-hand one.  Confirm this is intentional and not
                // a missing right-edge kernel.
                blockList[curr][row][last] = dataflow(
                        jacobi_left, blockList[prev][row  ][last  ],
                                     blockList[prev][row  ][last-1],
                                     blockList[prev][row-1][last  ],
                                     blockList[prev][row+1][last  ]);
            }
            blockList[curr][last][last] = dataflow(
                    jacobi_TR, blockList[prev][last  ][last  ],
                               blockList[prev][last  ][last-1],
                               blockList[prev][last-1][last  ]);

            newest = curr;
        }

        // Wait on the buffer written by the final sweep.  Every future in
        // it depends on the earlier sweeps, so this covers the whole graph.
        // (Selecting the buffer from n, the grid size, could pick the
        // older buffer and stop the timer before the last sweep is done.)
        for(auto& row : blockList[newest]) {
            hpx::wait_all(row);
        }

        report_timing(n, iterations, t.elapsed());
        //output_grid(output_filename, *grid_old, n);
    }
Beispiel #4
0
/* Allocates a zero-filled array of `count` doubles; prints an error and exits on failure. */
static double *alloc_double_array(size_t count)
{
	/* Ask for at least one element so that NULL always means failure. */
	double *buf = (double *)calloc(count ? count : 1, sizeof(double));
	if(buf == NULL)
	{
		fprintf(stderr, "\n====>Error: Failed to allocate memory for %zu doubles.\n", count);
		exit(1);
	}
	return buf;
}

/* Allocates a zero-filled array of `count` ints; prints an error and exits on failure. */
static int *alloc_int_array(size_t count)
{
	int *buf = (int *)calloc(count ? count : 1, sizeof(int));
	if(buf == NULL)
	{
		fprintf(stderr, "\n====>Error: Failed to allocate memory for %zu ints.\n", count);
		exit(1);
	}
	return buf;
}

/* Rounds `value` up to the next multiple of `multiple` (which must be non-zero). */
static size_t round_up_to_multiple(size_t value, size_t multiple)
{
	size_t rem = value % multiple;
	return rem ? value + (multiple - rem) : value;
}

/*
 * Converts `n_eq` equation-major vectors (src[eq*dim + i]) into the layout
 * streamed to the kernel: equations are taken in groups of C, and inside a
 * group the C values of component i are stored next to each other.
 * When `row_scale` is not NULL, component i is multiplied by row_scale[i].
 * `n_eq` must be a multiple of C.
 */
static void interleave_equations(double *dst, const double *src, const double *row_scale,
                                 size_t n_eq, size_t C, size_t dim)
{
	size_t out = 0;
	for(size_t base = 0; base < n_eq; base += C)
	{
		for(size_t i = 0; i < dim; i ++)
		{
			for(size_t eq = base; eq < base + C; eq ++)
			{
				double value = src[eq * dim + i];
				dst[out] = row_scale ? value * row_scale[i] : value;
				out ++;
			}
		}
	}
}

/* Inverse of interleave_equations (without scaling): kernel layout back to equation-major. */
static void deinterleave_equations(double *dst, const double *src,
                                   size_t n_eq, size_t C, size_t dim)
{
	for(size_t base = 0; base < n_eq; base += C)
	{
		for(size_t i = 0; i < C; i ++)
		{
			for(size_t j = 0; j < dim; j ++)
			{
				dst[base * dim + i * dim + j] = src[base * dim + i + j * C];
			}
		}
	}
}

/*
 * Host program for the Jacobi kernel.
 *
 * 1. Reads the constants "C" and "maxDimLen" from the max file and parses
 *    the options -d (dimension), -b (blocks), -i (iterations), -h (help).
 * 2. Generates one random, strictly diagonally dominant matrix A and
 *    blks*C random right-hand sides b.
 * 3. Streams the scaled, reordered data through jacobi().  Every equation
 *    whose reported error is above CUR_EPS is streamed again, starting
 *    from its latest result, until none is left.
 * 4. Runs jacobi_opt() as reference and compares the two solution sets
 *    element by element against EPS.
 *
 * Returns 0 when all elements match, 1 otherwise or on invalid arguments.
 *
 * NOTE(review): step 3 has no upper bound on the number of re-streams; an
 * equation whose error never drops to CUR_EPS keeps the loop running.
 */
int main(int argc, char** argv) 
{
	max_file_t *max_file = jacobi_init();
	size_t dim               = 64;   // this should be a scalar input in the bitstream
	size_t MAX_ITER          = 20;
	const size_t C           = max_get_constant_uint64t(max_file, "C");
	size_t blks              = 100;
	const size_t max_dim     = max_get_constant_uint64t(max_file, "maxDimLen");
	/* The max file is only needed for the two constants above; releasing it
	   here also covers every early-return path below. */
	max_file_free(max_file);

	if(C == 0)
	{
		/* C is used as a group size and as a divisor below. */
		fprintf(stderr, "\n====>Error: Constant C of the max file is zero.\n");
		return 1;
	}
	if(argc == 1)
	{
		fprintf(stderr, "====>Info:Runing Jacobi with default parameter values:[Dimension = %zu, Iteration = %zu, blocks = %zu(%zu*%zu equations)], for details, see the README.txt\n", dim, MAX_ITER, blks, blks, C);
	}

	const char *opt_str = "hd:b:i:";
	int opt = 0;
	int input_dim = dim;
	int input_iter = MAX_ITER;
	int input_blks = blks;
	while( (opt = getopt(argc, argv, opt_str)) != -1)
	{
		switch(opt)
		{
			case 'd':
				input_dim = atoi(optarg);
				break;
			case 'b':
				input_blks = atoi(optarg);
				break;
			case 'i':
				input_iter = atoi(optarg);
				break;
			case 'h':
				usage();
				return 1;
			default:
				fprintf(stderr, "====>Error: Inputs contain invalid command line paramter(s)!\n");
				usage();
				return 1;
		}
	}

	/* Dimension must be positive, even and not larger than maxDimLen. */
	if(input_dim <= 0 || (size_t)input_dim > max_dim || input_dim % 2 != 0)
	{
		fprintf(stderr, "\n====>Error: Input dimension length is invalid, for details, see the usage below:\n");
		usage();
		return 1;
	}
	dim = (size_t)input_dim;

	if(input_blks <= 0)
	{
		fprintf(stderr, "\n====>Error: Input block number is invalid, should bigger than zero.\n");
		usage();
		return 1;
	}
	blks = (size_t)input_blks;

	if(input_iter <= 1)
	{
		fprintf(stderr, "\n====>Error: Input iteration number is invalid, should bigger than 1.\n");
		usage();
		return 1;
	}
	MAX_ITER = (size_t)input_iter;

	const size_t total_equations = blks * C;
	const size_t total_values    = total_equations * dim;

	/* All buffers start zero-filled. */
	double *A                   = alloc_double_array(dim * dim);
	double *A_trans             = alloc_double_array(dim * dim);
	double *A_original          = alloc_double_array(dim * dim);   /* heap, not a stack VLA */
	double *b                   = alloc_double_array(total_values);
	double *b_trans             = alloc_double_array(total_values);
	double *diagA               = alloc_double_array(dim);         /* holds 1 / A[i][i] */
	double *reverse_diagA       = alloc_double_array(dim);         /* holds A[i][i]     */
	double *x_init              = alloc_double_array(C * dim);
	double *x_trans_init        = alloc_double_array(C * dim);
	double *result              = alloc_double_array(total_values);
	double *reorder_result      = alloc_double_array(total_values);
	double *solutions           = alloc_double_array(total_values);
	double *error               = alloc_double_array(total_equations);
	double *error_bak           = alloc_double_array(total_equations);
	int    *recacu_error_index  = alloc_int_array(total_equations);
	double *expected_error      = alloc_double_array(total_equations);
	double *x_base              = alloc_double_array(total_values);
	double *x_all_init          = alloc_double_array(total_values);
	double *x_all_trans_init    = alloc_double_array(total_values);
	double *x_latest_init       = alloc_double_array(total_values);
	double *x_latest_trans_init = alloc_double_array(total_values);
	double *recacu_b            = alloc_double_array(total_values);
	double *recacu_trans_b      = alloc_double_array(total_values);

	/* Sentinel values for everything that has not been computed yet. */
	for(size_t i = 0; i < total_equations; i ++)
	{
		recacu_error_index[i] = -1;
		expected_error[i]     = 1000;
		error_bak[i]          = 1000;
		for(size_t j = 0; j < dim; j ++)
		{
			solutions[i*dim + j] = 1000;
		}
	}

	/**
	 *  Generating random value for b and A
	 */
	srand(time(NULL));
	for(size_t i = 0; i < dim; ++i)
	{
		double sum = 0;
		for(size_t j = 0; j < dim; ++j)
		{
			if(i != j)
			{
				A[i*dim+j]  = 2.0*rand()/(double)RAND_MAX - 1 ; // random number between -1 and 1
				sum        += fabs(A[i*dim+j]);
			}
		}
		/* Diagonal exceeds the absolute row sum: strictly diagonally dominant. */
		A[i * dim + i]   = 1 + sum;
		diagA[i]         = 1.0/A[i * dim + i];
		reverse_diagA[i] = A[i * dim + i];
	}

	for(size_t i = 0; i < total_equations; i ++)
	{
		for(size_t j = 0; j < dim; j ++)
		{
			b[i * dim + j] = 2.0*rand()/(double)RAND_MAX - 1;
		}
	}

	/* Keep the unscaled matrix for the reference solver, then divide the
	   off-diagonal entries of each row by that row's diagonal entry. */
	for(size_t i = 0; i < dim; i ++)
	{
		for(size_t j = 0; j < dim; j ++)
		{
			A_original[i * dim + j] = A[i * dim + j];
			if(i != j)
			{
				A[i * dim + j] = A[i*dim + j] * diagA[i];
			}
		}
	}

	/**
	 * Reorder the input A and b 
	 */
	clock_t engine_start = clock();
	for(size_t i = 0; i < dim; i ++)
	{
		for(size_t j = 0; j < dim; j ++)
		{
			A_trans[i * dim + j] = A[j * dim + i];
		}
	}
	interleave_equations(b_trans, b, diagA, total_equations, C, dim);

	/* The initial guess is the zero vector for every equation.  One group
	   of C equations is prepared once and replicated for all blocks. */
	for(size_t i = 0; i < C; i ++)
	{
		for(size_t j = 0; j < dim; j ++)
		{
			x_init[i * dim + j]   = 0;
			x_trans_init[j*C + i] = x_init[i * dim + j];
		}
	}
	for(size_t k = 0; k < blks; k ++)
	{
		memcpy(x_all_trans_init + k * C * dim , x_trans_init , sizeof(double)*C*dim);
		memcpy(x_all_init       + k * C * dim , x_init       , sizeof(double)*C*dim);
	}

	jacobi(
		dim,
		total_equations,
		MAX_ITER,
		A_trans                         ,
		dim * dim * sizeof(double)      ,
		b_trans                         ,
		total_values * sizeof(double)   ,
		reverse_diagA                   ,
		dim * sizeof(double)            ,
		x_all_trans_init                ,
		total_values * sizeof(double)   ,
		error                           ,
		total_equations * sizeof(double),
		result                          ,
		total_values * sizeof(double)
	  );

	deinterleave_equations(reorder_result, result, total_equations, C, dim);

	/*Check Error to decide whether we need to restream into kernel again*/
	/* actual_recacu_cnt: equations that still need work, packed at the
	   front of x_latest_init / recacu_b / recacu_error_index.
	   recacu_cnt: the same number padded up to a multiple of C, which is
	   the amount streamed to the kernel. */
	size_t actual_recacu_cnt = 0;
	for(size_t i = 0; i < total_equations; i ++)
	{
		if(error[i] > CUR_EPS)
		{
			memcpy(x_latest_init + actual_recacu_cnt*dim, reorder_result + i*dim, dim*sizeof(double));
			memcpy(recacu_b      + actual_recacu_cnt*dim, b              + i*dim, dim*sizeof(double));
			recacu_error_index[actual_recacu_cnt] = (int)i;
			actual_recacu_cnt ++;
		}
		else
		{
			error_bak[i] = error[i];
			memcpy(solutions +  i*dim, reorder_result + i*dim, dim*sizeof(double));
		}
	}
	size_t recacu_cnt = round_up_to_multiple(actual_recacu_cnt, C);

	/**
	 *  if recaculate count not zero, we start to restream data into kernel again
	 */
	int times = 1;
	while( recacu_cnt != 0 )
	{
		times ++;

		/* Reorder latest start values and right-hand sides.  recacu_cnt is
		   a multiple of C, so both calls overwrite the whole streamed
		   range and no clearing is needed beforehand. */
		interleave_equations(x_latest_trans_init, x_latest_init, NULL,  recacu_cnt, C, dim);
		interleave_equations(recacu_trans_b,      recacu_b,      diagA, recacu_cnt, C, dim);

		memset(error  , 0 , recacu_cnt * sizeof(double)       ) ;
		memset(result , 0 , recacu_cnt * dim * sizeof(double) ) ;
		jacobi(
			dim,
			recacu_cnt,
			MAX_ITER,
			A_trans                           ,
			dim * dim * sizeof(double)        ,
			recacu_trans_b                    ,
			recacu_cnt * dim * sizeof(double) ,
			reverse_diagA                     ,
			dim * sizeof(double)              ,
			x_latest_trans_init               ,
			recacu_cnt * dim * sizeof(double) ,
			error                             ,
			recacu_cnt * sizeof(double)       ,
			result                            ,
			recacu_cnt * dim * sizeof(double)
		  );

		deinterleave_equations(reorder_result, result, recacu_cnt, C, dim);

		/* Compact the equations that are still above the threshold to the
		   front; only the first actual_recacu_cnt entries are real, the
		   rest of the streamed range is padding. */
		size_t still_failing = 0;
		for(size_t i = 0; i < actual_recacu_cnt; i ++)
		{
			if(error[i] > CUR_EPS)
			{
				memcpy(x_latest_init + still_failing*dim, reorder_result + i*dim, dim*sizeof(double));
				/* Source and destination are the same row while nothing has
				   been dropped yet, so memcpy is not allowed here. */
				memmove(recacu_b     + still_failing*dim, recacu_b       + i*dim, dim*sizeof(double));
				recacu_error_index[still_failing] = recacu_error_index[i];
				still_failing ++;
			}
			else
			{
				const size_t eq = (size_t)recacu_error_index[i];
				error_bak[eq] = error[i];
				memcpy(solutions + eq*dim, reorder_result + i*dim, dim*sizeof(double));
			}
		}

		/* update the current recaculating solution numbers */
		actual_recacu_cnt = still_failing;
		recacu_cnt        = round_up_to_multiple(still_failing, C); /* padding to multiple of C */
	}//loop while

	clock_t engine_end       = clock();
	double engine_total_time = (double)(engine_end - engine_start) / CLOCKS_PER_SEC;
	fprintf(stderr, "=========>Kernel Complete, Stream Times: %d\n", times);
	clock_t cpu_start = clock();
	jacobi_opt(A_original, x_base, b, dim, C, total_equations, x_all_init , expected_error);
	clock_t cpu_end = clock();
	double cpu_total_time = (double)(cpu_end - cpu_start) / CLOCKS_PER_SEC;

	/* Compare the result with the standard result */
	int cnt = 0;
	for(size_t i = 0; i < total_equations; i ++)
	{
		for(size_t j = 0; j < dim; j ++)
		{
			double diff = solutions[i * dim + j] - x_base[i*dim + j];
			if(fabs(diff) > EPS)
			{
				fprintf(stderr, "error: atual=%.10f, expect=%.10f, err=%.10e\n",
						solutions[i * dim + j], x_base[i*dim + j], diff);
				cnt ++;
			}
		}
	}
	if(cnt == 0)
	{
		max_print_result(dim, total_equations, MAX_ITER, engine_total_time, cpu_total_time);
		fprintf(stderr, "==========>All Test Passed\n\n");
	}
	else
	{
		fprintf(stderr, "!!!Test Failed:%d\n\n", cnt);
	}

	free ( A                   ) ;
	free ( A_trans             ) ;
	free ( A_original          ) ;
	free ( b                   ) ;
	free ( b_trans             ) ;
	free ( diagA               ) ;
	free ( reverse_diagA       ) ;
	free ( x_init              ) ;
	free ( x_trans_init        ) ;
	free ( error               ) ;
	free ( error_bak           ) ;
	free ( recacu_error_index  ) ;
	free ( expected_error      ) ;
	free ( result              ) ;
	free ( reorder_result      ) ;
	free ( solutions           ) ;
	free ( x_base              ) ;
	free ( x_all_init          ) ;
	free ( x_all_trans_init    ) ;
	free ( x_latest_init       ) ;
	free ( x_latest_trans_init ) ;
	free ( recacu_b            ) ;
	free ( recacu_trans_b      ) ;

	int status = (cnt == 0) ? 0:1;
	return status;
}