Exemple #1
0
    // Confirms that the "dout" and "din" constants stored in the max file
    // agree with the host-side DOUT and in_d.size2(); with mismatching
    // parameters the DFE will not produce a correct result.
    // Prints both sides of the comparison and returns false on a mismatch,
    // returns true when everything lines up.
    bool LSH_DFE::check_parameters(){
        const int kernelDout = max_get_constant_uint64t(mf, "dout");
        const int kernelDin = max_get_constant_uint64t(mf, "din");

        // Happy path first: nothing to report when both dimensions agree.
        if(kernelDout == DOUT && kernelDin == in_d.size2()){
            return true;
        }

        const char* const separator = "------------------------------";
        cout<<separator<<endl
            <<"DFE configuration failed."<<endl
            <<"Parameters are as followed:"<<endl
            <<"dfe_dout : "<<kernelDout<<",   dout : "<<DOUT<<endl
            <<"dfe_din : "<<kernelDin<<",   din : "<<in_d.size2()<<endl
            <<separator<<endl;
        return false;
    }
/**
 *	Builds the interface from a plan file and a domain object.
 *
 *	Steps, in order: copy the edge/cell counts out of the domain, initialise the
 *	max file and load it onto an engine, allocate the address table, parse the
 *	plan, then lay out the adtDx, resReadOnly, q and qold regions back to back.
 *	Each table entry is a burst offset multiplied by burstSize; the extra entry
 *	at index MEM_ADDRESS_ITEMS marks the end of the last region so that region
 *	sizes can be printed as differences of neighbouring entries.
 *
 *	@param planfile  plan description handed to parsePlan()
 *	@param domain_   domain object providing nedgecomputedfe / ncellcomputedfe
 */
AirfoilDFEInterface::AirfoilDFEInterface(std::string planfile, AirfoilDFEDomain * domain_) {

	domain = domain_;
	nEdgeComputeDFE = domain->nedgecomputedfe;
	nCellComputeDFE = domain->ncellcomputedfe;

	maxfile = airfoildfe_init();
	engine = max_load(maxfile,"*");

	// One slot per region plus a trailing end marker.
	memAddresses = static_cast<int *>(malloc(sizeof(int)*(MEM_ADDRESS_ITEMS+1)));

	parsePlan(planfile);

	// Running offset of the next free region, counted in bursts.
	int nextBurst = 0;

	memAddresses[adtDx] = nextBurst*burstSize;
	int wordBits = max_get_constant_uint64t(maxfile,"floatBits");
	int adtDxLength = getNBursts(wordBits, nCellComputeDFE*12);
	nextBurst += adtDxLength;

	memAddresses[resReadOnly] = nextBurst*burstSize;
	int resReadOnlyLength = getNBursts(resReadOnlyBits, nEdgeComputeDFE);
	nextBurst += resReadOnlyLength;

	// q and qold have the same length, so the burst count is computed once.
	memAddresses[q] = nextBurst*burstSize;
	int qLength = getNBursts(wordBits, nCellComputeDFE*4);
	nextBurst += qLength;

	memAddresses[qold] = nextBurst*burstSize;
	nextBurst += qLength;

	memAddresses[MEM_ADDRESS_ITEMS] = nextBurst*burstSize;

	qpadtPortWidth = max_get_constant_uint64t(maxfile, "qpadtPortPCIeWidthInBytes");

	// Report every region as "start,size".
	printf("Generated lmem addresses, sizes:\n");
	for (int region = 0; region < MEM_ADDRESS_ITEMS; region++) {
		const int start = memAddresses[region];
		const int size = memAddresses[region+1] - memAddresses[region];
		printf("%d,%d\n", start, size);
	}
	printf("\n");
}
Exemple #3
0
/**
 * Returns a zero-filled array of `count` doubles, or NULL when the allocation fails.
 */
static double *alloc_zeroed_doubles(size_t count)
{
	return (double *)calloc(count, sizeof(double));
}

/**
 * Returns 1 when none of the `count` pointers in `buffers` is NULL, 0 otherwise.
 */
static int all_allocated(void *const *buffers, size_t count)
{
	for(size_t i = 0; i < count; i ++)
	{
		if(buffers[i] == NULL)
		{
			return 0;
		}
	}
	return 1;
}

/**
 * Frees every pointer in `buffers`; NULL entries are fine because free(NULL) is a no-op.
 */
static void release_buffers(void *const *buffers, size_t count)
{
	for(size_t i = 0; i < count; i ++)
	{
		free(buffers[i]);
	}
}

/**
 * Interleaves `n_equations` row-major equations (each `dim` long) in groups of
 * `group`: inside one group, component i of every equation is stored
 * contiguously. When `row_scale` is not NULL, component i is multiplied by
 * row_scale[i] on the way. `n_equations` must be a multiple of `group`.
 */
static void pack_equation_groups(double *dst, const double *src, size_t n_equations,
		size_t group, size_t dim, const double *row_scale)
{
	size_t out = 0;
	for(size_t base = 0; base < n_equations; base += group)
	{
		for(size_t i = 0; i < dim; i ++)
		{
			for(size_t j = base; j < base + group; j ++)
			{
				const double value = src[j * dim + i];
				dst[out] = (row_scale != NULL) ? value * row_scale[i] : value;
				out ++;
			}
		}
	}
}

/**
 * Inverse of pack_equation_groups() without scaling: turns the group-interleaved
 * layout in `src` back into one contiguous row of `dim` values per equation.
 */
static void unpack_equation_groups(double *dst, const double *src, size_t n_equations,
		size_t group, size_t dim)
{
	for(size_t base = 0; base < n_equations; base += group)
	{
		for(size_t i = 0; i < group; i ++)
		{
			for(size_t j = 0; j < dim; j ++)
			{
				dst[base * dim + i * dim + j] = src[base * dim + i + j * group];
			}
		}
	}
}

/**
 * Jacobi solver test driver.
 *
 * Command line: -d <dimension>, -b <blocks>, -i <iterations>, -h (usage).
 * The driver
 *   1. reads the constants "C" and "maxDimLen" from the max file,
 *   2. builds a random matrix A whose diagonal is 1 + sum(|off-diagonal|) of its
 *      row, plus blks*C random right-hand sides,
 *   3. streams everything through jacobi() and keeps re-streaming the equations
 *      whose reported error is still above CUR_EPS (padded to a multiple of C),
 *   4. recomputes the solutions with jacobi_opt() and compares both results
 *      element-wise with tolerance EPS.
 *
 * Returns 0 when every element matches, 1 on a mismatch, on invalid arguments
 * or when a buffer cannot be allocated.
 */
int main(int argc, char** argv) 
{
	size_t dim      = 64;   // this should be a scalar input in the bitstream
	size_t MAX_ITER = 20;
	size_t blks     = 100;

	/* The max file is only consulted for its constants, so it is released right
	 * away; that way none of the early exits below can leak it. */
	max_file_t *max_file = jacobi_init();
	const size_t C       = max_get_constant_uint64t(max_file, "C");
	const size_t max_dim = max_get_constant_uint64t(max_file, "maxDimLen");
	max_file_free(max_file);

	/* C is used as a loop stride and as a modulus further down. */
	if(C == 0)
	{
		fprintf(stderr, "\n====>Error: The bitstream constant C must be bigger than zero.\n");
		return 1;
	}

	if(argc == 1)
	{
		fprintf(stderr, "====>Info:Runing Jacobi with default parameter values:[Dimension = %zu, Iteration = %zu, blocks = %zu(%zu*%zu equations)], for details, see the README.txt\n", dim, MAX_ITER, blks, blks, C);
	}

	const char *opt_str = "hd:b:i:";
	int opt        = 0;
	int input_dim  = (int)dim;
	int input_iter = (int)MAX_ITER;
	int input_blks = (int)blks;
	while( (opt = getopt(argc, argv, opt_str)) != -1)
	{
		switch(opt)
		{
			case 'd':
				input_dim = atoi(optarg);
				break;
			case 'b':
				input_blks = atoi(optarg);
				break;
			case 'i':
				input_iter = atoi(optarg);
				break;
			case 'h':
				usage();
				return 1;
			default:
				fprintf(stderr, "====>Error: Inputs contain invalid command line paramter(s)!\n");
				usage();
				return 1;
		}
	}

	/* The sign is tested first, so the cast to size_t only ever sees positive values. */
	if(input_dim <= 0 || (size_t)input_dim > max_dim || input_dim % 2 != 0)
	{
		fprintf(stderr, "\n====>Error: Input dimension length is invalid, for details, see the usage below:\n");
		usage();
		return 1;
	}
	dim = (size_t)input_dim;

	if(input_blks <= 0)
	{
		fprintf(stderr, "\n====>Error: Input block number is invalid, should bigger than zero.\n");
		usage();
		return 1;
	}
	blks = (size_t)input_blks;

	if(input_iter <= 1)
	{
		fprintf(stderr, "\n====>Error: Input iteration number is invalid, should bigger than 1.\n");
		usage();
		return 1;
	}
	MAX_ITER = (size_t)input_iter;

	const size_t total_equations = blks * C;
	const size_t matrix_len      = dim * dim;
	const size_t system_len      = total_equations * dim;
	const size_t group_len       = C * dim;

	/* Host buffers; every double buffer starts out zero-filled. A_original lives
	 * on the heap because dim*dim doubles can be far too large for the stack. */
	double *A                  = alloc_zeroed_doubles(matrix_len);
	double *A_original         = alloc_zeroed_doubles(matrix_len);
	double *A_trans            = alloc_zeroed_doubles(matrix_len);
	double *b                  = alloc_zeroed_doubles(system_len);
	double *b_trans            = alloc_zeroed_doubles(system_len);
	double *diagA              = alloc_zeroed_doubles(dim);
	double *reverse_diagA      = alloc_zeroed_doubles(dim);
	double *x_init             = alloc_zeroed_doubles(group_len);
	double *x_trans_init       = alloc_zeroed_doubles(group_len);
	double *result             = alloc_zeroed_doubles(system_len);
	double *reorder_result     = alloc_zeroed_doubles(system_len);
	double *solutions          = alloc_zeroed_doubles(system_len);
	double *error              = alloc_zeroed_doubles(total_equations);
	double *error_bak          = alloc_zeroed_doubles(total_equations);
	double *expected_error     = alloc_zeroed_doubles(total_equations);
	double *x_base             = alloc_zeroed_doubles(system_len);
	double *x_all_init         = alloc_zeroed_doubles(system_len);
	double *x_all_trans_init   = alloc_zeroed_doubles(system_len);
	int    *recacu_error_index = (int *)malloc(total_equations * sizeof(int));

	void *setup_buffers[] = {
		A, A_original, A_trans, b, b_trans, diagA, reverse_diagA,
		x_init, x_trans_init, result, reorder_result, solutions,
		error, error_bak, expected_error, x_base, x_all_init,
		x_all_trans_init, recacu_error_index
	};
	const size_t setup_buffer_count = sizeof(setup_buffers) / sizeof(setup_buffers[0]);
	if(!all_allocated(setup_buffers, setup_buffer_count))
	{
		fprintf(stderr, "\n====>Error: Failed to allocate the host buffers.\n");
		release_buffers(setup_buffers, setup_buffer_count);
		return 1;
	}

	/* Sentinels: nothing is queued for re-streaming and nothing is solved yet. */
	for(size_t i = 0; i < total_equations; i ++)
	{
		recacu_error_index[i] = -1;
		expected_error[i]     = 1000;
		error_bak[i]          = 1000;
		for(size_t j = 0; j < dim; j ++)
		{
			solutions[i*dim + j] = 1000;
		}
	}

	/**
	 *  Generating random value for b and A
	 */
	srand(time(NULL));
	for(size_t i = 0; i < dim; ++i) {
		double sum = 0;
		for(size_t j = 0; j < dim; ++j) {
			if(i != j) {
				A[i*dim+j]  = 2.0*rand()/(double)RAND_MAX - 1 ; // random number between -1 and 1
				sum        += fabs(A[i*dim+j]);
			}
		}
		/* The diagonal exceeds the sum of the absolute off-diagonal entries of its row. */
		A[i * dim + i]   = 1 + sum;
		diagA[i]         = 1.0/A[i * dim + i];
		reverse_diagA[i] = A[i * dim + i];
	}

	for(size_t i = 0; i < total_equations; i ++)
	{
		for(size_t j = 0; j < dim; j ++)
		{
			b[i * dim + j] = 2.0*rand()/(double)RAND_MAX - 1;
		}
	}

	/* Keep the untouched matrix for the CPU reference, then scale the
	 * off-diagonal entries of every row by the reciprocal of its diagonal. */
	for(size_t i = 0; i < dim; i ++)
	{
		for(size_t j = 0; j < dim; j ++)
		{
			A_original[i * dim + j] = A[i * dim + j];
			if(i != j)
			{
				A[i * dim + j] = A[i*dim + j] * diagA[i];
			}
		}
	}

	/**
	 * Reorder the input A and b 
	 */
	const clock_t engine_start = clock();
	for(size_t i = 0; i < dim; i ++)
	{
		for(size_t j = 0; j < dim; j ++)
		{
			A_trans[i * dim + j] = A[j * dim + i];
		}
	}
	pack_equation_groups(b_trans, b, total_equations, C, dim, diagA);

	/* Every block starts from the same all-zero initial guess. */
	for(size_t k = 0; k < blks; k ++)
	{
		for(size_t i = 0; i < C; i ++)
		{
			for(size_t j = 0; j < dim; j ++)
			{
				x_init[i * dim + j]   = 0;
				x_trans_init[j*C + i] = x_init[i * dim + j];
			}
		}
		memcpy(x_all_trans_init + k * group_len, x_trans_init, sizeof(double)*group_len);
		memcpy(x_all_init       + k * group_len, x_init      , sizeof(double)*group_len);
	}

	jacobi(
		dim,
		total_equations,
		MAX_ITER,
		A_trans                                ,
		dim * dim * sizeof(double)             ,
		b_trans                                ,
		total_equations * dim * sizeof(double) ,
		reverse_diagA                          ,
		dim * sizeof(double)                   ,
		x_all_trans_init                       ,
		total_equations * dim * sizeof(double) ,
		error                                  ,
		total_equations * sizeof(double)       ,
		result                                 ,
		total_equations * dim * sizeof(double)
	  );

	unpack_equation_groups(reorder_result, result, total_equations, C, dim);

	/*Check Error to decide whether we need to restream into kernel again*/
	double *x_latest_init       = alloc_zeroed_doubles(system_len);
	double *x_latest_trans_init = alloc_zeroed_doubles(system_len);
	double *recacu_b            = alloc_zeroed_doubles(system_len);
	double *recacu_trans_b      = alloc_zeroed_doubles(system_len);

	void *retry_buffers[] = { x_latest_init, x_latest_trans_init, recacu_b, recacu_trans_b };
	const size_t retry_buffer_count = sizeof(retry_buffers) / sizeof(retry_buffers[0]);
	if(!all_allocated(retry_buffers, retry_buffer_count))
	{
		fprintf(stderr, "\n====>Error: Failed to allocate the host buffers.\n");
		release_buffers(retry_buffers, retry_buffer_count);
		release_buffers(setup_buffers, setup_buffer_count);
		return 1;
	}

	/* Split the first pass: accepted equations go to `solutions`, the others are
	 * compacted to the front of the retry buffers together with the index of the
	 * equation they belong to. */
	int actual_recacu_cnt = 0;
	for(size_t i = 0; i < total_equations; i ++)
	{
		if(error[i] > CUR_EPS)
		{
			memcpy(x_latest_init + actual_recacu_cnt*dim, reorder_result + i*dim, dim*sizeof(double));
			memcpy(recacu_b      + actual_recacu_cnt*dim, b              + i*dim, dim*sizeof(double));
			recacu_error_index[actual_recacu_cnt] = (int)i;
			actual_recacu_cnt ++;
		}
		else
		{
			error_bak[i] = error[i];
			memcpy(solutions + i*dim, reorder_result + i*dim, dim*sizeof(double));
		}
	}

	/* padding to multipy of C */
	int recacu_cnt = actual_recacu_cnt;
	while( recacu_cnt % C )
	{
		recacu_cnt ++;
	}

	/**
	 *  if recaculate count not zero, we start to restream data into kernel again
	 */
	int times = 1;
	while( recacu_cnt != 0 )
	{
		times ++;

		/* Both pack calls write exactly recacu_cnt*dim values, which is all the
		 * kernel call below streams in, so the inputs need no clearing first. */
		pack_equation_groups(x_latest_trans_init, x_latest_init, (size_t)recacu_cnt, C, dim, NULL);
		pack_equation_groups(recacu_trans_b,      recacu_b,      (size_t)recacu_cnt, C, dim, diagA);

		memset(error  , 0 , recacu_cnt * sizeof(double));
		memset(result , 0 , recacu_cnt * dim * sizeof(double));
		jacobi(
			dim,
			recacu_cnt,
			MAX_ITER,
			A_trans                           ,
			dim * dim * sizeof(double)        ,
			recacu_trans_b                    ,
			recacu_cnt * dim * sizeof(double) ,
			reverse_diagA                     ,
			dim * sizeof(double)              ,
			x_latest_trans_init               ,
			recacu_cnt * dim * sizeof(double) ,
			error                             ,
			recacu_cnt * sizeof(double)       ,
			result                            ,
			recacu_cnt * dim * sizeof(double)
		  );

		unpack_equation_groups(reorder_result, result, (size_t)recacu_cnt, C, dim);

		/* Only the first actual_recacu_cnt rows are real equations; the rest is padding. */
		int still_failing = 0;
		for(int i = 0; i < actual_recacu_cnt; i ++)
		{
			if(error[i] > CUR_EPS)
			{
				memcpy(x_latest_init + still_failing*dim, reorder_result + i*dim, dim*sizeof(double));
				/* Source and destination are the same row as long as nothing has
				 * been dropped yet, so memmove is required here, not memcpy. */
				memmove(recacu_b + still_failing*dim, recacu_b + i*dim, dim*sizeof(double));
				recacu_error_index[still_failing] = recacu_error_index[i];
				still_failing ++;
			}
			else
			{
				error_bak[ recacu_error_index[i] ] = error[i];
				memcpy(solutions + recacu_error_index[i]*dim, reorder_result + i*dim, dim*sizeof(double));
			}
		}

		/* update the current recaculating solution numbers */
		actual_recacu_cnt = still_failing;
		recacu_cnt        = still_failing;
		/* padding to multipy of C */
		while( recacu_cnt % C )
		{
			recacu_cnt ++;
		}
	}//loop while

	const clock_t engine_end       = clock();
	const double engine_total_time = (double)(engine_end - engine_start) / CLOCKS_PER_SEC;
	fprintf(stderr, "=========>Kernel Complete, Stream Times: %d\n", times);

	const clock_t cpu_start = clock();
	jacobi_opt(A_original, x_base, b, dim, C, total_equations, x_all_init , expected_error);
	const clock_t cpu_end = clock();
	const double cpu_total_time = (double)(cpu_end - cpu_start) / CLOCKS_PER_SEC;

	/* Compare the result with the standard result */
	int cnt = 0;
	for(size_t i = 0; i < total_equations; i ++)
	{
		for(size_t j = 0; j < dim; j ++)
		{
			const double diff = solutions[i * dim + j] - x_base[i*dim + j];
			if(fabs(diff) > EPS)
			{
				fprintf(stderr, "error: atual=%.10f, expect=%.10f, err=%.10e\n",
						solutions[i * dim + j], x_base[i*dim + j], diff);
				cnt ++;
			}
		}
	}
	if(cnt == 0)
	{
		max_print_result(dim, total_equations, MAX_ITER, engine_total_time, cpu_total_time);
		fprintf(stderr, "==========>All Test Passed\n\n");
	}
	else
	{
		fprintf(stderr, "!!!Test Failed:%d\n\n", cnt);
	}

	release_buffers(retry_buffers, retry_buffer_count);
	release_buffers(setup_buffers, setup_buffer_count);

	return (cnt == 0) ? 0 : 1;
}