int main()
{
    /* Create the file to save results */
    char *varnames[NUM_VARS] = {"x_rec_all"};
    create_netcdf(FILENAME_WR, NUM_VARS, varnames);

    /* Allocate memory */
    double *x_fusion_lf_all = (double*)malloc(NUM_3DSNAPS * NUM_2DSNAPS * N_HR * N_HR * sizeof(double));
    double *x_fusion_hf_all = (double*)malloc(NUM_3DSNAPS * NUM_2DSNAPS * N_HR * N_HR * sizeof(double));
    double *x_rec_all = (double*)malloc(NUM_3DSNAPS * NUM_2DSNAPS * N_HR * N_HR * sizeof(double));

    /* read all snapshots */
    size_t start_ids[4] = {0, 0, 0, 0};
    size_t count_ids[4] = {NUM_3DSNAPS, NUM_2DSNAPS, N_HR, N_HR };
    read_netcdf(FILENAME_RD, "Uinterp_all", start_ids, count_ids, x_fusion_lf_all);
    read_netcdf(FILENAME_RD, "Udiff_all", start_ids, count_ids, x_fusion_hf_all);

    double time_all_start = omp_get_wtime();

    /* Per-snapshot work buffers (x_current_lf and x_rec are allocated and freed but not otherwise used) */
    double *x_current_lf = (double*)malloc(N_HR * N_HR * sizeof(double));
    double *x_current_hf = (double*)malloc(N_HR * N_HR * sizeof(double));
    double *x_rec = (double*)malloc(N_HR * N_HR * sizeof(double));

    /* Cast the first factor so the product is computed in long arithmetic and cannot overflow int */
    long int grid_size = (long int)N_HR * N_HR * NEIGHBOR_FULLSIZE * NEIGHBOR_FULLSIZE * SIM_FULLSIZE * SIM_FULLSIZE;
    int *gridpatches_y = (int*)malloc(grid_size * sizeof(int));
    int *gridpatches_z = (int*)malloc(grid_size * sizeof(int));
    int *acc_ids = (int*)malloc(ACC_FULLSIZE * ACC_FULLSIZE * sizeof(int));
    generate_grids(gridpatches_y, gridpatches_z, acc_ids);


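    /* For each 3D snapshot: anchor the PIV snapshots in x_rec_all, then fill the
       time steps in between, block by block, with forward/backward propagation */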
    for(int snap3d_id = 0; snap3d_id < NUM_3DSNAPS; snap3d_id++)
    {
        int t_offset = snap3d_id * NUM_2DSNAPS * N_HR*N_HR;

        // put first PIV
        get_onesnap(x_fusion_hf_all, x_current_hf, t_offset + 0 * N_HR * N_HR, t_offset + 1 * N_HR * N_HR - 1);
        put_onesnap(x_rec_all, x_current_hf, t_offset + 0 * N_HR * N_HR, t_offset + 1 * N_HR * N_HR - 1);

        int block_id;
        for(block_id = 0; block_id < NUM_BLOCKS; block_id++)
        {
            double time_start = omp_get_wtime();

            int t_first = SCALE_FACTOR_TIME*block_id;
            int t_last = SCALE_FACTOR_TIME*(block_id+1);

            // Put last PIV of the block
            get_onesnap(x_fusion_hf_all, x_current_hf, t_offset + t_last * N_HR * N_HR, t_offset + (t_last + 1) * N_HR * N_HR - 1);
            put_onesnap(x_rec_all, x_current_hf, t_offset + t_last * N_HR * N_HR, t_offset + (t_last + 1) * N_HR * N_HR - 1);

            if (SCALE_FACTOR_TIME % 2)
            {
                // Odd gap: the forward and backward sweeps meet at two adjacent intermediate steps
                int t_bound1 = t_first + (int)SCALE_FACTOR_TIME/2;
                int t_bound2 = t_bound1 + 1;

                propag_forward(x_rec_all, x_fusion_lf_all, gridpatches_y, gridpatches_z, acc_ids, t_first, t_bound1, t_offset);
                propag_backward(x_rec_all, x_fusion_lf_all, gridpatches_y, gridpatches_z, acc_ids, t_last, t_bound2, t_offset);
            }
            else
            {
                // Even gap: sweep up to the two neighbors of the middle step,
                // then fill the middle step itself with propag_2planes
                int t_mid = t_first + (int)SCALE_FACTOR_TIME/2;
                int t_bound1 = t_mid - 1;
                int t_bound2 = t_mid + 1;
                propag_forward(x_rec_all, x_fusion_lf_all, gridpatches_y, gridpatches_z, acc_ids, t_first, t_bound1, t_offset);
                propag_backward(x_rec_all, x_fusion_lf_all, gridpatches_y, gridpatches_z, acc_ids, t_last, t_bound2, t_offset);
                propag_2planes(x_rec_all, x_fusion_lf_all, gridpatches_y, gridpatches_z, acc_ids, t_mid, t_offset);
            }

            // Report per-block timing for every block (both odd and even branches)
            printf("\n Estimated block %i of %i in 3D snapshot %i of %i in %f seconds \n",
                   block_id, NUM_BLOCKS, snap3d_id, NUM_3DSNAPS, omp_get_wtime() - time_start);
        }
    }

    // Write to file
    write_netcdf(FILENAME_WR, "x_rec_all", start_ids, count_ids, x_rec_all);

    /* free memory */
    free(x_rec); free(x_current_lf); free(x_current_hf);
    free(x_rec_all); free(x_fusion_lf_all); free(x_fusion_hf_all);
    free(gridpatches_y); free(gridpatches_z); free(acc_ids);
    printf("\n FINISH ALL COMPUTATION IN %f SECONDS \n", (double)omp_get_wtime() - time_all_start);

    return 0;
}
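The trickiest part of the example above is how each block of time steps is split between the forward and backward sweeps. The standalone sketch below is not from the project; SCALE_FACTOR_TIME is an assumed value chosen only for illustration. It reproduces the same odd/even branching and prints the index ranges each sweep would cover.

/* Standalone sketch: mirrors the odd/even block-splitting logic above.
   SCALE_FACTOR_TIME is an assumed value for illustration only. */
#include <stdio.h>

#define SCALE_FACTOR_TIME 4

int main(void)
{
    for (int block_id = 0; block_id < 2; block_id++)
    {
        int t_first = SCALE_FACTOR_TIME * block_id;
        int t_last  = SCALE_FACTOR_TIME * (block_id + 1);

        if (SCALE_FACTOR_TIME % 2)
        {
            /* Odd gap: sweeps meet at two adjacent time steps */
            int t_bound1 = t_first + SCALE_FACTOR_TIME / 2;
            int t_bound2 = t_bound1 + 1;
            printf("block %d: forward %d..%d, backward %d..%d\n",
                   block_id, t_first, t_bound1, t_last, t_bound2);
        }
        else
        {
            /* Even gap: sweeps stop next to the middle step, which is
               filled separately (propag_2planes in the example above) */
            int t_mid = t_first + SCALE_FACTOR_TIME / 2;
            printf("block %d: forward %d..%d, backward %d..%d, middle %d\n",
                   block_id, t_first, t_mid - 1, t_last, t_mid + 1, t_mid);
        }
    }
    return 0;
}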
Example #2
File: Main.c  Project: shamouda/ocr-apps
int main( int argc, char* argv[] )
{
	// =====================================================================
	// Initialization & Command Line Read-In
	// =====================================================================
	int version = 13;
	int mype = 0;
	int max_procs = omp_get_num_procs();
	int i, thread, mat;
	unsigned long seed;
	double omp_start, omp_end, p_energy;
	unsigned long long vhash = 0;
	int nprocs = 1; // Default for non-MPI builds; overwritten by MPI_Comm_size below

	#ifdef MPI
	MPI_Status stat;
	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
	MPI_Comm_rank(MPI_COMM_WORLD, &mype);
	#endif

	// rand() is only used in the serial initialization stages.
	// A custom RNG is used in parallel portions.
	#ifdef VERIFICATION
	srand(26);
	#else
	srand(time(NULL));
	#endif

	// Process CLI Fields -- store in "Inputs" structure
	Inputs in = read_CLI( argc, argv );

	// Set number of OpenMP Threads
	omp_set_num_threads(in.nthreads);

	// Print-out of Input Summary
	if( mype == 0 )
		print_inputs( in, nprocs, version );

	// =====================================================================
	// Prepare Nuclide Energy Grids, Unionized Energy Grid, & Material Data
	// =====================================================================

	// Allocate & fill energy grids
	#ifndef BINARY_READ
	if( mype == 0) printf("Generating Nuclide Energy Grids...\n");
	#endif

	NuclideGridPoint ** nuclide_grids = gpmatrix(in.n_isotopes,in.n_gridpoints);

	#ifdef VERIFICATION
	generate_grids_v( nuclide_grids, in.n_isotopes, in.n_gridpoints );
	#else
	generate_grids( nuclide_grids, in.n_isotopes, in.n_gridpoints );
	#endif

	// Sort grids by energy
	#ifndef BINARY_READ
	if( mype == 0) printf("Sorting Nuclide Energy Grids...\n");
	sort_nuclide_grids( nuclide_grids, in.n_isotopes, in.n_gridpoints );
	#endif

	// Prepare Unionized Energy Grid Framework
	#ifndef BINARY_READ
	GridPoint * energy_grid = generate_energy_grid( in.n_isotopes,
	                          in.n_gridpoints, nuclide_grids );
	#else
	GridPoint * energy_grid = (GridPoint *)malloc( in.n_isotopes *
	                           in.n_gridpoints * sizeof( GridPoint ) );
	int * index_data = (int *) malloc( in.n_isotopes * in.n_gridpoints
	                   * in.n_isotopes * sizeof(int));
	for( i = 0; i < in.n_isotopes*in.n_gridpoints; i++ )
		energy_grid[i].xs_ptrs = &index_data[i*in.n_isotopes];
	#endif

	// Double Indexing. Filling in energy_grid with pointers to the
	// nuclide_energy_grids.
	#ifndef BINARY_READ
	set_grid_ptrs( energy_grid, nuclide_grids, in.n_isotopes, in.n_gridpoints );
	#endif

	#ifdef BINARY_READ
	if( mype == 0 ) printf("Reading data from \"XS_data.dat\" file...\n");
	binary_read(in.n_isotopes, in.n_gridpoints, nuclide_grids, energy_grid);
	#endif

	// Get material data
	if( mype == 0 )
		printf("Loading Mats...\n");
	int *num_nucs  = load_num_nucs(in.n_isotopes);
	int **mats     = load_mats(num_nucs, in.n_isotopes);

	#ifdef VERIFICATION
	double **concs = load_concs_v(num_nucs);
	#else
	double **concs = load_concs(num_nucs);
	#endif

	#ifdef BINARY_DUMP
	if( mype == 0 ) printf("Dumping data to binary file...\n");
	binary_dump(in.n_isotopes, in.n_gridpoints, nuclide_grids, energy_grid);
	if( mype == 0 ) printf("Binary file \"XS_data.dat\" written! Exiting...\n");
	return 0;
	#endif

	// =====================================================================
	// Cross Section (XS) Parallel Lookup Simulation Begins
	// =====================================================================

	// Outer benchmark loop can loop through all possible # of threads
	#ifdef BENCHMARK
	for( int bench_n = 1; bench_n <= omp_get_num_procs(); bench_n++ )
	{
		in.nthreads = bench_n;
		omp_set_num_threads(in.nthreads);
 	#endif

	if( mype == 0 )
	{
		printf("\n");
		border_print();
		center_print("SIMULATION", 79);
		border_print();
	}

	omp_start = omp_get_wtime();

	// Initialize PAPI with one thread (master) here
	#ifdef PAPI
	if ( PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT){
		fprintf(stderr, "PAPI library init error!\n");
		exit(1);
	}
	#endif

	// OpenMP compiler directives - declaring variables as shared or private
	#pragma omp parallel default(none) \
	private(i, thread, p_energy, mat, seed) \
	shared( max_procs, in, energy_grid, nuclide_grids, \
	        mats, concs, num_nucs, mype, vhash)
	{
		// Initialize parallel PAPI counters
		#ifdef PAPI
		int eventset = PAPI_NULL;
		int num_papi_events;
		#pragma omp critical
		{
			counter_init(&eventset, &num_papi_events);
		}
		#endif

		double macro_xs_vector[5];
		double * xs = (double *) calloc(5, sizeof(double));

		// Initialize RNG seeds for threads
		thread = omp_get_thread_num();
		seed   = (thread+1)*19+17;

		// XS Lookup Loop
		#pragma omp for schedule(dynamic)
		for( i = 0; i < in.lookups; i++ )
		{
			// Status text
			if( INFO && mype == 0 && thread == 0 && i % 1000 == 0 )
				printf("\rCalculating XS's... (%.0lf%% completed)",
						(i / ( (double)in.lookups / (double) in.nthreads ))
						/ (double) in.nthreads * 100.0);

			// Randomly pick an energy and material for the particle
			#ifdef VERIFICATION
			#pragma omp critical
			{
				p_energy = rn_v();
				mat      = pick_mat(&seed);
			}
			#else
			p_energy = rn(&seed);
			mat      = pick_mat(&seed);
			#endif

			// debugging
			//printf("E = %lf mat = %d\n", p_energy, mat);

			// This returns the macro_xs_vector, but we're not going
			// to do anything with it in this program, so return value
			// is written over.
			calculate_macro_xs( p_energy, mat, in.n_isotopes,
			                    in.n_gridpoints, num_nucs, concs,
			                    energy_grid, nuclide_grids, mats,
                                macro_xs_vector );

			// Copy results from above function call onto heap
			// so that compiler cannot optimize function out
			// (only occurs if -flto flag is used)
			memcpy(xs, macro_xs_vector, 5*sizeof(double));

			// Verification hash calculation
			// This method provides a consistent hash across
			// architectures and compilers.
			#ifdef VERIFICATION
			char line[256];
			sprintf(line, "%.5lf %d %.5lf %.5lf %.5lf %.5lf %.5lf",
			       p_energy, mat,
				   macro_xs_vector[0],
				   macro_xs_vector[1],
				   macro_xs_vector[2],
				   macro_xs_vector[3],
				   macro_xs_vector[4]);
			unsigned long long vhash_local = hash(line, 10000);
			#pragma omp atomic
			vhash += vhash_local;
			#endif
		}

		// Prints out thread local PAPI counters
		#ifdef PAPI
		if( mype == 0 && thread == 0 )
		{
			printf("\n");
			border_print();
			center_print("PAPI COUNTER RESULTS", 79);
			border_print();
			printf("Count          \tSmybol      \tDescription\n");
		}
		{
		#pragma omp barrier
		}
		counter_stop(&eventset, num_papi_events);
		#endif

	}

	#ifndef PAPI
	if( mype == 0)
	{
		printf("\n" );
		printf("Simulation complete.\n" );
	}
	#endif

	omp_end = omp_get_wtime();

	// Print / Save Results and Exit
	print_results( in, mype, omp_end-omp_start, nprocs, vhash );

	#ifdef BENCHMARK
	}
	#endif

	#ifdef MPI
	MPI_Finalize();
	#endif

	return 0;
}
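Under VERIFICATION, each thread hashes its own result line and adds it to the shared vhash with #pragma omp atomic. The standalone sketch below is not part of XSBench; it reduces that accumulation pattern to a toy example (compile with -fopenmp), with each thread's contribution chosen only for illustration.

/* Standalone sketch of the atomic accumulation pattern used for vhash above. */
#include <omp.h>
#include <stdio.h>

int main(void)
{
    unsigned long long total = 0;

    #pragma omp parallel
    {
        /* Each thread computes a private contribution... */
        unsigned long long local = (unsigned long long)(omp_get_thread_num() + 1);

        /* ...and adds it to the shared accumulator atomically,
           so concurrent updates do not race. */
        #pragma omp atomic
        total += local;
    }

    printf("sum of thread contributions = %llu\n", total);
    return 0;
}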