/*
 * Bring up the message-passing layer and report where this process sits in it.
 *
 * Without GMX_MPI the routine only calls gmx_call("gmx_setup") and returns 0;
 * *nnodes is not written in that case.
 *
 * With GMX_MPI it initialises MPI (through fah_MPI_Init when GMX_FAHCORE is
 * defined), queries the size of MPI_COMM_WORLD, this process's rank and the
 * processor name, prints them to stderr, stores the process count in *nnodes
 * and returns the rank.
 *
 * With USE_MPE it additionally initialises MPE logging, obtains one pair of
 * event numbers per logged state (stored in the ev_* variables declared
 * outside this function) and, on rank 0, attaches a label and colour to each
 * state.
 *
 * argc, argv  forwarded to the MPI initialisation call
 * nnodes      out: number of processes in MPI_COMM_WORLD (MPI builds only)
 */
int gmx_setup(int *argc,char **argv,int *nnodes)
{
#ifndef GMX_MPI
  gmx_call("gmx_setup");
  return 0;
#else
  int  resultlen;               /* actual length of node name */
  int  mpi_num_nodes;
  int  mpi_my_rank;
  char mpi_hostname[MPI_MAX_PROCESSOR_NAME];

  /* Call the MPI routines */
#ifdef GMX_FAHCORE
  (void) fah_MPI_Init(argc,&argv);
#else
  (void) MPI_Init(argc,&argv);
#endif
  (void) MPI_Comm_size( MPI_COMM_WORLD, &mpi_num_nodes );
  (void) MPI_Comm_rank( MPI_COMM_WORLD, &mpi_my_rank );
  (void) MPI_Get_processor_name( mpi_hostname, &resultlen );

#ifdef USE_MPE
  /* MPE logging routines. The log has to be initialised before any other
   * MPE logging call is made, so do that first and only then ask for event
   * numbers and describe the states.
   */
  MPE_Init_log();

  /* Get event IDs from MPE: */
  /* General events */
  ev_timestep1               = MPE_Log_get_event_number( );
  ev_timestep2               = MPE_Log_get_event_number( );
  ev_force_start             = MPE_Log_get_event_number( );
  ev_force_finish            = MPE_Log_get_event_number( );
  ev_do_fnbf_start           = MPE_Log_get_event_number( );
  ev_do_fnbf_finish          = MPE_Log_get_event_number( );
  ev_ns_start                = MPE_Log_get_event_number( );
  ev_ns_finish               = MPE_Log_get_event_number( );
  ev_calc_bonds_start        = MPE_Log_get_event_number( );
  ev_calc_bonds_finish       = MPE_Log_get_event_number( );
  ev_global_stat_start       = MPE_Log_get_event_number( );
  ev_global_stat_finish      = MPE_Log_get_event_number( );
  ev_virial_start            = MPE_Log_get_event_number( );
  ev_virial_finish           = MPE_Log_get_event_number( );

  /* Shift related events */
  ev_shift_start             = MPE_Log_get_event_number( );
  ev_shift_finish            = MPE_Log_get_event_number( );
  ev_unshift_start           = MPE_Log_get_event_number( );
  ev_unshift_finish          = MPE_Log_get_event_number( );
  ev_mk_mshift_start         = MPE_Log_get_event_number( );
  ev_mk_mshift_finish        = MPE_Log_get_event_number( );

  /* PME related events.
   * The sum_qgrid pair is requested exactly once; asking a second time
   * would overwrite the IDs and throw the first pair away.
   */
  ev_pme_start               = MPE_Log_get_event_number( );
  ev_pme_finish              = MPE_Log_get_event_number( );
  ev_spread_on_grid_start    = MPE_Log_get_event_number( );
  ev_spread_on_grid_finish   = MPE_Log_get_event_number( );
  ev_sum_qgrid_start         = MPE_Log_get_event_number( );
  ev_sum_qgrid_finish        = MPE_Log_get_event_number( );
  ev_gmxfft3d_start          = MPE_Log_get_event_number( );
  ev_gmxfft3d_finish         = MPE_Log_get_event_number( );
  ev_solve_pme_start         = MPE_Log_get_event_number( );
  ev_solve_pme_finish        = MPE_Log_get_event_number( );
  ev_gather_f_bsplines_start = MPE_Log_get_event_number( );
  ev_gather_f_bsplines_finish= MPE_Log_get_event_number( );
  ev_reduce_start            = MPE_Log_get_event_number( );
  ev_reduce_finish           = MPE_Log_get_event_number( );
  ev_rscatter_start          = MPE_Log_get_event_number( );
  ev_rscatter_finish         = MPE_Log_get_event_number( );
  ev_alltoall_start          = MPE_Log_get_event_number( );
  ev_alltoall_finish         = MPE_Log_get_event_number( );
  ev_pmeredist_start         = MPE_Log_get_event_number( );
  ev_pmeredist_finish        = MPE_Log_get_event_number( );
  ev_init_pme_start          = MPE_Log_get_event_number( );
  ev_init_pme_finish         = MPE_Log_get_event_number( );
  ev_send_coordinates_start  = MPE_Log_get_event_number( );
  ev_send_coordinates_finish = MPE_Log_get_event_number( );
  ev_update_fr_start         = MPE_Log_get_event_number( );
  ev_update_fr_finish        = MPE_Log_get_event_number( );
  ev_clear_rvecs_start       = MPE_Log_get_event_number( );
  ev_clear_rvecs_finish      = MPE_Log_get_event_number( );
  ev_update_start            = MPE_Log_get_event_number( );
  ev_update_finish           = MPE_Log_get_event_number( );
  ev_output_start            = MPE_Log_get_event_number( );
  ev_output_finish           = MPE_Log_get_event_number( );
  ev_sum_lrforces_start      = MPE_Log_get_event_number( );
  ev_sum_lrforces_finish     = MPE_Log_get_event_number( );
  ev_sort_start              = MPE_Log_get_event_number( );
  ev_sort_finish             = MPE_Log_get_event_number( );

  /* Essential dynamics related events */
  ev_edsam_start             = MPE_Log_get_event_number( );
  ev_edsam_finish            = MPE_Log_get_event_number( );
  ev_get_coords_start        = MPE_Log_get_event_number( );
  ev_get_coords_finish       = MPE_Log_get_event_number( );
  ev_ed_apply_cons_start     = MPE_Log_get_event_number( );
  ev_ed_apply_cons_finish    = MPE_Log_get_event_number( );
  ev_fit_to_reference_start  = MPE_Log_get_event_number( );
  ev_fit_to_reference_finish = MPE_Log_get_event_number( );

  /* describe events: */
  if ( mpi_my_rank == 0 )
  {
    /* General events */
    MPE_Describe_state(ev_timestep1,               ev_timestep2,                "timestep START",  "magenta" );
    MPE_Describe_state(ev_force_start,             ev_force_finish,             "force",           "cornflower blue" );
    MPE_Describe_state(ev_do_fnbf_start,           ev_do_fnbf_finish,           "do_fnbf",         "navy" );
    MPE_Describe_state(ev_ns_start,                ev_ns_finish,                "neighbor search", "tomato" );
    MPE_Describe_state(ev_calc_bonds_start,        ev_calc_bonds_finish,        "bonded forces",   "slate blue" );
    MPE_Describe_state(ev_global_stat_start,       ev_global_stat_finish,       "global stat",     "firebrick3");
    MPE_Describe_state(ev_update_fr_start,         ev_update_fr_finish,         "update forcerec", "goldenrod");
    MPE_Describe_state(ev_clear_rvecs_start,       ev_clear_rvecs_finish,       "clear rvecs",     "bisque");
    MPE_Describe_state(ev_update_start,            ev_update_finish,            "update",          "cornsilk");
    MPE_Describe_state(ev_output_start,            ev_output_finish,            "output",          "black");
    MPE_Describe_state(ev_virial_start,            ev_virial_finish,            "calc_virial",     "thistle4");

    /* PME related events (sum_qgrid is described once only) */
    MPE_Describe_state(ev_pme_start,               ev_pme_finish,               "doing PME",       "grey" );
    MPE_Describe_state(ev_spread_on_grid_start,    ev_spread_on_grid_finish,    "spread",          "dark orange" );
    MPE_Describe_state(ev_sum_qgrid_start,         ev_sum_qgrid_finish,         "sum qgrid",       "slate blue");
    MPE_Describe_state(ev_gmxfft3d_start,          ev_gmxfft3d_finish,          "fft3d",           "snow2" );
    MPE_Describe_state(ev_solve_pme_start,         ev_solve_pme_finish,         "solve PME",       "indian red" );
    MPE_Describe_state(ev_gather_f_bsplines_start, ev_gather_f_bsplines_finish, "bsplines",        "light sea green" );
    MPE_Describe_state(ev_reduce_start,            ev_reduce_finish,            "reduce",          "cyan1" );
    MPE_Describe_state(ev_rscatter_start,          ev_rscatter_finish,          "rscatter",        "cyan3" );
    MPE_Describe_state(ev_alltoall_start,          ev_alltoall_finish,          "alltoall",        "LightCyan4" );
    MPE_Describe_state(ev_pmeredist_start,         ev_pmeredist_finish,         "pmeredist",       "thistle" );
    MPE_Describe_state(ev_init_pme_start,          ev_init_pme_finish,          "init PME",        "snow4");
    MPE_Describe_state(ev_send_coordinates_start,  ev_send_coordinates_finish,  "send_coordinates","blue");
    MPE_Describe_state(ev_sum_lrforces_start,      ev_sum_lrforces_finish,      "sum_LRforces",    "lime green");
    MPE_Describe_state(ev_sort_start,              ev_sort_finish,              "sort pme atoms",  "brown");

    /* Shift related events */
    MPE_Describe_state(ev_shift_start,             ev_shift_finish,             "shift",           "orange");
    MPE_Describe_state(ev_unshift_start,           ev_unshift_finish,           "unshift",         "dark orange");
    MPE_Describe_state(ev_mk_mshift_start,         ev_mk_mshift_finish,         "mk_mshift",       "maroon");

    /* Essential dynamics related events */
    MPE_Describe_state(ev_edsam_start,             ev_edsam_finish,             "EDSAM",           "deep sky blue");
    MPE_Describe_state(ev_get_coords_start,        ev_get_coords_finish,        "ED get coords",   "steel blue");
    MPE_Describe_state(ev_ed_apply_cons_start,     ev_ed_apply_cons_finish,     "ED apply constr", "forest green");
    MPE_Describe_state(ev_fit_to_reference_start,  ev_fit_to_reference_finish,  "ED fit to ref",   "lavender");
  }
#endif

  fprintf(stderr,"NNODES=%d, MYRANK=%d, HOSTNAME=%s\n",
          mpi_num_nodes,mpi_my_rank,mpi_hostname);

  *nnodes=mpi_num_nodes;

  return mpi_my_rank;
#endif
}
int main(int argc, char **argv) { /* MPI stuff. */ int mpi_namelen; char mpi_name[MPI_MAX_PROCESSOR_NAME]; int mpi_size, mpi_rank; MPI_Comm comm = MPI_COMM_WORLD; MPI_Info info = MPI_INFO_NULL; /* Netcdf-4 stuff. */ int ncid, v1id, dimids[NDIMS]; size_t start[NDIMS], count[NDIMS]; int data[DIMSIZE * DIMSIZE], i, res; int slab_data[DIMSIZE * DIMSIZE / 4]; /* one slab */ char file_name[NC_MAX_NAME + 1]; #ifdef USE_MPE int s_init, e_init, s_define, e_define, s_write, e_write, s_close, e_close; #endif /* USE_MPE */ /* Initialize MPI. */ MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); MPI_Get_processor_name(mpi_name, &mpi_namelen); /*printf("mpi_name: %s size: %d rank: %d\n", mpi_name, mpi_size, mpi_rank);*/ #ifdef USE_MPE MPE_Init_log(); s_init = MPE_Log_get_event_number(); e_init = MPE_Log_get_event_number(); s_define = MPE_Log_get_event_number(); e_define = MPE_Log_get_event_number(); s_write = MPE_Log_get_event_number(); e_write = MPE_Log_get_event_number(); s_close = MPE_Log_get_event_number(); e_close = MPE_Log_get_event_number(); MPE_Describe_state(s_init, e_init, "Init", "red"); MPE_Describe_state(s_define, e_define, "Define", "yellow"); MPE_Describe_state(s_write, e_write, "Write", "green"); MPE_Describe_state(s_close, e_close, "Close", "purple"); MPE_Start_log(); MPE_Log_event(s_init, 0, "start init"); #endif /* USE_MPE */ if (mpi_rank == 1) { printf("\n*** tst_parallel testing very basic parallel access.\n"); printf("*** tst_parallel testing whether we can create file for parallel access and write to it..."); } /* Create phony data. We're going to write a 24x24 array of ints, in 4 sets of 144. 
*/ /*printf("mpi_rank*QTR_DATA=%d (mpi_rank+1)*QTR_DATA-1=%d\n", mpi_rank*QTR_DATA, (mpi_rank+1)*QTR_DATA);*/ for (i = mpi_rank * QTR_DATA; i < (mpi_rank + 1) * QTR_DATA; i++) data[i] = mpi_rank; for (i = 0; i < DIMSIZE * DIMSIZE / 4; i++) slab_data[i] = mpi_rank; #ifdef USE_MPE MPE_Log_event(e_init, 0, "end init"); MPE_Log_event(s_define, 0, "start define file"); #endif /* USE_MPE */ /* Create a parallel netcdf-4 file. */ /*nc_set_log_level(3);*/ sprintf(file_name, "%s/%s", TEMP_LARGE, FILE); if ((res = nc_create_par(file_name, NC_NETCDF4|NC_MPIIO, comm, info, &ncid))) ERR; /* Create three dimensions. */ if (nc_def_dim(ncid, "d1", DIMSIZE, dimids)) ERR; if (nc_def_dim(ncid, "d2", DIMSIZE, &dimids[1])) ERR; if (nc_def_dim(ncid, "d3", NUM_SLABS, &dimids[2])) ERR; /* Create one var. */ if ((res = nc_def_var(ncid, "v1", NC_INT, NDIMS, dimids, &v1id))) ERR; /* Write metadata to file. */ if ((res = nc_enddef(ncid))) ERR; #ifdef USE_MPE MPE_Log_event(e_define, 0, "end define file"); if (mpi_rank) sleep(mpi_rank); #endif /* USE_MPE */ /* Set up slab for this process. */ start[0] = mpi_rank * DIMSIZE/mpi_size; start[1] = 0; count[0] = DIMSIZE/mpi_size; count[1] = DIMSIZE; count[2] = 1; /*printf("mpi_rank=%d start[0]=%d start[1]=%d count[0]=%d count[1]=%d\n", mpi_rank, start[0], start[1], count[0], count[1]);*/ if (nc_var_par_access(ncid, v1id, NC_COLLECTIVE)) ERR; /* if (nc_var_par_access(ncid, v1id, NC_INDEPENDENT)) ERR;*/ for (start[2] = 0; start[2] < NUM_SLABS; start[2]++) { #ifdef USE_MPE MPE_Log_event(s_write, 0, "start write slab"); #endif /* USE_MPE */ /* Write slabs of phoney data. */ if (nc_put_vara_int(ncid, v1id, start, count, slab_data)) ERR; #ifdef USE_MPE MPE_Log_event(e_write, 0, "end write file"); #endif /* USE_MPE */ } #ifdef USE_MPE MPE_Log_event(s_close, 0, "start close file"); #endif /* USE_MPE */ /* Close the netcdf file. 
*/ if ((res = nc_close(ncid))) ERR; #ifdef USE_MPE MPE_Log_event(e_close, 0, "end close file"); #endif /* USE_MPE */ /* Delete this large file. */ remove(file_name); /* Shut down MPI. */ MPI_Finalize(); if (mpi_rank == 1) { SUMMARIZE_ERR; FINAL_RESULTS; } return 0; }
/*
 * Initial main driver for GOMA.  Derived from a (1/93) release of
 * the rf_salsa program by
 *
 *        Original Authors: John  Shadid (1421)
 *                          Scott Hutchinson (1421)
 *                          Harry Moffat (1421)
 *
 *        Date: 12/3/92
 *
 *        Updates and Changes by:
 *                          Randy Schunk (9111)
 *                          P. A. Sackinger (9111)
 *                          R. R. Rao       (9111)
 *                          R. A. Cairncross (Univ. of Delaware)
 *        Dates: 2/93 - 6/96
 *
 *        Modified for continuation
 *                          Ian Gates
 *        Dates: 2/98 - 10/98
 *        Dates: 7/99 - 8/99
 *
 * Note: Many modifications from an early 2/93 pre-release
 *       version of rf_salsa were made by various persons
 *       in order to test ideas about moving/deforming meshes...
 *
 * What the routine does, in order: starts MPI on the first call (PARALLEL
 * builds), records the start time, allocates the problem-description
 * structures, reads the input file named "input" on processor 0 and
 * broadcasts its contents, reads the EXODUS II mesh, performs the
 * mesh-dependent setup, and writes the initial mesh to the output file when
 * that file differs from the input file.
 *
 * Parameters (all outputs):
 *   time1   start time: MPI_Wtime() in PARALLEL builds, otherwise seconds
 *           computed from the UTC day-of-year, hour, minute and second
 *   nnodes  EXO_ptr->num_nodes
 *   nelems  EXO_ptr->num_elems
 *   nnv_in  Num_Import_NV
 *   nev_in  Num_Import_EV
 *   i_soln  Num_Export_XS
 *   i_post  Num_Export_XP
 *
 * Returns 0 on success, or -1 when read_mesh_exoII() reports an error (MPI
 * is finalized first in PARALLEL builds and the output parameters other
 * than time1 are left unwritten).
 */
int
goma_init_(dbl *time1, int *nnodes, int *nelems,
           int *nnv_in, int *nev_in, int *i_soln, int *i_post)
{
  /* Local Declarations */

  double time_start;               /* timing variable */
#ifndef PARALLEL
  struct tm *tm_ptr;               /* additional serial timing variables */
  time_t the_time;
#endif

  int error;
  int i;

  static int first_goma_call=TRUE; /* guards the one-time MPI start-up */

  static const char *yo="goma_init";

  struct Command_line_command **clc=NULL; /* point to command line structure */
  int nclc = 0;                           /* number of command line commands */

/********************** BEGIN EXECUTION ***************************************/

#ifdef PARALLEL
  if( first_goma_call )
    {
      /* No real command line is available here, so hand MPI a one-entry
       * argument vector that only holds the routine name. */
      Argc = 1;
      Argv = (char **) smalloc( Argc*sizeof(char *) );
      Argv[0] = (char *) yo;
      MPI_Init(&Argc, &Argv);  /*PRS will have to fix this.  Too late TAB already did. */

      /* MPI may be initialised only once per process; remember that it
       * has been done so a later call skips this branch. */
      first_goma_call = FALSE;
    }
  time_start = MPI_Wtime();
#else /* PARALLEL */
  (void) time(&the_time);
  tm_ptr = gmtime(&the_time);
  time_start = (double)  ( tm_ptr->tm_sec
               + 60. * (   60. * ( tm_ptr->tm_yday * 24. + tm_ptr->tm_hour )
                                                         + tm_ptr->tm_min  )
                         );
#endif /* PARALLEL */
  *time1 = time_start;

  time_goma_started = time_start;

#ifdef PARALLEL
  /*
   * Determine the parallel processing status, if any. We need to know
   * pretty early if we're "one of many" or the only process.
   */

  error = MPI_Comm_size(MPI_COMM_WORLD, &Num_Proc);
  error = MPI_Comm_rank(MPI_COMM_WORLD, &ProcID);

  /*
   * Setup a default Proc_config so we can use utility routines
   * from Aztec
   */

  AZ_set_proc_config(Proc_Config, MPI_COMM_WORLD);

  /* set the output limit flag if need be */

  if( Num_Proc > DP_PROC_PRINT_LIMIT ) Unlimited_Output = FALSE;

#ifdef HAVE_MPE_H
  error = MPE_Init_log();
#endif /* HAVE_MPE_H */

  Dim = 0;                      /* for any hypercube legacy code...  */

#endif /* PARALLEL */

#ifndef PARALLEL
  Dim        = 0;
  ProcID     = 0;
  Num_Proc   = 1;
#endif /* PARALLEL */

  /*
   *   HKM - Change the ieee exception handling based on the machine and
   *         the level of debugging/speed desired. This call currently causes
   *         core dumps for floating point exceptions.
   */

  handle_ieee();

  log_msg("--------------");
  log_msg("GOMA begins...");

#ifdef USE_CGM
  cgm_initialize();
#endif

  /*
   * PRS: command line preprocessing, the code-version printout and the
   * legal notice are disabled for the JAS coupled version.  No command
   * line commands are collected (clc stays NULL, nclc stays 0) and the
   * default input file name "input" is used.
   */
  strcpy(Input_File, "input");

  /*
   *  Allocate the uniform problem description structure and
   *  the problem description structures on all processors
   */
  error = pd_alloc();
  EH(error, "pd_alloc problem");

#ifdef DEBUG
  fprintf(stderr, "P_%d at barrier after pd_alloc\n", ProcID);
#ifdef PARALLEL
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif
#endif

  log_msg("Allocating mp, gn, ...");

  error = mp_alloc();
  EH(error, "mp_alloc problem");

  error = gn_alloc();
  EH(error, "gn_alloc problem");

  error = ve_alloc();
  EH(error, "ve_alloc problem");

  error = elc_alloc();
  EH(error, "elc_alloc problem");

  error = elc_rs_alloc();
  EH(error, "elc_rs_alloc problem");

  error = cr_alloc();
  EH(error, "cr_alloc problem");

  error = evp_alloc();
  EH(error, "evp_alloc problem");

  error = tran_alloc();
  EH(error, "tran_alloc problem");

  error = libio_alloc();
  EH(error, "libio_alloc problem");

  error = eigen_alloc();
  EH(error, "eigen_alloc problem");

  error = cont_alloc();
  EH(error, "cont_alloc problem");

  error = loca_alloc();
  EH(error, "loca_alloc problem");

  error = efv_alloc();
  EH(error, "efv_alloc problem");

#ifdef DEBUG
  fprintf(stderr, "P_%d at barrier before read_input_file()\n", ProcID);
#ifdef PARALLEL
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif
#endif

  /*PRS AGAIN, NO COMMAND LINE OVERRIDES IN THIS JAS3D VERSION */
  /*
   * Read ASCII input file, data files, related exodusII FEM databases.
   */
  if ( ProcID == 0 )
    {
      log_msg("Reading input file ...");
      read_input_file(clc, nclc);
    }

  /*
   * The user-defined material properties, etc. available to goma users
   * mean that some dynamically allocated data needs to be communicated.
   *
   * To handle this, sizing information from the input file scan is
   * broadcast in stages so that the other processors can allocate space
   * accordingly to hold the data.
   *
   * Note: instead of handpacking a data structure, use MPI derived datatypes
   * to gather and scatter. Pray this is done efficiently. Certainly it costs
   * less from a memory standpoint.
   */

#ifdef PARALLEL

  /*
   *  Make sure the input file was successully processed before moving on
   */
  check_parallel_error("Input file error");

  /*
   * This is some sizing information that helps fit a little bit more
   * onto the ark later on.
   */

#ifdef DEBUG
  fprintf(stderr, "P_%d at barrier before noahs_raven()\n", ProcID);
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif

  noahs_raven();

#ifdef DEBUG
  fprintf(stderr, "P_%d at barrier before MPI_Bcast of Noahs_Raven\n", ProcID);
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif

  MPI_Bcast(MPI_BOTTOM, 1, Noahs_Raven->new_type, 0, MPI_COMM_WORLD);

#ifdef DEBUG
  fprintf(stderr, "P_%d at barrier after Bcast/before raven_landing()\n",
          ProcID);
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif

  /*
   * Get the other processors ready to handle ark data.
   */

  raven_landing();

#ifdef DEBUG
  fprintf(stderr, "P_%d at barrier before noahs_ark()\n", ProcID);
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif

  /*
   * This is the main body of communicated information, including some
   * whose sizes were determined because of advanced legwork by the raven.
   */

  noahs_ark();
  MPI_Bcast(MPI_BOTTOM, 1, Noahs_Ark->new_type, 0, MPI_COMM_WORLD);

  /*
   * Chemkin was initialized on processor zero during the input file
   * process. Now, distribute it to all processors
   */
#ifdef USE_CHEMKIN
  if (Chemkin_Needed)
    {
      chemkin_initialize_mp();
    }
#endif

  /*
   * Once the ark has landed, there are additional things that will need to
   * be sent by dove. Example: BC_Types[]->u-BC arrays.
   *
   */

  ark_landing();

  noahs_dove();
  MPI_Bcast(MPI_BOTTOM, 1, Noahs_Dove->new_type, 0, MPI_COMM_WORLD);

#endif /* End of ifdef PARALLEL */

  /*
   * We sent the packed line to all processors that contained geometry
   * creation commands.  Now we need to step through it and create
   * geometry as we go (including possibly reading an ACIS .sat file).
   *
   */
#ifdef USE_CGM
  create_cgm_geometry();
#endif

  /*
   * For parallel execution, assume the following variables will be changed
   * to reflect the multiple file aspect of the problem.
   *
   *    FEM file = file.exoII           --> file_3of15.exoII
   *
   *    Output EXODUS II file = out.exoII --> out_3of15.exoII
   *
   */

  /*
   * Allocate space for structures holding the EXODUS II finite element
   * database information and for the Distributed Processing information.
   *
   * These are mostly skeletons with pointers that get allocated in the
   * rd_exoII and rd_dpi routines. Remember to free up those arrays first
   * before freeing the major pointers.
   */

  EXO_ptr = alloc_struct_1(Exo_DB, 1);
  init_exo_struct(EXO_ptr);
  DPI_ptr = alloc_struct_1(Dpi, 1);
  init_dpi_struct(DPI_ptr);

  log_msg("Reading mesh from EXODUS II file...");
  error = read_mesh_exoII(EXO_ptr, DPI_ptr);

  /*
   *   Missing files on any processor are detected at a lower level
   *   forcing a return to the higher level
   *         rd_exo -->  rd_mesh  -->  main
   *   Shutdown now, if any of the exodus files weren't found
   */
  if (error < 0)
    {
#ifdef PARALLEL
      MPI_Finalize();
#endif
      return(-1);
    }

  /*
   * All of the MPI_Type_commit() calls called behind the scenes that build
   * the dove, ark and raven really allocated memory. Let's free it up now that
   * the initial information has been communicated.
   */

#ifdef PARALLEL
  MPI_Type_free(&(Noahs_Raven->new_type));
  MPI_Type_free(&(Noahs_Ark->new_type));
  MPI_Type_free(&(Noahs_Dove->new_type));
#endif

  /*
   * Setup the rest of the Problem Description structure that depends on
   * the mesh that was read in from the EXODUS II file...
   *
   * Note that memory allocation and some setup has already been performed
   * in mm_input()...
   */

  error = setup_pd();
  EH( error, "Problem setting up Problem_Description.");

  /*
   * Let's check to see if we need the large elasto-plastic global tensors
   * and allocate them if so
   */
  error = evp_tensor_alloc(EXO_ptr);
  EH( error, "Problems setting up evp tensors");

  /*
   * Now that we know about what kind of problem we're solving and the
   * mesh information, let's allocate space for elemental assembly structures
   *
   */
#ifdef DEBUG
  DPRINTF(stderr, "About to assembly_alloc()...\n");
#endif
  log_msg("Assembly allocation...");

  error = assembly_alloc(EXO_ptr);
  EH( error, "Problem from assembly_alloc");

  if (Debug_Flag)
    {
      DPRINTF(stderr, "%s:  setting up EXODUS II output files...\n", yo);
    }

  /*
   * These are not critical - just niceties. Also, they should not overburden
   * your db with too much of this - they're capped verbiage compliant routines.
   */

  add_qa_stamp(EXO_ptr);

  add_info_stamp(EXO_ptr);

#ifdef DEBUG
  fprintf(stderr, "added qa and info stamps\n");
#endif

  /*
   * If the output EXODUS II database file is different from the input
   * file, then we'll need to replicate all the basic mesh information.
   * But, remember that if we're parallel, that the output file names must
   * be multiplexed first...
   */
  if ( Num_Proc > 1 )
    {
      multiname(ExoFileOut,     ProcID, Num_Proc);
      multiname(Init_GuessFile, ProcID, Num_Proc);

      if ( strcmp( Soln_OutFile, "" ) != 0 )
        {
          multiname(Soln_OutFile, ProcID, Num_Proc);
        }

      if( strcmp( ExoAuxFile, "" ) != 0 )
        {
          multiname(ExoAuxFile, ProcID, Num_Proc);
        }

      if( efv->Num_external_field != 0 )
        {
          for( i=0; i<efv->Num_external_field; i++ )
            {
              multiname(efv->file_nm[i], ProcID, Num_Proc);
            }
        }
    }

  /***********************************************************************/
  /***********************************************************************/
  /***********************************************************************/
  /*
   *   Preprocess the exodus mesh
   *        -> Allocate pointers to structures containing element
   *           side bc info, First_Elem_Side_BC_Array, and
   *           element edge info, First_Elem_Edge_BC_Array.
   *        -> Determine Unique_Element_Types[] array
   */
#ifdef DEBUG
  fprintf(stderr, "pre_process()...\n");
#endif
  log_msg("Pre processing of mesh...");
#ifdef PARALLEL
  error = MPI_Barrier(MPI_COMM_WORLD);
#endif
  pre_process(EXO_ptr);

  /***********************************************************************/
  /***********************************************************************/
  /***********************************************************************/
  /*
   * Load up a few key indeces in the bfd prototype basis function structures
   * and make sure that each active eqn/vbl has a bf[v] that points to the
   * right bfd[]...needs pre_process to find out the number of unique
   * element types in the problem.
   */

#ifdef DEBUG
  fprintf(stderr, "bf_init()...\n");
#endif
  log_msg("Basis function initialization...");
  error = bf_init(EXO_ptr);
  EH( error, "Problem from bf_init");

  /*
   * check for parallel errors before continuing
   */
  check_parallel_error("Error encountered in problem setup");

  /***********************************************************************/
  /***********************************************************************/
  /***********************************************************************/
  /*
   * Allocate space for each communication exchange description.
   */
#ifdef PARALLEL
#ifdef DEBUG
  fprintf(stderr, "P_%d: Parallel cx allocation\n", ProcID);
#endif
  if (DPI_ptr->num_neighbors > 0)
    {
      cx = alloc_struct_1(Comm_Ex, DPI_ptr->num_neighbors);
      Request = alloc_struct_1(MPI_Request,
                               Num_Requests * DPI_ptr->num_neighbors);
      Status = alloc_struct_1(MPI_Status,
                              Num_Requests * DPI_ptr->num_neighbors);
    }
#endif

  /***********************************************************************/
  /***********************************************************************/
  /***********************************************************************/
  /*
   *                           SET UP THE PROBLEM
   *
   * Setup node-based structures
   * Finalise how boundary conditions are to be handled
   * Determine what unknowns are at each owned node and then tell
   *  neighboring processors about your nodes
   * Set up communications pattern for fast unknown updates between
   *  processors.
   */
  (void) setup_problem(EXO_ptr, DPI_ptr);

  /*
   * check for parallel errors before continuing
   */
  check_parallel_error("Error encountered in problem setup");

  /***********************************************************************/
  /***********************************************************************/
  /***********************************************************************/
  /*
   *              WRITE OUT INITIAL INFO TO EXODUS FILE
   */

  /*
   *  Only have to initialize the exodus file if we are using different
   *  files for the output versus the input mesh
   */
  if (strcmp(ExoFile, ExoFileOut))
    {
      /*
       * Temporarily we'll need to renumber the nodes and elements in the
       * mesh to be 1-based. After writing, return to the 0 based indexing
       * that is more convenient in C.
       */
#ifdef DEBUG
      fprintf(stderr, "1-base; wr_mesh; 0-base\n");
#endif
      one_base(EXO_ptr);
      wr_mesh_exo(EXO_ptr, ExoFileOut, 0);
      zero_base(EXO_ptr);

      /*
       * If running on a distributed computer, augment the plain finite
       * element information of EXODUS with the description of how this
       * piece fits into the global problem.
       */
      if (Num_Proc > 1)
        {
#ifdef PARALLEL
#ifdef DEBUG
          fprintf(stderr, "P_%d at barrier before wr_dpi()\n", ProcID);
          fprintf(stderr, "P_%d ExoFileOut = \"%s\"\n", ProcID, ExoFileOut);
          error = MPI_Barrier(MPI_COMM_WORLD);
#endif
#endif
          wr_dpi(DPI_ptr, ExoFileOut, 0);
        }
    }

  if (Num_Import_NV > 0 || Num_Import_EV > 0) printf
    (" Goma will import %d nodal and %d element variables.\n",
     Num_Import_NV, Num_Import_EV);
  if (Num_Export_XS > 0 || Num_Export_XP > 0) printf
    (" Goma will export %d solution and %d post-processing variables.\n",
     Num_Export_XS, Num_Export_XP);

  /* Return counts to calling program */
  *nnodes = EXO_ptr->num_nodes;
  *nelems = EXO_ptr->num_elems;
  *nnv_in = Num_Import_NV;
  *nev_in = Num_Import_EV;
  *i_soln = Num_Export_XS;
  *i_post = Num_Export_XP;

  return (0); /* Back to animas*/
}
int main( int argc, char *argv[] ) { int n, myid, numprocs, ii, jj; double PI25DT = 3.141592653589793238462643; double mypi, pi, h, sum, x; double startwtime = 0.0, endwtime; int namelen; int event1a, event1b, event2a, event2b, event3a, event3b, event4a, event4b; int event1, event2, event3; char processor_name[ MPI_MAX_PROCESSOR_NAME ]; MPI_Init( &argc, &argv ); MPI_Pcontrol( 0 ); MPI_Comm_size( MPI_COMM_WORLD, &numprocs ); MPI_Comm_rank( MPI_COMM_WORLD, &myid ); MPI_Get_processor_name( processor_name, &namelen ); fprintf( stderr, "Process %d running on %s\n", myid, processor_name ); /* MPE_Init_log() & MPE_Finish_log() are NOT needed when liblmpe.a is linked with this program. In that case, MPI_Init() would have called MPE_Init_log() already. */ #if defined( NO_MPI_LOGGING ) MPE_Init_log(); #endif /* user should NOT assign eventIDs directly in MPE_Describe_state() Get the eventIDs for user-defined STATES(rectangles) from MPE_Log_get_state_eventIDs() instead of the deprecated function MPE_Log_get_event_number(). 
*/ MPE_Log_get_state_eventIDs( &event1a, &event1b ); MPE_Log_get_state_eventIDs( &event2a, &event2b ); MPE_Log_get_state_eventIDs( &event3a, &event3b ); MPE_Log_get_state_eventIDs( &event4a, &event4b ); if ( myid == 0 ) { MPE_Describe_state( event1a, event1b, "Broadcast", "red" ); MPE_Describe_state( event2a, event2b, "Sync", "orange" ); MPE_Describe_state( event3a, event3b, "Compute", "blue" ); MPE_Describe_state( event4a, event4b, "Reduce", "green" ); } /* Get event ID for Solo-Event(single timestamp object) from MPE */ MPE_Log_get_solo_eventID( &event1 ); MPE_Log_get_solo_eventID( &event2 ); MPE_Log_get_solo_eventID( &event3 ); if ( myid == 0 ) { MPE_Describe_event( event1, "Broadcast Post", "white" ); MPE_Describe_event( event2, "Compute Start", "purple" ); MPE_Describe_event( event3, "Compute End", "navy" ); } if ( myid == 0 ) { n = 1000000; startwtime = MPI_Wtime(); } MPI_Barrier( MPI_COMM_WORLD ); MPI_Pcontrol( 1 ); /* MPE_Start_log(); */ for ( jj = 0; jj < 5; jj++ ) { MPE_Log_event( event1a, 0, NULL ); MPI_Bcast( &n, 1, MPI_INT, 0, MPI_COMM_WORLD ); MPE_Log_event( event1b, 0, NULL ); MPE_Log_event( event1, 0, NULL ); MPE_Log_event( event2a, 0, NULL ); MPI_Barrier( MPI_COMM_WORLD ); MPE_Log_event( event2b, 0, NULL ); MPE_Log_event( event2, 0, NULL ); MPE_Log_event( event3a, 0, NULL ); h = 1.0 / (double) n; sum = 0.0; for ( ii = myid + 1; ii <= n; ii += numprocs ) { x = h * ((double)ii - 0.5); sum += f(x); } mypi = h * sum; MPE_Log_event( event3b, 0, NULL ); MPE_Log_event( event3, 0, NULL ); pi = 0.0; MPE_Log_event( event4a, 0, NULL ); MPI_Reduce( &mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD ); MPE_Log_event( event4b, 0, NULL ); MPE_Log_sync_clocks(); } #if defined( NO_MPI_LOGGING ) if ( argv != NULL ) MPE_Finish_log( argv[0] ); else MPE_Finish_log( "cpilog" ); #endif if ( myid == 0 ) { endwtime = MPI_Wtime(); printf( "pi is approximately %.16f, Error is %.16f\n", pi, fabs(pi - PI25DT) ); printf( "wall clock time = %f\n", endwtime-startwtime ); } 
MPI_Finalize(); return( 0 ); }
int main(int argc, char **argv) { int p, my_rank; #ifdef USE_MPE int s_init, e_init, s_define, e_define, s_write, e_write, s_close, e_close; #endif /* USE_MPE */ MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &p); #ifdef USE_MPE MPE_Init_log(); s_init = MPE_Log_get_event_number(); e_init = MPE_Log_get_event_number(); s_define = MPE_Log_get_event_number(); e_define = MPE_Log_get_event_number(); s_write = MPE_Log_get_event_number(); e_write = MPE_Log_get_event_number(); s_close = MPE_Log_get_event_number(); e_close = MPE_Log_get_event_number(); MPE_Describe_state(s_init, e_init, "Init", "red"); MPE_Describe_state(s_define, e_define, "Define", "yellow"); MPE_Describe_state(s_write, e_write, "Write", "green"); MPE_Describe_state(s_close, e_close, "Close", "purple"); MPE_Start_log(); MPE_Log_event(s_init, 0, "start init"); #endif /* USE_MPE */ if (!my_rank) printf("*** Creating file for parallel I/O read, and rereading it..."); { hid_t fapl_id, fileid, whole_spaceid, dsid, slice_spaceid, whole_spaceid1, xferid; hsize_t start[NDIMS], count[NDIMS]; hsize_t dims[1]; int data[SC1], data_in[SC1]; int num_steps; double ftime; int write_us, read_us; int max_write_us, max_read_us; float write_rate, read_rate; int i, s; /* We will write the same slice of random data over and over to * fill the file. */ for (i = 0; i < SC1; i++) data[i] = rand(); #ifdef USE_MPE MPE_Log_event(e_init, 0, "end init"); MPE_Log_event(s_define, 0, "start define file"); #endif /* USE_MPE */ /* Create file. */ if ((fapl_id = H5Pcreate(H5P_FILE_ACCESS)) < 0) ERR; if (H5Pset_fapl_mpio(fapl_id, MPI_COMM_WORLD, MPI_INFO_NULL) < 0) ERR; if ((fileid = H5Fcreate(FILE_NAME, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id)) < 0) ERR; /* Create a space to deal with one slice in memory. */ dims[0] = SC1; if ((slice_spaceid = H5Screate_simple(NDIMS, dims, NULL)) < 0) ERR; /* Create a space to write all slices. 
*/ dims[0] = DIM2_LEN; if ((whole_spaceid = H5Screate_simple(NDIMS, dims, NULL)) < 0) ERR; /* Create dataset. */ if ((dsid = H5Dcreate1(fileid, VAR_NAME, H5T_NATIVE_INT, whole_spaceid, H5P_DEFAULT)) < 0) ERR; /* Use collective write operations. */ if ((xferid = H5Pcreate(H5P_DATASET_XFER)) < 0) ERR; if (H5Pset_dxpl_mpio(xferid, H5FD_MPIO_COLLECTIVE) < 0) ERR; #ifdef USE_MPE MPE_Log_event(e_define, 0, "end define file"); if (my_rank) sleep(my_rank); #endif /* USE_MPE */ /* Write the data in num_step steps. */ ftime = MPI_Wtime(); num_steps = (DIM2_LEN/SC1) / p; for (s = 0; s < num_steps; s++) { #ifdef USE_MPE MPE_Log_event(s_write, 0, "start write slab"); #endif /* USE_MPE */ /* Select hyperslab for write of one slice. */ start[0] = s * SC1 * p + my_rank * SC1; count[0] = SC1; if (H5Sselect_hyperslab(whole_spaceid, H5S_SELECT_SET, start, NULL, count, NULL) < 0) ERR; if (H5Dwrite(dsid, H5T_NATIVE_INT, slice_spaceid, whole_spaceid, xferid, data) < 0) ERR; #ifdef USE_MPE MPE_Log_event(e_write, 0, "end write file"); #endif /* USE_MPE */ } write_us = (MPI_Wtime() - ftime) * MILLION; MPI_Reduce(&write_us, &max_write_us, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); if (!my_rank) { write_rate = (float)(DIM2_LEN * sizeof(int))/(float)max_write_us; printf("\np=%d, write_rate=%g", p, write_rate); } #ifdef USE_MPE MPE_Log_event(s_close, 0, "start close file"); #endif /* USE_MPE */ /* Close. These collective operations will allow every process * to catch up. */ if (H5Dclose(dsid) < 0 || H5Sclose(whole_spaceid) < 0 || H5Sclose(slice_spaceid) < 0 || H5Pclose(fapl_id) < 0 || H5Fclose(fileid) < 0) ERR; #ifdef USE_MPE MPE_Log_event(e_close, 0, "end close file"); #endif /* USE_MPE */ /* Open the file. 
*/ if ((fapl_id = H5Pcreate(H5P_FILE_ACCESS)) < 0) ERR; if (H5Pset_fapl_mpio(fapl_id, MPI_COMM_WORLD, MPI_INFO_NULL) < 0) ERR; if (H5Pset_libver_bounds(fapl_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) < 0) ERR; if ((fileid = H5Fopen(FILE_NAME, H5F_ACC_RDONLY, fapl_id)) < 0) ERR; /* Create a space to deal with one slice in memory. */ dims[0] = SC1; if ((slice_spaceid = H5Screate_simple(NDIMS, dims, NULL)) < 0) ERR; /* Open the dataset. */ if ((dsid = H5Dopen(fileid, VAR_NAME)) < 0) ERR; if ((whole_spaceid1 = H5Dget_space(dsid)) < 0) ERR; ftime = MPI_Wtime(); /* Read the data, a slice at a time. */ for (s = 0; s < num_steps; s++) { /* Select hyperslab for read of one slice. */ start[0] = s * SC1 * p + my_rank * SC1; count[0] = SC1; if (H5Sselect_hyperslab(whole_spaceid1, H5S_SELECT_SET, start, NULL, count, NULL) < 0) { ERR; return 2; } if (H5Dread(dsid, H5T_NATIVE_INT, slice_spaceid, whole_spaceid1, H5P_DEFAULT, data_in) < 0) { ERR; return 2; } /* /\* Check the slice of data. *\/ */ /* for (i = 0; i < SC1; i++) */ /* if (data[i] != data_in[i]) */ /* { */ /* ERR; */ /* return 2; */ /* } */ } read_us = (MPI_Wtime() - ftime) * MILLION; MPI_Reduce(&read_us, &max_read_us, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); if (!my_rank) { read_rate = (float)(DIM2_LEN * sizeof(int))/(float)max_read_us; printf(", read_rate=%g\n", read_rate); } /* Close down. */ if (H5Dclose(dsid) < 0 || H5Sclose(slice_spaceid) < 0 || H5Sclose(whole_spaceid1) < 0 || H5Pclose(fapl_id) < 0 || H5Fclose(fileid) < 0) ERR; } if (!my_rank) SUMMARIZE_ERR; MPI_Finalize(); if (!my_rank) FINAL_RESULTS; return 0; }
/** Main execution of code. Executes the functions to: - create a new examplePioClass instance - initialize MPI and the ParallelIO libraries - create the decomposition for this example - create the netCDF output file - define the variable in the file - write data to the variable in the file using decomposition - read the data back from the file using decomposition - close the file - clean up resources The example can be run from the command line (on system that support it) like this: <pre> mpiexec -n 4 ./examplePio </pre> The sample file created by this program is a small netCDF file. It has the following contents (as shown by ncdump) for a 4-processor run: <pre> netcdf examplePio_c { dimensions: x = 16 ; variables: int foo(x) ; data: foo = 42, 42, 42, 42, 43, 43, 43, 43, 44, 44, 44, 44, 45, 45, 45, 45 ; } </pre> @param [in] argc argument count (should be zero) @param [in] argv argument array (should be NULL) @retval examplePioClass* Pointer to self. */ int main(int argc, char* argv[]) { /** Set to non-zero to get output to stdout. */ int verbose = 0; /** Zero-based rank of processor. */ int my_rank; /** Number of processors involved in current execution. */ int ntasks; /** Different output flavors. The example file is written (and * then read) four times. The first two flavors, * parallel-netcdf, and netCDF serial, both produce a netCDF * classic format file (but with different libraries). The * last two produce netCDF4/HDF5 format files, written with * and without using netCDF-4 parallel I/O. */ int format[NUM_NETCDF_FLAVORS] = {PIO_IOTYPE_PNETCDF, PIO_IOTYPE_NETCDF, PIO_IOTYPE_NETCDF4C, PIO_IOTYPE_NETCDF4P}; /** Names for the output files. Two of them (pnetcdf and * classic) will be in classic netCDF format, the others * (serial4 and parallel4) will be in netCDF-4/HDF5 * format. All four can be read by the netCDF library, and all * will contain the same contents. 
*/ char filename[NUM_NETCDF_FLAVORS][NC_MAX_NAME + 1] = {"example2_pnetcdf.nc", "example2_classic.nc", "example2_serial4.nc", "example2_parallel4.nc"}; /** Number of processors that will do IO. In this example we * will do IO from all processors. */ int niotasks; /** Stride in the mpi rank between io tasks. Always 1 in this * example. */ int ioproc_stride = 1; /** Number of the aggregator? Always 0 in this example. */ int numAggregator = 0; /** Zero based rank of first processor to be used for I/O. */ int ioproc_start = 0; /** Specifies the flavor of netCDF output format. */ int iotype; /** The dimension IDs. */ int dimids[NDIM]; /** Array index per processing unit. This is the number of * elements of the data array that will be handled by each * processor. In this example there are 16 data elements. If the * example is run on 4 processors, then arrIdxPerPe will be 4. */ PIO_Offset elements_per_pe; /** The ID for the parallel I/O system. It is set by * PIOc_Init_Intracomm(). It references an internal structure * containing the general IO subsystem data and MPI * structure. It is passed to PIOc_finalize() to free * associated resources, after all I/O, but before * MPI_Finalize is called. */ int iosysid; /** The ncid of the netCDF file created in this example. */ int ncid = 0; /** The ID of the netCDF varable in the example file. */ int varid; /** The I/O description ID as passed back by PIOc_InitDecomp() * and freed in PIOc_freedecomp(). */ int ioid; /** A buffer for sample data. The size of this array will * vary depending on how many processors are involved in the * execution of the example code. It's length will be the same * as elements_per_pe.*/ float *buffer; /** A buffer for reading data back from the file. The size of * this array will vary depending on how many processors are * involved in the execution of the example code. 
It's length * will be the same as elements_per_pe.*/ int *read_buffer; /** A 1-D array which holds the decomposition mapping for this * example. The size of this array will vary depending on how * many processors are involved in the execution of the * example code. It's length will be the same as * elements_per_pe. */ PIO_Offset *compdof; #ifdef HAVE_MPE /** MPE event numbers used to track start and stop of * different parts of the program for later display with * Jumpshot. */ int event_num[2][NUM_EVENTS]; #endif /* HAVE_MPE */ /** Needed for command line processing. */ int c; /* Parse command line. */ while ((c = getopt(argc, argv, "v")) != -1) switch (c) { case 'v': verbose++; break; default: break; } #ifdef TIMING /* Initialize the GPTL timing library. */ int ret; if ((ret = GPTLinitialize ())) return ret; #endif /* Initialize MPI. */ if ((ret = MPI_Init(&argc, &argv))) MPIERR(ret); if ((ret = MPI_Errhandler_set(MPI_COMM_WORLD, MPI_ERRORS_RETURN))) MPIERR(ret); /* Learn my rank and the total number of processors. */ if ((ret = MPI_Comm_rank(MPI_COMM_WORLD, &my_rank))) MPIERR(ret); if ((ret = MPI_Comm_size(MPI_COMM_WORLD, &ntasks))) MPIERR(ret); /* Check that a valid number of processors was specified. */ if (!(ntasks == 1 || ntasks == 2 || ntasks == 4 || ntasks == 8 || ntasks == 16)) fprintf(stderr, "Number of processors must be 1, 2, 4, 8, or 16!\n"); if (verbose) printf("%d: ParallelIO Library example1 running on %d processors.\n", my_rank, ntasks); #ifdef HAVE_MPE /* Initialize MPE logging. */ if ((ret = MPE_Init_log())) ERR(ret); if (init_logging(my_rank, event_num)) ERR(ERR_LOGGING); /* Log with MPE that we are starting INIT. */ if ((ret = MPE_Log_event(event_num[START][INIT], 0, "start init"))) MPIERR(ret); #endif /* HAVE_MPE */ /* keep things simple - 1 iotask per MPI process */ niotasks = ntasks; /* Initialize the PIO IO system. This specifies how * many and which processors are involved in I/O. 
*/ if ((ret = PIOc_Init_Intracomm(MPI_COMM_WORLD, niotasks, ioproc_stride, ioproc_start, PIO_REARR_SUBSET, &iosysid))) ERR(ret); /* Describe the decomposition. This is a 1-based array, so add 1! */ elements_per_pe = X_DIM_LEN * Y_DIM_LEN / ntasks; if (!(compdof = malloc(elements_per_pe * sizeof(PIO_Offset)))) return PIO_ENOMEM; for (int i = 0; i < elements_per_pe; i++) { compdof[i] = my_rank * elements_per_pe + i + 1; } /* Create the PIO decomposition for this example. */ if (verbose) printf("rank: %d Creating decomposition...\n", my_rank); if ((ret = PIOc_InitDecomp(iosysid, PIO_FLOAT, 2, &dim_len[1], (PIO_Offset)elements_per_pe, compdof, &ioid, NULL, NULL, NULL))) ERR(ret); free(compdof); #ifdef HAVE_MPE /* Log with MPE that we are done with INIT. */ if ((ret = MPE_Log_event(event_num[END][INIT], 0, "end init"))) MPIERR(ret); #endif /* HAVE_MPE */ /* Use PIO to create the example file in each of the four * available ways. */ for (int fmt = 0; fmt < NUM_NETCDF_FLAVORS; fmt++) { #ifdef HAVE_MPE /* Log with MPE that we are starting CREATE. */ if ((ret = MPE_Log_event(event_num[START][CREATE_PNETCDF+fmt], 0, "start create"))) MPIERR(ret); #endif /* HAVE_MPE */ /* Create the netCDF output file. */ if (verbose) printf("rank: %d Creating sample file %s with format %d...\n", my_rank, filename[fmt], format[fmt]); if ((ret = PIOc_createfile(iosysid, &ncid, &(format[fmt]), filename[fmt], PIO_CLOBBER))) ERR(ret); /* Define netCDF dimensions and variable. */ if (verbose) printf("rank: %d Defining netCDF metadata...\n", my_rank); for (int d = 0; d < NDIM; d++) { if (verbose) printf("rank: %d Defining netCDF dimension %s, length %d\n", my_rank, dim_name[d], dim_len[d]); if ((ret = PIOc_def_dim(ncid, dim_name[d], (PIO_Offset)dim_len[d], &dimids[d]))) ERR(ret); } if ((ret = PIOc_def_var(ncid, VAR_NAME, PIO_FLOAT, NDIM, dimids, &varid))) ERR(ret); /* For netCDF-4 files, set the chunksize to improve performance. 
*/ if (format[fmt] == PIO_IOTYPE_NETCDF4C || format[fmt] == PIO_IOTYPE_NETCDF4P) if ((ret = PIOc_def_var_chunking(ncid, 0, NC_CHUNKED, chunksize))) ERR(ret); if ((ret = PIOc_enddef(ncid))) ERR(ret); #ifdef HAVE_MPE /* Log with MPE that we are done with CREATE. */ if ((ret = MPE_Log_event(event_num[END][CREATE_PNETCDF + fmt], 0, "end create"))) MPIERR(ret); #endif /* HAVE_MPE */ /* Allocate space for sample data. */ if (!(buffer = malloc(elements_per_pe * sizeof(float)))) return PIO_ENOMEM; /* Write data for each timestep. */ for (int ts = 0; ts < NUM_TIMESTEPS; ts++) { #ifdef HAVE_MPE /* Log with MPE that we are starting CALCULATE. */ if ((ret = MPE_Log_event(event_num[START][CALCULATE], 0, "start calculate"))) MPIERR(ret); #endif /* HAVE_MPE */ /* Calculate sample data. Add some math function calls to make this slower. */ for (int i = 0; i < elements_per_pe; i++) if ((ret = calculate_value(my_rank, ts, &buffer[i]))) ERR(ret); #ifdef HAVE_MPE /* Log with MPE that we are done with CALCULATE. */ if ((ret = MPE_Log_event(event_num[END][CALCULATE], 0, "end calculate"))) MPIERR(ret); /* Log with MPE that we are starting WRITE. */ if ((ret = MPE_Log_event(event_num[START][WRITE], 0, "start write"))) MPIERR(ret); #endif /* HAVE_MPE */ /* Write data to the file. */ if (verbose) printf("rank: %d Writing sample data...\n", my_rank); if ((ret = PIOc_setframe(ncid, varid, ts))) ERR(ret); if ((ret = PIOc_write_darray(ncid, varid, ioid, (PIO_Offset)elements_per_pe, buffer, NULL))) ERR(ret); if ((ret = PIOc_sync(ncid))) ERR(ret); #ifdef HAVE_MPE /* Log with MPE that we are done with WRITE. */ if ((ret = MPE_Log_event(event_num[END][WRITE], 0, "end write"))) MPIERR(ret); #endif /* HAVE_MPE */ } #ifdef HAVE_MPE /* Log with MPE that we are starting CLOSE. */ if ((ret = MPE_Log_event(event_num[START][CLOSE], 0, "start close"))) MPIERR(ret); #endif /* HAVE_MPE */ /* Free buffer space used in this example. */ free(buffer); /* Close the netCDF file. 
*/ if (verbose) printf("rank: %d Closing the sample data file...\n", my_rank); if ((ret = PIOc_closefile(ncid))) ERR(ret); #ifdef HAVE_MPE /* Log with MPE that we are done with CLOSE. */ if ((ret = MPE_Log_event(event_num[END][CLOSE], 0, "end close"))) MPIERR(ret); #endif /* HAVE_MPE */ /* After each file is closed, make all processors wait so that * all start creating the next file at the same time. */ if ((ret = MPI_Barrier(MPI_COMM_WORLD))) MPIERR(ret); } #ifdef HAVE_MPE /* Log with MPE that we are starting FREE. */ if ((ret = MPE_Log_event(event_num[START][FREE], 0, "start free"))) MPIERR(ret); #endif /* HAVE_MPE */ /* Free the PIO decomposition. */ if (verbose) printf("rank: %d Freeing PIO decomposition...\n", my_rank); if ((ret = PIOc_freedecomp(iosysid, ioid))) ERR(ret); /* Finalize the IO system. */ if (verbose) printf("rank: %d Freeing PIO resources...\n", my_rank); if ((ret = PIOc_finalize(iosysid))) ERR(ret); #ifdef HAVE_MPE /* Log with MPE that we are done with FREE. */ if ((ret = MPE_Log_event(event_num[END][FREE], 0, "end free"))) MPIERR(ret); /* Log with MPE that we are starting READ. */ if ((ret = MPE_Log_event(event_num[START][READ], 0, "start read"))) MPIERR(ret); #endif /* HAVE_MPE */ /* Check the output file. */ /* if (!my_rank) */ /* for (int fmt = 0; fmt < NUM_NETCDF_FLAVORS; fmt++) */ /* if ((ret = check_file(ntasks, filename[fmt]))) */ /* ERR(ret); */ #ifdef HAVE_MPE /* Log with MPE that we are done with READ. */ if ((ret = MPE_Log_event(event_num[END][READ], 0, "end read"))) MPIERR(ret); #endif /* HAVE_MPE */ /* Finalize the MPI library. */ MPI_Finalize(); #ifdef TIMING /* Finalize the GPTL timing library. */ if ((ret = GPTLfinalize ())) return ret; #endif if (verbose) printf("rank: %d SUCCESS!\n", my_rank); return 0; }