/*
//  Debugging helper: the task whose rank in the group of <com> is 0 prints,
//  through cart_debug, the size of <com> and, for every member of <com>, its
//  rank translated into the group of mpi.comm.world. <name> is only used as
//  a label in the printed header line.
//
//  NOTE(review): the text of this function visible here ends right after the
//  if(i == 0) block, i.e. without the closing brace of the function body and
//  without any release of the <world> and <local> groups - confirm against
//  the complete file.
//  NOTE(review): <com> is printed with %p, which presumes that MPI_Comm is a
//  pointer type in the MPI implementation in use - verify.
*/
/* // This function must be called by all members of communicator com. */ void print_comm_contents(MPI_Comm com, const char *name) { MPI_Group world, local; int i, n, *ranks_local, *ranks_world; MPI_Comm_group(mpi.comm.world,&world); MPI_Comm_group(com,&local); MPI_Group_size(local,&n); MPI_Group_rank(local,&i); if(i == 0) { ranks_local = cart_alloc(int,n); ranks_world = cart_alloc(int,n); for(i=0; i<n; i++) ranks_local[i] = i; MPI_Group_translate_ranks(local,n,ranks_local,world,ranks_world); cart_debug("Communicator %s (%p), size = %d:",name,com,n); for(i=0; i<n; i++) cart_debug("id = %d -> world id = %d",i,ranks_world[i]); cart_free(ranks_local); cart_free(ranks_world); }
void rtGetRadiationFieldWorker(int cell, int n, const float *wlen, float *ngxi, int free_space) { int i; DEFINE_FRT_INTEFACE(var,rawrf); frt_real *fwlen, *fngxi; frt_intg nout = n; cart_assert(n > 0); if(cell < 0) { if(sizeof(frt_real) == sizeof(float)) { frtCall(getbackgroundradiationfield)(&nout,(frt_real *)wlen,(frt_real*)ngxi); } else { fwlen = cart_alloc(frt_real,n); fngxi = cart_alloc(frt_real,n); for(i=0; i<n; i++) fwlen[i] = wlen[i]; frtCall(getbackgroundradiationfield)(&nout,fwlen,fngxi); for(i=0; i<n; i++) ngxi[i] = fngxi[i]; cart_free(fwlen); cart_free(fngxi); } } else { rtPackCellData(cell_level(cell),cell,var,&rawrf); if(sizeof(frt_real) == sizeof(float)) { if(free_space) frtCall(getradiationfieldfs)(var,rawrf,&nout,(frt_real *)wlen,(frt_real*)ngxi); else frtCall(getradiationfield)(var,rawrf,&nout,(frt_real *)wlen,(frt_real*)ngxi); } else { fwlen = cart_alloc(frt_real,n); fngxi = cart_alloc(frt_real,n); for(i=0; i<n; i++) fwlen[i] = wlen[i]; if(free_space) frtCall(getradiationfieldfs)(var,rawrf,&nout,fwlen,fngxi); else frtCall(getradiationfield)(var,rawrf,&nout,fwlen,fngxi); for(i=0; i<n; i++) ngxi[i] = fngxi[i]; cart_free(fwlen); cart_free(fngxi); } } }
void prolongate( int level ) { int i,j; const int prolongation_vars[1] = { VAR_POTENTIAL }; int icell; int num_level_cells; int *level_cells; cart_assert( level >= min_level+1 ); start_time( WORK_TIMER ); select_level( level-1, CELL_TYPE_LOCAL | CELL_TYPE_REFINED, &num_level_cells, &level_cells ); #pragma omp parallel for default(none), private(i,icell,j), shared(num_level_cells,level_cells,cell_vars,cell_child_oct) for ( i = 0; i < num_level_cells; i++ ) { icell = level_cells[i]; for ( j = 0; j < num_children; j++ ) { cell_potential( cell_child(icell,j) ) = cell_interpolate( icell, j, VAR_POTENTIAL ); } } cart_free( level_cells ); end_time( WORK_TIMER ); /* update buffers */ start_time( PROLONGATE_UPDATE_TIMER ); update_buffer_level( level, prolongation_vars, 1 ); end_time( PROLONGATE_UPDATE_TIMER ); }
/* size(var) needs to be num_cell */ void compute_upto_level_hierarchy( void (*flevel)(int , int , int *, float *), float (*favg)(int , float *), int top_level, float *var) { MESH_RUN_DECLARE(level,cell); float *var_level; MESH_RUN_OVER_LEVELS_BEGIN(level,_MaxLevel,top_level); var_level = cart_alloc(float,_Num_level_cells); flevel(level,_Num_level_cells,_Level_cells,var_level); #pragma omp parallel for default(none), private(_Index,cell), shared(_Num_level_cells,_Level_cells,var_level,var,cell_child_oct,cell_vars,level, units,constants,favg) MESH_RUN_OVER_CELLS_OF_LEVEL_BEGIN(cell); if(cell_is_leaf(cell)) { var[cell] = MAX(0.0,var_level[_Index]); } else { var[cell] = favg(cell,var); } cart_assert( var[cell]>=0 && !(var[cell]!=var[cell]) ); MESH_RUN_OVER_CELLS_OF_LEVEL_END; cart_free(var_level); MESH_RUN_OVER_LEVELS_END; }
void compute_accelerations_particles( int level ) { int i, j; double a2half; const int accel_vars[nDim] = { VAR_ACCEL, VAR_ACCEL+1, VAR_ACCEL+2 }; int neighbors[num_neighbors]; int L1, R1; double phi_l, phi_r; int icell; int num_level_cells; int *level_cells; start_time( GRAVITY_TIMER ); start_time( PARTICLE_ACCEL_TIMER ); start_time( WORK_TIMER ); #ifdef COSMOLOGY a2half = -0.5*abox[level]*abox[level]*cell_size_inverse[level]; #else a2half = -0.5 * cell_size_inverse[level]; #endif select_level( level, CELL_TYPE_LOCAL, &num_level_cells, &level_cells ); #pragma omp parallel for default(none), private(icell,j,neighbors,L1,R1,phi_l,phi_r), shared(num_level_cells,level_cells,level,cell_vars,a2half) for ( i = 0; i < num_level_cells; i++ ) { icell = level_cells[i]; cell_all_neighbors( icell, neighbors ); for ( j = 0; j < nDim; j++ ) { L1 = neighbors[2*j]; R1 = neighbors[2*j+1]; if ( cell_level(L1) < level ) { phi_l = 0.8*cell_potential(L1) + 0.2*cell_potential(icell); } else { phi_l = cell_potential(L1); } if ( cell_level(R1) < level ) { phi_r = 0.8*cell_potential(R1)+0.2*cell_potential(icell); } else { phi_r = cell_potential(R1); } cell_accel( icell, j ) = (float)(a2half * ( phi_r - phi_l ) ); } } cart_free( level_cells ); end_time( WORK_TIMER ); /* update accelerations */ start_time( PARTICLE_ACCEL_UPDATE_TIMER ); update_buffer_level( level, accel_vars, nDim ); end_time( PARTICLE_ACCEL_UPDATE_TIMER ); end_time( PARTICLE_ACCEL_TIMER ); end_time( GRAVITY_TIMER ); }
/* // Compute the maxium and minimum of a (cached) float 1-component array. */ void linear_array_maxmin(int n, float *arr, float *max, float *min) { int j, i, ibeg, iend; float *vmax, *vmin; #ifdef _OPENMP int num_pieces = omp_get_num_threads(); #else int num_pieces = 1; #endif int len_piece = (n+num_pieces-1)/num_pieces; cart_assert(n > 0); vmax = cart_alloc(float, num_pieces ); vmin = cart_alloc(float, num_pieces ); #pragma omp parallel for default(none), private(j,i,ibeg,iend), shared(arr,vmin,vmax,n,len_piece,num_pieces) for(j=0; j<num_pieces; j++) { ibeg = j*len_piece; iend = ibeg + len_piece; if(iend > n) iend = n; vmin[j] = vmax[j] = arr[ibeg]; for(i=ibeg+1; i<iend; i++) { if(arr[i] > vmax[j]) vmax[j] = arr[i]; if(arr[i] < vmin[j]) vmin[j] = arr[i]; } } *min = vmin[0]; *max = vmax[0]; for(j=1; j<num_pieces; j++) { if(*max < vmax[j]) *max = vmax[j]; if(*min > vmin[j]) *min = vmin[j]; } cart_free(vmax); cart_free(vmin); }
/* var: allocated of size num_cells & filled with zeros */ void xfactor_upto_level_hierarchy(int top_level, float *var) { MESH_RUN_DECLARE(level,cell); int j,c; float *var_level; double aux1, aux2; float soblen; MESH_RUN_OVER_LEVELS_BEGIN(level,_MaxLevel,top_level); var_level = cart_alloc(float,_Num_level_cells); xfactor_level(level,_Num_level_cells,_Level_cells,var_level); #pragma omp parallel for default(none), private(_Index,cell,j,aux1,aux2,soblen,c), shared(_Num_level_cells,_Level_cells,var_level,var,cell_child_oct,cell_vars,level, units,constants) MESH_RUN_OVER_CELLS_OF_LEVEL_BEGIN(cell); if(cell_is_leaf(cell)) { var[cell] = MAX(0.0,var_level[_Index]); } else { aux1=0.; aux2=0.; #ifdef XLSOB for(j=0; j<num_children; j++) { c = cell_child(cell,j); soblen = cell_sobolev_length2(c,cell_level(c),NULL); aux1 += cell_H2_density(c)*soblen; aux2 += cell_H2_density(c)*soblen/var[c]; } #else for(j=0; j<num_children; j++) { aux1 += cell_H2_density(cell_child(cell,j)); aux2 += cell_H2_density(cell_child(cell,j))/var[cell_child(cell,j)]; } #endif if (aux2==0) var[cell]= FLT_MAX; else var[cell] = aux1/aux2; } cart_assert( var[cell]>=0 && !(var[cell]!=var[cell]) ); MESH_RUN_OVER_CELLS_OF_LEVEL_END; cart_free(var_level); MESH_RUN_OVER_LEVELS_END; }
/*
 * Program entry point: load a cartridge image, allocate memory, initialise
 * the CPU/BIOS state, run a fixed number of emulation steps, then release
 * everything.
 *
 * The cartridge path is taken from the first command-line argument when one
 * is given; without arguments the test ROM "tests/cpu_instrs.gb" is loaded,
 * as before.
 */
int main(int argc, char *argv[])
{
    /* number of step() calls performed before the program shuts down */
    const uint32_t num_steps = 28000000;
    uint32_t i;

    cart_load(argc > 1 ? argv[1] : "tests/cpu_instrs.gb");
    mem_alloc();
    cpu_bios_init();

    for (i = 0; i < num_steps; ++i) {
        step();
    }

    printf("\n\nEND OF LINE\n");

    mem_free();
    cart_free();

    return 0;
}
void rtOtvetSingleSourceEddingtonTensor(int level) { int i, j, l, index, cell; int num_level_cells, *level_cells; double eps1, eps2, dr2, pos[nDim]; start_time(WORK_TIMER); eps1 = 0.01*cell_size[rtSingleSourceLevel]*cell_size[rtSingleSourceLevel]; eps2 = 9*cell_size[rtSingleSourceLevel]*cell_size[rtSingleSourceLevel]; select_level(level,CELL_TYPE_LOCAL,&num_level_cells,&level_cells); #pragma omp parallel for default(none), private(index,cell,pos,dr2,i,j,l), shared(level,num_level_cells,level_cells,rtSingleSourceValue,rtSingleSourcePos,eps1,eps2,cell_vars) for(index=0; index<num_level_cells; index++) { cell = level_cells[index]; cell_center_position(cell,pos); dr2 = eps1; for(i=0; i<nDim; i++) { pos[i] -= rtSingleSourcePos[i]; if(pos[i] > 0.5*num_grid) pos[i] -= num_grid; if(pos[i] < -0.5*num_grid) pos[i] += num_grid; dr2 += pos[i]*pos[i]; } cell_var(cell,RT_VAR_OT_FIELD) = rtSingleSourceValue/(4*M_PI*dr2); dr2 += nDim*eps2; for(l=j=0; j<nDim; j++) { for(i=0; i<j; i++) { cell_var(cell,rt_et_offset+l++) = pos[i]*pos[j]/dr2; } cell_var(cell,rt_et_offset+l++) = (eps2+pos[j]*pos[j])/dr2; } } cart_free(level_cells); end_time(WORK_TIMER); start_time(RT_SINGLE_SOURCE_UPDATE_TIMER); update_buffer_level(level,rtOtvetETVars,rtNumOtvetETVars); end_time(RT_SINGLE_SOURCE_UPDATE_TIMER); }
/*
 * Replace the current cartridge with a 2 MiB placeholder image.
 *
 * The previous cartridge is released with cart_free(). The new image is
 * filled with '?', its first 64 bytes are cleared, bytes 4096..8191 are set
 * to 0x70, and the bytes at 0x20..0x27 are set to 'O','R','R','E',0,0,'!','R'.
 * Finally cart.osrangestart and cart.osrangeend are both set to 1.
 *
 * The whole image (cart.size bytes) is filled, so no part of the buffer is
 * left uninitialised. If the allocation fails, cart.size is reset to 0 and
 * the function returns without touching the image.
 */
void cart_dummy(void)
{
    cart_free();

    cart.size = 2048*1024;
    cart.data = malloc(cart.size);
    if (cart.data == NULL) {
        /* nothing to fill in; do not advertise a size we do not have */
        cart.size = 0;
        return;
    }

    memset(cart.data, '?', cart.size);
    memset(cart.data, 0, 64);
    memset(cart.data+4096, 0x70, 4096);

    cart.data[0x23] = 'E';
    cart.data[0x22] = 'R';
    cart.data[0x21] = 'R';
    cart.data[0x20] = 'O';
    cart.data[0x27] = 'R';
    cart.data[0x26] = '!';

    cart.osrangestart = 1;
    cart.osrangeend = 1;
}
void star_particle_feedback(int level, int time_multiplier) { int i; int ipart; int iter_cell; int num_level_cells; int *level_cells; double t_next; start_time( WORK_TIMER ); setup_star_formation_feedback(level); t_next = tl[level] + time_multiplier*dtl[level]; select_level( level, CELL_TYPE_LOCAL | CELL_TYPE_LEAF, &num_level_cells, &level_cells ); #ifndef COMPILER_GCC /* Get compiler segfault under GCC */ #ifdef STAR_PARTICLE_TYPES #pragma omp parallel for default(none), private(iter_cell,ipart), shared(num_level_cells,level_cells,cell_particle_list,particle_level,level,particle_t,t_next,particle_id,particle_species_indices,num_particle_species,particle_list_next, sf_feedback_particle,star_particle_type), schedule(dynamic) #else #pragma omp parallel for default(none), private(iter_cell,ipart), shared(num_level_cells,level_cells,cell_particle_list,particle_level,level,particle_t,t_next,particle_id,particle_species_indices,num_particle_species,particle_list_next, sf_feedback_particle), schedule(dynamic) #endif #endif for ( i = 0; i < num_level_cells; i++ ) { iter_cell = level_cells[i]; ipart = cell_particle_list[iter_cell]; while ( ipart != NULL_PARTICLE ) { if ( particle_is_star(ipart) && particle_t[ipart] < t_next - 0.5*dtl[max_level] #ifdef STAR_PARTICLE_TYPES && ( star_particle_type[ipart] == STAR_TYPE_NORMAL || star_particle_type[ipart] == STAR_TYPE_STARII || star_particle_type[ipart] == STAR_TYPE_FAST_GROWTH) #endif /* STAR_PARTICLE_TYPES */ ) { sf_feedback_particle->hydro_feedback(level,iter_cell,ipart,t_next); } ipart = particle_list_next[ipart]; } } cart_free(level_cells); end_time( WORK_TIMER ); }
void copy_potential( int level ) { int i; int icell; int num_level_cells; int *level_cells; start_time( GRAVITY_TIMER ); start_time( WORK_TIMER ); select_level( level, CELL_TYPE_ANY, &num_level_cells, &level_cells ); #pragma omp parallel for default(none), private(i,icell), shared(num_level_cells,level_cells,cell_vars) for ( i = 0; i < num_level_cells; i++ ) { icell = level_cells[i]; cell_potential_hydro(icell) = cell_potential(icell); } cart_free( level_cells ); end_time( WORK_TIMER ); end_time( GRAVITY_TIMER ); }
/*
 * Remove star particles on <level> that the feedback model wants destroyed.
 *
 * Nothing is done when sf_feedback_particle->destroy_star_particle is NULL,
 * or when STAR_PARTICLE_TYPES is not defined. Otherwise, for every local
 * leaf cell of <level>, destroy_star_particle is called for each star
 * particle in the cell; it must return 0 or 1, and on 1 the particle is
 * removed from the cell list and freed.
 *
 * The locals are only declared when they are used, so that a build without
 * STAR_PARTICLE_TYPES does not carry unused variables.
 */
void star_destruction(int level) {
#ifdef STAR_PARTICLE_TYPES /* this ifdef is not strictly necessary */
	int i;
	int icell, idelete, ipart, ipart_next;
	int num_level_cells;
	int *level_cells;
#endif /* STAR_PARTICLE_TYPES */

	if(sf_feedback_particle->destroy_star_particle == NULL) return;

#ifdef STAR_PARTICLE_TYPES
	start_time( WORK_TIMER );

	select_level( level, CELL_TYPE_LOCAL | CELL_TYPE_LEAF, &num_level_cells, &level_cells );

#pragma omp parallel for default(none), private(icell,ipart,ipart_next,idelete), shared(num_level_cells,level_cells,cell_particle_list,particle_level,level,particle_id,star_particle_type,particle_species_indices,num_particle_species,particle_list_next, particle_list_prev, sf_feedback_particle), schedule(dynamic)
	for ( i = 0; i < num_level_cells; i++ ) {
		icell = level_cells[i];

		ipart = cell_particle_list[icell];
		while ( ipart != NULL_PARTICLE ) {
			/* remember the successor first: ipart may be deleted below */
			ipart_next = particle_list_next[ipart];

			if ( particle_is_star(ipart) ) {
				idelete = sf_feedback_particle->destroy_star_particle(level,icell,ipart);
				cart_assert(idelete==0 || idelete==1);

				if(idelete == 1) {
#pragma omp critical
					{
						/* delete_particle should be threadsafe, but just to be sure */
						delete_particle(icell,ipart);
						particle_free(ipart);
					}
				}
			}

			ipart = ipart_next;
		}
	}

	cart_free(level_cells);

	end_time( WORK_TIMER );
#endif /* STAR_PARTICLE_TYPES */
}
/* compute X-factor at given level; taking into account this level and all higher res. levels */ void xfactor_upto_level(int top_level, float *xf) { MESH_RUN_DECLARE(level,cell); float *var; var = cart_alloc(float,num_cells); memset(var,0,sizeof(float)*num_cells); xfactor_upto_level_hierarchy(top_level, var); MESH_RUN_OVER_LEVELS_BEGIN(level,top_level,top_level); #pragma omp parallel for default(none), private(_Index,cell), shared(_Num_level_cells,_Level_cells,var,cell_child_oct,cell_vars,level,xf) MESH_RUN_OVER_CELLS_OF_LEVEL_BEGIN(cell); xf[_Index]=var[cell]; MESH_RUN_OVER_CELLS_OF_LEVEL_END; MESH_RUN_OVER_LEVELS_END; cart_free(var); }
/*
//  Hook executed after the density assignment on <level>. With RT_TRANSFER
//  it selects the cells of <level> (of any type) and hands them to
//  rtAfterAssignDensityTransfer; without RT_TRANSFER only the
//  RT_AFTER_DENSITY_TIMER is started and stopped.
*/
void rtAfterAssignDensity1(int level)
{
  int ncells;
  int *cells;

  start_time(RT_AFTER_DENSITY_TIMER);

#ifdef RT_TRANSFER
  /* assumes buffer gas density is up to date */

  start_time( WORK_TIMER );
  select_level(level,CELL_TYPE_ANY,&ncells,&cells);
  end_time( WORK_TIMER );

  rtAfterAssignDensityTransfer(level,ncells,cells);

  start_time( WORK_TIMER );
  cart_free(cells);
  end_time( WORK_TIMER );
#endif

  end_time(RT_AFTER_DENSITY_TIMER);
}
/* size(var) needs to be num_level_cells */ void compute_upto_level( void (*flevel)(int, int, int *, float *), float (*favg)(int, float *), int top_level, float *var) { MESH_RUN_DECLARE(level,cell); float *var2; var2 = cart_alloc(float,num_cells); memset(var2,0,sizeof(float)*num_cells); compute_upto_level_hierarchy(flevel,favg,top_level, var2); MESH_RUN_OVER_LEVELS_BEGIN(level,top_level,top_level); #pragma omp parallel for default(none), private(_Index,cell), shared(_Num_level_cells,_Level_cells,cell_child_oct,cell_vars,level,var,var2) MESH_RUN_OVER_CELLS_OF_LEVEL_BEGIN(cell); var[_Index]=var2[cell]; MESH_RUN_OVER_CELLS_OF_LEVEL_END; MESH_RUN_OVER_LEVELS_END; cart_free(var2); }
void interpolate_potential( int level ) { int i; int icell; int num_level_cells; int *level_cells; double dtdt2; start_time( GRAVITY_TIMER ); start_time( WORK_TIMER ); /* // NG: dtl_old may not be set in the first time-step, but then // cell_potential = cell_potential_hydro */ if(dtl_old[level] > 0.1*dtl[level]) { dtdt2 = 0.5 * dtl[level]/dtl_old[level]; } else { dtdt2 = 0; } select_level( level, CELL_TYPE_ANY, &num_level_cells, &level_cells ); #pragma omp parallel for default(none), private(i,icell), shared(num_level_cells,level_cells,cell_vars,dtdt2) for ( i = 0; i < num_level_cells; i++ ) { icell = level_cells[i]; cell_potential_hydro(icell) = cell_potential(icell) + ( cell_potential(icell) - cell_potential_hydro(icell) ) * dtdt2; } cart_free( level_cells ); end_time( WORK_TIMER ); end_time( GRAVITY_TIMER ); }
void compute_accelerations_hydro( int level ) { int i, j; double a2half; int neighbors[num_neighbors]; int L1, R1; double phi_l, phi_r; #ifdef GRAVITY_IN_RIEMANN const int accel_vars[nDim] = { VAR_ACCEL, VAR_ACCEL+1, VAR_ACCEL+2 }; #endif int icell; int num_level_cells; int *level_cells; start_time( GRAVITY_TIMER ); start_time( HYDRO_ACCEL_TIMER ); start_time( WORK_TIMER ); #ifdef COSMOLOGY a2half = abox_from_tcode( tl[level] + dtl[level] ); a2half = -0.5*dtl[level]*cell_size_inverse[level]*a2half*a2half; #else a2half = -0.5*dtl[level]*cell_size_inverse[level]; #endif select_level( level, CELL_TYPE_LOCAL, &num_level_cells, &level_cells ); #ifdef OPENMP_DECLARE_CONST #pragma omp parallel for default(none), private(icell,j,neighbors,L1,R1,phi_l,phi_r), shared(num_level_cells,level_cells,level,cell_vars,a2half), shared(local) #else /* OPENMP_DECLARE_CONST */ #pragma omp parallel for default(none), private(icell,j,neighbors,L1,R1,phi_l,phi_r), shared(num_level_cells,level_cells,level,cell_vars,a2half) #endif /* OPENMP_DECLARE_CONST */ for ( i = 0; i < num_level_cells; i++ ) { icell = level_cells[i]; cell_all_neighbors( icell, neighbors ); for ( j = 0; j < nDim; j++ ) { L1 = neighbors[2*j]; R1 = neighbors[2*j+1]; if ( cell_level(L1) == level && cell_level(R1) == level ) { phi_l = cell_potential_hydro(L1); phi_r = cell_potential_hydro(R1); } else { if ( cell_level(L1) < level ) { phi_l = cell_interpolate( L1, local[cell_child_number(icell)][2*j], VAR_POTENTIAL_HYDRO ); phi_r = cell_potential_hydro(R1); } else { phi_l = cell_potential_hydro(L1); phi_r = cell_interpolate( R1, local[cell_child_number(icell)][2*j], VAR_POTENTIAL_HYDRO ); } } cell_accel( icell, j ) = (float)(a2half * ( phi_r - phi_l ) ); } } cart_free( level_cells ); end_time( WORK_TIMER ); #ifdef GRAVITY_IN_RIEMANN /* this only gets called if we pass gravity on to Riemann solver (and thus need accel in buffer cells) */ start_time( HYDRO_ACCEL_UPDATE_TIMER ); update_buffer_level( level, accel_vars, nDim ); 
end_time( HYDRO_ACCEL_UPDATE_TIMER ); #endif end_time( HYDRO_ACCEL_TIMER ); end_time( GRAVITY_TIMER ); }
void control_parameter_set_refinement_indicator(const char *value, void *ptr, int ind) { char *str, *tok; int id, from_level, to_level, level, n; float w; char c, unit[10]; str = cart_alloc(char,strlen(value)+1); strcpy(str,value); /* strtok destroys the input string */ /* // indicator id */ tok = strtok(str," "); if(sscanf(tok,"%d",&id)!=1 && sscanf(tok,"id=%d",&id)!=1) { cart_error("Unable to read refinement ID parameter from token '%s' of string '%s'",tok,value); } if(id<-1 || id>= num_refinement_indicators) { cart_error("Invalid indicator id %d in '%s'",id,value); } if(id == -1) { /* // That means the indicator is not set - just skip */ return; } /* // weight */ tok = strtok(NULL," "); if(sscanf(tok,"%g",&w)!=1 && sscanf(tok,"weight=%g",&w)!=1) { cart_error("Unable to read refinement WEIGHT parameter from token '%s' of string '%s'",tok,value); } if(w < 0.0) { cart_error("Refinement weight for indicator %d must be positive",id,w); } refinement_indicator[id].weight = w; /* // levels */ tok = strtok(NULL," "); if(sscanf(tok,"%d",&from_level)!=1 && sscanf(tok,"from-level=%d",&from_level)!=1) { cart_error("Unable to read refinement FROM-LEVEL parameter from token '%s' of string '%s'",tok,value); } if(from_level < min_level) { cart_error("<from-level> for indicator %d cannot be less than min_level (%d)",id,min_level); } if(from_level >= max_level && max_level > min_level ) { cart_error("<from-level> for indicator %d must be less than max_level (%d)",id,max_level); } tok = strtok(NULL," "); if(sscanf(tok,"%d",&to_level)!=1 && sscanf(tok,"to-level=%d",&to_level)!=1) { cart_error("Unable to read refinement TO-LEVEL parameter from token '%s' of string '%s'",tok,value); } if(to_level < from_level) { cart_error("<to-level> for indicator %d cannot be less than <from-level> (%d)",id,from_level); } if(to_level > max_level) to_level = max_level; /* // threshold values: at least one value is required */ for(level=from_level; level<=to_level; ) { tok = strtok(NULL," "); 
if(tok==NULL && level>from_level) { /* // Use the last value for all missing tokens: the last value must be present */ w = refinement_indicator[id].threshold[level-1]; for(; level<=to_level; level++) { refinement_indicator[id].use[level] = 1; refinement_indicator[id].threshold[level] = w; } } else { n = 1; unit[0] = 0; if(sscanf(tok,"%g%c",&w,&c)!=1 && sscanf(tok,"%d*%g%c",&n,&w,&c)!=2 && sscanf(tok,"%d*%g[%s]%c",&n,&w,unit,&c)!=3) { cart_error("Unable to read refinement THRESHOLD from token '%s' of string '%s'",tok,value); } if(n < 1) { cart_error("Numerical multiplier %d in the threshold value for indicator %d must be positive",n,id); } /* // Parse unit identifier */ if(unit[0] != 0) { if(strcmp(unit,"M0") == 0) { /* // Multiply the value by the mean specie mass */ switch(id) { default: { cart_error("Unit <M0> is not valid for refinement indicator %d",id); } } } else { cart_error("String '%s' is not a valid refinement indicator unit",unit); } } for(; level<=to_level && n>0; level++, n--) { refinement_indicator[id].use[level] = 1; refinement_indicator[id].threshold[level] = w; } } } cart_free(str); }
void init_run() { int i, j, k; int index; double a_th; int ipart; int icell; double qi, qj, qk; double xcons, vcons; double dx, dvx; double pw; double a_vel; double qfact; int num_level_cells; int *level_cells; cosmology_set(OmegaM,OmM0); cosmology_set(OmegaB,OmB0); cosmology_set(OmegaL,OmL0); cosmology_set(h,h0); cosmology_set(DeltaDC,dDC); box_size = Lbox0; units_set_art(cosmology->OmegaM,cosmology->h,box_size); units_init(); build_cell_buffer(); repair_neighbors(); auni[min_level] = auni_init; tl[min_level] = tcode_from_auni( auni_init ); for ( i = min_level; i <= max_level; i++ ) { tl[i] = tl[min_level]; } abox[min_level] = auni_init; for(i=min_level+1; i<=max_level; i++) { tl[i] = tl[min_level]; auni[i] = auni[min_level]; abox[i] = abox[min_level]; } units_update(min_level); cart_debug("tl[min_level] = %f", tl[min_level] ); cart_debug("au[min_level] = %f", auni[min_level] ); cart_debug("ab[min_level] = %f", abox[min_level] ); cart_debug("DC mode = %f", cosmology->DeltaDC ); cosmology_set_fixed(); rhogas0 = cosmology->OmegaB/cosmology->OmegaM; cart_debug("rhogas0 = %e", rhogas0 ); Tinit = TinitK/units->temperature; ak = 2.0*M_PI / lambda; dgrowth = growth(abox[min_level]); ddgrowthdt = dgrowthdt(abox[min_level]); ampl = 1.0 / ( growth(a_cross) * ak ); cart_debug("Tinit,TinitK = %e %e", Tinit,TinitK ); #ifdef HYDRO for ( i = min_level; i <= max_level; i++ ) { cart_debug("generating initial conditions on level %u", i ); select_level( i, CELL_TYPE_LOCAL, &num_level_cells, &level_cells ); for ( j = 0; j < num_level_cells; j++ ) { // cart_debug("%d %d",level_cells[j],num_cells); initial_conditions( level_cells[j], i ); } cart_free( level_cells ); } for ( i = min_level; i <= max_level; i++ ) { update_buffer_level( i, all_hydro_vars, num_hydro_vars ); } #endif /* HYDRO */ cart_debug("choose timestep and set velocity on the half step"); dtl[min_level] = 0.0; set_timestepping_scheme(); dtl[min_level]=.125; cart_debug("=======================%e",dtl[min_level]); 
dtl_old[min_level] = dtl[min_level]; tl_old[min_level] = tl[min_level]-dtl[min_level]; abox_old[min_level] = abox_from_tcode(tl_old[min_level]); dtl_old[min_level] = dtl[min_level]; for ( i = min_level+1; i <= max_level; i++ ) { tl_old[i] = tl[i]-dtl[i]; abox_old[i] = abox_from_tcode(tl_old[i]); dtl_old[i] = dtl[i]; } #ifdef GRAVITY #ifdef HYDRO for ( i = min_level; i <= max_level; i++ ) { cart_debug("generating gravity on level %u", i ); // cart_assert(dtl[i]==0); select_level( i, CELL_TYPE_LOCAL, &num_level_cells, &level_cells ); for ( j = 0; j < num_level_cells; j++ ) { initial_gravity( level_cells[j], i ); } cart_free( level_cells ); } for ( i = min_level; i <= max_level; i++ ) { update_buffer_level( i, all_hydro_vars, num_hydro_vars ); } #endif /* GRAVITY */ #endif /* HYDRO */ #ifdef PARTICLES qfact = (double)num_grid / (double)num_grid; pw = (1.0-rhogas0)*qfact*qfact*qfact; cart_debug("particle weight = %e", pw ); xcons = dgrowth*ampl; a_vel = abox_from_tcode( tl[min_level] - 0.5*dtl[min_level] ); vcons = ampl * dgrowthdt( a_vel); ipart = 0; for ( i = 0; i < num_grid; i++ ) { qi = qfact*((double)i + 0.5); dx = xcons * sin( ak * qi ); dvx = vcons * sin( ak * qi ); for ( j = 0; j < num_grid; j++ ) { qj = qfact*((double)j + 0.5); for ( k = 0; k < num_grid; k++ ) { qk = qfact*((double)k + 0.5); particle_x[ipart][0] = qi + dx; particle_x[ipart][1] = qj; particle_x[ipart][2] = qk; if ( particle_x[ipart][0] >= (double)num_grid ) { particle_x[ipart][0] -= num_grid; } if ( particle_x[ipart][1] >= (double)num_grid ) { particle_x[ipart][1] -= num_grid; } if ( particle_x[ipart][2] >= (double)num_grid ) { particle_x[ipart][2] -= num_grid; } icell = cell_find_position( particle_x[ipart] ); if ( icell != -1 && cell_is_local(icell) ) { particle_v[ipart][0] = dvx; particle_v[ipart][1] = 0.0; particle_v[ipart][2] = 0.0; particle_id[ipart] = (particleid_t)num_grid*(num_grid*i + j) + k; particle_mass[ipart] = pw; cart_assert( qi == particle_q_init( particle_id[ipart] ) ); 
particle_t[ipart] = tl[min_level]; particle_dt[ipart] = dtl[min_level]; ipart++; } } } } cart_debug("created %u particles", ipart ); num_local_particles = ipart; num_particles_total = (particleid_t)num_grid*(particleid_t)num_grid*(particleid_t)num_grid; num_particle_species = 1; particle_species_mass[0] = pw; particle_species_num[0] = num_particles_total; particle_species_indices[0] = 0; particle_species_indices[1] = num_particles_total; build_particle_list(); /* assign_density( min_level, min_level ); */ //for refinement /* modify( min_level, 0 ); */ assign_density( min_level, min_level ); //for refinement modify( min_level, 0 ); if ( local_proc_id == MASTER_NODE ) { particles = fopen("dumps/particle_rms.dat", "w"); fclose(particles); } #endif check_map(); cart_debug("done with initialization"); }
void configure_runtime_setup() { const char *str; MPI_Group world_grp, run_grp, fft_grp; int i, irun, nrun, ifft, nfft, sel[MAX_PROCS]; char **str1, **str2; char *buf1, *buf2; MPI_Comm_size(mpi.comm.world,&mpi.world.size); MPI_Comm_rank(mpi.comm.world,&mpi.world.rank); /* // for cart_error to work */ num_procs = mpi.world.size; local_proc_id = mpi.world.rank; MPI_Comm_group(mpi.comm.world,&world_grp); /* // ************************************************ // // Main configuration of the runtime setup // // ------------------------------------------------ */ str = extract_option1("mpi-setup","mpi",NULL); if(str == NULL) { /* // Default behaviour: all tasks are run tasks, at least one K-slice per fft task */ irun = 0; nrun = mpi.world.size; ifft = 0; nfft = MIN(num_grid,mpi.world.size); } else { if(sscanf(str,"run:%d-%d,fft:%d-%d",&irun,&nrun,&ifft,&nfft) != 4) { cart_error("A valid format for the --mpi-setup option argument is run:N1-N2,fft:N3-N4, where N1-N2 is the range of run tasks ids, and N3-N4 is the range of fft tasks ids."); } if(irun<0 || irun>nrun) { cart_error("Invalid range %d - %d",irun,nrun); } if(ifft<0 || ifft>nfft) { cart_error("Invalid range %d - %d",ifft,nfft); } if(nrun >= mpi.world.size) { cart_error("The range of run tasks overflows the available number of tasks %d",mpi.world.size); } if(nfft >= mpi.world.size) { cart_error("The range of fft tasks overflows the available number of tasks %d",mpi.world.size); } nrun = nrun - irun + 1; nfft = nfft - ifft + 1; } cart_assert(irun>=0 && irun+nrun<=mpi.world.size); cart_assert(ifft>=0 && ifft+nfft<=mpi.world.size); for(i=0; i<nrun; i++) sel[i] = irun + i; MPI_Group_incl(world_grp,nrun,sel,&run_grp); for(i=0; i<nfft; i++) sel[i] = ifft + i; MPI_Group_incl(world_grp,nfft,sel,&fft_grp); /* // ************************************************ // // Create our communicators, etc (no customization here) */ MPI_Comm_create(mpi.comm.world,run_grp,&mpi.comm.run); 
MPI_Comm_create(mpi.comm.world,fft_grp,&mpi.comm.fft); mpi.task_type = 0; /* // Sizes and ranks can only be safely querued from a group, // not a communicator!!! */ MPI_Group_rank(run_grp,&i); if(i != MPI_UNDEFINED) { mpi.task_type += MPI_TASK_TYPE_RUN; mpi.run.size = nrun; mpi.run.rank = i; } else { mpi.run.size = 0; mpi.run.rank = i; } MPI_Group_rank(fft_grp,&i); if(i != MPI_UNDEFINED) { mpi.task_type += MPI_TASK_TYPE_FFT; mpi.fft.size = nrun; mpi.fft.rank = i; } else { mpi.fft.size = 0; mpi.fft.rank = i; } MPI_Group_free(&world_grp); str = extract_option1("num-omp-threads","omp",NULL); if(str != NULL) { #ifdef _OPENMP if(sscanf(str,"%d",&i)!=1 || i<1 || i>256) { cart_error("--num-omp-threads=<num> option requires a positive integer <num> as an argument"); } omp_set_num_threads(i); cart_debug("num openmp threads = %u", omp_get_max_threads() ); #else cart_debug("OpenMP support is not compiled in; ignoring --num-omp-threads option."); #endif } root_grid_fft_init(run_grp,fft_grp); MPI_Group_free(&run_grp); MPI_Group_free(&fft_grp); /* // Measure tasks per node */ buf1 = cart_alloc(char,mpi.world.size*MPI_MAX_PROCESSOR_NAME); buf2 = cart_alloc(char,mpi.world.size*MPI_MAX_PROCESSOR_NAME); str1 = cart_alloc(char*,mpi.world.size); str2 = cart_alloc(char*,mpi.world.size); for(i=0; i<mpi.world.size; i++) { str1[i] = buf1 + i*MPI_MAX_PROCESSOR_NAME; str2[i] = buf2 + i*MPI_MAX_PROCESSOR_NAME; } MPI_Get_processor_name(str1[0],&i); for(i=1; i<mpi.world.size; i++) { strcpy(str1[i],str1[0]); } MPI_Alltoall(buf1,MPI_MAX_PROCESSOR_NAME,MPI_CHAR,buf2,MPI_MAX_PROCESSOR_NAME,MPI_CHAR,mpi.comm.world); tasks_per_node = 0; for(i=0; i<mpi.world.size; i++) { if(strcmp(str2[i],str2[mpi.world.rank]) == 0) tasks_per_node++; } cart_debug("Tasks per node: %d",tasks_per_node); cart_assert(tasks_per_node > 0); cart_free(buf1); cart_free(buf2); cart_free(str1); cart_free(str2); }
/*
 * Read the halo -> particle mapping from the binary file <filename> into
 * the halos of <halos>.
 *
 * The first integer of the file is compared with sizeof(float) to detect
 * whether the file has to be byte-swapped (reorder is then applied to every
 * value read). The header holds the expansion factor (a debug message is
 * printed if it differs from auni[min_level] by more than 1e-3), the number
 * of halos nh, which must be equal to halos->num_halos, and np.
 *
 * For each of the nh records the halo id and the number of its particles
 * are read. If find_halo_by_id locates the halo, its particle indices and
 * their binding energies are read; binding_order is filled with the
 * particle indices sorted with compare_binding_energy, particles is sorted
 * with compare_ints, and the halo's np is set to the number of particles
 * in the record.
 *
 * NOTE(review): the text of this function visible here ends right after
 * the if ( h != NULL ) block inside the loop over halos: the handling of a
 * halo that is not found, the closing of the file and the end of the
 * function body are not visible - confirm against the complete file.
 * NOTE(review): none of the fread calls is checked, and the file is opened
 * in mode "r" rather than "rb" although it is read as binary - verify.
 * NOTE(review): binding_energy is not declared in this function;
 * presumably it is a file-scope pointer used by compare_binding_energy.
 */
void load_halo_particle_mapping( char *filename, halo_list *halos ) { int i, j; FILE *input; halo *h; int nh, np, ih, nhp; int size; float aexpn; int endian = 0; input = fopen( filename, "r" ); if ( input == NULL ) { cart_error("Unable to open %s", filename ); } fread( &size, sizeof(int), 1, input ); if ( size != sizeof(float) ) { reorder( (char *)&size, sizeof(int) ); if ( size != sizeof(float) ) { cart_error("Error reading from file %s\n", filename ); } endian = 1; } fread( &aexpn, sizeof(float), 1, input ); if ( endian ) { reorder( (char *)&aexpn, sizeof(float) ); } if(fabs(aexpn-auni[min_level]) > 1.0e-3) { cart_debug("Scalar factor in halo particle file %s (%f) is different from the current value (%f)", filename, aexpn,auni[min_level]); } fread( &size, sizeof(int), 1, input ); fread( &size, sizeof(int), 1, input ); fread( &nh, sizeof(int), 1, input ); fread( &np, sizeof(int), 1, input ); fread( &size, sizeof(int), 1, input ); if ( endian ) { reorder( (char *)&nh, sizeof(int) ); reorder( (char *)&np, sizeof(int) ); } if ( nh != halos->num_halos ) { cart_error("Error: number of halos in %s (%u) don't match provided halo_list (%u)", filename, nh, halos->num_halos ); } for ( i = 0; i < nh; i++ ) { fread( &size, sizeof(int), 1, input ); fread( &ih, sizeof(int), 1, input ); fread( &nhp, sizeof(int), 1, input ); if ( endian ) { reorder( (char *)&ih, sizeof(int) ); reorder( (char *)&nhp, sizeof(int) ); } h = find_halo_by_id( halos, ih ); if ( h != NULL ) { h->particles = cart_alloc(int, nhp ); h->binding_order = cart_alloc(int, nhp ); binding_energy = cart_alloc(float, nhp ); fread( h->particles, sizeof(int), nhp, input ); fread( binding_energy, sizeof(float), nhp, input ); fread( &size, sizeof(int), 1, input ); if ( endian ) { for ( j = 0; j < nhp; j++ ) { reorder( (char *)&h->particles[j], sizeof(int) ); reorder( (char *)&binding_energy[j], sizeof(float) ); } } for ( j = 0; j < nhp; j++ ) { /* convert to CART indexes DHR - hfind now outputs in 0 based indices 
halos->list[ih].particles[j] -= 1; */ h->binding_order[j] = j; } /* sort particles by binding energy */ qsort( h->binding_order, nhp, sizeof(int), compare_binding_energy ); cart_free( binding_energy ); for ( j = 0; j < nhp; j++ ) { h->binding_order[j] = h->particles[h->binding_order[j]]; } /* sort by particle index for faster searching */ qsort( h->particles, nhp, sizeof(int), compare_ints ); /* force consistency between particle mapping and np */ h->np = nhp; } }
void update_particle_list( int level ) { int i, k; int ipart; int iter_cell; int num_level_cells; int *level_cells; double pos[nDim]; int num_parts_to_send[MAX_PROCS]; int particle_list_to_send[MAX_PROCS]; int *particle_array_to_send[MAX_PROCS]; int ipart2, new_cell; int proc; int collect_level; int sfc; start_time( UPDATE_PARTS_TIMER ); start_time( WORK_TIMER ); /* now move particles from one cell list to another */ for ( i = 0; i < num_procs; i++ ) { num_parts_to_send[i] = 0; particle_list_to_send[i] = NULL_PARTICLE; } select_level( level, CELL_TYPE_LOCAL, &num_level_cells, &level_cells ); for ( k = 0; k < num_level_cells; k++ ) { iter_cell = level_cells[k]; ipart = cell_particle_list[iter_cell]; while ( ipart != NULL_PARTICLE ) { ipart2 = particle_list_next[ipart]; sfc = sfc_index_position( particle_x[ipart] ); proc = processor_owner(sfc); if ( proc == local_proc_id ) { new_cell = cell_find_position_sfc( sfc, particle_x[ipart] ); if ( new_cell != iter_cell ) { cart_assert( cell_is_local(new_cell) ); delete_particle( iter_cell, ipart ); insert_particle( new_cell, ipart ); } } else if ( proc == -1 ) { cart_error( "Unable to locate processor for particle %d!", particle_id[ipart]); } else { delete_particle( iter_cell, ipart ); particle_list_next[ipart] = particle_list_to_send[proc]; particle_list_to_send[proc] = ipart; num_parts_to_send[proc]++; } ipart = ipart2; } } cart_free( level_cells ); end_time( WORK_TIMER ); start_time( COMMUNICATION_TIMER ); start_time( UPDATE_PARTS_COMMUNICATION_TIMER ); for ( proc = 0; proc < num_procs; proc++ ) { if ( num_parts_to_send[proc] > 0 ) { particle_array_to_send[proc] = cart_alloc(int, num_parts_to_send[proc]); num_parts_to_send[proc] = 0; /* add particles that ended up in processor linked list */ ipart = particle_list_to_send[proc]; while ( ipart != NULL_PARTICLE ) { particle_array_to_send[proc][ num_parts_to_send[proc]++ ] = ipart; ipart = particle_list_next[ipart]; } } }
/*
//  Update all running averages.
//
//  For every level from top_level down to the deepest local level this
//  routine accumulates, over local leaf cells, the volume-weighted sums of
//  each radiation field (rtAvgRF), the maximum and the volume-weighted sum
//  of the absorption coefficient per frequency (rtMaxAC, rtAvgAC), and the
//  absorption-weighted radiation fields (rtAvgACxRF). The per-level values
//  are then combined across level_com, and the global absorption
//  coefficients frtAbcLoc, frtAbcUni and frtAbcAvg are derived from them.
//
//  When top_level == min_level the far field is additionally rescaled by
//  its average. With RT_SINGLE_SOURCE defined, the level of the cell that
//  contains the single source is also determined.
//
//  top_level  - first (coarsest) level to process
//  level_com  - communicator used for the global reductions
*/
void rtGlobalUpdateTransfer(int top_level, MPI_Comm level_com)
{
  int iomp, i, freq, field;
  int level, cell, *level_cells, num_level_cells, bottom_level = max_level_local();
  float amin, amax;
  float *abc[2];
#ifdef _OPENMP
  int nomp = omp_get_max_threads();
#else
  int nomp = 1;
#endif
  double s[nomp][rt_num_fields];
  double s1, sw[nomp][rt_num_fields_per_freq];

  start_time(WORK_TIMER);

  /*
  //  Compute per-level averages
  */
  for(level=top_level; level<=bottom_level; level++)
    {
      select_level(level,CELL_TYPE_LOCAL | CELL_TYPE_LEAF,&num_level_cells,&level_cells);
      if(num_level_cells == 0) continue;

      /*
      //  Because the reduction variable cannot be an array in C, doing
      //  reduction manually. Cannot re-arrange the loops because of the
      //  cache access pattern.
      */
      for(i=0; i<nomp; i++)
        {
          for(field=0; field<rt_num_fields; field++) s[i][field] = 0.0;
        }

#pragma omp parallel for default(none), private(i,field,cell,iomp), shared(num_level_cells,level_cells,level,cell_vars,cell_child_oct,nomp,s)
      for(i=0; i<num_level_cells; i++)
        {
          cell = level_cells[i]; /* No need to check for leaves, we selected only them! */

#ifdef _OPENMP
          iomp = omp_get_thread_num();
          cart_assert(iomp>=0 && iomp<nomp);
#else
          iomp = 0;
#endif

          for(field=0; field<rt_num_fields; field++)
            {
              s[iomp][field] += cell_var(cell,rt_field_offset+field)*cell_volume[level]/num_root_cells;
            }
        }

#ifdef _OPENMP
      /* Fold the per-thread partial sums into slot 0 */
      for(i=1; i<nomp; i++)
        {
          for(field=0; field<rt_num_fields; field++) s[0][field] += s[i][field];
        }
#endif

      for(field=0; field<rt_num_fields; field++)
        {
          rtGlobalValueUpdate(&rtAvgRF[field],level,s[0][field]);
        }

      /*
      //  Now do absorption - since we need to recompute the abs. coefficient,
      //  loop over frequencies first
      */
      abc[0] = cart_alloc(float,num_level_cells);
#if (RT_CFI == 1)
      abc[1] = cart_alloc(float,num_level_cells);
#else
      abc[1] = abc[0];
#endif

      for(freq=0; freq<rt_num_freqs; freq++)
        {
          /*
          //  Average by weighting with the far field only
          */
          rtComputeAbsLevel(level,num_level_cells,level_cells,freq,abc);

          linear_array_maxmin(num_level_cells,abc[1],&amax,&amin);

          rtGlobalValueUpdate(&rtMaxAC[freq],level,amax);

          /*
          //  Because the reduction variable cannot be an array in C, doing
          //  reduction manually. Cannot re-arrange the loops because of the
          //  cache access pattern.
          */
          for(i=0; i<nomp; i++)
            {
              for(field=0; field<rt_num_fields_per_freq; field++) sw[i][field] = 0.0;
            }

          /*
          //  The OpenMP reduction combines the per-thread partial sums with
          //  the value s1 holds on entry, so it must be reset for every
          //  (level, frequency) pair. Without this the sum starts from an
          //  indeterminate value and carries over between iterations.
          */
          s1 = 0.0;

#pragma omp parallel for default(none), private(cell,i,iomp,field), shared(num_level_cells,level_cells,abc,level,cell_vars,freq,nomp,sw,units,constants), reduction(+:s1)
          for(i=0; i<num_level_cells; i++)
            {
              float facLLS;
#ifdef RT_ADD_EXTERNAL_LLS
              float tauLLS;
#endif /* RT_ADD_EXTERNAL_LLS */

              cell = level_cells[i]; /* No need to check for leaves, we selected only them! */

#ifdef _OPENMP
              iomp = omp_get_thread_num();
              cart_assert(iomp>=0 && iomp<nomp);
#else
              iomp = 0;
#endif

#ifdef RT_ADD_EXTERNAL_LLS
              tauLLS = 6.3e-18*units->number_density*units->length*cell_HI_density(cell)*cell_sobolev_length2(cell,level,NULL);
              facLLS = exp(-tauLLS);
#else
              facLLS = 1.0;
#endif /* RT_ADD_EXTERNAL_LLS */

              /* Near fields are attenuated by facLLS, the remaining fields are not */
              for(field=0; field<rt_num_near_fields_per_freq; field++)
                {
                  sw[iomp][field] += facLLS*cell_var(cell,rt_field_offset+rt_num_freqs*field+freq)*abc[1][i]*cell_volume[level]/num_root_cells;
                }
              for(field=rt_num_near_fields_per_freq; field<rt_num_fields_per_freq; field++)
                {
                  sw[iomp][field] += cell_var(cell,rt_field_offset+rt_num_freqs*field+freq)*abc[1][i]*cell_volume[level]/num_root_cells;
                }

              s1 += abc[1][i]*cell_volume[level]/num_root_cells;
            }

#ifdef _OPENMP
          for(i=1; i<nomp; i++)
            {
              for(field=0; field<rt_num_fields_per_freq; field++)
                {
                  sw[0][field] += sw[i][field];
                }
            }
#endif

          rtGlobalValueUpdate(&rtAvgAC[freq],level,s1);
          for(field=0; field<rt_num_fields_per_freq; field++) rtGlobalValueUpdate(&rtAvgACxRF[rt_num_freqs*field+freq],level,sw[0][field]);
        }

      cart_free(abc[0]);
#if (RT_CFI == 1)
      cart_free(abc[1]);
#endif

      cart_free(level_cells);
    }

  end_time(WORK_TIMER);

  /* Combine the per-task values over level_com */
  for(field=0; field<rt_num_fields; field++)
    {
      rtGlobalValueCommunicate(&rtAvgRF[field],MPI_SUM,level_com);
      rtGlobalValueCommunicate(&rtAvgACxRF[field],MPI_SUM,level_com);
    }

  for(freq=0; freq<rt_num_freqs; freq++)
    {
      rtGlobalValueCommunicate(&rtMaxAC[freq],MPI_MAX,level_com);
      rtGlobalValueCommunicate(&rtAvgAC[freq],MPI_SUM,level_com);
    }

  start_time(WORK_TIMER);

  /*
  //  Weighted average
  */
  for(freq=0; freq<rt_num_freqs; freq++)
    {
      float wACxRF = 0.0;
      float wRF = 0.0;

      /* All fields of this frequency except the last one */
      for(field=0; field<rt_num_fields_per_freq-1; field++)
        {
          wACxRF += rtAvgACxRF[rt_num_freqs*field+freq].Value;
          wRF += rtAvgRF[rt_num_freqs*field+freq].Value;
        }

      if(wRF > 1.0e-35)
        {
          frtAbcLoc[freq] = wACxRF/wRF;
        }
      else
        {
          frtAbcLoc[freq] = rtAvgAC[freq].Value;
        }

      /* The loop above leaves field pointing at the last field of this frequency */
      cart_assert(field == rt_num_fields_per_freq-1);

      if(rtAvgRF[rt_num_freqs*field+freq].Value > 1.0e-35)
        {
          frtAbcUni[freq] = rtAvgACxRF[rt_num_freqs*field+freq].Value/rtAvgRF[rt_num_freqs*field+freq].Value;
        }
      else
        {
          frtAbcUni[freq] = rtAvgAC[freq].Value;
        }

      frtAbcAvg[freq] = rtAvgAC[freq].Value;
    }

  end_time(WORK_TIMER);

#ifdef RT_OUTPUT
  for(freq=0; freq<rt_num_freqs; freq++)
    {
      cart_debug("RT: Abc[%d] loc=%10.3e, uni=%10.3e, avg=%10.3le, max=%10.3le",freq,frtAbcLoc[freq],frtAbcUni[freq],rtAvgAC[freq].Value,rtMaxAC[freq].Value);
    }
  for(field=0; field<rt_num_fields; field++)
    {
      cart_debug("RT: field=%d: <rf>=%10.3e, <abc>=%10.3e",field,rtAvgRF[field].Value,(rtAvgRF[field].Value>0.0)?rtAvgACxRF[field].Value/rtAvgRF[field].Value:0.0);
    }
#endif /* RT_OUTPUT */

  /*
  //  Maintain the unit average of the far field - should be called
  //  by all run tasks only, to ensure the buffer consistency.
  */
  if(top_level == min_level) for(level=top_level; level<=bottom_level; level++)
    {
      select_level(level,CELL_TYPE_ANY,&num_level_cells,&level_cells);

#pragma omp parallel for default(none), private(i,freq), shared(num_level_cells,level_cells,cell_vars,rtAvgRF)
      for(i=0; i<num_level_cells; i++)
        {
          for(freq=0; freq<rt_num_freqs; freq++) if(rtAvgRF[rt_far_freq_offset+freq].Value > 0.0)
            {
              cell_var(level_cells[i],rt_far_field_offset+freq) /= rtAvgRF[rt_far_freq_offset+freq].Value;
            }
        }

      cart_free(level_cells);

      /*
      //  NOTE(review): the buffer index below is the cell-loop counter i,
      //  used after that loop has finished (and i is private to the OpenMP
      //  loop above). This looks unintended - presumably the per-level
      //  slot, i.e. level, was meant. Confirm against the definition of
      //  the global value buffer before changing it.
      */
      for(freq=0; freq<rt_num_freqs; freq++) if(rtAvgRF[rt_far_freq_offset+freq].Value > 0.0)
        {
          rtAvgRF[rt_far_freq_offset+freq].buffer[i] /= rtAvgRF[rt_far_freq_offset+freq].Value;
          rtAvgACxRF[rt_far_freq_offset+freq].buffer[i] /= rtAvgRF[rt_far_freq_offset+freq].Value;
        }
    }

#ifdef RT_SINGLE_SOURCE
  start_time(WORK_TIMER);

  cell = cell_find_position(rtSingleSourcePos);
  if(cell>-1 && cell_is_local(cell))
    {
      level = cell_level(cell);
    }
  else
    {
      level = -1;
    }

  end_time(WORK_TIMER);

  start_time(COMMUNICATION_TIMER);
  /*
  //  NG: I don't know why, but Bcast blocks here, hence using Allreduce
  */
  MPI_Allreduce(&level,&rtSingleSourceLevel,1,MPI_INT,MPI_MAX,level_com);
  end_time(COMMUNICATION_TIMER);
#endif /* RT_SINGLE_SOURCE */
}
void read_hart_gas_ic( char *filename ) { int i; FILE *input; int size; float boxh, ainit, astep; int ncells; int endian; int coords[nDim]; int index; int proc, icell; MPI_Status status; int page_count; float *input_page; int count[MAX_PROCS]; float *page[MAX_PROCS]; int *page_indices[MAX_PROCS]; int var; float fracHII; const int num_gas_vars = 6; const int var_index[] = { HVAR_GAS_DENSITY, HVAR_MOMENTUM, HVAR_MOMENTUM+1, HVAR_MOMENTUM+2, HVAR_GAS_ENERGY, HVAR_INTERNAL_ENERGY }; if ( local_proc_id == MASTER_NODE ) { input = fopen(filename, "r"); if ( input == NULL ) { cart_error("Unable to open %s for reading!", filename ); } fread( &size, sizeof(int), 1, input ); if ( size != sizeof(float) ) { reorder( (char *)&size, sizeof(int) ); endian = 1; if ( size != sizeof(float) ) { cart_error("Bad file-format in read_cell_ic"); } } else { endian = 0; } fread( &boxh, sizeof(float), 1, input ); fread( &size, sizeof(int), 1, input ); fread( &size, sizeof(int), 1, input ); fread( &ainit, sizeof(float), 1, input ); fread( &astep, sizeof(float), 1, input ); fread( &size, sizeof(int), 1, input ); fread( &size, sizeof(int), 1, input ); fread( &ncells, sizeof(int), 1, input ); fread( &size, sizeof(int), 1, input ); if ( endian ) { reorder( (char *)&boxh, sizeof(float) ); reorder( (char *)&ainit, sizeof(float) ); reorder( (char *)&astep, sizeof(float) ); reorder( (char *)&ncells, sizeof(int) ); } box_size = boxh; auni_init = ainit; MPI_Bcast( &box_size, 1, MPI_DOUBLE, MASTER_NODE, mpi.comm.run ); MPI_Bcast( &auni_init, 1, MPI_DOUBLE, MASTER_NODE, mpi.comm.run ); cart_debug("boxh = %f", boxh ); cart_debug("ainit = %f", ainit ); cart_debug("astep = %f", astep ); cart_debug("ncells = %u", ncells ); if ( ncells != num_root_cells ) { cart_error("ncells in %s does not match num_root_cells (%u vs %u)", filename, ncells, num_root_cells ); } input_page = cart_alloc(float, num_grid*num_grid ); for ( proc = 1; proc < num_procs; proc++ ) { page[proc] = cart_alloc(float, num_grid*num_grid ); 
page_indices[proc] = cart_alloc(int, num_grid*num_grid ); } for ( var = 0; var < num_gas_vars; var++ ) { for ( proc = 1; proc < num_procs; proc++ ) { count[proc] = 0; } fread( &size, sizeof(int), 1, input ); for ( coords[0] = 0; coords[0] < num_grid; coords[0]++ ) { size = fread( input_page, sizeof(float), num_grid*num_grid, input ); if ( size != num_grid*num_grid ) { cart_error("Error reading from file %s", filename ); } if ( endian ) { for ( i = 0; i < num_grid*num_grid; i++ ) { reorder( (char *)&input_page[i], sizeof(float) ); } } page_count = 0; for ( coords[1] = 0; coords[1] < num_grid; coords[1]++ ) { for ( coords[2] = 0; coords[2] < num_grid; coords[2]++ ) { index = sfc_index( coords ); proc = processor_owner( index ); if ( proc == local_proc_id ) { icell = root_cell_location( index ); cell_var( icell, var_index[var] ) = input_page[page_count]; } else { /* add cell to send page */ page[proc][count[proc]] = input_page[page_count]; page_indices[proc][count[proc]] = index; count[proc]++; if ( count[proc] == num_grid*num_grid ) { MPI_Send( page[proc], num_grid*num_grid, MPI_FLOAT, proc, 0, mpi.comm.run ); MPI_Send( page_indices[proc], num_grid*num_grid, MPI_INT, proc, 0, mpi.comm.run ); count[proc] = 0; } } page_count++; } } } fread( &size, sizeof(int), 1, input ); /* send last variables */ for ( proc = 1; proc < num_procs; proc++ ) { MPI_Send( page[proc], count[proc], MPI_FLOAT, proc, 0, mpi.comm.run ); MPI_Send( page_indices[proc], count[proc], MPI_INT, proc, 0, mpi.comm.run ); } } fclose(input); cart_free( input_page ); for ( proc = 1; proc < num_procs; proc++ ) { cart_free( page[proc] ); cart_free( page_indices[proc] ); } } else {
void rtOtvetTreeEmulatorEddingtonTensor(int level, int num_level_cells, int *level_cells) { const int NumSmooth = 2; const double S1 = 1.0; const double S2 = (S1-1)/nDim; int i, j, k, l, cell, parent; int nb3[nDim], nb18[rtStencilSize]; int num_parent_cells, *parent_cells; float norm, h2, eps2, *tmp; float ot, et[rt_num_et_vars], sor; double r2, q; if(level == min_level) { root_grid_fft_exec(RT_VAR_SOURCE,rtNumOtvetETVars-1,rtOtvetETVars+1,rtOtvetTopLevelFFTWorker); start_time(WORK_TIMER); /* // Normalize */ #pragma omp parallel for default(none), private(i,j,cell), shared(num_level_cells,level_cells,cell_vars) for(i=0; i<num_level_cells; i++) { cell = level_cells[i]; cell_var(cell,RT_VAR_OT_FIELD) = cell_var(cell,rt_et_offset+0) #if (nDim > 1) + cell_var(cell,rt_et_offset+2) #if (nDim > 2) + cell_var(cell,rt_et_offset+5) #endif /* nDim > 2 */ #endif /* nDim > 1 */ ; if(cell_var(cell,RT_VAR_OT_FIELD) > 0.0) { for(j=0; j<rt_num_et_vars; j++) { cell_var(cell,rt_et_offset+j) /= cell_var(cell,RT_VAR_OT_FIELD); } } else { cell_var(cell,RT_VAR_OT_FIELD) = 0.0; cell_var(cell,rt_et_offset+0) = 1.0/nDim; #if (nDim > 1) cell_var(cell,rt_et_offset+1) = 0.0; cell_var(cell,rt_et_offset+2) = 1.0/nDim; #if (nDim > 2) cell_var(cell,rt_et_offset+3) = 0.0; cell_var(cell,rt_et_offset+4) = 0.0; cell_var(cell,rt_et_offset+5) = 1.0/nDim; #endif /* nDim > 2 */ #endif /* nDim > 1 */ } } end_time(WORK_TIMER); } else { start_time(WORK_TIMER); /* // We start with interpolating from parents */ select_level(level-1,CELL_TYPE_LOCAL | CELL_TYPE_REFINED,&num_parent_cells,&parent_cells); h2 = cell_size[level]*cell_size[level]; norm = cell_volume[level]/(4*M_PI*h2); if(level == 1) { eps2 = 0.1; } else { eps2 = 0.05; } #pragma omp parallel for default(none), private(i,j,k,parent,cell,ot,et,nb3,nb18,sor,l,r2,q), shared(level,num_parent_cells,parent_cells,cell_vars,cell_child_oct,norm,h2,rtStencilDist2,rtStencilTensor,eps2) for(i=0; i<num_parent_cells; i++) { parent = parent_cells[i]; /* // Loop over 
all children */ for(j=0; j<num_children; j++) { /* // Interpolate from parents and turn ET into OT radiation pressure tensor */ cell_interpolation_neighbors(parent,j,nb3); /* // NG: this is the best interpolation, I did check two other forms */ ot = cell_interpolate_with_neighbors(parent,RT_VAR_OT_FIELD,nb3); for(k=0; k<rt_num_et_vars; k++) { et[k] = ot*cell_interpolate_with_neighbors(parent,rt_et_offset+k,nb3); } cell = cell_child(parent,j); /* // Add local contributions from 18 neighbors */ rtGetStencil(level,cell,nb18); for(l=0; l<rtStencilSize; l++) if((sor = cell_rt_source(nb18[l])) > 0.0) { r2 = rtStencilDist2[l]; sor *= norm/(r2+eps2); ot += sor; et[0] += sor*(S1*rtStencilTensor[l][0]-S2); #if (nDim > 1) et[1] += sor*rtStencilTensor[l][1]; et[2] += sor*(S1*rtStencilTensor[l][2]-S2); #if (nDim > 2) et[3] += sor*rtStencilTensor[l][3]; et[4] += sor*rtStencilTensor[l][4]; et[5] += sor*(S1*rtStencilTensor[l][5]-S2); #endif /* nDim > 2 */ #endif /* nDim > 1 */ } cell_var(cell,RT_VAR_OT_FIELD) = ot; if(ot > 0.0) { q = et[0] #if (nDim > 1) + et[2] #if (nDim > 2) + et[5] #endif /* nDim > 2 */ #endif /* nDim > 1 */ ; for(k=0; k<rt_num_et_vars; k++) { cell_var(cell,rt_et_offset+k) = et[k]/q; } } else { cell_var(cell,rt_et_offset+0) = 1.0/nDim; #if (nDim > 1) cell_var(cell,rt_et_offset+1) = 0.0; cell_var(cell,rt_et_offset+2) = 1.0/nDim; #if (nDim > 2) cell_var(cell,rt_et_offset+3) = 0.0; cell_var(cell,rt_et_offset+4) = 0.0; cell_var(cell,rt_et_offset+5) = 1.0/nDim; #endif /* nDim > 2 */ #endif /* nDim > 1 */ } } } cart_free(parent_cells); end_time(WORK_TIMER); } start_time(RT_TREE_EMULATOR_UPDATE_TIMER); update_buffer_level(level,rtOtvetETVars,rtNumOtvetETVars); end_time(RT_TREE_EMULATOR_UPDATE_TIMER); /* // Smooth a few times */ start_time(WORK_TIMER); tmp = cart_alloc(float, num_level_cells*rt_num_et_vars); end_time(WORK_TIMER); for(l=0; l<NumSmooth; l++) { start_time(WORK_TIMER); #pragma omp parallel for default(none), private(i,j,k,cell,et,nb18), 
shared(level,num_level_cells,level_cells,cell_vars,tmp) for(i=0; i<num_level_cells; i++) { cell = level_cells[i]; rtGetStencil(level,cell,nb18); //cell_all_neighbors(cell,nb18); for(k=0; k<rt_num_et_vars; k++) et[k] = 2*cell_var(cell,rt_et_offset+k); for(j=0; j<rtStencilSize; j++) //for(j=0; j<num_neighbors; j++) { for(k=0; k<rt_num_et_vars; k++) { et[k] += cell_var(nb18[j],rt_et_offset+k); } } for(k=0; k<rt_num_et_vars; k++) tmp[i*rt_num_et_vars+k] = et[k]/(2+rtStencilSize); } #pragma omp parallel for default(none), private(i,k,cell), shared(level,num_level_cells,level_cells,cell_vars,tmp) for(i=0; i<num_level_cells; i++) { cell = level_cells[i]; for(k=0; k<rt_num_et_vars; k++) { cell_var(cell,rt_et_offset+k) = tmp[i*rt_num_et_vars+k]; } } end_time(WORK_TIMER); start_time(RT_TREE_EMULATOR_UPDATE_TIMER); update_buffer_level(level,rtOtvetETVars+1,rtNumOtvetETVars-1); end_time(RT_TREE_EMULATOR_UPDATE_TIMER); } start_time(WORK_TIMER); cart_free(tmp); end_time(WORK_TIMER); }
void cosmics_init() { static const int page_size = 262144; FILE *f[6]; int i, j, n, index, ipart, coords[3]; int level, levelMax; int l, wrong_order[6], page, num_pages; int cell, num_level_cells, *level_cells; int slice, num_slices, num_data_per_slice, num_data_done; long id; int ng1, ng2; double x0[3], x[3], fRef; float q, xFac, vFac; float *buffer[6], *vxc, *vyc, *vzc, *vxb, *vyb, *vzb; float fracB, temIn, fracHII; int children[num_children]; GIC_RECORD s1, s2; const char *tmp, *dir; char filename[999]; struct cosmics_header { int n[3]; float dx, abeg, OmegaM, OmegaL, H0; } header[6]; /* // Where do we get the root name? Use options for now */ tmp = extract_option1("dir","dir",NULL); if(tmp != NULL) { dir = tmp; } else { cart_error("An option --dir=<name> is required, where <name> is the directory name for a set of COSMICS input files."); } /* // No more options are allowed. */ if(num_options > 0) { cart_error("Unrecognized option: %s",options[0]); } MPI_Barrier(mpi.comm.run); if(local_proc_id == MASTER_NODE) { for(l=0; l<6; l++) { strcpy(filename,dir); switch(l) { case 0: { strcat(filename,"/ic_vxc.dat"); break; } case 1: { strcat(filename,"/ic_vyc.dat"); break; } case 2: { strcat(filename,"/ic_vzc.dat"); break; } case 3: { strcat(filename,"/ic_vxb.dat"); break; } case 4: { strcat(filename,"/ic_vyb.dat"); break; } case 5: { strcat(filename,"/ic_vzb.dat"); break; } } f[l] = fopen(filename,"r"); cart_assert(f[l] != NULL); if(gicReadRecordHelper(f[l],sizeof(struct cosmics_header),header+l,wrong_order+l) != 0) { cart_error("Error in reading the header for stream %d, file %s",l,filename); } if(l!=0 && memcmp(header,header+l,sizeof(struct cosmics_header))!=0) { cart_error("Incompatible input streams 0 and %d",l); } } if(wrong_order[0]) { reorder((char*)&header->n[0],sizeof(int)); reorder((char*)&header->n[1],sizeof(int)); reorder((char*)&header->n[2],sizeof(int)); reorder((char*)&header->dx,sizeof(float)); reorder((char*)&header->abeg,sizeof(float)); 
reorder((char*)&header->OmegaM,sizeof(float)); reorder((char*)&header->OmegaL,sizeof(float)); reorder((char*)&header->H0,sizeof(float)); } if(header->n[0]!=header->n[1] || header->n[1]!=header->n[2]) { cart_error("Only a cubic input mesh is supported."); } } MPI_Bcast(header,sizeof(struct cosmics_header),MPI_BYTE,MASTER_NODE,mpi.comm.run); levelMax = 0; while(header->n[0] > num_grid) { header->n[0] = header->n[0] >> 1; levelMax++; } if(num_grid != header->n[0]) { cart_error("The input grid size (=%d) is not a power-of-two multiple of num_grid (=%d).",header->n[1],num_grid); } cart_assert(header->n[1] == num_grid << levelMax); levelMax += min_level; /* // Set units */ cosmology_set(OmegaM,header->OmegaM); cosmology_set(OmegaB,0.04); cosmology_set(OmegaL,header->OmegaL); cosmology_set(h,header->H0/100.0); cosmology_set(DeltaDC,0.0); box_size = header->dx*cosmology->h*num_grid; auni[min_level] = header->abeg; tl[min_level] = tcode_from_auni(auni[min_level]); abox[min_level] = abox_from_auni(auni[min_level]); units_init(); units_update(min_level); /* // Particle parameters */ num_particles_total = (particleid_t)header->n[1]*(particleid_t)header->n[1]*(particleid_t)header->n[1]; num_particle_species = 1; particle_species_num[0] = num_particles_total; particle_species_mass[0] = 1.0 - cosmology->OmegaB/cosmology->OmegaM; particle_species_indices[0] = 0; particle_species_indices[1] = num_particles_total; #ifdef STARFORM if(MAX_PARTICLE_SPECIES < 2) { cart_error("MAX_PARTICLE_SPECIES should be at least 2. 
Increase and rerun."); } num_particle_species = 2; particle_species_num[1] = 0; particle_species_mass[1] = 0.0; particle_species_indices[2] = particle_species_indices[1]; total_stellar_mass = 0.0; total_stellar_initial_mass = 0.0; #endif cart_debug("num_particle_species = %d",num_particle_species); cart_debug("num_particles_total = %d",num_particles_total); /* // Balance load - split uniformly */ for(i=0; i<=num_procs; i++) { proc_sfc_index[i] = ((unsigned long)num_root_cells*(unsigned long)i)/num_procs; } init_tree(); for(i=0; i<nDim; i++) { refinement_volume_min[i] = 0.0; refinement_volume_max[i] = num_grid; } /* // Refine grid uniformly to levelMax */ for(level=min_level; level<levelMax; level++) { cart_debug("refining level %d",level); select_level(level,CELL_TYPE_LOCAL,&num_level_cells,&level_cells); cart_debug("num_level_cells = %d",num_level_cells); for(i=0; i<num_level_cells; i++) { refinement_indicator(level_cells[i],0) = 1.0; } cart_free( level_cells ); refine(level); } /* // Read in the data */ for(l=0; l<6; l++) { buffer[l] = cart_alloc(float,page_size); } vxc = buffer[0]; vyc = buffer[1]; vzc = buffer[2]; vxb = buffer[3]; vyb = buffer[4]; vzb = buffer[5]; /* // Unit conversion factors */ vFac = constants->kms/units->velocity; xFac = abox[min_level]*abox[min_level]*constants->Mpc/(100*cosmology->h*units->length)*dPlus(abox[min_level])/qPlus(abox[min_level]); if(header->n[1] > 256) { num_slices = header->n[1]; num_data_per_slice = header->n[1]*header->n[1]; } else { num_slices = 1; num_data_per_slice = header->n[1]*header->n[1]*header->n[1]; } num_pages = (num_data_per_slice+page_size-1)/page_size; id = 0L; fRef = pow(0.5,levelMax); ng1 = num_grid << levelMax; ng2 = ng1*ng1; for(slice=0; slice<num_slices; slice++) { num_data_done = 0; if(local_proc_id == MASTER_NODE) { for(l=0; l<6; l++) { if(fread(&s1,sizeof(GIC_RECORD),1,f[l]) != 1) { cart_error("Error in reading header for file %d, record %d",l,slice); } if(wrong_order[l]) reorder((char 
*)&s1,sizeof(s1)); if(s1 != sizeof(float)*num_data_per_slice) { cart_error("Header for file %d, record %d is corrupted: %d, should be %d",l,slice,s1,num_data_per_slice); } } } for(page=0; page<num_pages; page++) { n = page_size; if(num_data_done+n > num_data_per_slice) { n = num_data_per_slice - num_data_done; cart_assert(page == (num_pages-1)); } num_data_done += n; if(local_proc_id == MASTER_NODE) { for(l=0; l<6; l++) { if(fread(buffer[l],sizeof(float),n,f[l]) != n) { cart_error("Error in reading data for file %d, record %d, page %d",l,slice,page); } if(wrong_order[l]) { for(j=0; j<n; j++) reorder((char *)(buffer[l]+j),sizeof(float)); } } } for(l=0; l<6; l++) { MPI_Bcast(buffer[l],n,MPI_FLOAT,MASTER_NODE,mpi.comm.run); } /* // We need a barrier here to avoid overfilling MPI buffers // with too many asynchronized broadcasts */ if(page%100 == 99) MPI_Barrier(mpi.comm.run); for(j=0; j<n; j++) { /* // Particle position */ x0[0] = fRef*(0.5+(id % ng1)); x0[1] = fRef*(0.5+(id/ng1 % ng1)); x0[2] = fRef*(0.5+(id/ng2 % ng1)); x[0] = xFac*vxc[j] + x0[0]; x[1] = xFac*vyc[j] + x0[1]; x[2] = xFac*vzc[j] + x0[2]; /* enforce periodic boundary conditions */ for(i=0; i<3; i++) { if(x[i] < 0.0) { x[i] += (double)num_grid; } else if(x[i] >= (double)num_grid) { x[i] -= (double)num_grid; } coords[i] = (int)(x[i]); } index = sfc_index( coords ); cart_assert( index >= 0 && index < num_root_cells ); /* check if we're supposed to read in this particle */ if(local_proc_id == processor_owner(index)) { ipart = particle_alloc(id); cart_assert(ipart>=0 && ipart<num_particles ); particle_x[ipart][0] = x[0]; particle_x[ipart][1] = x[1]; particle_x[ipart][2] = x[2]; particle_v[ipart][0] = vFac*vxc[j]; particle_v[ipart][1] = vFac*vyc[j]; particle_v[ipart][2] = vFac*vzc[j]; particle_id[ipart] = id; particle_mass[ipart] = particle_species_mass[0]; particle_level[ipart] = min_level + levelMax; } for(i=0; i<3; i++) { coords[i] = (int)(x0[i]); } index = sfc_index( coords ); cart_assert( index >= 0 && 
index < num_root_cells ); if(local_proc_id == processor_owner(index)) { cell = cell_find_position(x0); #ifdef DEBUG if(cell == -1) { cart_debug("%lf %lf %lf",x0[0],x0[1],x0[2]); cart_debug("%ld %d %g",id,ng1,fRef); } #endif cart_assert(cell != -1); cell_var(cell,HVAR_MOMENTUM+0) = vFac*vxb[j]; cell_var(cell,HVAR_MOMENTUM+1) = vFac*vyb[j]; cell_var(cell,HVAR_MOMENTUM+2) = vFac*vzb[j]; } id++; } } if(local_proc_id == MASTER_NODE) { for(l=0; l<6; l++) { if(fread(&s2,sizeof(GIC_RECORD),1,f[l]) != 1) { cart_error("Error in reading footer for file %d, record %d",l,slice); } if(wrong_order[l]) reorder((char *)&s2,sizeof(s2)); if(s2 != sizeof(float)*num_data_per_slice) { cart_error("Footer for file %d, record %d is corrupted: %d, should be %d",l,slice,s2,num_data_per_slice); } } } } if(local_proc_id == MASTER_NODE) { for(l=0; l<6; l++) fclose(f[l]); } for(l=0; l<6; l++) cart_free(buffer[l]); build_particle_list(); /* // Thermal state of the primordial gas */ fracB = cosmology->OmegaB/cosmology->OmegaM; fracHII = 1.2e-5*sqrt(cosmology->Omh2)/cosmology->Obh2; q = auni[min_level]*137.0*pow(cosmology->Obh2/0.022,0.4); temIn = 2.728/auni[min_level]*q/pow(pow(q,1.73)+1,1.0/1.73); if(local_proc_id == MASTER_NODE) { cart_debug("Initial temperature: %f",temIn); } temIn /= units->temperature; if(local_proc_id == MASTER_NODE) { cart_debug("f_HII: %e, T_in: %e",fracHII,temIn); } /* // Finish filling in the lowest level */ select_level(min_level+levelMax,CELL_TYPE_LOCAL,&num_level_cells,&level_cells); for(i=0; i<num_level_cells; i++) { cell = level_cells[i]; cell_gas_density(cell) = fracB; cell_momentum(cell,0) *= fracB; cell_momentum(cell,1) *= fracB; cell_momentum(cell,2) *= fracB; cell_gas_gamma(cell) = constants->gamma; cell_gas_internal_energy(cell) = cell_gas_density(cell)*temIn/(constants->gamma-1)*(1.0-constants->Yp+0.25*constants->Yp); cell_gas_pressure(cell) = cell_gas_internal_energy(cell)*(constants->gamma-1); cell_gas_energy(cell) = cell_gas_internal_energy(cell) + 
cell_gas_kinetic_energy(cell); #ifdef RADIATIVE_TRANSFER cell_HI_density(cell) = cell_gas_density(cell)*constants->XH*(1.0-fracHII); cell_HII_density(cell) = cell_gas_density(cell)*constants->XH*fracHII; cell_HeI_density(cell) = cell_gas_density(cell)*constants->XHe; cell_HeII_density(cell) = cell_gas_density(cell)*0.0; cell_HeIII_density(cell) = cell_gas_density(cell)*0.0; cell_H2_density(cell) = cell_gas_density(cell)*constants->XH*2.0e-6; #endif #ifdef EXTRA_PRESSURE_SOURCE cell_extra_pressure_source(cell) = 0; #endif /* EXTRA_PRESSURE_SOURCE */ #ifdef ISOTROPIC_TURBULENCE_ENERGY cell_isotropic_turbulence_energy(cell) = 0; #endif /* ISOTROPIC_TURBULENCE_ENERGY */ } cart_free(level_cells); /* // Finish filling in the grid */ for(level=min_level+levelMax-1; level>=min_level; level--) { select_level(level,CELL_TYPE_LOCAL,&num_level_cells,&level_cells); for(i=0; i<num_level_cells; i++) { cell = level_cells[i]; cell_all_children(cell,children); for(j=0; j<num_hydro_vars; j++) { q = 0.0; for(l=0; l<num_children; l++) { q += cell_var(children[l],all_hydro_vars[j]); } cell_var(cell,all_hydro_vars[j]) = q/num_children; } } cart_free(level_cells); } build_cell_buffer(); repair_neighbors(); /* // Update the buffer everywhere */ for(level=min_level; level<=max_level; level++) { update_buffer_level(level,all_hydro_vars,num_hydro_vars); } hydro_magic(min_level); hydro_eos(min_level); cart_debug("tl[min_level] = %f", tl[min_level] ); cart_debug("au[min_level] = %f", auni[min_level] ); cart_debug("ab[min_level] = %f", abox[min_level] ); for(level=min_level+1; level<=max_level; level++) { tl[level] = tl[min_level]; auni[level] = auni[min_level]; abox[level] = abox[min_level]; } for(i=0; i<num_particles; i++) if(particle_level[i] != FREE_PARTICLE_LEVEL) { particle_t[i] = tl[min_level]; particle_dt[i] = 0.0; } #ifdef STARFORM for(i=0; i<nDim; i++) { star_formation_volume_min[i] = refinement_volume_min[i]; star_formation_volume_max[i] = refinement_volume_max[i]; } #endif }
/* // This function is a modified version of restart_load_balance from io.c */ void gicBalanceLoad(const char *rootname, char *type) { int i, j; int L, nref; int index; int coords[3]; int page, num_pages; float *cell_work; int *constrained_quantities; struct gicFile input; struct gicManifest manifest; struct gicFileHeader fileHeader; char filename[257]; /* GIC filenames are limited to 256 bytes */ GIC_INTEGER *mask; if(num_procs == 1) { proc_sfc_index[0] = 0; proc_sfc_index[1] = num_root_cells; init_tree(); return; } if(strlen(rootname) > 250) { cart_error("GIC filenames are limited to 256 bytes; rootnames to 250 bytes"); } if(local_proc_id == MASTER_NODE) { /* do load balancing */ constrained_quantities = cart_alloc(int,num_constraints*num_root_cells); cell_work = cart_alloc(float,num_root_cells); for(i=0; i<num_root_cells; i++) { cell_work[i] = 0.0; } for(i=0; i<num_constraints*num_root_cells; i++) { constrained_quantities[i] = 0; } /* // load mask information and compute work */ strcpy(filename,rootname); strcat(filename,"_"); strcat(filename,type); strcat(filename,".vel"); gicStartFile(filename,&input,&manifest,&fileHeader); /* // Only low-res files are currently supported for the single-res case */ L = 0; num_particles_total = fileHeader.Ntot; if(fileHeader.Lmax == 0) { /* // Single-resolution file - all work is uniform */ nref = 1 << (nDim*L); for(index=0; index<num_root_cells; index++) { constrained_quantities[num_constraints*index+0] = nref; constrained_quantities[num_constraints*index+1] = nref; cell_work[index] += (cost_per_cell+cost_per_particle)*nref; } } else { mask = cart_alloc(GIC_INTEGER,fileHeader.Nrec); num_pages = (num_root_cells+fileHeader.Nrec-1)/fileHeader.Nrec; /* // Skip shifts */ j = fseek(input.File,2*sizeof(GIC_RECORD)+3*sizeof(GIC_INTEGER),SEEK_CUR); if(j != 0) { cart_error("Error in reading grid shifts, fseek error %d",j); } coords[0] = coords[1] = coords[2] = 0; for(page=0; page<num_pages; page++) { 
if(gicReadFortranRecordInteger(&input,mask) != 0) { cart_error("File is corrupted, error in reading mask array on page %d",page); } for(j=0; coords[2]<num_grid && j<fileHeader.Nrec; j++) { index = sfc_index(coords); cart_assert(index>=0 && index<num_root_cells); nref = 1 << (nDim*mask[j]); constrained_quantities[num_constraints*index+0] = nref; constrained_quantities[num_constraints*index+1] = nref; cell_work[index] += (cost_per_cell+cost_per_particle)*nref; coords[0]++; if(coords[0] == num_grid) { coords[0] = 0; coords[1]++; } if(coords[1] == num_grid) { coords[1] = 0; coords[2]++; } } } cart_free(mask); } fclose(input.File); cart_debug("load balancing before i/o"); load_balance_entire_volume(cell_work,constrained_quantities,proc_sfc_index); cart_free(cell_work); cart_free(constrained_quantities); }
void move_hydro_tracers( int level ) { int i, j; int tracer; int iter_cell; int num_level_cells; int *level_cells; double vdt[nDim]; int icell, icell_orig; int level1; int child; double pos[nDim]; int found; int c[num_children]; double diff1, diff2, diff3; double pt3, pd3; double t1,t2,t3,d1,d2,d3; double t2t1, t2d1, d2t1, d2d1; double t3t2t1, t3t2d1, t3d2t1, t3d2d1; double d3t2t1, d3t2d1, d3d2t1, d3d2d1; start_time( WORK_TIMER ); select_level( level, CELL_TYPE_LOCAL, &num_level_cells, &level_cells ); for ( i = 0; i < num_level_cells; i++ ) { iter_cell = level_cells[i]; #ifdef HYDRO_TRACERS_NGP for ( j = 0; j < nDim; j++ ) { vdt[j] = cell_momentum(iter_cell,j)/cell_gas_density(iter_cell) * dtl[level]; } #endif /* HYDRO_TRACERS_NGP */ tracer = cell_tracer_list[iter_cell]; while ( tracer != NULL_TRACER ) { cart_assert( tracer >= 0 && tracer < num_tracers ); #ifndef HYDRO_TRACERS_NGP icell = iter_cell; level1 = level; do { found = 1; icell_orig = icell; cart_assert( icell != NULL_OCT ); cell_center_position( icell, pos ); /* find lower leftmost cell */ child = 0; for ( j = 0; j < nDim; j++ ) { if ( tracer_x[tracer][j] >= pos[j] ) { child += (1<<j); } } cart_assert( child >= 0 && child < num_children ); for ( j = 0; j < nDim; j++ ) { if ( neighbor_moves[child][j] == -1 ) { break; } else { icell = cell_neighbor(icell, neighbor_moves[child][j] ); cart_assert( icell != NULL_OCT ); if ( cell_level(icell) != level1 ) { icell = cell_parent_cell(icell_orig); cart_assert( icell != NULL_OCT ); level1 = level1 - 1; found = 0; break; } } } if ( found ) { c[0] = icell; c[1] = cell_neighbor(icell,1); c[2] = cell_neighbor(icell,3); c[3] = cell_neighbor(c[1],3); c[4] = cell_neighbor(icell,5); c[5] = cell_neighbor(c[1],5); c[6] = cell_neighbor(c[2],5); c[7] = cell_neighbor(c[3],5); for ( j = 1; j < num_children; j++ ) { if ( cell_level(c[j]) != level1 ) { icell = cell_parent_cell(icell_orig); level1 = level1 - 1; cart_assert( icell != NULL_OCT ); found = 0; break; } } } } while ( 
!found ); cell_center_position( c[0], pos ); /* now we have the level on which this particle will move */ diff1 = pos[0] - tracer_x[tracer][0]; if ( fabs(diff1) > (double)(num_grid/2) ) { if ( diff1 > 0.0 ) { diff1 -= (double)(num_grid); } else { diff1 += (double)(num_grid); } } d1 = fabs(diff1) * cell_size_inverse[level1]; cart_assert( d1 >= 0.0 && d1 <= 1.0 ); diff2 = pos[1] - tracer_x[tracer][1]; if ( fabs(diff2) > (double)(num_grid/2) ) { if ( diff2 > 0.0 ) { diff2 -= (double)(num_grid); } else { diff2 += (double)(num_grid); } } d2 = fabs(diff2) * cell_size_inverse[level1]; diff3 = pos[2] - tracer_x[tracer][2]; if ( fabs(diff3) > (double)(num_grid/2) ) { if ( diff3 > 0.0 ) { diff3 -= (double)(num_grid); } else { diff3 += (double)(num_grid); } } d3 = fabs(diff3) * cell_size_inverse[level1]; cart_assert( d1 >= 0.0 && d1 <= 1.0 ); cart_assert( d2 >= 0.0 && d2 <= 1.0 ); cart_assert( d3 >= 0.0 && d3 <= 1.0 ); t1 = 1.0 - d1; t2 = 1.0 - d2; t3 = 1.0 - d3; cart_assert( t1 >= 0.0 && t1 <= 1.0 ); cart_assert( t2 >= 0.0 && t2 <= 1.0 ); cart_assert( t3 >= 0.0 && t3 <= 1.0 ); t2t1 = t2 * t1; t2d1 = t2 * d1; d2t1 = d2 * t1; d2d1 = d2 * d1; pt3 = t3*dtl[level]; pd3 = d3*dtl[level]; t3t2t1 = pt3 * t2t1; t3t2d1 = pt3 * t2d1; t3d2t1 = pt3 * d2t1; t3d2d1 = pt3 * d2d1; d3t2t1 = pd3 * t2t1; d3t2d1 = pd3 * t2d1; d3d2t1 = pd3 * d2t1; d3d2d1 = pd3 * d2d1; for ( j = 0; j < nDim; j++ ) { vdt[j] =t3t2t1 * cell_momentum(c[0], j) / cell_gas_density(c[0]) + t3t2d1 * cell_momentum(c[1], j) / cell_gas_density(c[1]) + t3d2t1 * cell_momentum(c[2], j) / cell_gas_density(c[2]) + t3d2d1 * cell_momentum(c[3], j) / cell_gas_density(c[3]) + d3t2t1 * cell_momentum(c[4], j) / cell_gas_density(c[4]) + d3t2d1 * cell_momentum(c[5], j) / cell_gas_density(c[5]) + d3d2t1 * cell_momentum(c[6], j) / cell_gas_density(c[6]) + d3d2d1 * cell_momentum(c[7], j) / cell_gas_density(c[7]); } #endif /* HYDRO_TRACERS_NGP */ tracer_x[tracer][0] += vdt[0]; tracer_x[tracer][1] += vdt[1]; tracer_x[tracer][2] += vdt[2]; /* 
enforce periodic boundaries */ if ( tracer_x[tracer][0] < 0.0 ) { tracer_x[tracer][0] += (double)(num_grid); } if ( tracer_x[tracer][0] >= (double)(num_grid) ) { tracer_x[tracer][0] -= (double)(num_grid); } if ( tracer_x[tracer][1] < 0.0 ) { tracer_x[tracer][1] += (double)(num_grid); } if ( tracer_x[tracer][1] >= (double)(num_grid) ) { tracer_x[tracer][1] -= (double)(num_grid); } if ( tracer_x[tracer][2] < 0.0 ) { tracer_x[tracer][2] += (double)(num_grid); } if ( tracer_x[tracer][2] >= (double)(num_grid) ) { tracer_x[tracer][2] -= (double)(num_grid); } tracer = tracer_list_next[tracer]; } } cart_free( level_cells ); end_time( WORK_TIMER ); }
void log_diagnostics() { int i,j; int level; int icell; int num_level_cells; int *level_cells; double kinetic_energy; double gas_kinetic, gas_thermal, gas_potential, gas_mass; double total_gas_kinetic, total_gas_thermal, total_gas_potential, total_gas_mass; double particle_kinetic, particle_potential; double total_particle_kinetic, total_particle_potential; double error; double da; double dtyears, current_age, current_dt; #ifdef STAR_FORMATION double stellar_mass, stellar_initial_mass; double old_stellar_mass, old_stellar_initial_mass; double d_stellar_mass, d_stellar_initial_mass; double resolved_volume[max_level-min_level+1]; double local_resolved_volume[max_level-min_level+1]; double total_resolved_volume; double sfr; #endif /* STAR_FORMATION */ dtyears = dtl[min_level]*units->time/constants->yr; #ifdef COSMOLOGY current_age = tphys_from_abox(abox[min_level]); current_dt = dtyears; #else current_age = tl[min_level]*units->time; current_dt = dtl[min_level]*units->time; #endif #ifdef PARTICLES #ifdef COSMOLOGY ap1 = abox_from_tcode( tl[min_level] - 0.5*dtl[min_level] ); da = abox[min_level] - abox_old[min_level]; #else ap1 = 1.0; ap0 = 0.0; da = 0.0; #endif #endif /* log profiling information */ #ifdef COSMOLOGY fprintf( timing, "%u %e %e %e", step, tl[min_level], auni[min_level], current_time( TOTAL_TIME, min_level-1 ) ); #else fprintf( timing, "%u %e %e", step, tl[min_level], current_time( TOTAL_TIME, min_level-1 ) ); #endif /* COSMOLOGY */ for ( level = min_level-1; level <= max_level; level++ ) { for ( i = 1; i < NUM_TIMERS; i++ ) { fprintf( timing, " %e", total_time(i, level) ); } } fprintf( timing, "\n" ); fflush(timing); #ifdef PAPI_PROFILING fprintf( papi_profile, "%u %e", step, current_time( TOTAL_TIME, min_level-1 ) ); for ( level = min_level-1; level <= max_level; level++ ) { for ( i = 1; i < NUM_TIMERS; i++ ) { for ( j = 0; j < num_papi_events; j++ ) { fprintf( papi_profile, " %llu", papi_total_counter(i,level,j) ); } } } fprintf( papi_profile, "\n" ); 
fflush(papi_profile); #endif /* PAPI_PROFILING */ /* log workload information */ #ifdef PARTICLES #ifdef COSMOLOGY fprintf( workload, "%u %e %e %u %u", step, tl[min_level], auni[min_level], num_local_particles, max_level_now() ); #else fprintf( workload, "%u %e %u %u", step, tl[min_level], num_local_particles, max_level_now() ); #endif /* COSMOLOGY */ #else #ifdef COSMOLOGY fprintf( workload, "%u %e %e 0 %u", step, tl[min_level], auni[min_level], max_level_now() ); #else fprintf( workload, "%u %e 0 %u", step, tl[min_level], max_level_now() ); #endif /* COSMOLOGY */ #endif /* PARTICLES */ for ( i = min_level; i <= max_level; i++ ) { fprintf(workload, " %u %u", num_cells_per_level[i], num_buffer_cells[i] ); } #ifdef DEBUG_MEMORY_USE fprintf( workload, " %lu",dmuReportAllocatedMemory()); #endif /* DEBUG_MEMORY_USE */ fprintf(workload, "\n"); fflush(workload); /* log dependency information */ #ifdef COSMOLOGY fprintf( dependency, "%u %e %e", step, tl[min_level], auni[min_level] ); #else fprintf( dependency, "%u %e", step, tl[min_level] ); #endif /* COSMOLOGY */ for ( level = min_level; level <= max_level; level++ ) { for ( i = 0; i < num_procs; i++ ) { if ( level == min_level ) { fprintf( dependency, " %u %u", num_remote_buffers[level][i], num_local_buffers[level][i] ); } else { fprintf( dependency, " %u %u", num_children*num_remote_buffers[level][i], num_children*num_local_buffers[level][i] ); } } } fprintf(dependency, "\n"); fflush(dependency); /* compute energies */ gas_kinetic = gas_thermal = gas_potential = gas_mass = 0.0; total_gas_kinetic = total_gas_thermal = total_gas_potential = total_gas_mass = 0.0; #ifdef HYDRO for ( level = min_level; level <= max_level; level++ ) { select_level( level, CELL_TYPE_LOCAL, &num_level_cells, &level_cells ); for ( i = 0; i < num_level_cells; i++ ) { icell = level_cells[i]; if ( cell_is_leaf( icell ) ) { gas_thermal += cell_volume[level]*cell_gas_pressure(icell)/(constants->gamma-1); kinetic_energy = 0.0; for ( j = 0; j < nDim; 
j++ ) { kinetic_energy += cell_momentum(icell,j)*cell_momentum(icell,j); } kinetic_energy *= 0.5*cell_volume[level]/cell_gas_density(icell); gas_kinetic += kinetic_energy; #ifdef GRAVITY gas_potential += cell_gas_density(icell)*cell_volume[level]*cell_potential(icell); #endif gas_mass += cell_gas_density(icell)*cell_volume[level]; } } cart_free( level_cells ); } /* add stellar mass to gas mass */ #ifdef STAR_FORMATION stellar_mass = 0.0; stellar_initial_mass = 0.0; for ( i = 0; i < num_star_particles; i++ ) { if ( particle_level[i] != FREE_PARTICLE_LEVEL && particle_is_star(i) ) { gas_mass += particle_mass[i]; stellar_mass += particle_mass[i]; stellar_initial_mass += star_initial_mass[i]; } } #endif /* STAR_FORMATION */ MPI_Reduce( &gas_thermal, &total_gas_thermal, 1, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi.comm.run ); MPI_Reduce( &gas_kinetic, &total_gas_kinetic, 1, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi.comm.run ); #ifdef GRAVITY MPI_Reduce( &gas_potential, &total_gas_potential, 1, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi.comm.run ); #endif /* GRAVITY */ MPI_Reduce( &gas_mass, &total_gas_mass, 1, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi.comm.run ); #endif /* HYDRO */ #ifdef STAR_FORMATION old_stellar_mass = total_stellar_mass; old_stellar_initial_mass = total_stellar_initial_mass; MPI_Reduce( &stellar_mass, &total_stellar_mass, 1, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi.comm.run ); MPI_Reduce( &stellar_initial_mass, &total_stellar_initial_mass, 1, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi.comm.run ); d_stellar_mass = MAX( 0.0, total_stellar_mass - old_stellar_mass ); d_stellar_initial_mass = MAX( 0.0, total_stellar_initial_mass - old_stellar_initial_mass ); /* compute resolved volume */ local_resolved_volume[min_level] = 0.0; for ( level = min_level+1; level <= max_level; level++ ) { local_resolved_volume[level] = 0.0; select_level( level, CELL_TYPE_LOCAL, &num_level_cells, &level_cells ); for ( i = 0; i < num_level_cells; i++ ) { icell = level_cells[i]; if ( cell_is_leaf( 
icell ) ) { local_resolved_volume[level] += cell_volume[level]; } } cart_free(level_cells); } MPI_Reduce( local_resolved_volume, resolved_volume, max_level-min_level+1, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi.comm.run ); if ( local_proc_id == MASTER_NODE ) { /* sum resolved volume over all levels except min_level (works for MMR simulations) */ total_resolved_volume = 0.0; for ( level = min_level; level <= max_level; level++ ) { total_resolved_volume += resolved_volume[level]; } #ifdef COSMOLOGY total_resolved_volume *= pow(units->length/constants->Mpc/abox[min_level],3.0); #else total_resolved_volume *= pow(units->length/constants->Mpc,3.0); #endif /* COSMOLOGY */ if ( total_resolved_volume > 0.0 ) { sfr = d_stellar_initial_mass * units->mass / constants->Msun / dtyears / total_resolved_volume; } else { sfr = 0.0; } #ifdef COSMOLOGY fprintf( star_log, "%u %e %e %e %e %e %lu %e %e %e %e %e\n", step, tl[min_level], dtl[min_level], auni[min_level], current_age, dtyears, #else fprintf( star_log, "%u %e %e %e %e %lu %e %e %e %e %e\n", step, tl[min_level], dtl[min_level], current_age, dtyears, #endif /* COSMOLOGY */ particle_species_num[num_particle_species-1], total_stellar_mass*units->mass/constants->Msun, d_stellar_mass*units->mass/constants->Msun, total_stellar_initial_mass*units->mass/constants->Msun, d_stellar_initial_mass*units->mass/constants->Msun, sfr ); fflush(star_log); }