/**
 * (Re-)allocation of the host memory needed for the physical values of the
 * lattice nodes.
 *
 * Resizes the global host_values buffer to lbpar_gpu.number_of_nodes
 * elements of LB_values_gpu.
 *
 * - A resulting size of zero releases the buffer and sets host_values to
 *   NULL instead of calling realloc() with size 0.
 * - If the allocation fails, the previous buffer is released, host_values is
 *   set to NULL and a message is written to stderr.
 */
static void lb_realloc_fluid_gpu() {
  LB_TRACE (printf("#nodes \t %u \n", lbpar_gpu.number_of_nodes));

  /* Struct holding calc phys values rho, j, phi of every node */
  size_t size_of_values = lbpar_gpu.number_of_nodes * sizeof(LB_values_gpu);

  if (size_of_values == 0) {
    /* realloc() with a zero size is implementation-defined, so release the
     * buffer explicitly to get one well-defined outcome. */
    free(host_values);
    host_values = NULL;
  } else {
    /* Go through a temporary: on failure realloc() returns NULL but leaves
     * the old block allocated, so assigning directly would leak it. */
    void *resized = realloc(host_values, size_of_values);
    if (resized == NULL) {
      fprintf(stderr, "lb_realloc_fluid_gpu: failed to allocate %lu bytes for the node values\n",
              (unsigned long)size_of_values);
      /* The old block does not match the requested size; drop it
       * rather than keep a wrongly sized buffer around. */
      free(host_values);
    }
    host_values = resized;
  }

  LB_TRACE (fprintf(stderr,"lb_realloc_fluid_gpu \n"));
}
/** (re-) allocation of the memory needed for the particles (cpu part) */ void lb_realloc_particles_gpu(){ lbpar_gpu.number_of_particles = n_part; LB_TRACE (printf("#particles realloc\t %u \n", lbpar_gpu.number_of_particles)); //fprintf(stderr, "%u \t \n", lbpar_gpu.number_of_particles); /**-----------------------------------------------------*/ /** allocating of the needed memory for several structs */ /**-----------------------------------------------------*/ lbpar_gpu.your_seed = (unsigned int)i_random(max_ran); LB_TRACE (fprintf(stderr,"test your_seed %u \n", lbpar_gpu.your_seed)); lb_realloc_particles_GPU_leftovers(&lbpar_gpu); }
/** (Re-)initializes the fluid. */ void lb_reinit_parameters_gpu() { int ii; lbpar_gpu.time_step = (float)time_step; for(ii=0;ii<LB_COMPONENTS;++ii){ lbpar_gpu.mu[ii] = 0.0; if (lbpar_gpu.viscosity[ii] > 0.0) { /* Eq. (80) Duenweg, Schiller, Ladd, PRE 76(3):036704 (2007). */ lbpar_gpu.gamma_shear[ii] = 1. - 2./(6.*lbpar_gpu.viscosity[ii]*lbpar_gpu.tau/(lbpar_gpu.agrid*lbpar_gpu.agrid) + 1.); } if (lbpar_gpu.bulk_viscosity[ii] > 0.0) { /* Eq. (81) Duenweg, Schiller, Ladd, PRE 76(3):036704 (2007). */ lbpar_gpu.gamma_bulk[ii] = 1. - 2./(9.*lbpar_gpu.bulk_viscosity[ii]*lbpar_gpu.tau/(lbpar_gpu.agrid*lbpar_gpu.agrid) + 1.); } #ifdef SHANCHEN if (lbpar_gpu.mobility[0] > 0.0) { lbpar_gpu.gamma_mobility[0] = 1. - 2./(6.*lbpar_gpu.mobility[0]*lbpar_gpu.tau/(lbpar_gpu.agrid*lbpar_gpu.agrid) + 1.); } #endif if (temperature > 0.0) { /* fluctuating hydrodynamics ? */ lbpar_gpu.fluct = 1; LB_TRACE (fprintf(stderr, "fluct on \n")); /* Eq. (51) Duenweg, Schiller, Ladd, PRE 76(3):036704 (2007).*/ /* Note that the modes are not normalized as in the paper here! */ lbpar_gpu.mu[ii] = (float)temperature*lbpar_gpu.tau*lbpar_gpu.tau/c_sound_sq/(lbpar_gpu.agrid*lbpar_gpu.agrid); /* lb_coupl_pref is stored in MD units (force) * Eq. (16) Ahlrichs and Duenweg, JCP 111(17):8225 (1999). * The factor 12 comes from the fact that we use random numbers * from -0.5 to 0.5 (equally distributed) which have variance 1/12. * time_step comes from the discretization. */ lbpar_gpu.lb_coupl_pref[ii] = sqrt(12.f*2.f*lbpar_gpu.friction[ii]*(float)temperature/lbpar_gpu.time_step); lbpar_gpu.lb_coupl_pref2[ii] = sqrt(2.f*lbpar_gpu.friction[ii]*(float)temperature/lbpar_gpu.time_step); } else { /* no fluctuations at zero temperature */ lbpar_gpu.fluct = 0; lbpar_gpu.lb_coupl_pref[ii] = 0.0; lbpar_gpu.lb_coupl_pref2[ii] = 0.0; } LB_TRACE (fprintf(stderr,"lb_reinit_prarameters_gpu \n")); } reinit_parameters_GPU(&lbpar_gpu); }
/**
 * Performs a full initialization of the Lattice Boltzmann system.
 * All derived parameters and the fluid are reset to their default values.
 *
 * The steps run in a fixed order: derived parameters, particle memory,
 * GPU-side initialization, particle communication setup and finally the
 * broadcast of the global particle parameters.
 */
void lb_init_gpu() {

  LB_TRACE(printf("Begin initialzing fluid on GPU\n"));

  /* set parameters for transfer to gpu */
  lb_reinit_parameters_gpu();

  lb_realloc_particles_gpu();

  lb_init_GPU(&lbpar_gpu);

  gpu_init_particle_comm();
  cuda_bcast_global_part_params();

  LB_TRACE(printf("Initialzing fluid on GPU successful\n"));
}
/**
 * Allocation of the host memory needed for the physical values of the
 * lattice nodes, done before the actual initialization.
 *
 * Resets the particle count in lbpar_gpu to zero and allocates host_values
 * for lbpar_gpu.number_of_nodes elements of LB_values_gpu. If a non-empty
 * allocation fails, host_values is NULL and a message is written to stderr.
 */
void lb_pre_init_gpu() {
  lbpar_gpu.number_of_particles = 0;

  LB_TRACE (fprintf(stderr,"#nodes \t %u \n", lbpar_gpu.number_of_nodes));

  /* Struct holding calc phys values rho, j, phi of every node */
  size_t size_of_values = lbpar_gpu.number_of_nodes * sizeof(LB_values_gpu);

  host_values = (LB_values_gpu*)malloc(size_of_values);
  /* malloc(0) may legitimately return NULL, so only a failed non-empty
   * request is reported. */
  if (host_values == NULL && size_of_values != 0) {
    fprintf(stderr, "lb_pre_init_gpu: failed to allocate %lu bytes for the node values\n",
            (unsigned long)size_of_values);
  }

  LB_TRACE (fprintf(stderr,"lb_pre_init_gpu \n"));
}
/** (re-) allocation of the memory need for the particles (cpu part)*/ void lb_realloc_particles_gpu(){ lbpar_gpu.number_of_particles = n_total_particles; LB_TRACE (printf("#particles realloc\t %u \n", lbpar_gpu.number_of_particles)); /**-----------------------------------------------------*/ /** allocating of the needed memory for several structs */ /**-----------------------------------------------------*/ /**Allocate struct for particle forces */ size_t size_of_forces = lbpar_gpu.number_of_particles * sizeof(LB_particle_force_gpu); host_forces = realloc(host_forces, size_of_forces); lbpar_gpu.your_seed = (unsigned int)i_random(max_ran); LB_TRACE (fprintf(stderr,"test your_seed %u \n", lbpar_gpu.your_seed)); lb_realloc_particle_GPU(&lbpar_gpu, &host_data); }
/**
 * (Re-)initializes the fluid according to the given value of rho.
 *
 * Draws a new seed with i_random(max_ran), stores it in lbpar_gpu and then
 * runs lb_init_GPU() on the parameter struct.
 */
void lb_reinit_fluid_gpu() {
  const unsigned int new_seed = (unsigned int)i_random(max_ran);

  lbpar_gpu.your_seed = new_seed;
  lb_init_GPU(&lbpar_gpu);

  LB_TRACE (fprintf(stderr,"lb_reinit_fluid_gpu \n"));
}
/** (Re-)initializes the fluid. */ void lb_reinit_parameters_gpu() { lbpar_gpu.mu = 0.0; lbpar_gpu.time_step = (float)time_step; if (lbpar_gpu.viscosity > 0.0) { /* Eq. (80) Duenweg, Schiller, Ladd, PRE 76(3):036704 (2007). */ lbpar_gpu.gamma_shear = 1. - 2./(6.*lbpar_gpu.viscosity*lbpar_gpu.tau/(lbpar_gpu.agrid*lbpar_gpu.agrid) + 1.); } if (lbpar_gpu.bulk_viscosity > 0.0) { /* Eq. (81) Duenweg, Schiller, Ladd, PRE 76(3):036704 (2007). */ lbpar_gpu.gamma_bulk = 1. - 2./(9.*lbpar_gpu.bulk_viscosity*lbpar_gpu.tau/(lbpar_gpu.agrid*lbpar_gpu.agrid) + 1.); } if (temperature > 0.0) { /* fluctuating hydrodynamics ? */ lbpar_gpu.fluct = 1; LB_TRACE (fprintf(stderr, "fluct on \n")); /* Eq. (51) Duenweg, Schiller, Ladd, PRE 76(3):036704 (2007).*/ /* Note that the modes are not normalized as in the paper here! */ lbpar_gpu.mu = (float)temperature/c_sound_sq*lbpar_gpu.tau*lbpar_gpu.tau/(lbpar_gpu.agrid*lbpar_gpu.agrid); //lbpar_gpu->mu *= agrid*agrid*agrid; // Marcello's conjecture /* lb_coupl_pref is stored in MD units (force) * Eq. (16) Ahlrichs and Duenweg, JCP 111(17):8225 (1999). * The factor 12 comes from the fact that we use random numbers * from -0.5 to 0.5 (equally distributed) which have variance 1/12. * time_step comes from the discretization. */ lbpar_gpu.lb_coupl_pref = sqrt(12.f*2.f*lbpar_gpu.friction*(float)temperature/lbpar_gpu.time_step); lbpar_gpu.lb_coupl_pref2 = sqrt(2.f*lbpar_gpu.friction*(float)temperature/lbpar_gpu.time_step); } else { /* no fluctuations at zero temperature */ lbpar_gpu.fluct = 0; lbpar_gpu.lb_coupl_pref = 0.0; lbpar_gpu.lb_coupl_pref2 = 0.0; } LB_TRACE (fprintf(stderr,"lb_reinit_prarameters_gpu \n")); }
/**
 * Performs a full initialization of the Lattice Boltzmann system.
 * All derived parameters and the fluid are reset to their default values.
 *
 * The steps run in a fixed order: derived parameters, particle memory,
 * node-value memory and finally the GPU-side initialization.
 */
void lb_init_gpu() {

  /* set parameters for transfer to gpu */
  lb_reinit_parameters_gpu();

  /* host-side buffers */
  lb_realloc_particles_gpu();
  lb_realloc_fluid_gpu();

  lb_init_GPU(&lbpar_gpu);

  LB_TRACE (fprintf(stderr,"lb_init_gpu \n"));
}
/**
 * Lattice Boltzmann update on the GPU, called from integrate.c.
 *
 * Counts calls in fluidstep and triggers lb_integrate_GPU() once the counter
 * reaches round(lbpar_gpu.tau / time_step); the counter is reset to zero on
 * every triggered integration.
 */
void lattice_boltzmann_update_gpu() {
  const int steps_per_update = (int)round(lbpar_gpu.tau/time_step);

  ++fluidstep;

  /* not yet time for the next LB step */
  if (!(fluidstep >= steps_per_update)) {
    return;
  }

  fluidstep = 0;
  lb_integrate_GPU();
  LB_TRACE (fprintf(stderr,"lb_integrate_GPU \n"));
}
/** (Re-)initializes the fluid according to the given value of rho. */ void lb_reinit_fluid_gpu() { //lbpar_gpu.your_seed = (unsigned int)i_random(max_ran); lb_reinit_parameters_gpu(); //#ifdef SHANCHEN // lb_calc_particle_lattice_ia_gpu(); // copy_forces_from_GPU(); //#endif if(lbpar_gpu.number_of_nodes != 0){ lb_reinit_GPU(&lbpar_gpu); lbpar_gpu.reinit = 1; } LB_TRACE (fprintf(stderr,"lb_reinit_fluid_gpu \n")); }
/**
 * Copy forces from gpu to cpu and call mpi routines to add forces to
 * particles.
 *
 * Does nothing unless transfer_momentum_gpu is set. On node 0 the forces are
 * fetched with lb_copy_forces_GPU() when the particle count is nonzero;
 * mpi_send_forces_lb() is then called regardless of this_node.
 */
void lb_send_forces_gpu(){

  if (!transfer_momentum_gpu) {
    return;
  }

  if (this_node == 0) {
    if (lbpar_gpu.number_of_particles) {
      lb_copy_forces_GPU(host_forces);
    }

    LB_TRACE (fprintf(stderr,"lb_send_forces_gpu \n"));
#if 0
    for (i=0;i<n_total_particles;i++) {
      fprintf(stderr, "%i particle forces , %f %f %f \n", i, host_forces[i].f[0], host_forces[i].f[1], host_forces[i].f[2]);
    }
#endif
  }

  mpi_send_forces_lb(host_forces);
}
/**
 * Parser for the \ref lbfluid command gpu.
 *
 * Walks over the argument list and dispatches each recognized keyword to its
 * lbfluid_parse_* helper, which receives the remaining arguments and reports
 * through its last parameter how many of them it consumed. Parsing stops
 * with TCL_ERROR at the first unknown keyword or failing helper. After all
 * arguments are processed, the lattice switch and the thermostat switch
 * (with THERMO_LB set) are broadcast.
 *
 * Without LB_GPU compiled in, the command only reports an error.
 *
 * @param interp Tcl interpreter that receives error messages
 * @param argc   number of entries in argv
 * @param argv   keyword/value arguments of the command
 * @return TCL_ERROR on failure, otherwise the status of the last helper
 *         (TCL_OK if there were no arguments)
 */
int tclcommand_lbfluid_gpu(Tcl_Interp *interp, int argc, char **argv) {
#ifdef LB_GPU
  int status = TCL_OK;
  int consumed = 0;

  while (argc > 0) {
    if (ARG0_IS_S("grid") || ARG0_IS_S("agrid")) {
      status = lbfluid_parse_agrid(interp, argc-1, argv+1, &consumed);
    } else if (ARG0_IS_S("tau")) {
      status = lbfluid_parse_tau(interp, argc-1, argv+1, &consumed);
    } else if (ARG0_IS_S("density") || ARG0_IS_S("dens")) {
      status = lbfluid_parse_density(interp, argc-1, argv+1, &consumed);
    } else if (ARG0_IS_S("viscosity") || ARG0_IS_S("visc")) {
      status = lbfluid_parse_viscosity(interp, argc-1, argv+1, &consumed);
    } else if (ARG0_IS_S("bulk_viscosity") || ARG0_IS_S("b_visc")) {
      status = lbfluid_parse_bulk_visc(interp, argc-1, argv+1, &consumed);
    } else if (ARG0_IS_S("friction") || ARG0_IS_S("coupling")) {
      status = lbfluid_parse_friction(interp, argc-1, argv+1, &consumed);
    } else if (ARG0_IS_S("ext_force")) {
      status = lbfluid_parse_ext_force(interp, argc-1, argv+1, &consumed);
    } else if (ARG0_IS_S("gamma_odd")) {
      status = lbfluid_parse_gamma_odd(interp, argc-1, argv+1, &consumed);
    } else if (ARG0_IS_S("gamma_even")) {
      status = lbfluid_parse_gamma_even(interp, argc-1, argv+1, &consumed);
    } else {
      Tcl_AppendResult(interp, "unknown feature \"", argv[0],"\" of lbfluid", (char *)NULL);
      return TCL_ERROR;
    }

    if (status == TCL_ERROR) {
      return TCL_ERROR;
    }

    /* skip the keyword itself plus everything its helper consumed */
    argc -= consumed + 1;
    argv += consumed + 1;
  }

  mpi_bcast_parameter(FIELD_LATTICE_SWITCH);

  /* thermo_switch is retained for backwards compatibility */
  thermo_switch = (thermo_switch | THERMO_LB);
  mpi_bcast_parameter(FIELD_THERMO_SWITCH);

  LB_TRACE (fprintf(stderr,"tclcommand_lbfluid_gpu parser ok \n"));

  return status;
#else /* !defined LB_GPU */
  Tcl_AppendResult(interp, "LB_GPU is not compiled in!", NULL);
  return TCL_ERROR;
#endif
}
/**
 * Lattice Boltzmann update on the GPU.
 *
 * Counts calls in fluidstep and triggers lb_integrate_GPU() once the counter
 * reaches round(lbpar_gpu.tau / time_step); the counter is reset to zero on
 * every triggered integration. With SHANCHEN, the fluid momentum is removed
 * afterwards when lbpar_gpu.remove_momentum is set.
 */
void lattice_boltzmann_update_gpu() {
  const int steps_per_update = (int)round(lbpar_gpu.tau/time_step);

  ++fluidstep;

  /* not yet time for the next LB step */
  if (!(fluidstep >= steps_per_update)) {
    return;
  }

  fluidstep = 0;
  lb_integrate_GPU();
#ifdef SHANCHEN
  if (lbpar_gpu.remove_momentum) {
    lb_remove_fluid_momentum_GPU();
  }
#endif
  LB_TRACE (fprintf(stderr,"lb_integrate_GPU \n"));
}
/** Calculate particle lattice interactions called from forces.c */ void lb_calc_particle_lattice_ia_gpu() { if (transfer_momentum_gpu) { mpi_get_particles_lb(host_data); if(this_node == 0){ #if 0 for (i=0;i<n_total_particles;i++) { fprintf(stderr, "%i particle posi: , %f %f %f\n", i, host_data[i].p[0], host_data[i].p[1], host_data[i].p[2]); } #endif if(lbpar_gpu.number_of_particles) lb_particle_GPU(host_data); LB_TRACE (fprintf(stderr,"lb_calc_particle_lattice_ia_gpu \n")); } } }
/**
 * Parses the lattice spacing argument of the lbfluid command.
 *
 * On success the spacing is stored in lbpar_gpu.agrid, the lattice
 * dimensions dim_x/dim_y/dim_z are derived from box_l and the total node
 * count is updated. A box length that is not a multiple of the spacing
 * (within ROUND_ERROR_PREC) is reported through runtime_error(); the
 * function still returns TCL_OK in that case.
 *
 * @param interp Tcl interpreter that receives error messages
 * @param argc   number of arguments following the keyword
 * @param argv   arguments following the keyword; argv[0] is the spacing
 * @param change set to 1 (the number of arguments consumed) on success
 * @return TCL_OK on success, TCL_ERROR if the argument is missing, not a
 *         number or not positive
 */
static int lbfluid_parse_agrid(Tcl_Interp *interp, int argc, char *argv[], int *change) {
  double agrid;

  if (argc < 1) {
    Tcl_AppendResult(interp, "agrid requires 1 argument", (char *)NULL);
    return TCL_ERROR;
  }
  if (!ARG0_IS_D(agrid)) {
    Tcl_AppendResult(interp, "wrong argument for agrid", (char *)NULL);
    return TCL_ERROR;
  }
  if (agrid <= 0.0) {
    Tcl_AppendResult(interp, "agrid must be positive", (char *)NULL);
    return TCL_ERROR;
  }

  *change = 1;
  lbpar_gpu.agrid = (float)agrid;

  /* Round to the nearest integer instead of truncating: a quotient such as
   * 0.3/0.1 evaluates to just below 3 in floating point, and floor() would
   * then yield one node too few and trigger a spurious incompatibility
   * error below. */
  lbpar_gpu.dim_x = (unsigned int)round(box_l[0]/agrid);
  lbpar_gpu.dim_y = (unsigned int)round(box_l[1]/agrid);
  lbpar_gpu.dim_z = (unsigned int)round(box_l[2]/agrid);

  unsigned int tmp[3];
  tmp[0] = lbpar_gpu.dim_x;
  tmp[1] = lbpar_gpu.dim_y;
  tmp[2] = lbpar_gpu.dim_z;

  /* sanity checks */
  int dir;
  for (dir=0;dir<3;dir++) {
    /* check if box_l is compatible with lattice spacing */
    if (fabs(box_l[dir]-tmp[dir]*agrid) > ROUND_ERROR_PREC) {
      char *errtxt = runtime_error(128);
      ERROR_SPRINTF(errtxt, "{097 Lattice spacing agrid=%f is incompatible with box_l[%i]=%f} ",agrid,dir,box_l[dir]);
    }
  }

  lbpar_gpu.number_of_nodes = lbpar_gpu.dim_x * lbpar_gpu.dim_y * lbpar_gpu.dim_z;
  LB_TRACE (printf("#nodes \t %u \n", lbpar_gpu.number_of_nodes));

  return TCL_OK;
}
/**
 * Printing the whole fluid field to file with order
 * x+y*dim_x+z*dim_x*dim_y.
 *
 * Skips the command name and then processes the features in order:
 * - "u" / "velocity" / "v", optionally followed by "vtk", is handled by
 *   lbprint_parse_velocity();
 * - "rho" / "density" is handled by lbprint_parse_density();
 * - "stresstensor" is rejected with an explanatory message.
 *
 * Parsing stops at the first error, so a later successful feature cannot
 * mask an earlier failure.
 *
 * @param data   Tcl client data (unused here)
 * @param interp Tcl interpreter that receives error messages
 * @param argc   number of entries in argv, including the command name
 * @param argv   command name followed by the features to print
 * @return TCL_OK if every feature was processed, TCL_ERROR otherwise
 */
int tclcommand_lbprint_gpu(ClientData data, Tcl_Interp *interp, int argc, char **argv) {
  int err = TCL_OK;
  int change = 0;
  int vtk = 0;

  /* skip the command name */
  argc--; argv++;

  if (argc < 1) {
    Tcl_AppendResult(interp, "too few arguments to \"lbprint\"", (char *)NULL);
    return TCL_ERROR;
  }

  while (argc > 0) {
    /* number of argv entries to step over once this feature is handled */
    int skip;

    /* do not carry the count of a previous feature into this one */
    change = 0;

    if (ARG0_IS_S("u") || ARG0_IS_S("velocity") || ARG0_IS_S("v")) {
      /* consume the keyword so that argv[0] is the optional "vtk" flag */
      argc--; argv++;
      /* only look at argv[0] if an argument is actually left */
      if (argc > 0 && ARG0_IS_S("vtk")) {
        vtk = 1;
        err = lbprint_parse_velocity(interp, argc-1, argv+1, &change, vtk);
        /* the "vtk" flag plus the arguments taken by the helper */
        skip = change + 1;
      }
      else {
        err = lbprint_parse_velocity(interp, argc, argv, &change, vtk);
        /* The keyword is already consumed, so only the helper's arguments
         * remain to be skipped. NOTE(review): assumes change counts the
         * arguments consumed by the helper, as in lbfluid_parse_agrid. */
        skip = change;
      }
    }
    else if (ARG0_IS_S("rho") || ARG0_IS_S("density")) {
      err = lbprint_parse_density(interp, argc-1, argv+1, &change);
      skip = change + 1;
    }
    else if (ARG0_IS_S("stresstensor")) {
      Tcl_AppendResult(interp, "\"lbprint stresstensor\" is not available by default due to memory saving, pls ensure availablity of pi[6] (see lbgpu.h) and lbprint_parse_stresstensor()", (char *)NULL);
      return TCL_ERROR;
    }
    else {
      Tcl_AppendResult(interp, "unknown feature \"", argv[0],"\" of lbprint", (char *)NULL);
      return TCL_ERROR;
    }

    /* same policy as tclcommand_lbfluid_gpu: bail out on the first error */
    if (err == TCL_ERROR) {
      return TCL_ERROR;
    }

    argc -= skip;
    argv += skip;

    LB_TRACE (fprintf(stderr,"tclcommand_lbprint_gpu parser ok \n"));
  }

  return err;
}
/** (Re-)initializes the fluid. */ void lb_reinit_parameters_gpu() { int ii; lbpar_gpu.time_step = (float)time_step; for(ii=0;ii<LB_COMPONENTS;++ii){ lbpar_gpu.mu[ii] = 0.0; if (lbpar_gpu.viscosity[ii] > 0.0) { /* Eq. (80) Duenweg, Schiller, Ladd, PRE 76(3):036704 (2007). */ lbpar_gpu.gamma_shear[ii] = 1. - 2./(6.*lbpar_gpu.viscosity[ii]*lbpar_gpu.tau/(lbpar_gpu.agrid*lbpar_gpu.agrid) + 1.); } if (lbpar_gpu.bulk_viscosity[ii] > 0.0) { /* Eq. (81) Duenweg, Schiller, Ladd, PRE 76(3):036704 (2007). */ lbpar_gpu.gamma_bulk[ii] = 1. - 2./(9.*lbpar_gpu.bulk_viscosity[ii]*lbpar_gpu.tau/(lbpar_gpu.agrid*lbpar_gpu.agrid) + 1.); } #ifdef SHANCHEN if (lbpar_gpu.mobility[0] > 0.0) { lbpar_gpu.gamma_mobility[0] = 1. - 2./(6.*lbpar_gpu.mobility[0]*lbpar_gpu.tau/(lbpar_gpu.agrid*lbpar_gpu.agrid) + 1.); } #endif if (temperature > 0.0) { /* fluctuating hydrodynamics ? */ lbpar_gpu.fluct = 1; LB_TRACE (fprintf(stderr, "fluct on \n")); /* Eq. (51) Duenweg, Schiller, Ladd, PRE 76(3):036704 (2007).*/ /* Note that the modes are not normalized as in the paper here! */ lbpar_gpu.mu[ii] = (float)temperature*lbpar_gpu.tau*lbpar_gpu.tau/c_sound_sq/(lbpar_gpu.agrid*lbpar_gpu.agrid); /* lb_coupl_pref is stored in MD units (force) * Eq. (16) Ahlrichs and Duenweg, JCP 111(17):8225 (1999). * The factor 12 comes from the fact that we use random numbers * from -0.5 to 0.5 (equally distributed) which have variance 1/12. * time_step comes from the discretization. 
*/ lbpar_gpu.lb_coupl_pref[ii] = sqrt(12.f*2.f*lbpar_gpu.friction[ii]*(float)temperature/lbpar_gpu.time_step); lbpar_gpu.lb_coupl_pref2[ii] = sqrt(2.f*lbpar_gpu.friction[ii]*(float)temperature/lbpar_gpu.time_step); } else { /* no fluctuations at zero temperature */ lbpar_gpu.fluct = 0; lbpar_gpu.lb_coupl_pref[ii] = 0.0; lbpar_gpu.lb_coupl_pref2[ii] = 0.0; } LB_TRACE (fprintf(stderr,"lb_reinit_prarameters_gpu \n")); } #ifdef ELECTROKINETICS if (ek_initialized) { lbpar_gpu.dim_x = (unsigned int) round(box_l[0] / lbpar_gpu.agrid); //TODO code duplication with lb.c start lbpar_gpu.dim_y = (unsigned int) round(box_l[1] / lbpar_gpu.agrid); lbpar_gpu.dim_z = (unsigned int) round(box_l[2] / lbpar_gpu.agrid); unsigned int tmp[3]; tmp[0] = lbpar_gpu.dim_x; tmp[1] = lbpar_gpu.dim_y; tmp[2] = lbpar_gpu.dim_z; /* sanity checks */ int dir; for (dir=0;dir<3;dir++) { /* check if box_l is compatible with lattice spacing */ if (fabs(box_l[dir] - tmp[dir] * lbpar_gpu.agrid) > 1.0e-3) { ostringstream msg; msg <<"Lattice spacing lbpar_gpu.agrid= "<< lbpar_gpu.agrid << " is incompatible with box_l[" << dir << "]=" << box_l[dir]; runtimeError(msg); } } lbpar_gpu.number_of_nodes = lbpar_gpu.dim_x * lbpar_gpu.dim_y * lbpar_gpu.dim_z; lbpar_gpu.tau = (float) time_step; //TODO code duplication with lb.c end } #endif LB_TRACE (fprintf(stderr,"lb_reinit_prarameters_gpu \n")); reinit_parameters_GPU(&lbpar_gpu); }