static void mpi_send_forces_slave_lb() {
  int n_part;
  int g;
  LB_particle_force_gpu *host_forces_sl;
  Cell *cell;
  int c, i;
  MPI_Status status;

  n_part = cells_get_n_particles();

  COMM_TRACE(fprintf(stderr, "%d: send_particles_slave, %d particles\n", this_node, n_part));

  if (n_part > 0) {
    /* receive the forces for the local particles from the master node
       and add them to the (unsorted) local particle array */
    host_forces_sl = malloc(n_part*sizeof(LB_particle_force_gpu));
    MPI_Recv(host_forces_sl, n_part*sizeof(LB_particle_force_gpu), MPI_BYTE, 0, REQ_GETPARTS,
             MPI_COMM_WORLD, &status);
    g = 0;
    for (c = 0; c < local_cells.n; c++) {
      int npart;
      cell = local_cells.cell[c];
      npart = cell->n;
      for (i = 0; i < npart; i++) {
        cell->part[i].f.f[0] += (double)host_forces_sl[i+g].f[0];
        cell->part[i].f.f[1] += (double)host_forces_sl[i+g].f[1];
        cell->part[i].f.f[2] += (double)host_forces_sl[i+g].f[2];
      }
      g += npart;
    }
    free(host_forces_sl);
  }
}
void cuda_mpi_send_v_cs(CUDA_v_cs *host_v_cs) {
  int n_part;
  int g;
  Cell *cell;
  int *sizes;

  // first collect the number of particles on each node
  sizes = (int *) Utils::malloc(sizeof(int)*n_nodes);
  n_part = cells_get_n_particles();
  MPI_Gather(&n_part, 1, MPI_INT, sizes, 1, MPI_INT, 0, comm_cart);

  // call the slave function to provide the slave data
  if (this_node > 0) {
    cuda_mpi_send_v_cs_slave();
  }
  else {
    // master: copy the swimmer coupling velocities to the local particles
    // and send each slave its slice of host_v_cs
    g = 0;
    for (int pnode = 0; pnode < n_nodes; pnode++) {
      if (sizes[pnode] > 0) {
        if (pnode == 0) {
          for (int c = 0; c < local_cells.n; c++) {
            int npart;
            cell = local_cells.cell[c];
            npart = cell->n;
            for (int i = 0; i < npart; i++) {
              cell->part[i].swim.v_center[0] = (double)host_v_cs[i+g].v_cs[0];
              cell->part[i].swim.v_center[1] = (double)host_v_cs[i+g].v_cs[1];
              cell->part[i].swim.v_center[2] = (double)host_v_cs[i+g].v_cs[2];
              cell->part[i].swim.v_source[0] = (double)host_v_cs[i+g].v_cs[3];
              cell->part[i].swim.v_source[1] = (double)host_v_cs[i+g].v_cs[4];
              cell->part[i].swim.v_source[2] = (double)host_v_cs[i+g].v_cs[5];
            }
            g += npart;
          }
        }
        else {
          // send the slice belonging to this slave node
          MPI_Send(&host_v_cs[g], sizes[pnode]*sizeof(CUDA_v_cs), MPI_BYTE, pnode, 73, comm_cart);
          g += sizes[pnode];
        }
      }
    }
  }
  COMM_TRACE(fprintf(stderr, "%d: finished send\n", this_node));
  free(sizes);
}
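/* For reference (added, not in the original file): the master branch above and
   cuda_mpi_send_v_cs_slave() further below decode CUDA_v_cs with the same
   convention, v_cs[0..2] holding the swimmer center velocity and v_cs[3..5]
   the source velocity. A hypothetical helper capturing that convention: */
static void example_unpack_v_cs(Particle *p, const CUDA_v_cs *vc) {
  for (int k = 0; k < 3; k++) {
    p->swim.v_center[k] = (double)vc->v_cs[k];      /* components 0..2 */
    p->swim.v_source[k] = (double)vc->v_cs[k + 3];  /* components 3..5 */
  }
}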
void cuda_mpi_send_forces(CUDA_particle_force *host_forces, CUDA_fluid_composition *host_composition) {
  int n_part;
  int g, pnode;
  Cell *cell;
  int c;
  int i;
  int *sizes;

  sizes = (int *) malloc(sizeof(int)*n_nodes);
  n_part = cells_get_n_particles();
  /* first collect the number of particles on each node */
  MPI_Gather(&n_part, 1, MPI_INT, sizes, 1, MPI_INT, 0, comm_cart);

  /* call the slave function to provide the slave data */
  if (this_node > 0) {
    cuda_mpi_send_forces_slave();
  }
  else {
    /* master: add the GPU forces to the local particles and send each
       slave its slice of host_forces */
    g = 0;
    for (pnode = 0; pnode < n_nodes; pnode++) {
      if (sizes[pnode] > 0) {
        if (pnode == 0) {
          for (c = 0; c < local_cells.n; c++) {
            int npart;
            cell = local_cells.cell[c];
            npart = cell->n;
            for (i = 0; i < npart; i++) {
              cell->part[i].f.f[0] += (double)host_forces[i+g].f[0];
              cell->part[i].f.f[1] += (double)host_forces[i+g].f[1];
              cell->part[i].f.f[2] += (double)host_forces[i+g].f[2];
#ifdef SHANCHEN
              for (int ii = 0; ii < LB_COMPONENTS; ii++) {
                cell->part[i].r.composition[ii] = (double)host_composition[i+g].weight[ii];
              }
#endif
            }
            g += npart;
          }
        }
        else {
          /* send the slice belonging to this slave node */
          MPI_Send(&host_forces[g], sizes[pnode]*sizeof(CUDA_particle_force), MPI_BYTE, pnode, REQ_CUDAGETFORCES, comm_cart);
#ifdef SHANCHEN
          MPI_Send(&host_composition[g], sizes[pnode]*sizeof(CUDA_fluid_composition), MPI_BYTE, pnode, REQ_CUDAGETPARTS, comm_cart);
#endif
          g += sizes[pnode];
        }
      }
    }
  }
  COMM_TRACE(fprintf(stderr, "%d: finished send\n", this_node));
  free(sizes);
}
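/* A minimal sketch (not part of the original file) of how the get/send pair
   is typically driven: every MPI rank calls the public functions, and the
   this_node > 0 branch routes slaves into the *_slave variants. The
   n_total_particles argument, the buffer handling, and the kernel call
   run_gpu_force_kernel() are assumptions for illustration only; the SHANCHEN
   composition buffer is omitted here. */
static void example_gpu_force_roundtrip(int n_total_particles) {
  CUDA_particle_data *parts = NULL;
  CUDA_particle_force *forces = NULL;

  if (this_node == 0) {
    /* only the master holds the full, node-ordered host buffers */
    parts  = (CUDA_particle_data *)  malloc(n_total_particles*sizeof(CUDA_particle_data));
    forces = (CUDA_particle_force *) malloc(n_total_particles*sizeof(CUDA_particle_force));
  }

  cuda_mpi_get_particles(parts);  /* slaves branch into the slave routine internally */
  if (this_node == 0) {
    run_gpu_force_kernel(parts, forces, n_total_particles);  /* hypothetical GPU step */
  }
  cuda_mpi_send_forces(forces, NULL);  /* scatter forces back and add them to the particles */

  if (this_node == 0) {
    free(parts);
    free(forces);
  }
}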
static void mpi_get_particles_slave_lb() {
  int n_part;
  int g;
  LB_particle_gpu *host_data_sl;
  Cell *cell;
  int c, i;

  n_part = cells_get_n_particles();

  COMM_TRACE(fprintf(stderr, "%d: get_particles_slave, %d particles\n", this_node, n_part));

  if (n_part > 0) {
    /* pack the (unsorted) local particle information into a flat array */
    host_data_sl = malloc(n_part*sizeof(LB_particle_gpu));
    g = 0;
    for (c = 0; c < local_cells.n; c++) {
      Particle *part;
      int npart;
      int dummy[3] = {0,0,0};
      double pos[3];
      cell = local_cells.cell[c];
      part = cell->part;
      npart = cell->n;
      for (i = 0; i < npart; i++) {
        memcpy(pos, part[i].r.p, 3*sizeof(double));
        fold_position(pos, dummy);

        host_data_sl[i+g].p[0] = (float)pos[0];
        host_data_sl[i+g].p[1] = (float)pos[1];
        host_data_sl[i+g].p[2] = (float)pos[2];

        host_data_sl[i+g].v[0] = (float)part[i].m.v[0];
        host_data_sl[i+g].v[1] = (float)part[i].m.v[1];
        host_data_sl[i+g].v[2] = (float)part[i].m.v[2];
#ifdef LB_ELECTROHYDRODYNAMICS
        host_data_sl[i+g].mu_E[0] = (float)part[i].p.mu_E[0];
        host_data_sl[i+g].mu_E[1] = (float)part[i].p.mu_E[1];
        host_data_sl[i+g].mu_E[2] = (float)part[i].p.mu_E[2];
#endif
      }
      g += npart;
    }
    /* and send it back to the master node */
    MPI_Send(host_data_sl, n_part*sizeof(LB_particle_gpu), MPI_BYTE, 0, REQ_GETPARTS, MPI_COMM_WORLD);
    free(host_data_sl);
  }
}
static void cuda_mpi_send_forces_slave() {
  int n_part;
  CUDA_particle_force *host_forces_sl = NULL;
#ifdef SHANCHEN
  CUDA_fluid_composition *host_composition_sl = NULL;
#endif
  Cell *cell;
  int c, i;
  MPI_Status status;

  n_part = cells_get_n_particles();

  COMM_TRACE(fprintf(stderr, "%d: send_particles_slave, %d particles\n", this_node, n_part));

  if (n_part > 0) {
    int g = 0;
    /* receive the forces (and, with SHANCHEN, the fluid composition) for the
       local particles from the master node */
    host_forces_sl = (CUDA_particle_force *) malloc(n_part*sizeof(CUDA_particle_force));
    MPI_Recv(host_forces_sl, n_part*sizeof(CUDA_particle_force), MPI_BYTE, 0, REQ_CUDAGETFORCES,
             comm_cart, &status);
#ifdef SHANCHEN
    host_composition_sl = (CUDA_fluid_composition *) malloc(n_part*sizeof(CUDA_fluid_composition));
    /* the receive size must match the CUDA_fluid_composition slice sent by the master */
    MPI_Recv(host_composition_sl, n_part*sizeof(CUDA_fluid_composition), MPI_BYTE, 0, REQ_CUDAGETPARTS,
             comm_cart, &status);
#endif
    for (c = 0; c < local_cells.n; c++) {
      int npart;
      cell = local_cells.cell[c];
      npart = cell->n;
      for (i = 0; i < npart; i++) {
        cell->part[i].f.f[0] += (double)host_forces_sl[i+g].f[0];
        cell->part[i].f.f[1] += (double)host_forces_sl[i+g].f[1];
        cell->part[i].f.f[2] += (double)host_forces_sl[i+g].f[2];
#ifdef SHANCHEN
        for (int ii = 0; ii < LB_COMPONENTS; ii++) {
          cell->part[i].r.composition[ii] = (double)host_composition_sl[i+g].weight[ii];
        }
#endif
      }
      g += npart;
    }
    free(host_forces_sl);
#ifdef SHANCHEN
    free(host_composition_sl);
#endif
  }
}
static void mpi_send_forces_lb(LB_particle_force_gpu *host_forces) {
  int n_part;
  int g, pnode;
  Cell *cell;
  int c;
  int i;
  int *sizes;

  sizes = malloc(sizeof(int)*n_nodes);
  n_part = cells_get_n_particles();
  /* first collect the number of particles on each node */
  MPI_Gather(&n_part, 1, MPI_INT, sizes, 1, MPI_INT, 0, MPI_COMM_WORLD);

  /* call the slave function to provide the slave data */
  if (this_node > 0) {
    mpi_send_forces_slave_lb();
  }
  else {
    /* master: add the forces to the local particles and send each slave
       its slice of host_forces */
    g = 0;
    for (pnode = 0; pnode < n_nodes; pnode++) {
      if (sizes[pnode] > 0) {
        if (pnode == 0) {
          for (c = 0; c < local_cells.n; c++) {
            int npart;
            cell = local_cells.cell[c];
            npart = cell->n;
            for (i = 0; i < npart; i++) {
              cell->part[i].f.f[0] += (double)host_forces[i+g].f[0];
              cell->part[i].f.f[1] += (double)host_forces[i+g].f[1];
              cell->part[i].f.f[2] += (double)host_forces[i+g].f[2];
            }
            g += npart;
          }
        }
        else {
          /* send the slice belonging to this slave node */
          MPI_Send(&host_forces[g], sizes[pnode]*sizeof(LB_particle_force_gpu), MPI_BYTE, pnode, REQ_GETPARTS, MPI_COMM_WORLD);
          g += sizes[pnode];
        }
      }
    }
  }
  COMM_TRACE(fprintf(stderr, "%d: finished send\n", this_node));
  free(sizes);
}
static void cuda_mpi_send_v_cs_slave() {
  int n_part;
  CUDA_v_cs *host_v_cs_slave = NULL;
  Cell *cell;
  MPI_Status status;

  n_part = cells_get_n_particles();

  COMM_TRACE(fprintf(stderr, "%d: send_particles_slave, %d particles\n", this_node, n_part));

  if (n_part > 0) {
    int g = 0;
    // receive the swimmer coupling velocities for the local particles
    // from the master node
    host_v_cs_slave = (CUDA_v_cs *) Utils::malloc(n_part*sizeof(CUDA_v_cs));
    MPI_Recv(host_v_cs_slave, n_part*sizeof(CUDA_v_cs), MPI_BYTE, 0, 73, comm_cart, &status);
    for (int c = 0; c < local_cells.n; c++) {
      int npart;
      cell = local_cells.cell[c];
      npart = cell->n;
      for (int i = 0; i < npart; i++) {
        cell->part[i].swim.v_center[0] = (double)host_v_cs_slave[i+g].v_cs[0];
        cell->part[i].swim.v_center[1] = (double)host_v_cs_slave[i+g].v_cs[1];
        cell->part[i].swim.v_center[2] = (double)host_v_cs_slave[i+g].v_cs[2];
        cell->part[i].swim.v_source[0] = (double)host_v_cs_slave[i+g].v_cs[3];
        cell->part[i].swim.v_source[1] = (double)host_v_cs_slave[i+g].v_cs[4];
        cell->part[i].swim.v_source[2] = (double)host_v_cs_slave[i+g].v_cs[5];
      }
      g += npart;
    }
    free(host_v_cs_slave);
  }
}
/*************** REQ_GETPARTS ************/
void cuda_mpi_get_particles(CUDA_particle_data *particle_data_host) {
  int n_part;
  int g, pnode;
  Cell *cell;
  int c;
  MPI_Status status;
  int i;
  int *sizes;

  sizes = (int *) Utils::malloc(sizeof(int)*n_nodes);
  n_part = cells_get_n_particles();
  /* first collect the number of particles on each node */
  MPI_Gather(&n_part, 1, MPI_INT, sizes, 1, MPI_INT, 0, comm_cart);

  if (this_node > 0) {
    /* call the slave function to provide the slave data */
    cuda_mpi_get_particles_slave();
  }
  else {
    /* master: fetch the particle information into particle_data_host,
       ordered node by node */
    g = 0;
    for (pnode = 0; pnode < n_nodes; pnode++) {
      if (sizes[pnode] > 0) {
        if (pnode == 0) {
          for (c = 0; c < local_cells.n; c++) {
            Particle *part;
            int npart;
            int dummy[3] = {0,0,0};
            double pos[3];
            cell = local_cells.cell[c];
            part = cell->part;
            npart = cell->n;

            for (i = 0; i < npart; i++) {
              memmove(pos, part[i].r.p, 3*sizeof(double));
              fold_position(pos, dummy);

              particle_data_host[i+g].p[0] = (float)pos[0];
              particle_data_host[i+g].p[1] = (float)pos[1];
              particle_data_host[i+g].p[2] = (float)pos[2];

              particle_data_host[i+g].v[0] = (float)part[i].m.v[0];
              particle_data_host[i+g].v[1] = (float)part[i].m.v[1];
              particle_data_host[i+g].v[2] = (float)part[i].m.v[2];
#ifdef IMMERSED_BOUNDARY
              particle_data_host[i+g].isVirtual = part[i].p.isVirtual;
#endif
#ifdef DIPOLES
              particle_data_host[i+g].dip[0] = (float)part[i].r.dip[0];
              particle_data_host[i+g].dip[1] = (float)part[i].r.dip[1];
              particle_data_host[i+g].dip[2] = (float)part[i].r.dip[2];
#endif
#ifdef SHANCHEN
              // SAW TODO: does this really need to be copied every time?
              int ii;
              for (ii = 0; ii < 2*LB_COMPONENTS; ii++) {
                particle_data_host[i+g].solvation[ii] = (float)part[i].p.solvation[ii];
              }
#endif
#ifdef LB_ELECTROHYDRODYNAMICS
              particle_data_host[i+g].mu_E[0] = (float)part[i].p.mu_E[0];
              particle_data_host[i+g].mu_E[1] = (float)part[i].p.mu_E[1];
              particle_data_host[i+g].mu_E[2] = (float)part[i].p.mu_E[2];
#endif
#ifdef ELECTROSTATICS
              particle_data_host[i+g].q = (float)part[i].p.q;
#endif
#ifdef ROTATION
              particle_data_host[i+g].quatu[0] = (float)part[i].r.quatu[0];
              particle_data_host[i+g].quatu[1] = (float)part[i].r.quatu[1];
              particle_data_host[i+g].quatu[2] = (float)part[i].r.quatu[2];
#endif
#ifdef ENGINE
              particle_data_host[i+g].swim.v_swim   = (float)part[i].swim.v_swim;
              particle_data_host[i+g].swim.f_swim   = (float)part[i].swim.f_swim;
              particle_data_host[i+g].swim.quatu[0] = (float)part[i].r.quatu[0];
              particle_data_host[i+g].swim.quatu[1] = (float)part[i].r.quatu[1];
              particle_data_host[i+g].swim.quatu[2] = (float)part[i].r.quatu[2];
#if defined(LB) || defined(LB_GPU)
              particle_data_host[i+g].swim.push_pull     = part[i].swim.push_pull;
              particle_data_host[i+g].swim.dipole_length = (float)part[i].swim.dipole_length;
#endif
              particle_data_host[i+g].swim.swimming = part[i].swim.swimming;
#endif
            }
            g += npart;
          }
        }
        else {
          MPI_Recv(&particle_data_host[g], sizes[pnode]*sizeof(CUDA_particle_data), MPI_BYTE, pnode, REQ_CUDAGETPARTS,
                   comm_cart, &status);
          g += sizes[pnode];
        }
      }
    }
  }
  COMM_TRACE(fprintf(stderr, "%d: finished get\n", this_node));
  free(sizes);
}
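/* Layout note (added for clarity, not in the original file): on the master,
   particle_data_host is one contiguous buffer ordered by MPI rank; the slice
   for rank p starts at the prefix sum of sizes[0..p-1], which is exactly the
   running offset g used above. A small hypothetical helper making this explicit: */
static void example_compute_rank_offsets(const int *sizes, int num_nodes, int *offsets) {
  /* offsets[p] = index of the first particle of rank p in the host buffer */
  int g = 0;
  for (int p = 0; p < num_nodes; p++) {
    offsets[p] = g;
    g += sizes[p];
  }
}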
void cuda_bcast_global_part_params() {
  COMM_TRACE(fprintf(stderr, "%d: cuda_bcast_global_part_params\n", this_node));
  mpi_bcast_cuda_global_part_vars();
  COMM_TRACE(fprintf(stderr, "%d: cuda_bcast_global_part_params finished\n", this_node));
}
static void cuda_mpi_get_particles_slave() {
  int n_part;
  int g;
  CUDA_particle_data *particle_data_host_sl;
  Cell *cell;
  int c, i;

  n_part = cells_get_n_particles();

  COMM_TRACE(fprintf(stderr, "%d: get_particles_slave, %d particles\n", this_node, n_part));

  if (n_part > 0) {
    /* pack the (unsorted) local particle information into a flat array */
    particle_data_host_sl = (CUDA_particle_data *) Utils::malloc(n_part*sizeof(CUDA_particle_data));
    g = 0;
    for (c = 0; c < local_cells.n; c++) {
      Particle *part;
      int npart;
      int dummy[3] = {0,0,0};
      double pos[3];
      cell = local_cells.cell[c];
      part = cell->part;
      npart = cell->n;

      for (i = 0; i < npart; i++) {
        memmove(pos, part[i].r.p, 3*sizeof(double));
        fold_position(pos, dummy);

        particle_data_host_sl[i+g].p[0] = (float)pos[0];
        particle_data_host_sl[i+g].p[1] = (float)pos[1];
        particle_data_host_sl[i+g].p[2] = (float)pos[2];

        particle_data_host_sl[i+g].v[0] = (float)part[i].m.v[0];
        particle_data_host_sl[i+g].v[1] = (float)part[i].m.v[1];
        particle_data_host_sl[i+g].v[2] = (float)part[i].m.v[2];
#ifdef IMMERSED_BOUNDARY
        particle_data_host_sl[i+g].isVirtual = part[i].p.isVirtual;
#endif
#ifdef DIPOLES
        particle_data_host_sl[i+g].dip[0] = (float)part[i].r.dip[0];
        particle_data_host_sl[i+g].dip[1] = (float)part[i].r.dip[1];
        particle_data_host_sl[i+g].dip[2] = (float)part[i].r.dip[2];
#endif
#ifdef SHANCHEN
        // SAW TODO: does this really need to be copied every time?
        int ii;
        for (ii = 0; ii < 2*LB_COMPONENTS; ii++) {
          particle_data_host_sl[i+g].solvation[ii] = (float)part[i].p.solvation[ii];
        }
#endif
#ifdef LB_ELECTROHYDRODYNAMICS
        particle_data_host_sl[i+g].mu_E[0] = (float)part[i].p.mu_E[0];
        particle_data_host_sl[i+g].mu_E[1] = (float)part[i].p.mu_E[1];
        particle_data_host_sl[i+g].mu_E[2] = (float)part[i].p.mu_E[2];
#endif
#ifdef ELECTROSTATICS
        particle_data_host_sl[i+g].q = (float)part[i].p.q;
#endif
#ifdef ROTATION
        particle_data_host_sl[i+g].quatu[0] = (float)part[i].r.quatu[0];
        particle_data_host_sl[i+g].quatu[1] = (float)part[i].r.quatu[1];
        particle_data_host_sl[i+g].quatu[2] = (float)part[i].r.quatu[2];
#endif
#ifdef ENGINE
        particle_data_host_sl[i+g].swim.v_swim   = (float)part[i].swim.v_swim;
        particle_data_host_sl[i+g].swim.f_swim   = (float)part[i].swim.f_swim;
        particle_data_host_sl[i+g].swim.quatu[0] = (float)part[i].r.quatu[0];
        particle_data_host_sl[i+g].swim.quatu[1] = (float)part[i].r.quatu[1];
        particle_data_host_sl[i+g].swim.quatu[2] = (float)part[i].r.quatu[2];
#if defined(LB) || defined(LB_GPU)
        particle_data_host_sl[i+g].swim.push_pull     = part[i].swim.push_pull;
        particle_data_host_sl[i+g].swim.dipole_length = (float)part[i].swim.dipole_length;
#endif
        particle_data_host_sl[i+g].swim.swimming = part[i].swim.swimming;
#endif
      }
      g += npart;
    }
    /* and send it back to the master node */
    MPI_Send(particle_data_host_sl, n_part*sizeof(CUDA_particle_data), MPI_BYTE, 0, REQ_CUDAGETPARTS, comm_cart);
    free(particle_data_host_sl);
  }
}
/*************** REQ_GETPARTS ************/
static void mpi_get_particles_lb(LB_particle_gpu *host_data) {
  int n_part;
  int g, pnode;
  Cell *cell;
  int c;
  MPI_Status status;
  int i;
  int *sizes;

  sizes = malloc(sizeof(int)*n_nodes);
  n_part = cells_get_n_particles();
  /* first collect the number of particles on each node */
  MPI_Gather(&n_part, 1, MPI_INT, sizes, 1, MPI_INT, 0, MPI_COMM_WORLD);

  if (this_node > 0) {
    /* call the slave function to provide the slave data */
    mpi_get_particles_slave_lb();
  }
  else {
    /* master: fetch the particle information into host_data, ordered node by node */
    g = 0;
    for (pnode = 0; pnode < n_nodes; pnode++) {
      if (sizes[pnode] > 0) {
        if (pnode == 0) {
          for (c = 0; c < local_cells.n; c++) {
            Particle *part;
            int npart;
            int dummy[3] = {0,0,0};
            double pos[3];
            cell = local_cells.cell[c];
            part = cell->part;
            npart = cell->n;

            for (i = 0; i < npart; i++) {
              memcpy(pos, part[i].r.p, 3*sizeof(double));
              fold_position(pos, dummy);

              host_data[i+g].p[0] = (float)pos[0];
              host_data[i+g].p[1] = (float)pos[1];
              host_data[i+g].p[2] = (float)pos[2];

              host_data[i+g].v[0] = (float)part[i].m.v[0];
              host_data[i+g].v[1] = (float)part[i].m.v[1];
              host_data[i+g].v[2] = (float)part[i].m.v[2];
#ifdef LB_ELECTROHYDRODYNAMICS
              host_data[i+g].mu_E[0] = (float)part[i].p.mu_E[0];
              host_data[i+g].mu_E[1] = (float)part[i].p.mu_E[1];
              host_data[i+g].mu_E[2] = (float)part[i].p.mu_E[2];
#endif
            }
            g += npart;
          }
        }
        else {
          MPI_Recv(&host_data[g], sizes[pnode]*sizeof(LB_particle_gpu), MPI_BYTE, pnode, REQ_GETPARTS,
                   MPI_COMM_WORLD, &status);
          g += sizes[pnode];
        }
      }
    }
  }
  COMM_TRACE(fprintf(stderr, "%d: finished get\n", this_node));
  free(sizes);
}
static void cuda_mpi_get_particles_slave() {
  int n_part;
  int g;
  CUDA_particle_data *particle_data_host_sl;
  Cell *cell;
  int c, i;

  n_part = cells_get_n_particles();

  COMM_TRACE(fprintf(stderr, "%d: get_particles_slave, %d particles\n", this_node, n_part));

  if (n_part > 0) {
    /* pack the (unsorted) local particle information into a flat array */
    particle_data_host_sl = (CUDA_particle_data *) malloc(n_part*sizeof(CUDA_particle_data));
    g = 0;
    for (c = 0; c < local_cells.n; c++) {
      Particle *part;
      int npart;
      int dummy[3] = {0,0,0};
      double pos[3];
      cell = local_cells.cell[c];
      part = cell->part;
      npart = cell->n;

      for (i = 0; i < npart; i++) {
        memcpy(pos, part[i].r.p, 3*sizeof(double));
        fold_position(pos, dummy);

        particle_data_host_sl[i+g].p[0] = (float)pos[0];
        particle_data_host_sl[i+g].p[1] = (float)pos[1];
        particle_data_host_sl[i+g].p[2] = (float)pos[2];

        particle_data_host_sl[i+g].v[0] = (float)part[i].m.v[0];
        particle_data_host_sl[i+g].v[1] = (float)part[i].m.v[1];
        particle_data_host_sl[i+g].v[2] = (float)part[i].m.v[2];
#ifdef SHANCHEN
        // SAW TODO: does this really need to be copied every time?
        int ii;
        for (ii = 0; ii < 2*LB_COMPONENTS; ii++) {
          particle_data_host_sl[i+g].solvation[ii] = (float)part[i].p.solvation[ii];
        }
#endif
#ifdef LB_ELECTROHYDRODYNAMICS
        particle_data_host_sl[i+g].mu_E[0] = (float)part[i].p.mu_E[0];
        particle_data_host_sl[i+g].mu_E[1] = (float)part[i].p.mu_E[1];
        particle_data_host_sl[i+g].mu_E[2] = (float)part[i].p.mu_E[2];
#endif
#ifdef ELECTROSTATICS
        if (coulomb.method == COULOMB_P3M_GPU) {
          particle_data_host_sl[i+g].q = (float)part[i].p.q;
        }
#endif
      }
      g += npart;
    }
    /* and send it back to the master node */
    MPI_Send(particle_data_host_sl, n_part*sizeof(CUDA_particle_data), MPI_BYTE, 0, REQ_CUDAGETPARTS, comm_cart);
    free(particle_data_host_sl);
  }
}