bool ilo_shader_compile_gs_passthrough(const struct ilo_shader_state *vs_state, const struct ilo_shader_variant *vs_variant, const int *so_mapping, struct ilo_shader *vs) { struct gs_compile_context gcc; struct ilo_shader_state state; struct ilo_shader_variant variant; const int num_verts = 3; int i; /* init GS state and variant */ state = *vs_state; state.info.tokens = NULL; for (i = 0; i < state.info.stream_output.num_outputs; i++) { const int reg = state.info.stream_output.output[i].register_index; state.info.stream_output.output[i].register_index = so_mapping[reg]; } variant = *vs_variant; variant.u.gs.rasterizer_discard = vs_variant->u.vs.rasterizer_discard; variant.u.gs.num_inputs = vs->out.count; for (i = 0; i < vs->out.count; i++) { variant.u.gs.semantic_names[i] = vs->out.semantic_names[i]; variant.u.gs.semantic_indices[i] = vs->out.semantic_indices[i]; } if (!gs_setup(&gcc, &state, &variant, num_verts)) return false; if (!gs_compile_passthrough(&gcc)) { FREE(gcc.shader); gcc.shader = NULL; } /* no need to call toy_tgsi_cleanup() */ toy_compiler_cleanup(&gcc.tc); return append_gs_to_vs(vs, gcc.shader, num_verts); }
/**
 * Compile the geometry shader.
 *
 * \param state    shader state handed to gs_setup()
 * \param variant  shader variant handed to gs_setup()
 * \return the shader held by the compile context, or NULL when either
 *         gs_setup() or gs_compile() fails
 */
struct ilo_shader *
ilo_shader_compile_gs(const struct ilo_shader_state *state,
                      const struct ilo_shader_variant *variant)
{
   struct gs_compile_context gcc;
   struct ilo_shader *compiled;

   if (!gs_setup(&gcc, state, variant, 0))
      return NULL;

   /* a failed compile releases the shader; the caller then gets NULL */
   if (!gs_compile(&gcc)) {
      FREE(gcc.shader);
      gcc.shader = NULL;
   }

   toy_tgsi_cleanup(&gcc.tgsi);
   toy_compiler_cleanup(&gcc.tc);

   compiled = gcc.shader;
   return compiled;
}
int main(int narg, char *arg[]) { comm_ext world; int np; double t1,t2,gs_time,mpi_time; struct gs_data *gsh; struct comm comm; int *localData,*recvBuf; slong *glo_num; int i,j,nid,nsamples; T *v; #ifdef MPI MPI_Init(&narg,&arg); world = MPI_COMM_WORLD; MPI_Comm_size(world,&np); #else world=0, np=1; #endif comm_init(&comm,world); MPI_Comm_rank(world,&nid); glo_num = malloc(sizeof(slong)*np); for(i=1;i<=np;i++){ j = nid+1; if(j>=i){ glo_num[i-1] = (i-1)*np-i*(i-1)/2 + j-1; } else { glo_num[i-1] = (j-1)*np - j*(j-1)/2+i-1; } if(j==i){ glo_num[i-1] = 0; } } gsh = gs_setup(glo_num,np,&comm,0,gs_auto,1); nsamples = 10000; localData = malloc(sizeof(int)*np); recvBuf = malloc(sizeof(int)*np); MPI_Barrier(world); t1 = MPI_Wtime(); for(j=0;j<nsamples;j++){ for(i=0;i<np;i++){ localData[i] = nid+i; recvBuf[i] = nid+i; } gs(recvBuf,gs_int,gs_add,0,gsh,0); for(i=0;i<np;i++){ recvBuf[i] = recvBuf[i] - localData[i]; } } MPI_Barrier(world); t2 = MPI_Wtime(); gs_time = t2 - t1; MPI_Barrier(world); t1 = MPI_Wtime(); for(j=0;j<nsamples;j++){ for(i=0;i<np;i++){ localData[i] = nid+i; } MPI_Alltoall(localData,1,MPI_INT,recvBuf,1,MPI_INT,world); } MPI_Barrier(world); t2 = MPI_Wtime(); mpi_time = t2 - t1; if(nid==0)printf("gs_time: %f mpi_time: %f\n",gs_time,mpi_time); #ifdef MPI MPI_Barrier(world); // MPI_Finalize(); #endif return 0; }