/* Frees the memory allocated for buffers and scratch space. */
void free_indexed_output_buffer (indexed_output_buffer *buffer, unsigned int gridsize) {
#if defined TARGET_GPU
  // Retrieve the device buffer pointer published in the gpu_ixob symbol and release it
  indexed_output_buffer *g_ixob = NULL;
  cutilSafeCall(cudaMemcpyFromSymbol(&g_ixob, gpu_ixob, sizeof(g_ixob)));
  cutilSafeCall(cudaFree(g_ixob));
#else
  // Release each block's scratch area before freeing the buffer array itself
  unsigned int i;
  for (i = 0; i < gridsize; i++) {
    free(global_ixob[i].scratch);
  }
  free(global_ixob);
#endif
}
indexed_output_buffer *alloc_indexed_output_buffer (unsigned int gridsize, unsigned int blocksize) {
#if NUM_OUTPUTS == 0
  // The model produces no outputs, so no buffer is required
  return NULL;
#endif
#if defined TARGET_GPU
  // Allocate and zero one indexed_output_buffer per block on the device,
  // then publish the pointer through the gpu_ixob symbol for kernels to use
  indexed_output_buffer *g_ixob = NULL;
  cutilSafeCall(cudaMalloc((void **)&g_ixob, gridsize * sizeof(indexed_output_buffer)));
  cutilSafeCall(cudaMemset(g_ixob, 0, gridsize * sizeof(indexed_output_buffer)));
  cutilSafeCall(cudaMemcpyToSymbol(gpu_ixob, &g_ixob, sizeof(g_ixob)));
  return g_ixob;
#else
  // Host target: one zero-initialized buffer per block
  global_ixob = (indexed_output_buffer *)calloc(gridsize, sizeof(indexed_output_buffer));
  return global_ixob;
#endif
}
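#if 0
/* Illustrative usage sketch (not compiled): how a hypothetical caller might
 * pair the allocator and deallocator above. The grid/block dimensions and the
 * kernel-launch step are placeholders, not part of the engine. */
static void example_indexed_output_buffer_usage(void) {
  unsigned int gridsize = 64;   // placeholder: one buffer per block
  unsigned int blocksize = 128; // placeholder block dimension
  indexed_output_buffer *ixob = alloc_indexed_output_buffer(gridsize, blocksize);
  if (ixob) {
    /* ... launch the kernels / run the simulation that fills the buffers ... */
    free_indexed_output_buffer(ixob, gridsize);
  }
}
#endif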
// simengine_runmodel()
//
// executes the model for the given parameters, states and simulation time
simengine_result *simengine_runmodel(simengine_opts *opts){
  double start_time = opts->start_time;
  double stop_time = opts->stop_time;
  unsigned int num_models = opts->num_models;
  const char *outputs_dirname = opts->outputs_dirname;

  CDATAFORMAT model_states[PARALLEL_MODELS * NUM_STATES];
  unsigned int stateid;
  unsigned int modelid;
  unsigned int models_executed;
  unsigned int models_per_batch;

  double *progress;
  int progress_fd;
  int output_fd;
  int resuming = 0;
  int random_initialized = 0;

#if defined TARGET_GPU
  gpu_init();
#endif

  open_progress_file(outputs_dirname, &progress, &progress_fd, num_models);

  // Create result structure
  simengine_result *seresult = (simengine_result *)malloc(sizeof(simengine_result));
  // Couldn't allocate return structure, return NULL
  if(!seresult) return NULL;

  if(seint.num_states){
    seresult->final_states = (double *)malloc(num_models * seint.num_states * sizeof(double));
  }
  else{
    seresult->final_states = NULL;
  }
  seresult->final_time = (double *)malloc(num_models * sizeof(double));
  if((seint.num_states && !seresult->final_states) || !seresult->final_time){
    seresult->status = ERRMEM;
    seresult->status_message = (char *)simengine_errors[ERRMEM];
    // Release whichever allocation succeeded before clearing the pointers
    free(seresult->final_states);
    free(seresult->final_time);
    seresult->final_states = NULL;
    seresult->final_time = NULL;
    return seresult;
  }

  init_output_buffers(outputs_dirname, &output_fd);

  // Run the parallel simulation repeatedly until all requested models have been executed
  for(models_executed = 0; models_executed < num_models; models_executed += PARALLEL_MODELS){
    models_per_batch = MIN(num_models - models_executed, PARALLEL_MODELS);

    // Copy inputs and state initial values to internal representation
    unsigned int modelid_offset = global_modelid_offset + models_executed;

#if NUM_CONSTANT_INPUTS > 0
#if defined TARGET_GPU
    CDATAFORMAT *host_constant_inputs = (CDATAFORMAT *)malloc(PARALLEL_MODELS * NUM_CONSTANT_INPUTS * sizeof(CDATAFORMAT));
#else
    CDATAFORMAT *host_constant_inputs = constant_inputs;
#endif
#else
    CDATAFORMAT *host_constant_inputs = NULL;
#endif

#if NUM_SAMPLED_INPUTS > 0
#if defined TARGET_GPU
    sampled_input_t *host_sampled_inputs = (sampled_input_t *)malloc(STRUCT_SIZE * NUM_SAMPLED_INPUTS * sizeof(sampled_input_t));
#else
    sampled_input_t *host_sampled_inputs = sampled_inputs;
#endif
#else
    sampled_input_t *host_sampled_inputs = NULL;
#endif

    resuming = initialize_states(model_states, outputs_dirname, num_models, models_per_batch, modelid_offset);
    initialize_inputs(host_constant_inputs, host_sampled_inputs, outputs_dirname, num_models, models_per_batch, modelid_offset, start_time);

#if defined TARGET_GPU && NUM_CONSTANT_INPUTS > 0
    // Stage the constant inputs into the device-side constant_inputs symbol
    CDATAFORMAT *g_constant_inputs;
    cutilSafeCall(cudaGetSymbolAddress((void **)&g_constant_inputs, constant_inputs));
    cutilSafeCall(cudaMemcpy(g_constant_inputs, host_constant_inputs, PARALLEL_MODELS * NUM_CONSTANT_INPUTS * sizeof(CDATAFORMAT), cudaMemcpyHostToDevice));
#endif

#if defined TARGET_GPU && NUM_SAMPLED_INPUTS > 0
    // Stage the sampled inputs into the device-side sampled_inputs symbol
    sampled_input_t *g_sampled_inputs;
    cutilSafeCall(cudaGetSymbolAddress((void **)&g_sampled_inputs, sampled_inputs));
    cutilSafeCall(cudaMemcpy(g_sampled_inputs, host_sampled_inputs, STRUCT_SIZE * NUM_SAMPLED_INPUTS * sizeof(sampled_input_t), cudaMemcpyHostToDevice));
#endif

    // Initialize the solver properties and internal simulation memory structures
    solver_props *props = init_solver_props(start_time, stop_time, models_per_batch, model_states, models_executed + global_modelid_offset);

    // Initialize random number generator
    if (!random_initialized || opts->seeded) {
      random_init(models_per_batch);
      random_initialized = 1;
    }

    // If no initial states were passed in
    if(!resuming){
      if(seint.num_states > 0){
        // Initialize default states in next_states
        for(modelid = 0; modelid < models_per_batch; modelid++){
          init_states(props, modelid);
          // Copy states from next_states to model_states
          unsigned int iterid;
          for(iterid = 0; iterid < seint.num_iterators; iterid++){
            solver_writeback(&props[iterid], modelid);
          }
        }
      }
    }

    // Run the model
    seresult->status = exec_loop(props, outputs_dirname, progress + models_executed, resuming);
    seresult->status_message = (char *)simengine_errors[seresult->status];

    // Copy the final time from simulation
    for(modelid = 0; modelid < models_per_batch; modelid++){
      seresult->final_time[models_executed + modelid] = props->time[modelid]; // Time from the first solver
    }

    // Free all internal simulation memory and make sure that model_states has the final state values
    free_solver_props(props, model_states);

    // Copy state values back to state initial value structure
    for(modelid = 0; modelid < models_per_batch; modelid++){
      for(stateid = 0; stateid < seint.num_states; stateid++){
        seresult->final_states[AS_IDX(seint.num_states, num_models, stateid, models_executed + modelid)] =
          model_states[TARGET_IDX(seint.num_states, PARALLEL_MODELS, stateid, modelid)];
      }
    }
  }

  close_progress_file(progress, progress_fd, num_models);
  clean_up_output_buffers(output_fd);

  return seresult;
}
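#if 0
/* Illustrative driver sketch (not compiled): one way a hypothetical caller
 * might invoke simengine_runmodel(). Only the simengine_opts fields read by
 * the function above are populated; the "outputs" directory name is a
 * placeholder and 0 is assumed (not confirmed) to be the success status. */
static void example_run(void) {
  simengine_opts opts = {0};
  opts.start_time = 0.0;            // simulate over [0, 100]
  opts.stop_time = 100.0;
  opts.num_models = 10;             // ten model instances
  opts.outputs_dirname = "outputs"; // placeholder output directory
  opts.seeded = 0;                  // reuse RNG state across batches

  simengine_result *result = simengine_runmodel(&opts);
  if (result && result->status == 0) { // 0 assumed to mean success
    printf("model 0 finished at t = %g\n", result->final_time[0]);
  }
}
#endif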
/* Thin Wine thunk: log the call via WINE_TRACE, then forward directly to the
 * native CUDA runtime's cudaGetSymbolAddress. */
cudaError_t WINAPI wine_cudaGetSymbolAddress( void **devPtr, const char *symbol ) {
    WINE_TRACE("\n");
    return cudaGetSymbolAddress( devPtr, symbol );
}
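#if 0
/* Illustrative companion thunk (a sketch, not in the original wrapper set):
 * the same trace-and-delegate pattern applied to cudaGetSymbolSize, another
 * entry point from the same era of the CUDA runtime API. */
cudaError_t WINAPI wine_cudaGetSymbolSize( size_t *size, const char *symbol ) {
    WINE_TRACE("\n");
    return cudaGetSymbolSize( size, symbol );
}
#endif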