int main(int argc, char **argv) { int i; tw_lp *lp; tw_kp *kp; tw_pe *pe; tw_opt_add(app_opt); tw_init(&argc, &argv); if( lookahead > 1.0 ) tw_error(TW_LOC, "Lookahead > 1.0 .. needs to be less\n"); //reset mean based on lookahead mean = mean - lookahead; g_tw_memory_nqueues = 16; // give at least 16 memory queue event offset_lpid = g_tw_mynode * nlp_per_pe; ttl_lps = tw_nnodes() * g_tw_npe * nlp_per_pe; g_tw_events_per_pe = (mult * nlp_per_pe * g_phold_start_events) + optimistic_memory; //g_tw_rng_default = TW_FALSE; g_tw_lookahead = lookahead; tw_define_lps(nlp_per_pe, sizeof(tcp_phold_message)); for(i = 0; i < g_tw_nlp; i++) tw_lp_settype(i, &mylps[0]); if( g_tw_mynode == 0 ) { printf("Running simulation with following configuration: \n" ); printf(" Processors Used = %d\n", tw_nnodes()); printf(" KPs Used = %lu\n", g_tw_nkp); printf(" LPs Used = %u\n", nlp_per_model); printf(" End Time = %f \n", g_tw_ts_end); printf(" Buffers Allocated Per PE = %d\n", g_tw_events_per_pe); printf(" Gvt Interval = %d\n", g_tw_gvt_interval); printf(" Message Block Size (i.e., Batch) = %d\n", g_tw_mblock); printf("\n\n"); } TWAppStats.sent_packets = 0; TWAppStats.received_packets = 0; TWAppStats.dropped_packets = 0; TWAppStats.timedout_packets = 0; TWAppStats.throughput = 0; tw_run(); tw_end(); tcp_finalize( &TWAppStats ); return 0; }
int main( int argc, char** argv ) { int i; tw_opt_add(app_opt); tw_init(&argc, &argv); printf("First version BGP model! \n"); //////////// N_controller_per_DDN = NumControllerPerDDN; N_FS_per_DDN = N_controller_per_DDN * NumFSPerController; N_ION_per_DDN = N_FS_per_DDN * N_ION_per_FS; N_CN_per_DDN = N_ION_per_DDN * N_CN_per_ION; int N_lp_per_DDN = N_controller_per_DDN + N_FS_per_DDN + N_ION_per_DDN + N_CN_per_DDN + 1; int N_lp_t = N_lp_per_DDN * NumDDN; nlp_per_pe = N_lp_t/tw_nnodes()/g_tw_npe; N_DDN_per_PE = NumDDN/tw_nnodes()/g_tw_npe; g_tw_events_per_pe = nlp_per_pe * 32 + opt_mem; tw_define_lps(nlp_per_pe, sizeof(MsgData), 0); // mapping initialization int LPaccumulate = 0; for( i=0; i<N_CN_per_DDN*N_DDN_per_PE; i++ ) tw_lp_settype(i, &mylps[0]); LPaccumulate += N_CN_per_DDN*N_DDN_per_PE; for( i=0; i<N_ION_per_DDN*N_DDN_per_PE; i++ ) tw_lp_settype(i + LPaccumulate, &mylps[1]); LPaccumulate += N_ION_per_DDN*N_DDN_per_PE; for( i=0; i<N_FS_per_DDN*N_DDN_per_PE; i++ ) tw_lp_settype(i + LPaccumulate, &mylps[2]); LPaccumulate += N_FS_per_DDN*N_DDN_per_PE; for( i=0; i<N_controller_per_DDN*N_DDN_per_PE; i++ ) tw_lp_settype(i + LPaccumulate, &mylps[3]); LPaccumulate += N_controller_per_DDN*N_DDN_per_PE; for( i=0; i<N_DDN_per_PE; i++ ) tw_lp_settype(i + LPaccumulate, &mylps[4]); tw_run(); tw_end(); return 0; }
int gates_main(int argc, char* argv[]){ int i, j; tw_opt_add(gates_opts); tw_init(&argc, &argv); // if (WAVE_COUNT != 0 && g_tw_synchronization_protocol > 2) { // printf("ERROR: Waveform viewing is not supported for non-conservative protocols.\n"); // return 1; // } g_tw_mapping = CUSTOM; g_tw_custom_initial_mapping = &gates_custom_mapping_setup; g_tw_custom_lp_global_to_local_map = &gates_custom_mapping_to_local; routing_table_mpi = routing_table_mapper(tw_nnodes()); g_tw_lp_offset = (*routing_table_mpi)[g_tw_mynode]; g_tw_nlp = (*routing_table_mpi)[g_tw_mynode+1] - g_tw_lp_offset; g_tw_nkp = TOTAL_PARTS / tw_nnodes(); if (g_tw_mynode < TOTAL_PARTS - (g_tw_nkp * tw_nnodes())) { g_tw_nkp++; } g_tw_events_per_pe = 100*g_tw_nlp; g_tw_lookahead = 0.001; tw_define_lps(g_tw_nlp, sizeof(message)); g_tw_lp_types = gates_lps; g_io_lp_types = iolps; tw_lp_setup_types(); g_io_events_buffered_per_rank = 0; char cpname[100]; char *dpath = dirname(argv[0]); sprintf(cpname, "%s/checkpoint/submodule-checkpoint", dpath); io_init(); io_load_checkpoint(cpname, PRE_INIT); #if DEBUG_TRACE char debugfile[100]; sprintf(debugfile, "%s/debug/node_%d_output_file.txt", dpath, g_tw_mynode); node_out_file = fopen(debugfile,"w"); #endif tw_run(); tw_end(); return 0; }
/* determine whether to dump buffer to file * should only be called at GVT! */ void st_buffer_write(int end_of_sim, int type) { MPI_Offset offset = prev_offsets[type]; MPI_File *fh = &buffer_fh[type]; int write_to_file = 0; int my_write_size = 0; int i; int write_sizes[tw_nnodes()]; tw_clock start_cycle_time = tw_clock_read(); my_write_size = g_st_buffer[type]->count; MPI_Allgather(&my_write_size, 1, MPI_INT, &write_sizes[0], 1, MPI_INT, MPI_COMM_ROSS); if (end_of_sim) write_to_file = 1; else { for (i = 0; i < tw_nnodes(); i++) { if ((double) write_sizes[i] / g_st_buffer_size >= g_st_buffer_free_percent / 100.0) write_to_file = 1; } } if (write_to_file) { for (i = 0; i < tw_nnodes(); i++) { if (i < g_tw_mynode) offset += write_sizes[i]; prev_offsets[type] += write_sizes[i]; } //printf("rank %ld writing %d bytes at offset %lld (prev_offsets[ANALYSIS_LP] = %lld)\n", g_tw_mynode, my_write_size, offset, prev_offsets[type]); // dump buffer to file MPI_Status status; g_tw_pe[0]->stats.s_stat_comp += tw_clock_read() - start_cycle_time; start_cycle_time = tw_clock_read(); MPI_File_write_at_all(*fh, offset, st_buffer_read_ptr(g_st_buffer[type]), my_write_size, MPI_BYTE, &status); g_tw_pe[0]->stats.s_stat_write += tw_clock_read() - start_cycle_time; // reset the buffer g_st_buffer[type]->write_pos = 0; g_st_buffer[type]->read_pos = 0; g_st_buffer[type]->count = 0; buffer_overflow_warned = 0; } else g_tw_pe[0]->stats.s_stat_comp += tw_clock_read() - start_cycle_time; }
int main(int argc, char **argv, char **env) { int i; tw_opt_add(app_opt); tw_init(&argc, &argv); offset_lpid = g_tw_mynode * nlp_per_pe; ttl_lps = tw_nnodes() * g_tw_npe * nlp_per_pe; g_tw_memory_nqueues = 1; g_tw_events_per_pe = (mult * nlp_per_pe * g_tmr_start_events) + optimistic_memory; tw_define_lps(nlp_per_pe, sizeof(tmr_message), 0); // use g_tw_nlp now for(i = 0; i < g_tw_nlp; i++) tw_lp_settype(i, &mylps[0]); tw_kp_memory_init(g_tw_kp, 1000, 100, 1); if(verbose) f = stdout; else f = fopen("output", "w"); //f = fopen("/dev/null", "w"); tw_run(); tw_end(); return 0; }
int main(int argc, char **argv, char **env) { int i; lookahead = 1.0; tw_opt_add(app_opt); tw_init(&argc, &argv); g_tw_memory_nqueues = 16; offset_lpid = g_tw_mynode * nlp_per_pe; ttl_lps = tw_nnodes() * g_tw_npe * nlp_per_pe; g_tw_events_per_pe = (mult * nlp_per_pe * g_wifi_start_events)+ optimistic_memory; g_tw_lookahead = lookahead; tw_define_lps(nlp_per_pe, sizeof(wifi_message)); tw_lp_settype(0, &mylps[0]); for(i = 1; i < g_tw_nlp; i++) tw_lp_settype(i, &mylps[0]); tw_run(); tw_end(); return 0; }
/*
 * Remove 'event' from the remote-event store and return the stored event.
 *
 * With USE_AVL_TREE the event is deleted from the AVL tree of its
 * destination LP's KP; 'h' and 'pe' are not used and the time spent is
 * added to the s_avl statistic.
 *
 * Otherwise 'h' is a tw_hash and 'pe' selects the per-sender table the
 * event was stored in.  'pe' must lie in [0, tw_nnodes() - 1]; anything
 * else (including negative values, which would otherwise index before the
 * start of the tables) is reported through tw_error().
 */
tw_event *
tw_hash_remove(void *h, tw_event * event, long pe)
{
#if USE_AVL_TREE
    tw_event *ret;
    tw_clock start;

    g_tw_pe[0]->avl_tree_size--;

    start = tw_clock_read();
    ret = avlDelete(&event->dest_lp->kp->avl_tree, event);
    g_tw_pe[0]->stats.s_avl += tw_clock_read() - start;

    return ret;
#else
    tw_hash *hash_t = (tw_hash *) h;
    tw_event *ret_event;
    int key;

    if(pe < 0 || pe > tw_nnodes() - 1)
        tw_error(TW_LOC, "bad pe id");

    key = find_entry(hash_t->incoming[pe], event, hash_t->hash_sizes[pe], pe);

    // hand back the stored event and free its slot
    ret_event = hash_t->incoming[pe][key];
    hash_t->incoming[pe][key] = NULL;
    (hash_t->num_stored[pe])--;

    return ret_event;
#endif
}
int main(int argc, char **argv, char **env) { int i; tw_opt_add(app_opt); tw_init(&argc, &argv); offset_lpid = g_tw_mynode * nlp_per_pe; ttl_lps = tw_nnodes() * g_tw_npe * nlp_per_pe; g_tw_memory_nqueues = 1; g_tw_events_per_pe = (mult * nlp_per_pe * g_mem_start_events) + optimistic_memory; tw_define_lps(nlp_per_pe, sizeof(mem_message)); for(i = 0; i < nlp_per_pe; i++) tw_lp_settype(i, &mylps[0]); //((g_tw_nlp/g_tw_nkp) * g_mem_start_events), // init the memory interface my_fd = tw_memory_init(g_tw_events_per_pe * nbufs, sizeof(mem_packet), 0.5); tw_run(); mem_stats_print(); tw_end(); return 0; }
/*
 * Compute the PHOLD LP-id globals from the current ROSS configuration:
 * offset_lpid is this node's index times g_tw_nlp, and ttl_lps is
 * tw_nnodes() * g_tw_npe * g_tw_nlp.
 *
 * The arguments are not used.  Returns 0.
 */
int
phold_main(int argc, char **argv, char **env)
{
    (void) argc;
    (void) argv;
    (void) env;

    offset_lpid = g_tw_mynode * g_tw_nlp;
    ttl_lps = tw_nnodes() * g_tw_npe * g_tw_nlp;

    return 0;
}
/*
 * Force a GVT update on the next check.
 *
 * When more than one node is running this is a no-op.  On a single node
 * the GVT counter is set to one less than g_tw_gvt_interval.
 * 'me' is not used.
 */
void
tw_gvt_force_update(tw_pe *me)
{
    (void) me;

    if (tw_nnodes() > 1)
        return;

    gvt_cnt = g_tw_gvt_interval - 1;
}
int main(int argc, char **argv, char **env) { int i; // get rid of error if compiled w/ MEMORY queues g_tw_memory_nqueues=1; // set a min lookahead of 1.0 lookahead = 1.0; tw_opt_add(app_opt); tw_init(&argc, &argv); if( lookahead > 1.0 ) tw_error(TW_LOC, "Lookahead > 1.0 .. needs to be less\n"); //reset mean based on lookahead mean = mean - lookahead; g_tw_memory_nqueues = 16; // give at least 16 memory queue event offset_lpid = g_tw_mynode * nlp_per_pe; ttl_lps = tw_nnodes() * g_tw_npe * nlp_per_pe; g_tw_events_per_pe = (mult * nlp_per_pe * g_phold_start_events) + optimistic_memory; //g_tw_rng_default = TW_FALSE; g_tw_lookahead = lookahead; tw_define_lps(nlp_per_pe, sizeof(phold_message)); for(i = 0; i < g_tw_nlp; i++) tw_lp_settype(i, &mylps[0]); if( g_tw_mynode == 0 ) { printf("========================================\n"); printf("PHOLD Model Configuration..............\n"); printf(" Lookahead..............%lf\n", lookahead); printf(" Start-events...........%u\n", g_phold_start_events); printf(" stagger................%u\n", stagger); printf(" Mean...................%lf\n", mean); printf(" Mult...................%lf\n", mult); printf(" Memory.................%u\n", optimistic_memory); printf(" Remote.................%lf\n", percent_remote); printf("========================================\n\n"); } tw_run(); tw_end(); return 0; }
/*
 * Set up the GVT state for this run.
 *
 * Flags the run as distributed when more than one node is present,
 * otherwise as parallel when more than one PE is configured, then seeds the
 * node flag with -g_tw_npe and derives the clock-based GVT interval/window
 * and the maximum send delta from CPU, TW_MHZ, secs and delta_t.
 */
void
tw_gvt_start(void)
{
    if(tw_nnodes() > 1)
    {
        TW_DISTRIBUTED = 1;
    }
    else if(g_tw_npe > 1)
    {
        TW_PARALLEL = 1;
    }

    g_tw_7oclock_node_flag = -g_tw_npe;

    // window size and interval start out identical
    g_tw_clock_gvt_window_size = (tw_clock) CPU * TW_MHZ * secs;
    g_tw_clock_gvt_interval = g_tw_clock_gvt_window_size;

    g_tw_clock_max_send_delta_t = (tw_clock) CPU * TW_MHZ * delta_t;
}
inline double rm_getelevation(double * p) { int npe = tw_nnodes() * g_tw_npe; int nrows_per_pe = ceil(g_rm_spatial_grid[0] / npe); int normalized_p0 = 0; if(0 == g_tw_mynode || g_tw_mynode == tw_nnodes() - 1) nrows_per_pe--; normalized_p0 = ((int) (p[0] / g_rm_spatial_d[1])) % (nrows_per_pe + 2); #if DEBUG //if(!g_tw_mynode) { printf("\n\t\tGETELEVATION\n"); printf("\t\t\tp(%lf, %lf) nrows %d, norm p0 %d\n", p[0], p[1], nrows_per_pe, normalized_p0); } #endif return g_rm_z_values[normalized_p0][(int) (p[1] / g_rm_spatial_d[1])]; }
void tw_net_start(void) { if (MPI_Comm_size(MPI_COMM_ROSS, &world_size) != MPI_SUCCESS) tw_error(TW_LOC, "Cannot get MPI_Comm_size(MPI_COMM_ROSS)"); if( g_tw_mynode == 0) { printf("tw_net_start: Found world size to be %d \n", world_size ); } // Check after tw_nnodes is defined if(tw_nnodes() == 1 && g_tw_npe == 1) { // force the setting of SEQUENTIAL protocol if (g_tw_synchronization_protocol == NO_SYNCH) { g_tw_synchronization_protocol = SEQUENTIAL; } else if(g_tw_synchronization_protocol == CONSERVATIVE || g_tw_synchronization_protocol == OPTIMISTIC) { g_tw_synchronization_protocol = SEQUENTIAL; fprintf(stderr, "Warning: Defaulting to Sequential Simulation, not enought PEs defined.\n"); } } tw_pe_create(1); tw_pe_init(0, g_tw_mynode); //If we're in (some variation of) optimistic mode, we need this hash if (g_tw_synchronization_protocol == OPTIMISTIC || g_tw_synchronization_protocol == OPTIMISTIC_DEBUG || g_tw_synchronization_protocol == OPTIMISTIC_REALTIME) { g_tw_pe[0]->hash_t = tw_hash_create(); } else { g_tw_pe[0]->hash_t = NULL; } if (send_buffer < 1) tw_error(TW_LOC, "network send buffer must be >= 1"); if (read_buffer < 1) tw_error(TW_LOC, "network read buffer must be >= 1"); init_q(&posted_sends, "MPI send queue"); init_q(&posted_recvs, "MPI recv queue"); g_tw_net_device_size = read_buffer; // pre-post all the Irecv operations recv_begin( g_tw_pe[0] ); }
void * tw_hash_create() { #ifdef USE_AVL_TREE int i; g_tw_pe[0]->avl_tree_size = 0; g_tw_pe[0]->avl_list = (AvlTree)tw_calloc(TW_LOC, "avl tree", sizeof(struct avlNode), AVL_NODE_COUNT); for (i = 0; i < AVL_NODE_COUNT - 1; i++) { g_tw_pe[0]->avl_list[i].next = &g_tw_pe[0]->avl_list[i + 1]; } g_tw_pe[0]->avl_list[i].next = NULL; g_tw_pe[0]->avl_list_head = &g_tw_pe[0]->avl_list[0]; return NULL; #else tw_hash *h; unsigned int pi; ncpu = tw_nnodes(); h = (tw_hash *) tw_calloc(TW_LOC, "tw_hash", sizeof(tw_hash), 1); if (!h) tw_error(TW_LOC, "Cannot allocate tw_hash."); h->num_stored = (int *) tw_calloc(TW_LOC, "tw_hash", sizeof(int) * ncpu, 1); h->hash_sizes = (unsigned int *) tw_calloc(TW_LOC, "tw_hash", sizeof(int) * ncpu, 1); h->incoming = (tw_event ***) tw_calloc(TW_LOC, "tw_hash", sizeof(tw_event *)* ncpu, 1); if(!is_prime(g_tw_hash_size)) g_tw_hash_size = next_prime(g_tw_hash_size); for (pi = 0; pi < ncpu; pi++) { h->num_stored[pi] = 0; h->hash_sizes[pi] = g_tw_hash_size; h->incoming[pi] = allocate_table(h->hash_sizes[pi]); } return (void *) h; #endif }
/*
 * Schedule one PHOLD event from 'lp' before the run starts.
 *
 * With probability percent_remote the destination is drawn uniformly from
 * [0, ttl_lps - 1]; otherwise the event is sent to the LP itself.  The
 * timestamp offset is an exponential draw with the configured mean plus
 * the lookahead.  A destination beyond g_tw_nlp * tw_nnodes() is reported
 * through tw_error().  's' is not used.
 */
void
phold_pre_run(phold_state * s, tw_lp * lp)
{
    tw_lpid target = lp->gid;
    tw_stime offset_ts;
    tw_event *ev;

    if(tw_rand_unif(lp->rng) <= percent_remote)
    {
        target = tw_rand_integer(lp->rng, 0, ttl_lps - 1);
    }

    if(target >= (g_tw_nlp * tw_nnodes()))
    {
        tw_error(TW_LOC, "bad dest");
    }

    offset_ts = tw_rand_exponential(lp->rng, mean) + lookahead;
    ev = tw_event_new(target, offset_ts, lp);
    tw_event_send(ev);
}
/*
 * Initialize a controller LP.
 *
 * Recomputes the per-PE LP counts of every layer from NumDDN and the
 * configured fan-outs, then derives from this LP's global id:
 *   - s->ddn_id:          the global id of the DDN LP this controller
 *                         belongs to, and
 *   - s->previous_FS_id:  a freshly allocated array of NumFSPerController
 *                         global ids of the file-server LPs attached to it.
 * The id arithmetic follows the per-PE layout CN, ION, FS, controller, DDN.
 *
 * Allocation failure is reported through tw_error().  With PRINTid defined
 * the computed ids are printed.
 */
void bgp_controller_init( CON_state* s, tw_lp* lp )
{
    int N_PE = tw_nnodes();

    nlp_DDN = NumDDN / N_PE;
    nlp_Controller = nlp_DDN * NumControllerPerDDN;
    nlp_FS = nlp_Controller * NumFSPerController;
    nlp_ION = nlp_FS * N_ION_per_FS;
    nlp_CN = nlp_ION * N_CN_per_ION;

    // get the file server gid based on the RR mapping
    // base is the total number of CN + ION lp in a PE
    int PEid = lp->gid / nlp_per_pe;
    int localID = lp->gid % nlp_per_pe;

    // index of this controller among the PE's controllers, then the index
    // of the DDN that owns it
    localID = localID - nlp_CN - nlp_ION - nlp_FS;
    localID /= NumControllerPerDDN;
    s->ddn_id = PEid * nlp_per_pe + nlp_CN + nlp_ION + nlp_FS+ nlp_Controller + localID;

    // get the IDs of the file servers which are hooked to this DDN
    int i;
    s->previous_FS_id = (int *)calloc( NumFSPerController, sizeof(int) );
    if (s->previous_FS_id == NULL && NumFSPerController > 0)
        tw_error(TW_LOC, "Unable to allocate file server id table");

    int base = nlp_CN + nlp_ION;
    localID = lp->gid % nlp_per_pe;
    localID = localID - base - nlp_FS;

    for (i=0; i<NumFSPerController; i++)
        s->previous_FS_id[i] = PEid * nlp_per_pe + base + localID * NumFSPerController + i;

    s->processor_next_available_time = 0;

#ifdef PRINTid
    // lp->gid is cast explicitly so the conversion matches its width
    printf("controller LP %llu speaking, my DDN is %d \n", (unsigned long long) lp->gid, s->ddn_id);
    for (i=0; i<NumFSPerController; i++)
        printf("Controller LP %llu speaking, my FS is %d\n", (unsigned long long) lp->gid,s->previous_FS_id[i]);
#endif
}
void traffic_grid_mapping() { tw_lpid x, y; tw_lpid lpid, kpid; tw_lpid num_cells_per_kp, vp_per_proc; tw_lpid local_lp_count; num_cells_per_kp = (NUM_CELLS_X * NUM_CELLS_Y) / (NUM_VP_X * NUM_VP_Y); vp_per_proc = (NUM_VP_X * NUM_VP_Y) / ((tw_nnodes() * g_tw_npe)) ; g_tw_nlp = nlp_per_pe; g_tw_nkp = vp_per_proc; local_lp_count=0; for (y = 0; y < NUM_CELLS_Y; y++) { for (x = 0; x < NUM_CELLS_X; x++) { lpid = (x + (y * NUM_CELLS_X)); if( g_tw_mynode == CellMapping_lp_to_pe(lpid) ) { kpid = local_lp_count/num_cells_per_kp; local_lp_count++; // MUST COME AFTER!! DO NOT PRE-INCREMENT ELSE KPID is WRONG!! if( kpid >= g_tw_nkp ) tw_error(TW_LOC, "Attempting to mapping a KPid (%llu) for Global LPid %llu that is beyond g_tw_nkp (%llu)\n", kpid, lpid, g_tw_nkp ); tw_lp_onpe(CellMapping_to_local_index(lpid), g_tw_pe[0], lpid); if( g_tw_kp[kpid] == NULL ) tw_kp_onpe(kpid, g_tw_pe[0]); tw_lp_onkp(g_tw_lp[CellMapping_to_local_index(lpid)], g_tw_kp[kpid]); tw_lp_settype( CellMapping_to_local_index(lpid), &mylps[0]); } } } }
/*
 * Map a global LP id (a torus node LP or an MPI process LP) to the id of
 * the process it lives on.
 *
 * Ids below N_nodes are torus node LPs; all others are MPI process LPs and
 * are first translated with getProcID().  When node_rem is non-zero the
 * result is corrected around 'offset', the first id past the LPs of the
 * node_rem processes that hold one extra LP; the correction depends on
 * whether this process (g_tw_mynode) is one of them.
 */
tw_peid mapping( tw_lpid gid )
{
    int pe;
    int offset;
    int id_base = 0;

    if(gid < N_nodes)
    {
        pe = gid / nlp_nodes_per_pe;
    }
    else
    {
        pe = getProcID( gid ) / nlp_mpi_procs_per_pe;
        id_base = N_nodes;
    }

    // nlp_nodes_per_pe equals the plain quotient on processes without an
    // extra LP, and the quotient plus one on processes with one
    if(nlp_nodes_per_pe == (N_nodes/tw_nnodes()))
    {
        offset = id_base + (nlp_nodes_per_pe + 1) * node_rem;
    }
    else
    {
        offset = id_base + nlp_nodes_per_pe * node_rem;
    }

    if(!node_rem)
    {
        return pe;
    }

    if( g_tw_mynode >= node_rem )
    {
        if(gid < offset)
        {
            pe = gid / (nlp_nodes_per_pe + 1);
        }
        else
        {
            pe = node_rem + ((gid - offset)/nlp_nodes_per_pe);
        }
    }
    else if(gid >= offset)
    {
        pe = node_rem + ((gid - offset)/(nlp_nodes_per_pe - 1));
    }

    return pe;
}
/*
 * Forward event handler for PHOLD: every received event schedules one new
 * event.
 *
 * With probability percent_remote the destination is drawn uniformly from
 * [0, ttl_lps - 1] and bf->c1 is set to 1; otherwise the LP sends to
 * itself and bf->c1 is 0.  The timestamp offset is an exponential draw
 * with the configured mean plus the lookahead.  's' and 'm' are not used.
 */
void
phold_event_handler(phold_state * s, tw_bf * bf, phold_message * m, tw_lp * lp)
{
    tw_lpid target;
    tw_stime offset_ts;

    if(tw_rand_unif(lp->rng) > percent_remote)
    {
        bf->c1 = 0;
        target = lp->gid;
    }
    else
    {
        bf->c1 = 1;
        target = tw_rand_integer(lp->rng, 0, ttl_lps - 1);
        // Makes PHOLD non-deterministic across processors! Don't uncomment
        /* dest += offset_lpid; */
        /* if(dest >= ttl_lps) */
        /*     dest -= ttl_lps; */
    }

    if(target >= (g_tw_nlp * tw_nnodes()))
    {
        tw_error(TW_LOC, "bad dest");
    }

    offset_ts = tw_rand_exponential(lp->rng, mean) + lookahead;
    tw_event_send(tw_event_new(target, offset_ts, lp));
}
/*
 * Initialize the environment (reactive) model.  Should be called once by
 * the user model; it acts as the model's pre-simulation 'main' and lets the
 * environment model control how ROSS is set up.
 *
 * ptypes:  PE type installed on g_rm_pe.
 * types:   user LP types, terminated by an entry whose state_sz is 0;
 *          types[0..] is assigned to the first 'nradios' local LPs.
 * nradios: number of user (radio) LPs per PE; also stored as
 *          g_rm_spatial_offset.
 * npe, nkp, msg_sz: not used by this function.
 *
 * The remaining local LPs are grid cells typed as rm_lps.  Always
 * returns 1.
 */
int
rm_initialize(tw_petype * ptypes, tw_lptype * types, tw_peid npe,
              tw_kpid nkp, tw_lpid nradios, size_t msg_sz)
{
    tw_lptype *t;
    tw_lpid nlp_grid;
    int ntypes;
    int i;

    /*
     * Open debug plotting files
     */
#if 0
    g_rm_waves_plt_f = fopen("waves.plt", "w");
    g_rm_nodes_plt_f = fopen("user_nodes.plt", "w");
    g_rm_parts_plt_f = fopen("particles.plt", "w");

    if(!g_rm_nodes_plt_f || !g_rm_parts_plt_f)
        tw_error(TW_LOC, "Unable to open plotting files!");
#endif

    g_rm_stats = tw_calloc(TW_LOC, "rm stats", sizeof(rm_statistics), 1);
    memset(g_rm_stats, 0, sizeof(rm_statistics));

    // # of cells around me = 2 * # spatial_dim
    g_rm_spatial_dir = g_rm_spatial_dim * 2;
    g_rm_spatial_offset = nradios;
    g_rm_spatial_coeff = 2.0 / g_rm_spatial_dir;

    g_rm_spatial_grid_i = tw_calloc(TW_LOC, "spatial grid i", sizeof(int), g_rm_spatial_dim);
    g_rm_spatial_offset_ts = tw_calloc(TW_LOC, "spatial offset ts", sizeof(tw_stime), g_rm_spatial_dir);

    g_rm_spatial_ground_coeff = 0.75;

    // a negative coefficient means "not configured": derive it from the
    // attenuation and the cell spacing
    if(0.0 > g_rm_wave_loss_coeff)
    {
        g_rm_wave_loss_coeff = 1.0 / exp(g_rm_wave_attenuation * g_rm_spatial_d[0]);

        if(tw_node_eq(&g_tw_mynode, &g_tw_masternode))
            printf("\n\tSETTING WAVE LOSS COEFF %lf! \n\n", g_rm_wave_loss_coeff);
    }

    g_rm_wave_velocity = 3.0 * 1000.0 * 1000.0 * 1000.0;

    // Require NULL terminated array, plus LPs for Cells:
    // count the user types and reserve two extra slots for rm_lps
    for(ntypes = 2, t = types; t->state_sz; t++)
        ntypes++;

    //printf("Creating %d lp types\n", ntypes);

    // NOTE(review): the combined table built here is not referenced again
    // in this function -- confirm whether it is still needed.
    t = tw_calloc(TW_LOC, "lp types array", sizeof(tw_lptype), ntypes);
    memcpy(t, types, sizeof(tw_lptype) * (ntypes-2));
    memcpy(&t[ntypes-2], rm_lps, sizeof(rm_lps));

    nlp_grid = rm_grid_init();

    // NOTE(review): nlp_grid is an integer type, so the division truncates
    // before ceil() is applied and ceil() has no effect -- confirm whether
    // rounding up was intended before changing LP counts.
    nrmlp_per_pe = ceil(nlp_grid / (tw_nnodes() * g_tw_npe));

    if(tw_nnodes() == 1)
        nrmlp_per_pe = nlp_grid;

    nlp_per_pe = nradios + nrmlp_per_pe;

    g_tw_events_per_pe = .1 * nlp_grid / (tw_nnodes() * g_tw_npe);
    g_tw_events_per_pe += optimistic_memory;

    rm_grid_terrain();

    for(i = 0; i < g_tw_npe; i++)
        tw_pe_settype(g_tw_pe[i], rm_pes);
    tw_pe_settype(&g_rm_pe, ptypes);

    g_tw_rng_default = TW_FALSE;

    tw_define_lps(nlp_per_pe, sizeof(rm_message), 0);

    // user LPs first, grid-cell LPs after them
    for(i = 0; i < g_rm_spatial_offset; i++)
        tw_lp_settype(i, types);

    for( ; i < g_tw_nlp; i++)
        tw_lp_settype(i, rm_lps);

    return 1;
}
int main(int argc, char **argv, char **env) { int i; tw_opt_add(app_opt); tw_init(&argc, &argv); for (i=0; i<N_dims; i++) N_nodes*=dim_length[i]; MEAN_INTERVAL = N_nodes/ARRIVAL_RATE; nlp_per_pe = N_nodes/tw_nnodes()/g_tw_npe; g_tw_events_per_pe = nlp_per_pe/g_tw_npe + opt_mem; tw_define_lps(nlp_per_pe, sizeof(nodes_message), 0); for(i = 0; i < g_tw_nlp; i++) tw_lp_settype(i, &nodes_lps[0]); tw_run(); if(tw_ismaster()) { printf("\nTorus Network Model Statistics:\n"); printf("\t%-50s %11lld\n", "Number of nodes", nlp_per_pe * g_tw_npe * tw_nnodes()); } unsigned long long total_finished_storage[N_COLLECT_POINTS]; unsigned long long total_dropped_storage[N_COLLECT_POINTS]; unsigned long long total_generated_storage[N_COLLECT_POINTS]; unsigned long long wait_length,event_length,N_total_finish,N_total_hop; tw_stime total_time_sum,g_max_latency; for( i=0; i<N_COLLECT_POINTS; i++ ) { MPI_Reduce( &N_dropped_storage[i], &total_dropped_storage[i],1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce( &N_finished_storage[i], &total_finished_storage[i],1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce( &N_generated_storage[i], &total_generated_storage[i],1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); } MPI_Reduce( &queueing_times_sum, &event_length,1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce( &total_queue_length, &wait_length,1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce( &total_time, &total_time_sum,1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce( &N_finished, &N_total_finish,1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce( &total_hops, &N_total_hop,1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce( &max_latency, &g_max_latency,1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); unsigned long long total_rand_total; MPI_Reduce( &rand_total, &total_rand_total,1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); for( i=1; i<N_COLLECT_POINTS; i++ ) { total_dropped_storage[i]+=total_dropped_storage[i-1]; 
total_finished_storage[i]+=total_finished_storage[i-1]; total_generated_storage[i]+=total_generated_storage[i-1]; } if(tw_ismaster()) { printf("\n ****************** \n"); printf("\n Total drop: %lld; \n", total_dropped_storage[N_COLLECT_POINTS-1]); printf("\n total finish: %lld and %lld; \n", total_finished_storage[N_COLLECT_POINTS-1],N_total_finish); printf("\n total generate: %lld; \n", total_generated_storage[N_COLLECT_POINTS-1]); printf("\n total hops: %lf; \n", (double)N_total_hop/total_finished_storage[N_COLLECT_POINTS-1]); printf("\n total wait length: %lf; \n", (double)wait_length/total_finished_storage[N_COLLECT_POINTS-1]); printf("\n total total queued: %lf; \n", (double)event_length/total_finished_storage[N_COLLECT_POINTS-1]); printf("\n average travel time: %lf; \n\n", total_time_sum/total_finished_storage[N_COLLECT_POINTS-1]); for( i=0; i<N_COLLECT_POINTS; i++ ) { printf(" %d ",i*100/N_COLLECT_POINTS); printf("drop: %lld; finish: %lld; generate: %lld; alive: %lld\n", total_dropped_storage[i], total_finished_storage[i], total_generated_storage[i], total_generated_storage[i]-total_finished_storage[i]); } // capture the steady state statistics unsigned long long steady_sum=0; for( i = N_COLLECT_POINTS/2; i<N_COLLECT_POINTS;i++) steady_sum+=total_generated_storage[i]-total_finished_storage[i]; printf("\n Steady state, packet alive: %lld\n", 2*steady_sum/N_COLLECT_POINTS); printf("Aeverage is %lld\n",total_rand_total/total_generated_storage[N_COLLECT_POINTS-1]); printf("\nMax latency is %lf\n\n",g_max_latency); } tw_end(); return 0; }
void tlm_md_init(int argc, char ** argv, char ** env) { tw_lpid nlp_grid; int i; if(!g_rn_environment) return; g_tlm_stats = tw_calloc(TW_LOC, "", sizeof(*g_tlm_stats), 1); // g_tlm_output_fix up global variables g_tw_ts_end += 0.1; if(0 != strcmp(g_rn_tools_dir, "")) sprintf(g_tlm_spatial_terrain_fn, "tools/%s/terrain.txt", g_rn_tools_dir); else tw_error(TW_LOC, "No terrain file specified!"); /* * Open debug plotting files */ #if 0 g_tlm_waves_plt_f = fopen("waves.plt", "w"); g_tlm_nodes_plt_f = fopen("user_nodes.plt", "w"); g_tlm_parts_plt_f = fopen("particles.plt", "w"); if(!g_tlm_nodes_plt_f || !g_tlm_parts_plt_f) tw_error(TW_LOC, "Unable to open plotting files!"); #endif g_tlm_stats = tw_calloc(TW_LOC, "tlm stats", sizeof(tlm_statistics), 1); memset(g_tlm_stats, 0, sizeof(tlm_statistics)); // # of cells around me = 2 * # spatial_dim g_tlm_spatial_dir = g_tlm_spatial_dim * 2; g_tlm_spatial_coeff = 2.0 / g_tlm_spatial_dir; g_tlm_spatial_grid_i = tw_calloc(TW_LOC, "spatial grid i", sizeof(int), g_tlm_spatial_dim); g_tlm_spatial_offset_ts = tw_calloc(TW_LOC, "spatial offset ts", sizeof(tw_stime), g_tlm_spatial_dir); g_tlm_spatial_offset = g_rn_nmachines / (tw_nnodes() * g_tw_npe); g_tlm_spatial_ground_coeff = 0.75; if(0.0 > g_tlm_wave_loss_coeff) { g_tlm_wave_loss_coeff = 1.0 / exp(g_tlm_wave_attenuation * g_tlm_spatial_d[0]); if(tw_ismaster()) printf("\n\tSETTING WAVE LOSS COEFF %lf! 
\n\n", g_tlm_wave_loss_coeff); } // speed of light in m/s g_tlm_wave_velocity = 299792458.0; nlp_grid = tlm_grid_init(); ntlm_lp_per_pe = ceil(nlp_grid / (tw_nnodes() * g_tw_npe)); if(tw_nnodes() == 1) ntlm_lp_per_pe = nlp_grid; g_tw_events_per_pe = 1.5 * nlp_grid / (tw_nnodes() * g_tw_npe); g_tw_events_per_pe += g_tlm_optmem; #if 0 for(i = 0; i < g_tlm_spatial_offset; i++) tw_lp_settype(i, types); #endif //tw_error(TW_LOC, "setting types not ported"); if(!tw_ismaster()) return; printf("\nInitializing Model: Transmission Line Matrix\n"); #if DWB printf("\t\t%-42s %11d (%ld)\n", "TLM Membufs", 1000000, g_tlm_fd); #endif #if RM_LOG_STATS g_tlm_output_f = fopen("tlm.log", "w"); if(!g_tlm_output_f) tw_error(TW_LOC, "Unable to open TLM logfile!"); #else g_tlm_output_f = stdout; #endif fprintf(g_tlm_output_f, "\n"); fprintf(g_tlm_output_f, "\t%-50s\n", "Spatial Parameters:"); fprintf(g_tlm_output_f, "\n"); fprintf(g_tlm_output_f, "\t\t%-42s %11.4lf\n", "Spatial Coefficient", g_tlm_spatial_coeff); fprintf(g_tlm_output_f, "\n"); fprintf(g_tlm_output_f, "\t\t%-42s %11dD\n", "Dimensions Computed", g_tlm_spatial_dim); for(i = 0; i < g_tlm_spatial_dim; i++) { fprintf(g_tlm_output_f, "\t\t%-42s %11d %dD\n", "Cells per Dimension", g_tlm_spatial_grid[i], i+1); fprintf(g_tlm_output_f, "\t\t%-42s %11d %dD\n", "Cell Spacing ", g_tlm_spatial_d[i], i+1); } fprintf(g_tlm_output_f, "\n"); fprintf(g_tlm_output_f, "\t%-50s\n", "Temporal Parameters:"); fprintf(g_tlm_output_f, "\n"); fprintf(g_tlm_output_f, "\t\t%-42s %11.11lf\n", "Scatter Offset TS", g_tlm_scatter_ts); fprintf(g_tlm_output_f, "\t\t%-42s %11.4lf\n", "Loss Coefficient", g_tlm_wave_loss_coeff); fprintf(g_tlm_output_f, "\t\t%-42s %11.4lf\n", "Velocity", g_tlm_wave_velocity); for(i = 0; i < g_tlm_spatial_dim; i++) fprintf(g_tlm_output_f, "\t\t%-42s %11.11lf %dD\n", "Timestep (d/V)", g_tlm_spatial_offset_ts[i], i+1); fprintf(g_tlm_output_f, "\t\t%-42s %11.4lf\n", "Amplitude Threshold", g_tlm_wave_threshold); 
fprintf(g_tlm_output_f, "\t%-50s %11d\n", "Spatial Offset", g_tlm_spatial_offset); }
/* * This function creates and setups the g_rn_machines global data structure of nodes */ void rn_xml_topology() { xmlXPathObjectPtr obj; xmlNodePtr node; unsigned int i; //unsigned int j; unsigned int id; unsigned int size; rn_as *as; rn_area *ar; rn_subnet *sn; rn_machine *m; size = 0; /* Environment */ obj = xpath("//environment", ctxt); if(obj->nodesetval->nodeTab) g_rn_environment = *obj->nodesetval->nodeTab; xmlXPathFreeObject(obj); /* ASes */ obj = xpath("/rossnet/as", ctxt); g_rn_nas = xmlXPathNodeSetGetLength(obj->nodesetval); if(g_rn_nas > 0) { g_rn_as = (rn_as *) tw_calloc(TW_LOC, "", sizeof(rn_as), g_rn_nas); size += sizeof(rn_as) * g_rn_nas; } node = obj->nodesetval->nodeTab[0]; for(i = 0; i < obj->nodesetval->nodeNr; ) { g_rn_as[i].low = -1; g_rn_as[i].id = atoi(xml_getprop(node, "id")); //g_rn_as[i].frequency = atoi(xml_getprop(node, "frequency")); #if WASTE_MEM if(0 == strcmp(xml_getprop(node, "name"), "name")) g_rn_as[i].name = (char *) xmlStrdup((xmlChar *) xml_getprop(node, "name")); #endif node = obj->nodesetval->nodeTab[++i]; } xmlXPathFreeObject(obj); /* Areas */ obj = xpath("/rossnet/as/area", ctxt); g_rn_nareas = xmlXPathNodeSetGetLength(obj->nodesetval); if(g_rn_nareas > 0) { g_rn_areas = (rn_area *) tw_calloc(TW_LOC, "", sizeof(rn_area), g_rn_nareas); size += sizeof(rn_area) * g_rn_nareas; } node = obj->nodesetval->nodeTab[0]; for(i = 0; i < obj->nodesetval->nodeNr; ) { //g_rn_areas[i].root = INT_MAX; //g_rn_areas[i].root_lvl = 0xff; g_rn_areas[i].low = INT_MAX; g_rn_areas[i].id = atoi(xml_getprop(node, "id")); g_rn_areas[i].as = &g_rn_as[atoi(xml_getprop(node->parent, "id"))]; if(g_rn_areas[i].as->areas == NULL) g_rn_areas[i].as->areas = &g_rn_areas[i]; if(i > 0 && g_rn_areas[i-1].as->id == g_rn_areas[i].as->id) g_rn_areas[i-1].next = &g_rn_areas[i]; g_rn_areas[i].as->nareas++; node = obj->nodesetval->nodeTab[++i]; } xmlXPathFreeObject(obj); /* Subnets */ obj = xpath("/rossnet/as/area/subnet", ctxt); g_rn_nsubnets = 
xmlXPathNodeSetGetLength(obj->nodesetval); if(g_rn_nsubnets > 0) { g_rn_subnets = (rn_subnet *) tw_calloc(TW_LOC, "", sizeof(rn_subnet), g_rn_nsubnets); size += sizeof(rn_subnet) * g_rn_nsubnets; } node = *obj->nodesetval->nodeTab; for(i = 0; i < obj->nodesetval->nodeNr; ) { g_rn_subnets[i].low = INT_MAX; g_rn_subnets[i].id = atoi(xml_getprop(node, "id")); g_rn_subnets[i].area = &g_rn_areas[atoi(xml_getprop(node->parent, "id"))]; if(g_rn_subnets[i].area->subnets == NULL) g_rn_subnets[i].area->subnets = &g_rn_subnets[i]; if(i > 0 && g_rn_subnets[i-1].area->id == g_rn_subnets[i].area->id) g_rn_subnets[i-1].next = &g_rn_subnets[i]; g_rn_subnets[i].area->nsubnets++; node = obj->nodesetval->nodeTab[++i]; } xmlXPathFreeObject(obj); /* Machines */ obj = xpath("//node", ctxt); g_rn_nmachines = xmlXPathNodeSetGetLength(obj->nodesetval); if(g_rn_nmachines) { g_rn_machines = tw_calloc(TW_LOC, "machines", sizeof(rn_machine), g_rn_nmachines); size += sizeof(rn_machine) * g_rn_nmachines; } else tw_error(TW_LOC, "No machines found in XML!"); g_tw_nlp = ceil((double) obj->nodesetval->nodeNr / (double) tw_nnodes()); node = *obj->nodesetval->nodeTab; for(i = 0; i < obj->nodesetval->nodeNr; ) { id = atoi(xml_getprop(node, "id")); m = rn_getmachine(id); m->xml = node; m->conn = -1; m->id = id; m->nlinks = atoi(xml_getprop(node, "links")); //m->level = atoi(xml_getprop(node, "lvl")); m->subnet = &g_rn_subnets[atoi(xml_getprop(node->parent, "id"))]; if(0 != strcmp("", xml_getprop(node, "uid"))) m->uid = atoi(xml_getprop(node, "uid")); else m->uid = -1; if(strcmp("c_router", xml_getprop(node, "type")) == 0) { m->type = c_router; g_rn_nrouters++; } else if(strcmp("c_ct_router", xml_getprop(node, "type")) == 0) { m->type = c_ct_router; g_rn_nrouters++; } else if(strcmp("c_og_router", xml_getprop(node, "type")) == 0) { m->type = c_og_router; g_rn_nrouters++; } else if(strcmp("c_co_router", xml_getprop(node, "type")) == 0) { m->type = c_co_router; g_rn_nrouters++; } else 
if(strcmp("c_ha_router", xml_getprop(node, "type")) == 0) { m->type = c_ha_router; g_rn_nrouters++; } else if(strcmp("c_host", xml_getprop(node, "type")) == 0) m->type = c_host; else tw_error(TW_LOC, "Unknown node type: %s", xml_getprop(node, "type")); if(m->subnet->machines == NULL) m->subnet->machines = &g_rn_machines[i]; #if 0 if(id > 0 && g_rn_machines[id-1].subnet->id == g_rn_machines[id].subnet->id) g_rn_machines[id-1].next = &g_rn_machines[id]; #endif m->subnet->nmachines++; m->subnet->area->nmachines++; m->link = (rn_link *) tw_calloc(TW_LOC, "", sizeof(rn_link) * m->nlinks, 1); m->hash_link = rn_hash_create(m->nlinks); size += sizeof(rn_link) * m->nlinks; //printf("Init\'ing Machines id: %d \n", m->id); node = obj->nodesetval->nodeTab[++i]; } xmlXPathFreeObject(obj); xml_link_topology(); /* * Set up the global connection library */ obj = xpath("//connect", ctxt); if(obj->nodesetval->nodeNr) { node = *obj->nodesetval->nodeTab; for(i = 0; i < obj->nodesetval->nodeNr; ) { m = rn_getmachine(atoll(xml_getprop(node, "src"))); m->conn = atoll(xml_getprop(node, "dst")); // back link the connection //rn_getmachine(m->conn)->conn = m->id; node = obj->nodesetval->nodeTab[++i]; } } xmlXPathFreeObject(obj); for(i = 0; i < g_rn_nmachines; i++) { m = rn_getmachine(i); sn = m->subnet; ar = sn->area; as = ar->as; if(i < sn->low) { sn->low = i; if(i < ar->low) { ar->low = i; if(0 && i < as->low) as->low = i; } } if(i > sn->high) { sn->high = i; if(i > ar->high) { ar->high = i; if(0 && i > as->high) as->high = i; } } if(m->type == c_host) continue; ar->g_ospf_nlsa = ar->nmachines + ar->as->nareas + g_rn_nas; if(ar->nmachines == 0) tw_error(TW_LOC, "No machines in Area %d!\n", ar->id); #if 0 m->ft = (int *) tw_calloc(TW_LOC, "", sizeof(int ) * ar->g_ospf_nlsa, 1); size += sizeof(int) * ar->g_ospf_nlsa; for(j = 0; j < ar->g_ospf_nlsa; j++) m->ft[j] = -1; #endif /* printf("mach %d: alloc FT of size: %d \n", i, ar->nmachines + as->nareas + g_rn_nas); */ } ar = NULL; as = NULL; 
while(NULL != (as = rn_nextas(as))) { while(NULL != (ar = rn_nextarea_onas(ar, as))) { if(as->low == -1) as->low = ar->id; as->high = ar->id; } //printf("AS %d: low %ld, high %ld \n", as->id, as->low, as->high); } #if RN_XML_DEBUG || 1 if(tw_ismaster()) { printf("\n"); printf("%-32s %11d \n", "ASes: ", g_rn_nas); printf("%-32s %11d \n", "Areas: ", g_rn_nareas); printf("%-32s %11d \n", "Subnets: ", g_rn_nsubnets); printf("%-32s %11d \n", "Machines: ", g_rn_nmachines); printf("%-32s %11d \n", "Links: ", g_rn_nlinks); printf("%-32s %11d bytes\n", "Total topology size: ", size); printf("\n"); } #endif //verify_topology(); }
int main(int argc, char * argv[]) { g_tw_ts_end = 30; g_tw_gvt_interval = 16; int i; // get rid of error if compiled w/ MEMORY queues g_tw_memory_nqueues=1; // set a min lookahead of 1.0 lookahead = 1.0; //tw_opt_add(app_opt); tw_init(&argc, &argv); if( lookahead > 1.0 ) tw_error(TW_LOC, "Lookahead > 1.0 .. needs to be less\n"); //reset mean based on lookahead mean = mean - lookahead; g_tw_memory_nqueues = 16; // give at least 16 memory queue event offset_lpid = g_tw_mynode * nlp_per_pe; ttl_lps = tw_nnodes() * g_tw_npe * nlp_per_pe; //g_tw_rng_default = TW_FALSE; g_tw_lookahead = lookahead; nlp_per_pe = (NUM_CELLS_X * NUM_CELLS_Y) / (tw_nnodes() * g_tw_npe); g_tw_events_per_pe = (mult * nlp_per_pe * g_traffic_start_events) + optimistic_memory; num_cells_per_kp = (NUM_CELLS_X * NUM_CELLS_Y) / (NUM_VP_X * NUM_VP_Y); vp_per_proc = (NUM_VP_X * NUM_VP_Y) / ((tw_nnodes() * g_tw_npe)) ; g_vp_per_proc = vp_per_proc; g_tw_nlp = nlp_per_pe; g_tw_nkp = vp_per_proc; g_tw_mapping = CUSTOM; g_tw_custom_initial_mapping = &traffic_grid_mapping; g_tw_custom_lp_global_to_local_map = &CellMapping_to_lp; tw_define_lps(nlp_per_pe, sizeof(Msg_Data)); for(i = 0; i < g_tw_nlp; i++) tw_lp_settype(i, &mylps[0]); if( g_tw_mynode == 0 ) { printf("========================================\n"); printf("Traffice Model Configuration..............\n"); printf(" Lookahead..............%lf\n", lookahead); printf(" Start-events...........%u\n", g_traffic_start_events); printf(" stagger................%u\n", stagger); printf(" Mean...................%lf\n", mean); printf(" Mult...................%lf\n", mult); printf(" Memory.................%u\n", optimistic_memory); printf(" Remote.................%lf\n", percent_remote); printf("========================================\n\n"); } tw_run(); tw_end(); printf("Number of Arivals: %lld\n", totalCars); printf("Number of Cars reached their dest: %lld\n", carsFinished); return 0; }
int main(int argc, char **argv, char **env) { #ifdef TEST_COMM_ROSS // Init outside of ROSS MPI_Init(&argc, &argv); // Split COMM_WORLD in half even/odd int mpi_rank; MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); MPI_Comm split_comm; MPI_Comm_split(MPI_COMM_WORLD, mpi_rank%2, mpi_rank, &split_comm); if(mpi_rank%2 == 1){ // tests should catch any MPI_COMM_WORLD collectives MPI_Finalize(); } // Allows ROSS to function as normal tw_comm_set(split_comm); #endif int i; // get rid of error if compiled w/ MEMORY queues g_tw_memory_nqueues=1; // set a min lookahead of 1.0 lookahead = 1.0; tw_opt_add(app_opt); tw_init(&argc, &argv); #ifdef USE_DAMARIS if(g_st_ross_rank) { // only ross ranks should run code between here and tw_run() #endif if( lookahead > 1.0 ) tw_error(TW_LOC, "Lookahead > 1.0 .. needs to be less\n"); //reset mean based on lookahead mean = mean - lookahead; g_tw_memory_nqueues = 16; // give at least 16 memory queue event offset_lpid = g_tw_mynode * nlp_per_pe; ttl_lps = tw_nnodes() * g_tw_npe * nlp_per_pe; g_tw_events_per_pe = (mult * nlp_per_pe * g_phold_start_events) + optimistic_memory; //g_tw_rng_default = TW_FALSE; g_tw_lookahead = lookahead; tw_define_lps(nlp_per_pe, sizeof(phold_message)); for(i = 0; i < g_tw_nlp; i++) { tw_lp_settype(i, &mylps[0]); st_model_settype(i, &model_types[0]); } if( g_tw_mynode == 0 ) { printf("========================================\n"); printf("PHOLD Model Configuration..............\n"); printf(" Lookahead..............%lf\n", lookahead); printf(" Start-events...........%u\n", g_phold_start_events); printf(" stagger................%u\n", stagger); printf(" Mean...................%lf\n", mean); printf(" Mult...................%lf\n", mult); printf(" Memory.................%u\n", optimistic_memory); printf(" Remote.................%lf\n", percent_remote); printf("========================================\n\n"); } tw_run(); #ifdef USE_DAMARIS } // end if(g_st_ross_rank) #endif tw_end(); return 0; }
void epi_init_agent_default(void) { tw_memory *b; epi_agent *a; int i; int j; int id; int lid; int rid; if(!g_epi_nagents) tw_error(TW_LOC, "No agents specified!"); if(!g_tw_nlp) tw_error(TW_LOC, "No locations specified!"); if(!g_epi_nregions) tw_error(TW_LOC, "No regions specified!"); g_epi_regions = tw_calloc(TW_LOC, "", sizeof(unsigned int *), g_epi_nregions); // create reporting tables for each region, for each disease for(i = 0; i < g_epi_nregions; i++) { g_epi_regions[i] = tw_calloc(TW_LOC, "", sizeof(*g_epi_regions[i]), g_epi_ndiseases); for(j = 0; j < g_epi_ndiseases; j++) g_epi_regions[i][j] = tw_calloc(TW_LOC, "", sizeof(*g_epi_regions[i][j]), g_epi_diseases[j].nstages); } // allocate the location priority queues for this node g_epi_pq = tw_calloc(TW_LOC, "", sizeof(*g_epi_pq), g_tw_nlp); for(i = 0; i < g_tw_nlp; i++) g_epi_pq[i] = pq_create(); // round-robin mapping of agents to locations, and locations to regions for(i = 0; i < g_epi_nagents; i++) { lid = i % g_tw_nlp; rid = lid % g_epi_nregions; b = tw_memory_alloc(g_tw_lp[lid], g_epi_fd); a = tw_memory_data(b); a->id = id++; a->region = rid; a->pathogens = NULL; a->curr = 0; a->nloc = tw_rand_integer(g_tw_lp[lid]->rng, 0, 10); // setup "home" location a->loc[0] = lid; a->dur[0] = tw_rand_exponential(g_tw_lp[lid]->rng, g_epi_mean); a->ts_next = a->ts_remove = a->dur[0]; pq_enqueue(g_epi_pq[a->loc[0]], b); printf("A %d nloc %d: (%d, %lf) ", a->id, a->nloc, a->loc[0], a->dur[0]); for(j = 1; j < a->nloc; j++) { a->loc[j] = tw_rand_integer(g_tw_lp[lid]->rng, 0, (g_tw_nlp * tw_nnodes()) - 2); a->dur[j] = tw_rand_exponential(g_tw_lp[lid]->rng, g_epi_mean); printf("(%d %lf) ", a->loc[j], a->dur[j]); } printf("\n"); ga = a; } }
static tw_pe * setup_pes(void) { tw_pe *pe; tw_pe *master; int i; unsigned int num_events_per_pe; num_events_per_pe = 1 + g_tw_events_per_pe + g_tw_events_per_pe_extra; master = g_tw_pe[0]; if (!master) { tw_error(TW_LOC, "No PE configured on this node."); } if (g_tw_mynode == g_tw_masternode) { master->master = 1; } master->local_master = 1; for(i = 0; i < g_tw_npe; i++) { pe = g_tw_pe[i]; if (g_tw_buddy_alloc) { g_tw_buddy_master = create_buddy_table(g_tw_buddy_alloc); if (g_tw_buddy_master == NULL) { tw_error(TW_LOC, "create_buddy_table() failed."); } tw_delta_alloc(pe); } pe->pq = tw_pq_create(); tw_eventq_alloc(&pe->free_q, num_events_per_pe); pe->abort_event = tw_eventq_shift(&pe->free_q); #ifdef USE_RIO for (i = 0; i < g_io_events_buffered_per_rank; i++) { tw_eventq_push(&g_io_free_events, tw_eventq_pop(&g_tw_pe[0]->free_q)); } #endif } if (g_tw_mynode == g_tw_masternode) { printf("\nROSS Core Configuration: \n"); printf("\t%-50s %11u\n", "Total Nodes", tw_nnodes()); fprintf(g_tw_csv, "%u,", tw_nnodes()); printf("\t%-50s [Nodes (%u) x PE_per_Node (%lu)] %lu\n", "Total Processors", tw_nnodes(), g_tw_npe, (tw_nnodes() * g_tw_npe)); fprintf(g_tw_csv, "%lu,", (tw_nnodes() * g_tw_npe)); printf("\t%-50s [Nodes (%u) x KPs (%lu)] %lu\n", "Total KPs", tw_nnodes(), g_tw_nkp, (tw_nnodes() * g_tw_nkp)); fprintf(g_tw_csv, "%lu,", (tw_nnodes() * g_tw_nkp)); printf("\t%-50s %11llu\n", "Total LPs", (tw_nnodes() * g_tw_npe * g_tw_nlp)); fprintf(g_tw_csv, "%llu,", (tw_nnodes() * g_tw_npe * g_tw_nlp)); printf("\t%-50s %11.2lf\n", "Simulation End Time", g_tw_ts_end); fprintf(g_tw_csv, "%11.2lf\n", g_tw_ts_end); switch(g_tw_mapping) { case LINEAR: printf("\t%-50s %11s\n", "LP-to-PE Mapping", "linear"); fprintf(g_tw_csv, "%s,", "linear"); break; case ROUND_ROBIN: printf("\t%-50s %11s\n", "LP-to-PE Mapping", "round robin"); fprintf(g_tw_csv, "%s,", "round robin"); break; case CUSTOM: printf("\t%-50s %11s\n", "LP-to-PE Mapping", "model defined"); fprintf(g_tw_csv, "%s,", "model 
defined"); break; } printf("\n"); #ifndef ROSS_DO_NOT_PRINT printf("\nROSS Event Memory Allocation:\n"); printf("\t%-50s %11d\n", "Model events", num_events_per_pe); fprintf(g_tw_csv, "%d,", num_events_per_pe); printf("\t%-50s %11d\n", "Network events", g_tw_gvt_threshold); fprintf(g_tw_csv, "%d,", g_tw_gvt_threshold); printf("\t%-50s %11d\n", "Total events", g_tw_events_per_pe); fprintf(g_tw_csv, "%d,", g_tw_events_per_pe); printf("\n"); #endif } return master; }
int main(int argc, char **argv, char **env) { int i; num_buf_slots = vc_size / chunk_size; tw_opt_add(app_opt); tw_init(&argc, &argv); if(strcmp(traffic_str, "uniform") == 0) TRAFFIC = UNIFORM_RANDOM; else if(strcmp(traffic_str, "nearest") == 0) TRAFFIC = NEAREST_NEIGHBOR; else if(strcmp(traffic_str, "diagonal") == 0) TRAFFIC = DIAGONAL; else printf("\n Incorrect traffic pattern specified, using %s as default ", traffic_str ); /* for automatically reducing the channel link bandwidth of a 7-D or a 9-D torus */ link_bandwidth = (link_bandwidth * 10) / (2 * N_dims); injection_limit = injection_interval / MEAN_INTERVAL; for (i=0; i<N_dims; i++) { N_nodes*=dim_length[i]; N_mpi_procs*=dim_length[i]; } nlp_nodes_per_pe = N_nodes/tw_nnodes()/g_tw_npe; nlp_mpi_procs_per_pe = N_mpi_procs/tw_nnodes()/g_tw_npe; num_rows = sqrt(N_nodes); num_cols = num_rows; total_lps = g_tw_nlp * tw_nnodes(); node_rem = N_nodes % (tw_nnodes()/g_tw_npe); if(g_tw_mynode < node_rem) { nlp_nodes_per_pe++; nlp_mpi_procs_per_pe++; } num_packets=1; num_chunks = PACKET_SIZE/chunk_size; if( mpi_message_size > PACKET_SIZE) { num_packets = mpi_message_size / PACKET_SIZE; if(mpi_message_size % PACKET_SIZE != 0 ) num_packets++; } g_tw_mapping=CUSTOM; g_tw_custom_initial_mapping=&torus_mapping; g_tw_custom_lp_global_to_local_map=&torus_mapping_to_lp; g_tw_events_per_pe = mem_factor * 1024 * (nlp_nodes_per_pe/g_tw_npe + nlp_mpi_procs_per_pe/g_tw_npe) + opt_mem; tw_define_lps(nlp_nodes_per_pe + nlp_mpi_procs_per_pe, sizeof(nodes_message), 0); head_delay = (1 / link_bandwidth) * chunk_size; // BG/L torus network paper: Tokens are 32 byte chunks that is why the credit delay is adjusted according to bandwidth * 32 credit_delay = (1 / link_bandwidth) * 8; packet_offset = (g_tw_ts_end/MEAN_INTERVAL) * num_packets; if(tw_ismaster()) { printf("\nTorus Network Model Statistics:\n"); printf("Number of nodes: %d Torus dimensions: %d ", N_nodes, N_dims); printf(" Link Bandwidth: %f Traffic pattern: %s \n", 
link_bandwidth, traffic_str ); } tw_run(); unsigned long long total_finished_storage[N_COLLECT_POINTS]; unsigned long long total_generated_storage[N_COLLECT_POINTS]; unsigned long long total_num_hops[N_COLLECT_POINTS]; unsigned long long total_queue_depth[N_COLLECT_POINTS]; unsigned long long wait_length,event_length,N_total_packets_finish, N_total_msgs_finish, N_total_hop; tw_stime total_time_sum,g_max_latency; for( i=0; i<N_COLLECT_POINTS; i++ ) { MPI_Reduce( &N_finished_storage[i], &total_finished_storage[i],1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce( &N_generated_storage[i], &total_generated_storage[i],1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce( &N_num_hops[i], &total_num_hops[i],1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce( &N_queue_depth[i], &total_queue_depth[i],1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); } MPI_Reduce( &total_time, &total_time_sum,1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce( &N_finished_packets, &N_total_packets_finish,1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce( &N_finished_msgs, &N_total_msgs_finish,1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce( &total_hops, &N_total_hop,1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce( &max_latency, &g_max_latency,1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); for( i=1; i<N_COLLECT_POINTS; i++ ) { total_finished_storage[i]+=total_finished_storage[i-1]; total_generated_storage[i]+=total_generated_storage[i-1]; } // if( total_lp_time > 0 ) // total_lp_time /= num_mpi_msgs; if(tw_ismaster()) { printf("\n ****************** \n"); printf("\n total packets finished: %lld and %lld; \n", total_finished_storage[N_COLLECT_POINTS-1],N_total_packets_finish); printf("\n total MPI messages finished: %lld; \n", N_total_msgs_finish); printf("\n total generate: %lld; \n", total_generated_storage[N_COLLECT_POINTS-1]); printf("\n total hops: %lf; \n", (double)N_total_hop/N_total_packets_finish); printf("\n average travel time: %lf; 
\n\n", total_time_sum/N_total_msgs_finish); #if REPORT_BANDWIDTH for( i=0; i<N_COLLECT_POINTS; i++ ) { printf(" %d ",i*100/N_COLLECT_POINTS); printf("total finish: %lld; generate: %lld; alive: %lld \n ", total_finished_storage[i], total_generated_storage[i], total_generated_storage[i]-total_finished_storage[i]); } // capture the steady state statistics tw_stime bandwidth; tw_stime interval = (g_tw_ts_end / N_COLLECT_POINTS); interval = interval/(1000.0 * 1000.0 * 1000.0); //convert ns to seconds for( i=1; i<N_COLLECT_POINTS; i++ ) { bandwidth = total_finished_storage[i] - total_finished_storage[i - 1]; unsigned long long avg_hops = total_num_hops[i]/bandwidth; bandwidth = (bandwidth * PACKET_SIZE) / (1024.0 * 1024.0 * 1024.0); // convert bytes to GB bandwidth = bandwidth / interval; printf("\n Interval %0.7lf Bandwidth %lf avg hops %lld queue depth %lld ", interval, bandwidth, avg_hops, (unsigned long long)total_queue_depth[i]/num_chunks); } unsigned long long steady_sum=0; for( i = N_COLLECT_POINTS/2; i<N_COLLECT_POINTS;i++) steady_sum+=total_generated_storage[i]-total_finished_storage[i]; printf("\n Steady state, packet alive: %lld\n", 2*steady_sum/N_COLLECT_POINTS); #endif printf("\nMax latency is %lf\n\n",g_max_latency); } // if(packet_sent > 0 || credit_sent > 0) // printf("\n Packet sent are %d, credit sent %d ", packet_sent, credit_sent); tw_end(); return 0; }
/**
 * @brief Completes reception of a network event and routes it locally.
 *
 * Updates the read counters, resolves the destination LP, validates the
 * sender and timestamp, and then either queues the matching event for
 * cancellation (anti-message) or schedules the event on the destination PE.
 *
 * @param[in] me pointer to the receiving PE
 * @param[in] e pointer to the event that was just received
 * @param[in] buffer not currently used
 */
static void
recv_finish(tw_pe *me, tw_event *e, char * buffer)
{
  tw_pe *target_pe;
  tw_clock t0;

  (void) buffer;

  me->stats.s_nread_network += 1;
  me->s_nwhite_recv += 1;

  //  printf("recv_finish: remote event [cancel %u] FROM: LP %lu, PE %lu, TO: LP %lu, PE %lu at TS %lf \n",
  //	 e->state.cancel_q, (tw_lpid)e->src_lp, e->send_pe, (tw_lpid)e->dest_lp, me->id, e->recv_ts);

  // the event arrives carrying a global LP id; swap in the local LP
  e->dest_lp = tw_getlocal_lp((tw_lpid) e->dest_lp);
  target_pe = e->dest_lp->pe;

  // instrumentation
  e->dest_lp->kp->kp_stats->s_nread_network += 1;
  e->dest_lp->lp_stats->s_nread_network += 1;

  if(e->send_pe > tw_nnodes()-1)
    {
      tw_error(TW_LOC, "bad sendpe_id: %d", e->send_pe);
    }

  // reset the causality links of the freshly received event
  e->cancel_next = NULL;
  e->caused_by_me = NULL;
  e->cause_next = NULL;

  if(me->GVT > e->recv_ts)
    {
      tw_error(TW_LOC, "%d: Received straggler from %d: %lf (%d)",
	       me->id, e->send_pe, e->recv_ts, e->state.cancel_q);
    }

  if(tw_gvt_inprogress(me))
    {
      me->trans_msg_ts = ROSS_MIN(me->trans_msg_ts, e->recv_ts);
    }

  // if cancel event, retrieve and flush
  // else, store in hash table
  if(e->state.cancel_q)
    {
      tw_event *victim = tw_hash_remove(me->hash_t, e, e->send_pe);

      // NOTE: it is possible to cancel the event we
      // are currently processing at this PE since this
      // MPI module lets me read cancel events during
      // event sends over the network.

      victim->state.cancel_q = 1;
      victim->state.remote = 0;

      // push onto the head of the destination PE's cancel list
      victim->cancel_next = target_pe->cancel_q;
      target_pe->cancel_q = victim;

      tw_event_free(me, e);

      return;
    }

  // only the optimistic protocols track remote events in the hash table
  switch(g_tw_synchronization_protocol)
    {
    case OPTIMISTIC:
    case OPTIMISTIC_DEBUG:
    case OPTIMISTIC_REALTIME:
      tw_hash_insert(me->hash_t, e, e->send_pe);
      e->state.remote = 1;
      break;

    default:
      break;
    }

  /* NOTE: the final check in the if conditional below was added to make sure
   * that we do not execute the fast case unless the cancellation queue is
   * empty on the destination PE. Otherwise we need to invoke the normal
   * scheduling routines to make sure that a forward event doesn't bypass a
   * cancellation event with an earlier timestamp.  This is helpful for
   * stateful models that produce incorrect results when presented with
   * duplicate messages with no rollback between them.
   */
  if(me == target_pe
     && e->dest_lp->kp->last_time <= e->recv_ts
     && !target_pe->cancel_q)
    {
      /* Fast case, we are sending to our own PE and
       * there is no rollback caused by this send.
       */
      t0 = tw_clock_read();
      tw_pq_enqueue(target_pe->pq, e);
      target_pe->stats.s_pq += tw_clock_read() - t0;
      return;
    }

  if (me->id != target_pe->id)
    {
      /* Never should happen; MPI should have gotten the
       * message to the correct node without needing us
       * to redirect the message there for it.  This is
       * probably a serious bug with the event headers
       * not being formatted right.
       */
      tw_error(
	       TW_LOC,
	       "Event recived by PE %u but meant for PE %u",
	       me->id,
	       target_pe->id);
      return;
    }

  /* Slower, but still local send, so put into top
   * of dest_pe->event_q.
   */
  e->state.owner = TW_pe_event_q;
  tw_eventq_push(&target_pe->event_q, e);
}