/*
 * Collective operation for the torus network.
 *
 * Resolves the LP_CONFIG_NM LP located at the sender's group repetition and
 * offset and sends it a T_COLLECTIVE_INIT message tagged with the category
 * and the sender's gid.
 *
 * category          - NUL-terminated category name; truncated if it does not
 *                     fit in the message's category field
 * message_size      - stored in remote_event_size_bytes; overwritten by
 *                     remote_event_size when a remote event is supplied
 * remote_event_size - number of bytes in remote_event (<= 0 means none)
 * remote_event      - bytes copied behind the message when
 *                     remote_event_size > 0
 * sender            - LP that starts the collective
 */
void torus_collective(char* category, int message_size, int remote_event_size, const void* remote_event, tw_lp* sender)
{
    tw_event * e_new;
    tw_stime xfer_to_nic_time;
    nodes_message * msg;
    tw_lpid local_nic_id;
    char* tmp_ptr;
    size_t category_len;

    // TODO: be annotation-aware
    codes_mapping_get_lp_info(sender->gid, grp_name, &mapping_grp_id, NULL,
            &mapping_type_id, NULL, &mapping_rep_id, &mapping_offset);
    codes_mapping_get_lp_id(grp_name, LP_CONFIG_NM, NULL, 1,
            mapping_rep_id, mapping_offset, &local_nic_id);

    xfer_to_nic_time = g_tw_lookahead + codes_local_latency(sender);
    e_new = model_net_method_event_new(local_nic_id, xfer_to_nic_time,
            sender, TORUS, (void**)&msg, (void**)&tmp_ptr);

    msg->remote_event_size_bytes = message_size;

    /* Bounded copy: an over-long category is truncated rather than allowed
     * to run past the end of the category field. */
    category_len = strlen(category);
    if (category_len >= sizeof(msg->category))
        category_len = sizeof(msg->category) - 1;
    memcpy(msg->category, category, category_len);
    msg->category[category_len] = '\0';

    msg->sender_svr = sender->gid;
    msg->type = T_COLLECTIVE_INIT;

    /*
     * NOTE(review): the payload pointer handed back by
     * model_net_method_event_new() is replaced here by
     * msg + torus_get_msg_sz(), whereas the collective handlers in this file
     * write through the returned pointer. Confirm both name the same
     * location.
     */
    tmp_ptr = (char*)msg;
    tmp_ptr += torus_get_msg_sz();
    if (remote_event_size > 0) {
        msg->remote_event_size_bytes = remote_event_size;
        memcpy(tmp_ptr, remote_event, remote_event_size);
    }

    tw_event_send(e_new);
}
/*
 * Place this node in the tree used for collective operations.
 *
 * The node id is (group repetition * LPs per repetition) + offset. Node 0 is
 * the root; the children of node k are TREE_DEGREE*k + 1 through
 * TREE_DEGREE*k + TREE_DEGREE, limited to ids below num_lps * num_reps.
 * Unused child slots hold -1.
 */
void torus_collective_init(nodes_state * s, tw_lp * lp)
{
    // TODO: be annotation-aware somehow
    codes_mapping_get_lp_info(lp->gid, grp_name, &mapping_grp_id, NULL,
            &mapping_type_id, NULL, &mapping_rep_id, &mapping_offset);
    int num_lps = codes_mapping_get_lp_count(grp_name, 1, LP_CONFIG_NM, s->anno, 0);
    int num_reps = codes_mapping_get_group_reps(grp_name);
    int total_nodes = num_lps * num_reps;
    int slot;

    s->node_id = (mapping_rep_id * num_lps) + mapping_offset;

    /* the root has no parent; everyone else derives it from its own id */
    s->is_root = (s->node_id == 0);
    if (s->is_root)
        s->parent_node_id = -1;
    else
        s->parent_node_id = (s->node_id - ((s->node_id - 1) % TREE_DEGREE)) / TREE_DEGREE;

    s->children = malloc(TREE_DEGREE * sizeof(tw_lpid));

    /* assume a leaf until at least one valid child is found */
    s->is_leaf = 1;
    s->num_children = 0;

    for (slot = 0; slot < TREE_DEGREE; slot++) {
        tw_lpid candidate = (TREE_DEGREE * s->node_id) + slot + 1;

        if (candidate >= total_nodes) {
            /* past the last node: mark the slot as empty */
            s->children[slot] = -1;
            continue;
        }

        s->children[slot] = candidate;
        s->num_children++;
        s->is_leaf = 0;
    }

#if TORUS_COLLECTIVE_DEBUG == 1
    printf("\n LP %ld parent node id ", s->node_id);

    for (slot = 0; slot < TREE_DEGREE; slot++)
        printf(" child node ID %ld ", s->children[slot]);
    printf("\n");

    if (s->is_leaf)
        printf("\n LP %ld is leaf ", s->node_id);
#endif
}
/*
 * Return the LP that sender_id should send to.
 *
 * Only a sender in group repetition 1 has a destination: the LP of the same
 * type in the same group at repetition 2, offset 0. Every other sender gets
 * (tw_lpid)-1. (Random pairing of repetitions is disabled in this version.)
 */
tw_lpid get_next_node(tw_lpid sender_id)
{
    char grp_name[MAX_NAME_LENGTH];
    char lp_type_name[MAX_NAME_LENGTH];
    char annotation[MAX_NAME_LENGTH];
    int grp_id, lp_type_id, grp_rep_id, offset;
    tw_lpid rtn_id;

    codes_mapping_get_lp_info(sender_id, grp_name, &grp_id, lp_type_name,
            &lp_type_id, annotation, &grp_rep_id, &offset);

    if (grp_rep_id == 1) {
        codes_mapping_get_lp_id(grp_name, lp_type_name, NULL, 1,
                2 /* the neighbor */, 0, &rtn_id);
    } else {
        rtn_id = (tw_lpid) -1;
    }

    return rtn_id;
}
/*
 * Handle the start of a collective at this node.
 *
 * Records the current time as the collective init time (the previous value is
 * stashed in the message) and remembers the originating server. A leaf node
 * then forwards a copy of the message, plus any attached remote event, to its
 * parent as T_COLLECTIVE_FAN_IN. Non-leaf nodes send nothing here.
 */
static void node_collective_init(nodes_state * s, tw_bf * bf, nodes_message * msg, tw_lp * lp)
{
    /* stash the old timestamp in the message before overwriting it */
    msg->saved_collective_init_time = s->collective_init_time;
    s->collective_init_time = tw_now(lp);
    s->origin_svr = msg->sender_svr;

    if (!s->is_leaf)
        return;

    /* translate the parent's tree id into a global LP id */
    // TODO: be annotation-aware
    codes_mapping_get_lp_info(lp->gid, grp_name, &mapping_grp_id, NULL,
            &mapping_type_id, NULL, &mapping_rep_id, &mapping_offset);
    int lps_per_rep = codes_mapping_get_lp_count(grp_name, 1, LP_CONFIG_NM, NULL, 1);

    tw_lpid parent_gid;
    codes_mapping_get_lp_id(grp_name, LP_CONFIG_NM, NULL, 1,
            s->parent_node_id / lps_per_rep,
            (s->parent_node_id % lps_per_rep), &parent_gid);

    /* tell the parent that this LP has entered the collective */
    tw_stime send_delay = g_tw_lookahead + LEVEL_DELAY;
    nodes_message *fwd;
    void *fwd_payload;
    tw_event *ev = model_net_method_event_new(parent_gid, send_delay, lp,
            TORUS, (void**)&fwd, (void**)&fwd_payload);

    memcpy(fwd, msg, sizeof(nodes_message));
    if (msg->remote_event_size_bytes) {
        memcpy(fwd_payload, model_net_method_get_edata(TORUS, msg),
                msg->remote_event_size_bytes);
    }

    fwd->type = T_COLLECTIVE_FAN_IN;
    fwd->sender_node = s->node_id;

    tw_event_send(ev);
}
/*
 * Kick off one message from this server LP.
 *
 * Picks a destination index according to the configured traffic pattern,
 * maps it to a global LP id and hands a PAYLOAD_SZ message to model_net_event
 * with a REMOTE event for the destination and a LOCAL event for the sender.
 * Finally schedules the next event via issue_event().
 *
 * ns - server state; start_ts and msg_sent_count are updated
 * b  - unused
 * m  - unused
 * lp - the server LP
 */
static void handle_kickoff_event(svr_state * ns, tw_bf * b, svr_msg * m, tw_lp * lp)
{
    tw_lpid local_dest = -1, global_dest = -1;

    /*
     * Both events live on the stack: model_net_event() receives them as
     * const void * together with their size.
     * NOTE(review): this relies on model_net_event() copying the event data
     * before it returns - confirm against the model-net API.
     */
    svr_msg m_local = { .svr_event_type = LOCAL, .src = lp->gid };
    svr_msg m_remote = m_local;
    m_remote.svr_event_type = REMOTE;

    assert(net_id == DRAGONFLY); /* only supported for dragonfly model right now. */

    ns->start_ts = tw_now(lp);

    /* The annotation is not needed: the id lookup below is made with the
     * ignore-annotation flag set, so no output buffer is passed for it. */
    codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name,
            &lp_type_index, NULL, &rep_id, &offset);

    if (traffic == UNIFORM) {
        /* in case of uniform random traffic, send to a random destination. */
        local_dest = tw_rand_integer(lp->rng, 0, num_nodes - 1);
    } else if (traffic == NEAREST_GROUP) {
        local_dest = (rep_id * 2 + offset + num_nodes_per_grp) % num_nodes;
    } else if (traffic == NEAREST_NEIGHBOR) {
        local_dest = (rep_id * 2 + offset + 2) % num_nodes;
    }

    /* also trips when no traffic pattern matched and local_dest is still -1 */
    assert(local_dest < num_nodes);

    codes_mapping_get_lp_id(group_name, lp_type_name, NULL, 1,
            local_dest / num_servers_per_rep,
            local_dest % num_servers_per_rep, &global_dest);

    ns->msg_sent_count++;
    model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0,
            sizeof(svr_msg), (const void*)&m_remote,
            sizeof(svr_msg), (const void*)&m_local, lp);

    issue_event(ns, lp);
}
/*
 * Handle a fan-in notification from a child in the collective tree.
 *
 * Counts the arrival. Once the count equals the number of children the
 * counter is reset (its previous value minus one is stashed in the message)
 * and:
 *   - a non-root node forwards T_COLLECTIVE_FAN_IN to its parent (bf->c1);
 *   - the root calls send_remote_event() and sends T_COLLECTIVE_FAN_OUT to
 *     each of its children (bf->c2).
 * Any attached remote event is copied along with every forwarded message.
 */
static void node_collective_fan_in(nodes_state * s, tw_bf * bf, nodes_message * msg, tw_lp * lp)
{
    tw_event *ev;
    nodes_message *fwd;
    void *fwd_payload;
    tw_stime send_delay;
    int child;

    s->num_fan_nodes++;

    // TODO: be annotation-aware
    codes_mapping_get_lp_info(lp->gid, grp_name, &mapping_grp_id, NULL,
            &mapping_type_id, NULL, &mapping_rep_id, &mapping_offset);
    int lps_per_rep = codes_mapping_get_lp_count(grp_name, 1, LP_CONFIG_NM, NULL, 1);

    bf->c1 = 0;
    bf->c2 = 0;

    /* still waiting for some children to report */
    if (s->num_fan_nodes != s->num_children)
        return;

    msg->saved_fan_nodes = s->num_fan_nodes - 1;
    s->num_fan_nodes = 0;

    if (!s->is_root) {
        /* all children have fanned in: signal the parent */
        tw_lpid parent_gid;

        bf->c1 = 1;
        send_delay = g_tw_lookahead + LEVEL_DELAY;

        /* get the global LP ID of the parent node */
        codes_mapping_get_lp_id(grp_name, LP_CONFIG_NM, NULL, 1,
                s->parent_node_id / lps_per_rep,
                (s->parent_node_id % lps_per_rep), &parent_gid);

        ev = model_net_method_event_new(parent_gid, send_delay, lp, TORUS,
                (void**)&fwd, &fwd_payload);

        memcpy(fwd, msg, sizeof(nodes_message));
        if (msg->remote_event_size_bytes) {
            memcpy(fwd_payload, model_net_method_get_edata(TORUS, msg),
                    msg->remote_event_size_bytes);
        }
        fwd->type = T_COLLECTIVE_FAN_IN;
        fwd->sender_node = s->node_id;

        tw_event_send(ev);
        return;
    }

    /* root node starts off with the fan-out phase */
    bf->c2 = 1;
    send_remote_event(s, bf, msg, lp);

    for (child = 0; child < s->num_children; child++) {
        tw_lpid child_gid;

        /* Do some computation and fan out immediate child nodes from the collective */
        send_delay = g_tw_lookahead + COLLECTIVE_COMPUTATION_DELAY + LEVEL_DELAY + tw_rand_exponential(lp->rng, (double)LEVEL_DELAY/50);

        /* get global LP ID of the child node */
        codes_mapping_get_lp_id(grp_name, LP_CONFIG_NM, NULL, 1,
                s->children[child] / lps_per_rep,
                (s->children[child] % lps_per_rep), &child_gid);

        ev = model_net_method_event_new(child_gid, send_delay, lp, TORUS,
                (void**)&fwd, &fwd_payload);

        memcpy(fwd, msg, sizeof(nodes_message));
        if (msg->remote_event_size_bytes) {
            memcpy(fwd_payload, model_net_method_get_edata(TORUS, msg),
                    msg->remote_event_size_bytes);
        }
        fwd->type = T_COLLECTIVE_FAN_OUT;
        fwd->sender_node = s->node_id;

        tw_event_send(ev);
    }
}
/*
 * Initialize the torus model state of one node LP (this initialization part
 * is borrowed from Ning's torus model).
 *
 * Selects the parameter set matching the LP's annotation, allocates and
 * zeroes the per-link/per-VC bookkeeping arrays, computes the node's torus
 * coordinates and the flat ids of its minus/plus neighbour in every
 * dimension, and finally sets up the collective tree via
 * torus_collective_init().
 */
static void torus_init( nodes_state * s, tw_lp * lp )
{
    int i, j;
    char anno[MAX_NAME_LENGTH];

    codes_mapping_get_lp_info(lp->gid, grp_name, &mapping_grp_id, NULL,
            &mapping_type_id, anno, &mapping_rep_id, &mapping_offset);

    if (anno[0] == '\0') {
        /* un-annotated LPs use the last entry of all_params */
        s->anno = NULL;
        s->params = &all_params[num_params-1];
    } else {
        s->anno = strdup(anno);
        int id = configuration_get_annotation_index(anno, anno_map);
        s->params = &all_params[id];
    }

    // shorthand
    const torus_param *p = s->params;

    s->neighbour_minus_lpID = (int*)malloc(p->n_dims * sizeof(int));
    s->neighbour_plus_lpID = (int*)malloc(p->n_dims * sizeof(int));
    s->dim_position = (int*)malloc(p->n_dims * sizeof(int));

    /* one row per link (two per dimension), one column per virtual channel */
    s->buffer = (int**)malloc(2*p->n_dims * sizeof(int*));
    s->next_link_available_time = (tw_stime**)malloc(2*p->n_dims * sizeof(tw_stime*));
    s->next_credit_available_time = (tw_stime**)malloc(2*p->n_dims * sizeof(tw_stime*));
    s->next_flit_generate_time = (tw_stime**)malloc(2*p->n_dims * sizeof(tw_stime*));

    for (i = 0; i < 2*p->n_dims; i++) {
        s->buffer[i] = (int*)malloc(p->num_vc * sizeof(int));
        s->next_link_available_time[i] = (tw_stime*)malloc(p->num_vc * sizeof(tw_stime));
        s->next_credit_available_time[i] = (tw_stime*)malloc(p->num_vc * sizeof(tw_stime));
        s->next_flit_generate_time[i] = (tw_stime*)malloc(p->num_vc * sizeof(tw_stime));

        for (j = 0; j < p->num_vc; j++) {
            s->buffer[i][j] = 0;
            s->next_link_available_time[i][j] = 0.0;
            s->next_credit_available_time[i][j] = 0.0;
            /* malloc'ed above like its siblings, so it needs a defined
             * starting value as well */
            s->next_flit_generate_time[i][j] = 0.0;
        }
    }

    // calculate my torus coords
    to_dim_id(codes_mapping_get_lp_relative_id(lp->gid, 0, 1),
            s->params->n_dims, s->params->dim_length, s->dim_position);

    /* scratch copy of our coordinates; one dimension at a time is shifted
     * to obtain a neighbour and then restored */
    int temp_dim_pos[ p->n_dims ];
    for ( i = 0; i < p->n_dims; i++ )
        temp_dim_pos[ i ] = s->dim_position[ i ];

    // calculate minus neighbour's lpID
    for ( j = 0; j < p->n_dims; j++ ) {
        /* adding dim_length keeps the operand non-negative at position 0 */
        temp_dim_pos[ j ] = (s->dim_position[ j ] - 1 + p->dim_length[ j ]) % p->dim_length[ j ];
        s->neighbour_minus_lpID[ j ] = to_flat_id(p->n_dims, p->dim_length, temp_dim_pos);
        temp_dim_pos[ j ] = s->dim_position[ j ];
    }

    // calculate plus neighbour's lpID
    for ( j = 0; j < p->n_dims; j++ ) {
        temp_dim_pos[ j ] = ( s->dim_position[ j ] + 1 + p->dim_length[ j ]) % p->dim_length[ j ];
        s->neighbour_plus_lpID[ j ] = to_flat_id(p->n_dims, p->dim_length, temp_dim_pos);
        temp_dim_pos[ j ] = s->dim_position[ j ];
    }

    s->packet_counter = 0;
    torus_collective_init(s, lp);
}