/**
 * @brief Determines how to handle the buffer of event whose send operation
 * just finished.
 *
 * @param[in] me pointer to PE
 * @param[in] e pointer to event that we just received
 * @param[in] buffer not currently used
 */
static void
send_finish(tw_pe *me, tw_event *e, char * buffer)
{
    (void) buffer;

    me->stats.s_nsend_network++;
    // instrumentation
    e->src_lp->kp->kp_stats->s_nsend_network++;
    e->src_lp->lp_stats->s_nsend_network++;

    switch (e->state.owner) {
    case TW_net_asend:
        if (e->state.cancel_asend) {
            /* Event was cancelled during transmission.  We must
             * send another message to pass the cancel flag to
             * the other node. */
            e->state.cancel_asend = 0;
            e->state.cancel_q = 1;
            tw_eventq_push(&outq, e);
        } else {
            /* Event finished transmission and was not cancelled.
             * Add to our sent event queue so we can retain the
             * event in case we need to cancel it later.  Note it
             * is currently in remote format and must be converted
             * back to local format for fossil collection. */
            e->state.owner = TW_pe_sevent_q;
            if (g_tw_synchronization_protocol == CONSERVATIVE)
                tw_event_free(me, e);
        }
        return;

    case TW_net_acancel:
        /* We just finished sending the cancellation message
         * for this event.  We need to free the buffer and
         * make it available for reuse. */
        tw_event_free(me, e);
        return;

    default:
        /* Never should happen, not unless we somehow broke this
         * module's other functions related to sending an event. */
        tw_error(
            TW_LOC,
            "Don't know how to finish send of owner=%u, cancel_q=%d",
            e->state.owner,
            e->state.cancel_q);
    }
}
/* Cancel a previously-sent event, choosing the cheapest mechanism the
 * event's current owner state allows: a network cancel, an immediate
 * priority-queue deletion, or a queued local cancel. */
static inline void event_cancel(tw_event * event)
{
    tw_pe   *src_pe = event->src_lp->pe;
    tw_peid  dst_pe_id;

    if (event->state.owner == TW_net_asend ||
        event->state.owner == TW_pe_sevent_q) {
        /* Slowest approach of all; this has to be sent over the
         * network to let the dest_pe know it shouldn't have seen
         * it in the first place. */
        tw_net_cancel(event);
        src_pe->stats.s_nsend_net_remote--;

        if (tw_gvt_inprogress(src_pe)) {
            src_pe->trans_msg_ts =
                ROSS_MIN(src_pe->trans_msg_ts, event->recv_ts);
        }
        return;
    }

    dst_pe_id = event->dest_lp->pe->id;

    if (src_pe->id == dst_pe_id) {
        if (event->state.owner == TW_pe_pq) {
            /* Currently in our pq and not processed; delete it and
             * free the event buffer immediately.  No need to wait. */
            tw_pq_delete_any(src_pe->pq, event);
            tw_event_free(src_pe, event);
        } else if (event->state.owner == TW_pe_event_q ||
                   event->state.owner == TW_kp_pevent_q) {
            local_cancel(src_pe, event);

            if (tw_gvt_inprogress(src_pe)) {
                src_pe->trans_msg_ts =
                    ROSS_MIN(src_pe->trans_msg_ts, event->recv_ts);
            }
        } else {
            tw_error(TW_LOC, "unknown fast local cancel owner %d",
                     event->state.owner);
        }
        return;
    }

    if (src_pe->node == dst_pe_id) {
        /* Slower, but still a local cancel, so put into
         * top of dest_pe->cancel_q for final deletion. */
        local_cancel(event->dest_lp->pe, event);
        src_pe->stats.s_nsend_loc_remote--;

        if (tw_gvt_inprogress(src_pe)) {
            src_pe->trans_msg_ts =
                ROSS_MIN(src_pe->trans_msg_ts, event->recv_ts);
        }
        return;
    }

    tw_error(TW_LOC, "Should be remote cancel!");
}
/**
 * @brief Cancel an event somewhere in this node's outbound send path.
 *
 * Dispatches on the event's current owner: still queued for send,
 * mid-transmission, or already sent.  Note that the TW_net_outq case
 * returns directly and deliberately skips service_queues(); the other
 * cases fall through to it.
 *
 * @param[in] e event to cancel
 */
void
tw_net_cancel(tw_event *e)
{
  tw_pe *src_pe = e->src_lp->pe;

  switch (e->state.owner) {
  case TW_net_outq:
    /* Cancelled before we could transmit it.  Do not
     * transmit the event and instead just release the
     * buffer back into our own free list.
     *
     * (An unreachable "break" that followed this return
     * has been removed.) */
    tw_eventq_delete_any(&outq, e);
    tw_event_free(src_pe, e);
    return;

  case TW_net_asend:
    /* Too late.  We've already let MPI start to send
     * this event over the network.  We can't pull it
     * back now without sending another message to do
     * the cancel.
     *
     * Setting the cancel_q flag will signal us to do
     * another message send once the current send of
     * this message is completed. */
    e->state.cancel_asend = 1;
    break;

  case TW_pe_sevent_q:
    /* Way late; the event was already sent and is in
     * our sent event queue.  Mark it as a cancel and
     * place it at the front of the outq. */
    e->state.cancel_q = 1;
    tw_eventq_unshift(&outq, e);
    break;

  default:
    /* Huh?  Where did you come from?  Why are we being
     * told about you?  We did not send you so we cannot
     * cancel you! */
    tw_error(
        TW_LOC,
        "Don't know how to cancel event owned by %u",
        e->state.owner);
    break;
  }

  service_queues(src_pe);
}
/*
 * recv_begin: pre-post non-blocking MPI receives until the posted-receive
 * window (read_buffer) is full or we run out of free event buffers.
 *
 * Returns nonzero if at least one new receive was posted.
 */
static int
recv_begin(tw_pe *me)
{
  MPI_Status status;   /* NOTE(review): unused in this variant — likely a leftover; confirm before removing */
  tw_event *e = NULL;
  int flag = 0;        /* NOTE(review): also unused here — this variant posts receives unconditionally */
  int changed = 0;

  while (posted_recvs.cur < read_buffer)
    {
      unsigned id = posted_recvs.cur;

      /* Grab a free event buffer to receive into; failing during GVT
       * computation is fatal because GVT cannot make progress. */
      if(!(e = tw_event_grab(me)))
        {
          if(tw_gvt_inprogress(me))
            tw_error(TW_LOC, "Out of events in GVT! Consider increasing --extramem");
          return changed;
        }

      /* Post the receive directly into the event buffer (or into a
       * separate staging buffer when ROSS_MEMORY is enabled). */
#if ROSS_MEMORY
      if( MPI_Irecv(posted_recvs.buffers[id],
                    EVENT_SIZE(e),
                    MPI_BYTE,
                    MPI_ANY_SOURCE,
                    EVENT_TAG,
                    MPI_COMM_ROSS,
                    &posted_recvs.req_list[id]) != MPI_SUCCESS)
#else
      if( MPI_Irecv(e,
                    (int)EVENT_SIZE(e),
                    MPI_BYTE,
                    MPI_ANY_SOURCE,
                    EVENT_TAG,
                    MPI_COMM_ROSS,
                    &posted_recvs.req_list[id]) != MPI_SUCCESS)
#endif
        {
          /* Posting failed: return the buffer and report what we did. */
          tw_event_free(me, e);
          return changed;
        }

      /* Remember which event buffer backs this outstanding request. */
      posted_recvs.event_list[id] = e;
      posted_recvs.cur++;
      changed = 1;
    }

  return changed;
}
/*
 * ip_packet_drop: record a dropped IP packet in this LP's statistics.
 *
 * Increments the drop counter, and the source-drop counter when the
 * packet originated at this LP.
 */
void ip_packet_drop(ip_state * state, rn_message * msg, tw_lp * lp)
{
	tw_event	*e;

	state->stats->s_ndropped++;

	/* Count separately when we are dropping our own traffic. */
	if(msg->src == lp->gid)
		state->stats->s_ndropped_source++;

#if VERIFY_IP
	/* NOTE(review): %lld assumes gid/src/dst are (signed) long long —
	 * confirm against the tw_lpid typedef. */
	printf("%lld: dropped src %lld, dst %lld on port %d \n",
		lp->gid, msg->src, msg->dst, msg->port);
#endif

	// Need to free the event otherwise it will simply be lost!
	/* NOTE(review): only the abort_event sentinel is freed here.  If
	 * rn_event_new returns a normal event it is neither sent nor freed
	 * by this function — looks like a buffer leak unless rn_event_new
	 * or a caller reclaims it; verify rn_event_new's contract. */
	e = rn_event_new(msg->dst, 0.0, lp, DOWNSTREAM, msg->size);

	if(e == lp->pe->abort_event)
		tw_event_free(lp->pe, e);
}
/**
 * @brief Determines how to handle the newly received event.
 *
 * Cancellation messages flush the matching event out of the hash table
 * onto the destination PE's cancel queue; normal events are hashed (in
 * optimistic modes) and then enqueued locally.
 *
 * @param[in] me pointer to PE
 * @param[in] e pointer to event that we just received
 * @param[in] buffer not currently used
 */
static void
recv_finish(tw_pe *me, tw_event *e, char * buffer)
{
  (void) buffer;
  tw_pe *dest_pe;
  tw_clock start;

  me->stats.s_nread_network++;
  me->s_nwhite_recv++;

  //  printf("recv_finish: remote event [cancel %u] FROM: LP %lu, PE %lu, TO: LP %lu, PE %lu at TS %lf \n",
  //         e->state.cancel_q, (tw_lpid)e->src_lp, e->send_pe, (tw_lpid)e->dest_lp, me->id, e->recv_ts);

  /* dest_lp arrives over the wire as a gid; swizzle it back to a pointer. */
  e->dest_lp = tw_getlocal_lp((tw_lpid) e->dest_lp);
  dest_pe = e->dest_lp->pe;

  // instrumentation
  e->dest_lp->kp->kp_stats->s_nread_network++;
  e->dest_lp->lp_stats->s_nread_network++;

  /* NOTE(review): %d assumes send_pe fits a signed int — confirm tw_peid. */
  if(e->send_pe > tw_nnodes()-1)
    tw_error(TW_LOC, "bad sendpe_id: %d", e->send_pe);

  /* Wire transfer leaves these pointers meaningless; reset them. */
  e->cancel_next = NULL;
  e->caused_by_me = NULL;
  e->cause_next = NULL;

  if(e->recv_ts < me->GVT)
    tw_error(TW_LOC, "%d: Received straggler from %d: %lf (%d)",
             me->id, e->send_pe, e->recv_ts, e->state.cancel_q);

  if(tw_gvt_inprogress(me))
    me->trans_msg_ts = ROSS_MIN(me->trans_msg_ts, e->recv_ts);

  // if cancel event, retrieve and flush
  // else, store in hash table
  if(e->state.cancel_q)
    {
      tw_event *cancel = tw_hash_remove(me->hash_t, e, e->send_pe);

      // NOTE: it is possible to cancel the event we
      // are currently processing at this PE since this
      // MPI module lets me read cancel events during
      // event sends over the network.
      cancel->state.cancel_q = 1;
      cancel->state.remote = 0;

      /* Prepend to the destination PE's cancel list. */
      cancel->cancel_next = dest_pe->cancel_q;
      dest_pe->cancel_q = cancel;

      /* The carrier message itself is no longer needed. */
      tw_event_free(me, e);
      return;
    }

  if (g_tw_synchronization_protocol == OPTIMISTIC ||
      g_tw_synchronization_protocol == OPTIMISTIC_DEBUG ||
      g_tw_synchronization_protocol == OPTIMISTIC_REALTIME ) {
    /* Remember remote events so a later cancel can find them. */
    tw_hash_insert(me->hash_t, e, e->send_pe);
    e->state.remote = 1;
  }

  /* NOTE: the final check in the if conditional below was added to make sure
   * that we do not execute the fast case unless the cancellation queue is
   * empty on the destination PE.  Otherwise we need to invoke the normal
   * scheduling routines to make sure that a forward event doesn't bypass a
   * cancellation event with an earlier timestamp.  This is helpful for
   * stateful models that produce incorrect results when presented with
   * duplicate messages with no rollback between them. */
  if(me == dest_pe && e->dest_lp->kp->last_time <= e->recv_ts && !dest_pe->cancel_q) {
    /* Fast case, we are sending to our own PE and
     * there is no rollback caused by this send. */
    start = tw_clock_read();
    tw_pq_enqueue(dest_pe->pq, e);
    dest_pe->stats.s_pq += tw_clock_read() - start;
    return;
  }

  if (me->id == dest_pe->id) {
    /* Slower, but still local send, so put into top
     * of dest_pe->event_q. */
    e->state.owner = TW_pe_event_q;
    tw_eventq_push(&dest_pe->event_q, e);
    return;
  }

  /* Never should happen; MPI should have gotten the
   * message to the correct node without needing us
   * to redirect the message there for it.  This is
   * probably a serious bug with the event headers
   * not being formatted right. */
  /* (Fixed typo in the error message: "recived" -> "received".) */
  tw_error(
      TW_LOC,
      "Event received by PE %u but meant for PE %u",
      me->id,
      dest_pe->id);
}
/**
 * @brief Post non-blocking receives for incoming remote events,
 *        probing first so an event buffer is only consumed when a
 *        message is actually pending.
 *
 * Fixes relative to the previous version:
 *  - The probe now matches EVENT_TAG instead of MPI_ANY_TAG.  Probing
 *    any tag could report a message that the EVENT_TAG receive posted
 *    below would never match, wasting an event buffer and leaving a
 *    request that may never complete.
 *  - Removed the dead "!flag ||" guard on the MPI_Irecv condition:
 *    flag is provably nonzero at that point (the !flag path returns).
 *
 * @param[in] me pointer to PE
 * @return nonzero if at least one new receive was posted
 */
static int
recv_begin(tw_pe *me)
{
  MPI_Status status;
  tw_event *e = NULL;
  int flag = 0;
  int changed = 0;

  while (posted_recvs.cur < read_buffer)
    {
      unsigned id = posted_recvs.cur;

      MPI_Iprobe(MPI_ANY_SOURCE, EVENT_TAG, MPI_COMM_WORLD, &flag, &status);
      if (!flag)
        return changed;

      /* A message is waiting; grab a buffer for it. */
      if (!(e = tw_event_grab(me)))
        {
          if (tw_gvt_inprogress(me))
            tw_error(TW_LOC, "out of events in GVT!");
          break;
        }

#if ROSS_MEMORY
      if (MPI_Irecv(posted_recvs.buffers[id],
                    EVENT_SIZE(e),
                    MPI_BYTE,
                    MPI_ANY_SOURCE,
                    EVENT_TAG,
                    MPI_COMM_WORLD,
                    &posted_recvs.req_list[id]) != MPI_SUCCESS)
#else
      if (MPI_Irecv(e,
                    (int)EVENT_SIZE(e),
                    MPI_BYTE,
                    MPI_ANY_SOURCE,
                    EVENT_TAG,
                    MPI_COMM_WORLD,
                    &posted_recvs.req_list[id]) != MPI_SUCCESS)
#endif
        {
          /* Posting failed: return the buffer and report progress so far. */
          tw_event_free(me, e);
          return changed;
        }

      posted_recvs.event_list[id] = e;
      posted_recvs.cur++;
      changed = 1;
    }

  return changed;
}
/*
 * tw_socket_read_event: poll the TCP client sockets for one incoming
 * event and return it, or NULL if nothing was available.
 *
 * Cancellation messages are resolved against the hash table and the
 * matching original event is pushed (under lock) onto its destination
 * PE's cancel queue; the carrier buffer is then recycled.  Normal
 * events are hashed and returned to the caller.
 */
tw_event *
tw_socket_read_event(tw_pe * me)
{
	tw_net_node	*node = g_tw_net_node[me->id];
	tw_event	*recv_event;
	tw_event	*cancel_event;
#ifdef ROSS_MEMORY_LIB
	tw_memory	*last;
	tw_memory	*memory;
#endif
	//tw_message	*temp_message;
	void		*temp_data;
	//tw_pe		*send_pe;
	tw_peid		 send_peid;
	tw_pe		*dest_pe;
	int		 rv;
	unsigned int	 i;
#ifdef ROSS_MEMORY_LIB
	void		*temp_mem_data;
	size_t		 mem_size;
	tw_fd		 mem_fd;
#endif

	rv = 0;

	/*
	 * Get a free event from our freeq and save the pointers
	 * to the message and the data for later use.
	 */
	if(me->abort_event == (recv_event = tw_event_grab(me)))
		return NULL;

	//temp_message = recv_event->message;
	//temp_data = recv_event->message->data;
	/* NOTE(review): temp_data is computed but never used below —
	 * appears to be a leftover from the commented-out restore path. */
	temp_data = recv_event + 1;

	/*
	 * Attempt to read an event, and return NULL if no more events to recv.
	 * Stops at the first client socket that yields data.
	 */
	for (i = 0; i < nnet_nodes - g_tw_npe; i++)
	{
		rv = tw_socket_read(node->clients[i],
				    (char *) recv_event,
				    sizeof(tw_event) + g_tw_msg_sz, 100);
		if (rv > 0)
			break;
	}

	/*
	 * Check to see if we actually read an event; if not, return the
	 * grabbed buffer to the free queue.
	 */
	if (1 > rv)
	{
		if(recv_event != me->abort_event)
		{
			recv_event->event_id = 0;
			tw_eventq_unshift(&me->free_q, recv_event);
		}
		return NULL;
	}

	if (recv_event == me->abort_event)
		tw_error(TW_LOC, "Out of memory! Allocate more events!");

	if(recv_event->recv_ts < me->GVT)
		tw_error(TW_LOC, "Received straggler event!");

	/*
	 * Restore recv'ed event's pointers
	 *
	 * on recv'rs side: have dest_lp ptr, not src_lp ptr
	 */
	//recv_event->dest_lp = tw_getlp((tw_lpid)recv_event->dest_lp);
	//recv_event->src_lp = tw_getlp((tw_lpid)recv_event->src_lp);
	//recv_event->message = temp_message;
	//recv_event->message->data = temp_data;
	recv_event->dest_lp = tw_getlocal_lp((tw_lpid) recv_event->dest_lp);

	//send_pe = recv_event->src_lp->pe;
	/* src_lp is still a gid here; map it to the sending PE's id. */
	send_peid = (recv_event->dest_lp->type.map) ((tw_lpid) recv_event->src_lp);

	if(send_peid == me->id)
		tw_error(TW_LOC, "Sent event over network to self?");

	/* NOTE(review): %d with recv_ts/g_tw_ts_end looks like a
	 * format-specifier mismatch if those are doubles — confirm. */
	if (recv_event->recv_ts > g_tw_ts_end)
		tw_error(TW_LOC, "%d: Received remote event at %d, end=%d!",
			recv_event->dest_lp->id, recv_event->recv_ts, g_tw_ts_end);

	if(recv_event->dest_lp->pe != me)
		tw_error(TW_LOC, "Not destination PE!");

	/*
	 * If a CANCEL message, just get the event out of hash table
	 * and call * tw_event_cancel() on it, which rolls it back if nec
	 */
	if(recv_event->state.owner == TW_net_acancel)
	{
#if VERIFY_SOCKET_TCP
		printf ("\t\t\t\t\t\t\t\tREAD CANCEL: dest p%d l%d: ts=%f sn=%d\n",
			recv_event->dest_lp->pe->id, recv_event->dest_lp->id,
			recv_event->recv_ts, recv_event->event_id);
#endif
		cancel_event = NULL;
		cancel_event = tw_hash_remove(me->hash_t, recv_event, send_peid);
		dest_pe = cancel_event->dest_lp->pe;

		cancel_event->state.cancel_q = 1;
		cancel_event->state.remote = 0;

		/* Sanity checks: the hashed event must be distinct and owned. */
		if(cancel_event == recv_event)
			tw_error(TW_LOC, "cancel_event == recv_event!");

		if(cancel_event->state.owner == 0 ||
		   cancel_event->state.owner == TW_pe_free_q)
			tw_error(TW_LOC, "cancel_event no owner!");

		/* Push onto the destination PE's cancel list under its lock. */
		tw_mutex_lock(&dest_pe->cancel_q_lck);
		cancel_event->cancel_next = dest_pe->cancel_q;
		dest_pe->cancel_q = cancel_event;
		tw_mutex_unlock(&dest_pe->cancel_q_lck);

		/* Recycle the carrier message itself. */
		recv_event->event_id = recv_event->state.cancel_q = 0;
		recv_event->state.remote = 0;
		tw_event_free(me, recv_event);

		return cancel_event;
	}

	/* Normal event: scrub pointers that are meaningless after the wire. */
	recv_event->next = NULL;
	//recv_event->lp_state = NULL;
	recv_event->cancel_next = NULL;
	recv_event->caused_by_me = NULL;
	recv_event->cause_next = NULL;

	// signals for on-the-fly fossil collection
	recv_event->state.remote = 1;

	/* Remember the event so a later cancel message can find it. */
	tw_hash_insert(me->hash_t, recv_event, send_peid);

#if VERIFY_SOCKET_TCP
	printf ("\t\t\t\t\t\t\t\tREAD NORMAL: dest p%d l%d: ts=%f sn=%d src p%d l%d \n",
		recv_event->dest_lp->pe->id, recv_event->dest_lp->id,
		recv_event->recv_ts, recv_event->seq_num,
		recv_event->src_lp->pe->id, recv_event->src_lp->id);
#endif

#ifdef ROSS_MEMORY_LIB
	/* The event's memory/prev fields carry the first buffer's size and
	 * fd over the wire; walk the chain, reading each buffer from the
	 * same socket the event arrived on. */
	mem_size = (size_t) recv_event->memory;
	mem_fd = (tw_fd) recv_event->prev;

	last = NULL;
	while(mem_size)
	{
		memory = tw_memory_alloc(recv_event->src_lp, mem_fd);
		temp_mem_data = memory->data;

		if(last)
			last->next = memory;
		else
			recv_event->memory = memory;

		/* NOTE(review): rv (int) vs mem_size (size_t) comparison, and
		 * each retry re-reads the whole buffer rather than resuming —
		 * confirm tw_socket_read's all-or-nothing semantics. */
		rv = 0;
		while(rv != mem_size)
		{
			rv = tw_socket_read(node->clients[i],
					    (char *) memory, mem_size, 100);
		}

		memory->data = temp_mem_data;
		memory->prev = (tw_memory *) mem_fd;

#if VERIFY_SOCKET_TCP
		printf("recv\'d mem buf of size %d on event %f\n",
			rv, recv_event->recv_ts);
#endif

		mem_size = (size_t) memory->next;
		mem_fd = (tw_fd) memory->prev;
		last = memory;
	}
#endif

	recv_event->prev = NULL;

	return recv_event;
}