Beispiel #1
0
/**
 * @brief Post-processes an event once its network send has completed.
 *
 * Bumps the network-send counters on the PE, on the source LP's KP and on
 * the source LP itself, then decides what happens to the event buffer
 * based on who currently owns it:
 *
 *  - TW_net_asend with cancel_asend set: the flag is cleared, cancel_q is
 *    raised and the event is pushed onto outq again so the cancel goes out
 *    as a second message.
 *  - TW_net_asend without cancel_asend: ownership moves to TW_pe_sevent_q;
 *    under the CONSERVATIVE protocol the event is freed right away.
 *  - TW_net_acancel: the cancel message itself has gone out, so the buffer
 *    is freed.
 *  - any other owner: reported through tw_error().
 *
 * @param[in] me pointer to PE
 * @param[in] e pointer to the event whose send just completed
 * @param[in] buffer not currently used
 */
static void
send_finish(tw_pe *me, tw_event *e, char * buffer)
{
  (void) buffer;

  /* PE-wide counter first, then the per-KP / per-LP instrumentation. */
  me->stats.s_nsend_network++;
  e->src_lp->kp->kp_stats->s_nsend_network++;
  e->src_lp->lp_stats->s_nsend_network++;

  switch (e->state.owner) {
  case TW_net_asend:
    if (!e->state.cancel_asend) {
      /* Transmission finished and nobody cancelled the event in the
       * meantime.  Hand it to the sent-event bookkeeping so that it
       * can still be cancelled later.  The original author notes the
       * event is in remote format at this point and has to be
       * converted back to local format for fossil collection.
       */
      e->state.owner = TW_pe_sevent_q;
      if (g_tw_synchronization_protocol == CONSERVATIVE) {
        tw_event_free(me, e);
      }
    } else {
      /* Cancelled while the send was in flight: a follow-up message
       * carrying the cancel flag has to be queued for the other node.
       */
      e->state.cancel_asend = 0;
      e->state.cancel_q = 1;
      tw_eventq_push(&outq, e);
    }
    return;

  case TW_net_acancel:
    /* The cancellation message for this event has been sent; release
     * the buffer for reuse.
     */
    tw_event_free(me, e);
    return;

  default:
    break;
  }

  /* Should never be reached unless one of this module's other send-path
   * functions left the event in an unexpected ownership state.
   */
  tw_error(
           TW_LOC,
           "Don't know how to finish send of owner=%u, cancel_q=%d",
           e->state.owner,
           e->state.cancel_q);
}
/*
 * Cancel an event, choosing the cheapest route its current owner allows.
 *
 *  - Owner TW_net_asend / TW_pe_sevent_q: the event went (or is going) over
 *    the network, so tw_net_cancel() is used and the sender's
 *    s_nsend_net_remote counter is decremented.
 *  - Source PE id equals destination PE id: an event still in the pending
 *    queue (TW_pe_pq) is deleted and freed immediately; one in
 *    TW_pe_event_q / TW_kp_pevent_q goes through local_cancel().
 *  - Source PE's `node` equals the destination PE id: local_cancel() on the
 *    destination PE, and s_nsend_loc_remote is decremented.
 *  - Anything else is reported through tw_error().
 *
 * Except for the immediate pending-queue delete, every successful path
 * folds the event's recv_ts into the sender's trans_msg_ts (ROSS_MIN)
 * while a GVT computation is in progress.
 */
static inline void event_cancel(tw_event * event) {
    tw_pe *src_pe = event->src_lp->pe;
    tw_peid target_id;

    if (event->state.owner == TW_net_asend || event->state.owner == TW_pe_sevent_q) {
        /* Most expensive route: the destination PE has to be told over
         * the network that it should never have seen this event.
         */
        tw_net_cancel(event);
        src_pe->stats.s_nsend_net_remote--;

        if (tw_gvt_inprogress(src_pe)) {
            src_pe->trans_msg_ts = ROSS_MIN(src_pe->trans_msg_ts, event->recv_ts);
        }
        return;
    }

    /* dest_lp is only dereferenced once the network case is ruled out. */
    target_id = event->dest_lp->pe->id;

    if (src_pe->id == target_id) {
        if (event->state.owner == TW_pe_pq) {
            /* Still unprocessed in our own pending queue: remove it and
             * release the buffer on the spot.
             */
            tw_pq_delete_any(src_pe->pq, event);
            tw_event_free(src_pe, event);
        } else if (event->state.owner == TW_pe_event_q ||
                   event->state.owner == TW_kp_pevent_q) {
            local_cancel(src_pe, event);

            if (tw_gvt_inprogress(src_pe)) {
                src_pe->trans_msg_ts = ROSS_MIN(src_pe->trans_msg_ts, event->recv_ts);
            }
        } else {
            tw_error(TW_LOC, "unknown fast local cancel owner %d", event->state.owner);
        }
        return;
    }

    /* NOTE(review): this compares the sender's `node` field against a PE
     * id, exactly as the original did -- confirm that is intended.
     */
    if (src_pe->node == target_id) {
        /* Slower, but still local: hand the event to the destination
         * PE's cancel queue for final deletion.
         */
        local_cancel(event->dest_lp->pe, event);
        src_pe->stats.s_nsend_loc_remote--;

        if (tw_gvt_inprogress(src_pe)) {
            src_pe->trans_msg_ts = ROSS_MIN(src_pe->trans_msg_ts, event->recv_ts);
        }
        return;
    }

    tw_error(TW_LOC, "Should be remote cancel!");
}
Beispiel #3
0
/*
 * Cancel an event that was handed to the network layer.
 *
 * What happens depends on how far the send has progressed, as recorded in
 * e->state.owner:
 *
 *  - TW_net_outq:    not transmitted yet; the event is removed from outq,
 *                    freed, and the function returns without servicing the
 *                    queues.
 *  - TW_net_asend:   transmission is in flight; cancel_asend is raised so a
 *                    cancel message can follow once the send completes.
 *  - TW_pe_sevent_q: already sent; cancel_q is raised and the event is put
 *                    at the front of outq.
 *  - anything else:  reported through tw_error().
 *
 * Every path except TW_net_outq ends with service_queues() on the source PE.
 */
void
tw_net_cancel(tw_event *e)
{
  tw_pe *sender = e->src_lp->pe;

  if (e->state.owner == TW_net_outq) {
    /* Cancelled before transmission started: drop it from the outgoing
     * queue and return the buffer to the free list.
     */
    tw_eventq_delete_any(&outq, e);
    tw_event_free(sender, e);
    return;
  }

  if (e->state.owner == TW_net_asend) {
    /* Too late to pull the message back; MPI has already started
     * sending it.  Flag the event so that a second message carrying
     * the cancel is sent after the current send completes.
     */
    e->state.cancel_asend = 1;
  } else if (e->state.owner == TW_pe_sevent_q) {
    /* The event was fully sent some time ago.  Mark it as a cancel and
     * place it at the head of the outgoing queue.
     */
    e->state.cancel_q = 1;
    tw_eventq_unshift(&outq, e);
  } else {
    /* We were never responsible for sending this event, so we have no
     * way of cancelling it.
     */
    tw_error(
             TW_LOC,
             "Don't know how to cancel event owned by %u",
             e->state.owner);
  }

  service_queues(sender);
}
Beispiel #4
0
/*
 * Post non-blocking receives until posted_recvs.cur reaches read_buffer.
 *
 * For every free slot an event buffer is grabbed from the PE and an
 * MPI_Irecv is posted for it.  Posting stops early when no event buffer is
 * available (which is reported through tw_error() if a GVT computation is
 * in progress) or when MPI_Irecv does not return MPI_SUCCESS, in which case
 * the just-grabbed buffer is freed again.
 *
 * Returns 1 if at least one receive was posted by this call, 0 otherwise.
 */
static int
recv_begin(tw_pe *me)
{
  int posted_any = 0;

  while (posted_recvs.cur < read_buffer)
    {
      unsigned slot = posted_recvs.cur;
      tw_event *ev = tw_event_grab(me);
      int rc;

      if (ev == NULL)
        {
          if (tw_gvt_inprogress(me))
            tw_error(TW_LOC, "Out of events in GVT! Consider increasing --extramem");
          return posted_any;
        }

      /* With ROSS_MEMORY the data lands in the slot's staging buffer,
       * otherwise directly in the event buffer.
       */
#if ROSS_MEMORY
      rc = MPI_Irecv(posted_recvs.buffers[slot],
                     EVENT_SIZE(ev),
                     MPI_BYTE,
                     MPI_ANY_SOURCE,
                     EVENT_TAG,
                     MPI_COMM_ROSS,
                     &posted_recvs.req_list[slot]);
#else
      rc = MPI_Irecv(ev,
                     (int)EVENT_SIZE(ev),
                     MPI_BYTE,
                     MPI_ANY_SOURCE,
                     EVENT_TAG,
                     MPI_COMM_ROSS,
                     &posted_recvs.req_list[slot]);
#endif

      if (rc != MPI_SUCCESS)
        {
          tw_event_free(me, ev);
          return posted_any;
        }

      posted_recvs.event_list[slot] = ev;
      posted_recvs.cur++;
      posted_any = 1;
    }

  return posted_any;
}
Beispiel #5
0
/*
 * Account for a packet that the IP layer is dropping at this LP.
 *
 * Increments the drop counter, and additionally the dropped-at-source
 * counter when the message originated at this LP (msg->src == lp->gid).
 * Afterwards a DOWNSTREAM event is requested through rn_event_new(); it is
 * freed only when it turns out to be the PE's abort event.
 */
void
ip_packet_drop(ip_state * state, rn_message * msg, tw_lp * lp)
{
	tw_event	*ev;
	tw_pe		*pe;

	state->stats->s_ndropped++;

	if(lp->gid == msg->src)
	{
		state->stats->s_ndropped_source++;
	}

#if VERIFY_IP
	printf("%lld: dropped src %lld, dst %lld on port %d \n", 
		lp->gid, msg->src, msg->dst, msg->port);
#endif

	// Original author's note: the event must be freed or it is lost.
	// NOTE(review): the free below only happens for the abort event; any
	// other event returned by rn_event_new() is neither sent nor freed
	// here -- confirm the comparison is not inverted.
	ev = rn_event_new(msg->dst, 0.0, lp, DOWNSTREAM, msg->size);
	pe = lp->pe;

	if(ev == pe->abort_event)
	{
		tw_event_free(pe, ev);
	}
}
Beispiel #6
0
/**
 * @brief Determines how to handle the newly received event.
 *
 * Updates the receive counters, resolves the destination LP id carried in
 * the message into a local LP pointer, clears the causality links and then
 * either
 *  - treats the message as a cancel (cancel_q set): the matching event is
 *    pulled out of the hash table, chained onto the destination PE's
 *    cancel list, and the received buffer is freed; or
 *  - treats it as a forward event: it is recorded in the hash table when
 *    running one of the optimistic protocols and then enqueued, directly
 *    into the pending queue on the fast path or into event_q otherwise.
 *
 * A bad sender id, a timestamp below GVT, and a destination PE other than
 * @p me are each reported through tw_error().
 *
 * @param[in] me pointer to PE
 * @param[in] e pointer to event that we just received
 * @param[in] buffer not currently used
 */
static void
recv_finish(tw_pe *me, tw_event *e, char * buffer)
{
  tw_pe *target_pe;

  (void) buffer;

  me->stats.s_nread_network++;
  me->s_nwhite_recv++;

  //  printf("recv_finish: remote event [cancel %u] FROM: LP %lu, PE %lu, TO: LP %lu, PE %lu at TS %lf \n",
  //	 e->state.cancel_q, (tw_lpid)e->src_lp, e->send_pe, (tw_lpid)e->dest_lp, me->id, e->recv_ts);

  /* On the wire dest_lp carries an LP id; swap in the local pointer. */
  e->dest_lp = tw_getlocal_lp((tw_lpid) e->dest_lp);
  target_pe = e->dest_lp->pe;

  /* Per-KP / per-LP instrumentation. */
  e->dest_lp->kp->kp_stats->s_nread_network++;
  e->dest_lp->lp_stats->s_nread_network++;

  if (e->send_pe > tw_nnodes()-1) {
    tw_error(TW_LOC, "bad sendpe_id: %d", e->send_pe);
  }

  /* Whatever pointer values arrived in these fields are not meaningful
   * on this side, so reset them.
   */
  e->cancel_next = NULL;
  e->caused_by_me = NULL;
  e->cause_next = NULL;

  if (e->recv_ts < me->GVT) {
    tw_error(TW_LOC, "%d: Received straggler from %d: %lf (%d)",
             me->id,  e->send_pe, e->recv_ts, e->state.cancel_q);
  }

  if (tw_gvt_inprogress(me)) {
    me->trans_msg_ts = ROSS_MIN(me->trans_msg_ts, e->recv_ts);
  }

  if (e->state.cancel_q) {
    /* Cancel message: look up the event it refers to and queue that
     * one for cancellation.  As the original author notes, this may be
     * the very event this PE is currently processing, because cancel
     * messages can be read while sends are in progress.
     */
    tw_event *victim = tw_hash_remove(me->hash_t, e, e->send_pe);

    victim->state.cancel_q = 1;
    victim->state.remote = 0;

    victim->cancel_next = target_pe->cancel_q;
    target_pe->cancel_q = victim;

    tw_event_free(me, e);
    return;
  }

  /* Forward event: only the optimistic protocols track it in the hash
   * table so that a later cancel message can find it.
   */
  if (g_tw_synchronization_protocol == OPTIMISTIC ||
      g_tw_synchronization_protocol == OPTIMISTIC_DEBUG ||
      g_tw_synchronization_protocol == OPTIMISTIC_REALTIME) {
    tw_hash_insert(me->hash_t, e, e->send_pe);
    e->state.remote = 1;
  }

  /* Fast path: the event is for our own PE, it does not precede the
   * KP's last processed time (no rollback), and the destination PE has
   * no pending cancellations.  The last condition keeps a forward event
   * from overtaking a cancellation with an earlier timestamp, which
   * matters for stateful models that misbehave on duplicate messages
   * without a rollback in between.
   */
  if (me == target_pe &&
      e->dest_lp->kp->last_time <= e->recv_ts &&
      !target_pe->cancel_q) {
    tw_clock t0 = tw_clock_read();

    tw_pq_enqueue(target_pe->pq, e);
    target_pe->stats.s_pq += tw_clock_read() - t0;
    return;
  }

  if (me->id != target_pe->id) {
    /* Should never happen: MPI is expected to deliver the message to
     * the right node.  Most likely the event header is malformed.
     */
    tw_error(
             TW_LOC,
             "Event recived by PE %u but meant for PE %u",
             me->id,
             target_pe->id);
    return;
  }

  /* Slower, but still a local delivery: hand the event to event_q so the
   * normal scheduling routines process it.
   */
  e->state.owner = TW_pe_event_q;
  tw_eventq_push(&target_pe->event_q, e);
}
Beispiel #7
0
/*
 * Post non-blocking receives while messages are waiting and slots are free.
 *
 * Each iteration probes MPI_COMM_WORLD for any pending message.  If none
 * is waiting the function returns immediately.  Otherwise an event buffer
 * is grabbed and an MPI_Irecv for EVENT_TAG is posted into the current
 * slot.  Running out of event buffers ends the loop (and is reported
 * through tw_error() when a GVT computation is in progress); an MPI_Irecv
 * result other than MPI_SUCCESS frees the buffer and returns.
 *
 * Returns 1 if at least one receive was posted by this call, 0 otherwise.
 */
static int
recv_begin(tw_pe *me)
{
  MPI_Status probe_status;
  int msg_waiting = 0;
  int posted_any = 0;

  while (posted_recvs.cur < read_buffer)
    {
      unsigned slot = posted_recvs.cur;
      tw_event *ev;
      int rc;

      MPI_Iprobe(MPI_ANY_SOURCE,
                 MPI_ANY_TAG,
                 MPI_COMM_WORLD,
                 &msg_waiting,
                 &probe_status);

      /* Nothing pending: no point in posting another receive. */
      if (!msg_waiting)
        return posted_any;

      ev = tw_event_grab(me);
      if (ev == NULL)
        {
          if (tw_gvt_inprogress(me))
            tw_error(TW_LOC, "out of events in GVT!");

          break;
        }

      /* With ROSS_MEMORY the data lands in the slot's staging buffer,
       * otherwise directly in the event buffer.
       */
#if ROSS_MEMORY
      rc = MPI_Irecv(posted_recvs.buffers[slot],
                     EVENT_SIZE(ev),
                     MPI_BYTE,
                     MPI_ANY_SOURCE,
                     EVENT_TAG,
                     MPI_COMM_WORLD,
                     &posted_recvs.req_list[slot]);
#else
      rc = MPI_Irecv(ev,
                     (int)EVENT_SIZE(ev),
                     MPI_BYTE,
                     MPI_ANY_SOURCE,
                     EVENT_TAG,
                     MPI_COMM_WORLD,
                     &posted_recvs.req_list[slot]);
#endif

      if (rc != MPI_SUCCESS)
        {
          tw_event_free(me, ev);
          return posted_any;
        }

      posted_recvs.event_list[slot] = ev;
      posted_recvs.cur++;
      posted_any = 1;
    }

  return posted_any;
}
Beispiel #8
0
/**
 * @brief Try to read one event from this PE's network client sockets.
 *
 * A free event buffer is grabbed and each client socket of the PE's
 * network node is polled in turn; polling stops at the first socket that
 * returns data.  The received header is then validated (timestamp not
 * below GVT and not beyond g_tw_ts_end, sender is not this PE, destination
 * LP lives on this PE) and handled as either
 *  - a cancel message (owner == TW_net_acancel): the matching event is
 *    removed from the hash table, pushed onto the destination PE's cancel
 *    list under cancel_q_lck, and the receive buffer is freed; or
 *  - a normal event: its links are cleared, it is flagged remote and
 *    inserted into the hash table.  With ROSS_MEMORY_LIB, trailing memory
 *    buffers are read from the same socket and chained onto the event.
 *
 * Validation failures are reported through tw_error().
 *
 * @param me PE performing the read
 * @return NULL if no event buffer was available or nothing was read;
 *         the locally stored event being cancelled when a cancel message
 *         was read; otherwise the newly received event.
 */
tw_event       *
tw_socket_read_event(tw_pe * me)
{
	tw_net_node	*node = g_tw_net_node[me->id];

	tw_event       *recv_event;
	tw_event       *cancel_event;

#ifdef ROSS_MEMORY_LIB
	tw_memory	*last;
	tw_memory	*memory;
	void           *temp_mem_data;

	size_t		mem_size;
	tw_fd		mem_fd;
#endif

	tw_peid		send_peid;
	tw_pe          *dest_pe;

	int             rv = 0;
	unsigned int             i;

	/*
	 * Get a free event from our freeq.  Getting the abort event back
	 * means there is nothing to receive into, so give up for now.
	 */
	recv_event = tw_event_grab(me);
	if(recv_event == me->abort_event)
		return NULL;

	/*
	 * Attempt to read an event; stop at the first client that delivers
	 * one.  `i` keeps the index of that client for the optional memory
	 * buffer reads further down.
	 */
	for (i = 0; i < nnet_nodes - g_tw_npe; i++)
	{
		rv = tw_socket_read(node->clients[i],
			(char *) recv_event, sizeof(tw_event) + g_tw_msg_sz, 100);

		if (rv > 0)
			break;
	}

	/*
	 * Nothing was read: put the unused buffer back at the head of the
	 * free queue.  (recv_event cannot be the abort event here; that case
	 * returned above.)
	 */
	if (rv < 1)
	{
		recv_event->event_id = 0;
		tw_eventq_unshift(&me->free_q, recv_event);

		return NULL;
	}

	if(recv_event->recv_ts < me->GVT)
		tw_error(TW_LOC, "Received straggler event!");

	/*
	 * Restore recv'ed event's pointers
	 *
	 * on recv'rs side: have dest_lp ptr, not src_lp ptr
	 */
	recv_event->dest_lp = tw_getlocal_lp((tw_lpid) recv_event->dest_lp);

	/* src_lp still holds the sender's LP id; map it to the sending PE. */
	send_peid = (recv_event->dest_lp->type.map)
				((tw_lpid) recv_event->src_lp);

	if(send_peid == me->id)
		tw_error(TW_LOC, "Sent event over network to self?");

	/*
	 * The timestamps are floating point (they are printed with %f
	 * elsewhere in this function), so they must not be formatted with
	 * %d.  The explicit casts keep the call well-defined whatever the
	 * exact typedefs are.
	 */
	if (recv_event->recv_ts > g_tw_ts_end)
		tw_error(TW_LOC, "%lld: Received remote event at %f, end=%f!", 
				(long long) recv_event->dest_lp->id,
				(double) recv_event->recv_ts,
				(double) g_tw_ts_end);

	if(recv_event->dest_lp->pe != me)
		tw_error(TW_LOC, "Not destination PE!");

	/*
	 * If a CANCEL message, get the original event out of the hash table
	 * and queue it on the destination PE's cancel list; the caller gets
	 * the event to be cancelled, not the message that announced it.
	 */
	if(recv_event->state.owner == TW_net_acancel)
	{
#if VERIFY_SOCKET_TCP
		printf
			("\t\t\t\t\t\t\t\tREAD CANCEL: dest p%d l%d: ts=%f sn=%d\n",
			 recv_event->dest_lp->pe->id,
			 recv_event->dest_lp->id,
			 recv_event->recv_ts, recv_event->event_id);
#endif

		/*
		 * NOTE(review): the result of tw_hash_remove() is dereferenced
		 * without a NULL check -- confirm it cannot return NULL.
		 */
		cancel_event = tw_hash_remove(me->hash_t, recv_event, send_peid);
		dest_pe = cancel_event->dest_lp->pe;
		cancel_event->state.cancel_q = 1;
		cancel_event->state.remote = 0;

		if(cancel_event == recv_event)
			tw_error(TW_LOC, "cancel_event == recv_event!");

		if(cancel_event->state.owner == 0 ||
			cancel_event->state.owner == TW_pe_free_q)
			tw_error(TW_LOC, "cancel_event no owner!");

		tw_mutex_lock(&dest_pe->cancel_q_lck);
		cancel_event->cancel_next = dest_pe->cancel_q;
		dest_pe->cancel_q = cancel_event;
		tw_mutex_unlock(&dest_pe->cancel_q_lck);

		/* Scrub the message buffer before handing it back. */
		recv_event->event_id = recv_event->state.cancel_q = 0;
		recv_event->state.remote = 0;

		tw_event_free(me, recv_event);

		return cancel_event;
	}

	/* Pointer fields that came over the wire are meaningless here. */
	recv_event->next = NULL;
	recv_event->cancel_next = NULL;
	recv_event->caused_by_me = NULL;
	recv_event->cause_next = NULL;

	// signals for on-the-fly fossil collection
	recv_event->state.remote = 1;

	tw_hash_insert(me->hash_t, recv_event, send_peid);

#if VERIFY_SOCKET_TCP
	/*
	 * NOTE(review): src_lp is dereferenced as a pointer here although it
	 * is treated as an LP id above -- verify before enabling this trace.
	 */
	printf
		("\t\t\t\t\t\t\t\tREAD NORMAL: dest p%d l%d: ts=%f sn=%d src p%d l%d \n",
		 recv_event->dest_lp->pe->id,
		 recv_event->dest_lp->id,
		 recv_event->recv_ts, recv_event->seq_num,
		 recv_event->src_lp->pe->id,
		 recv_event->src_lp->id);
#endif

#ifdef ROSS_MEMORY_LIB
	/*
	 * The sender packed the first buffer's size into `memory` and its
	 * fd into `prev`; each received buffer carries the next size/fd in
	 * its own next/prev fields.  A size of zero ends the chain.
	 */
	mem_size = (size_t) recv_event->memory;
	mem_fd = (tw_fd) recv_event->prev;
	last = NULL;
	while(mem_size)
	{
		memory = tw_memory_alloc(recv_event->src_lp, mem_fd);
		temp_mem_data = memory->data;

		if(last)
			last->next = memory;
		else
			recv_event->memory = memory;

		/*
		 * NOTE(review): retries the full-size read until one call
		 * returns exactly mem_size; a negative rv compared against
		 * the unsigned mem_size never matches -- confirm
		 * tw_socket_read()'s contract.
		 */
		rv = 0;
		while(rv != mem_size)
		{
			rv = tw_socket_read(node->clients[i],
					(char *) memory, mem_size, 100);
		}

		/* The read overwrote the header; restore the local data ptr. */
		memory->data = temp_mem_data;
		memory->prev = (tw_memory *) mem_fd;

#if VERIFY_SOCKET_TCP
		printf("recv\'d mem buf of size %d on event %f\n", rv, recv_event->recv_ts);
#endif

		mem_size = (size_t) memory->next;
		mem_fd = (tw_fd) memory->prev;
		last = memory;
	}
#endif

	recv_event->prev = NULL;

	return recv_event;
}