Esempio n. 1
0
/**
 * @brief Dispatch an event that has just been received from the network.
 *
 * Bumps the receive counters, replaces the destination LP id carried in the
 * event with the local LP pointer, validates the sender id and timestamp,
 * and then either handles the event as a cancel message or queues it on the
 * destination PE.
 *
 * @param[in] me     PE on which the event was received
 * @param[in] e      event that was just received
 * @param[in] buffer unused
 */
static void
recv_finish(tw_pe *me, tw_event *e, char * buffer)
{
  (void) buffer;

  /* Account for the receive at PE level. */
  me->stats.s_nread_network++;
  me->s_nwhite_recv++;

  /* dest_lp arrives holding an LP id; swap it for the pointer returned by
   * tw_getlocal_lp() and remember the PE that owns that LP.
   */
  e->dest_lp = tw_getlocal_lp((tw_lpid) e->dest_lp);
  tw_pe *target = e->dest_lp->pe;

  /* Instrumentation: per-KP and per-LP receive counters. */
  e->dest_lp->kp->kp_stats->s_nread_network++;
  e->dest_lp->lp_stats->s_nread_network++;

  /* The sender's PE id must not exceed the highest node index. */
  if (tw_nnodes() - 1 < e->send_pe)
    tw_error(TW_LOC, "bad sendpe_id: %d", e->send_pe);

  /* Clear the cancellation / causality links of the received event. */
  e->cause_next = NULL;
  e->caused_by_me = NULL;
  e->cancel_next = NULL;

  /* A receive timestamp below this PE's GVT is reported as an error. */
  if (e->recv_ts < me->GVT)
    tw_error(TW_LOC, "%d: Received straggler from %d: %lf (%d)",
	     me->id,  e->send_pe, e->recv_ts, e->state.cancel_q);

  /* While a GVT computation is in progress, fold this timestamp into the
   * running minimum kept in trans_msg_ts.
   */
  if (tw_gvt_inprogress(me))
    me->trans_msg_ts = ROSS_MIN(me->trans_msg_ts, e->recv_ts);

  if (e->state.cancel_q) {
    /* Cancel message: take the matching event out of the hash table, push
     * it onto the destination PE's cancel queue and release the received
     * event.
     *
     * NOTE: the event being cancelled may be the one this PE is currently
     * processing, because this MPI module reads cancel events while
     * sending events over the network.
     */
    tw_event *doomed = tw_hash_remove(me->hash_t, e, e->send_pe);

    doomed->state.remote = 0;
    doomed->state.cancel_q = 1;

    doomed->cancel_next = target->cancel_q;
    target->cancel_q = doomed;

    tw_event_free(me, e);
    return;
  }

  /* Under the optimistic protocols, record the event in the hash table and
   * flag it as remote.
   */
  int optimistic = (g_tw_synchronization_protocol == OPTIMISTIC ||
		    g_tw_synchronization_protocol == OPTIMISTIC_DEBUG ||
		    g_tw_synchronization_protocol == OPTIMISTIC_REALTIME);
  if (optimistic) {
    tw_hash_insert(me->hash_t, e, e->send_pe);
    e->state.remote = 1;
  }

  /* Fast case: the event is for our own PE, it does not precede the KP's
   * last_time (so no rollback), and the destination PE's cancel queue is
   * empty.  The cancel-queue condition keeps a forward event from bypassing
   * a cancellation event with an earlier timestamp; otherwise the normal
   * scheduling path below must be used.  This matters for stateful models
   * that misbehave when shown duplicate messages with no rollback between
   * them.
   */
  if (me == target && !target->cancel_q &&
      e->dest_lp->kp->last_time <= e->recv_ts) {
    tw_clock t_begin = tw_clock_read();
    tw_pq_enqueue(target->pq, e);
    target->stats.s_pq += tw_clock_read() - t_begin;
    return;
  }

  if (me->id != target->id) {
    /* Should never happen: MPI should have delivered the message to the
     * correct node without any redirection on our part.  Most likely the
     * event headers are not formatted correctly.
     */
    tw_error(
	     TW_LOC,
	     "Event recived by PE %u but meant for PE %u",
	     me->id,
	     target->id);
    return;
  }

  /* Slower, but still local: push onto the destination PE's event_q. */
  e->state.owner = TW_pe_event_q;
  tw_eventq_push(&target->event_q, e);
}
Esempio n. 2
0
/**
 * @brief Try to read one event from the client sockets of this PE's node.
 *
 * Grabs a free event, polls the node's client sockets until one of them
 * yields data, and post-processes what was read: the destination LP id is
 * replaced by the local LP pointer, sanity checks are applied, and the event
 * is then handled either as a cancel message or as a new remote event that
 * is recorded in the hash table.
 *
 * @param[in] me PE performing the read
 * @return NULL if tw_event_grab() returned the abort event or nothing was
 *         read; for a cancel message, the previously received event that it
 *         cancels (already pushed onto its PE's cancel queue); otherwise the
 *         newly received event.
 */
tw_event       *
tw_socket_read_event(tw_pe * me)
{
	tw_net_node	*node = g_tw_net_node[me->id];

	tw_event       *recv_event;
	tw_event       *cancel_event;

#ifdef ROSS_MEMORY_LIB
	tw_memory	*last;
	tw_memory	*memory;
#endif

	tw_peid		send_peid;
	tw_pe          *dest_pe;

	int             rv;
	unsigned int             i;

#ifdef ROSS_MEMORY_LIB
	void           *temp_mem_data;

	size_t		mem_size;
	tw_fd		mem_fd;
#endif

	rv = 0;

	/*
	 * Get a free event from our freeq.  If tw_event_grab() hands back
	 * the PE's abort event, give up without reading anything.
	 */
	recv_event = tw_event_grab(me);
	if(recv_event == me->abort_event)
		return NULL;

	/*
	 * Attempt to read an event, and return NULL if no more events to recv.
	 * On success, i is left at the index of the client that was read from.
	 */
	for (i = 0; i < nnet_nodes - g_tw_npe; i++)
	{
		rv = tw_socket_read(node->clients[i],
			(char *) recv_event, sizeof(tw_event) + g_tw_msg_sz, 100);

		if (rv > 0)
			break;
	}

	/*
	 * Nothing was read: hand the event back to the free queue.
	 * (recv_event cannot be the abort event here; that case returned
	 * above.)
	 */
	if (1 > rv)
	{
		recv_event->event_id = 0;
		tw_eventq_unshift(&me->free_q, recv_event);

		return NULL;
	}

	if(recv_event->recv_ts < me->GVT)
		tw_error(TW_LOC, "Received straggler event!");

	/*
	 * Restore recv'ed event's pointers
	 *
	 * on recv'rs side: have dest_lp ptr, not src_lp ptr
	 * (src_lp keeps holding the sender's LP id).
	 */
	recv_event->dest_lp = tw_getlocal_lp((tw_lpid) recv_event->dest_lp);

	/* Map the source LP id to the id of the PE that sent the event. */
	send_peid = (recv_event->dest_lp->type.map)
				((tw_lpid) recv_event->src_lp);

	if(send_peid == me->id)
		tw_error(TW_LOC, "Sent event over network to self?");

	/*
	 * The timestamps are printed with a floating-point conversion here,
	 * as everywhere else in this function; the casts make each argument
	 * match its conversion specifier exactly.
	 */
	if (recv_event->recv_ts > g_tw_ts_end)
		tw_error(TW_LOC, "%llu: Received remote event at %f, end=%f!",
				(unsigned long long) recv_event->dest_lp->id,
				(double) recv_event->recv_ts,
				(double) g_tw_ts_end);

	if(recv_event->dest_lp->pe != me)
		tw_error(TW_LOC, "Not destination PE!");

	/*
	 * If a CANCEL message, get the matching event out of the hash table,
	 * put it on its PE's cancel queue and release the message itself.
	 */
	if(recv_event->state.owner == TW_net_acancel)
	{
#if VERIFY_SOCKET_TCP
		printf
			("\t\t\t\t\t\t\t\tREAD CANCEL: dest p%d l%d: ts=%f sn=%d\n",
			 recv_event->dest_lp->pe->id,
			 recv_event->dest_lp->id,
			 recv_event->recv_ts, recv_event->event_id);
#endif

		cancel_event = tw_hash_remove(me->hash_t, recv_event, send_peid);

		/*
		 * Validate the lookup result before touching it, so that a bad
		 * result is reported instead of being dereferenced or modified.
		 * NOTE(review): this relies on tw_error() not returning -- confirm.
		 */
		if(cancel_event == NULL)
			tw_error(TW_LOC, "cancel_event not found in hash table!");

		if(cancel_event == recv_event)
			tw_error(TW_LOC, "cancel_event == recv_event!");

		if(cancel_event->state.owner == 0 ||
			cancel_event->state.owner == TW_pe_free_q)
			tw_error(TW_LOC, "cancel_event no owner!");

		dest_pe = cancel_event->dest_lp->pe;
		cancel_event->state.cancel_q = 1;
		cancel_event->state.remote = 0;

		/* Push onto the destination PE's cancel queue under its lock. */
		tw_mutex_lock(&dest_pe->cancel_q_lck);
		cancel_event->cancel_next = dest_pe->cancel_q;
		dest_pe->cancel_q = cancel_event;
		tw_mutex_unlock(&dest_pe->cancel_q_lck);

		/* Scrub the cancel message before freeing it. */
		recv_event->event_id = recv_event->state.cancel_q = 0;
		recv_event->state.remote = 0;

		tw_event_free(me, recv_event);

		return cancel_event;
	}

	recv_event->next = NULL;
	recv_event->cancel_next = NULL;
	recv_event->caused_by_me = NULL;
	recv_event->cause_next = NULL;

	// signals for on-the-fly fossil collection
	recv_event->state.remote = 1;

	tw_hash_insert(me->hash_t, recv_event, send_peid);

#if VERIFY_SOCKET_TCP
	/*
	 * src_lp is never converted to an LP pointer in this function, so
	 * print the raw LP id and the mapped sender PE id instead of
	 * dereferencing it.
	 */
	printf
		("\t\t\t\t\t\t\t\tREAD NORMAL: dest p%d l%d: ts=%f sn=%d src p%lu l%llu \n",
		 recv_event->dest_lp->pe->id,
		 recv_event->dest_lp->id,
		 recv_event->recv_ts, recv_event->seq_num,
		 (unsigned long) send_peid,
		 (unsigned long long) (tw_lpid) recv_event->src_lp);
#endif

#ifdef ROSS_MEMORY_LIB
	/*
	 * Read the chain of memory buffers that follows the event.  On entry
	 * the event's memory field holds the size of the first buffer and its
	 * prev field holds that buffer's fd; each buffer read then supplies
	 * the size of the next one in its next field (0 ends the chain).
	 */
	mem_size = (size_t) recv_event->memory;
	mem_fd = (tw_fd) recv_event->prev;
	last = NULL;
	while(mem_size)
	{
		/* NOTE(review): src_lp has not been converted to an LP pointer
		 * at this point -- confirm what tw_memory_alloc() expects. */
		memory = tw_memory_alloc(recv_event->src_lp, mem_fd);
		temp_mem_data = memory->data;

		if(last)
			last->next = memory;
		else
			recv_event->memory = memory;

		/* NOTE(review): rv is int and mem_size is size_t; if
		 * tw_socket_read() can return a negative or short count this
		 * loop never terminates -- confirm its return contract. */
		rv = 0;
		while(rv != mem_size)
		{
			rv = tw_socket_read(node->clients[i],
					(char *) memory, mem_size, 100);
		}

		/* The read overwrote the whole buffer header; restore the
		 * local data pointer and store this buffer's fd in prev. */
		memory->data = temp_mem_data;
		memory->prev = (tw_memory *) mem_fd;

#if VERIFY_SOCKET_TCP
		printf("recv\'d mem buf of size %d on event %f\n", rv, recv_event->recv_ts);
#endif

		mem_size = (size_t) memory->next;
		mem_fd = (tw_fd) memory->prev;
		last = memory;
	}
#endif

	recv_event->prev = NULL;

	return recv_event;
}