Example #1
/**
 * @brief Determines how to handle the buffer of an event whose send
 * operation has just finished.
 *
 * @param[in] me pointer to the PE
 * @param[in] e pointer to the event whose send just completed
 * @param[in] buffer not currently used
 */
static void
send_finish(tw_pe *me, tw_event *e, char * buffer)
{
  (void) buffer;
  me->stats.s_nsend_network++;
  // instrumentation
  e->src_lp->kp->kp_stats->s_nsend_network++;
  e->src_lp->lp_stats->s_nsend_network++;

  if (e->state.owner == TW_net_asend) {
    if (e->state.cancel_asend) {
      /* Event was cancelled during transmission.  We must
       * send another message to pass the cancel flag to
       * the other node.
       */
      e->state.cancel_asend = 0;
      e->state.cancel_q = 1;
      tw_eventq_push(&outq, e);
    } else {
      /* Event finished transmission and was not cancelled.
       * Add to our sent event queue so we can retain the
       * event in case we need to cancel it later.  Note it
       * is currently in remote format and must be converted
       * back to local format for fossil collection.
       */
      e->state.owner = TW_pe_sevent_q;
      /* Under the conservative protocol there are no rollbacks, so the
       * sent event can never be cancelled; reclaim it immediately. */
      if (g_tw_synchronization_protocol == CONSERVATIVE)
        tw_event_free(me, e);
    }

    return;
  }

  if (e->state.owner == TW_net_acancel) {
    /* We just finished sending the cancellation message
     * for this event.  We need to free the buffer and
     * make it available for reuse.
     */
    tw_event_free(me, e);
    return;
  }

  /* This should never happen unless this module's other
   * event-sending functions are broken.
   */

  tw_error(
	   TW_LOC,
	   "Don't know how to finish send of owner=%u, cancel_q=%d",
	   e->state.owner,
	   e->state.cancel_q);

}
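
A minimal sketch of the completion loop that would drive send_finish(): the
MPI layer polls its outstanding MPI_Isend requests and hands each finished
event to the callback. The reqs/evs bookkeeping arrays here are illustrative
stand-ins for the real outstanding-send tracking, not the actual ROSS
structures; only the MPI calls are real.

#include <mpi.h>
#include <stdlib.h>

static void
poll_sends(tw_pe *me, MPI_Request *reqs, tw_event **evs, int n)
{
    int ndone = 0;
    int *done_idx;

    if (n <= 0)
        return;
    done_idx = (int *) malloc(n * sizeof(*done_idx));

    /* MPI_Testsome reports which of the n posted sends have completed */
    if (MPI_Testsome(n, reqs, &ndone, done_idx, MPI_STATUSES_IGNORE) == MPI_SUCCESS) {
        for (int i = 0; i < ndone; i++)
            send_finish(me, evs[done_idx[i]], NULL);  /* buffer arg is unused */
    }
    free(done_idx);
}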
Example #2
/**
 * @brief Allocates and initializes the per-PE structures (priority queue,
 * free event queue, abort event) and, on the master node, prints the
 * core configuration.
 *
 * @return pointer to the master PE for this node
 */
static tw_pe * setup_pes(void) {
    tw_pe   *pe;
    tw_pe   *master;

    int  i;
    unsigned int num_events_per_pe;

    /* +1 reserves a slot for the PE's dedicated abort event, which is
     * shifted off free_q below */
    num_events_per_pe = 1 + g_tw_events_per_pe + g_tw_events_per_pe_extra;

    master = g_tw_pe[0];

    if (!master) {
        tw_error(TW_LOC, "No PE configured on this node.");
    }

    if (g_tw_mynode == g_tw_masternode) {
        master->master = 1;
    }
    master->local_master = 1;

    for(i = 0; i < g_tw_npe; i++) {
        pe = g_tw_pe[i];
        if (g_tw_buddy_alloc) {
            g_tw_buddy_master = create_buddy_table(g_tw_buddy_alloc);
            if (g_tw_buddy_master == NULL) {
                tw_error(TW_LOC, "create_buddy_table() failed.");
            }
            tw_delta_alloc(pe);
        }
        pe->pq = tw_pq_create();

        tw_eventq_alloc(&pe->free_q, num_events_per_pe);
        pe->abort_event = tw_eventq_shift(&pe->free_q);
#ifdef USE_RIO
        /* use a dedicated index so the outer PE loop counter i is not clobbered */
        {
            int j;
            for (j = 0; j < g_io_events_buffered_per_rank; j++) {
                tw_eventq_push(&g_io_free_events, tw_eventq_pop(&g_tw_pe[0]->free_q));
            }
        }
#endif
    }

    if (g_tw_mynode == g_tw_masternode) {
        printf("\nROSS Core Configuration: \n");
        printf("\t%-50s %11u\n", "Total Nodes", tw_nnodes());
        fprintf(g_tw_csv, "%u,", tw_nnodes());

        printf("\t%-50s [Nodes (%u) x PE_per_Node (%lu)] %lu\n", "Total Processors", tw_nnodes(), g_tw_npe, (tw_nnodes() * g_tw_npe));
        fprintf(g_tw_csv, "%lu,", (tw_nnodes() * g_tw_npe));

        printf("\t%-50s [Nodes (%u) x KPs (%lu)] %lu\n", "Total KPs", tw_nnodes(), g_tw_nkp, (tw_nnodes() * g_tw_nkp));
        fprintf(g_tw_csv, "%lu,", (tw_nnodes() * g_tw_nkp));

        printf("\t%-50s %11llu\n", "Total LPs", (tw_nnodes() * g_tw_npe * g_tw_nlp));
        fprintf(g_tw_csv, "%llu,", (tw_nnodes() * g_tw_npe * g_tw_nlp));

        printf("\t%-50s %11.2lf\n", "Simulation End Time", g_tw_ts_end);
        fprintf(g_tw_csv, "%11.2lf\n", g_tw_ts_end);


        switch(g_tw_mapping) {
            case LINEAR:
                printf("\t%-50s %11s\n", "LP-to-PE Mapping", "linear");
                fprintf(g_tw_csv, "%s,", "linear");
                break;

            case ROUND_ROBIN:
                printf("\t%-50s %11s\n", "LP-to-PE Mapping", "round robin");
                fprintf(g_tw_csv, "%s,", "round robin");
                break;

            case CUSTOM:
                printf("\t%-50s %11s\n", "LP-to-PE Mapping", "model defined");
                fprintf(g_tw_csv, "%s,", "model defined");
                break;
        }
        printf("\n");

#ifndef ROSS_DO_NOT_PRINT
        printf("\nROSS Event Memory Allocation:\n");
        printf("\t%-50s %11d\n", "Model events", num_events_per_pe);
        fprintf(g_tw_csv, "%d,", num_events_per_pe);

        printf("\t%-50s %11d\n", "Network events", g_tw_gvt_threshold);
        fprintf(g_tw_csv, "%d,", g_tw_gvt_threshold);

        printf("\t%-50s %11d\n", "Total events", g_tw_events_per_pe);
        fprintf(g_tw_csv, "%d,", g_tw_events_per_pe);
        printf("\n");
#endif
    }
    return master;
}
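
For context, a hedged sketch of where setup_pes() sits in a model's run: in
the ROSS sources it is invoked from tw_run(), after the model has configured
its LPs. Entry-point signatures vary across ROSS versions, and model_msg and
nlp_per_pe below are hypothetical model-side names.

int main(int argc, char **argv)
{
    tw_init(&argc, &argv);           /* MPI startup and option parsing */
    tw_define_lps(nlp_per_pe, sizeof(struct model_msg));
    /* ... tw_lp_settype() for each LP ... */
    tw_run();                        /* calls setup_pes(), then the scheduler */
    tw_end();
    return 0;
}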
Example #3
/**
 * @brief Sends an event to its destination LP, using the local fast path,
 * the destination PE's event queue, or the network as appropriate.
 *
 * @param[in] event event to send; dest_lp and recv_ts must already be set
 */
void tw_event_send(tw_event * event) {
    tw_lp     *src_lp = event->src_lp;
    tw_pe     *send_pe = src_lp->pe;
    tw_pe     *dest_pe = NULL;

    tw_peid        dest_peid = -1;
    tw_stime   recv_ts = event->recv_ts;

    if (event == send_pe->abort_event) {
        if (recv_ts < g_tw_ts_end) {
            send_pe->cev_abort = 1;
        }
        return;
    }

    // Trap lookahead violations under the conservative protocol.
    // This check uses tw_error rather than assert, so it remains active
    // even when compiled with -DNDEBUG.
    if (g_tw_synchronization_protocol == CONSERVATIVE) {
        if (recv_ts - tw_now(src_lp) < g_tw_lookahead) {
            tw_error(TW_LOC, "Lookahead violation: decrease g_tw_lookahead");
        }
    }

    if (event->out_msgs) {
        tw_error(TW_LOC, "It is an error to send an event with pre-loaded output message.");
    }

    link_causality(event, send_pe->cur_event);

    // call LP remote mapping function to get dest_pe
    dest_peid = (*src_lp->type->map) ((tw_lpid) event->dest_lp);

    if (dest_peid == g_tw_mynode) {
        event->dest_lp = tw_getlocal_lp((tw_lpid) event->dest_lp);
        dest_pe = event->dest_lp->pe;

        if (send_pe == dest_pe && event->dest_lp->kp->last_time <= recv_ts) {
            /* Fast case, we are sending to our own PE and there is
             * no rollback caused by this send.  We cannot have any
             * transient messages on local sends so we can return.
             */
            tw_pq_enqueue(send_pe->pq, event);
            return;
        } else {
            /* Slower, but still a local send, so push onto the top of
             * dest_pe->event_q.
             */
            event->state.owner = TW_pe_event_q;

            tw_eventq_push(&dest_pe->event_q, event);

            if(send_pe != dest_pe) {
                send_pe->stats.s_nsend_loc_remote++;
            }
        }
    } else {
        /* Slowest path of all: this is not a local event, so it must
         * be sent over the network to the owning PE for processing.
         */
        send_pe->stats.s_nsend_net_remote++;
        event->state.owner = TW_net_asend;
        tw_net_send(event);
    }

    if(tw_gvt_inprogress(send_pe)) {
        send_pe->trans_msg_ts = ROSS_MIN(send_pe->trans_msg_ts, recv_ts);
    }
}
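
Typical model-side usage, as a hedged sketch: allocate an event with
tw_event_new(), fill in the message payload via tw_event_data(), and hand it
to tw_event_send(). The state/message types, PING tag, and dest_gid below
are hypothetical model definitions, not part of the ROSS API.

#include "ross.h"

/* hypothetical model-side definitions, for illustration only */
typedef struct { int dummy; } state;
typedef struct { int type; } message;
enum { PING = 1 };
static const tw_lpid dest_gid = 0;

static void example_event_handler(state *s, tw_bf *bf, message *in, tw_lp *lp)
{
    (void) s; (void) bf; (void) in;

    tw_stime delay = 1.0;   /* must be >= g_tw_lookahead under CONSERVATIVE */
    tw_event *e = tw_event_new(dest_gid, delay, lp); /* recv_ts = tw_now(lp) + delay */
    message  *m = tw_event_data(e);

    m->type = PING;         /* model-defined payload */
    tw_event_send(e);
}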
Example #4
/**
 * @brief Determines how to handle the newly received event.
 *
 * @param[in] me pointer to PE
 * @param[in] e pointer to event that we just received
 * @param[in] buffer not currently used
 */
static void
recv_finish(tw_pe *me, tw_event *e, char * buffer)
{
  (void) buffer;
  tw_pe    *dest_pe;
  tw_clock  start;

  me->stats.s_nread_network++;
  me->s_nwhite_recv++;

  //  printf("recv_finish: remote event [cancel %u] FROM: LP %lu, PE %lu, TO: LP %lu, PE %lu at TS %lf \n",
  //	 e->state.cancel_q, (tw_lpid)e->src_lp, e->send_pe, (tw_lpid)e->dest_lp, me->id, e->recv_ts);

  e->dest_lp = tw_getlocal_lp((tw_lpid) e->dest_lp);
  dest_pe = e->dest_lp->pe;
  // instrumentation
  e->dest_lp->kp->kp_stats->s_nread_network++;
  e->dest_lp->lp_stats->s_nread_network++;

  if (e->send_pe > tw_nnodes() - 1)
    tw_error(TW_LOC, "bad sendpe_id: %lu", (unsigned long) e->send_pe);

  e->cancel_next = NULL;
  e->caused_by_me = NULL;
  e->cause_next = NULL;

  if (e->recv_ts < me->GVT)
    tw_error(TW_LOC, "%lu: Received straggler from %lu: %lf (%d)",
             (unsigned long) me->id, (unsigned long) e->send_pe,
             e->recv_ts, e->state.cancel_q);

  if(tw_gvt_inprogress(me))
    me->trans_msg_ts = ROSS_MIN(me->trans_msg_ts, e->recv_ts);

  // if cancel event, retrieve and flush
  // else, store in hash table
  if(e->state.cancel_q)
    {
      tw_event *cancel = tw_hash_remove(me->hash_t, e, e->send_pe);

      // NOTE: it is possible to cancel the event we
      // are currently processing at this PE since this
      // MPI module lets me read cancel events during
      // event sends over the network.

      cancel->state.cancel_q = 1;
      cancel->state.remote = 0;

      cancel->cancel_next = dest_pe->cancel_q;
      dest_pe->cancel_q = cancel;

      tw_event_free(me, e);

      return;
    }

  if (g_tw_synchronization_protocol == OPTIMISTIC ||
      g_tw_synchronization_protocol == OPTIMISTIC_DEBUG ||
      g_tw_synchronization_protocol == OPTIMISTIC_REALTIME ) {
    tw_hash_insert(me->hash_t, e, e->send_pe);
    e->state.remote = 1;
  }

  /* NOTE: the final check in the if conditional below was added to make sure
   * that we do not execute the fast case unless the cancellation queue is
   * empty on the destination PE.  Otherwise we need to invoke the normal
   * scheduling routines to make sure that a forward event doesn't bypass a
   * cancellation event with an earlier timestamp.  This is helpful for
   * stateful models that produce incorrect results when presented with
   * duplicate messages with no rollback between them.
   */
  if(me == dest_pe && e->dest_lp->kp->last_time <= e->recv_ts && !dest_pe->cancel_q) {
    /* Fast case, we are sending to our own PE and
     * there is no rollback caused by this send.
     */
    start = tw_clock_read();
    tw_pq_enqueue(dest_pe->pq, e);
    dest_pe->stats.s_pq += tw_clock_read() - start;
    return;
  }

  if (me->id == dest_pe->id) {
    /* Slower, but still local send, so put into top
     * of dest_pe->event_q.
     */
    e->state.owner = TW_pe_event_q;
    tw_eventq_push(&dest_pe->event_q, e);
    return;
  }

  /* This should never happen; MPI should have delivered the
   * message to the correct node without needing us to redirect
   * it. This most likely indicates a serious bug with the event
   * headers not being formatted correctly.
   */
  tw_error(
           TW_LOC,
           "Event received by PE %lu but meant for PE %lu",
           (unsigned long) me->id,
           (unsigned long) dest_pe->id);
}
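
The cancel handling above threads events onto dest_pe->cancel_q through their
own cancel_next pointers. Below is a self-contained illustration of that
intrusive singly-linked pattern; the struct and function names are
illustrative, not the ROSS structures.

#include <stddef.h>

struct ev { struct ev *cancel_next; };

/* push: two pointer writes, no allocation -- the same shape as the
 * dest_pe->cancel_q update in recv_finish above */
static void cancel_q_push(struct ev **head, struct ev *e)
{
    e->cancel_next = *head;
    *head = e;
}

/* pop: how a scheduler pass could drain the queue one event at a time */
static struct ev *cancel_q_pop(struct ev **head)
{
    struct ev *e = *head;
    if (e) {
        *head = e->cancel_next;
        e->cancel_next = NULL;
    }
    return e;
}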