Example 1
void analysis_commit(analysis_state *s, tw_bf *bf, analysis_msg *m, tw_lp *lp)
{
    if ((g_st_model_stats == VT_STATS || g_st_model_stats == ALL_STATS) && s->num_lps > 0)
    {
        // write committed data to buffer
        model_sample_data *sample;
        int j;
        tw_lp *model_lp;
        lp_metadata metadata;
        // start at beginning
        for (sample = s->model_samples_head; sample != NULL; sample = sample->next)
        {
            if (sample->timestamp == m->timestamp)
            {
                for (j = 0; j < s->num_lps; j++)
                {
                    model_lp = tw_getlocal_lp(s->lp_list[j]);
                    if (model_lp->model_types == NULL || model_lp->model_types->sample_struct_sz == 0)
                        continue;
                    metadata.lpid = model_lp->gid;
                    metadata.kpid = model_lp->kp->id;
                    metadata.peid = model_lp->pe->id;
                    metadata.ts = m->timestamp;
                    metadata.sample_sz = model_lp->model_types->sample_struct_sz;
                    metadata.flag = MODEL_TYPE;

                    char buffer[sizeof(lp_metadata) + model_lp->model_types->sample_struct_sz];
                    memcpy(&buffer[0], (char*)&metadata, sizeof(lp_metadata));
                    memcpy(&buffer[sizeof(lp_metadata)], (char*)sample->lp_data[j], model_lp->model_types->sample_struct_sz);
                    if (g_tw_synchronization_protocol != SEQUENTIAL)
                        st_buffer_push(ANALYSIS_LP, &buffer[0], sizeof(lp_metadata) + model_lp->model_types->sample_struct_sz);
                    else if (!g_st_disable_out) // sequential run writes directly to the analysis output file
                        fwrite(buffer, sizeof(lp_metadata) + model_lp->model_types->sample_struct_sz, 1, seq_analysis);
                }

                sample->timestamp = 0;

                if (sample->prev)
                    sample->prev->next = sample->next;
                if (sample->next)
                    sample->next->prev = sample->prev;
                if (s->model_samples_head == sample)
                    s->model_samples_head = sample->next;
                if (s->model_samples_tail != sample)
                { // move this freed sample to the end of the list, so we don't have a memory leak
                    sample->prev = s->model_samples_tail;
                    sample->prev->next = sample;
                    sample->next = NULL;
                    s->model_samples_tail = sample;
                }

                break;
            }
        }
    }
}
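
Each committed record written above is a flat byte stream: a fixed-size lp_metadata header followed immediately by the model's raw sample struct. A minimal reader sketch for that layout (read_one_record is a hypothetical helper, not part of the instrumentation code; it assumes buf holds at least one complete record):

/* Hypothetical reader sketch: peel one record produced by analysis_commit()
 * back apart. Mirrors the memcpy layout used in the write path above. */
static const char *read_one_record(const char *buf, lp_metadata *md_out, void *sample_out)
{
    memcpy(md_out, buf, sizeof(lp_metadata));               /* fixed-size header  */
    memcpy(sample_out, buf + sizeof(lp_metadata),           /* model payload      */
           md_out->sample_sz);
    return buf + sizeof(lp_metadata) + md_out->sample_sz;   /* start of next record */
}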
Example 2
void analysis_event_rc(analysis_state *s, tw_bf *bf, analysis_msg *m, tw_lp *lp)
{
    tw_lp *model_lp;
    int j;

    lp->pe->stats.s_alp_e_rbs++;

    if ((g_st_model_stats == VT_STATS || g_st_model_stats == ALL_STATS) && s->num_lps > 0)
    {
        // need to remove sample associated with this event from the list
        model_sample_data *sample;
        // start at end, because it's most likely closer to the timestamp we're looking for
        for (sample = s->model_samples_current->prev; sample != NULL; sample = sample->prev)
        { 
            //sample = &s->model_samples[i];
            if (sample->timestamp == m->timestamp)
            {
                for (j = 0; j < s->num_lps; j++)
                {
                    if (s->lp_list[j] == ULLONG_MAX)
                        break;

                    // first call the appropriate RC fn, to allow it to undo any state changes
                    model_lp = tw_getlocal_lp(s->lp_list[j]);
                    if (model_lp->model_types == NULL || model_lp->model_types->sample_struct_sz == 0)
                        continue;
                    model_lp->model_types->sample_revent_fn(model_lp->cur_state, bf, lp, sample->lp_data[j]);
                }

                sample->timestamp = 0;

                if (sample->prev)
                    sample->prev->next = sample->next;
                if (sample->next)
                    sample->next->prev = sample->prev;
                if (s->model_samples_head == sample)
                    s->model_samples_head = sample->next;
                if (s->model_samples_tail != sample)
                { // move this freed sample to the end of the list, so we don't have a memory leak
                    sample->prev = s->model_samples_tail;
                    sample->prev->next = sample;
                    sample->next = NULL;
                    s->model_samples_tail = sample;
                }


                break;
            }
        }
    }
    
}
Example 3
void analysis_event(analysis_state *s, tw_bf *bf, analysis_msg *m, tw_lp *lp)
{
    int i;
    tw_lp *model_lp;

    lp->pe->stats.s_alp_nevent_processed++; //don't undo in RC

    if ((g_st_model_stats == VT_STATS || g_st_model_stats == ALL_STATS) && s->num_lps > 0)
    {
        model_sample_data *sample = s->model_samples_current;
        // TODO handle this situation better
        if (sample == s->model_samples_tail)
            printf("WARNING: last available sample space for analysis lp!\n");

        sample->timestamp = tw_now(lp);
        m->timestamp = tw_now(lp);

        // call the model sampling function for each LP on this KP
        for (i = 0; i < s->num_lps; i++)
        {
            if (s->lp_list[i] == ULLONG_MAX)
                break;

            model_lp = tw_getlocal_lp(s->lp_list[i]);
            if (model_lp->model_types == NULL || model_lp->model_types->sample_struct_sz == 0)
                continue;

            model_lp->model_types->sample_event_fn(model_lp->cur_state, bf, lp, sample->lp_data[i]);
        }

        s->model_samples_current = s->model_samples_current->next;
    }

    // sim engine sampling
    if (g_tw_synchronization_protocol != SEQUENTIAL && 
            (g_st_engine_stats == VT_STATS || g_st_engine_stats == ALL_STATS))
    {
#ifdef USE_DAMARIS
        if (g_st_damaris_enabled)
            st_damaris_expose_data(lp->pe, tw_now(lp), ANALYSIS_LP);
        else
            st_collect_engine_data(lp->pe, ANALYSIS_LP);
#else
        st_collect_engine_data(lp->pe, ANALYSIS_LP);
#endif
    }
        //collect_sim_engine_data(lp->pe, lp, s, (tw_stime) tw_clock_read() / g_tw_clock_rate);
    
    // create next sampling event
    st_create_sample_event(lp);
}
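
The sample_event_fn and sample_revent_fn callbacks invoked in analysis_event() and analysis_event_rc() are supplied by the instrumented model. A minimal sketch of such a pair, assuming a hypothetical model state (my_state, my_sample, and their fields are illustrative names, not part of ROSS): the forward function copies the values of interest into the buffer owned by the analysis LP, and the reverse function undoes any state changes made while sampling, here none.

/* Hypothetical model-side sampling callbacks matching the call sites above. */
typedef struct { long packets_sent; double last_busy; } my_state;
typedef struct { long packets_sent; } my_sample;

void my_sample_event(void *state, tw_bf *bf, tw_lp *lp, void *sample)
{
    (void) bf; (void) lp;
    /* copy the quantity of interest into the analysis LP's sample slot */
    ((my_sample *) sample)->packets_sent = ((my_state *) state)->packets_sent;
}

void my_sample_revent(void *state, tw_bf *bf, tw_lp *lp, void *sample)
{
    /* nothing to roll back: forward sampling only read the model state */
    (void) state; (void) bf; (void) lp; (void) sample;
}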
Example 4
void analysis_init(analysis_state *s, tw_lp *lp)
{
    int i, j, idx = 0, sim_idx = 0;
    tw_lp *cur_lp;

    // set our id relative to all analysis LPs
    s->analysis_id = lp->gid - analysis_start_gid;
    s->num_lps = ceil((double)g_tw_nlp / g_tw_nkp);

    // create list of LPs this is responsible for
    s->lp_list = (tw_lpid*)tw_calloc(TW_LOC, "analysis LPs", sizeof(tw_lpid), s->num_lps);
    s->lp_list_sim = (tw_lpid*)tw_calloc(TW_LOC, "analysis LPs", sizeof(tw_lpid), s->num_lps);
    // size of lp_list is max number of LPs this analysis LP is responsible for
    for (i = 0; i < s->num_lps; i++)
    {
        s->lp_list[i] = ULLONG_MAX;
        s->lp_list_sim[i] = ULLONG_MAX;
    }

    for (i = 0; i < g_tw_nlp; i++)
    {
        cur_lp = g_tw_lp[i];

        if (cur_lp->kp->id == s->analysis_id % g_tw_nkp)
        {
            s->lp_list_sim[sim_idx] = cur_lp->gid;
            sim_idx++;

            // check if this LP even needs sampling performed
            if (cur_lp->model_types == NULL || cur_lp->model_types->sample_struct_sz == 0)
                continue;

            s->lp_list[idx] = cur_lp->gid;
            idx++;
        }
    }

    // update num_lps
    s->num_lps = idx;
    s->num_lps_sim = sim_idx;

    // setup memory to use for model samples
    if ((g_st_model_stats == VT_STATS || g_st_model_stats == ALL_STATS) && s->num_lps > 0)
    {
        s->model_samples_head = (model_sample_data*) tw_calloc(TW_LOC, "analysis LPs", sizeof(model_sample_data), g_st_sample_count); 
        s->model_samples_current = s->model_samples_head;
        model_sample_data *sample = s->model_samples_head;
        for (i = 0; i < g_st_sample_count; i++)
        {
            if (i == 0)
            {
                sample->prev = NULL;
                sample->next = sample + 1;
                sample->next->prev = sample;
            }
            else if (i == g_st_sample_count - 1)
            {
                sample->next = NULL;
                s->model_samples_tail = sample;
            }
            else 
            {
                sample->next = sample + 1;
                sample->next->prev = sample;
            }
            if (s->num_lps <= 0)
                tw_error(TW_LOC, "s->num_lps <= 0!");
            sample->lp_data = (void**) tw_calloc(TW_LOC, "analysis LPs", sizeof(void*), s->num_lps);
            for (j = 0; j < s->num_lps; j++)
            {
                cur_lp = tw_getlocal_lp(s->lp_list[j]);
                sample->lp_data[j] = (void *) tw_calloc(TW_LOC, "analysis LPs", cur_lp->model_types->sample_struct_sz, 1);
            }
            sample = sample->next;
        }
    }

    // schedule 1st sampling event 
    st_create_sample_event(lp);
}
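
The four functions above share one fixed pool of g_st_sample_count slots kept on a doubly linked list: analysis_event() fills model_samples_current and advances it, while analysis_commit() and analysis_event_rc() unlink the finished slot and splice it back in at the tail so the pool is never exhausted. A stripped-down sketch of just that unlink-and-append step (node and recycle_to_tail are illustrative stand-ins for model_sample_data and the inline code above):

/* Illustrative sketch of the slot-recycling step used in analysis_commit()
 * and analysis_event_rc(): unlink n from wherever it sits and append it at
 * the tail so the fixed pool never leaks slots. */
typedef struct node { struct node *prev, *next; } node;

static void recycle_to_tail(node **head, node **tail, node *n)
{
    if (n == *tail)                       /* already last: nothing to do */
        return;
    if (n->prev) n->prev->next = n->next; /* unlink from current position */
    if (n->next) n->next->prev = n->prev;
    if (*head == n) *head = n->next;
    n->prev = *tail;                      /* append at the tail */
    n->prev->next = n;
    n->next = NULL;
    *tail = n;
}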
Example 5
void tw_event_send(tw_event * event) {
    tw_lp     *src_lp = event->src_lp;
    tw_pe     *send_pe = src_lp->pe;
    tw_pe     *dest_pe = NULL;

    tw_peid        dest_peid = -1;
    tw_stime   recv_ts = event->recv_ts;

    if (event == send_pe->abort_event) {
        if (recv_ts < g_tw_ts_end) {
            send_pe->cev_abort = 1;
        }
        return;
    }

    //Trap lookahead violations in debug mode
    //Note that compiling with the -DNDEBUG flag will turn this off!
    if (g_tw_synchronization_protocol == CONSERVATIVE) {
        if (recv_ts - tw_now(src_lp) < g_tw_lookahead) {
            tw_error(TW_LOC, "Lookahead violation: decrease g_tw_lookahead");
        }
    }

    if (event->out_msgs) {
        tw_error(TW_LOC, "It is an error to send an event with pre-loaded output message.");
    }

    link_causality(event, send_pe->cur_event);

    // call LP remote mapping function to get dest_pe
    dest_peid = (*src_lp->type->map) ((tw_lpid) event->dest_lp);

    if (dest_peid == g_tw_mynode) {
        event->dest_lp = tw_getlocal_lp((tw_lpid) event->dest_lp);
        dest_pe = event->dest_lp->pe;

        if (send_pe == dest_pe && event->dest_lp->kp->last_time <= recv_ts) {
            /* Fast case, we are sending to our own PE and there is
            * no rollback caused by this send.  We cannot have any
            * transient messages on local sends so we can return.
            */
            tw_pq_enqueue(send_pe->pq, event);
            return;
        } else {
            /* Slower, but still local send, so put into top of
            * dest_pe->event_q.
            */
            event->state.owner = TW_pe_event_q;

            tw_eventq_push(&dest_pe->event_q, event);

            if(send_pe != dest_pe) {
                send_pe->stats.s_nsend_loc_remote++;
            }
        }
    } else {
        /* Slowest approach of all; this is not a local event.
        * We need to send it over the network to the other PE
        * for processing.
        */
        send_pe->stats.s_nsend_net_remote++;
        event->state.owner = TW_net_asend;
        tw_net_send(event);
    }

    if(tw_gvt_inprogress(send_pe)) {
        send_pe->trans_msg_ts = ROSS_MIN(send_pe->trans_msg_ts, recv_ts);
    }
}
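
A typical caller allocates the event with tw_event_new(), fills its payload through tw_event_data(), and only then hands it to tw_event_send(). A minimal sender sketch (my_msg, send_example, and the one-unit delay are illustrative, not part of ROSS):

/* Hypothetical sender sketch using the standard ROSS event API. */
typedef struct { int kind; double value; } my_msg;

void send_example(tw_lp *lp, tw_lpid dest_gid)
{
    tw_stime offset = 1.0;                         /* illustrative delay */
    tw_event *e = tw_event_new(dest_gid, offset, lp);
    my_msg *m = (my_msg *) tw_event_data(e);
    m->kind = 1;
    m->value = tw_now(lp);
    tw_event_send(e);
}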
Example 6
inline
double	*
rm_getlocation(tw_lp * lp)
{
	double	*position;
	double   temp;

	tw_lpid	 id;
	int	 i;

	position = tw_calloc(TW_LOC, "position", sizeof(double) * g_rm_spatial_dim, 1);
	id = lp->gid - (g_rm_spatial_offset * (g_tw_mynode + 1));

#if DEBUG
//if(!g_tw_mynode)
printf("%ld: GETLOCATION: id %d\n", lp->gid, id);
#endif

	for(i = g_rm_spatial_dim-1; id >= 0 && i >= 0; i--)
	{
		if(g_rm_spatial_grid_i[i])
		{
			position[i] = floor(id / g_rm_spatial_grid_i[i]);
		} else
			position[i] = id;

#if DEBUG
	//if(!g_tw_mynode)
		printf("\ti %d, p %lf \n", i, position[i]);
#endif

		if(id && id >= g_rm_spatial_grid_i[i])
			id -= (position[i] * g_rm_spatial_grid_i[i]);

		if(position[i] < 0 || position[i] > g_rm_spatial_grid[i])
			tw_error(TW_LOC, "%llu: Off grid in %dD: 0 <= %lf <= %lf, LP %llu",
				 (unsigned long long) id, i+1, position[i], (double) g_rm_spatial_grid[i], (unsigned long long) lp->id);

		position[i] *= g_rm_spatial_d[i];

#if DEBUG
	//if(!g_tw_mynode)
		printf("\ti %d, p %lf \n\n", i, position[i]);
#endif
	}

#if DEBUG
//if(!g_tw_mynode)
	printf("\t\tp2 before %lf \n", position[2]);
#endif

	// remember the grid-computed Z value, then offset it by the terrain base elevation
	temp = position[2];
	position[2] += rm_getelevation(position);

#if DEBUG
//if(!g_tw_mynode)
	printf("\t\tp2 after %lf \n", position[2]);
#endif

	lpid = lp->gid;

	if(rm_getcell(position) != lp->gid)
	{
		printf("%d %lld %lld %lld: (%lf, %lf, %lf (%lf)) gid: %lld != %lld\n", g_tw_mynode, lp->gid, lp->id, id, position[0], position[1], temp, position[2], lp->gid, rm_getcell(position));

		if(_rm_map(rm_getcell(position)) == g_tw_mynode)
		{
			if(tw_getlocal_lp(rm_getcell(position))->type.state_sz != sizeof(rm_state))
				tw_error(TW_LOC, "got user model LP!");
		}

		if(rm_getcell(position) != lp->gid)
			tw_error(TW_LOC, "%d: Did not get correct cell location!", g_tw_mynode);
	}

	return position;
}
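
The loop above decodes a linear LP id into grid coordinates by dividing by the per-dimension stride and keeping the remainder. A worked sketch of the same idea for a plain 3D grid (decode_grid_id and the 4 x 3 x 2 example are illustrative, not the g_rm_* globals used above):

/* Illustrative decode of a linear id into (x, y, z) on an nx * ny * nz grid,
 * mirroring the stride-based loop in rm_getlocation(). Example: on a
 * 4 x 3 x 2 grid, id 17 gives z = 17 / 12 = 1 (remainder 5),
 * y = 5 / 4 = 1 (remainder 1), x = 1. */
static void decode_grid_id(unsigned long id, unsigned long nx, unsigned long ny,
                           unsigned long pos[3])
{
    unsigned long stride_z = nx * ny;   /* ids per z-slab */
    unsigned long stride_y = nx;        /* ids per y-row  */

    pos[2] = id / stride_z;  id -= pos[2] * stride_z;
    pos[1] = id / stride_y;  id -= pos[1] * stride_y;
    pos[0] = id;
}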
Example 7
/**
 * @brief Determines how to handle the newly received event.
 *
 * @param[in] me pointer to PE
 * @param[in] e pointer to event that we just received
 * @param[in] buffer not currently used
 */
static void
recv_finish(tw_pe *me, tw_event *e, char * buffer)
{
  (void) buffer;
  tw_pe		*dest_pe;
  tw_clock start;

  me->stats.s_nread_network++;
  me->s_nwhite_recv++;

  //  printf("recv_finish: remote event [cancel %u] FROM: LP %lu, PE %lu, TO: LP %lu, PE %lu at TS %lf \n",
  //	 e->state.cancel_q, (tw_lpid)e->src_lp, e->send_pe, (tw_lpid)e->dest_lp, me->id, e->recv_ts);

  e->dest_lp = tw_getlocal_lp((tw_lpid) e->dest_lp);
  dest_pe = e->dest_lp->pe;
  // instrumentation
  e->dest_lp->kp->kp_stats->s_nread_network++;
  e->dest_lp->lp_stats->s_nread_network++;

  if(e->send_pe > tw_nnodes()-1)
    tw_error(TW_LOC, "bad sendpe_id: %d", e->send_pe);

  e->cancel_next = NULL;
  e->caused_by_me = NULL;
  e->cause_next = NULL;



  if(e->recv_ts < me->GVT)
    tw_error(TW_LOC, "%d: Received straggler from %d: %lf (%d)",
	     me->id,  e->send_pe, e->recv_ts, e->state.cancel_q);

  if(tw_gvt_inprogress(me))
    me->trans_msg_ts = ROSS_MIN(me->trans_msg_ts, e->recv_ts);

  // if cancel event, retrieve and flush
  // else, store in hash table
  if(e->state.cancel_q)
    {
      tw_event *cancel = tw_hash_remove(me->hash_t, e, e->send_pe);

      // NOTE: it is possible to cancel the event we
      // are currently processing at this PE since this
      // MPI module lets me read cancel events during
      // event sends over the network.

      cancel->state.cancel_q = 1;
      cancel->state.remote = 0;

      cancel->cancel_next = dest_pe->cancel_q;
      dest_pe->cancel_q = cancel;

      tw_event_free(me, e);

      return;
    }

  if (g_tw_synchronization_protocol == OPTIMISTIC ||
      g_tw_synchronization_protocol == OPTIMISTIC_DEBUG ||
      g_tw_synchronization_protocol == OPTIMISTIC_REALTIME ) {
    tw_hash_insert(me->hash_t, e, e->send_pe);
    e->state.remote = 1;
  }

  /* NOTE: the final check in the if conditional below was added to make sure
   * that we do not execute the fast case unless the cancellation queue is
   * empty on the destination PE.  Otherwise we need to invoke the normal
   * scheduling routines to make sure that a forward event doesn't bypass a
   * cancellation event with an earlier timestamp.  This is helpful for
   * stateful models that produce incorrect results when presented with
   * duplicate messages with no rollback between them.
   */
  if(me == dest_pe && e->dest_lp->kp->last_time <= e->recv_ts && !dest_pe->cancel_q) {
    /* Fast case, we are sending to our own PE and
     * there is no rollback caused by this send.
     */
    start = tw_clock_read();
    tw_pq_enqueue(dest_pe->pq, e);
    dest_pe->stats.s_pq += tw_clock_read() - start;
    return;
  }

  if (me->id == dest_pe->id) {
    /* Slower, but still local send, so put into top
     * of dest_pe->event_q.
     */
    e->state.owner = TW_pe_event_q;
    tw_eventq_push(&dest_pe->event_q, e);
    return;
  }

  /* Never should happen; MPI should have gotten the
   * message to the correct node without needing us
   * to redirect the message there for it.  This is
   * probably a serious bug with the event headers
   * not being formatted right.
   */
  tw_error(
	   TW_LOC,
	   "Event recived by PE %u but meant for PE %u",
	   me->id,
	   dest_pe->id);
}
Example 8
tw_event       *
tw_socket_read_event(tw_pe * me)
{
	tw_net_node	*node = g_tw_net_node[me->id];

	tw_event       *recv_event;
	tw_event       *cancel_event;

#ifdef ROSS_MEMORY_LIB
	tw_memory	*last;
	tw_memory	*memory;
#endif

	//tw_message     *temp_message;
	void           *temp_data;

	//tw_pe          *send_pe;
	tw_peid		send_peid;
	tw_pe          *dest_pe;

	int             rv;
	unsigned int             i;

#ifdef ROSS_MEMORY_LIB
	void           *temp_mem_data;

	size_t		mem_size;
	tw_fd		mem_fd;
#endif

	rv = 0;

	/*
	 * Get a free event from our freeq and save the pointers
	 * to the message and the data for later use.
	 */
	if(me->abort_event == (recv_event = tw_event_grab(me)))
		return NULL;

	//temp_message = recv_event->message;
	//temp_data = recv_event->message->data;
	temp_data = recv_event + 1;

	/*
	 * Attempt to read an event, and return NULL if no more events to recv.
	 */
	for (i = 0; i < nnet_nodes - g_tw_npe; i++)
	{
		rv = tw_socket_read(node->clients[i],
			(char *) recv_event, sizeof(tw_event) + g_tw_msg_sz, 100);

		if (rv > 0)
			break;
	}

	/*
	 * Check to see if we actually read an event
	 */
	if (1 > rv)
	{
		if(recv_event != me->abort_event)
		{
			recv_event->event_id = 0;
			tw_eventq_unshift(&me->free_q, recv_event);
		}

		return NULL;
	}

	if (recv_event == me->abort_event)
		tw_error(TW_LOC, "Out of memory!  Allocate more events!");

	if(recv_event->recv_ts < me->GVT)
		tw_error(TW_LOC, "Received straggler event!");

	/*
	 * Restore recv'ed event's pointers
	 *
	 * on recv'rs side: have dest_lp ptr, not src_lp ptr
	 */
	//recv_event->dest_lp = tw_getlp((tw_lpid)recv_event->dest_lp);
	//recv_event->src_lp = tw_getlp((tw_lpid)recv_event->src_lp);
	//recv_event->message = temp_message;
	//recv_event->message->data = temp_data;
	recv_event->dest_lp = tw_getlocal_lp((tw_lpid) recv_event->dest_lp);

	//send_pe = recv_event->src_lp->pe;
	send_peid = (recv_event->dest_lp->type.map)
				((tw_lpid) recv_event->src_lp);

	if(send_peid == me->id)
		tw_error(TW_LOC, "Sent event over network to self?");

	if (recv_event->recv_ts > g_tw_ts_end)
		tw_error(TW_LOC, "%d: Received remote event at %d, end=%d!", 
				recv_event->dest_lp->id,
				recv_event->recv_ts, g_tw_ts_end);

	if(recv_event->dest_lp->pe != me)
		tw_error(TW_LOC, "Not destination PE!");

	/*
	 * If a CANCEL message, just get the event out of hash table * and call 
	 * tw_event_cancel() on it, which rolls it back if nec 
	 */
	if(recv_event->state.owner == TW_net_acancel)
	{
#if VERIFY_SOCKET_TCP
		printf
			("\t\t\t\t\t\t\t\tREAD CANCEL: dest p%d l%d: ts=%f sn=%d\n",
			 recv_event->dest_lp->pe->id,
			 recv_event->dest_lp->id,
			 recv_event->recv_ts, recv_event->event_id);
#endif

		cancel_event = NULL;

		cancel_event = tw_hash_remove(me->hash_t, recv_event, send_peid);
		dest_pe = cancel_event->dest_lp->pe;
		cancel_event->state.cancel_q = 1;
		cancel_event->state.remote = 0;

		if(cancel_event == recv_event)
			tw_error(TW_LOC, "cancel_event == recv_event!");

		if(cancel_event->state.owner == 0 ||
			cancel_event->state.owner == TW_pe_free_q)
			tw_error(TW_LOC, "cancel_event no owner!");

		tw_mutex_lock(&dest_pe->cancel_q_lck);
		cancel_event->cancel_next = dest_pe->cancel_q;
		dest_pe->cancel_q = cancel_event;
		tw_mutex_unlock(&dest_pe->cancel_q_lck);

		recv_event->event_id = recv_event->state.cancel_q = 0;
		recv_event->state.remote = 0;

		tw_event_free(me, recv_event);

		return cancel_event;
	}

	recv_event->next = NULL;
	//recv_event->lp_state = NULL;
	recv_event->cancel_next = NULL;
	recv_event->caused_by_me = NULL;
	recv_event->cause_next = NULL;

	// signals for on-the-fly fossil collection
	recv_event->state.remote = 1;

	tw_hash_insert(me->hash_t, recv_event, send_peid);

#if VERIFY_SOCKET_TCP
	printf
		("\t\t\t\t\t\t\t\tREAD NORMAL: dest p%d l%d: ts=%f sn=%d src p%d l%d \n",
		 recv_event->dest_lp->pe->id,
		 recv_event->dest_lp->id,
		 recv_event->recv_ts, recv_event->seq_num,
		 recv_event->src_lp->pe->id,
		 recv_event->src_lp->id);
#endif

#ifdef ROSS_MEMORY_LIB
	mem_size = (size_t) recv_event->memory;
	mem_fd = (tw_fd) recv_event->prev;
	last = NULL;
	while(mem_size)
	{
		memory = tw_memory_alloc(recv_event->src_lp, mem_fd);
		temp_mem_data = memory->data;

		if(last)
			last->next = memory;
		else
			recv_event->memory = memory;

		rv = 0;
		while(rv != mem_size)
		{
			rv = tw_socket_read(node->clients[i],
					(char *) memory, mem_size, 100);
		}

		memory->data = temp_mem_data;
		memory->prev = (tw_memory *) mem_fd;

#if VERIFY_SOCKET_TCP
		printf("recv\'d mem buf of size %d on event %f\n", rv, recv_event->recv_ts);
#endif

		mem_size = (size_t) memory->next;
		mem_fd = (tw_fd) memory->prev;
		last = memory;
	}
#endif

	recv_event->prev = NULL;

	return recv_event;
}