示例#1
0
/*
 * Second stage of the GVT computation for PE `me`.
 *
 * Returns immediately unless me->gvt_status is TW_GVT_COMPUTE.  Otherwise:
 *   1. reads the network and sums (white sent - white received) across
 *      MPI_COMM_WORLD, repeating until that global sum is zero;
 *   2. takes the smallest of me->trans_msg_ts, the priority-queue minimum
 *      and the network minimum, and reduces it with MPI_MIN;
 *   3. calls tw_error if the result has equalled me->GVT_prev for
 *      g_tw_gvt_max_no_change consecutive rounds, or if it is below me->GVT;
 *   4. resets the per-round counters, stores the new GVT, updates the timing
 *      statistics and, under the OPTIMISTIC protocol, fossil collects.
 */
void
tw_gvt_step2(tw_pe *me)
{
	tw_stat white_delta = 0;
	tw_stat white_sum = 0;

	tw_stime local_vt;
	tw_stime global_vt;

	/* The clock is sampled before the status check, as in the original. */
	tw_clock t0 = tw_clock_read();

	if (me->gvt_status != TW_GVT_COMPUTE)
		return;

	/*
	 * Exchange message counts to create a consistent cut: keep draining the
	 * network until the global (sent - received) white count reaches zero.
	 */
	do {
		tw_net_read(me);

		white_delta = me->s_nwhite_sent - me->s_nwhite_recv;
		all_reduce_cnt++;
		if (MPI_Allreduce(&white_delta, &white_sum, 1,
				  MPI_LONG_LONG, MPI_SUM,
				  MPI_COMM_WORLD) != MPI_SUCCESS) {
			tw_error(TW_LOC, "MPI_Allreduce for GVT failed");
		}
	} while (white_sum != 0);

	tw_stime queue_floor = tw_pq_minimum(me->pq);
	tw_stime wire_floor = tw_net_minimum(me);

	/* Local virtual time: the smallest of the three candidate timestamps. */
	local_vt = me->trans_msg_ts;
	if (queue_floor < local_vt)
		local_vt = queue_floor;
	if (wire_floor < local_vt)
		local_vt = wire_floor;

	all_reduce_cnt++;
	if (MPI_Allreduce(&local_vt, &global_vt, 1,
			  MPI_DOUBLE, MPI_MIN,
			  MPI_COMM_WORLD) != MPI_SUCCESS) {
		tw_error(TW_LOC, "MPI_Allreduce for GVT failed");
	}

	global_vt = min(global_vt, me->GVT_prev);

	/* Count consecutive rounds in which the result equals GVT_prev. */
	if (global_vt == me->GVT_prev) {
		g_tw_gvt_no_change++;
		if (g_tw_gvt_no_change >= g_tw_gvt_max_no_change) {
			tw_error(TW_LOC,
				 "GVT computed %d times in a row"
				 " without changing: GVT = %14.14lf, PREV %14.14lf"
				 " -- GLOBAL SYNCH -- out of memory!",
				 g_tw_gvt_no_change, global_vt, me->GVT_prev);
		}
	} else {
		g_tw_gvt_no_change = 0;
	}

	/* The new value must not be smaller than the GVT already stored. */
	if (global_vt < me->GVT) {
		tw_error(TW_LOC, "PE %u GVT decreased %g -> %g",
			 me->id, me->GVT, global_vt);
	}

	/* Progress output: only past the current threshold, only on the master node. */
	if (global_vt / g_tw_ts_end > percent_complete) {
		if (tw_node_eq(&g_tw_mynode, &g_tw_masternode)) {
			gvt_print(global_vt);
		}
	}

	/* Publish the result and clear the per-round state. */
	me->gvt_status = TW_GVT_NORMAL;
	me->GVT = global_vt;
	me->GVT_prev = DBL_MAX;	/* reset to DBL_MAX, not to the previous GVT */
	me->trans_msg_ts = DBL_MAX;
	me->s_nwhite_recv = 0;
	me->s_nwhite_sent = 0;

	gvt_cnt = 0;

	/* Account the time spent so far to the GVT statistic. */
	me->stats.s_gvt += tw_clock_read() - t0;

	/* Fossil collection runs only under the OPTIMISTIC protocol. */
	if (g_tw_synchronization_protocol == OPTIMISTIC) {
		tw_clock fossil_t0 = tw_clock_read();

		tw_pe_fossil_collect(me);
		fossil_collected = 1;
		me->stats.s_fossil_collect += tw_clock_read() - fossil_t0;
	}

	g_tw_gvt_done++;
}
示例#2
0
/*
 * Second stage of the GVT computation for PE `me`.
 *
 * Returns immediately unless me->gvt_status is TW_GVT_COMPUTE.  Otherwise:
 *   1. reads the network (timed into stats.s_net_read) and sums
 *      (white sent - white received) across MPI_COMM_ROSS, repeating until
 *      that global sum is zero;
 *   2. takes the smallest of me->trans_msg_ts, the priority-queue minimum
 *      and the network minimum, and reduces it with MPI_MIN;
 *   3. calls tw_error if the result has equalled me->GVT_prev for
 *      g_tw_gvt_max_no_change consecutive rounds, or if it is below me->GVT;
 *   4. resets the per-round counters, stores the new GVT and updates the
 *      timing statistics;
 *   5. fossil collects under OPTIMISTIC / OPTIMISTIC_REALTIME, runs the
 *      instrumentation hooks, and restarts the GVT interval clock.
 */
void
tw_gvt_step2(tw_pe *me)
{
	long long white_delta = 0;
	long long white_sum = 0;

	tw_stime local_vt;
	tw_stime global_vt;

	tw_clock read_t0;
	/* The clock is sampled before the status check, as in the original. */
	tw_clock t0 = tw_clock_read();

	if (me->gvt_status != TW_GVT_COMPUTE)
		return;

	/*
	 * Exchange message counts to create a consistent cut: keep draining the
	 * network until the global (sent - received) white count reaches zero.
	 */
	do {
		read_t0 = tw_clock_read();
		tw_net_read(me);
		me->stats.s_net_read += tw_clock_read() - read_t0;

		white_delta = me->s_nwhite_sent - me->s_nwhite_recv;
		all_reduce_cnt++;
		if (MPI_Allreduce(&white_delta, &white_sum, 1,
				  MPI_LONG_LONG, MPI_SUM,
				  MPI_COMM_ROSS) != MPI_SUCCESS) {
			tw_error(TW_LOC, "MPI_Allreduce for GVT failed");
		}
	} while (white_sum != 0);

	tw_stime queue_floor = tw_pq_minimum(me->pq);
	tw_stime wire_floor = tw_net_minimum(me);

	/* Local virtual time: the smallest of the three candidate timestamps. */
	local_vt = me->trans_msg_ts;
	if (queue_floor < local_vt)
		local_vt = queue_floor;
	if (wire_floor < local_vt)
		local_vt = wire_floor;

	all_reduce_cnt++;
	if (MPI_Allreduce(&local_vt, &global_vt, 1,
			  MPI_DOUBLE, MPI_MIN,
			  MPI_COMM_ROSS) != MPI_SUCCESS) {
		tw_error(TW_LOC, "MPI_Allreduce for GVT failed");
	}

	global_vt = ROSS_MIN(global_vt, me->GVT_prev);

	/* Count consecutive rounds in which the result equals GVT_prev. */
	if (global_vt == me->GVT_prev) {
		g_tw_gvt_no_change++;
		if (g_tw_gvt_no_change >= g_tw_gvt_max_no_change) {
			tw_error(TW_LOC,
				 "GVT computed %d times in a row"
				 " without changing: GVT = %14.14lf, PREV %14.14lf"
				 " -- GLOBAL SYNCH -- out of memory!",
				 g_tw_gvt_no_change, global_vt, me->GVT_prev);
		}
	} else {
		g_tw_gvt_no_change = 0;
	}

	/* The new value must not be smaller than the GVT already stored. */
	if (global_vt < me->GVT) {
		tw_error(TW_LOC, "PE %u GVT decreased %g -> %g",
			 me->id, me->GVT, global_vt);
	}

	/* Progress output: only past the current threshold, only on the master node. */
	if (global_vt / g_tw_ts_end > percent_complete) {
		if (g_tw_mynode == g_tw_masternode) {
			gvt_print(global_vt);
		}
	}

	/* Publish the result and clear the per-round state. */
	me->gvt_status = TW_GVT_NORMAL;
	me->GVT = global_vt;
	me->GVT_prev = DBL_MAX;	/* reset to DBL_MAX, not to the previous GVT */
	me->trans_msg_ts = DBL_MAX;
	me->s_nwhite_recv = 0;
	me->s_nwhite_sent = 0;

	gvt_cnt = 0;

	/* Account the time spent so far to the GVT statistic. */
	me->stats.s_gvt += tw_clock_read() - t0;

	/* Fossil collection runs only under OPTIMISTIC or OPTIMISTIC_REALTIME. */
	if (g_tw_synchronization_protocol == OPTIMISTIC ||
	    g_tw_synchronization_protocol == OPTIMISTIC_REALTIME) {
		tw_clock fossil_t0 = tw_clock_read();

		tw_pe_fossil_collect(me);
		me->stats.s_fossil_collect += tw_clock_read() - fossil_t0;
	}

	/* ---- instrumentation hooks ---- */

	/*
	 * Engine-level data: collected when engine stats are in GVT or ALL mode,
	 * on every g_st_num_gvt-th GVT, and only while GVT <= g_tw_ts_end.
	 */
	if ((g_st_engine_stats == GVT_STATS || g_st_engine_stats == ALL_STATS) &&
	    g_tw_gvt_done % g_st_num_gvt == 0 &&
	    global_vt <= g_tw_ts_end) {
#ifdef USE_DAMARIS
		if (!g_st_damaris_enabled) {
			st_collect_engine_data(me, GVT_COL);
		} else {
			st_damaris_expose_data(me, global_vt, GVT_COL);
			st_damaris_end_iteration();
		}
#else
		st_collect_engine_data(me, GVT_COL);
#endif
	}

#ifdef USE_DAMARIS
	/*
	 * The Damaris iteration is also ended here when engine stats are in
	 * RT_STATS or VT_STATS mode.
	 * NOTE(review): the original author's note warns that if one PE enters
	 * this branch all PEs must, otherwise the run deadlocks -- confirm the
	 * condition is uniform across PEs.
	 */
	if (g_st_damaris_enabled &&
	    (g_st_engine_stats == RT_STATS || g_st_engine_stats == VT_STATS)) {
		st_damaris_end_iteration();
	}
#endif

	/* Model-level data: GVT or ALL mode, on every g_st_num_gvt-th GVT. */
	if ((g_st_model_stats == GVT_STATS || g_st_model_stats == ALL_STATS) &&
	    g_tw_gvt_done % g_st_num_gvt == 0) {
		st_collect_model_data(me,
				      (tw_stime)tw_clock_read() / g_tw_clock_rate,
				      GVT_STATS);
	}

	st_inst_dump();

	/* ---- end of instrumentation ---- */

	g_tw_gvt_done++;

	/* Restart the interval clock for the next GVT round (realtime GVT mode only). */
	g_tw_gvt_interval_start_cycles = tw_clock_read();
}