Ejemplo n.º 1
0
/* Hand the current hwcsamp data blob to the collector and start a new one.
 *
 * This function can be called from within the sigprof handler and therefore
 * must be signal safe: no strdup and friends.
 *
 * tls must not be NULL (asserted).
 */
static void send_samples (TLS* tls)
{
    Assert(tls != NULL);

    /* Close out the header: end time plus the address range of the buffer. */
    tls->header.time_end = CBTF_GetTime();
    tls->header.addr_end = tls->buffer.addr_end;
    tls->header.addr_begin = tls->buffer.addr_begin;

    /* The rank is not filled in until mpi_init has finished, so it is
     * safe (and necessary) to read it at send time. */
    tls->header.rank = monitor_mpi_comm_rank();

    /* All three arrays of the data blob take the buffer length. */
    tls->data.events.events_len = tls->buffer.length;
    tls->data.count.count_len = tls->buffer.length;
    tls->data.pc.pc_len = tls->buffer.length;

    /* Disabled size diagnostics, kept for reference only. */
#if 0
int bufsize = tls->buffer.length * sizeof(tls->buffer);
int pcsize =  tls->buffer.length * sizeof(tls->data.pc.pc_val);
int countsize =  tls->buffer.length * sizeof(tls->data.count.count_val);

fprintf(stderr,"[%d:%d] HWCSAMP SEND SAMPLES\n", tls->header.pid,tls->header.omp_tid);
fprintf(stderr,"send_samples: size of tls data is %d, buffer is %d\n",sizeof(tls->data), sizeof(tls->buffer));
fprintf(stderr,"send_samples: size of tls PC data is %d  %d, COUNT is %d  %d\n",tls->buffer.length , pcsize,tls->buffer.length , countsize);
fprintf(stderr,"send_samples: size of tls HASH is %d\n", sizeof(tls->buffer.hash_table));
fprintf(stderr,"send_samples: size of tls data pc buff is %d\n", sizeof(tls->data.pc.pc_val)*CBTF_HWCPCBufferSize);
fprintf(stderr,"send_samples: size of tls data count buff is %d\n", sizeof(tls->data.count.count_val)*CBTF_HWCPCBufferSize);
fprintf(stderr,"send_samples: size of tls hwccounts is %d\n", sizeof(tls->buffer.hwccounts));
fprintf(stderr,"send_samples: size of CBTF_evcounts is %d\n", sizeof(CBTF_evcounts)*CBTF_HWCPCBufferSize);
fprintf(stderr,"send_samples: size of lon long is %d\n", sizeof(long long));
fprintf(stderr,"send_samples: size of uint64_t is %d\n", sizeof(uint64_t));
fprintf(stderr,"send_samples: size of CBTF_HWCPCData is %d\n", sizeof(CBTF_HWCPCData));
int eventssize =  tls->buffer.length * sizeof(tls->data.events.events_val);
fprintf(stderr,"send_samples: size of eventssize = %d\n",eventssize);
#endif

#ifndef NDEBUG
    /* Optional trace of what is about to be sent. */
    if (IsCollectorDebugEnabled) {
        fprintf(stderr, "[%ld,%d] hwcsamp send_samples:\n",
                tls->header.pid, tls->header.omp_tid);
        fprintf(stderr, "[%ld,%d] time_range(%lu,%lu) addr range[%#lx, %#lx] pc_len(%d) count_len(%d)\n",
                tls->header.pid, tls->header.omp_tid,
                tls->header.time_begin, tls->header.time_end,
                tls->header.addr_begin, tls->header.addr_end,
                tls->data.pc.pc_len,
                tls->data.count.count_len);
    }
#endif

    cbtf_collector_send(&tls->header, (xdrproc_t)xdr_CBTF_hwcsamp_data, &tls->data);

    /* Re-initialize the data blob's header for the next round of samples. */
    initialize_data(tls);
}
Ejemplo n.º 2
0
/*
 * Wrapper around the real mpi_finalize__; the real routine receives
 * ierror unchanged.
 *
 * monitor_fini_mpi() runs before the real call and
 * monitor_mpi_post_fini() after it, but only when
 * monitor_mpi_fini_count(1) returned 1 on entry (presumably the
 * outermost finalize call -- confirm against its definition).  The
 * count is always released with monitor_mpi_fini_count(-1) on exit.
 */
void
MONITOR_WRAP_NAME(mpi_finalize__)(int *ierror)
{
    int run_callbacks;

    MONITOR_DEBUG1("\n");
    MONITOR_GET_REAL_NAME_WRAP(real_mpi_finalize, mpi_finalize__);

    run_callbacks = (monitor_mpi_fini_count(1) == 1);

    if (run_callbacks) {
	MONITOR_DEBUG("calling monitor_fini_mpi(), size = %d, rank = %d ...\n",
		      monitor_mpi_comm_size(), monitor_mpi_comm_rank());
	monitor_fini_mpi();
    }

    (*real_mpi_finalize)(ierror);

    if (run_callbacks) {
	MONITOR_DEBUG1("calling monitor_mpi_post_fini() ...\n");
	monitor_mpi_post_fini();
    }

    monitor_mpi_fini_count(-1);
}
Ejemplo n.º 3
0
/*
 * Wrapper around the real PMPI_Finalize; returns whatever the real
 * routine returns.
 *
 * monitor_fini_mpi() runs before the real call and
 * monitor_mpi_post_fini() after it, but only when
 * monitor_mpi_fini_count(1) returned 1 on entry (presumably the
 * outermost finalize call -- confirm against its definition).  The
 * count is always released with monitor_mpi_fini_count(-1) on exit.
 */
int
MONITOR_WRAP_NAME(PMPI_Finalize)(void)
{
    int status, run_callbacks;

    MONITOR_DEBUG1("\n");
    MONITOR_GET_REAL_NAME_WRAP(real_pmpi_finalize, PMPI_Finalize);

    run_callbacks = (monitor_mpi_fini_count(1) == 1);

    if (run_callbacks) {
	MONITOR_DEBUG("calling monitor_fini_mpi(), size = %d, rank = %d ...\n",
		      monitor_mpi_comm_size(), monitor_mpi_comm_rank());
	monitor_fini_mpi();
    }

    status = (*real_pmpi_finalize)();

    if (run_callbacks) {
	MONITOR_DEBUG1("calling monitor_mpi_post_fini() ...\n");
	monitor_mpi_post_fini();
    }

    monitor_mpi_fini_count(-1);

    return status;
}
Ejemplo n.º 4
0
// Returns: generalized rank, or else -1 if unknown or unavailable.
//
// Sources are consulted in a fixed order and the first non-negative
// answer wins: MPI (monitor_mpi_comm_rank), then, in static-link
// builds only, dmapp and finally gasnet.
//
int
hpcrun_get_rank(void)
{
  const int mpi_rank = monitor_mpi_comm_rank();
  if (mpi_rank >= 0) {
    return mpi_rank;
  }

#ifdef HPCRUN_STATIC_LINK
  // Dmapp only runs on Cray systems which pretty much run only static
  // binaries.  The dynamic case probably works but is currently
  // untested (because there are few or no examples), so it is compiled
  // out for now.
  //
  struct jobinfo info;
  if (dmapp_get_jobinfo(&info) == 0) {
    const int dmapp_rank = info.pe;
    if (dmapp_rank >= 0) {
      return dmapp_rank;
    }
  }

  // Gasnet stores the rank in a global variable and <gasnet.h> makes
  // heavy use of macros, so there is no gasnet_mynode() function to
  // call.  The variable can be caught statically via hpclink, but
  // there is basically no hope for the dynamic case.
  //
  const int gasnet_rank = (int) gasneti_mynode;
  if (gasnet_rank >= 0) {
    return gasnet_rank;
  }
#endif

  // No source produced a usable rank.
  return -1;
}
Ejemplo n.º 5
0
/**
 * Send events.
 *
 * Sends all the events in the tracing buffer to the framework for storage in
 * the experiment's database. Then resets the tracing buffer to the empty state.
 * This is done regardless of whether or not the buffer is actually full.
 *
 * The XDR encoder handed to cbtf_collector_send() depends on the build
 * flavour: PROFILE, EXTENDEDTRACE, or plain trace (the default).
 *
 * @param tls    Thread-local collector state; must not be NULL (asserted).
 */
static void send_samples(TLS *tls)
{
    Assert(tls != NULL);

    /* NOTE(review): strdup() allocates on every send and its result is not
     * checked for NULL here.  Who frees header.id is not visible from this
     * function -- confirm the string is released downstream (or in
     * initialize_data) and that a NULL id is tolerated. */
    tls->header.id = strdup(cbtf_collector_unique_id);
    tls->header.time_end = CBTF_GetTime();
    tls->header.rank = monitor_mpi_comm_rank();

#ifndef NDEBUG
    if (IsCollectorDebugEnabled) {
	fprintf(stderr, "[%ld,%d] mpi send_samples:\n",tls->header.pid, tls->header.omp_tid);
#if defined(PROFILE)
	/* Only stacktraces_len is reported in PROFILE builds, so the format
	 * must contain exactly one %u.  The shared two-%u format used
	 * previously consumed a variadic argument that was never passed,
	 * which is undefined behaviour. */
	fprintf(stderr, "[%ld,%d] time_range(%lu,%lu) addr range[%lx, %lx] stacktraces_len(%u)\n",
		tls->header.pid, tls->header.omp_tid,
		tls->header.time_begin,tls->header.time_end,
		tls->header.addr_begin,tls->header.addr_end,
		tls->data.stacktraces.stacktraces_len);
#else
	fprintf(stderr, "[%ld,%d] time_range(%lu,%lu) addr range[%lx, %lx] stacktraces_len(%u) events_len(%u)\n",
		tls->header.pid, tls->header.omp_tid,
		tls->header.time_begin,tls->header.time_end,
		tls->header.addr_begin,tls->header.addr_end,
		tls->data.stacktraces.stacktraces_len,
		tls->data.events.events_len);
#endif
    }
#endif

    /* Pick the XDR routine matching the data layout of this build. */
#if defined(PROFILE)
    cbtf_collector_send(&(tls->header), (xdrproc_t)xdr_CBTF_mpi_profile_data, &(tls->data));
#elif defined(EXTENDEDTRACE)
    cbtf_collector_send(&(tls->header), (xdrproc_t)xdr_CBTF_mpi_exttrace_data, &(tls->data));
#else
    cbtf_collector_send(&(tls->header), (xdrproc_t)xdr_CBTF_mpi_trace_data, &(tls->data));
#endif

    /* Re-initialize the data blob's header */
    initialize_data(tls);
}
Ejemplo n.º 6
0
// This callback is only needed during mpi startup and mrnet connection.
// Once connected, it should return as early as possible.
// Since we do not really want mrnet specific info here, we rely on the fact
// that this callback only does work the first time mpi_comm_rank is called,
// so that the underlying common collector code can acquire the rank which
// is needed for mrnet connections.  Therefore nothing is ever done
// except on the first call.
void monitor_mpi_post_comm_rank(void)
{
    /* Do not even acquire the tls if mpi_comm_rank has been seen already. */
    if (init_mpi_comm_rank) {
	return;
    }
    init_mpi_comm_rank = true;

    /* Access our thread-local storage */
#ifdef USE_EXPLICIT_TLS
    TLS* tls = CBTF_GetTLS(TLSKey);
#else
    TLS* tls = &the_tls;
#endif
    Assert(tls != NULL);

#ifndef NDEBUG
    /* Debug builds only: report the sampling status on entry. */
    const char* status_name = "UNKNOWNSTATUS";
    if (IsMonitorDebugEnabled) {
	switch (tls->sampling_status) {
	    case CBTF_Monitor_Resumed:     status_name = "RESUME";     break;
	    case CBTF_Monitor_Paused:      status_name = "PAUSE";      break;
	    case CBTF_Monitor_Started:     status_name = "STARTED";    break;
	    case CBTF_Monitor_Not_Started: status_name = "NOTSTARTED"; break;
	    case CBTF_Monitor_Finished:    status_name = "FINISHED";   break;
	}
	fprintf(stderr,"[%d,%d] monitor_mpi_post_comm_rank sampling_status:%s\n",
		thePid,monitor_get_thread_num(),status_name);
    }
#endif

    // NOTE: For post_comm_rank, we always need to resume since
    // that is where the mrnet connection is made.  Therefore that
    // resume should not resume if we are not start enabled.
    //
    // FIXME: a surrounding test on
    // tls->sampling_status == CBTF_Monitor_Paused was commented out
    // here; decide whether it can be removed for good.
    bool resume_sampling = false;

    if (!tls->mpi_pcontrol) {
	/* No mpi_pcontrol in play: sampling is simply enabled. */
	if (IsMonitorDebugEnabled) {
	    fprintf(stderr,"[%d,%d] monitor_mpi_post_comm_rank SAMPLING enabled rank:%d\n",
		    thePid,monitor_get_thread_num(), monitor_mpi_comm_rank());
	}
	resume_sampling = true;
	tls->sampling_status = CBTF_Monitor_Resumed;
    } else if (tls->start_enabled) {
	/* mpi_pcontrol with collection enabled from the start. */
	if (IsMonitorDebugEnabled) {
	    fprintf(stderr,"[%d,%d] monitor_mpi_post_comm_rank SAMPLING pcontrol start enabled rank:%d\n",
		    thePid,monitor_get_thread_num(), monitor_mpi_comm_rank());
	}
	resume_sampling = true;
	tls->sampling_status = CBTF_Monitor_Resumed;
    } else {
	/* mpi_pcontrol with the start deferred: the resume is FORCED here
	 * for now, but sampling_status is left untouched.  Telling the
	 * collector service that the start was deferred
	 * (cbtf_offline_service_start_deferred) is currently disabled. */
	if (IsMonitorDebugEnabled) {
	    fprintf(stderr,"[%d,%d] monitor_mpi_post_comm_rank SAMPLING pcontrol start disabled rank:%d\n",
		    thePid,monitor_get_thread_num(), monitor_mpi_comm_rank());
	}
	resume_sampling = true;
    }

    if (resume_sampling) {
	if (IsMonitorDebugEnabled) {
	    fprintf(stderr,"[%d,%d] monitor_mpi_post_comm_rank RESUME SAMPLING\n",
		    thePid,monitor_get_thread_num());
	}
	/* sampling_status was already updated above where applicable. */
	cbtf_offline_sampling_status(CBTF_Monitor_MPI_post_comm_rank_event,CBTF_Monitor_Resumed);
    }
}
Ejemplo n.º 7
0
/*
 * Callback run at MPI initialization; argc and argv are not used here.
 *
 * If sampling is currently paused, decide whether it should be resumed,
 * notify the offline service of the MPI init event, resume sampling when
 * appropriate, and finally clear the "in MPI startup" flags.
 */
void
monitor_init_mpi(int *argc, char ***argv)
{
    /* Access our thread-local storage */
#ifdef USE_EXPLICIT_TLS
    TLS* tls = CBTF_GetTLS(TLSKey);
#else
    TLS* tls = &the_tls;
#endif
    Assert(tls != NULL);

    /* Only a paused collector is a candidate for resuming. */
    bool resume_sampling = false;
    if (tls->sampling_status == CBTF_Monitor_Paused) {
	if (!tls->mpi_pcontrol) {
	    /* No mpi_pcontrol in play: sampling is simply enabled. */
	    if (IsMonitorDebugEnabled) {
		fprintf(stderr,"[%d,%d] monitor_init_mpi SAMPLING enabled rank:%d\n",
			thePid,monitor_get_thread_num(), monitor_mpi_comm_rank());
	    }
	    resume_sampling = true;
	} else if (tls->start_enabled) {
	    /* mpi_pcontrol with collection enabled from the start. */
	    if (IsMonitorDebugEnabled) {
		fprintf(stderr,"[%d,%d] monitor_init_mpi SAMPLING pcontrol start enabled rank:%d\n",
			thePid,monitor_get_thread_num(), monitor_mpi_comm_rank());
	    }
	    resume_sampling = true;
	} else {
	    /* mpi_pcontrol with the start deferred: stay paused. */
	    if (IsMonitorDebugEnabled) {
		fprintf(stderr,"[%d,%d] monitor_init_mpi SAMPLING pcontrol start disabled rank:%d\n",
			thePid,monitor_get_thread_num(), monitor_mpi_comm_rank());
	    }
	}
    }

    cbtf_offline_notify_event(CBTF_Monitor_MPI_init_event);

    if (!resume_sampling) {
	if (IsMonitorDebugEnabled) {
	    fprintf(stderr,"[%d,%d] monitor_init_mpi is SAMPLING\n",
		    thePid,monitor_get_thread_num());
	}
	// This is the case where the start of collection was disabled
	// until the first (if any) mpi_pcontrol(1) is encountered.
	// That fact needs to be available to any downstream code that
	// temporarily disables the collector via
	// cbtf_offline_service_stop_timer and later re-enables it via
	// cbtf_offline_service_start_timer (both live in
	// services/collector/collector.c).  The disable/re-enable pair is
	// used internally in collector.c to protect one CBTF_MRNet_Send
	// of attached threads from being sampled, and in
	// services/collector/monitor.c for similar reasons.
	// Both functions could check tls->sampling_status for this;
	// a utility function returning that value would serve the purpose.
    } else {
	if (IsMonitorDebugEnabled) {
	    fprintf(stderr,"[%d,%d] monitor_init_mpi RESUME SAMPLING\n",
		    thePid,monitor_get_thread_num());
	}
	tls->sampling_status = CBTF_Monitor_Resumed;
	cbtf_offline_sampling_status(CBTF_Monitor_MPI_init_event,CBTF_Monitor_Resumed);
    }

    /* MPI startup is over from the collector's point of view. */
    tls->in_mpi_pre_init = false;
    CBTF_in_mpi_startup = false;
}