/* Read the current values of all configured CPC counters for the given
 * per-thread metric set into values[]. If offsets is non-NULL, each offset
 * is added to the corresponding raw value. */
void vt_metric_read(struct vt_metv* metv, uint64_t offsets[], uint64_t values[])
{
  int i;

  /* nothing to do if no metric set exists for this thread */
  if ( metv == NULL )
    return;

  /* read counter values of set */
  if ( cpc_set_sample(cpc, metv->set, metv->buffer) == -1 )
    vt_error_msg("cpc_set_sample: %s", strerror(errno));

  for ( i = 0; i < nmetrics; i++ )
  {
    /* get 64-bit counter values from CPC buffer */
    if ( cpc_buf_get(cpc, metv->buffer, metv->indices[i], &(values[i])) == -1 )
      break;
  }
  /* loop aborted early only when a cpc_buf_get call failed */
  if ( i != nmetrics )
    vt_error_msg("cpc_buf_get: %s", strerror(errno));

  /* add offsets to values, if necessary */
  if ( offsets != NULL )
  {
    for ( i = 0; i < nmetrics; i++ )
      values[i] += offsets[i];
  }
}
/* Remove the tracking entry for the given MPI communicator. Deletion is
 * done by overwriting the slot with the last list element; an unknown
 * communicator is reported via vt_error_msg(). */
void vt_comm_free(MPI_Comm comm)
{
  /* single entry in the list: just empty it */
  if (last_comm == 1 && comms[0].comm == comm)
  {
    last_comm = 0;
    return;
  }

  if (last_comm > 1)
  {
    uint32_t idx = comm_search(comm);

    if (idx == (uint32_t)-1)
      vt_error_msg("vt_comm_free1: Cannot find communicator");
    else
      /* swap deletion candidate with last entry in the list */
      comms[idx] = comms[--last_comm];
    return;
  }

  vt_error_msg("vt_comm_free2: Cannot find communicator");
}
/* Drop one reference on the tracking entry for the given MPI group and
 * delete the entry (by moving the last list element into its slot) once
 * the reference count reaches zero. */
void vt_group_free(MPI_Group group)
{
  /* single entry in the list */
  if (last_group == 1 && groups[0].group == group)
  {
    if (--groups[0].refcnt == 0)
      last_group--;
    return;
  }

  if (last_group > 1)
  {
    uint32_t idx = group_search(group);

    if (idx == (uint32_t)-1)
    {
      vt_error_msg("vt_group_free1: Cannot find group");
    }
    else
    {
      /* decrease reference count; delete entry when unreferenced */
      if (--groups[idx].refcnt == 0)
        groups[idx] = groups[--last_group];
    }
    return;
  }

  vt_error_msg("vt_group_free2: Cannot find group");
}
/* platform specific initialization: set up the selected timer source,
 * determine the executable path from /proc, and obtain a numeric node id. */
void vt_pform_init()
{
  int pid = getpid();
  char exec_proc[VT_PATH_MAX];
  char exec[VT_PATH_MAX];
  int exec_len;
  int hostid_retries;

#if TIMER == TIMER_MMTIMER
  int fd;
  unsigned long femtosecs_per_tick = 0;
  int offset;

  /* map the SGI multimedia timer device into our address space */
  if((fd = open(MMTIMER_FULLNAME, O_RDONLY)) == -1) {
    vt_error_msg("Failed to open " MMTIMER_FULLNAME);
  }

  if ((offset = ioctl(fd, MMTIMER_GETOFFSET, 0)) == -ENOSYS) {
    vt_error_msg("Cannot get mmtimer offset");
  }

  if ((mmdev_timer_addr = mmap(0, getpagesize(), PROT_READ, MAP_SHARED, fd, 0))
      == MAP_FAILED) {
    vt_error_msg("Cannot mmap mmtimer");
  }
  /* counter register lives 'offset' units into the mapped page */
  mmdev_timer_addr += offset;

  /* convert reported resolution (femtoseconds per tick) to ticks/second */
  ioctl(fd, MMTIMER_GETRES, &femtosecs_per_tick);
  mmdev_ticks_per_sec = (uint64_t)(1.0 / (1e-15 * femtosecs_per_tick));

  close(fd);
#elif TIMER == TIMER_CLOCK_GETTIME
  struct timespec tp;
  clock_gettime(CLOCK_REALTIME, &tp);
  /* time base = tv_sec rounded down by clearing the low 8 bits */
  vt_time_base = tp.tv_sec - (tp.tv_sec & 0xFF);
#elif TIMER == TIMER_PAPI_REAL_USEC
  vt_time_base = vt_metric_real_usec();
#endif

  /* get full path of executable via the /proc/<pid>/exe symlink */
  snprintf(exec_proc, sizeof (exec_proc), VT_PROCDIR"%d/exe", pid);
  exec_len = readlink(exec_proc, exec, sizeof (exec)-1);
  if(exec_len != -1)
  {
    /* readlink does not NUL-terminate; do it here */
    exec[exec_len] = '\0';
    vt_exec = strdup(exec);
  }

  /* get unique numeric SMP-node identifier */
  hostid_retries = 0;
  while( !vt_node_id && (hostid_retries++ < VT_MAX_GETHOSTID_RETRIES) ) {
    vt_node_id = gethostid();
  }
  if (!vt_node_id)
    vt_error_msg("Maximum retries (%i) for gethostid exceeded!",
                 VT_MAX_GETHOSTID_RETRIES);
}
/* Release the CPC sampling buffer and counter set of a per-thread metric
 * handle; the thread id is unused. NULL handles are ignored. */
void vt_metric_free(struct vt_metv* metv, uint32_t tid)
{
  (void)tid;

  if ( metv == NULL )
    return;

  /* destroy sampling buffer first, then the counter set */
  if ( cpc_buf_destroy(cpc, metv->buffer) == -1 )
    vt_error_msg("cpc_buf_destroy: %s", strerror(errno));

  if ( cpc_set_destroy(cpc, metv->set) == -1 )
    vt_error_msg("cpc_set_destroy: %s", strerror(errno));

  free(metv);
}
/* Determine the per-thread trace buffer size from the environment variable
 * VT_THREAD_BUFFER_SIZE. Returns 0 when the variable is unset or empty
 * (caller falls back to the default); values below VT_MIN_BUFSIZE are
 * clamped with a warning. The parsed value is cached across calls. */
size_t vt_env_thread_bsize()
{
  static size_t buffer_size = 0;
  char* tmp;

  if (buffer_size == 0)
  {
    tmp = getenv("VT_THREAD_BUFFER_SIZE");
    if (tmp != NULL && strlen(tmp) > 0)
    {
      vt_cntl_msg(2, "VT_THREAD_BUFFER_SIZE=%s", tmp);

      buffer_size = parse_size(tmp);
      /* buffer_size is unsigned, so "<= 0" was really "== 0" */
      if (buffer_size == 0)
      {
        /* fixed: message previously named the wrong variable
           (VT_BUFFER_SIZE) */
        vt_error_msg("VT_THREAD_BUFFER_SIZE not properly set");
      }
      else if (buffer_size < VT_MIN_BUFSIZE)
      {
        /* fixed: %d on a size_t argument; cast to unsigned long for a
           portable format specifier */
        vt_warning("VT_THREAD_BUFFER_SIZE=%lu resized to %lu bytes",
                   (unsigned long)buffer_size,
                   (unsigned long)VT_MIN_BUFSIZE);
        buffer_size = VT_MIN_BUFSIZE;
      }
    }
    else
    {
      buffer_size = 0;
    }
  }

  return buffer_size;
}
/* Determine the IOFSL mode from the environment variable VT_IOFSL_MODE
 * ("multifile" or "multifile_split", case-insensitive). Defaults to
 * VT_IOFSL_MODE_MULTIFILE_SPLIT when unset; result is cached. */
int vt_env_iofsl_mode()
{
  static int mode = -1;
  char* tmp;

  if (mode == -1)
  {
    tmp = getenv("VT_IOFSL_MODE");
    if (tmp != NULL && strlen(tmp) > 0)
    {
      char tmpbuf[128];
      char* p;

      vt_cntl_msg(2, "VT_IOFSL_MODE=%s", tmp);

      /* bounded copy with guaranteed NUL termination */
      strncpy(tmpbuf, tmp, 127);
      tmpbuf[127] = '\0';

      /* lower-case in place; the unsigned char cast avoids undefined
         behavior of tolower() on negative char values */
      for (p = tmpbuf; *p; p++)
        *p = (char)tolower((unsigned char)*p);

      if (strcmp(tmpbuf, "multifile") == 0)
        mode = VT_IOFSL_MODE_MULTIFILE;
      else if (strcmp(tmpbuf, "multifile_split") == 0)
        mode = VT_IOFSL_MODE_MULTIFILE_SPLIT;
      else
        vt_error_msg("VT_IOFSL_MODE not properly set");
    }
    else
    {
      mode = VT_IOFSL_MODE_MULTIFILE_SPLIT;
    }
  }

  return mode;
}
/* Abort with a migration notice if the obsolete VT_CUDATRACE environment
 * variable is set: CUDA tracing is now configured via VT_GPUTRACE. */
void vt_env_cudatrace()
{
  char* tmp = getenv("VT_CUDATRACE");

  if (tmp != NULL && strlen(tmp) > 0)
  {
    /* split error message in three parts due to C89 limitations */
    char* error_msg[3] = {
      /* fixed typo: "option1,option2,option2" -> "option1,option2,option3" */
      "VT_CUDATRACE has been replaced by VT_GPUTRACE!\n"
      "Usage: export VT_GPUTRACE=option1,option2,option3,...\n"
      "The following CUDA measurement options are available:\n"
      " cuda : enable CUDA (needed to use CUDA runtime API wrapper)\n"
      " cupti : use the CUPTI interface instead of the library wrapper\n",
      " runtime : CUDA runtime API\n"
      " driver : CUDA driver API\n"
      " kernel : CUDA kernels\n"
      " concurrent: enable concurrent kernel tracing at initialization time\n"
      " idle : GPU compute idle time\n"
      " memcpy : CUDA memory copies\n"
      " memusage : CUDA memory allocation\n"
      " debug : CUDA tracing debug mode\n"
      " error : CUDA errors will exit the program\n"
      " yes|default: same as 'cuda,runtime,kernel,memcpy'\n"
      " no: disable CUDA measurement\n",
      "VT_CUDATRACE_CUPTI, VT_CUDATRACE_MEMCPY, VT_GPUTRACE_IDLE, "
      "VT_GPUTRACE_ERROR have been replaced by VT_GPUTRACE as well!\n"
      "Read the user manual for further information!\n"
    };

    vt_error_msg("%s%s%s", error_msg[0], error_msg[1], error_msg[2]);
  }
}
/* Remove a tracked MPI request by copying the last active request into its
 * slot, then step the last-request cursor back one slot, moving to the
 * previous request block when the current one is exhausted. */
void vt_request_free(struct VTRequest* req)
{
  /* delete request by copying last request in place of req */
  if (!lastreq) {
    vt_error_msg("INTERNAL ERROR in request handling - no last request");
  }
  *req = *lastreq;
  /* mark the vacated tail slot as unused */
  lastreq->flags = ERF_NONE;
  lastreq->request = 0;

  /* adjust pointer to last request */
  lastidx--;
  if (lastidx < 0)
  {
    /* reached low end of block */
    if (last_block->prev)
    {
      /* goto previous block if existing */
      lastidx = VT_REQBLK_SIZE-1;
      lastreq = &(last_block->prev->req[lastidx]);
    }
    else
    {
      /* no previous block: re-initialize */
      lastidx = VT_REQBLK_SIZE;
      lastreq = 0;
    }
    last_block = last_block->prev;
  }
  else
  {
    lastreq--;
  }
}
int vt_metric_open() { char* env; char* env_sep; char* var; char* token; /* read environment variable "VT_METRICS" */ if ( ( env = vt_env_metrics() ) == NULL ) return 0; env_sep = vt_env_metrics_sep(); var = strdup(env); vt_cntl_msg(2, "VT_METRICS=%s", var); /* initialize CPC */ if ( ( cpc = cpc_open(CPC_VER_CURRENT) ) == NULL ) vt_error_msg("cpc_open: %s", strerror(errno)); /* read metrics from specification string */ token = strtok(var, env_sep); while ( token && (nmetrics < VT_METRIC_MAXNUM) ) { metricv_add( token ); token = strtok(NULL, env_sep); } free(var); return nmetrics; }
/*
 * Create a VampirTrace CUPTI activity context.
 *
 * @param cuCtx the CUDA context the activity context is created for
 *
 * @return pointer to created VampirTrace CUPTI Activity context
 */
static vt_cupti_activity_t* vt_cuptiact_createCtxActivity(CUcontext cuCtx)
{
  vt_cupti_activity_t* vtCtxAct = NULL;

  /* create new context, as it is not listed */
  vtCtxAct = (vt_cupti_activity_t *)malloc(sizeof(vt_cupti_activity_t));
  if(vtCtxAct == NULL)
    vt_error_msg("[CUPTI Activity] Could not allocate memory for activity context!");
  /* initialize all fields; no streams/buffers attached yet */
  vtCtxAct->strmList = NULL;
  vtCtxAct->gpuMemAllocated = 0;
  vtCtxAct->gpuMemList = NULL;
  vtCtxAct->buffer = NULL;
  vtCtxAct->vtLastGPUTime = vt_gpu_init_time;
  vtCtxAct->gpuIdleOn = 1;

  /*
   * Get time synchronization factor between host and GPU time for measurement
   * interval
   */
  {
    VT_CUPTI_CALL(cuptiGetTimestamp(&(vtCtxAct->sync.gpuStart)),
                  "cuptiGetTimestamp");
    vtCtxAct->sync.hostStart = vt_pform_wtime();
  }

  /* set default CUPTI stream ID (needed for memory usage and idle tracing) */
  VT_CUPTI_CALL(cuptiGetStreamId(cuCtx, NULL, &(vtCtxAct->defaultStrmID)),
                "cuptiGetStreamId");

  return vtCtxAct;
}
/* platform specific initialization: set up the selected timer source,
 * record the executable path, and obtain a numeric node id. */
void vt_pform_init()
{
  int pid = getpid();
  char exec_proc[512];
  int hostid_retries;

#if TIMER == TIMER_SWITCH_CLOCK
  int i;
  /* retry switch-clock initialization up to NUMRETRY times */
  for (i=0; i<NUMRETRY; i++) {
    if ( (vt_swclk = swclockInit()) != 0 ) break;
  }
#elif TIMER == TIMER_POWER_REALTIME
  timebasestruct_t t;
  read_real_time(&t, TIMEBASE_SZ);
  time_base_to_time(&t, TIMEBASE_SZ);
  /* time base = seconds part rounded down by clearing the low 16 bits */
  vt_time_base = t.tb_high - (t.tb_high & 0xFFFF);
#elif TIMER == TIMER_PAPI_REAL_USEC
  vt_time_base = vt_metric_real_usec();
#endif

  /* get full path of executable (procfs object path; not resolved,
     stored as-is) */
  snprintf(exec_proc, sizeof (exec_proc), VT_PROCDIR"%d/object/a.out", pid);
  vt_exec = strdup(exec_proc);

  /* get unique numeric SMP-node identifier */
  hostid_retries = 0;
  while( !vt_node_id && (hostid_retries++ < VT_MAX_GETHOSTID_RETRIES) ) {
    vt_node_id = gethostid();
  }
  if (!vt_node_id)
    vt_error_msg("Maximum retries (%i) for gethostid exceeded!",
                 VT_MAX_GETHOSTID_RETRIES);
}
/* Determine which statistics properties to record from the environment
 * variable VT_STAT_PROPS: "all" or a ':'-separated list of func/msg/collop.
 * Defaults to all three flags when unset; the result is cached. */
int vt_env_stat_props()
{
  static int propflags = 0;
  char* tmp;

  if (propflags == 0)
  {
    tmp = getenv("VT_STAT_PROPS");
    if (tmp != NULL && strlen(tmp) > 0)
    {
      char tmpbuf[128];
      char* p;
      char* tk;
      int dc;  /* number of tokens consumed so far */

      vt_cntl_msg(2, "VT_STAT_PROPS=%s", tmp);

      p = tmpbuf;
      /* bounded copy with forced NUL termination */
      strncpy(tmpbuf, tmp, 127);
      tmpbuf[127] = '\0';

      /* lower-case the copy in place */
      while( *p ) { *p = tolower(*p); p++; }

      if (strcmp( tmpbuf, "all" ) == 0)
      {
        propflags = (VT_SUM_PROP_FUNC | VT_SUM_PROP_MSG | VT_SUM_PROP_COLLOP);
      }
      else
      {
        tk = strtok(tmpbuf, ":");
        dc = 0;
        propflags = 0;
        do
        {
          /* at most 3 tokens are accepted (dc <= 2); anything else or an
             unknown keyword is a configuration error */
          if (dc <= 2 && (strcmp( tk, "func" ) == 0))
            propflags |= VT_SUM_PROP_FUNC;
          else if(dc <= 2 && (strcmp( tk, "msg" ) == 0))
            propflags |= VT_SUM_PROP_MSG;
          else if(dc <= 2 && (strcmp( tk, "collop" ) == 0))
            propflags |= VT_SUM_PROP_COLLOP;
          /* else if(dc <= 3 && (strcmp( tk, "fileop" ) == 0))
            propflags |= VT_SUM_PROP_FILEOP; */
          else
            vt_error_msg("VT_STAT_PROPS not properly set");
          dc++;
        } while((tk = strtok(0, ":")));
      }
    }
    else
    {
      /* propflags = (VT_SUM_PROP_FUNC | VT_SUM_PROP_MSG | VT_SUM_PROP_COLLOP | VT_SUM_PROP_FILEOP); */
      propflags = (VT_SUM_PROP_FUNC | VT_SUM_PROP_MSG | VT_SUM_PROP_COLLOP);
    }
  }

  return propflags;
}
/*
 * Create a VampirTrace CUPTI Activity context.
 *
 * @param ctxID ID of the CUDA context
 * @param cuCtx the CUDA context handle (current context is queried if NULL)
 * @param devID ID of the CUDA device ((uint32_t)-1 to query it here)
 *
 * @return pointer to created VampirTrace CUPTI Activity context
 */
static vt_cuptiact_ctx_t* vt_cuptiact_createContext(uint32_t ctxID,
                                                    CUcontext cuCtx,
                                                    uint32_t devID)
{
  vt_cuptiact_ctx_t* vtCtx = NULL;

  /* create new context, as it is not listed */
  vtCtx = (vt_cuptiact_ctx_t *)malloc(sizeof(vt_cuptiact_ctx_t));
  if(vtCtx == NULL)
    vt_error_msg("[CUPTI Activity] Could not allocate memory for context!");
  /* initialize bookkeeping fields; no streams/buffers attached yet */
  vtCtx->ctxID = ctxID;
  vtCtx->next = NULL;
  vtCtx->strmList = NULL;
  vtCtx->gpuMemAllocated = 0;
  vtCtx->gpuMemList = NULL;
  vtCtx->buffer = NULL;
  vtCtx->vtLastGPUTime = vt_gpu_init_time;
  vtCtx->gpuIdleOn = 1;

  /*
   * Get time synchronization factor between host and GPU time for measurement
   * interval
   */
  {
    VT_CUPTI_CALL(cuptiGetTimestamp(&(vtCtx->sync.gpuStart)),
                  "cuptiGetTimestamp");
    vtCtx->sync.hostStart = vt_pform_wtime();
  }

  /* remember the VampirTrace thread that created this context */
  VT_CHECK_THREAD;
  vtCtx->ptid = VT_MY_THREAD;

  /* fall back to the current CUDA context when none was passed in */
  if(cuCtx == NULL) CHECK_CU_ERROR(cuCtxGetCurrent(&cuCtx), NULL);
  vtCtx->cuCtx = cuCtx;

  /* set default CUPTI stream ID (needed for memory usage and idle tracing) */
  VT_CUPTI_CALL(cuptiGetStreamId(vtCtx->cuCtx, NULL, &(vtCtx->defaultStrmID)),
                "cuptiGetStreamId");

  if(devID == (uint32_t)-1){
    CUdevice cuDev;

    /* driver API prog: correct cuDev, but result is 201 (invalid context) */
    if(CUDA_SUCCESS != cuCtxGetDevice(&cuDev)){
      devID = VT_NO_ID;
    }else{
      devID = (uint32_t)cuDev;
    }
  }

  vtCtx->devID = devID;
  vtCtx->cuDev = devID;

  /*vt_cntl_msg(1,"device id: %d", devID);*/

  return vtCtx;
}
/* Parse the VT_METRICS environment variable (case-insensitive) against the
 * NEC SX metric table vt_sx_metrics. "all" selects every metric; otherwise
 * each token must match a table entry by name. Returns the number of
 * metrics registered. */
int vt_metric_open()
{
  int i;
  char* env;
  char* env_sep;
  char* var;
  char* token;
  /* element count of the static metric definition table */
  const int max_metrics = sizeof (vt_sx_metrics) / sizeof (vt_sx_metrics[0]);

  /* read environment variable "VT_METRICS"; return if unset. */
  env = vt_env_metrics();
  if ( env == NULL )
    return nmetrics;

  env_sep = vt_env_metrics_sep();

  var = strdup(env);
  vt_cntl_msg(2, "VT_METRICS=%s", var);

  /* convert VT_METRICS's letters to lower case */
  token = var;
  while ( *token ) { *token = tolower(*token); token++; }

  /* read metrics from specification string */
  token = strtok(var, env_sep);
  if (token && (0 == strcmp (token, "all")))
  {
    /* "all": register every entry of the metric table */
    vt_cntl_msg(2, "token:%s Adding all metrics", token);
    for (i = 0; i < max_metrics; i++)
    {
      metricv_add(i);
      vt_cntl_msg(2, "metric i:%d name:%s", i, vt_sx_metrics[i].name);
    }
  }
  else
  {
    while ( token && (nmetrics < VT_METRIC_MAXNUM) )
    {
      /* search metricmap for a suitable definition */
      /* printf("Token%d: <%s>\n", nmetrics, token); */
      for (i = 0; i < max_metrics; i++)
      {
        if (0 == strcmp (token, vt_sx_metrics[i].name))
        {
          metricv_add(i);
          vt_cntl_msg(2, "metric i:%d token:%s", i, token);
          break;
        }
      }
      /* loop ran off the end of the table: unknown metric name */
      if (i == max_metrics)
      {
        vt_error_msg ("Metric <%s> not supported", token);
      }
      token = strtok(NULL, env_sep);
    }
  }

  /* scratch array for counter reads; NOTE(review): calloc result is not
     checked here - presumably OOM is considered fatal elsewhere */
  sx_ctr_array = calloc(SX_CTR_MAX, sizeof (uint64_t));

  /* clean up */
  free(var);
  return nmetrics;
}
/* Report a fatal PAPI error: translate errcode to text and, for system
 * errors (PAPI_ESYS), append the errno description. 'note' is an optional
 * context string shown before the error text. */
static void metric_error(int errcode, char *note)
{
  char errstring[PAPI_MAX_STR_LEN];

  PAPI_perror(errcode, errstring, PAPI_MAX_STR_LEN);
  if (errcode == PAPI_ESYS)
  {
    /* strncat(dst, src, n) appends up to n bytes PLUS a terminating NUL,
       so n must be the remaining space minus 1; the previous code could
       overflow errstring by one byte */
    strncat(errstring, ": ", PAPI_MAX_STR_LEN-strlen(errstring)-1);
    strncat(errstring, strerror(errno), PAPI_MAX_STR_LEN-strlen(errstring)-1);
  }
  vt_error_msg("%s: %s (fatal)\n", note?note:"PAPI", errstring);
}
/*
 * Initialize the CUPTI events data of the given VampirTrace CUPTI context.
 * Returns early (without attaching an events context) when the device's
 * compute capability has no entry in the capability list.
 *
 * @param vtcuptiCtx pointer to the VampirTrace CUPTI context
 */
void vt_cupti_events_initContext(vt_cupti_ctx_t *vtcuptiCtx)
{
  vt_cupti_events_t *vtcuptiEvtCtx = NULL;

  vt_cntl_msg(2, "[CUPTI Events] Initializing VampirTrace CUPTI events context");

  /* get a pointer to eventIDArray */
  {
    CUresult cuErr = CUDA_SUCCESS;
    int dev_major, dev_minor;
    vt_cupti_device_t *cuptiDev;

    /* TODO: do not trace this driver API function call */
    cuErr = cuDeviceComputeCapability(&dev_major, &dev_minor,
                                      vtcuptiCtx->cuDev);
    VT_CUDRV_CALL(cuErr, "cuDeviceComputeCapability");

    /* check if device capability already listed */
    VT_CUPTI_LOCK();
      cuptiDev = vtcuptievtCapList;
    VT_CUPTI_UNLOCK();

    cuptiDev = vt_cupti_checkMetricList(cuptiDev, dev_major, dev_minor);
    if(cuptiDev){
      /* allocate the VampirTrace CUPTI events context */
      vtcuptiEvtCtx =
          (vt_cupti_events_t *)malloc(sizeof(vt_cupti_events_t));
      if(vtcuptiEvtCtx == NULL)
        vt_error_msg("[CUPTI Events] malloc(sizeof(vt_cupti_events_t)) failed!");

      vtcuptiEvtCtx->vtDevCap = cuptiDev;
      vtcuptiEvtCtx->vtGrpList = NULL;
      vtcuptiEvtCtx->counterData = NULL;
      vtcuptiEvtCtx->cuptiEvtIDs = NULL;

      vtcuptiCtx->events = vtcuptiEvtCtx;
    }else{
      /* no capability entry: leave the context without events data */
      return;
    }
  }

  /* create and add the VampirTrace CUPTI groups to the context */
  vt_cupti_addEvtGrpsToCtx(vtcuptiCtx);

  /* allocate memory for CUPTI counter reads */
  {
    size_t allocSize = vtcuptiEvtCtx->vtGrpList->evtNum;

    vtcuptiEvtCtx->counterData =
        (uint64_t *)malloc(allocSize*sizeof(uint64_t));
    vtcuptiEvtCtx->cuptiEvtIDs =
        (CUpti_EventID *)malloc(allocSize*sizeof(CUpti_EventID));
  }

  vt_cuptievt_start(vtcuptiEvtCtx);
}
void vt_mpifile_init() { if( !mpifile_initialized ) { struct rlimit rl; mpifile_gid = vt_def_file_group( VT_CURRENT_THREAD, "MPI I/O" ); if( getrlimit(RLIMIT_NOFILE, &rl) ) vt_error_msg( "getrlimit() failed reading max no. of open files" ); nmaxfiles = (rl.rlim_cur == RLIM_INFINITY) ? 131072 : (int)rl.rlim_cur; mpifh_fid_map = (struct mpifh_fid_map*)calloc( nmaxfiles, sizeof(struct mpifh_fid_map) ); if( !mpifh_fid_map ) vt_error_msg( "Out of memory while allocating %i MPI_File handles", nmaxfiles ); nfiles = 0; memset( htab_mpifile, 0, sizeof(htab_mpifile) ); mpifile_initialized = 1; } }
/* Associate the group id 'gid' with the tracked MPI window 'win';
 * reports an error if the window is not known. */
void vt_win_set_gid( MPI_Win win, uint32_t gid )
{
  uint32_t idx = win_search(win);

  if ( idx == (uint32_t)-1 )
    vt_error_msg("Cannot find window");
  else
    wins[idx].gid = gid;
}
/* Per-thread counter initialization hook. With no metrics configured this
 * is a no-op; threaded builds are rejected at compile time below. */
void vt_metric_thread_init(long (*id_fn)(void))
{
  (void)id_fn;

  if ( nmetrics == 0 )
    return;

#if (defined(VT_MT) || defined(VT_HYB) || defined(VT_JAVA))
  /* we don't support threads for the moment */
  vt_error_msg("NEC SX Performance Counters for threaded application "
               "not yet supported");
#endif /* VT_MT || VT_HYB || VT_JAVA */
}
struct vt_metv* vt_metric_create() { struct vt_metv* metv; int i; if ( nmetrics == 0 ) return NULL; metv = (struct vt_metv*)malloc(sizeof(struct vt_metv)); if ( metv == NULL ) vt_error(); /* create CPC set */ metv->set = NULL; if ( ( metv->set = cpc_set_create(cpc) ) == NULL ) vt_error_msg("cpc_set_create: %s", strerror(errno)); metv->indices = (int*)calloc(nmetrics, sizeof(int)); for ( i = 0; i < nmetrics; i++ ) { /* add request to set and store the corresponding index */ metv->indices[i] = cpc_set_add_request(cpc, metv->set, metricv[i]->name, 0, CPC_COUNT_USER, 0, NULL); if ( metv->indices[i] == -1 ) vt_error_msg("cpc_set_add_request (%s): %s", metricv[i]->name, strerror(errno)); } /* create CPC buffer */ if ( ( metv->buffer = cpc_buf_create(cpc, metv->set) ) == NULL ) vt_error_msg("cpc_buf_create: %s", strerror(errno)); /* bind set to the calling LWP */ if ( cpc_bind_curlwp(cpc, metv->set, 0) == -1 ) vt_error_msg("cpc_bind_curlwp: %s", strerror(errno)); return metv; }
/* Append a metric definition (name + PAPI event code) to the global metric
 * vector. Reports an error when the counter limit is exceeded; aborts on
 * allocation failure. */
static void metricv_add(char* name, int code)
{
  if (nmetrics >= VT_METRIC_MAXNUM)
  {
    vt_error_msg("Number of counters exceeds VampirTrace allowed maximum of %d\n",
                 VT_METRIC_MAXNUM);
  }
  else
  {
    metricv[nmetrics] = (struct metric*)malloc(sizeof(struct metric));
    /* fixed: malloc/strdup results were dereferenced without NULL checks */
    if (metricv[nmetrics] == NULL)
      vt_error();
    metricv[nmetrics]->name = strdup(name);
    if (metricv[nmetrics]->name == NULL)
      vt_error();
    metricv[nmetrics]->descr[0] = '\0';
    metricv[nmetrics]->papi_code = code;
    nmetrics++;
  }
}
/*
 * Create a VampirTrace CUPTI Activity stream and register a VampirTrace
 * GPU thread for it. The first stream of a context also triggers the
 * initial GPU_IDLE enter event when idle tracing is enabled.
 *
 * @param vtCtx the VampirTrace CUPTI context the stream belongs to
 * @param strmID ID of the CUDA stream
 *
 * @return pointer to created VampirTrace CUPTI Activity stream
 */
static vt_cuptiact_strm_t* vt_cuptiact_createStream(vt_cupti_ctx_t *vtCtx,
                                                    uint32_t strmID)
{
  vt_cuptiact_strm_t *vtStrm = NULL;

  vtStrm = (vt_cuptiact_strm_t *)malloc(sizeof(vt_cuptiact_strm_t));
  if(vtStrm == NULL)
    vt_error_msg("[CUPTI Activity] Could not allocate memory for stream!");
  vtStrm->strmID = strmID;
  vtStrm->vtLastTime = vt_gpu_init_time;
  vtStrm->destroyed = 0;
  vtStrm->next = NULL;

  /* create VT-User-Thread with name and parent id and get its id */
  {
    /* thread name is "CUDA" plus an optional "[dev]"/"[?:strm]"/"[dev:strm]"
       suffix written into the remaining 12 bytes */
    char thread_name[16] = "CUDA";

    if(vt_gpu_stream_reuse){
      if(vtCtx->devID != VT_NO_ID){
        if(-1 == snprintf(thread_name+4, 12, "[%d]", vtCtx->devID))
          vt_cntl_msg(1, "Could not create thread name for CUDA thread!");
      }
    }else{
      if(vtCtx->devID == VT_NO_ID){
        if(-1 == snprintf(thread_name+4, 12, "[?:%d]", strmID))
          vt_cntl_msg(1, "Could not create thread name for CUDA thread!");
      }else{
        if(-1 == snprintf(thread_name+4, 12, "[%d:%d]", vtCtx->devID, strmID))
          vt_cntl_msg(1, "Could not create thread name for CUDA thread!");
      }
    }

    VT_CHECK_THREAD;
    vt_gpu_registerThread(thread_name, VT_MY_THREAD, &(vtStrm->vtThrdID));
  }

  /* if first stream created for this device, make it the default stream */
  if(vtCtx->activity->strmList == NULL){
    /* write enter event for GPU_IDLE on first stream */
    if(vt_gpu_trace_idle == 1){
      /* idle must not start before trace start time */
      if(vt_gpu_init_time < vt_start_time)
        vt_gpu_init_time = vt_start_time;

      vt_enter(vtStrm->vtThrdID, &vt_gpu_init_time, vt_gpu_rid_idle);
      /*vt_warning("IDLEente: %llu (%d)", vt_gpu_init_time, vtStrm->vtThrdID);*/
      vtCtx->activity->gpuIdleOn = 1;
    }
  }

  return vtStrm;
}
/* Return the total number of child processes forked so far, as recorded in
 * the temporary trace-id file; returns 0 when no fork has been performed.
 * I/O tracing is suspended around the file access. */
uint32_t vt_fork_get_num_childs_tot()
{
  uint32_t nchilds_tot;

  /* any fork performed? (trace-id file exists?) */
  if ( fork_performed )
  {
    int fd;
    ssize_t nread;
    char tmp[16] = "";

    vt_libassert(trcid_filename[0] != '\0');

    VT_SUSPEND_IO_TRACING(VT_CURRENT_THREAD);

    /* open temp. id file for reading */
    if ( (fd = open(trcid_filename, O_RDONLY)) == -1 )
      vt_error_msg("Cannot open file %s: %s", trcid_filename, strerror(errno));

    /* read current trace id; fixed: read at most sizeof(tmp)-1 bytes and
       NUL-terminate at the actual length — the old read(fd, tmp, 16) could
       fill the whole buffer and leave it unterminated for atoi() */
    if ( (nread = read(fd, tmp, sizeof(tmp)-1)) == -1 )
      vt_error_msg("Cannot read file %s: %s", trcid_filename, strerror(errno));
    tmp[nread] = '\0';

    vt_libassert(tmp[0] != '\0');
    nchilds_tot = atoi(tmp);
    vt_libassert(nchilds_tot > 0);

    /* close temp. id file */
    close(fd);

    VT_RESUME_IO_TRACING(VT_CURRENT_THREAD);
  }
  else
  {
    nchilds_tot = 0;
  }

  return nchilds_tot;
}
/* Save the mapping fh-->id */ static vt_mpifile_data* store_id( const MPI_File fh, const uint32_t id ) { struct mpifh_fid_map *newentry; if( nfiles >= nmaxfiles ) vt_error_msg( "Too many MPI_File handles" ); /* nfiles is always the index to the next free entry */ newentry = &(mpifh_fid_map[nfiles]); newentry->mpifh = fh; newentry->file_data.fid = id; newentry->file_data.handle = VTTHRD_IO_NEXT_HANDLE(VTTHRD_MY_VTTHRD); nfiles++; return &(newentry->file_data); }
uint32_t vt_group_id(MPI_Group group) { uint32_t i; if ((i = group_search(group)) != (uint32_t)-1) { return groups[i].gid; } else { vt_error_msg("Cannot find group"); return (uint32_t)-1; } }
/* One-time initialization of memory tracing via GNU C malloc hooks: save
 * the original hooks and define the regions, markers, and counter used to
 * record allocation events. Threaded builds are rejected at compile time. */
void vt_memhook_init()
{
  uint32_t fid;
  uint32_t gid;

#if (defined(VT_MT) || defined(VT_HYB) || defined(VT_JAVA))
  vt_error_msg("Memory tracing by GNU C malloc-hooks for threaded application "
               "not yet supported");
#endif /* VT_MT || VT_HYB || VT_JAVA */

  if( vt_memhook_is_initialized ) return;

  /* save original hooks so they can be restored/chained later */
  vt_malloc_hook_org = __malloc_hook;
  vt_realloc_hook_org = __realloc_hook;
  vt_free_hook_org = __free_hook;

  /* define source */
  fid = vt_def_scl_file(VT_CURRENT_THREAD, "MEM");

  /* define regions */
  memhook_regid[MEMHOOK_REG_MALLOC] =
    vt_def_region(VT_CURRENT_THREAD, "malloc", fid, VT_NO_LNO, VT_NO_LNO, NULL,
                  VT_MEMORY);
  memhook_regid[MEMHOOK_REG_REALLOC] =
    vt_def_region(VT_CURRENT_THREAD, "realloc", fid, VT_NO_LNO, VT_NO_LNO, NULL,
                  VT_MEMORY);
  memhook_regid[MEMHOOK_REG_FREE] =
    vt_def_region(VT_CURRENT_THREAD, "free", fid, VT_NO_LNO, VT_NO_LNO, NULL,
                  VT_MEMORY);

  /* define markers, if necessary */
  if( (memalloc_marker = vt_env_memtrace_marker()) )
  {
    memalloc_mid[MEMHOOK_MARK_ALLOC] =
      vt_def_marker(VT_CURRENT_THREAD, "Memory Allocation", VT_MARKER_HINT);
    memalloc_mid[MEMHOOK_MARK_FREE] =
      vt_def_marker(VT_CURRENT_THREAD, "Memory Deallocation", VT_MARKER_HINT);
  }

  /* define counter group */
  gid = vt_def_counter_group(VT_CURRENT_THREAD, "Memory");

  /* define counter */
  memalloc_cid =
    vt_def_counter(VT_CURRENT_THREAD, "MEM_ALLOC", VT_CNTR_ABS | VT_CNTR_NEXT,
                   gid, "Bytes");

  vt_memhook_is_initialized = 1;
}
uint32_t vt_comm_id(MPI_Comm comm) { uint32_t i; if ((i = comm_search(comm)) != (uint32_t)-1) { return comms[i].cid; } else { vt_error_msg("Cannot find communicator"); return (uint32_t)-1; } }
/* Query the CPU the calling thread is currently running on. Updates *value
 * and sets *changed to 1 only when the CPU differs from the stored value. */
void vt_getcpu_read(uint32_t* value, uint8_t* changed)
{
  int cpu;

  *changed = 0;

  cpu = sched_getcpu();
  if ( cpu == -1 )
    vt_error_msg("sched_getcpu: %s", strerror(errno));

  if( (uint32_t)cpu != *value )
  {
    *value = (uint32_t)cpu;
    *changed = 1;
  }
}
/*
 * Handles errors returned from CUPTI function calls.
 *
 * @param err the CUPTI error code
 * @param msg a message to get more detailed information about the error
 * @param file the corresponding file
 * @param line the line the error occurred
 */
void vt_cupti_handleError(CUptiResult err, const char* msg,
                          const char *file, const int line)
{
  const char *errstr;

  /* fixed: 'msg' was passed as the format string; a '%' in the message
     would misformat or crash — always route through "%s" */
  if(msg != NULL) vt_cntl_msg(1, "%s", msg);

  cuptiGetResultString(err, &errstr);

  if(vt_gpu_error){
    vt_error_msg("[CUPTI] %s:%d:'%s'", file, line, errstr);
  }else{
    vt_warning("[CUPTI] %s:%d:'%s'", file, line, errstr);
  }
}