void VTThrd_init()
{
  /* get the maximum number of threads */
  VTThrdMaxNum = (uint32_t)vt_env_max_threads();

  /* create vector of the thread objects */
  VTThrdv = (VTThrd**)calloc(VTThrdMaxNum, sizeof(VTThrd*));
  if ( VTThrdv == NULL )
    vt_error();

#if (defined(VT_MT) || defined (VT_HYB) || defined(VT_JAVA))
  /* initialize thread-type specifics */
# if defined(VT_THRD_PTHREAD)
  VTThrd_initPthread();
# elif defined(VT_THRD_OMP)
  VTThrd_initOmp();
# elif defined(VT_JAVA)
  VTThrd_initJava();
# endif /* VT_THRD_[PTHREAD|OMP] || VT_JAVA */

  /* create mutexes for locking */
  VTThrd_createMutex(&VTThrdMutexEnv);
  VTThrd_createMutex(&VTThrdMutexIds);
#endif /* VT_MT || VT_HYB || VT_JAVA */

  /* create object for master thread
     (for Java this will be done in VTThrd_initJava(), 'cause it gets the
      real thread name) */
#if !defined(VT_JAVA)
  VTThrd_create(0, 0, NULL, 0);
  VTThrd_open(0);
#endif /* VT_JAVA */
}
/*
 * Initializes the mutex and VampirTrace IDs, and registers the finalize
 * function. This may be done implicitly by vt_cupti_count().
 */
void vt_cupti_init()
{
  if(!vt_cupti_initialized){
#if (defined(VT_MT) || defined(VT_HYB))
    VTThrd_createMutex(&VTThrdMutexCupti);
#endif
    CUPTI_LOCK();
    if(!vt_cupti_initialized){
      vt_cntl_msg(2, "[CUPTI] Initializing ... ");

      /* create VampirTrace counter group ID only once */
#if (defined(VT_MT) || defined(VT_HYB))
      VTTHRD_LOCK_IDS();
#endif
      rid_cupti_init = vt_def_region(VT_MASTER_THREAD, "vtcuptiHostThreadInit",
                                     VT_NO_ID, VT_NO_LNO, VT_NO_LNO,
                                     "VT_CUPTI", VT_FUNCTION);

      cgid_cupti = vt_def_counter_group(VT_MASTER_THREAD, "CUPTI");
#if (defined(VT_MT) || defined(VT_HYB))
      VTTHRD_UNLOCK_IDS();
#endif

      vt_cupti_capList = vt_cupti_setupMetricList();

      /* register the finalize function of the CUDA wrapper to be called before
       * the program exits and CUDA has done its implicit clean-up */
      atexit(vt_cupti_finalize);

      vt_cupti_initialized = 1;
      CUPTI_UNLOCK();
    }
  }
}
void VTThrd_lock(VTThrdMutex** mutex)
{
  if (*mutex == NULL)
    VTThrd_createMutex(mutex);

  pthread_mutex_lock(&((*mutex)->m));
}
void VTThrd_lock(VTThrdMutex** mutex)
{
  if (*mutex == NULL)
    VTThrd_createMutex(mutex);

  omp_set_lock(&((*mutex)->m));
}
void VTThrd_lock(VTThrdMutex** mutex)
{
  jvmtiError error;

  if (*mutex == NULL)
    VTThrd_createMutex(mutex);

  error = (*jvmti)->RawMonitorEnter(jvmti, (*mutex)->m);
  vt_java_check_error(jvmti, error, "RawMonitorEnter");
}
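All three variants share the same lazy-creation pattern: the mutex wrapper is allocated on first use, then the platform-specific lock primitive is taken. For completeness, a minimal sketch of the matching Pthreads unlock path is shown below; it assumes the same VTThrdMutex wrapper with a pthread_mutex_t member m as above and may differ from the actual VampirTrace implementation.

/* Hypothetical counterpart to the Pthreads variant of VTThrd_lock() above
 * (a sketch, not the verified VampirTrace source). The mutex is expected to
 * exist already at this point, so there is no lazy creation on the unlock
 * path. */
void VTThrd_unlock(VTThrdMutex** mutex)
{
  pthread_mutex_unlock(&((*mutex)->m));
}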
void vt_cupti_init()
{
  if(!vt_cupti_initialized){
#if (defined(VT_MT) || defined(VT_HYB))
    VTThrd_createMutex(&VTThrdMutexCupti);
#endif
    VT_CUPTI_LOCK();
    if(!vt_cupti_initialized){
      vt_cntl_msg(2, "[CUPTI] Initializing ... ");

      /* register the finalize function of VampirTrace CUPTI to be called
       * before the program exits */
      atexit(vt_cupti_finalize);

      vt_cupti_initialized = 1;
      VT_CUPTI_UNLOCK();
    }
  }
}
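Both versions of vt_cupti_init() follow a double-checked initialization scheme: an unlocked fast-path test, then a second test under the CUPTI lock so that the one-time setup runs exactly once even when several threads race into the initializer. The self-contained sketch below illustrates only that pattern; all names (module_initialized, module_mutex, module_init) are hypothetical and not part of VampirTrace, and a plain int flag like this glosses over the memory-ordering caveats that a production implementation would have to address.

/* Minimal sketch of double-checked initialization with Pthreads.
 * All identifiers here are hypothetical illustration only. */
#include <pthread.h>
#include <stdio.h>

static int module_initialized = 0;
static pthread_mutex_t module_mutex = PTHREAD_MUTEX_INITIALIZER;

static void module_init(void)
{
  if (!module_initialized) {              /* fast path, no lock taken */
    pthread_mutex_lock(&module_mutex);
    if (!module_initialized) {            /* re-check while holding the lock */
      puts("performing one-time setup");  /* the actual setup work goes here */
      module_initialized = 1;             /* publish before releasing the lock */
    }
    pthread_mutex_unlock(&module_mutex);
  }
}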
static void add_events(const struct vt_plugin * current_plugin, VTThrd * thrd)
{
  int j;
  struct vt_plugin_single_counter * current;
  uint32_t * current_size;
  struct vt_plugin_cntr_defines * plugin_cntr_defines =
      (struct vt_plugin_cntr_defines *) thrd->plugin_cntr_defines;

  /* get the current counters for this thread and synch type */
  current = plugin_cntr_defines->counters[current_plugin->info.synch];
  if (current == NULL) {
    plugin_cntr_defines->counters[current_plugin->info.synch] =
        calloc(VT_PLUGIN_COUNTERS_PER_THREAD,
               sizeof(struct vt_plugin_single_counter));
    current = plugin_cntr_defines->counters[current_plugin->info.synch];
  }

  /* get the number of counters for this thread and synch type */
  current_size =
      &(plugin_cntr_defines->size_of_counters[current_plugin->info.synch]);

  vt_cntl_msg(3, "Process %i Thread %s-%s adds own plugin metrics",
      vt_my_ptrace, thrd->name, thrd->name_suffix);

  for (j = 0; j < current_plugin->num_selected_events; j++) {
    if (*current_size >= VT_PLUGIN_COUNTERS_PER_THREAD) {
      vt_error_msg("You're about to add more than %i plugin counters,"
          " which is impossible\n", VT_PLUGIN_COUNTERS_PER_THREAD);
      continue;
    }

    if (current_plugin->info.synch == VT_PLUGIN_CNTR_ASYNCH_CALLBACK) {
      if (*current_size == 0) {
      }
    }

    /* add counter */
    current[*current_size].from_plugin_id = current_plugin->info.add_counter(
        current_plugin->selected_events[j]);

    /* added successfully? */
    if (current[*current_size].from_plugin_id < 0) {
      vt_error_msg(
          "Error while adding plugin counter \"%s\" to thread \"%s%s\"\n",
          current_plugin->selected_events[j], thrd->name, thrd->name_suffix);
      continue;
    }

    /* get the VampirTrace id for the counter */
    current[*current_size].vt_counter_id = current_plugin->vt_counter_ids[j];
    current[*current_size].vt_asynch_key = current_plugin->vt_asynch_keys[j];
    current[*current_size].enable_counter = current_plugin->info.enable_counter;
    current[*current_size].disable_counter =
        current_plugin->info.disable_counter;

    /* per type stuff */
    if (current_plugin->info.synch == VT_PLUGIN_CNTR_SYNCH)
      /* synch counters have to implement getValue */
      current[*current_size].getValue = current_plugin->info.get_current_value;

    if ((current_plugin->info.synch == VT_PLUGIN_CNTR_ASYNCH_EVENT) ||
        (current_plugin->info.synch == VT_PLUGIN_CNTR_ASYNCH_POST_MORTEM)) {
      /* these have to implement getAllValues */
      current[*current_size].getAllValues =
          current_plugin->info.get_all_values;
    }

    if (current_plugin->info.synch == VT_PLUGIN_CNTR_ASYNCH_CALLBACK) {
      /* callback should set the callback function */
      /* allocate resources */
#if (defined(VT_MT) || defined (VT_HYB) || defined(VT_JAVA))
      VTThrd_createMutex(
          (VTThrdMutex **) &(current[*current_size].callback_mutex));

      /* try to set callback function */
      if (current_plugin->info.set_callback_function(&current[*current_size],
          current[*current_size].from_plugin_id, callback_function)) {
        vt_error_msg("Asynchronous callback plugin %s failed "
            "to set callback function for counter %s.\n",
            current_plugin->name, current_plugin->selected_events[j]);
      }

      current[*current_size].callback_values = malloc(
          max_values_callback * sizeof(vt_plugin_cntr_timevalue));
      if (current[*current_size].callback_values == NULL) {
        vt_error_msg("Failed to allocate memory for callback buffer\n");
      }
#else
      vt_error_msg(
          "callback events need thread support, you might use"
          " -vt:mt or -vt:hyb\n");
      continue;
#endif /* VT_MT || VT_HYB || VT_JAVA */
    }

    current[*current_size].tid = VT_MY_THREAD;
    /*
    switch (current_plugin->info.run_per) {
    case VT_PLUGIN_CNTR_PER_PROCESS:
      if (thread_group != INVALID_GROUP_NUMBER)
        current[*current_size].tid = thread_group;
      break;
    case VT_PLUGIN_CNTR_PER_HOST:
      if (current_plugin->info.run_per == VT_PLUGIN_CNTR_PER_HOST)
        if (host_group != INVALID_GROUP_NUMBER)
          current[*current_size].tid = host_group;
      break;
    case VT_PLUGIN_CNTR_ONCE:
      if (current_plugin->info.run_per == VT_PLUGIN_CNTR_ONCE)
        if (all_group != INVALID_GROUP_NUMBER)
          current[*current_size].tid = all_group;
      break;
    }
    */

    /* Next counter */
    (*current_size)++;
  }
}
void vt_cupti_activity_init()
{
  if(!vt_cuptiact_initialized){
#if (defined(VT_MT) || defined(VT_HYB))
    VTThrd_createMutex(&VTThrdMutexCuptiAct);
#endif
    VT_CUPTI_ACT_LOCK();
    if(!vt_cuptiact_initialized){
      vt_cntl_msg(2, "[CUPTI Activity] Initializing ... ");

      {
        vt_cuptiact_bufSize = vt_env_cudatrace_bsize();

        /* no buffer size < 1024 bytes allowed (see CUPTI documentation) */
        if(vt_cuptiact_bufSize < 1024){
          if(vt_cuptiact_bufSize > 0){
            vt_warning("[CUPTI Activity] Buffer size has to be at least 1024 "
                       "bytes! It has been set to %d.", vt_cuptiact_bufSize);
          }
          vt_cuptiact_bufSize = VT_CUPTI_ACT_DEFAULT_BSIZE;
        }

        /* queue a global buffer to initialize CUPTI before CUDA init
        vt_cuptiact_buffer = (uint8_t *)malloc(vt_cuptiact_bufSize);
        VT_CUPTI_CALL(cuptiActivityEnqueueBuffer(NULL, 0, vt_cuptiact_buffer,
                                                 vt_cuptiact_bufSize),
                      "cuptiActivityEnqueueBuffer");*/
      }

#if (defined(VT_MT) || defined(VT_HYB))
      VTTHRD_LOCK_IDS();
#endif
      if(vt_gpu_trace_kernels > 1){
        /* define kernel counters */
        vt_cuptiact_cid_knStaticSharedMem = vt_def_counter(VT_MASTER_THREAD,
                      "staticSharedMemory", "Bytes",
                      VT_CNTR_ABS | VT_CNTR_NEXT | VT_CNTR_UNSIGNED,
                      vt_cupti_cgid_cuda_kernel, 0);
        vt_cuptiact_cid_knDynamicSharedMem = vt_def_counter(VT_MASTER_THREAD,
                      "dynamicSharedMemory", "Bytes",
                      VT_CNTR_ABS | VT_CNTR_NEXT | VT_CNTR_UNSIGNED,
                      vt_cupti_cgid_cuda_kernel, 0);
        vt_cuptiact_cid_knLocalMemTotal = vt_def_counter(VT_MASTER_THREAD,
                      "localMemoryPerKernel", "Bytes",
                      VT_CNTR_ABS | VT_CNTR_NEXT | VT_CNTR_UNSIGNED,
                      vt_cupti_cgid_cuda_kernel, 0);
        vt_cuptiact_cid_knRegistersPerThread = vt_def_counter(VT_MASTER_THREAD,
                      "registersPerThread", "#",
                      VT_CNTR_ABS | VT_CNTR_NEXT | VT_CNTR_UNSIGNED,
                      vt_cupti_cgid_cuda_kernel, 0);
      }

      /* define region for GPU activity flush */
      vt_cuptiact_rid_flush = vt_def_region(VT_MASTER_THREAD, "flushActivities",
                      VT_NO_ID, VT_NO_LNO, VT_NO_LNO, "VT_CUDA", VT_FUNCTION);
#if (defined(VT_MT) || defined(VT_HYB))
      VTTHRD_UNLOCK_IDS();
#endif

      /*** enable the activities ***/
      /* enable kernel tracing */
      if(vt_gpu_trace_kernels > 0){
        VT_CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL),
                      "cuptiActivityEnable");
      }

      /* enable memory copy tracing */
      if(vt_gpu_trace_mcpy){
        VT_CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMCPY),
                      "cuptiActivityEnable");
      }

      /* register the finalize function of VampirTrace CUPTI to be called
       * before the program exits */
      atexit(vt_cupti_activity_finalize);

      vt_cuptiact_initialized = 1;
      VT_CUPTI_ACT_UNLOCK();
    }
  }
}