/*
 * Extrae_get_num_threads
 *
 * Returns the value produced by the thread-count query of whichever
 * runtime was selected at compile time. The first matching condition wins:
 *   OMP_SUPPORT (without OMPT_INSTRUMENTATION) -> omp_get_num_threads()
 *   SMPSS_SUPPORT                              -> css_get_max_threads()
 *   NANOS_SUPPORT                              -> get_num_threads()
 *   PTHREAD_SUPPORT                            -> Backend_getNumberOfThreads()
 *   UPC_SUPPORT                                -> GetNumUPCthreads()
 *   none of the above                          -> get_num_threads()
 */
unsigned Extrae_get_num_threads (void)
{
	unsigned nthreads;

#if defined(OMP_SUPPORT) && !defined(OMPT_INSTRUMENTATION)
	nthreads = omp_get_num_threads();
#elif defined(SMPSS_SUPPORT)
	nthreads = css_get_max_threads();
#elif defined(NANOS_SUPPORT)
	nthreads = get_num_threads();
#elif defined(PTHREAD_SUPPORT)
	nthreads = Backend_getNumberOfThreads();
#elif defined(UPC_SUPPORT)
	nthreads = GetNumUPCthreads();
#else
	nthreads = get_num_threads();
#endif

	return nthreads;
}
int HWCBE_PAPI_Add_Set (int pretended_set, int rank, int ncounters, char **counters, char *domain, char *change_at_globalops, char *change_at_time, int num_overflows, char **overflow_counters, unsigned long long *overflow_values) { int i, rc, num_set = HWC_num_sets; PAPI_event_info_t info; #if !defined(PAPI_SAMPLING_SUPPORT) UNREFERENCED_PARAMETER(num_overflows); UNREFERENCED_PARAMETER(overflow_counters); UNREFERENCED_PARAMETER(overflow_values); #endif if (ncounters == 0 || counters == NULL) return 0; if (ncounters > MAX_HWC) { fprintf (stderr, PACKAGE_NAME": You cannot provide more HWC counters than %d (see set %d)\n", MAX_HWC, pretended_set); ncounters = MAX_HWC; } HWC_sets = (struct HWC_Set_t *) realloc (HWC_sets, sizeof(struct HWC_Set_t)* (HWC_num_sets+1)); if (HWC_sets == NULL) { fprintf (stderr, PACKAGE_NAME": Cannot allocate memory for HWC_set (rank %d)\n", rank); return 0; } /* Initialize this set */ HWC_sets[num_set].num_counters = 0; HWC_sets[num_set].eventsets = NULL; #if defined(PAPI_SAMPLING_SUPPORT) HWC_sets[num_set].OverflowCounter = NULL; HWC_sets[num_set].OverflowValue = NULL; HWC_sets[num_set].NumOverflows = 0; #endif for (i = 0; i < ncounters; i++) { /* counter_last_position will hold the address of the end of the counter[i] string This shall be compared with strtoul_check to know if the hex is correct or not */ char *counter_last_position = &(counters[i][strlen(counters[i])]); char *strtoul_check; HWC_sets[num_set].counters[HWC_sets[num_set].num_counters] = strtoul (counters[i], &strtoul_check, 16); if (strtoul_check != counter_last_position) { int EventCode; if (PAPI_event_name_to_code(counters[i], &EventCode) != PAPI_OK) { if (rank == 0) fprintf (stderr, PACKAGE_NAME": Cannot parse HWC %s in set %d, skipping\n", counters[i], pretended_set); } else { HWC_sets[num_set].counters[HWC_sets[num_set].num_counters] = EventCode; } } rc = PAPI_get_event_info (HWC_sets[num_set].counters[HWC_sets[num_set].num_counters], &info); if (rc != PAPI_OK) { if 
(rank == 0) fprintf (stderr, PACKAGE_NAME": Error! Cannot query information for hardware counter %s (0x%08x). Check set %d.\n", counters[i], HWC_sets[num_set].counters[HWC_sets[num_set].num_counters], pretended_set); HWC_sets[num_set].counters[HWC_sets[num_set].num_counters] = NO_COUNTER; } /* Native events seem that could have info.count = 0! */ else if (rc == PAPI_OK && info.count == 0 && (HWC_sets[num_set].counters[HWC_sets[num_set].num_counters] & PAPI_NATIVE_MASK) == 0) { if (rank == 0) fprintf (stderr, PACKAGE_NAME": Error! Hardware counter %s (0x%08x) is not available. Check set %d.\n", counters[i], HWC_sets[num_set].counters[HWC_sets[num_set].num_counters], pretended_set); HWC_sets[num_set].counters[HWC_sets[num_set].num_counters] = NO_COUNTER; } else { if (rank == 0) HWCBE_PAPI_AddDefinition (HWC_sets[num_set].counters[HWC_sets[num_set].num_counters], info.symbol, (info.event_code & PAPI_PRESET_MASK)?info.short_descr:info.long_descr); HWC_sets[num_set].num_counters++; } } if (HWC_sets[num_set].num_counters == 0) { if (rank == 0) fprintf (stderr, PACKAGE_NAME": Set %d of counters seems to be empty/invalid, skipping\n", pretended_set); return 0; } /* Just check if the user wants us to change the counters in some manner */ if (change_at_time != NULL) { HWC_sets[num_set].change_at = getTimeFromStr (change_at_time, "change-at-time", rank); HWC_sets[num_set].change_type = (HWC_sets[num_set].change_at == 0)?CHANGE_NEVER:CHANGE_TIME; } else if (change_at_globalops != NULL) { HWC_sets[num_set].change_at = strtoul (change_at_globalops, (char **) NULL, 10); HWC_sets[num_set].change_type = (HWC_sets[num_set].change_at == 0)?CHANGE_NEVER:CHANGE_GLOPS; } else HWC_sets[num_set].change_type = CHANGE_NEVER; if (domain != NULL) { if (!strcasecmp(domain, "all")) { if (rank == 0) fprintf (stdout, PACKAGE_NAME": PAPI domain set to ALL for HWC set %d\n", pretended_set); HWC_sets[num_set].domain = PAPI_DOM_ALL; } else if (!strcasecmp(domain, "kernel")) { if (rank == 0) fprintf 
(stdout, PACKAGE_NAME": PAPI domain set to KERNEL for HWC set %d\n", pretended_set); HWC_sets[num_set].domain = PAPI_DOM_KERNEL; } else if (!strcasecmp(domain, "user")) { if (rank == 0) fprintf (stdout, PACKAGE_NAME": PAPI domain set to USER for HWC set %d\n", pretended_set); HWC_sets[num_set].domain = PAPI_DOM_USER; } else if (!strcasecmp(domain, "other")) { if (rank == 0) fprintf (stdout, PACKAGE_NAME": PAPI domain set to OTHER for HWC set %d\n", pretended_set); HWC_sets[num_set].domain = PAPI_DOM_OTHER; } else { if (rank == 0) fprintf (stdout, PACKAGE_NAME": PAPI domain set to USER for HWC set %d\n", pretended_set); HWC_sets[num_set].domain = PAPI_DOM_USER; } } /* domain != NULL */ else { if (rank == 0) fprintf (stdout, PACKAGE_NAME": PAPI domain set to USER for HWC set %d\n", pretended_set); HWC_sets[num_set].domain = PAPI_DOM_USER; } HWCBE_PAPI_Allocate_eventsets_per_thread (num_set, 0, Backend_getNumberOfThreads()); /* We validate this set */ HWC_num_sets++; if (rank == 0) { fprintf (stdout, PACKAGE_NAME": HWC set %d contains following counters < ", pretended_set); for (i = 0; i < HWC_sets[num_set].num_counters; i++) { if (HWC_sets[num_set].counters[i] != NO_COUNTER) { char counter_name[PAPI_MAX_STR_LEN]; PAPI_event_code_to_name (HWC_sets[num_set].counters[i], counter_name); fprintf (stdout, "%s (0x%08x) ", counter_name, HWC_sets[num_set].counters[i]); } } fprintf (stdout, ">"); if (HWC_sets[num_set].change_type == CHANGE_TIME) fprintf (stdout, " - changing every %lld nanoseconds\n", HWC_sets[num_set].change_at); else if (HWC_sets[num_set].change_type == CHANGE_GLOPS) fprintf (stdout, " - changing every %lld global operations\n", HWC_sets[num_set].change_at); else fprintf (stdout, " - never changes\n"); fflush (stdout); } #if defined(PAPI_SAMPLING_SUPPORT) if (num_overflows > 0) Add_Overflows_To_Set (rank, num_set, pretended_set, num_overflows, overflow_counters, overflow_values); #endif return HWC_sets[num_set].num_counters; }
/*
 * Extrae_OpenCL_clCreateCommandQueue
 *
 * Registers an OpenCL command queue in the global CommandQueues table unless
 * Extrae_OpenCL_lookForOpenCLQueue already knows it. Registration:
 *   - records the queue handle and whether it was created out-of-order,
 *   - builds a thread name "OpenCL-<devtype>-CQ<n>-<host>" and either reuses
 *     the thread id already bound to that name or adds one more thread to the
 *     backend and names it,
 *   - enqueues a barrier/marker, waits for it, and stores the host timestamp
 *     (TIME) together with the device CL_PROFILING_COMMAND_SUBMIT timestamp
 *     of that event as the clock reference pair for the queue.
 *
 *   queue       Command queue to register.
 *   device      Device the queue belongs to; only its type is queried.
 *   properties  Properties the queue was created with.
 *
 * Any allocation or OpenCL failure while obtaining the clock references
 * terminates the process with exit(-1).
 */
void Extrae_OpenCL_clCreateCommandQueue (cl_command_queue queue,
	cl_device_id device, cl_command_queue_properties properties)
{
	if (!Extrae_OpenCL_lookForOpenCLQueue (queue, NULL))
	{
		cl_int err;
		char _threadname[THREAD_INFO_NAME_LEN];
		char _hostname[HOST_NAME_MAX];
		const char *_device_type;
		const char *_host;
		int prev_threadid, found, idx;
		cl_device_type device_type;
		cl_event event;

		idx = nCommandQueues;
		CommandQueues = (RegisteredCommandQueue_t*) realloc (
		  CommandQueues,
		  sizeof(RegisteredCommandQueue_t)*(nCommandQueues+1));
		if (CommandQueues == NULL)
		{
			fprintf (stderr, PACKAGE_NAME": Fatal error! Failed to allocate memory for OpenCL Command Queues\n");
			exit (-1);
		}

		CommandQueues[idx].queue = queue;
		CommandQueues[idx].isOutOfOrder =
		  (properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;

		err = clGetDeviceInfo (device, CL_DEVICE_TYPE, sizeof(device_type),
		  &device_type, NULL);
		if (err == CL_SUCCESS)
		{
			if (device_type == CL_DEVICE_TYPE_GPU)
				_device_type = "GPU";
			else if (device_type == CL_DEVICE_TYPE_CPU)
				_device_type = "CPU";
			else
				_device_type = "Other";
		}
		else
			_device_type = "Unknown";

		if (gethostname(_hostname, HOST_NAME_MAX) == 0)
		{
			/* gethostname may leave the buffer unterminated when the name
			   is truncated, so terminate it explicitly */
			_hostname[HOST_NAME_MAX-1] = '\0';
			_host = _hostname;
		}
		else
			_host = "unknown-host";

		/* Bounded formatting: the host name may be longer than what fits
		   in a thread name, in which case it gets truncated */
		snprintf (_threadname, sizeof(_threadname), "OpenCL-%s-CQ%d-%s",
		  _device_type, 1+idx, _host);

		/* Was a thread with this name already registered? */
		prev_threadid = Extrae_search_thread_name (_threadname, &found);

		if (found)
		{
			/* If thread name existed, reuse its thread id */
			CommandQueues[idx].threadid = prev_threadid;
		}
		else
		{
			/* Add one thread to the backend to represent this command
			   queue; the new thread takes the last thread id */
			Backend_ChangeNumberOfThreads (Backend_getNumberOfThreads() + 1);
			CommandQueues[idx].threadid = Backend_getNumberOfThreads()-1;

			/* Set thread name */
			Extrae_set_thread_name (CommandQueues[idx].threadid, _threadname);
		}

		CommandQueues[idx].nevents = 0;

#ifdef CL_VERSION_1_2
		err = clEnqueueBarrierWithWaitList (queue, 0, NULL, &event);
#else
		err = clEnqueueBarrier (queue);
		if (err == CL_SUCCESS)
			err = clEnqueueMarker (queue, &event);
#endif
		/* Host side of the clock reference, taken right after enqueueing */
		CommandQueues[idx].host_reference_time = TIME;

		if (err == CL_SUCCESS)
		{
			err = clFinish(queue);
			if (err != CL_SUCCESS)
			{
				fprintf (stderr, PACKAGE_NAME": Error in clFinish (error = %d)! Dying...\n", err);
				exit (-1);
			}

			err = clGetEventProfilingInfo (event, CL_PROFILING_COMMAND_SUBMIT,
			  sizeof(cl_ulong), &(CommandQueues[idx].device_reference_time),
			  NULL);
			if (err != CL_SUCCESS)
			{
				fprintf (stderr, PACKAGE_NAME": Error in clGetEventProfilingInfo (error = %d)! Dying...\n", err);
				exit (-1);
			}

			/* The event was only needed for its timestamp; drop our
			   reference so it does not leak for every registered queue */
			clReleaseEvent (event);
		}
		else
		{
			fprintf (stderr, PACKAGE_NAME": Error while looking for clock references in host & accelerator\n");
			exit (-1);
		}

		nCommandQueues++;
	}
}