示例#1
0
文件: threadid.c 项目: gllort/extrae
/*
 * Extrae_get_num_threads
 *
 * Returns the thread count reported by whichever threading backend was
 * selected at compile time. Exactly one branch below survives preprocessing;
 * the branches are tested in order, so when several *_SUPPORT macros are
 * defined the first match wins. When none of the listed macros is defined the
 * generic get_num_threads() is used.
 */
unsigned Extrae_get_num_threads (void)
{
	unsigned nthreads;

#if defined(OMP_SUPPORT) && !defined(OMPT_INSTRUMENTATION)
	/* OpenMP build without OMPT instrumentation: ask the OpenMP runtime */
	nthreads = omp_get_num_threads();
#elif defined(SMPSS_SUPPORT)
	nthreads = css_get_max_threads();
#elif defined(NANOS_SUPPORT)
	nthreads = get_num_threads();
#elif defined(PTHREAD_SUPPORT)
	nthreads = Backend_getNumberOfThreads();
#elif defined(UPC_SUPPORT)
	nthreads = GetNumUPCthreads();
#else
	/* No specific runtime selected: fall back to the generic query */
	nthreads = get_num_threads();
#endif

	return nthreads;
}
int HWCBE_PAPI_Add_Set (int pretended_set, int rank, int ncounters, char **counters,
	char *domain, char *change_at_globalops, char *change_at_time, 
	int num_overflows, char **overflow_counters, unsigned long long *overflow_values)
{
	int i, rc, num_set = HWC_num_sets;
	PAPI_event_info_t info;

#if !defined(PAPI_SAMPLING_SUPPORT)
	UNREFERENCED_PARAMETER(num_overflows);
	UNREFERENCED_PARAMETER(overflow_counters);
	UNREFERENCED_PARAMETER(overflow_values);
#endif
	
	if (ncounters == 0 || counters == NULL)
		return 0;
	
	if (ncounters > MAX_HWC)
	{
		fprintf (stderr, PACKAGE_NAME": You cannot provide more HWC counters than %d (see set %d)\n", MAX_HWC, pretended_set);
		ncounters = MAX_HWC;
	}
	
	HWC_sets = (struct HWC_Set_t *) realloc (HWC_sets, sizeof(struct HWC_Set_t)* (HWC_num_sets+1));
	if (HWC_sets == NULL)
	{
		fprintf (stderr, PACKAGE_NAME": Cannot allocate memory for HWC_set (rank %d)\n", rank);
		return 0;
	}

	/* Initialize this set */
	HWC_sets[num_set].num_counters = 0;
	HWC_sets[num_set].eventsets = NULL;
#if defined(PAPI_SAMPLING_SUPPORT)
	HWC_sets[num_set].OverflowCounter = NULL;
	HWC_sets[num_set].OverflowValue = NULL;
	HWC_sets[num_set].NumOverflows = 0;
#endif

	for (i = 0; i < ncounters; i++)
	{
		/* counter_last_position will hold the address of the end of the 
		   counter[i] string 
		   This shall be compared with strtoul_check to know if the hex
		   is correct or not
		*/
		char *counter_last_position = &(counters[i][strlen(counters[i])]);
		char *strtoul_check;

		HWC_sets[num_set].counters[HWC_sets[num_set].num_counters] = 
			strtoul (counters[i], &strtoul_check, 16);

		if (strtoul_check != counter_last_position)
		{
			int EventCode;
			if (PAPI_event_name_to_code(counters[i], &EventCode) != PAPI_OK)
			{
				if (rank == 0)
					fprintf (stderr, PACKAGE_NAME": Cannot parse HWC %s in set %d, skipping\n", counters[i], pretended_set);
			}
			else
			{
				HWC_sets[num_set].counters[HWC_sets[num_set].num_counters] = EventCode;
			}
		}

		rc = PAPI_get_event_info (HWC_sets[num_set].counters[HWC_sets[num_set].num_counters], &info);
		if (rc != PAPI_OK)
		{
			if (rank == 0)
				fprintf (stderr, PACKAGE_NAME": Error! Cannot query information for hardware counter %s (0x%08x). Check set %d.\n", counters[i], HWC_sets[num_set].counters[HWC_sets[num_set].num_counters], pretended_set);

			HWC_sets[num_set].counters[HWC_sets[num_set].num_counters] = NO_COUNTER;
		}
		/* Native events seem that could have info.count = 0! */
		else if (rc == PAPI_OK && info.count == 0 && (HWC_sets[num_set].counters[HWC_sets[num_set].num_counters] & PAPI_NATIVE_MASK) == 0)
		{
			if (rank == 0)
				fprintf (stderr, PACKAGE_NAME": Error! Hardware counter %s (0x%08x) is not available. Check set %d.\n", counters[i], HWC_sets[num_set].counters[HWC_sets[num_set].num_counters], pretended_set);

			HWC_sets[num_set].counters[HWC_sets[num_set].num_counters] = NO_COUNTER;
		}
		else 
		{
			if (rank == 0)
				HWCBE_PAPI_AddDefinition (HWC_sets[num_set].counters[HWC_sets[num_set].num_counters],
					info.symbol, (info.event_code & PAPI_PRESET_MASK)?info.short_descr:info.long_descr);

			HWC_sets[num_set].num_counters++;
		}
	}

	if (HWC_sets[num_set].num_counters == 0)
	{
		if (rank == 0)
			fprintf (stderr, PACKAGE_NAME": Set %d of counters seems to be empty/invalid, skipping\n", pretended_set);
		return 0;
	}

	/* Just check if the user wants us to change the counters in some manner */
	if (change_at_time != NULL)
	{
		HWC_sets[num_set].change_at = getTimeFromStr (change_at_time, 
			"change-at-time", rank);
		HWC_sets[num_set].change_type = 
				(HWC_sets[num_set].change_at == 0)?CHANGE_NEVER:CHANGE_TIME;
	}
	else if (change_at_globalops != NULL)
	{
		HWC_sets[num_set].change_at = strtoul (change_at_globalops, (char **) NULL, 10);
		HWC_sets[num_set].change_type = 
			(HWC_sets[num_set].change_at == 0)?CHANGE_NEVER:CHANGE_GLOPS;
	}
	else
		HWC_sets[num_set].change_type = CHANGE_NEVER;
	
	if (domain != NULL)
	{
		if (!strcasecmp(domain, "all"))
		{
			if (rank == 0)
				fprintf (stdout, PACKAGE_NAME": PAPI domain set to ALL for HWC set %d\n",
					pretended_set);
			HWC_sets[num_set].domain = PAPI_DOM_ALL;
		}	
		else if (!strcasecmp(domain, "kernel"))
		{
			if (rank == 0)
				fprintf (stdout, PACKAGE_NAME": PAPI domain set to KERNEL for HWC set %d\n",
					pretended_set);
			HWC_sets[num_set].domain = PAPI_DOM_KERNEL;
		}	
		else if (!strcasecmp(domain, "user"))
		{
			if (rank == 0)
				fprintf (stdout, PACKAGE_NAME": PAPI domain set to USER for HWC set %d\n",
					pretended_set);
			HWC_sets[num_set].domain = PAPI_DOM_USER;
		}	
		else if (!strcasecmp(domain, "other"))
		{
			if (rank == 0)
				fprintf (stdout, PACKAGE_NAME": PAPI domain set to OTHER for HWC set %d\n",
					pretended_set);
			HWC_sets[num_set].domain = PAPI_DOM_OTHER;
		}	
		else
		{
			if (rank == 0)
				fprintf (stdout, PACKAGE_NAME": PAPI domain set to USER for HWC set %d\n",
					pretended_set);
			HWC_sets[num_set].domain = PAPI_DOM_USER;
		}
	} /* domain != NULL */
	else
	{
		if (rank == 0)
			fprintf (stdout, PACKAGE_NAME": PAPI domain set to USER for HWC set %d\n",
				pretended_set);
		HWC_sets[num_set].domain = PAPI_DOM_USER;
	}

	HWCBE_PAPI_Allocate_eventsets_per_thread (num_set, 0, Backend_getNumberOfThreads());

	/* We validate this set */
	HWC_num_sets++;

	if (rank == 0)
	{
		fprintf (stdout, PACKAGE_NAME": HWC set %d contains following counters < ", pretended_set);
		for (i = 0; i < HWC_sets[num_set].num_counters; i++)
		{
			if (HWC_sets[num_set].counters[i] != NO_COUNTER)
			{
				char counter_name[PAPI_MAX_STR_LEN];

				PAPI_event_code_to_name (HWC_sets[num_set].counters[i], counter_name);
				fprintf (stdout, "%s (0x%08x) ", counter_name, HWC_sets[num_set].counters[i]);
			}
		}
		fprintf (stdout, ">");

		if (HWC_sets[num_set].change_type == CHANGE_TIME)
			fprintf (stdout, " - changing every %lld nanoseconds\n", HWC_sets[num_set].change_at);
		else if (HWC_sets[num_set].change_type == CHANGE_GLOPS)
			fprintf (stdout, " - changing every %lld global operations\n", HWC_sets[num_set].change_at);
		else
			fprintf (stdout, " - never changes\n");

		fflush (stdout);
	}

#if defined(PAPI_SAMPLING_SUPPORT)
	if (num_overflows > 0)
		Add_Overflows_To_Set (rank, num_set, pretended_set, num_overflows,
			overflow_counters, overflow_values);
#endif

	return HWC_sets[num_set].num_counters;
}
/*
 * Extrae_OpenCL_clCreateCommandQueue
 *
 * Registers an OpenCL command queue in the CommandQueues table, unless
 * Extrae_OpenCL_lookForOpenCLQueue reports that it is already known.
 *
 * queue      - command queue to register.
 * device     - device the queue belongs to; only queried for its type, which
 *              becomes part of the thread name.
 * properties - properties the queue was created with; used to record whether
 *              out-of-order execution is enabled.
 *
 * Registration assigns the queue a thread id (reusing the one of an existing
 * thread with the same name, otherwise growing the backend thread count by
 * one) and captures a pair of host/device reference timestamps. Any failure
 * while doing so terminates the process with exit(-1).
 */
void Extrae_OpenCL_clCreateCommandQueue (cl_command_queue queue,
	cl_device_id device, cl_command_queue_properties properties)
{
	if (!Extrae_OpenCL_lookForOpenCLQueue (queue, NULL))
	{
		cl_int err;
		char _threadname[THREAD_INFO_NAME_LEN];
		char _hostname[HOST_NAME_MAX];
		const char *_device_type;
		const char *_host;
		int prev_threadid, found, idx;
		cl_device_type device_type;
		cl_event event;

		/* The new entry goes at the end of the table; nCommandQueues is only
		   incremented once the entry has been completely filled in */
		idx = nCommandQueues;
		CommandQueues = (RegisteredCommandQueue_t*) realloc (
			CommandQueues,
			sizeof(RegisteredCommandQueue_t)*(nCommandQueues+1));
		if (CommandQueues == NULL)
		{
			fprintf (stderr, PACKAGE_NAME": Fatal error! Failed to allocate memory for OpenCL Command Queues\n");
			exit (-1);
		}

		CommandQueues[idx].queue = queue;
		CommandQueues[idx].isOutOfOrder =
			(properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;

		err = clGetDeviceInfo (device, CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL);
		if (err == CL_SUCCESS)
		{
			if (device_type  == CL_DEVICE_TYPE_GPU)
				_device_type = "GPU";
			else if (device_type == CL_DEVICE_TYPE_CPU)
				_device_type = "CPU";
			else
				_device_type = "Other";
		}
		else
			_device_type = "Unknown";

		/* Build the name that identifies this queue as a thread:
		   OpenCL-<device type>-CQ<1-based index>-<host> */
		if (gethostname(_hostname, sizeof(_hostname)) == 0)
		{
			/* gethostname need not NUL-terminate a truncated name */
			_hostname[sizeof(_hostname)-1] = '\0';
			_host = _hostname;
		}
		else
			_host = "unknown-host";

		/* Bounded write: the host name alone may be longer than the
		   thread-name buffer */
		snprintf (_threadname, sizeof(_threadname), "OpenCL-%s-CQ%d-%s",
		  _device_type, 1+idx, _host);

		/* Was a thread with this name created before? */
		prev_threadid = Extrae_search_thread_name (_threadname, &found);

		if (found)
		{
			/* If thread name existed, reuse its thread id */
			CommandQueues[idx].threadid = prev_threadid;
		}
		else
		{
			/* Grow the number of threads by one right here and give the
			   newly created (last) thread id to this queue */
			Backend_ChangeNumberOfThreads (Backend_getNumberOfThreads() + 1);
			CommandQueues[idx].threadid = Backend_getNumberOfThreads()-1;

			/* Set thread name */
			Extrae_set_thread_name (CommandQueues[idx].threadid, _threadname);
		}

		CommandQueues[idx].nevents = 0;

		/* Enqueue a synchronisation point whose event carries a device
		   timestamp, and take the host timestamp right next to it */
#ifdef CL_VERSION_1_2
		err = clEnqueueBarrierWithWaitList (queue, 0, NULL, &event);
#else
		err = clEnqueueBarrier (queue);
		if (err == CL_SUCCESS)
			err = clEnqueueMarker (queue, &event);
#endif
		CommandQueues[idx].host_reference_time = TIME;

		if (err == CL_SUCCESS)
		{
			/* The profiling info is read only after the queue has drained */
			err = clFinish(queue);
			if (err != CL_SUCCESS)
			{
				fprintf (stderr, PACKAGE_NAME": Error in clFinish (error = %d)! Dying...\n", err);
				exit (-1);
			}

			err = clGetEventProfilingInfo (event, CL_PROFILING_COMMAND_SUBMIT,
				sizeof(cl_ulong), &(CommandQueues[idx].device_reference_time),
				NULL);
			if (err != CL_SUCCESS)
			{
				fprintf (stderr, PACKAGE_NAME": Error in clGetEventProfilingInfo (error = %d)! Dying...\n", err);
				exit (-1);
			}

			/* The timestamp has been copied out; drop our reference so the
			   event object is not leaked */
			clReleaseEvent (event);
		}
		else
		{
			fprintf (stderr, PACKAGE_NAME": Error while looking for clock references in host & accelerator\n");
			exit (-1);
		}

		nCommandQueues++;
	}
}