void initTrace() { // Enqueue a couple of buffers in the global queue queueNewBuffer(NULL, 0); queueNewBuffer(NULL, 0); // device activity record is created when CUDA initializes, so we // want to enable it before cuInit() or any CUDA runtime call CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_DEVICE)); // CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMCPY)); cuptiActivityEnable(CUPTI_ACTIVITY_KIND_CONTEXT); // cuptiActivityEnable(CUPTI_ACTIVITY_KIND_DRIVER); // cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME); cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMCPY); cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMSET); cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL); CUpti_SubscriberHandle subscriber; CUPTI_CALL(cuptiSubscribe(&subscriber, (CUpti_CallbackFunc)traceCallback, NULL)); CUPTI_CALL(cuptiEnableDomain(1, subscriber, CUPTI_CB_DOMAIN_RESOURCE)); CUPTI_CALL(cuptiEnableDomain(1, subscriber, CUPTI_CB_DOMAIN_SYNCHRONIZE)); //add by wukai //CUPIT_CALL(cuptiEnableDomain(1, subscriber, CUPTI_CB_DOMAIN_RUNTIME_API)); CUPTI_CALL(cuptiGetTimestamp(&startTimestamp)); }
/*
 * Library constructor: runs automatically at load time.
 *
 * Subscribes getTimestampCallback (with &trace as user data) to CUPTI,
 * enables callbacks for the CUDA runtime API domain, and registers
 * Trace_end to run at process exit.
 *
 * Uses the file-level cuptierr, subscriber and trace objects.
 */
__attribute__((constructor)) void Trace_start()
{
    cuptierr = cuptiSubscribe(&subscriber,
                              (CUpti_CallbackFunc)getTimestampCallback,
                              &trace);
    CHECK_CUPTI_ERROR(cuptierr, "cuptiSubscribe");

    cuptierr = cuptiEnableDomain(1, subscriber, CUPTI_CB_DOMAIN_RUNTIME_API);
    CHECK_CUPTI_ERROR(cuptierr, "cuptiEnableDomain");

    printf("<-----------register Trace_end--------------->\n");

    /*
     * atexit() returns non-zero when the handler could not be registered.
     * In that case Trace_end would never run, so say so instead of failing
     * silently.
     */
    if (atexit(Trace_end) != 0) {
        fprintf(stderr,
                "Trace_start: atexit(Trace_end) failed; Trace_end will not run at exit\n");
    }
}
__attribute__((constructor)) void initTrace() { //get the arguments from the environment variables int deviceId, sampRate; CUcontext cuCtx; deviceId = atoi(getenv("GPU_DEVICE_ID")); cuInit(0); cuCtxCreate(&cuCtx,0,deviceId); CUPTI_CALL(cuptiActivityRegisterCallbacks(bufferRequested, bufferCompleted)); CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_PC_SAMPLING)); //CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_BRANCH)); CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL)); CUPTI_CALL(cuptiSubscribe(&g_subscriber, (CUpti_CallbackFunc)traceCallback, NULL)); CUPTI_CALL(cuptiEnableDomain(1, g_subscriber, CUPTI_CB_DOMAIN_RESOURCE)); CUpti_ActivityPCSamplingConfig config; sampRate=atoi(getenv("PC_SAMPLING_RATE")); config.samplingPeriod= sampRate; CUPTI_CALL(cuptiActivityConfigurePCSampling(cuCtx, &config)); }
int cupti_metrics_enable() { CUptiResult res; if (!initialised || !metricCount) { return 0; } // We cannot initialise anything here - we only hook CUDA context // creation in order to do the actual work later. if (subscriber == NULL) { // Create subscription CUPTI_CALL(cuptiSubscribe(&subscriber, (CUpti_CallbackFunc)cupti_callback, NULL)); // Bind callbacks CUPTI_CALL(cuptiEnableCallback(1, subscriber, CUPTI_CB_DOMAIN_RESOURCE, CUPTI_CBID_RESOURCE_CONTEXT_CREATED)); CUPTI_CALL(cuptiEnableCallback(1, subscriber, CUPTI_CB_DOMAIN_RESOURCE, CUPTI_CBID_RESOURCE_CONTEXT_DESTROY_STARTING)); // Hook kernel launch. This feels hacky, but is the approach // callback_metric.cu takes. #ifndef LOG_ALL_DRIVER_CALLS CUPTI_CALL(cuptiEnableCallback(1, subscriber, CUPTI_CB_DOMAIN_DRIVER_API, CUPTI_DRIVER_TRACE_CBID_cuLaunchKernel)); #else int i; for (i = CUPTI_DRIVER_TRACE_CBID_cuInit; i < CUPTI_DRIVER_TRACE_CBID_SIZE; i++) { CUPTI_CALL(cuptiEnableCallback(1, subscriber, CUPTI_CB_DOMAIN_DRIVER_API, i)); } #endif } }