Пример #1
0
/*
 * Write a human-readable description of one CUPTI activity record to stdout.
 *
 * The record is dispatched on record->kind and reinterpreted as the matching
 * concrete record structure. Device, context, memcpy, memset, kernel, driver
 * API and runtime API records are understood; any other kind is reported as
 * "<unknown>". Start/end timestamps are printed as offsets from the
 * file-level startTimestamp value.
 */
static void
printActivity(CUpti_Activity *record)
{
  switch (record->kind) {
  case CUPTI_ACTIVITY_KIND_DEVICE: {
    CUpti_ActivityDevice *dev = (CUpti_ActivityDevice *)record;
    /* Scale the raw fields down to the units named in the format string. */
    unsigned int bandwidth = (unsigned int)(dev->globalMemoryBandwidth / 1024 / 1024);
    unsigned int memorySize = (unsigned int)(dev->globalMemorySize / 1024 / 1024);
    unsigned int clockRate = (unsigned int)(dev->coreClockRate / 1000);

    printf("DEVICE %s (%u), capability %u.%u, global memory (bandwidth %u GB/s, size %u MB), "
           "multiprocessors %u, clock %u MHz\n",
           dev->name, dev->id,
           dev->computeCapabilityMajor, dev->computeCapabilityMinor,
           bandwidth, memorySize,
           dev->numMultiprocessors, clockRate);
    break;
  }
  case CUPTI_ACTIVITY_KIND_CONTEXT: {
    CUpti_ActivityContext *ctx = (CUpti_ActivityContext *)record;
    const char *apiName = "unknown";

    if (ctx->computeApiKind == CUPTI_ACTIVITY_COMPUTE_API_CUDA) {
      apiName = "CUDA";
    }
    printf("CONTEXT %u, device %u, compute API %s\n",
           ctx->contextId, ctx->deviceId, apiName);
    break;
  }
  case CUPTI_ACTIVITY_KIND_MEMCPY: {
    CUpti_ActivityMemcpy *copy = (CUpti_ActivityMemcpy *)record;
    unsigned long long begin = (unsigned long long)(copy->start - startTimestamp);
    unsigned long long finish = (unsigned long long)(copy->end - startTimestamp);

    printf("MEMCPY %s [ %llu - %llu ] device %u, context %u, stream %u, correlation %u/r%u\n",
           getMemcpyKindString((CUpti_ActivityMemcpyKind)copy->copyKind),
           begin, finish,
           copy->deviceId, copy->contextId, copy->streamId,
           copy->correlationId, copy->runtimeCorrelationId);
    break;
  }
  case CUPTI_ACTIVITY_KIND_MEMSET: {
    CUpti_ActivityMemset *fill = (CUpti_ActivityMemset *)record;
    unsigned long long begin = (unsigned long long)(fill->start - startTimestamp);
    unsigned long long finish = (unsigned long long)(fill->end - startTimestamp);

    printf("MEMSET value=%u [ %llu - %llu ] device %u, context %u, stream %u, correlation %u/r%u\n",
           fill->value,
           begin, finish,
           fill->deviceId, fill->contextId, fill->streamId,
           fill->correlationId, fill->runtimeCorrelationId);
    break;
  }
  case CUPTI_ACTIVITY_KIND_KERNEL: {
    CUpti_ActivityKernel *launch = (CUpti_ActivityKernel *)record;
    unsigned long long begin = (unsigned long long)(launch->start - startTimestamp);
    unsigned long long finish = (unsigned long long)(launch->end - startTimestamp);

    /* First line: identity and timing; second line: launch configuration. */
    printf("KERNEL \"%s\" [ %llu - %llu ] device %u, context %u, stream %u, correlation %u/r%u\n",
           launch->name,
           begin, finish,
           launch->deviceId, launch->contextId, launch->streamId,
           launch->correlationId, launch->runtimeCorrelationId);
    printf("    grid [%u,%u,%u], block [%u,%u,%u], shared memory (static %u, dynamic %u)\n",
           launch->gridX, launch->gridY, launch->gridZ,
           launch->blockX, launch->blockY, launch->blockZ,
           launch->staticSharedMemory, launch->dynamicSharedMemory);
    break;
  }
  case CUPTI_ACTIVITY_KIND_DRIVER: {
    CUpti_ActivityAPI *call = (CUpti_ActivityAPI *)record;
    unsigned long long begin = (unsigned long long)(call->start - startTimestamp);
    unsigned long long finish = (unsigned long long)(call->end - startTimestamp);

    printf("DRIVER cbid=%u [ %llu - %llu ] process %u, thread %u, correlation %u\n",
           call->cbid,
           begin, finish,
           call->processId, call->threadId, call->correlationId);
    break;
  }
  case CUPTI_ACTIVITY_KIND_RUNTIME: {
    CUpti_ActivityAPI *call = (CUpti_ActivityAPI *)record;
    unsigned long long begin = (unsigned long long)(call->start - startTimestamp);
    unsigned long long finish = (unsigned long long)(call->end - startTimestamp);

    printf("RUNTIME cbid=%u [ %llu - %llu ] process %u, thread %u, correlation %u\n",
           call->cbid,
           begin, finish,
           call->processId, call->threadId, call->correlationId);
    break;
  }
  default:
    printf("  <unknown>\n");
    break;
  }
}
Пример #2
0
/*
 * Write a human-readable description of one CUPTI activity record to stdout.
 *
 * A separator banner is printed first. The record is then dispatched on
 * record->kind and reinterpreted as the matching concrete record structure.
 * Device, context, memcpy, memset, kernel, driver API and runtime API records
 * are understood; any other kind is reported as "<unknown>".
 *
 * Timestamps are printed as offsets from the file-level startTimestamp value.
 * MEMCPY records additionally print the raw timestamp in front of each
 * offset, with the offset in parentheses.
 */
static void
printActivity(CUpti_Activity *record)
{
  printf ("<--------------------------------print-------------------------->\n");
  switch (record->kind) {
  case CUPTI_ACTIVITY_KIND_DEVICE:
    {
      CUpti_ActivityDevice *device = (CUpti_ActivityDevice *)record;
      /* Raw fields are scaled down to the units named in the format string. */
      printf("DEVICE %s (%u), capability %u.%u, global memory (bandwidth %u GB/s, size %u MB), "
             "multiprocessors %u, clock %u MHz\n",
             device->name, device->id,
             device->computeCapabilityMajor, device->computeCapabilityMinor,
             (unsigned int)(device->globalMemoryBandwidth / 1024 / 1024),
             (unsigned int)(device->globalMemorySize / 1024 / 1024),
             device->numMultiprocessors, (unsigned int)(device->coreClockRate / 1000));
      break;
    }
  case CUPTI_ACTIVITY_KIND_CONTEXT:
    {
      CUpti_ActivityContext *context = (CUpti_ActivityContext *)record;
      printf("CONTEXT %u, device %u, compute API %s\n",
             context->contextId, context->deviceId,
             (context->computeApiKind == CUPTI_ACTIVITY_COMPUTE_API_CUDA) ? "CUDA" :
             (context->computeApiKind == CUPTI_ACTIVITY_COMPUTE_API_OPENCL) ? "OpenCL" : "unknown");
      break;
    }
  case CUPTI_ACTIVITY_KIND_MEMCPY:
    {
      /* Named copyRec rather than memcpy to avoid shadowing the libc function. */
      CUpti_ActivityMemcpy *copyRec = (CUpti_ActivityMemcpy *)record;

      /*
       * Four 64-bit timestamp arguments are passed (raw start, relative
       * start, raw end, relative end), so the format needs four %llu
       * conversions. With only two, the following %u conversions consumed
       * 64-bit values, which is undefined behaviour and garbled the ids.
       */
      printf("MEMCPY %s [ %llu (%llu) - %llu (%llu) ] device %u, context %u, stream %u, correlation %u/r%u\n",
             getMemcpyKindString((CUpti_ActivityMemcpyKind)copyRec->copyKind),
             (unsigned long long)(copyRec->start),
             (unsigned long long)(copyRec->start - startTimestamp),
             (unsigned long long)(copyRec->end),
             (unsigned long long)(copyRec->end - startTimestamp),
             copyRec->deviceId, copyRec->contextId, copyRec->streamId,
             copyRec->correlationId, copyRec->runtimeCorrelationId);
      break;
    }
  case CUPTI_ACTIVITY_KIND_MEMSET:
    {
      /* Named setRec rather than memset to avoid shadowing the libc function. */
      CUpti_ActivityMemset *setRec = (CUpti_ActivityMemset *)record;
      printf("MEMSET value=%u [ %llu - %llu ] device %u, context %u, stream %u, correlation %u/r%u\n",
             setRec->value,
             (unsigned long long)(setRec->start - startTimestamp),
             (unsigned long long)(setRec->end - startTimestamp),
             setRec->deviceId, setRec->contextId, setRec->streamId,
             setRec->correlationId, setRec->runtimeCorrelationId);
      break;
    }
  case CUPTI_ACTIVITY_KIND_KERNEL:
    {
      CUpti_ActivityKernel *kernel = (CUpti_ActivityKernel *)record;
      /* First line: identity and timing; second line: launch configuration. */
      printf("KERNEL \"%s\" [ %llu - %llu ] device %u, context %u, stream %u, correlation %u/r%u\n",
             kernel->name,
             (unsigned long long)(kernel->start - startTimestamp),
             (unsigned long long)(kernel->end - startTimestamp),
             kernel->deviceId, kernel->contextId, kernel->streamId,
             kernel->correlationId, kernel->runtimeCorrelationId);
      printf("    grid [%u,%u,%u], block [%u,%u,%u], shared memory (static %u, dynamic %u)\n",
             kernel->gridX, kernel->gridY, kernel->gridZ,
             kernel->blockX, kernel->blockY, kernel->blockZ,
             kernel->staticSharedMemory, kernel->dynamicSharedMemory);
      break;
    }
  case CUPTI_ACTIVITY_KIND_DRIVER:
    {
      CUpti_ActivityAPI *api = (CUpti_ActivityAPI *)record;
      printf("DRIVER cbid=%u [ %llu - %llu ] process %u, thread %u, correlation %u\n",
             api->cbid,
             (unsigned long long)(api->start - startTimestamp),
             (unsigned long long)(api->end - startTimestamp),
             api->processId, api->threadId, api->correlationId);
      break;
    }
  case CUPTI_ACTIVITY_KIND_RUNTIME:
    {
      CUpti_ActivityAPI *api = (CUpti_ActivityAPI *)record;
      printf("RUNTIME cbid=%u [ %llu - %llu ] process %u, thread %u, correlation %u\n",
             api->cbid,
             (unsigned long long)(api->start - startTimestamp),
             (unsigned long long)(api->end - startTimestamp),
             api->processId, api->threadId, api->correlationId);
      break;
    }
  default:
    printf("  <unknown>\n");
    break;
  }
}