/*
 * Write a human-readable description of one CUPTI activity record to stdout.
 *
 * record->kind selects the concrete record layout the pointer is cast to.
 * Start and end times are printed as offsets from startTimestamp, which is
 * defined outside this function. Kernel records produce two lines of output;
 * every other handled kind produces one. Kinds that are not handled here
 * print " <unknown>".
 */
static void printActivity(CUpti_Activity *record)
{
  switch (record->kind) {
  case CUPTI_ACTIVITY_KIND_DEVICE: {
    CUpti_ActivityDevice *dev = (CUpti_ActivityDevice *)record;
    /* Scale the raw fields down to the units named in the output labels. */
    const unsigned int bandwidth = (unsigned int)(dev->globalMemoryBandwidth / 1024 / 1024);
    const unsigned int memSize = (unsigned int)(dev->globalMemorySize / 1024 / 1024);
    const unsigned int clockRate = (unsigned int)(dev->coreClockRate / 1000);

    printf("DEVICE %s (%u), capability %u.%u, global memory (bandwidth %u GB/s, size %u MB), "
           "multiprocessors %u, clock %u MHz\n",
           dev->name, dev->id,
           dev->computeCapabilityMajor, dev->computeCapabilityMinor,
           bandwidth, memSize,
           dev->numMultiprocessors, clockRate);
    break;
  }

  case CUPTI_ACTIVITY_KIND_CONTEXT: {
    CUpti_ActivityContext *ctx = (CUpti_ActivityContext *)record;
    const char *apiName = "unknown";

    if (ctx->computeApiKind == CUPTI_ACTIVITY_COMPUTE_API_CUDA) {
      apiName = "CUDA";
    }
    printf("CONTEXT %u, device %u, compute API %s\n",
           ctx->contextId, ctx->deviceId, apiName);
    break;
  }

  case CUPTI_ACTIVITY_KIND_MEMCPY: {
    CUpti_ActivityMemcpy *copy = (CUpti_ActivityMemcpy *)record;
    const unsigned long long begin = (unsigned long long)(copy->start - startTimestamp);
    const unsigned long long finish = (unsigned long long)(copy->end - startTimestamp);

    printf("MEMCPY %s [ %llu - %llu ] device %u, context %u, stream %u, correlation %u/r%u\n",
           getMemcpyKindString((CUpti_ActivityMemcpyKind)copy->copyKind),
           begin, finish,
           copy->deviceId, copy->contextId, copy->streamId,
           copy->correlationId, copy->runtimeCorrelationId);
    break;
  }

  case CUPTI_ACTIVITY_KIND_MEMSET: {
    CUpti_ActivityMemset *fill = (CUpti_ActivityMemset *)record;
    const unsigned long long begin = (unsigned long long)(fill->start - startTimestamp);
    const unsigned long long finish = (unsigned long long)(fill->end - startTimestamp);

    printf("MEMSET value=%u [ %llu - %llu ] device %u, context %u, stream %u, correlation %u/r%u\n",
           fill->value,
           begin, finish,
           fill->deviceId, fill->contextId, fill->streamId,
           fill->correlationId, fill->runtimeCorrelationId);
    break;
  }

  case CUPTI_ACTIVITY_KIND_KERNEL: {
    CUpti_ActivityKernel *launch = (CUpti_ActivityKernel *)record;
    const unsigned long long begin = (unsigned long long)(launch->start - startTimestamp);
    const unsigned long long finish = (unsigned long long)(launch->end - startTimestamp);

    /* First line: identity and timing. */
    printf("KERNEL \"%s\" [ %llu - %llu ] device %u, context %u, stream %u, correlation %u/r%u\n",
           launch->name,
           begin, finish,
           launch->deviceId, launch->contextId, launch->streamId,
           launch->correlationId, launch->runtimeCorrelationId);
    /* Second line: launch geometry and shared-memory usage. */
    printf(" grid [%u,%u,%u], block [%u,%u,%u], shared memory (static %u, dynamic %u)\n",
           launch->gridX, launch->gridY, launch->gridZ,
           launch->blockX, launch->blockY, launch->blockZ,
           launch->staticSharedMemory, launch->dynamicSharedMemory);
    break;
  }

  case CUPTI_ACTIVITY_KIND_DRIVER: {
    CUpti_ActivityAPI *call = (CUpti_ActivityAPI *)record;
    const unsigned long long begin = (unsigned long long)(call->start - startTimestamp);
    const unsigned long long finish = (unsigned long long)(call->end - startTimestamp);

    printf("DRIVER cbid=%u [ %llu - %llu ] process %u, thread %u, correlation %u\n",
           call->cbid,
           begin, finish,
           call->processId, call->threadId, call->correlationId);
    break;
  }

  case CUPTI_ACTIVITY_KIND_RUNTIME: {
    CUpti_ActivityAPI *call = (CUpti_ActivityAPI *)record;
    const unsigned long long begin = (unsigned long long)(call->start - startTimestamp);
    const unsigned long long finish = (unsigned long long)(call->end - startTimestamp);

    printf("RUNTIME cbid=%u [ %llu - %llu ] process %u, thread %u, correlation %u\n",
           call->cbid,
           begin, finish,
           call->processId, call->threadId, call->correlationId);
    break;
  }

  default:
    printf(" <unknown>\n");
    break;
  }
}
/*
 * Write a human-readable description of one CUPTI activity record to stdout,
 * preceded by a separator banner line.
 *
 * record->kind selects the concrete record layout the pointer is cast to.
 * Start and end times are printed as offsets from startTimestamp, which is
 * defined outside this function. Kernel records produce two lines of output
 * after the banner; every other handled kind produces one. Kinds that are
 * not handled here print " <unknown>".
 *
 * This function only prints. Forwarding of MEMCPY/KERNEL records to
 * send_CUPTI_info() existed solely as commented-out code and is not
 * performed.
 */
static void printActivity(CUpti_Activity *record)
{
  printf("<--------------------------------print-------------------------->\n");

  switch (record->kind) {
  case CUPTI_ACTIVITY_KIND_DEVICE: {
    CUpti_ActivityDevice *device = (CUpti_ActivityDevice *)record;

    /* The divisions scale the raw fields to the units named in the labels. */
    printf("DEVICE %s (%u), capability %u.%u, global memory (bandwidth %u GB/s, size %u MB), "
           "multiprocessors %u, clock %u MHz\n",
           device->name, device->id,
           device->computeCapabilityMajor, device->computeCapabilityMinor,
           (unsigned int)(device->globalMemoryBandwidth / 1024 / 1024),
           (unsigned int)(device->globalMemorySize / 1024 / 1024),
           device->numMultiprocessors,
           (unsigned int)(device->coreClockRate / 1000));
    break;
  }

  case CUPTI_ACTIVITY_KIND_CONTEXT: {
    CUpti_ActivityContext *context = (CUpti_ActivityContext *)record;
    const char *apiName = "unknown";

    if (context->computeApiKind == CUPTI_ACTIVITY_COMPUTE_API_CUDA) {
      apiName = "CUDA";
    } else if (context->computeApiKind == CUPTI_ACTIVITY_COMPUTE_API_OPENCL) {
      apiName = "OpenCL";
    }
    printf("CONTEXT %u, device %u, compute API %s\n",
           context->contextId, context->deviceId, apiName);
    break;
  }

  case CUPTI_ACTIVITY_KIND_MEMCPY: {
    /* Named `copy`, not `memcpy`, so the libc function is not shadowed. */
    CUpti_ActivityMemcpy *copy = (CUpti_ActivityMemcpy *)record;

    /*
     * The argument list must match the format one-to-one. Passing the raw
     * 64-bit start/end values in addition to the offsets would shift every
     * later argument onto the wrong conversion (64-bit values read by %u),
     * which is undefined behaviour and prints garbage ids. Only the offsets
     * from startTimestamp are printed, as in the MEMSET and KERNEL cases.
     */
    printf("MEMCPY %s [ %llu - %llu ] device %u, context %u, stream %u, correlation %u/r%u\n",
           getMemcpyKindString((CUpti_ActivityMemcpyKind)copy->copyKind),
           (unsigned long long)(copy->start - startTimestamp),
           (unsigned long long)(copy->end - startTimestamp),
           copy->deviceId, copy->contextId, copy->streamId,
           copy->correlationId, copy->runtimeCorrelationId);
    break;
  }

  case CUPTI_ACTIVITY_KIND_MEMSET: {
    /* Named `fill`, not `memset`, so the libc function is not shadowed. */
    CUpti_ActivityMemset *fill = (CUpti_ActivityMemset *)record;

    printf("MEMSET value=%u [ %llu - %llu ] device %u, context %u, stream %u, correlation %u/r%u\n",
           fill->value,
           (unsigned long long)(fill->start - startTimestamp),
           (unsigned long long)(fill->end - startTimestamp),
           fill->deviceId, fill->contextId, fill->streamId,
           fill->correlationId, fill->runtimeCorrelationId);
    break;
  }

  case CUPTI_ACTIVITY_KIND_KERNEL: {
    CUpti_ActivityKernel *kernel = (CUpti_ActivityKernel *)record;

    /* First line: identity and timing. */
    printf("KERNEL \"%s\" [ %llu - %llu ] device %u, context %u, stream %u, correlation %u/r%u\n",
           kernel->name,
           (unsigned long long)(kernel->start - startTimestamp),
           (unsigned long long)(kernel->end - startTimestamp),
           kernel->deviceId, kernel->contextId, kernel->streamId,
           kernel->correlationId, kernel->runtimeCorrelationId);
    /* Second line: launch geometry and shared-memory usage. */
    printf(" grid [%u,%u,%u], block [%u,%u,%u], shared memory (static %u, dynamic %u)\n",
           kernel->gridX, kernel->gridY, kernel->gridZ,
           kernel->blockX, kernel->blockY, kernel->blockZ,
           kernel->staticSharedMemory, kernel->dynamicSharedMemory);
    break;
  }

  case CUPTI_ACTIVITY_KIND_DRIVER: {
    CUpti_ActivityAPI *api = (CUpti_ActivityAPI *)record;

    printf("DRIVER cbid=%u [ %llu - %llu ] process %u, thread %u, correlation %u\n",
           api->cbid,
           (unsigned long long)(api->start - startTimestamp),
           (unsigned long long)(api->end - startTimestamp),
           api->processId, api->threadId, api->correlationId);
    break;
  }

  case CUPTI_ACTIVITY_KIND_RUNTIME: {
    CUpti_ActivityAPI *api = (CUpti_ActivityAPI *)record;

    printf("RUNTIME cbid=%u [ %llu - %llu ] process %u, thread %u, correlation %u\n",
           api->cbid,
           (unsigned long long)(api->start - startTimestamp),
           (unsigned long long)(api->end - startTimestamp),
           api->processId, api->threadId, api->correlationId);
    break;
  }

  default:
    printf(" <unknown>\n");
    break;
  }
}