// Runs memcpytest2<T> over a doubling series of element counts, once with the
// second argument 0 and once with it set to 1 (labelled "unpinned host" and
// "pinned host" below), resetting the device before every run.
//
// maxElem: upper bound on the element count tested. 0 (the default) selects
//          one fifth of the currently free device memory, in elements of T.
// offset:  added to each power-of-two element count before it is tested.
//
// NOTE(review): T is not declared inside this block; presumably it is a
// template parameter supplied by the surrounding code -- confirm.
void memcpytest2_sizes(size_t maxElem=0, size_t offset=0)
{
    printSep();
    printf ("test: %s<%s>\n", __func__, TYPENAME(T));

    int deviceId;
    HIPCHECK(hipGetDevice(&deviceId));

    size_t free, total;
    HIPCHECK(hipMemGetInfo(&free, &total));

    if (maxElem == 0) {
        // No explicit limit given: derive one from the free device memory.
        maxElem = free/sizeof(T)/5;
    }

    // offset is a size_t, so it is printed with %zu like free and total
    // (%lu is only correct where size_t happens to be unsigned long).
    printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB offset=%zu\n",
            deviceId,
            free, (float)(free/1024.0/1024.0),
            total, (float)(total/1024.0/1024.0),
            maxElem*sizeof(T)/1024.0/1024.0,
            offset);

    // Element counts 64, 128, 256, ... for as long as elem+offset fits in maxElem.
    for (size_t elem=64; elem+offset<=maxElem; elem*=2) {
        HIPCHECK ( hipDeviceReset() );
        memcpytest2<T>(elem+offset, 0, 1, 1, 0);  // unpinned host
        HIPCHECK ( hipDeviceReset() );
        memcpytest2<T>(elem+offset, 1, 1, 1, 0);  // pinned host
    }
}
// Test entry point: selects every device reported by hipGetDeviceCount in
// turn and checks that hipGetDevice reports the device that was just set.
// Calls passed() once all devices have been checked.
int main()
{
    int numDevices = 0;
    HIPCHECK(hipGetDeviceCount(&numDevices));

    int i = 0;
    while (i < numDevices) {
        // Make device i current, then read the current device back.
        HIPCHECK(hipSetDevice(i));

        int device;
        HIPCHECK(hipGetDevice(&device));
        HIPASSERT(device == i);

        ++i;
    }

    passed();
}
// Picks a block count for a launch over N elements on the current device.
//
// Starts from (multiProcessorCount of the current device) * blocksPerCU and,
// if that many blocks of threadsPerBlock threads would exceed N threads,
// reduces the count to ceil(N / threadsPerBlock).
//
// blocksPerCU:     blocks requested per multiprocessor.
// threadsPerBlock: threads in each block.
// N:               number of elements to cover.
// Returns the chosen block count. This is 0 when N is 0 and the initial
// thread total is non-zero.
unsigned setNumBlocks(unsigned blocksPerCU, unsigned threadsPerBlock, size_t N)
{
    int device;
    HIPCHECK(hipGetDevice(&device));

    hipDeviceProp_t props;
    HIPCHECK(hipGetDeviceProperties(&props, device));

    unsigned blocks = props.multiProcessorCount * blocksPerCU;

    // Widen before multiplying: in plain unsigned arithmetic the thread total
    // could wrap around and wrongly compare as <= N, skipping the clamp.
    const unsigned long long totalThreads =
        (unsigned long long)blocks * threadsPerBlock;

    if (totalThreads > N) {
        // totalThreads > N >= 0 implies threadsPerBlock != 0, so the division
        // is safe; this is N / threadsPerBlock rounded up.
        blocks = (N + threadsPerBlock - 1) / threadsPerBlock;
    }

    return blocks;
}