void memcpytest2_sizes(size_t maxElem=0, size_t offset=0)
{
    printSep();
    printf ("test: %s<%s>\n", __func__,  TYPENAME(T));

    int deviceId;
    HIPCHECK(hipGetDevice(&deviceId));

    size_t free, total;
    HIPCHECK(hipMemGetInfo(&free, &total));

    if (maxElem == 0) {
        maxElem = free/sizeof(T)/5;
    }

    printf ("  device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB)    maxSize=%6.1fMB offset=%lu\n", 
            deviceId, free, (float)(free/1024.0/1024.0), total, (float)(total/1024.0/1024.0), maxElem*sizeof(T)/1024.0/1024.0, offset);

    for (size_t elem=64; elem+offset<=maxElem; elem*=2) {
        HIPCHECK ( hipDeviceReset() );
        memcpytest2<T>(elem+offset, 0, 1, 1, 0);  // unpinned host
        HIPCHECK ( hipDeviceReset() );
        memcpytest2<T>(elem+offset, 1, 1, 1, 0);  // pinned host
    }
}
int main()
{
    int numDevices = 0;
    int device;
    HIPCHECK(hipGetDeviceCount(&numDevices));
    for(int i=0;i<numDevices;i++){
        HIPCHECK(hipSetDevice(i));
        HIPCHECK(hipGetDevice(&device));
        HIPASSERT(device == i);
    }
    passed();
}
Example #3
0
unsigned setNumBlocks(unsigned blocksPerCU, unsigned threadsPerBlock, size_t N) {
    int device;
    HIPCHECK(hipGetDevice(&device));
    hipDeviceProp_t props;
    HIPCHECK(hipGetDeviceProperties(&props, device));

    unsigned blocks = props.multiProcessorCount * blocksPerCU;
    if (blocks * threadsPerBlock > N) {
        blocks = (N + threadsPerBlock - 1) / threadsPerBlock;
    }

    return blocks;
}