// Copies from this COI buffer into [dest] with COIBufferCopy, using the
// device's current stream.
//   dest       : destination memory object; its handle is read as a coiMemory
//   bytes      : number of bytes to copy; 0 selects this buffer's [size]
//   destOffset : offset into [dest], bounds-checked together with the byte count
//   srcOffset  : offset into this buffer, bounds-checked the same way
// The call first waits on the stream's last event, then issues the copy and
// records the copy's completion event in stream.lastEvent. It does not wait
// on that new event.
// NOTE(review): the second error label reads "Copy From" although this is a
// copy-to; it is runtime text and is left untouched here.
void memory_t<COI>::asyncCopyTo(memory_v *dest,
                                const uintptr_t bytes,
                                const uintptr_t destOffset,
                                const uintptr_t srcOffset){
  coiStream &stream = *((coiStream*) dev->currentStream);

  // A zero byte count means "the whole buffer"
  const uintptr_t bytes_ = (bytes != 0) ? bytes : size;

  OCCA_CHECK((bytes_ + destOffset) <= dest->size);
  OCCA_CHECK((bytes_ + srcOffset) <= size);

  // Let the previously queued transfer finish before queuing this one
  OCCA_COI_CHECK("Memory: Blocking on Memory Transfer",
                 COIEventWait(1, &(stream.lastEvent), -1, true, NULL, NULL) );

  OCCA_COI_CHECK("Memory: Copy From",
                 COIBufferCopy(*((coiMemory*) dest->handle), *((coiMemory*) handle), destOffset, srcOffset, bytes_, COI_COPY_UNSPECIFIED, false, NULL, &(stream.lastEvent)));
}
// Blocking write of host data into this COI buffer with COIBufferWrite.
//   source : host pointer handed to COIBufferWrite
//   bytes  : number of bytes to write; 0 selects this buffer's [size]
//   offset : offset into this buffer, bounds-checked together with the byte count
// The stream's last event is waited on before the write is issued, and the
// write's own completion event (stored in stream.lastEvent) is waited on
// before returning.
void memory_t<COI>::copyFrom(const void *source,
                             const uintptr_t bytes,
                             const uintptr_t offset){
  coiStream &stream = *((coiStream*) dev->currentStream);

  // A zero byte count means "the whole buffer"
  const uintptr_t bytes_ = (bytes != 0) ? bytes : size;

  OCCA_CHECK((bytes_ + offset) <= size);

  // Drain the previously queued transfer
  OCCA_COI_CHECK("Memory: Blocking on Memory Transfer",
                 COIEventWait(1, &(stream.lastEvent), -1, true, NULL, NULL) );

  OCCA_COI_CHECK("Memory: Copy From",
                 COIBufferWrite(*((coiMemory*) handle), offset, source, bytes_, COI_COPY_UNSPECIFIED, false, NULL, &(stream.lastEvent)));

  // Wait for the write that was just issued
  OCCA_COI_CHECK("Memory: Blocking on Memory Transfer",
                 COIEventWait(1, &(stream.lastEvent), -1, true, NULL, NULL) );
}
double timer::toc(std::string key, occa::kernel &kernel){ double elapsedTime = 0.; if(profileApplication){ assert(key == keyStack.top()); OCCA_CHECK(key == keyStack.top(), "Error in timer " << key << '\n'); if(profileKernels){ if(deviceInitialized) occaHandle.finish(); double currentTime = occa::currentTime(); elapsedTime = (currentTime - timeStack.top()); // times[keyStack].timeTaken += kernel.timeTaken(); times[keyStack].timeTaken += elapsedTime; times[keyStack].numCalls++; } keyStack.pop(); timeStack.pop(); } return elapsedTime; }
// Copies from this CUDA memory object into another memory object [dest].
//   dest       : destination memory object (linear memory or texture)
//   bytes      : number of bytes to copy; 0 selects this object's [size]
//   destOffset : offset into [dest], bounds-checked together with the byte count
//   srcOffset  : offset into this object, bounds-checked the same way
//
// Handle layout (as used by the host-pointer copy routines of this class):
//   - linear memory : handle points to a CUdeviceptr
//   - texture       : handle points to a CUDATextureData_t whose [array] is the CUarray
//
// Only the Memory -> Memory case is queued on the current stream; the three
// cases that involve a CUarray use the non-Async cuMemcpy* entry points.
void memory_t<CUDA>::asyncCopyTo(memory_v *dest,
                                 const uintptr_t bytes,
                                 const uintptr_t destOffset,
                                 const uintptr_t srcOffset){
  const CUstream &stream = *((CUstream*) dev->currentStream);
  const uintptr_t bytes_ = (bytes == 0) ? size : bytes;

  OCCA_CHECK((bytes_ + srcOffset) <= size);
  OCCA_CHECK((bytes_ + destOffset) <= dest->size);

  void *dstPtr, *srcPtr;

  // Linear memory keeps the CUdeviceptr* itself; textures expose their CUarray.
  // (These two selections used to be inverted.)
  if(!isTexture)
    srcPtr = handle;
  else
    srcPtr = (void*) ((CUDATextureData_t*) handle)->array;

  if( !(dest->isTexture) )
    dstPtr = dest->handle;
  else
    dstPtr = (void*) ((CUDATextureData_t*) dest->handle)->array;

  if(!isTexture){
    if(!dest->isTexture){
      OCCA_CUDA_CHECK("Memory: Asynchronous Copy To [Memory -> Memory]",
                      cuMemcpyDtoDAsync(*((CUdeviceptr*) dstPtr) + destOffset,
                                        *((CUdeviceptr*) srcPtr) + srcOffset,
                                        bytes_, stream) );
    }
    else{
      OCCA_CUDA_CHECK("Memory: Asynchronous Copy To [Memory -> Texture]",
                      cuMemcpyDtoA((CUarray) dstPtr, destOffset,
                                   *((CUdeviceptr*) srcPtr) + srcOffset,
                                   bytes_) );
    }
  }
  else{
    // Source is a texture: array -> device only when the destination is
    // linear memory, array -> array otherwise
    if(!dest->isTexture){
      OCCA_CUDA_CHECK("Memory: Asynchronous Copy To [Texture -> Memory]",
                      cuMemcpyAtoD(*((CUdeviceptr*) dstPtr) + destOffset,
                                   (CUarray) srcPtr, srcOffset,
                                   bytes_) );
    }
    else{
      OCCA_CUDA_CHECK("Memory: Asynchronous Copy To [Texture -> Texture]",
                      cuMemcpyAtoA((CUarray) dstPtr, destOffset,
                                   (CUarray) srcPtr, srcOffset,
                                   bytes_) );
    }
  }
}
// Copies from another memory object [source] into this CUDA memory object.
//   source     : source memory object (linear memory or texture)
//   bytes      : number of bytes to copy; 0 selects this object's [size]
//   destOffset : offset into this object, bounds-checked together with the byte count
//   srcOffset  : offset into [source], bounds-checked the same way
//
// Handle layout (as used by the host-pointer copy routines of this class):
//   - linear memory : handle points to a CUdeviceptr
//   - texture       : handle points to a CUDATextureData_t whose [array] is the CUarray
//
// Each error label is written [source kind -> destination kind].
void memory_t<CUDA>::copyFrom(const memory_v *source,
                              const uintptr_t bytes,
                              const uintptr_t destOffset,
                              const uintptr_t srcOffset){
  const uintptr_t bytes_ = (bytes == 0) ? size : bytes;

  OCCA_CHECK((bytes_ + destOffset) <= size);
  OCCA_CHECK((bytes_ + srcOffset) <= source->size);

  void *dstPtr, *srcPtr;

  // Linear memory keeps the CUdeviceptr* itself; textures expose their CUarray.
  // (These two selections used to be inverted.)
  if(!isTexture)
    dstPtr = handle;
  else
    dstPtr = (void*) ((CUDATextureData_t*) handle)->array;

  if( !(source->isTexture) )
    srcPtr = source->handle;
  else
    srcPtr = (void*) ((CUDATextureData_t*) source->handle)->array;

  if(!isTexture){
    // Destination is linear memory
    if(!source->isTexture){
      OCCA_CUDA_CHECK("Memory: Copy From [Memory -> Memory]",
                      cuMemcpyDtoD(*((CUdeviceptr*) dstPtr) + destOffset,
                                   *((CUdeviceptr*) srcPtr) + srcOffset,
                                   bytes_) );
    }
    else{
      OCCA_CUDA_CHECK("Memory: Copy From [Texture -> Memory]",
                      cuMemcpyAtoD(*((CUdeviceptr*) dstPtr) + destOffset,
                                   (CUarray) srcPtr, srcOffset,
                                   bytes_) );
    }
  }
  else{
    // Destination is a texture (CUarray)
    if(!source->isTexture){
      OCCA_CUDA_CHECK("Memory: Copy From [Memory -> Texture]",
                      cuMemcpyDtoA((CUarray) dstPtr, destOffset,
                                   *((CUdeviceptr*) srcPtr) + srcOffset,
                                   bytes_) );
    }
    else{
      OCCA_CUDA_CHECK("Memory: Copy From [Texture -> Texture]",
                      cuMemcpyAtoA((CUarray) dstPtr, destOffset,
                                   (CUarray) srcPtr, srcOffset,
                                   bytes_) );
    }
  }
}
// Looks up the filename stored under [id] in itsMap.
//   id : must satisfy 0 <= id < filesInDatabase, otherwise OCCA_CHECK fires
// The map access is done between mutex.lock() and mutex.unlock(); the range
// check on filesInDatabase happens before the lock is taken.
std::string getFilename(const int id){
  OCCA_CHECK((0 <= id) && (id < filesInDatabase),
             "File with ID [" << id << "] was not found");

  std::string filename;

  mutex.lock();
  filename = itsMap[id];
  mutex.unlock();

  return filename;
}
// Stores [type] as argument number [argPos] of the argument list.
//   list   : argument list to modify
//   argPos : zero-based slot; must satisfy 0 <= argPos < OCCA_MAX_ARGS
//   type   : stored in the slot as an occaType_t*
// list.argc is raised to (argPos + 1) when the slot lies past the current
// count; it is never lowered.
void OCCA_RFUNC occaArgumentListAddArg(occaArgumentList list,
                                       int argPos,
                                       void *type) {
  occaArgumentList_t &list_ = *list;

  // A negative position would skip the growth check below and write in
  // front of argv, so reject it up front
  OCCA_CHECK(0 <= argPos,
             "Kernel argument position [" << argPos << "] cannot be negative");

  if(list_.argc < (argPos + 1)) {
    OCCA_CHECK(argPos < OCCA_MAX_ARGS,
               "Kernels can only have at most [" << OCCA_MAX_ARGS << "] arguments,"
               << " [" << argPos << "] arguments were set");

    list_.argc = (argPos + 1);
  }

  list_.argv[argPos] = (occaType_t*) type;
}
// Queues a copy from this CUDA memory object to the host pointer [dest] on
// the device's current stream.
//   dest   : host destination pointer
//   bytes  : number of bytes to copy; 0 selects this object's [size]
//   offset : offset into this object, bounds-checked together with the byte count
// Textures are read through the CUarray stored in their CUDATextureData_t;
// linear memory is read through the CUdeviceptr that [handle] points to.
void memory_t<CUDA>::asyncCopyTo(void *dest,
                                 const uintptr_t bytes,
                                 const uintptr_t offset){
  const CUstream &stream = *((CUstream*) dev->currentStream);

  // A zero byte count means "the whole allocation"
  const uintptr_t bytes_ = (bytes != 0) ? bytes : size;

  OCCA_CHECK((bytes_ + offset) <= size);

  if(isTexture){
    OCCA_CUDA_CHECK("Texture Memory: Asynchronous Copy To",
                    cuMemcpyAtoHAsync(dest,((CUDATextureData_t*) handle)->array, offset, bytes_, stream) );
  }
  else{
    OCCA_CUDA_CHECK("Memory: Asynchronous Copy To",
                    cuMemcpyDtoHAsync(dest, *((CUdeviceptr*) handle) + offset, bytes_, stream) );
  }
}
// Queues a copy from the host pointer [source] into this CUDA memory object
// on the device's current stream.
//   source : host source pointer
//   bytes  : number of bytes to copy; 0 selects this object's [size]
//   offset : offset into this object, bounds-checked together with the byte count
// Textures are written through the CUarray stored in their CUDATextureData_t;
// linear memory is written through the CUdeviceptr that [handle] points to.
void memory_t<CUDA>::asyncCopyFrom(const void *source,
                                   const uintptr_t bytes,
                                   const uintptr_t offset){
  const CUstream &stream = *((CUstream*) dev->currentStream);

  // A zero byte count means "the whole allocation"
  const uintptr_t bytes_ = (bytes != 0) ? bytes : size;

  OCCA_CHECK((bytes_ + offset) <= size);

  if(isTexture){
    OCCA_CUDA_CHECK("Texture Memory: Asynchronous Copy From",
                    cuMemcpyHtoAAsync(((CUDATextureData_t*) handle)->array, offset, source, bytes_, stream) );
  }
  else{
    OCCA_CUDA_CHECK("Memory: Asynchronous Copy From",
                    cuMemcpyHtoDAsync(*((CUdeviceptr*) handle) + offset, source, bytes_, stream) );
  }
}
double timer::toc(std::string key){ double elapsedTime = 0.; if(profileApplication){ assert(key == keyStack.top()); OCCA_CHECK(key == keyStack.top(), "Error in timer " << key << '\n'); double currentTime = occa::currentTime(); elapsedTime = (currentTime - timeStack.top()); times[keyStack].timeTaken += elapsedTime; times[keyStack].numCalls++; keyStack.pop(); timeStack.pop(); } return elapsedTime; }
// Blocking copy from this CUDA memory object to the host pointer [dest].
//   dest   : host destination pointer
//   bytes  : number of bytes to copy; 0 selects this object's [size]
//   offset : offset into this object, bounds-checked together with the byte count
//
// Linear memory uses cuMemcpyDtoH, 1D textures use cuMemcpyAtoH, and textures
// of any other dimension go through a CUDA_MEMCPY2D descriptor followed by
// dev->finish().
void memory_t<CUDA>::copyTo(void *dest,
                            const uintptr_t bytes,
                            const uintptr_t offset){
  const uintptr_t bytes_ = (bytes == 0) ? size : bytes;

  OCCA_CHECK((bytes_ + offset) <= size);

  if(!isTexture){
    OCCA_CUDA_CHECK("Memory: Copy To",
                    cuMemcpyDtoH(dest, *((CUdeviceptr*) handle) + offset, bytes_) );
  }
  else{
    if(textureInfo.dim == 1){
      OCCA_CUDA_CHECK("Texture Memory: Copy To",
                      cuMemcpyAtoH(dest, ((CUDATextureData_t*) handle)->array, offset, bytes_) );
    }
    else{
      CUDA_MEMCPY2D info;

      info.srcXInBytes   = offset;
      info.srcY          = 0;
      info.srcMemoryType = CU_MEMORYTYPE_ARRAY;
      info.srcArray      = ((CUDATextureData_t*) handle)->array;

      info.dstXInBytes   = 0;
      info.dstY          = 0;
      info.dstMemoryType = CU_MEMORYTYPE_HOST;
      info.dstHost       = dest;
      // NOTE(review): a host pitch of 0 is kept from the original code;
      // confirm the driver treats it as tightly packed rows
      info.dstPitch      = 0;

      // One row is a full texture line; the row count follows from the byte count
      info.WidthInBytes = textureInfo.w * textureInfo.bytesInEntry;
      info.Height       = (bytes_ / info.WidthInBytes);

      // The result used to be discarded; report failures like the other paths
      OCCA_CUDA_CHECK("Texture Memory: Copy To",
                      cuMemcpy2D(&info) );

      dev->finish();
    }
  }
}
std::string getCachedName(const std::string &filename, const std::string &salt){ //---[ Place Somewhere Else ]----- char *c_cachePath = getenv("OCCA_CACHE_DIR"); std::string occaCachePath; if(c_cachePath == NULL){ std::stringstream ss; #if (OCCA_OS == LINUX_OS) || (OCCA_OS == OSX_OS) char *c_home = getenv("HOME"); ss << c_home << "/._occa"; std::string defaultCacheDir = ss.str(); mkdir(defaultCacheDir.c_str(), 0755); #else char *c_home = getenv("USERPROFILE"); ss << c_home << "\\AppData\\Local\\OCCA"; std::string defaultCacheDir = ss.str(); LPCSTR w_defaultCacheDir = defaultCacheDir.c_str(); BOOL mkdirStatus = CreateDirectoryA(w_defaultCacheDir, NULL); if(mkdirStatus == FALSE) assert(GetLastError() == ERROR_ALREADY_EXISTS); # if OCCA_64_BIT ss << "\\amd64"; // use different dir's fro 32 and 64 bit # else ss << "\\x86"; // use different dir's fro 32 and 64 bit # endif defaultCacheDir = ss.str(); w_defaultCacheDir = defaultCacheDir.c_str(); mkdirStatus = CreateDirectoryA(w_defaultCacheDir, NULL); if(mkdirStatus == FALSE) assert(GetLastError() == ERROR_ALREADY_EXISTS); #endif occaCachePath = defaultCacheDir; } else occaCachePath = c_cachePath; const int chars = occaCachePath.size(); OCCA_CHECK(chars > 0); #if (OCCA_OS == LINUX_OS) || (OCCA_OS == OSX_OS) const char slashChar = '/'; #else const char slashChar = '\\'; #endif // Take out the pesky //'s int pos = 0; for(int i = 0; i < chars; ++i){ if(occaCachePath[i] == slashChar) while(i < (chars - 1) && occaCachePath[i + 1] == slashChar) ++i; occaCachePath[pos++] = occaCachePath[i]; } if(occaCachePath[pos - 1] != slashChar){ if(pos != chars) occaCachePath[pos] = slashChar; else occaCachePath += slashChar; } //================================ const std::string fileContents = readFile(filename); const std::string contentsSHA = fnv(fileContents + salt); // Only taking the first 16 characters return occaCachePath + contentsSHA.substr(0, 16); }
// Sets up a COI (MIC) device: selects the engine, makes sure the cached
// [occaCOIMain] binary exists (compiling it when this process obtains the
// file through haveFile), starts the sink process from that binary and
// fetches the 25 kernel wrapper function handles.
//   device          : engine index, checked against COIEngineGetCount
//   memoryAllocated : initial buffer space in bytes for the sink process;
//                     0 selects the 4 GB default
void device_t<COI>::setup(const int device, const int memoryAllocated){
  data = new COIDeviceData_t;

  OCCA_EXTRACT_DATA(COI, Device);

  uint32_t deviceCount;
  OCCA_COI_CHECK("Device: Get Count",
                 COIEngineGetCount(COI_ISA_MIC, &deviceCount));

  OCCA_CHECK(device < deviceCount);

  OCCA_COI_CHECK("Device: Get Handle",
                 COIEngineGetHandle(COI_ISA_MIC, device, &data_.deviceID) );

  // The cache name depends on the embedded source, so a changed occaCOIMain
  // maps to a different cached binary
  std::stringstream salt;
  salt << "COI" << occaCOIMain;

  std::string cachedBinary = getCachedName("occaCOIMain", salt.str());

  struct stat buffer;
  bool fileExists = (stat(cachedBinary.c_str(), &buffer) == 0);

  if(fileExists)
    std::cout << "Found cached binary of [occaCOIMain] in [" << cachedBinary << "]\n";
  else{
    //---[ Write File ]-----------------
    std::string prefix, name;

    getFilePrefixAndName(cachedBinary, prefix, name);

    const std::string iCachedBinary = prefix + "i_" + name;

    if(haveFile(cachedBinary)){
      std::cout << "Making [" << iCachedBinary << "]\n";

      std::ofstream fs;
      fs.open(iCachedBinary.c_str());

      fs << occaCOIMain;

      fs.close();

      std::stringstream command;

      command << dev->dHandle->compiler
              << " -o " << cachedBinary
              << " -x c++"
              << ' '    << dev->dHandle->compilerFlags
              << ' '    << iCachedBinary;

      const std::string &sCommand = command.str();

      std::cout << "Compiling [" << functionName << "]\n" << sCommand << "\n\n";

      // NOTE(review): the compiler's exit status is not inspected
      system(sCommand.c_str());

      releaseFile(cachedBinary);
    }
    else
      waitForFile(cachedBinary);
  }

  // [-] Tentative
  std::string SINK_LD_LIBRARY_PATH;

  char *c_SINK_LD_LIBRARY_PATH = getenv("SINK_LD_LIBRARY_PATH");
  if(c_SINK_LD_LIBRARY_PATH != NULL)
    SINK_LD_LIBRARY_PATH = std::string(c_SINK_LD_LIBRARY_PATH);

  // The 4 GB default must be computed in 64 bits: (4 << 30) overflows int
  // and does not yield 4 GB
  const uint64_t initialBufferSpace = (memoryAllocated
                                       ? (uint64_t) memoryAllocated
                                       : (((uint64_t) 4) << 30)); // 4 GB

  OCCA_COI_CHECK("Device: Initializing",
                 COIProcessCreateFromFile(data_.deviceID,
                                          cachedBinary.c_str(),
                                          0   , NULL,
                                          true, NULL,
                                          true, NULL,
                                          initialBufferSpace,
                                          SINK_LD_LIBRARY_PATH.c_str(),
                                          &(data_.chiefID)) );

  const char *kernelNames[] = {"occaKernelWith1Argument"  , "occaKernelWith2Arguments" , "occaKernelWith3Arguments" ,
                               "occaKernelWith4Arguments" , "occaKernelWith5Arguments" , "occaKernelWith6Arguments" ,
                               "occaKernelWith7Arguments" , "occaKernelWith8Arguments" , "occaKernelWith9Arguments" ,
                               "occaKernelWith10Arguments", "occaKernelWith11Arguments", "occaKernelWith12Arguments",
                               "occaKernelWith13Arguments", "occaKernelWith14Arguments", "occaKernelWith15Arguments",
                               "occaKernelWith16Arguments", "occaKernelWith17Arguments", "occaKernelWith18Arguments",
                               "occaKernelWith19Arguments", "occaKernelWith20Arguments", "occaKernelWith21Arguments",
                               "occaKernelWith22Arguments", "occaKernelWith23Arguments", "occaKernelWith24Arguments",
                               "occaKernelWith25Arguments"};

  // [-] More hard-coding, if you know what I mean
  OCCA_COI_CHECK("Device: Getting Kernel Wrappers",
                 COIProcessGetFunctionHandles(data_.chiefID,
                                              25,
                                              kernelNames,
                                              data_.kernelWrapper));
}