Beispiel #1
0
  // Queues a buffer-to-buffer copy from this COI memory into [dest].
  //
  //   dest       : destination memory, its handle is read as a coiMemory
  //   bytes      : number of bytes to copy, 0 means "this memory's full size"
  //   destOffset : byte offset into [dest]
  //   srcOffset  : byte offset into this memory
  //
  // The call first waits on the current stream's last event, then issues the
  // copy and stores the copy's event as the stream's new last event.  It does
  // not wait on that new event.
  void memory_t<COI>::asyncCopyTo(memory_v *dest,
                                  const uintptr_t bytes,
                                  const uintptr_t destOffset,
                                  const uintptr_t srcOffset){
    coiStream &stream = *((coiStream*) dev->currentStream);

    const uintptr_t bytes_ = (bytes == 0) ? size : bytes;

    // Both ranges have to fit inside their respective allocations
    OCCA_CHECK((bytes_ + destOffset) <= dest->size);
    OCCA_CHECK((bytes_ + srcOffset)  <=       size);

    // Wait for the previous operation recorded on this stream
    OCCA_COI_CHECK("Memory: Blocking on Memory Transfer",
                   COIEventWait(1, &(stream.lastEvent),
                                -1, true, NULL, NULL) );

    // The label names this operation so a failure is reported against the
    // right call (it used to read "Copy From")
    OCCA_COI_CHECK("Memory: Asynchronous Copy To",
                   COIBufferCopy(*((coiMemory*) dest->handle),
                                 *((coiMemory*) handle),
                                 destOffset,
                                 srcOffset,
                                 bytes_,
                                 COI_COPY_UNSPECIFIED,
                                 false, NULL,
                                 &(stream.lastEvent)));
  }
Beispiel #2
0
  // Writes data from [source] into this COI memory and waits for the write.
  //
  //   source : memory to read from
  //   bytes  : number of bytes to write, 0 means "this memory's full size"
  //   offset : byte offset into this memory
  //
  // The stream's last event is waited on before the write is issued and again
  // afterwards, so the transfer has been waited on when this returns.
  void memory_t<COI>::copyFrom(const void *source,
                               const uintptr_t bytes,
                               const uintptr_t offset){
    coiStream *currentStream = (coiStream*) dev->currentStream;

    const uintptr_t bytes_ = (bytes == 0) ? size : bytes;

    OCCA_CHECK((bytes_ + offset) <= size);

    // Previous operation on the stream
    OCCA_COI_CHECK("Memory: Blocking on Memory Transfer",
                   COIEventWait(1, &(currentStream->lastEvent),
                                -1, true, NULL, NULL) );

    // The write itself, its event replaces the stream's last event
    OCCA_COI_CHECK("Memory: Copy From",
                   COIBufferWrite(*((coiMemory*) handle),
                                  offset,
                                  source,
                                  bytes_,
                                  COI_COPY_UNSPECIFIED,
                                  false, NULL,
                                  &(currentStream->lastEvent)));

    // Wait on the write that was just issued
    OCCA_COI_CHECK("Memory: Blocking on Memory Transfer",
                   COIEventWait(1, &(currentStream->lastEvent),
                                -1, true, NULL, NULL) );
  }
Beispiel #3
0
  // Closes the timing region on top of the stacks, which must match [key].
  //
  // Returns the time elapsed since the matching entry was pushed on timeStack
  // when kernel profiling is enabled, 0 otherwise.  Nothing is touched when
  // application profiling is disabled.  [kernel] is not used by the active
  // code (see the commented-out line below).
  double timer::toc(std::string key, occa::kernel &kernel){
    if(!profileApplication)
      return 0.;

    assert(key == keyStack.top());

    OCCA_CHECK(key == keyStack.top(),
               "Error in timer " << key << '\n');

    double elapsedTime = 0.;

    if(profileKernels){
      // Let the device finish before reading the clock
      if(deviceInitialized)
        occaHandle.finish();

      const double now = occa::currentTime();
      elapsedTime = (now - timeStack.top());

      // times[keyStack].timeTaken += kernel.timeTaken();
      times[keyStack].timeTaken += elapsedTime;
      times[keyStack].numCalls++;
    }

    // The region is closed whether or not it was recorded
    keyStack.pop();
    timeStack.pop();

    return elapsedTime;
  }
Beispiel #4
0
  // Copies from this memory into [dest], both living on the device.
  //
  //   dest       : destination memory (linear memory or texture)
  //   bytes      : number of bytes to copy, 0 means "this memory's full size"
  //   destOffset : byte offset into [dest]
  //   srcOffset  : byte offset into this memory
  //
  // Only the [Memory -> Memory] case is issued on the current stream.  The
  // three cases involving a texture use the driver calls that take no stream.
  void memory_t<CUDA>::asyncCopyTo(memory_v *dest,
                                   const uintptr_t bytes,
                                   const uintptr_t destOffset,
                                   const uintptr_t srcOffset){
    const CUstream &stream = *((CUstream*) dev->currentStream);
    const uintptr_t bytes_ = (bytes == 0) ? size : bytes;

    OCCA_CHECK((bytes_ + srcOffset)  <= size);
    OCCA_CHECK((bytes_ + destOffset) <= dest->size);

    void *dstPtr, *srcPtr;

    // Linear memory: [handle] points at a CUdeviceptr.
    // Texture      : [handle] points at a CUDATextureData_t holding the CUarray.
    // (These two selections used to be swapped.)
    if(isTexture)
      srcPtr = (void*) ((CUDATextureData_t*) handle)->array;
    else
      srcPtr = handle;

    if(dest->isTexture)
      dstPtr = (void*) ((CUDATextureData_t*) dest->handle)->array;
    else
      dstPtr = dest->handle;

    if(!isTexture){
      if(!dest->isTexture){
        OCCA_CUDA_CHECK("Memory: Asynchronous Copy To [Memory -> Memory]",
                        cuMemcpyDtoDAsync(*((CUdeviceptr*) dstPtr) + destOffset,
                                          *((CUdeviceptr*) srcPtr) + srcOffset,
                                          bytes_, stream) );
      }
      else{
        OCCA_CUDA_CHECK("Memory: Asynchronous Copy To [Memory -> Texture]",
                        cuMemcpyDtoA((CUarray) dstPtr         , destOffset,
                                     *((CUdeviceptr*) srcPtr) + srcOffset,
                                     bytes_) );
      }
    }
    else{
      // Source is a texture: the destination kind picks AtoD vs AtoA
      if(!dest->isTexture){
        OCCA_CUDA_CHECK("Memory: Asynchronous Copy To [Texture -> Memory]",
                        cuMemcpyAtoD(*((CUdeviceptr*) dstPtr) + destOffset,
                                     (CUarray) srcPtr         , srcOffset,
                                     bytes_) );
      }
      else{
        OCCA_CUDA_CHECK("Memory: Asynchronous Copy To [Texture -> Texture]",
                        cuMemcpyAtoA((CUarray) dstPtr, destOffset,
                                     (CUarray) srcPtr, srcOffset,
                                     bytes_) );
      }
    }
  }
Beispiel #5
0
  // Copies from [source] into this memory, both living on the device.
  //
  //   source     : memory to read from (linear memory or texture)
  //   bytes      : number of bytes to copy, 0 means "this memory's full size"
  //   destOffset : byte offset into this memory
  //   srcOffset  : byte offset into [source]
  //
  // The bracketed part of each error label reads [source kind -> this kind].
  void memory_t<CUDA>::copyFrom(const memory_v *source,
                                const uintptr_t bytes,
                                const uintptr_t destOffset,
                                const uintptr_t srcOffset){
    const uintptr_t bytes_ = (bytes == 0) ? size : bytes;

    OCCA_CHECK((bytes_ + destOffset) <= size);
    OCCA_CHECK((bytes_ + srcOffset)  <= source->size);

    void *dstPtr, *srcPtr;

    // Linear memory: [handle] points at a CUdeviceptr.
    // Texture      : [handle] points at a CUDATextureData_t holding the CUarray.
    // (These two selections used to be swapped.)
    if(isTexture)
      dstPtr = (void*) ((CUDATextureData_t*) handle)->array;
    else
      dstPtr = handle;

    if(source->isTexture)
      srcPtr = (void*) ((CUDATextureData_t*) source->handle)->array;
    else
      srcPtr = source->handle;

    // Dispatch on (source kind, destination kind) so that every driver call
    // receives a CUdeviceptr / CUarray of the kind it expects
    if(!source->isTexture){
      if(!isTexture){
        OCCA_CUDA_CHECK("Memory: Copy From [Memory -> Memory]",
                        cuMemcpyDtoD(*((CUdeviceptr*) dstPtr) + destOffset,
                                     *((CUdeviceptr*) srcPtr) + srcOffset,
                                     bytes_) );
      }
      else{
        OCCA_CUDA_CHECK("Memory: Copy From [Memory -> Texture]",
                        cuMemcpyDtoA((CUarray) dstPtr         , destOffset,
                                     *((CUdeviceptr*) srcPtr) + srcOffset,
                                     bytes_) );
      }
    }
    else{
      if(!isTexture){
        OCCA_CUDA_CHECK("Memory: Copy From [Texture -> Memory]",
                        cuMemcpyAtoD(*((CUdeviceptr*) dstPtr) + destOffset,
                                     (CUarray) srcPtr         , srcOffset,
                                     bytes_) );
      }
      else{
        OCCA_CUDA_CHECK("Memory: Copy From [Texture -> Texture]",
                        cuMemcpyAtoA((CUarray) dstPtr, destOffset,
                                     (CUarray) srcPtr, srcOffset,
                                     bytes_) );
      }
    }
  }
Beispiel #6
0
    // Returns the filename stored in itsMap under [id].
    //
    // [id] must lie in [0, filesInDatabase).  The map is read while holding
    // the mutex, and the copy made under the lock is what gets returned.
    std::string getFilename(const int id){
      OCCA_CHECK((0 <= id) && (id < filesInDatabase),
                 "File with ID [" << id << "] was not found");

      std::string filename;

      mutex.lock();
      filename = itsMap[id];
      mutex.unlock();

      return filename;
    }
Beispiel #7
0
// Stores [type] as the argument at position [argPos] of [list].
//
//   list   : argument list to modify
//   argPos : zero-based position, must lie in [0, OCCA_MAX_ARGS)
//   type   : stored as an occaType_t*
//
// The list's argc grows to (argPos + 1) when the position lies past the
// current count; it never shrinks.
void OCCA_RFUNC occaArgumentListAddArg(occaArgumentList list,
                                       int argPos,
                                       void *type) {

  // Validate before touching argv.  A negative position used to bypass the
  // range check entirely and index in front of the array.
  OCCA_CHECK(0 <= argPos,
             "Kernel argument positions cannot be negative,"
             << " [" << argPos << "] was given");

  OCCA_CHECK(argPos < OCCA_MAX_ARGS,
             "Kernels can only have at most [" << OCCA_MAX_ARGS << "] arguments,"
             << " [" << argPos << "] arguments were set");

  occaArgumentList_t &list_ = *list;

  if(list_.argc < (argPos + 1))
    list_.argc = (argPos + 1);

  list_.argv[argPos] = (occaType_t*) type;
}
Beispiel #8
0
  // Queues a copy from this device memory into [dest] on the current stream.
  //
  //   dest   : memory to write to
  //   bytes  : number of bytes to copy, 0 means "this memory's full size"
  //   offset : byte offset into this memory
  //
  // Textures are read through their CUarray, linear memory through its
  // CUdeviceptr.
  void memory_t<CUDA>::asyncCopyTo(void *dest,
                                   const uintptr_t bytes,
                                   const uintptr_t offset){
    const CUstream &currentStream = *((CUstream*) dev->currentStream);
    const uintptr_t bytes_ = (bytes == 0) ? size : bytes;

    OCCA_CHECK((bytes_ + offset) <= size);

    if(isTexture){
      OCCA_CUDA_CHECK("Texture Memory: Asynchronous Copy To",
                      cuMemcpyAtoHAsync(dest,
                                        ((CUDATextureData_t*) handle)->array, offset,
                                        bytes_,
                                        currentStream) );
    }
    else{
      OCCA_CUDA_CHECK("Memory: Asynchronous Copy To",
                      cuMemcpyDtoHAsync(dest,
                                        *((CUdeviceptr*) handle) + offset,
                                        bytes_,
                                        currentStream) );
    }
  }
Beispiel #9
0
  // Queues a copy from [source] into this device memory on the current stream.
  //
  //   source : memory to read from
  //   bytes  : number of bytes to copy, 0 means "this memory's full size"
  //   offset : byte offset into this memory
  //
  // Textures are written through their CUarray, linear memory through its
  // CUdeviceptr.
  void memory_t<CUDA>::asyncCopyFrom(const void *source,
                                     const uintptr_t bytes,
                                     const uintptr_t offset){
    const CUstream &currentStream = *((CUstream*) dev->currentStream);
    const uintptr_t bytes_ = (bytes == 0) ? size : bytes;

    OCCA_CHECK((bytes_ + offset) <= size);

    if(isTexture){
      OCCA_CUDA_CHECK("Texture Memory: Asynchronous Copy From",
                      cuMemcpyHtoAAsync(((CUDATextureData_t*) handle)->array, offset,
                                        source,
                                        bytes_,
                                        currentStream) );
    }
    else{
      OCCA_CUDA_CHECK("Memory: Asynchronous Copy From",
                      cuMemcpyHtoDAsync(*((CUdeviceptr*) handle) + offset,
                                        source,
                                        bytes_,
                                        currentStream) );
    }
  }
Beispiel #10
0
  // Closes the timing region on top of the stacks, which must match [key].
  //
  // Adds the time elapsed since the matching entry was pushed on timeStack to
  // the accumulated entry in [times], bumps its call count, pops both stacks
  // and returns the elapsed time.  Returns 0 without touching anything when
  // application profiling is disabled.
  double timer::toc(std::string key){
    if(!profileApplication)
      return 0.;

    assert(key == keyStack.top());

    OCCA_CHECK(key == keyStack.top(),
               "Error in timer " << key << '\n');

    const double now         = occa::currentTime();
    const double elapsedTime = (now - timeStack.top());

    times[keyStack].timeTaken += elapsedTime;
    times[keyStack].numCalls++;

    keyStack.pop();
    timeStack.pop();

    return elapsedTime;
  }
Beispiel #11
0
  // Copies from this device memory into [dest].
  //
  //   dest   : memory to write to
  //   bytes  : number of bytes to copy, 0 means "this memory's full size"
  //   offset : byte offset into this memory
  //
  // Linear memory and 1D textures use a single driver copy.  Textures with
  // any other dimension are copied with a 2D descriptor whose row width is
  // (textureInfo.w * textureInfo.bytesInEntry) and whose height is
  // bytes / row width, followed by dev->finish().
  void memory_t<CUDA>::copyTo(void *dest,
                              const uintptr_t bytes,
                              const uintptr_t offset){
    const uintptr_t bytes_ = (bytes == 0) ? size : bytes;

    OCCA_CHECK((bytes_ + offset) <= size);

    if(!isTexture)
      OCCA_CUDA_CHECK("Memory: Copy To",
                      cuMemcpyDtoH(dest, *((CUdeviceptr*) handle) + offset, bytes_) );
    else{
      if(textureInfo.dim == 1)
        OCCA_CUDA_CHECK("Texture Memory: Copy To",
                        cuMemcpyAtoH(dest, ((CUDATextureData_t*) handle)->array, offset, bytes_) );
      else{
        CUDA_MEMCPY2D info;

        info.srcXInBytes   = offset;
        info.srcY          = 0;
        info.srcMemoryType = CU_MEMORYTYPE_ARRAY;
        info.srcArray      = ((CUDATextureData_t*) handle)->array;

        info.dstXInBytes   = 0;
        info.dstY          = 0;
        info.dstMemoryType = CU_MEMORYTYPE_HOST;
        info.dstHost       = dest;
        info.dstPitch      = 0;

        info.WidthInBytes = textureInfo.w * textureInfo.bytesInEntry;
        info.Height       = (bytes_ / info.WidthInBytes);

        // Report a failed 2D copy like every other driver call in this
        // function instead of silently dropping the return code
        OCCA_CUDA_CHECK("Texture Memory: Copy To",
                        cuMemcpy2D(&info) );

        dev->finish();
      }
    }
  }
Beispiel #12
0
  // Builds the cache path for [filename].
  //
  //   filename : file whose contents are hashed
  //   salt     : appended to the contents before hashing
  //
  // The cache directory is OCCA_CACHE_DIR when that variable is set.
  // Otherwise a default under HOME (Linux / OSX) or USERPROFILE (other
  // platforms) is used and created.  Repeated separators in the directory are
  // collapsed and a trailing separator is guaranteed.
  //
  // Returns the directory followed by the first 16 characters of
  // fnv(contents + salt).
  std::string getCachedName(const std::string &filename,
                            const std::string &salt){
    //---[ Place Somewhere Else ]-----
    char *c_cachePath = getenv("OCCA_CACHE_DIR");

    std::string occaCachePath;

    if(c_cachePath == NULL){
      std::stringstream ss;
#if (OCCA_OS == LINUX_OS) || (OCCA_OS == OSX_OS)
      char *c_home = getenv("HOME");

      // Streaming a NULL char* is undefined, fail with a clear message instead
      OCCA_CHECK(c_home != NULL,
                 "Neither OCCA_CACHE_DIR nor HOME is defined, cannot locate the cache directory");

      ss << c_home << "/._occa";

      std::string defaultCacheDir = ss.str();
      mkdir(defaultCacheDir.c_str(), 0755);
#else
      char *c_home = getenv("USERPROFILE");

      // Streaming a NULL char* is undefined, fail with a clear message instead
      OCCA_CHECK(c_home != NULL,
                 "Neither OCCA_CACHE_DIR nor USERPROFILE is defined, cannot locate the cache directory");

      ss << c_home << "\\AppData\\Local\\OCCA";

      std::string defaultCacheDir = ss.str();
      LPCSTR w_defaultCacheDir = defaultCacheDir.c_str();
      BOOL mkdirStatus = CreateDirectoryA(w_defaultCacheDir, NULL);

      if(mkdirStatus == FALSE)
        assert(GetLastError() == ERROR_ALREADY_EXISTS);

#  if OCCA_64_BIT
      ss << "\\amd64";  // use different directories for 32 and 64 bit
#  else
      ss << "\\x86";    // use different directories for 32 and 64 bit
#  endif

      defaultCacheDir = ss.str();

      w_defaultCacheDir = defaultCacheDir.c_str();
      mkdirStatus = CreateDirectoryA(w_defaultCacheDir, NULL);

      if(mkdirStatus == FALSE)
        assert(GetLastError() == ERROR_ALREADY_EXISTS);
#endif
      occaCachePath = defaultCacheDir;
    }
    else
      occaCachePath = c_cachePath;

    const int chars = occaCachePath.size();

    OCCA_CHECK(chars > 0);

#if (OCCA_OS == LINUX_OS) || (OCCA_OS == OSX_OS)
    const char slashChar = '/';
#else
    const char slashChar = '\\';
#endif

    // Take out the pesky //'s by compacting the string in place:
    // [pos] is the write cursor, [i] the read cursor
    int pos = 0;

    for(int i = 0; i < chars; ++i){
      if(occaCachePath[i] == slashChar)
        while(i < (chars - 1) && occaCachePath[i + 1] == slashChar)
          ++i;

      occaCachePath[pos++] = occaCachePath[i];
    }

    // Cut off the stale characters the compaction left behind the write
    // cursor; without this they end up in the returned path
    occaCachePath.resize(pos);

    if(occaCachePath[pos - 1] != slashChar)
      occaCachePath += slashChar;
    //================================

    const std::string fileContents = readFile(filename);
    const std::string contentsSHA  = fnv(fileContents + salt);

    // Only taking the first 16 characters
    return occaCachePath + contentsSHA.substr(0, 16);
  }
Beispiel #13
0
  // Sets up the COI device: picks the engine, makes sure the occaCOIMain
  // binary is in the cache (compiling it when needed), starts the process on
  // the device and fetches the handles of the kernel wrapper functions.
  //
  //   device          : index of the COI_ISA_MIC engine to use
  //   memoryAllocated : initial buffer space passed to the process in bytes,
  //                     0 selects the 4 GB default
  void device_t<COI>::setup(const int device, const int memoryAllocated){
    data = new COIDeviceData_t;

    OCCA_EXTRACT_DATA(COI, Device);

    uint32_t deviceCount;
    OCCA_COI_CHECK("Device: Get Count",
                   COIEngineGetCount(COI_ISA_MIC, &deviceCount));

    OCCA_CHECK(device < deviceCount);

    OCCA_COI_CHECK("Device: Get Handle",
                   COIEngineGetHandle(COI_ISA_MIC, device, &data_.deviceID) );

    // The cached name depends on the source of occaCOIMain itself
    std::stringstream salt;
    salt << "COI"
         << occaCOIMain;

    std::string cachedBinary = getCachedName("occaCOIMain", salt.str());

    struct stat buffer;
    bool fileExists = (stat(cachedBinary.c_str(), &buffer) == 0);

    if(fileExists)
      std::cout << "Found cached binary of [occaCOIMain] in [" << cachedBinary << "]\n";
    else{
      //---[ Write File ]-----------------
      std::string prefix, name;

      getFilePrefixAndName(cachedBinary, prefix, name);

      const std::string iCachedBinary = prefix + "i_" + name;

      // NOTE(review): haveFile / releaseFile / waitForFile look like a
      // per-file lock so only one caller compiles - confirm
      if(haveFile(cachedBinary)){
        std::cout << "Making [" << iCachedBinary << "]\n";

        std::ofstream fs;
        fs.open(iCachedBinary.c_str());

        fs << occaCOIMain;

        fs.close();

        std::stringstream command;

        command << dev->dHandle->compiler
                << " -o " << cachedBinary
                << " -x c++"
                << ' '    << dev->dHandle->compilerFlags
                << ' '    << iCachedBinary;

        const std::string &sCommand = command.str();

        // NOTE(review): functionName is not declared in this function -
        // confirm where it comes from
        std::cout << "Compiling [" << functionName << "]\n" << sCommand << "\n\n";

        system(sCommand.c_str());

        releaseFile(cachedBinary);
      }
      else
        waitForFile(cachedBinary);
    }

    // [-] Tentative
    std::string SINK_LD_LIBRARY_PATH;

    char *c_SINK_LD_LIBRARY_PATH = getenv("SINK_LD_LIBRARY_PATH");
    if(c_SINK_LD_LIBRARY_PATH != NULL)
      SINK_LD_LIBRARY_PATH = std::string(c_SINK_LD_LIBRARY_PATH);

    // Computed in 64 bits: (4 << 30) does not fit in an int, so the previous
    // default was not 4 GB
    const uint64_t initialBufferSpace = (memoryAllocated
                                         ? ((uint64_t) memoryAllocated)
                                         : (((uint64_t) 4) << 30)); // 4 GB

    OCCA_COI_CHECK("Device: Initializing",
                   COIProcessCreateFromFile(data_.deviceID,
                                            cachedBinary.c_str(),
                                            0   , NULL,
                                            true, NULL,
                                            true, NULL,
                                            initialBufferSpace,
                                            SINK_LD_LIBRARY_PATH.c_str(),
                                            &(data_.chiefID)) );

    const char *kernelNames[] = {"occaKernelWith1Argument"  , "occaKernelWith2Arguments" , "occaKernelWith3Arguments" ,
                                 "occaKernelWith4Arguments" , "occaKernelWith5Arguments" , "occaKernelWith6Arguments" ,
                                 "occaKernelWith7Arguments" , "occaKernelWith8Arguments" , "occaKernelWith9Arguments" ,
                                 "occaKernelWith10Arguments", "occaKernelWith11Arguments", "occaKernelWith12Arguments",
                                 "occaKernelWith13Arguments", "occaKernelWith14Arguments", "occaKernelWith15Arguments",
                                 "occaKernelWith16Arguments", "occaKernelWith17Arguments", "occaKernelWith18Arguments",
                                 "occaKernelWith19Arguments", "occaKernelWith20Arguments", "occaKernelWith21Arguments",
                                 "occaKernelWith22Arguments", "occaKernelWith23Arguments", "occaKernelWith24Arguments",
                                 "occaKernelWith25Arguments"};

    // Derived from the table above so the count cannot drift from the names
    const uint32_t kernelNameCount = (uint32_t) (sizeof(kernelNames) / sizeof(kernelNames[0]));

    // [-] More hard-coding, if you know what I mean
    OCCA_COI_CHECK("Device: Getting Kernel Wrappers",
                   COIProcessGetFunctionHandles(data_.chiefID,
                                                kernelNameCount,
                                                kernelNames,
                                                data_.kernelWrapper));
  }