Example #1
0
  // Builds the identifier for this CUDA device.
  //
  // The identifier's mode is set to CUDA and flagMap["sm_arch"] records the
  // target architecture:
  //   - if compilerFlags has no "-arch=sm_" flag, the compute capability is
  //     queried from the driver and stored as "<major><minor>";
  //   - otherwise the flag token found in compilerFlags (from "-arch=sm_" up
  //     to the next space or the end of the string) is stored.
  //
  // NOTE(review): the two branches store different shapes ("35" versus
  // "-arch=sm_35"); confirm which form consumers of "sm_arch" expect.
  deviceIdentifier device_t<CUDA>::getIdentifier() const {
    deviceIdentifier dID;

    dID.mode_ = CUDA;

    const size_t archPos = compilerFlags.find("-arch=sm_");

    if(archPos == std::string::npos){
      OCCA_EXTRACT_DATA(CUDA, Device);

      std::stringstream archSM_;

      int major, minor;
      OCCA_CUDA_CHECK("Getting CUDA Device Arch",
                      cuDeviceComputeCapability(&major, &minor, data_.device) );

      archSM_ << major << minor;

      dID.flagMap["sm_arch"] = archSM_.str();
    }
    else{
      // The previous pointer scan tested the start pointer while advancing
      // the end pointer, so it never terminated; let std::string find the
      // end of the token instead.
      const size_t archEnd = compilerFlags.find(' ', archPos);

      const size_t archLength = ((archEnd == std::string::npos)
                                 ? std::string::npos
                                 : (archEnd - archPos));

      dID.flagMap["sm_arch"] = compilerFlags.substr(archPos, archLength);
    }

    return dID;
  }
Example #2
0
  // Tears down this CUDA device: destroys the context stored in the device
  // data, then deletes the device data object itself.
  void device_t<CUDA>::free(){
    OCCA_EXTRACT_DATA(CUDA, Device);

    OCCA_CUDA_CHECK("Device: Freeing Context",
                    cuCtxDestroy(data_.context) );

    // Delete through the concrete data type rather than the raw [data] member
    CUDADeviceData_t *deviceData = (CUDADeviceData_t*) data;

    delete deviceData;
  }
Example #3
0
  // Creates a CUDA stream with CU_STREAM_DEFAULT flags.
  //
  // The CUstream handle is heap-allocated here and returned as the stream.
  stream device_t<CUDA>::createStream(){
    OCCA_EXTRACT_DATA(CUDA, Device);

    CUstream *newStream = new CUstream;

    OCCA_CUDA_CHECK("Device: createStream",
                    cuStreamCreate(newStream, CU_STREAM_DEFAULT));

    return newStream;
  }
Example #4
0
  // Tears down this COI device: destroys the chief process, then deletes the
  // device data object.
  void device_t<COI>::free(){
    OCCA_EXTRACT_DATA(COI, Device);

    OCCA_COI_CHECK("Device: Freeing Chief Processes",
                   COIProcessDestroy(data_.chiefID,
                                     -1,
                                     false,
                                     NULL,
                                     NULL));

    // [data] is allocated as a COIDeviceData_t in setup(); delete it through
    // that type (as the CUDA backend does) instead of through the raw member,
    // so the delete is well-defined.
    delete (COIDeviceData_t*) data;
  }
Example #5
0
  // Generates a stream for this COI device by creating a pipeline on the
  // chief process.
  //
  // The coiStream wrapper is heap-allocated here; its handle receives the
  // created pipeline and the wrapper is returned as the stream.
  stream device_t<COI>::genStream(){
    OCCA_EXTRACT_DATA(COI, Device);

    coiStream *newStream = new coiStream;

    OCCA_COI_CHECK("Device: Generating a Stream",
                   COIPipelineCreate(data_.chiefID,
                                     NULL, 0,
                                     &newStream->handle) );

    return newStream;
  }
Example #6
0
  // Returns the device's warp size.
  //
  // The value is cached in simdWidth_: the driver is only queried while
  // simdWidth_ is still zero.
  int device_t<CUDA>::simdWidth(){
    if(!simdWidth_){
      OCCA_EXTRACT_DATA(CUDA, Device);

      OCCA_CUDA_CHECK("Device: Get Warp Size",
                      cuDeviceGetAttribute(&simdWidth_,
                                           CU_DEVICE_ATTRIBUTE_WARP_SIZE,
                                           data_.device) );
    }

    return simdWidth_;
  }
Example #7
0
  // Loads a compiled CUDA module from the file [filename] and looks up the
  // function named [functionName_] in it.
  //
  // functionName is set to functionName_, and the module and function
  // handles are stored in this kernel's data. Returns this kernel.
  kernel_t<CUDA>* kernel_t<CUDA>::buildFromBinary(const std::string &filename,
                                                 const std::string &functionName_){
    OCCA_EXTRACT_DATA(CUDA, Kernel);

    functionName = functionName_;

    CUmodule   &moduleHandle   = data_.module;
    CUfunction &functionHandle = data_.function;

    OCCA_CUDA_CHECK("Kernel (" + functionName + ") : Loading Module",
                    cuModuleLoad(&moduleHandle, filename.c_str()));

    OCCA_CUDA_CHECK("Kernel (" + functionName + ") : Loading Function",
                    cuModuleGetFunction(&functionHandle, moduleHandle, functionName.c_str()));

    return this;
  }
Example #8
0
  // Loads a CUDA module from the in-memory image [cache] and looks up the
  // function named [functionName_] in it.
  //
  // functionName is set to functionName_, and the module and function
  // handles are stored in this kernel's data. Returns this kernel.
  kernel_t<CUDA>* kernel_t<CUDA>::loadFromLibrary(const char *cache,
                                                  const std::string &functionName_){
    OCCA_EXTRACT_DATA(CUDA, Kernel);

    functionName = functionName_;

    CUmodule   &moduleHandle   = data_.module;
    CUfunction &functionHandle = data_.function;

    OCCA_CUDA_CHECK("Kernel (" + functionName + ") : Loading Module",
                    cuModuleLoadData(&moduleHandle, cache));

    OCCA_CUDA_CHECK("Kernel (" + functionName + ") : Loading Function",
                    cuModuleGetFunction(&functionHandle, moduleHandle, functionName.c_str()));

    return this;
  }
Example #9
0
  // Creates a COI kernel bound to this device and builds it from the binary
  // [filename], using [functionName] as the entry point.
  //
  // The new kernel gets its own COIKernelData_t, which shares this device's
  // chief process ID. Returns the heap-allocated kernel.
  kernel_v* device_t<COI>::buildKernelFromBinary(const std::string &filename,
                                                 const std::string &functionName){
    OCCA_EXTRACT_DATA(COI, Device);

    kernel_v *newKernel = new kernel_t<COI>;
    newKernel->dev = dev;

    COIKernelData_t *kernelData = new COIKernelData_t;
    newKernel->data = kernelData;

    kernelData->chiefID = data_.chiefID;

    newKernel->buildFromBinary(filename, functionName);

    return newKernel;
  }
Example #10
0
  // Creates a CUDA kernel bound to this device and loads it from the
  // in-memory image [cache], using [functionName] as the entry point.
  //
  // The new kernel gets its own CUDAKernelData_t, which shares this device's
  // device and context handles. Returns the heap-allocated kernel.
  kernel_v* device_t<CUDA>::loadKernelFromLibrary(const char *cache,
                                                  const std::string &functionName){
    OCCA_EXTRACT_DATA(CUDA, Device);

    kernel_v *newKernel = new kernel_t<CUDA>;
    newKernel->dev = dev;

    CUDAKernelData_t *kernelData = new CUDAKernelData_t;
    newKernel->data = kernelData;

    kernelData->device  = data_.device;
    kernelData->context = data_.context;

    newKernel->loadFromLibrary(cache, functionName);

    return newKernel;
  }
Example #11
0
  // Creates a CUDA kernel bound to this device and builds it from the binary
  // file [filename], using [functionName] as the entry point.
  //
  // The new kernel gets its own CUDAKernelData_t, which shares this device's
  // device and context handles. Returns the heap-allocated kernel.
  kernel_v* device_t<CUDA>::buildKernelFromBinary(const std::string &filename,
                                                 const std::string &functionName){
    OCCA_EXTRACT_DATA(CUDA, Device);

    kernel_v *newKernel = new kernel_t<CUDA>;
    newKernel->dev = dev;

    CUDAKernelData_t *kernelData = new CUDAKernelData_t;
    newKernel->data = kernelData;

    kernelData->device  = data_.device;
    kernelData->context = data_.context;

    newKernel->buildFromBinary(filename, functionName);

    return newKernel;
  }
Example #12
0
  // Allocates [bytes] bytes of CUDA device memory and wraps them in a new
  // memory object bound to this device.
  //
  // If [source] is not NULL, [bytes] bytes are copied from it into the new
  // allocation (offset 0). Returns the heap-allocated memory object.
  memory_v* device_t<CUDA>::malloc(const uintptr_t bytes,
                                   void *source){
    OCCA_EXTRACT_DATA(CUDA, Device);

    memory_v *newMem = new memory_t<CUDA>;
    newMem->dev = dev;

    // The device pointer lives on the heap and is kept as the memory handle
    CUdeviceptr *devicePtr = new CUdeviceptr;

    newMem->handle = devicePtr;
    newMem->size   = bytes;

    OCCA_CUDA_CHECK("Device: malloc",
                    cuMemAlloc(devicePtr, bytes));

    if(source)
      newMem->copyFrom(source, bytes, 0);

    return newMem;
  }
Example #13
0
  // Initializes this CUDA device from the argument map [aim].
  //
  // [aim] must contain "deviceID"; otherwise a message is printed and the
  // integer 1 is thrown. On success the device data holds the device handle
  // for that ID and a context created on it with CU_CTX_SCHED_AUTO.
  void device_t<CUDA>::setup(argInfoMap &aim){
    cuda::init();

    data = new CUDADeviceData_t;

    OCCA_EXTRACT_DATA(CUDA, Device);

    const bool hasDeviceID = aim.has("deviceID");

    if(!hasDeviceID){
      std::cout << "[CUDA] device not given [deviceID]\n";
      throw 1;
    }

    const int requestedID = aim.iGet("deviceID");

    OCCA_CUDA_CHECK("Device: Creating Device",
                    cuDeviceGet(&data_.device, requestedID));

    OCCA_CUDA_CHECK("Device: Creating Context",
                    cuCtxCreate(&data_.context, CU_CTX_SCHED_AUTO, data_.device));
  }
Example #14
0
  // Returns the cache name for the kernel source [filename] when built with
  // [info_] on this kernel's device.
  //
  // Side effect: [info_] is modified -- the OCCA_USING_GPU / OCCA_USING_CUDA
  // defines and the CUDA keyword definitions are added to it before its salt
  // is read. The salt also includes the parser version and the device's
  // compiler environment script, compiler and compiler flags.
  std::string kernel_t<CUDA>::getCachedBinaryName(const std::string &filename,
                                                  kernelInfo &info_){
    OCCA_EXTRACT_DATA(CUDA, Kernel);

    info_.addDefine("OCCA_USING_GPU" , 1);
    info_.addDefine("OCCA_USING_CUDA", 1);

    info_.addOCCAKeywords(occaCUDADefines);

    std::stringstream saltStream;

    // Backend tag, kernel info and parser version ...
    saltStream << "CUDA"
               << info_.salt()
               << parser::version;

    // ... followed by the compiler settings of the device
    saltStream << dev->dHandle->compilerEnvScript
               << dev->dHandle->compiler
               << dev->dHandle->compilerFlags;

    const std::string saltString = saltStream.str();

    return getCachedName(filename, saltString);
  }
Example #15
0
  // Creates a normal COI buffer of [bytes] bytes for the chief process and
  // wraps it in a new memory object bound to this device.
  //
  // [source] is forwarded to COIBufferCreate as the initial data pointer.
  // Returns the heap-allocated memory object.
  memory_v* device_t<COI>::malloc(const uintptr_t bytes,
                                  void *source){
    OCCA_EXTRACT_DATA(COI, Device);

    memory_v *newMem = new memory_t<COI>;
    newMem->dev = dev;

    // The buffer handle lives on the heap and is kept as the memory handle
    coiMemory *buffer = new coiMemory;

    newMem->handle = buffer;
    newMem->size   = bytes;

    OCCA_COI_CHECK("Device: Malloc",
                   COIBufferCreate(bytes,
                                   COI_BUFFER_NORMAL,
                                   0,
                                   source,
                                   1,
                                   &data_.chiefID,
                                   buffer) );

    return newMem;
  }
Example #16
0
  // Loads the shared library [filename] into the chief process and fetches
  // the handle of the function named [functionName_] from it.
  //
  // functionName is set to functionName_ and the function handle is stored
  // in this kernel's data. Returns this kernel.
  kernel_t<COI>* kernel_t<COI>::buildFromBinary(const std::string &filename,
                                                const std::string &functionName_){
    OCCA_EXTRACT_DATA(COI, Kernel);

    functionName = functionName_;

    std::string libPath, soname;

    getFilePrefixAndName(filename, libPath, soname);

    // The library name is the file name cut at its first '.'
    const std::string::size_type dotPos = soname.find('.');

    if(dotPos != std::string::npos)
      soname = soname.substr(0, dotPos);

    COILIBRARY outLibrary;

    OCCA_COI_CHECK("Kernel: Loading Kernel To Chief",
                   COIProcessLoadLibraryFromFile(data_.chiefID,
                                                 filename.c_str(),
                                                 soname.c_str(),
                                                 NULL,
                                                 &outLibrary));

    // COIProcessGetFunctionHandles takes an array of names; pass a
    // one-element array
    const char *c_functionName = functionName.c_str();

    OCCA_COI_CHECK("Kernel: Getting Handle",
                   COIProcessGetFunctionHandles(data_.chiefID,
                                                1,
                                                &c_functionName,
                                                &data_.kernel));

    return this;
  }
Example #17
0
  // Builds the kernel [functionName_] from the source file [filename] for
  // the COI backend, reusing a cached shared library when one exists.
  //
  // Steps:
  //   1. Derive the cached library path from the source, the kernel info and
  //      the device's compiler settings.
  //   2. If that library already exists, load it via buildFromBinary().
  //   3. If haveFile() returns false, wait for the file with waitForFile()
  //      and then load it (presumably another process is building it --
  //      confirm against haveFile/waitForFile).
  //   4. Otherwise generate the intermediate source, compile it with
  //      system(), load the library into the chief process and fetch the
  //      kernel's function handle.
  //
  // The integer 1 is thrown if compilation fails. releaseFile() is called on
  // the cached binary on every exit from step 4. Returns this kernel.
  kernel_t<COI>* kernel_t<COI>::buildFromSource(const std::string &filename,
                                                          const std::string &functionName_,
                                                          const kernelInfo &info_){
    functionName = functionName_;

    // Work on a copy: the backend defines must not leak into the caller's info
    kernelInfo info = info_;
    info.addDefine("OCCA_USING_CPU", 1);
    info.addDefine("OCCA_USING_COI", 1);

    info.addOCCAKeywords(occaCOIDefines);

    std::stringstream salt;
    salt << "COI"
         << info.salt()
         << dev->dHandle->compilerEnvScript
         << dev->dHandle->compiler
         << dev->dHandle->compilerFlags
         << functionName;

    std::string cachedBinary = getCachedName(filename, salt.str());
    std::string libPath, soname;

    getFilePrefixAndName(cachedBinary, libPath, soname);

    // The binary is stored as lib<name>.so next to the cached name
    std::string libName = "lib" + soname + ".so";

    cachedBinary = libPath + libName;

    struct stat buffer;
    bool fileExists = (stat(cachedBinary.c_str(), &buffer) == 0);

    if(fileExists){
      std::cout << "Found cached binary of [" << filename << "] in [" << cachedBinary << "]\n";
      return buildFromBinary(cachedBinary, functionName);
    }

    if(!haveFile(cachedBinary)){
      waitForFile(cachedBinary);

      return buildFromBinary(cachedBinary, functionName);
    }

    std::string iCachedBinary = createIntermediateSource(filename,
                                                         cachedBinary,
                                                         info);

    std::stringstream command;

    if(dev->dHandle->compilerEnvScript.size())
      command << dev->dHandle->compilerEnvScript << " && ";

    command << dev->dHandle->compiler
#if (OCCA_OS == LINUX_OS) || (OCCA_OS == OSX_OS)
            << " -x c++ -w -nodefaultlibs -fPIC -shared"
#else
            << " /TP /LD /D MC_CL_EXE"
#endif
            << ' '    << dev->dHandle->compilerFlags
            << ' '    << info.flags
            << ' '    << iCachedBinary
#if (OCCA_OS == LINUX_OS) || (OCCA_OS == OSX_OS)
            << " -o " << cachedBinary
#else
            << " /link /OUT:" << cachedBinary
#endif
            << std::endl;

    const std::string &sCommand = command.str();

    std::cout << "Compiling [" << functionName << "]\n" << sCommand << "\n";

    const int compileError = system(sCommand.c_str());

    if(compileError){
      releaseFile(cachedBinary);
      throw 1;
    }

    OCCA_EXTRACT_DATA(COI, Kernel);

    COILIBRARY outLibrary;

    const COIRESULT loadingLibraryResult = COIProcessLoadLibraryFromFile(data_.chiefID,
                                                                         cachedBinary.c_str(),
                                                                         soname.c_str(),
                                                                         NULL,
                                                                         &outLibrary);

    // Release the file before the check reports the failure. The result of
    // the call above is what must be tested here.
    if(loadingLibraryResult != COI_SUCCESS)
      releaseFile(cachedBinary);

    OCCA_COI_CHECK("Kernel: Loading Kernel To Chief", loadingLibraryResult);

    const char *c_functionName = functionName.c_str();

    const COIRESULT getFunctionHandleResult = COIProcessGetFunctionHandles(data_.chiefID,
                                                                           1,
                                                                           &c_functionName,
                                                                           &(data_.kernel));

    // Same as above, for the function-handle lookup
    if(getFunctionHandleResult != COI_SUCCESS)
      releaseFile(cachedBinary);

    OCCA_COI_CHECK("Kernel: Getting Handle", getFunctionHandleResult);

    releaseFile(cachedBinary);

    return this;
  }
Example #18
0
  // Allocates a CUDA texture (a CUDA array plus a surface object bound to
  // it) and fills it from [source].
  //
  //   dim         - 1 creates a 1D array (height 0); any other value uses
  //                 dims.x by dims.y
  //   dims        - extents; x, y and z are all recorded in textureInfo
  //   source      - host data passed to copyFrom() once the texture exists
  //   type        - supplies the entry size, array format and channel count
  //   permissions - not used by this function
  //
  // Returns the heap-allocated memory object, flagged as a texture.
  memory_v* device_t<CUDA>::talloc(const int dim, const occa::dim &dims,
                                   void *source,
                                   occa::formatType type, const int permissions){
    OCCA_EXTRACT_DATA(CUDA, Device);

    const bool is1D = (dim == 1);

    memory_v *mem = new memory_t<CUDA>;
    mem->dev = dev;

    // Array and surface handles live together in the texture data, which is
    // kept as the memory handle
    CUDATextureData_t *textureData = new CUDATextureData_t;

    mem->handle = textureData;
    mem->size   = (is1D ? dims.x : (dims.x * dims.y)) * type.bytes();

    //---[ Texture Info ]---------------
    mem->isTexture = true;

    mem->textureInfo.dim = dim;
    mem->textureInfo.w   = dims.x;
    mem->textureInfo.h   = dims.y;
    mem->textureInfo.d   = dims.z;

    mem->textureInfo.bytesInEntry = type.bytes();

    //---[ CUDA Array ]-----------------
    CUarray &cuArray = textureData->array;

    CUDA_ARRAY_DESCRIPTOR arrayDesc;
    memset(&arrayDesc, 0, sizeof(arrayDesc));

    arrayDesc.Width       = dims.x;
    arrayDesc.Height      = is1D ? 0 : dims.y;
    arrayDesc.Format      = *((CUarray_format*) type.format<CUDA>());
    arrayDesc.NumChannels = type.count();

    OCCA_CUDA_CHECK("Device: Creating Array",
                    cuArrayCreate(&cuArray, (CUDA_ARRAY_DESCRIPTOR*) &arrayDesc) );

    //---[ Surface Object ]-------------
    CUsurfObject &cuSurface = textureData->surface;

    CUDA_RESOURCE_DESC surfDesc;
    memset(&surfDesc, 0, sizeof(surfDesc));

    surfDesc.resType          = CU_RESOURCE_TYPE_ARRAY;
    surfDesc.res.array.hArray = cuArray;

    OCCA_CUDA_CHECK("Device: Creating Surface Object",
                    cuSurfObjectCreate(&cuSurface, &surfDesc) );

    // The addressing mode is stored on the heap behind textureInfo.arg
    int *addressMode = new int;
    mem->textureInfo.arg = addressMode;

    *addressMode = CUDA_ADDRESS_CLAMP;

    mem->copyFrom(source);

    /*
      if(dims == 3){
      CUDA_ARRAY3D_DESCRIPTOR arrayDesc;
      memset(&arrayDesc, 0, sizeof(arrayDesc);

      arrayDesc.Width  = size.x;
      arrayDesc.Height = size.y;
      arrayDesc.Depth  = size.z;

      arrayDesc.Format      = type.format<CUDA>();
      arrayDesc.NumChannels = type.count();

      cuArray3DCreate(&arr, (CUDA_ARRAY3D_DESCRIPTOR*) &arrayDesc);
      }
    */

    return mem;
  }
Example #19
0
  // Builds the kernel [functionName_] from the source file [filename] for
  // the CUDA backend, reusing a cached PTX binary when one exists.
  //
  // Steps:
  //   1. Derive the cached binary name from the source and kernel info.
  //   2. If that binary already exists, load it via buildFromBinary().
  //   3. If haveFile() returns false, wait for the file with waitForFile()
  //      and then load it (presumably another process is building it --
  //      confirm against haveFile/waitForFile).
  //   4. Otherwise generate the intermediate source, run a verbose ptxas
  //      pass (its result is ignored), compile to PTX, load the module and
  //      look up the kernel function.
  //
  // If the device's compiler flags carry no "-arch=sm_" flag, one is built
  // from the device's compute capability. The integer 1 is thrown if the PTX
  // compilation fails. releaseFile() is called on the cached binary on every
  // exit from step 4. Returns this kernel.
  kernel_t<CUDA>* kernel_t<CUDA>::buildFromSource(const std::string &filename,
                                                  const std::string &functionName_,
                                                  const kernelInfo &info_){
    OCCA_EXTRACT_DATA(CUDA, Kernel);

    functionName = functionName_;

    // Work on a copy: getCachedBinaryName() adds backend defines to the info
    kernelInfo info = info_;
    std::string cachedBinary = getCachedBinaryName(filename, info);

    struct stat buffer;
    const bool fileExists = (stat(cachedBinary.c_str(), &buffer) == 0);

    if(fileExists){
      std::cout << "Found cached binary of [" << filename << "] in [" << cachedBinary << "]\n";
      return buildFromBinary(cachedBinary, functionName);
    }

    if(!haveFile(cachedBinary)){
      waitForFile(cachedBinary);

      return buildFromBinary(cachedBinary, functionName);
    }

    std::string iCachedBinary = createIntermediateSource(filename,
                                                         cachedBinary,
                                                         info);

    std::string libPath, soname;
    getFilePrefixAndName(cachedBinary, libPath, soname);

    // Object file produced by the ptxas check pass
    std::string oCachedBinary = libPath + "o_" + soname + ".o";

    std::string archSM = "";

    if(dev->dHandle->compilerFlags.find("-arch=sm_") == std::string::npos){
      std::stringstream archSM_;

      int major, minor;
      OCCA_CUDA_CHECK("Kernel (" + functionName + ") : Getting CUDA Device Arch",
                      cuDeviceComputeCapability(&major, &minor, data_.device) );

      archSM_ << " -arch=sm_" << major << minor << ' ';

      archSM = archSM_.str();
    }

    std::stringstream command;

    //---[ PTX Check Command ]----------
    if(dev->dHandle->compilerEnvScript.size())
      command << dev->dHandle->compilerEnvScript << " && ";

    command << dev->dHandle->compiler
            << ' '          << dev->dHandle->compilerFlags
            << archSM
            << " -Xptxas -v,-dlcm=cg,-abi=no"
            << ' '          << info.flags
            << " -x cu -c " << iCachedBinary
            << " -o "       << oCachedBinary;

    const std::string &ptxCommand = command.str();

    std::cout << "Compiling [" << functionName << "]\n" << ptxCommand << "\n";

#if (OCCA_OS == LINUX_OS) || (OCCA_OS == OSX_OS)
    const int ptxError = system(ptxCommand.c_str());
#else
    const int ptxError = system(("\"" +  ptxCommand + "\"").c_str());
#endif

    // A failure of the check pass is deliberately ignored; the compile
    // command below decides whether the build succeeded
    (void) ptxError;

    //---[ Compiling Command ]----------
    command.str("");

    // Each system() call runs in its own shell, so the environment script
    // has to be applied again for this command
    if(dev->dHandle->compilerEnvScript.size())
      command << dev->dHandle->compilerEnvScript << " && ";

    command << dev->dHandle->compiler
            << " -o "       << cachedBinary
            << " -ptx -I."
            << ' '          << dev->dHandle->compilerFlags
            << archSM
            << ' '          << info.flags
            << " -x cu "    << iCachedBinary;

    const std::string &sCommand = command.str();

    std::cout << sCommand << '\n';

    // NOTE(review): unlike the check pass, this command is not wrapped in
    // quotes on non-Linux/OSX builds -- confirm whether it should be
    const int compileError = system(sCommand.c_str());

    if(compileError){
      releaseFile(cachedBinary);
      throw 1;
    }

    const CUresult moduleLoadError = cuModuleLoad(&data_.module,
                                                  cachedBinary.c_str());

    // Release the file before the check reports the failure
    if(moduleLoadError)
      releaseFile(cachedBinary);

    OCCA_CUDA_CHECK("Kernel (" + functionName + ") : Loading Module",
                    moduleLoadError);

    const CUresult moduleGetFunctionError = cuModuleGetFunction(&data_.function,
                                                                data_.module,
                                                                functionName.c_str());

    if(moduleGetFunctionError)
      releaseFile(cachedBinary);

    OCCA_CUDA_CHECK("Kernel (" + functionName + ") : Loading Function",
                    moduleGetFunctionError);

    releaseFile(cachedBinary);

    return this;
  }
Example #20
0
  // Sets up COI device number [device].
  //
  //   device          - index of the MIC engine; checked against the engine
  //                     count
  //   memoryAllocated - initial buffer space, in bytes, for the chief
  //                     process; 0 selects the 4 GB default
  //
  // Steps:
  //   1. Allocate the device data and fetch the engine handle.
  //   2. Make sure the occaCOIMain binary is cached: reuse it if it exists;
  //      otherwise either write and compile it here (when haveFile() returns
  //      true) or wait for it with waitForFile().
  //   3. Create the chief process from that binary and fetch the handles of
  //      the occaKernelWith<N>Argument(s) wrappers.
  //
  // The integer 1 is thrown if compiling occaCOIMain fails.
  void device_t<COI>::setup(const int device, const int memoryAllocated){
    data = new COIDeviceData_t;

    OCCA_EXTRACT_DATA(COI, Device);

    uint32_t deviceCount;
    OCCA_COI_CHECK("Device: Get Count",
                   COIEngineGetCount(COI_ISA_MIC, &deviceCount));

    OCCA_CHECK(device < deviceCount);

    OCCA_COI_CHECK("Device: Get Handle",
                   COIEngineGetHandle(COI_ISA_MIC, device, &data_.deviceID) );

    std::stringstream salt;
    salt << "COI"
         << occaCOIMain;

    std::string cachedBinary = getCachedName("occaCOIMain", salt.str());

    struct stat buffer;
    bool fileExists = (stat(cachedBinary.c_str(), &buffer) == 0);

    if(fileExists)
      std::cout << "Found cached binary of [occaCOIMain] in [" << cachedBinary << "]\n";
    else{
      //---[ Write File ]-----------------
      std::string prefix, name;

      getFilePrefixAndName(cachedBinary, prefix, name);

      const std::string iCachedBinary = prefix + "i_" + name;

      if(haveFile(cachedBinary)){
        std::cout << "Making [" << iCachedBinary << "]\n";

        std::ofstream fs;
        fs.open(iCachedBinary.c_str());

        fs << occaCOIMain;

        fs.close();

        std::stringstream command;

        command << dev->dHandle->compiler
                << " -o " << cachedBinary
                << " -x c++"
                << ' '    << dev->dHandle->compilerFlags
                << ' '    << iCachedBinary;

        const std::string &sCommand = command.str();

        // What is being compiled here is the occaCOIMain host program, not
        // a kernel, so name it directly
        std::cout << "Compiling [occaCOIMain]\n" << sCommand << "\n\n";

        const int compileError = system(sCommand.c_str());

        releaseFile(cachedBinary);

        // Fail here, like the kernel builds do, rather than later when the
        // process is created from a missing binary
        if(compileError)
          throw 1;
      }
      else
        waitForFile(cachedBinary);
    }

    // [-] Tentative
    std::string SINK_LD_LIBRARY_PATH;

    char *c_SINK_LD_LIBRARY_PATH = getenv("SINK_LD_LIBRARY_PATH");
    if(c_SINK_LD_LIBRARY_PATH != NULL)
      SINK_LD_LIBRARY_PATH = std::string(c_SINK_LD_LIBRARY_PATH);

    // (4 << 30) does not fit in an int, so the default is computed in 64 bits
    const uint64_t defaultBufferSpace = ((uint64_t) 4) << 30; // 4 GB

    const uint64_t bufferSpace = (memoryAllocated
                                  ? (uint64_t) memoryAllocated
                                  : defaultBufferSpace);

    OCCA_COI_CHECK("Device: Initializing",
                   COIProcessCreateFromFile(data_.deviceID,
                                            cachedBinary.c_str(),
                                            0   , NULL,
                                            true, NULL,
                                            true, NULL,
                                            bufferSpace,
                                            SINK_LD_LIBRARY_PATH.c_str(),
                                            &(data_.chiefID)) );

    const char *kernelNames[] = {"occaKernelWith1Argument"  , "occaKernelWith2Arguments" , "occaKernelWith3Arguments" ,
                                 "occaKernelWith4Arguments" , "occaKernelWith5Arguments" , "occaKernelWith6Arguments" ,
                                 "occaKernelWith7Arguments" , "occaKernelWith8Arguments" , "occaKernelWith9Arguments" ,
                                 "occaKernelWith10Arguments", "occaKernelWith11Arguments", "occaKernelWith12Arguments",
                                 "occaKernelWith13Arguments", "occaKernelWith14Arguments", "occaKernelWith15Arguments",
                                 "occaKernelWith16Arguments", "occaKernelWith17Arguments", "occaKernelWith18Arguments",
                                 "occaKernelWith19Arguments", "occaKernelWith20Arguments", "occaKernelWith21Arguments",
                                 "occaKernelWith22Arguments", "occaKernelWith23Arguments", "occaKernelWith24Arguments",
                                 "occaKernelWith25Arguments"};

    // Derived from the table so the two cannot drift apart
    // NOTE(review): data_.kernelWrapper must hold at least this many handles
    const uint32_t kernelCount = sizeof(kernelNames) / sizeof(kernelNames[0]);

    // [-] More hard-coding, if you know what I mean
    OCCA_COI_CHECK("Device: Getting Kernel Wrappers",
                   COIProcessGetFunctionHandles(data_.chiefID,
                                                kernelCount,
                                                kernelNames,
                                                data_.kernelWrapper));
  }