/** @brief Heuristic check: is this profile expected to run slowly on the given device?
  *
  *  For GPU devices the profile is reported as slow when the work-group size
  *  (local_size_1_ * local_size_2_) is not a multiple of the assumed warp size:
  *  64 when the device's vendor id is 4098 (0x1002; presumably AMD - confirm),
  *  32 for every other vendor. If that check does not flag the profile, the
  *  decision is delegated to is_slow_impl().
  *
  *  @param dev the given device
  *  @return true if the profile is likely to be slow on @p dev */
 bool is_slow(viennacl::ocl::device const & dev) const{
   if(dev.type()==CL_DEVICE_TYPE_GPU){
     // Vendor id 4098 gets a 64-wide warp, every other vendor 32.
     std::size_t const threads_per_warp = (dev.vendor_id()==4098) ? 64 : 32;
     // A work-group that is not a whole number of warps is flagged right away;
     // is_slow_impl() is not consulted in that case.
     if(((local_size_1_*local_size_2_)%threads_per_warp)>0)
       return true;
   }
   return is_slow_impl(dev);
 }
        /** @brief returns whether or not the profile leads to undefined behavior on particular device
         *  @param dev the given device*/
        bool is_invalid(viennacl::ocl::device const & dev, size_t scalartype_size) const{
          //Query device informations
          size_t lmem_available = static_cast<size_t>(dev.local_mem_size());
          size_t max_workgroup_size = dev.max_work_group_size();

          std::vector<size_t> max_work_item_sizes = dev.max_work_item_sizes();
          bool invalid_work_group_sizes = local_size_1_*local_size_2_ > max_workgroup_size
              || local_size_1_ > max_work_item_sizes[0]
              || local_size_2_ > max_work_item_sizes[1]; // uses too much resources

          return  invalid_work_group_sizes
              || lmem_used(scalartype_size)>lmem_available
              || invalid_impl(dev, scalartype_size);
        }
예제 #3
0
 /** @brief Convenience function for setting devices for a context */
 inline void setup_context(long i,
                           viennacl::ocl::device const & device)
 {
   std::vector<cl_device_id> device_id_array(1);
   device_id_array[0] = device.id();
   viennacl::ocl::backend<>::setup_context(i, device_id_array);
 }
예제 #4
0
          /** @brief Device-independent part of the validity check for a profile.
           *
           *  Compares the profile's local work size (as returned by local_work_size())
           *  and its local memory usage against the limits queried from the device.
           *
           *  @param dev       the given device
           *  @param lmem_used amount of local memory the profile uses, in the same unit as CL_DEVICE_LOCAL_MEM_SIZE
           *  @return true if the work-group is too large for the device or more local memory is used than is available */
          bool invalid_base(viennacl::ocl::device const & dev, size_t lmem_used) const{
            // Profile side: (first, second) extents of the work-group
            std::pair<size_t, size_t> wg = local_work_size();

            // Device side: limits reported by the OpenCL runtime
            size_t device_lmem = viennacl::ocl::info<CL_DEVICE_LOCAL_MEM_SIZE>(dev.id());
            size_t wg_size_limit = viennacl::ocl::info<CL_DEVICE_MAX_WORK_GROUP_SIZE>(dev.id());
            std::vector<size_t> per_dim_limit = viennacl::ocl::info<CL_DEVICE_MAX_WORK_ITEM_SIZES>(dev.id());

            // Total number of work-items in the group exceeds what the device allows
            if(wg.first*wg.second > wg_size_limit)
              return true;

            // First dimension is larger than the per-dimension limit
            if(wg.first > per_dim_limit[0])
              return true;

            // The second dimension is only checked when the device reports a limit for it
            if(per_dim_limit.size()>1 && wg.second > per_dim_limit[1])
              return true;

            // Work-group dimensions are fine; the remaining criterion is local memory
            return lmem_used>device_lmem;
          }