Example #1
void print_kernel_invocation(cl_kernel entry) {
  assert(kernels().count(entry) == 1 && "Kernel not found(2)");
  kernelInfo_t *info = &kernels()[entry];
  unsigned work_dim = info->work_dim;
  unsigned *global_work_size = info->global_work_size;
  unsigned *local_work_size = info->local_work_size;
  printf("\nSENTINEL %s ", info->name);
  if (work_dim == 1) {
    printf("--global_size=%u ", global_work_size[0]);
    printf("--local_size=%u ", local_work_size[0]);
  } else if (work_dim == 2) {
    printf("--global_size=[%u,%u] ", global_work_size[0], global_work_size[1]);
    printf("--local_size=[%u,%u] ", local_work_size[0], local_work_size[1]);
  } else if (work_dim == 3) {
    printf("--global_size=[%u,%u,%u] ", global_work_size[0], global_work_size[1], global_work_size[2]);
    printf("--local_size=[%u,%u,%u] ", local_work_size[0], local_work_size[1], local_work_size[2]);
  }
  for (std::vector<void *>::iterator it = info->args.begin(),
       end = info->args.end();
       it != end;
       ++it) {
    unsigned i = std::distance(info->args.begin(), it);
    int *x = static_cast<int *>(*it);
    if (x) printf("%u:%d ", i, *x);
    else printf("%u:- ", i);
  }
  printf("\n");
}
std::unique_ptr<typename ElementaryPotentialOperator<
    BasisFunctionType, KernelType, ResultType>::LocalAssembler>
ElementaryPotentialOperator<BasisFunctionType, KernelType, ResultType>::
    makeAssembler(const Space<BasisFunctionType> &space,
                  const arma::Mat<CoordinateType> &evaluationPoints,
                  const QuadratureStrategy &quadStrategy,
                  const EvaluationOptions &options) const {
  // Collect the standard set of data necessary for construction of
  // assemblers
  typedef Fiber::RawGridGeometry<CoordinateType> RawGridGeometry;
  typedef std::vector<const Fiber::Shapeset<BasisFunctionType> *>
      ShapesetPtrVector;
  typedef std::vector<std::vector<ResultType>> CoefficientsVector;
  typedef LocalAssemblerConstructionHelper Helper;

  shared_ptr<RawGridGeometry> rawGeometry;
  shared_ptr<GeometryFactory> geometryFactory;
  shared_ptr<Fiber::OpenClHandler> openClHandler;
  shared_ptr<ShapesetPtrVector> shapesets;

  shared_ptr<const Grid> grid = space.grid();
  Helper::collectGridData(space, rawGeometry, geometryFactory);
  Helper::makeOpenClHandler(options.parallelizationOptions().openClOptions(),
                            rawGeometry, openClHandler);
  Helper::collectShapesets(space, shapesets);

  // Now create the assembler
  return quadStrategy.makeAssemblerForPotentialOperators(
      evaluationPoints, geometryFactory, rawGeometry, shapesets,
      make_shared_from_ref(kernels()),
      make_shared_from_ref(trialTransformations()),
      make_shared_from_ref(integral()), openClHandler,
      options.parallelizationOptions(), options.verbosityLevel());
}
Example #3
CLWProgram::CLWProgram(cl_program program)
: ReferenceCounter<cl_program, clRetainProgram, clReleaseProgram>(program)
{
    cl_int status = CL_SUCCESS;
    cl_uint numKernels = 0;
    status = clCreateKernelsInProgram(*this, 0, nullptr, &numKernels);
    ThrowIf(status != CL_SUCCESS, status, "clCreateKernelsInProgram failed");
    ThrowIf(numKernels == 0, CL_BUILD_ERROR, "clCreateKernelsInProgram returned 0 kernels");
    
    std::vector<cl_kernel> kernels(numKernels);
    status = clCreateKernelsInProgram(*this, numKernels, &kernels[0], nullptr);
    
    ThrowIf(status != CL_SUCCESS, status, "clCreateKernelsInProgram failed");
    
    std::for_each(kernels.begin(), kernels.end(), [this](cl_kernel k)
                  {
                      size_t size = 0;
                      cl_int res;
                      
                      res = clGetKernelInfo(k, CL_KERNEL_FUNCTION_NAME, 0, nullptr, &size);
                      ThrowIf(res != CL_SUCCESS, res, "clGetKernelInfo failed");
                      
                      std::vector<char> temp(size);
                      res = clGetKernelInfo(k, CL_KERNEL_FUNCTION_NAME, size, &temp[0], nullptr);
                      ThrowIf(res != CL_SUCCESS, res, "clGetKernelInfo failed");
                      
                      std::string funcName(temp.begin(), temp.end()-1);
                      kernels_[funcName] = CLWKernel::Create(k);
                  });
}
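The constructor above fills the name-to-kernel map kernels_. As an illustration of how such a map is typically consumed, here is a hypothetical accessor; it is a sketch assuming a member of type std::map<std::string, CLWKernel> named kernels_, not necessarily CLW's actual API.
// Hypothetical accessor (illustration only, not necessarily CLW's real API),
// assuming a member std::map<std::string, CLWKernel> kernels_ as built above.
CLWKernel CLWProgram::GetKernel(std::string const& funcName) const
{
    auto it = kernels_.find(funcName);
    ThrowIf(it == kernels_.end(), CL_INVALID_KERNEL_NAME, "No kernel with that name in the program");
    return it->second;
}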
Example #4
cl_int clSetKernelArg(cl_kernel kernel, cl_uint arg_index,
                      size_t arg_size, const void *arg_value) {
  assert(kernels().count(kernel) == 1 && "Kernel not found(0)");
  if (kernels()[kernel].args.size() < arg_index + 1) {
    kernels()[kernel].args.resize(arg_index + 1);
  }
  if (arg_value) {
    kernels()[kernel].args[arg_index] = const_cast<void *>(arg_value);
  } else {
    kernels()[kernel].args[arg_index] = NULL;
  }
  if (realClSetKernelArg == NULL)
    realClSetKernelArg = (clSetKernelArg_t)dlsym(RTLD_NEXT,"clSetKernelArg");
  assert(realClSetKernelArg != NULL && "clSetKernelArg is null");
  return realClSetKernelArg(kernel, arg_index, arg_size, arg_value);
}
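Examples #1, #4, #6 and #7 all record into a registry whose definition is not part of these excerpts. A minimal sketch of what kernelInfo_t and kernels() might look like, inferred purely from the calls in those wrappers; the field layout and map type are assumptions.
// Sketch only: inferred from the calls in Examples #1, #4, #6 and #7; the real
// kernelInfo_t and kernels() definitions are not part of these excerpts.
#include <CL/cl.h>
#include <map>
#include <vector>

struct kernelInfo_t {
  const char *name;              // recorded by the clCreateKernel wrapper (Example #6)
  unsigned work_dim;             // recorded by the clEnqueueNDRangeKernel wrapper (Example #7)
  unsigned global_work_size[3];
  unsigned local_work_size[3];
  std::vector<void *> args;      // raw argument pointers recorded by clSetKernelArg (Example #4)
};

// A function-local static keeps initialization order well defined across the wrappers.
inline std::map<cl_kernel, kernelInfo_t> &kernels() {
  static std::map<cl_kernel, kernelInfo_t> instance;
  return instance;
}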
Example #5
void print_kernel_invocation(const char *entry) {
  dim3 gridDim = kernelInfo().gridDim;
  dim3 blockDim = kernelInfo().blockDim;
  printf("SENTINEL %s ", kernels()[entry]);
  if (gridDim.y == 1 && gridDim.z == 1) {
    printf("--gridDim=%d ", gridDim.x);
  } else if (gridDim.z == 1) {
    printf("--gridDim=[%d,%d] ", gridDim.x, gridDim.y);
  } else {
    printf("--gridDim=[%d,%d,%d] ", gridDim.x, gridDim.y, gridDim.z);
  }
  if (blockDim.y == 1 && blockDim.z == 1) {
    printf("--blockDim=%d ", blockDim.x);
  } else if (blockDim.z == 1) {
    printf("--blockDim=[%d,%d] ", blockDim.x, blockDim.y);
  } else {
    printf("--blockDim=[%d,%d,%d] ", blockDim.x, blockDim.y, blockDim.z);
  }
  for (std::list<void *>::iterator it = kernelInfo().args.begin(),
       end = kernelInfo().args.end();
       it != end;
       ++it) {
    unsigned i = std::distance(kernelInfo().args.begin(), it);
    int *x = static_cast<int *>(*it);
    if (x) printf("%u:%d ", i, *x);
    else printf("%u:- ", i);
  }
  printf("\n");
}
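Example #5 (and Example #9 further down) belong to a CUDA interposer with its own registry, again not shown in the excerpts. A sketch inferred from their usage follows; only kernelInfo(), kernels(), cudaRegisterFunction_t and realCudaRegisterFunction appear in the source, while the struct name and the exact signatures are assumptions.
// Sketch only: inferred from how Examples #5 and #9 use these names; the real
// definitions live elsewhere in the interposer and are not reproduced here.
#include <cuda_runtime.h>   // dim3, uint3
#include <list>
#include <map>

struct cudaKernelInfo_t {    // hypothetical type name; only kernelInfo() appears in the source
  dim3 gridDim;
  dim3 blockDim;
  std::list<void *> args;    // argument pointers recorded when a launch is configured
};

inline cudaKernelInfo_t &kernelInfo() {
  static cudaKernelInfo_t instance;
  return instance;
}

// Maps the host stub pointer (hostFun) to the device-side function name, as
// filled in by the __cudaRegisterFunction wrapper in Example #9.
inline std::map<const char *, const char *> &kernels() {
  static std::map<const char *, const char *> instance;
  return instance;
}

// Function-pointer plumbing for forwarding to the real runtime entry point;
// the signature mirrors the wrapper's own parameter list in Example #9.
typedef void (*cudaRegisterFunction_t)(void **, const char *, char *, const char *,
                                       int, uint3 *, uint3 *, dim3 *, dim3 *, int *);
static cudaRegisterFunction_t realCudaRegisterFunction = NULL;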
Example #6
cl_kernel clCreateKernel(cl_program program, const char *kernel_name, cl_int *errcode_ret) {
  if (realClCreateKernel == NULL)
    realClCreateKernel = (clCreateKernel_t)dlsym(RTLD_NEXT,"clCreateKernel");
  assert(realClCreateKernel != NULL && "clCreateKernel is null");
  cl_kernel k = realClCreateKernel(program, kernel_name, errcode_ret);
  kernels()[k].name = kernel_name;
  return k;
}
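The wrappers in Examples #4, #6 and #7 resolve the vendor entry points lazily via dlsym(RTLD_NEXT, ...). The typedef and real* pointer names appear in the excerpts but their declarations do not; a plausible sketch follows, together with the usual way such an interposer is activated (the library name in the comment is hypothetical).
// Sketch only: declarations matching the real* pointers used by the wrappers.
// Signatures follow the standard OpenCL 1.x prototypes.
#include <CL/cl.h>
#include <dlfcn.h>   // dlsym, RTLD_NEXT (RTLD_NEXT needs _GNU_SOURCE on glibc)

typedef cl_int (*clSetKernelArg_t)(cl_kernel, cl_uint, size_t, const void *);
typedef cl_kernel (*clCreateKernel_t)(cl_program, const char *, cl_int *);
typedef cl_int (*clEnqueueNDRangeKernel_t)(cl_command_queue, cl_kernel, cl_uint,
                                           const size_t *, const size_t *, const size_t *,
                                           cl_uint, const cl_event *, cl_event *);

static clSetKernelArg_t realClSetKernelArg = NULL;
static clCreateKernel_t realClCreateKernel = NULL;
static clEnqueueNDRangeKernel_t realClEnqueueNDRangeKernel = NULL;

// Typical use: build the wrappers into a shared object and preload it, e.g.
//   LD_PRELOAD=./libsentinel.so ./opencl_app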
Example #7
cl_int clEnqueueNDRangeKernel(
  cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim,
  const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size,
  cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) {
  assert(kernels().count(kernel) == 1 && "Kernel not found(1)");
  assert(work_dim <= 3 && "work_dim is too large");
  kernels()[kernel].work_dim = work_dim;
  for (unsigned int i=0; i<work_dim; i++) {
    kernels()[kernel].global_work_size[i] = global_work_size[i];
    kernels()[kernel].local_work_size[i] = local_work_size == NULL ? 0 : local_work_size[i];
  }
  print_kernel_invocation(kernel);
  if (realClEnqueueNDRangeKernel == NULL)
    realClEnqueueNDRangeKernel = (clEnqueueNDRangeKernel_t)dlsym(RTLD_NEXT,"clEnqueueNDRangeKernel");
  assert(realClEnqueueNDRangeKernel != NULL && "clEnqueueNDRangeKernel is null");
  return realClEnqueueNDRangeKernel(
    command_queue, kernel, work_dim,
    global_work_offset, global_work_size, local_work_size,
    num_events_in_wait_list, event_wait_list, event);
}
std::auto_ptr<typename ElementaryPotentialOperator<
BasisFunctionType, KernelType, ResultType>::Evaluator>
ElementaryPotentialOperator<BasisFunctionType, KernelType, ResultType>::
makeEvaluator(
        const GridFunction<BasisFunctionType, ResultType>& argument,
        const QuadratureStrategy& quadStrategy,
        const EvaluationOptions& options) const
{
    // Collect the standard set of data necessary for construction of
    // evaluators and assemblers
    typedef Fiber::RawGridGeometry<CoordinateType> RawGridGeometry;
    typedef std::vector<const Fiber::Shapeset<BasisFunctionType>*> ShapesetPtrVector;
    typedef std::vector<std::vector<ResultType> > CoefficientsVector;
    typedef LocalAssemblerConstructionHelper Helper;

    shared_ptr<RawGridGeometry> rawGeometry;
    shared_ptr<GeometryFactory> geometryFactory;
    shared_ptr<Fiber::OpenClHandler> openClHandler;
    shared_ptr<ShapesetPtrVector> shapesets;

    const Space<BasisFunctionType>& space = *argument.space();
    shared_ptr<const Grid> grid = space.grid();
    Helper::collectGridData(space,
                            rawGeometry, geometryFactory);
    Helper::makeOpenClHandler(options.parallelizationOptions().openClOptions(),
                              rawGeometry, openClHandler);
    Helper::collectShapesets(space, shapesets);

    // In addition, get coefficients of argument's expansion in each element
    const GridView& view = space.gridView();
    const int elementCount = view.entityCount(0);

    shared_ptr<CoefficientsVector> localCoefficients =
            boost::make_shared<CoefficientsVector>(elementCount);

    std::auto_ptr<EntityIterator<0> > it = view.entityIterator<0>();
    for (int i = 0; i < elementCount; ++i) {
        const Entity<0>& element = it->entity();
        argument.getLocalCoefficients(element, (*localCoefficients)[i]);
        it->next();
    }

    // Now create the evaluator
    return quadStrategy.makeEvaluatorForIntegralOperators(
                geometryFactory, rawGeometry,
                shapesets,
                make_shared_from_ref(kernels()),
                make_shared_from_ref(trialTransformations()),
                make_shared_from_ref(integral()),
                localCoefficients,
                openClHandler,
                options.parallelizationOptions());
}
Example #9
void __cudaRegisterFunction(void **fatCubinHandle, const char *hostFun, char *deviceFun,
                            const char *deviceName, int thread_limit, uint3 *tid,
                            uint3 *bid, dim3 *bDim, dim3 *gDim, int *wSize) {
  kernels()[hostFun] = deviceFun;
  if (realCudaRegisterFunction == NULL) {
    realCudaRegisterFunction = (cudaRegisterFunction_t)dlsym(RTLD_NEXT,"__cudaRegisterFunction");
  }
  assert(realCudaRegisterFunction != NULL && "cudaRegisterFunction is null");
  realCudaRegisterFunction(fatCubinHandle, hostFun, deviceFun,
                           deviceName, thread_limit, tid,
                           bid, bDim, gDim, wSize);
}
std::pair<
shared_ptr<typename HypersingularIntegralOperator<
BasisFunctionType, KernelType, ResultType>::LocalAssembler>,
shared_ptr<typename HypersingularIntegralOperator<
BasisFunctionType, KernelType, ResultType>::LocalAssembler>
>
HypersingularIntegralOperator<BasisFunctionType, KernelType, ResultType>::
reallyMakeAssemblers(
        const QuadratureStrategy& quadStrategy,
        const shared_ptr<const GeometryFactory>& testGeometryFactory,
        const shared_ptr<const GeometryFactory>& trialGeometryFactory,
        const shared_ptr<const Fiber::RawGridGeometry<CoordinateType> >& testRawGeometry,
        const shared_ptr<const Fiber::RawGridGeometry<CoordinateType> >& trialRawGeometry,
        const shared_ptr<const std::vector<const Fiber::Shapeset<BasisFunctionType>*> >& testShapesets,
        const shared_ptr<const std::vector<const Fiber::Shapeset<BasisFunctionType>*> >& trialShapesets,
        const shared_ptr<const Fiber::OpenClHandler>& openClHandler,
        const ParallelizationOptions& parallelizationOptions,
        VerbosityLevel::Level verbosityLevel,
        bool cacheSingularIntegrals,
        bool makeSeparateOffDiagonalAssembler) const
{
    std::pair<shared_ptr<LocalAssembler>, shared_ptr<LocalAssembler> > result;
    // first element: "standard" assembler
    // second element: assembler used for admissible (off-diagonal)
    // H-matrix blocks in "disassembled mode"
    result.first.reset(quadStrategy.makeAssemblerForIntegralOperators(
                           testGeometryFactory, trialGeometryFactory,
                           testRawGeometry, trialRawGeometry,
                           testShapesets, trialShapesets,
                           make_shared_from_ref(testTransformations()),
                           make_shared_from_ref(kernels()),
                           make_shared_from_ref(trialTransformations()),
                           make_shared_from_ref(integral()),
                           openClHandler, parallelizationOptions, verbosityLevel,
                           cacheSingularIntegrals).release());
    if (makeSeparateOffDiagonalAssembler)
        result.second.reset(quadStrategy.makeAssemblerForIntegralOperators(
                                testGeometryFactory, trialGeometryFactory,
                                testRawGeometry, trialRawGeometry,
                                testShapesets, trialShapesets,
                                make_shared_from_ref(offDiagonalTestTransformations()),
                                make_shared_from_ref(offDiagonalKernels()),
                                make_shared_from_ref(offDiagonalTrialTransformations()),
                                make_shared_from_ref(offDiagonalIntegral()),
                                openClHandler, parallelizationOptions, verbosityLevel,
                                false /*cacheSingularIntegrals*/).release());
    else
        result.second = result.first;
    return result;
}
Example #11
void FFT::cpx ( double *real , double *imag , int isign ) // Complex array
{
   int i, factors[64] ;

   if (npts == 1)
      return ;

   for (i=0 ; i<n_facs ; i++)
      factors[i] = all_factors[i] ;

   kernels ( real , imag , ntot , npts , nspan , isign , n_facs ,
             rwork , rwork+max_factor , rwork+2*max_factor ,
             rwork+3*max_factor , factors ) ;

   permute ( real , imag , ntot , npts , nspan , abs(isign) , n_facs ,
             n_sq_facs , rwork , rwork+max_factor , iwork , factors ,
             max_factor ) ;
}
BOOST_AUTO_TEST_CASE_TEMPLATE(evaluateOnGrid_works_for_points_on_y_axis,
                              ValueType, kernel_types)
{
    typedef Fiber::Laplace3dDoubleLayerPotentialKernelFunctor<ValueType> Functor;
    typedef Fiber::DefaultCollectionOfKernels<Functor> Kernels;
    Kernels kernels((Functor()));

    Fiber::GeometricalData<typename Functor::CoordinateType>
            testGeomData, trialGeomData;
    const int worldDim = 3;
    const int testPointCount = 2, trialPointCount = 3;
    // Note: points lying on y axis only
    testGeomData.globals.resize(worldDim, testPointCount);
    testGeomData.globals.fill(0.);
    testGeomData.globals(1, 1) = 1.;

    trialGeomData.globals.resize(worldDim, trialPointCount);
    trialGeomData.globals.fill(0.);
    trialGeomData.globals(1, 0) = 2.;
    trialGeomData.globals(1, 1) = 3.;
    trialGeomData.globals(1, 2) = 4.;

    trialGeomData.normals.resize(worldDim, trialPointCount);
    trialGeomData.normals.fill(0.);
    trialGeomData.normals(1, 0) = 1.;
    trialGeomData.normals(1, 1) = 1.;
    trialGeomData.normals(1, 2) = 1.;

    Fiber::CollectionOf4dArrays<ValueType> result;
    kernels.evaluateOnGrid(testGeomData, trialGeomData, result);

    Fiber::_4dArray<ValueType> expected(1, 1, testPointCount, trialPointCount);
    for (int trialPoint = 0; trialPoint < trialPointCount; ++trialPoint)
        for (int testPoint = 0; testPoint < testPointCount; ++testPoint) {
            typename Functor::CoordinateType diff =
                    std::abs(testGeomData.globals(1, testPoint) -
                             trialGeomData.globals(1, trialPoint));
            expected(0, 0, testPoint, trialPoint) =
                    -(1. / (4. * M_PI)) / diff / diff;
        }

    BOOST_CHECK(check_arrays_are_close<ValueType>(result[0], expected, 1e-6));
}
Example #13
    void SpicePosition::init() {
        erract_c("SET", 0, (char*)"IGNORE");

        const char * k = "../cfg/kernels.furnsh";
        furnsh_c(k);

        std::vector<std::string> ks;
        kernels(ks);

        for (std::vector<std::string>::iterator i = ks.begin(); i != ks.end(); i++) {
            getbodies(*i);
        }

        for (BodyConstantsConstIterator it = m_bodies.begin(); it != m_bodies.end(); it++) {
            EphemeridesInterval & interval = m_intervals[(*it)->id()];
            getinterval(ks, (*it)->id(), interval);

            if (it == m_bodies.begin()) {
                m_interval.a = interval.a;
                m_interval.b = interval.b;
            } else {
                m_interval.a = std::max(m_interval.a, interval.a);
                m_interval.b = std::min(m_interval.b, interval.b);
            }
        }

        m_interval.a += 100;
        m_interval.b -= 100;

        char timea[1000], timeb[1000];

        timout_c (m_interval.a, "YYYY ::TDB", sizeof(timea), timea);
        timout_c (m_interval.b, "YYYY ::TDB", sizeof(timeb), timeb);

        erract_c("SET", 0, (char*)"DEFAULT");
    }
shared_ptr<vtkMeshModel> BuildGPModel(SEXP pPCA_,SEXP kernels_, SEXP ncomp_,SEXP nystroem_,SEXP useEmp_, SEXP combine_,SEXP combineEmp_, SEXP isoScale_, SEXP centroid_) {
  try { 
    bool useEmp = as<bool>(useEmp_);
    int combine = as<int>(combine_);
    int combineEmp = as<int>(combineEmp_);
    double isoScale = as<double>(isoScale_);
    unsigned int nystroem = as<unsigned int>(nystroem_);
    unsigned int numberOfComponents = as<unsigned int>(ncomp_);
    std::list<MatrixValuedKernelType*> mKerns;
    std::list<GaussianKernel*> gKerns;
    std::list<MultiscaleKernel*> bsKerns;
    vtkPoint centroid = SEXP2vtkPoint(centroid_);
    List kernels(kernels_);
  
    shared_ptr<vtkMeshModel> model = pPCA2statismo(pPCA_);
    MatrixValuedKernelType* mvKernel;
    // set up the gaussian kernel to be incremented over a list of parameters
    NumericVector params = kernels[0];
    //a kernel entry with two parameters selects a Gaussian kernel,
    //otherwise a Multiscale kernel
    if (params.size() == 2) {
      GaussianKernel* gk = new GaussianKernel(params[0]);
      gKerns.push_back(gk);
      mvKernel = new UncorrelatedMatrixValuedKernel<vtkPoint>(gk, model->GetRepresenter()->GetDimensions());
      
    } else {
      MultiscaleKernel* gk1 = new MultiscaleKernel(params[0],params[2]);
      bsKerns.push_back(gk1);
      mvKernel = new UncorrelatedMatrixValuedKernel<vtkPoint>(gk1, model->GetRepresenter()->GetDimensions());
      
    }
    MatrixValuedKernelType* sumKernel = new ScaledKernel<vtkPoint>(mvKernel, params[1]);
    //iterate over the remaining kernel parameters
    for (unsigned int i = 1; i < kernels.size();i++) {
      params = kernels[i];
      GaussianKernel* gkNew = new GaussianKernel(params[0]);
      MatrixValuedKernelType* mvGk = new UncorrelatedMatrixValuedKernel<vtkPoint>(gkNew, model->GetRepresenter()->GetDimensions());
      MatrixValuedKernelType* scaledGk = new ScaledKernel<vtkPoint>(mvGk, params[1]);
      //keep track of allocated objects
      gKerns.push_back(gkNew);
      mKerns.push_back(mvGk);
      mKerns.push_back(scaledGk);
      if (combine == 0)
	sumKernel = new SumKernel<vtkPoint>(sumKernel, scaledGk);
      else
	sumKernel = new ProductKernel<vtkPoint>(sumKernel, scaledGk);
    }
    if (useEmp) {
      // get the empiric kernel
      MatrixValuedKernelType* statModelKernel = new StatisticalModelKernel<vtkPolyData>(model.get());
      mKerns.push_back(statModelKernel);
      // add the empiric kernel on top
      if (combineEmp == 0)
	sumKernel = new SumKernel<vtkPoint>(sumKernel, statModelKernel);
      else
	sumKernel = new ProductKernel<vtkPoint>(sumKernel, statModelKernel);
    }
    if (isoScale > 0) {
      MatrixValuedKernelType* isoKernel = new IsoKernel(3,isoScale,centroid);
      mKerns.push_back(isoKernel);
      sumKernel = new SumKernel<vtkPoint>(sumKernel, isoKernel);
    }
    mKerns.push_back(sumKernel);
    //build new model
    shared_ptr<ModelBuilderType> modelBuilder(ModelBuilderType::Create(model->GetRepresenter()));
    shared_ptr<vtkMeshModel> combinedModel(modelBuilder->BuildNewModel(model->DrawMean(), *sumKernel, numberOfComponents,nystroem));
    //tidy up
    for (std::list<MatrixValuedKernelType*>::iterator it = mKerns.begin(); it != mKerns.end(); it++) {
      if (*it != NULL) {
	delete *it;
      }
    }
    for (std::list<GaussianKernel*>::iterator it = gKerns.begin(); it != gKerns.end(); it++) {
      if (*it != NULL) {
	delete *it;
      }
    }
    for (std::list<MultiscaleKernel*>::iterator it = bsKerns.begin(); it != bsKerns.end(); it++) {
      if (*it != NULL) {
	delete *it;
      }
    }
    return combinedModel;
  
  } catch (StatisticalModelException& e) {
    ::Rf_error("Exception occured while building the shape model\n");
    ::Rf_error("%s\n",  e.what());
    //shared_ptr<vtkMeshModel> model(NULL);
    //return model;
  } catch (std::exception& e) {
    ::Rf_error("%s", e.what());
    //shared_ptr<vtkMeshModel> model(NULL);    
    //return model;
  } catch (...) {
    ::Rf_error("unknown exception");
    //shared_ptr<vtkMeshModel> model(NULL);
    //return model;
  }
}
BOOST_AUTO_TEST_CASE_TEMPLATE(evaluateOnGrid_agrees_with_evaluateAtPointPairs,
                              ValueType, kernel_types)
{
    typedef Fiber::Laplace3dDoubleLayerPotentialKernelFunctor<ValueType> Functor;
    typedef Fiber::DefaultCollectionOfKernels<Functor> Kernels;
    Kernels kernels((Functor()));

    typedef Fiber::GeometricalData<typename Functor::CoordinateType> GeomData;

    const int worldDim = 3;
    const int testPointCount = 2, trialPointCount = 3;

    // Collect data with evaluateOnGrid
    GeomData testGeomDataOnGrid, trialGeomDataOnGrid;

    testGeomDataOnGrid.globals.resize(worldDim, testPointCount);
    testGeomDataOnGrid.globals.fill(0.);
    testGeomDataOnGrid.globals(0, 1) = 1.;

    trialGeomDataOnGrid.globals.resize(worldDim, trialPointCount);
    trialGeomDataOnGrid.globals.fill(1.);
    trialGeomDataOnGrid.globals(0, 0) = 2.;
    trialGeomDataOnGrid.globals(0, 1) = 3.;
    trialGeomDataOnGrid.globals(0, 2) = 4.;

    trialGeomDataOnGrid.normals.resize(worldDim, trialPointCount);
    trialGeomDataOnGrid.normals.fill(0.);
    trialGeomDataOnGrid.normals(1, 0) = 1.;
    trialGeomDataOnGrid.normals(1, 1) = 1.;
    trialGeomDataOnGrid.normals(1, 2) = 1.;

    Fiber::CollectionOf4dArrays<ValueType> resultOnGrid;
    kernels.evaluateOnGrid(testGeomDataOnGrid, trialGeomDataOnGrid, resultOnGrid);

    Vector<ValueType> convertedResultOnGrid(testPointCount * trialPointCount);
    for (int testPoint = 0; testPoint < testPointCount; ++testPoint)
        for (int trialPoint = 0; trialPoint < trialPointCount; ++trialPoint)
            convertedResultOnGrid(testPoint + trialPoint * testPointCount) =
                    resultOnGrid[0](0, 0, testPoint, trialPoint);

    // Collect data with evaluateAtPointPairs
    GeomData testGeomDataAtPointPairs, trialGeomDataAtPointPairs;
    testGeomDataAtPointPairs.globals.resize(worldDim, testPointCount * trialPointCount);
    trialGeomDataAtPointPairs.globals.resize(worldDim, testPointCount * trialPointCount);
    trialGeomDataAtPointPairs.normals.resize(worldDim, testPointCount * trialPointCount);
    for (int testPoint = 0; testPoint < testPointCount; ++testPoint)
        for (int trialPoint = 0; trialPoint < trialPointCount; ++trialPoint) {
            testGeomDataAtPointPairs.globals.col(testPoint + trialPoint * testPointCount) =
                    testGeomDataOnGrid.globals.col(testPoint);
            trialGeomDataAtPointPairs.globals.col(testPoint + trialPoint * testPointCount) =
                    trialGeomDataOnGrid.globals.col(trialPoint);
            trialGeomDataAtPointPairs.normals.col(testPoint + trialPoint * testPointCount) =
                    trialGeomDataOnGrid.normals.col(trialPoint);
        }

    Fiber::CollectionOf3dArrays<ValueType> resultAtPointPairs;
    kernels.evaluateAtPointPairs(testGeomDataAtPointPairs, trialGeomDataAtPointPairs,
                                 resultAtPointPairs);
    Vector<ValueType> convertedResultAtPointPairs(testPointCount * trialPointCount);
    for (int point = 0; point < testPointCount * trialPointCount; ++point)
        convertedResultAtPointPairs(point) =
                resultAtPointPairs[0](0, 0, point);

    BOOST_CHECK(check_arrays_are_close<ValueType>(
                    convertedResultAtPointPairs, convertedResultOnGrid, 1e-6));
}
Example #16
/**
 * @brief Creates an array of objects containing the OpenCL variables of each device
 * @param trDataBase The training database which will contain the instances and the features
 * @param selInstances The instances choosen as initial centroids
 * @param transposedTrDataBase The training database already transposed
 * @param conf The structure with all configuration parameters
 * @return A pointer to the array of created device objects
 */
CLDevice *createDevices(const float *const trDataBase, const int *const selInstances, const float *const transposedTrDataBase, Config *const conf) {


	/********** Find the OpenCL devices specified in configuration ***********/

	// OpenCL variables
	cl_uint numPlatformsDevices;
	cl_device_type deviceType;
	cl_program program;
	cl_kernel kernel;
	cl_int status;

	// Other variables
	auto allDevices = getAllDevices();
	CLDevice *devices = new CLDevice[conf -> nDevices + (conf -> ompThreads > 0)];

	for (int dev = 0; dev < conf -> nDevices; ++dev) {

		bool found = false;
		for (int allDev = 0; allDev < allDevices.size() && !found; ++allDev) {

			// Get the specified OpenCL device
			char dbuff[120];
			check(clGetDeviceInfo(allDevices[allDev], CL_DEVICE_NAME, sizeof(dbuff), dbuff, NULL) != CL_SUCCESS, "%s\n", CL_ERROR_DEVICE_NAME);

			// If the device exists...
			if (conf -> devices[dev] == dbuff) {
				devices[dev].device = allDevices[allDev];
				devices[dev].deviceName = dbuff;
				check(clGetDeviceInfo(devices[dev].device, CL_DEVICE_TYPE, sizeof(cl_device_type), &(devices[dev].deviceType), NULL) != CL_SUCCESS, "%s\n", CL_ERROR_DEVICE_TYPE);


				/********** Device local memory usage ***********/

				long int usedMemory = conf -> nFeatures * sizeof(cl_uchar); // Chromosome of the individual
				usedMemory += conf -> trNInstances * sizeof(cl_uchar); // Mapping buffer
				usedMemory += conf -> K * conf -> nFeatures * sizeof(cl_float); // Centroids buffer
				usedMemory += conf -> trNInstances * sizeof(cl_float); // DistCentroids buffer
				usedMemory += conf -> K * sizeof(cl_int); // Samples_in_k buffer

				// Get the maximum local memory size
				long int maxMemory;
				check(clGetDeviceInfo(devices[dev].device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(long int), &maxMemory, NULL) != CL_SUCCESS, "%s\n", CL_ERROR_DEVICE_MAXMEM);

				// Avoid exceeding the maximum local memory available. 1024 bytes of margin
				check(usedMemory > maxMemory - 1024, "%s:\n\tMax memory: %ld bytes\n\tAllow memory: %ld bytes\n\tUsed memory: %ld bytes\n", CL_ERROR_DEVICE_LOCALMEM, maxMemory, maxMemory - 1024, usedMemory);


				/********** Create context ***********/

				devices[dev].context = clCreateContext(NULL, 1, &(devices[dev].device), 0, 0, &status);
				check(status != CL_SUCCESS, "%s\n", CL_ERROR_DEVICE_CONTEXT);


				/********** Create Command queue ***********/

				devices[dev].commandQueue = clCreateCommandQueue(devices[dev].context, devices[dev].device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE, &status);
				check(status != CL_SUCCESS, "%s\n", CL_ERROR_DEVICE_QUEUE);


				/********** Create kernel ***********/

				// Open the file containing the kernels
				std::fstream kernels(conf -> kernelsFileName.c_str(), std::fstream::in);
				check(!kernels.is_open(), "%s\n", CL_ERROR_FILE_OPEN);

				// Obtain the size
				kernels.seekg(0, kernels.end);
				size_t fSize = kernels.tellg();
				kernels.seekg(0, kernels.beg);

				char *kernelSource = new char[fSize];
				kernels.read(kernelSource, fSize);
				kernels.close();

				// Create program
				program = clCreateProgramWithSource(devices[dev].context, 1, (const char **) &kernelSource, &fSize, &status);
				check(status != CL_SUCCESS, "%s\n", CL_ERROR_PROGRAM_BUILD);

				// Build program for the device in the context
				char buildOptions[196];
				sprintf(buildOptions, "-I include -D N_INSTANCES=%d -D N_FEATURES=%d -D N_OBJECTIVES=%d -D K=%d -D MAX_ITER_KMEANS=%d", conf -> trNInstances, conf -> nFeatures, conf -> nObjectives, conf -> K, conf -> maxIterKmeans);
				if (clBuildProgram(program, 1, &(devices[dev].device), buildOptions, 0, 0) != CL_SUCCESS) {
					char buffer[4096];
					fprintf(stderr, "Error: Could not build the program\n");
					check(clGetProgramBuildInfo(program, devices[dev].device, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL) != CL_SUCCESS, "%s\n", CL_ERROR_PROGRAM_ERRORS);
					check(true, "%s\n", buffer);
				}

				// Create kernel
				const char *kernelName = (devices[dev].deviceType == CL_DEVICE_TYPE_GPU) ? "kmeansGPU" : "";
				devices[dev].kernel = clCreateKernel(program, kernelName, &status);
				check(status != CL_SUCCESS, "%s\n", CL_ERROR_KERNEL_BUILD);


				/******* Work-items *******/

				devices[dev].computeUnits = atoi(conf -> computeUnits[dev].c_str());
				devices[dev].wiLocal = atoi(conf -> wiLocal[dev].c_str());
				devices[dev].wiGlobal = devices[dev].computeUnits * devices[dev].wiLocal;


				/******* Create and write the databases and centroids buffers. Create the subpopulations buffer. Set kernel arguments *******/

				// Create buffers
				devices[dev].objSubpopulations = clCreateBuffer(devices[dev].context, CL_MEM_READ_WRITE, conf -> familySize * sizeof(Individual), 0, &status);
				check(status != CL_SUCCESS, "%s\n", CL_ERROR_OBJECT_SUBPOPS);

				devices[dev].objTrDataBase = clCreateBuffer(devices[dev].context, CL_MEM_READ_ONLY, conf -> trNInstances * conf -> nFeatures * sizeof(cl_float), 0, &status);
				check(status != CL_SUCCESS, "%s\n", CL_ERROR_OBJECT_TRDB);

				devices[dev].objTransposedTrDataBase = clCreateBuffer(devices[dev].context, CL_MEM_READ_ONLY, conf -> trNInstances * conf -> nFeatures * sizeof(cl_float), 0, &status);
				check(status != CL_SUCCESS, "%s\n", CL_ERROR_OBJECT_TTRDB);

				devices[dev].objSelInstances = clCreateBuffer(devices[dev].context, CL_MEM_READ_ONLY, conf -> K * sizeof(cl_int), 0, &status);
				check(status != CL_SUCCESS, "%s\n", CL_ERROR_OBJECT_CENTROIDS);

				// Sets kernel arguments
				check(clSetKernelArg(devices[dev].kernel, 0, sizeof(cl_mem), (void *)&(devices[dev].objSubpopulations)) != CL_SUCCESS, "%s\n", CL_ERROR_KERNEL_ARGUMENT1);

				check(clSetKernelArg(devices[dev].kernel, 1, sizeof(cl_mem), (void *)&(devices[dev].objSelInstances)) != CL_SUCCESS, "%s\n", CL_ERROR_KERNEL_ARGUMENT2);

				check(clSetKernelArg(devices[dev].kernel, 2, sizeof(cl_mem), (void *)&(devices[dev].objTrDataBase)) != CL_SUCCESS, "%s\n", CL_ERROR_KERNEL_ARGUMENT3);

				check(clSetKernelArg(devices[dev].kernel, 5, sizeof(cl_mem), (void *)&(devices[dev].objTransposedTrDataBase)) != CL_SUCCESS, "%s\n", CL_ERROR_KERNEL_ARGUMENT6);

				// Write buffers
				check(clEnqueueWriteBuffer(devices[dev].commandQueue, devices[dev].objTrDataBase, CL_FALSE, 0, conf -> trNInstances * conf -> nFeatures * sizeof(cl_float), trDataBase, 0, NULL, NULL) != CL_SUCCESS, "%s\n", CL_ERROR_ENQUEUE_TRDB);
				check(clEnqueueWriteBuffer(devices[dev].commandQueue, devices[dev].objSelInstances, CL_FALSE, 0, conf -> K * sizeof(cl_int), selInstances, 0, NULL, NULL) != CL_SUCCESS, "%s\n", CL_ERROR_ENQUEUE_CENTROIDS);
				check(clEnqueueWriteBuffer(devices[dev].commandQueue, devices[dev].objTransposedTrDataBase, CL_FALSE, 0, conf -> trNInstances * conf -> nFeatures * sizeof(cl_float), transposedTrDataBase, 0, NULL, NULL) != CL_SUCCESS, "%s\n", CL_ERROR_ENQUEUE_TTRDB);

				// Resources used are released
				delete[] kernelSource;
				clReleaseProgram(program);

				found = true;
				allDevices.erase(allDevices.begin() + allDev);
			}
		}

		check(!found, "%s\n", CL_ERROR_DEVICE_FOUND);
	}


	/********** Add the CPU if it has been enabled in the configuration ***********/

	if (conf -> ompThreads > 0) {
		devices[conf -> nDevices].deviceType = CL_DEVICE_TYPE_CPU;
		devices[conf -> nDevices].computeUnits = conf -> ompThreads;
		++(conf -> nDevices);
	}

	return devices;
}
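createDevices acquires a context, command queue, kernel, and four buffers per device, but this excerpt contains no matching teardown. Below is a hedged cleanup sketch using only the CLDevice fields visible above and standard clRelease* calls; releaseDevices is a hypothetical helper, and the project may already perform this in a CLDevice destructor.
// Sketch only: releases the OpenCL objects acquired per device in createDevices.
// nOpenClDevices is the number of entries holding OpenCL objects, i.e. the value
// of conf->nDevices before the optional CPU entry was appended.
void releaseDevices(CLDevice *devices, const int nOpenClDevices) {
	for (int dev = 0; dev < nOpenClDevices; ++dev) {
		clReleaseMemObject(devices[dev].objSubpopulations);
		clReleaseMemObject(devices[dev].objTrDataBase);
		clReleaseMemObject(devices[dev].objTransposedTrDataBase);
		clReleaseMemObject(devices[dev].objSelInstances);
		clReleaseKernel(devices[dev].kernel);
		clReleaseCommandQueue(devices[dev].commandQueue);
		clReleaseContext(devices[dev].context);
	}
	delete[] devices;
}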