コード例 #1
0
ファイル: saxpy.hpp プロジェクト: KratosCSIC/trunk
        void core(vcl_size_t /*kernel_id*/, utils::kernel_generation_stream& stream, statements_type const & statements, std::vector<detail::mapping_type> const & mapping) const {
          stream << "for(unsigned int i = get_global_id(0) ; i < N ; i += get_global_size(0))" << std::endl;
          stream << "{" << std::endl;
          stream.inc_tab();

          //Fetches entries to registers
          std::set<std::string>  fetched;
          for(std::vector<detail::mapping_type>::const_iterator it = mapping.begin() ; it != mapping.end() ; ++it)
            for(detail::mapping_type::const_reverse_iterator iit = it->rbegin() ; iit != it->rend() ; ++iit)
              //Useless to fetch cpu scalars into registers
              if(detail::mapped_handle * p = dynamic_cast<detail::mapped_handle *>(iit->second.get()))
                p->fetch( std::make_pair("i","0"), vector_size_, fetched, stream);

          //Generates all the expression, in order
          vcl_size_t i = 0;
          for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){
            std::string str;
            detail::traverse(it->first, it->second, detail::expression_generation_traversal(std::make_pair("i","0"), -1, str, mapping[i++]));
            stream << str << ";" << std::endl;
          }

          //Writes back
          for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it)
             //Gets the mapped object at the LHS of each expression
            if(detail::mapped_handle * p = dynamic_cast<detail::mapped_handle *>(at(mapping.at(std::distance(statements.begin(),it)), std::make_pair(&it->second, detail::LHS_NODE_TYPE)).get()))
              p->write_back( std::make_pair("i", "0"), fetched, stream);

          stream.dec_tab();
          stream << "}" << std::endl;
        }
コード例 #2
0
      void core(unsigned int /*kernel_id*/, utils::kernel_generation_stream& stream, statements_container const & statements, std::vector<mapping_type> const & mapping) const
      {
        statements_container::data_type::const_iterator sit;
        std::vector<mapping_type>::const_iterator mit;

        stream << "for(unsigned int i = get_global_id(0) ; i < N ; i += get_global_size(0))" << std::endl;
        stream << "{" << std::endl;
        stream.inc_tab();

        //Registers already allocated
        std::set<std::string>  cache;

        //Fetch
        std::string rhs_suffix = "reg";
        std::string lhs_suffix = statements.order()==statements_container::INDEPENDENT?"tmp":rhs_suffix;

        for(mit = mapping.begin(), sit = statements.data().begin() ; sit != statements.data().end() ; ++sit, ++mit)
        {
          tree_parsing::read_write(tree_parsing::read_write_traversal::FETCH, parameters_.simd_width, lhs_suffix, cache, *sit, sit->root(), index_tuple("i", "N"), stream, *mit, tree_parsing::LHS_NODE_TYPE);
          tree_parsing::read_write(tree_parsing::read_write_traversal::FETCH, parameters_.simd_width, rhs_suffix, cache, *sit, sit->root(), index_tuple("i", "N"), stream, *mit, tree_parsing::RHS_NODE_TYPE);
        }

        //Generates all the expression, in order
        for(mit = mapping.begin(), sit = statements.data().begin() ; sit != statements.data().end() ; ++sit, ++mit)
          stream << tree_parsing::evaluate_expression(*sit, sit->root(), index_tuple("i", "N"), 0, *mit, tree_parsing::PARENT_NODE_TYPE) << ";" << std::endl;

        //Write back
        for(mit = mapping.begin(), sit = statements.data().begin() ; sit != statements.data().end() ; ++sit, ++mit)
        {
          tree_parsing::read_write(tree_parsing::read_write_traversal::WRITE_BACK, parameters_.simd_width, lhs_suffix, cache,*sit, sit->root(), index_tuple("i", "N"), stream, *mit, tree_parsing::LHS_NODE_TYPE);
        }

        stream.dec_tab();
        stream << "}" << std::endl;
      }
コード例 #3
0
ファイル: saxpy_vector.hpp プロジェクト: bollig/rbffd_gpu
            void generate_body_impl(unsigned int i, utils::kernel_generation_stream& kss){
              saxpy_vector_profile const * casted_prof = static_cast<saxpy_vector_profile const *>(prof_);

              symbolic_vector_base * first_vector = static_cast<symbolic_vector_base*>(&(*expressions_.begin())->lhs());
              unsigned int n_unroll = casted_prof->loop_unroll();
              kss << "int i = get_global_id(0)" ; if(n_unroll>1) kss << "*" << n_unroll; kss << ";" << std::endl;
              kss << "if(i<" << first_vector->size() << "){" << std::endl;
              kss.inc_tab();
              //Set access indices
              for(std::list<tools::shared_ptr<symbolic_binary_expression_tree_infos_base> >::iterator it=expressions_.begin() ; it!=expressions_.end();++it)
                for(unsigned int j=0 ; j < n_unroll ; ++j){
                  if(j==0) (*it)->access_index(j,"i","0");
                  else (*it)->access_index(j,"i + " + utils::to_string(j),"0");
                  (*it)->fetch(j,kss);
                }
              //Compute expressions
              for(std::list<tools::shared_ptr<symbolic_binary_expression_tree_infos_base> >::iterator it=expressions_.begin() ; it!=expressions_.end();++it)
                for(unsigned int j=0 ; j < n_unroll ; ++j)
                  kss << (*it)->generate(j) << ";" << std::endl;
              for(std::list<tools::shared_ptr<symbolic_binary_expression_tree_infos_base> >::iterator it=expressions_.begin() ; it!=expressions_.end();++it)
                for(unsigned int j=0 ; j < n_unroll ; ++j)
                  (*it)->write_back(j,kss);
              kss << "}" << std::endl;
              kss.dec_tab();

              for(unsigned int i=0 ; i < n_unroll ; ++i)
                for(std::list<tools::shared_ptr<symbolic_binary_expression_tree_infos_base> >::iterator it = expressions_.begin(); it != expressions_.end() ; ++it)
                  (*it)->clear_private_value(i);

            }
コード例 #4
0
ファイル: reduction_utils.hpp プロジェクト: GnsP/viennacl-dev
 inline void reduce_1d_local_memory(utils::kernel_generation_stream & stream, std::size_t size, std::vector<std::string> const & bufs, std::vector<scheduler::op_element> const & rops)
 {
     //Reduce local memory
     for(std::size_t stride = size/2 ; stride>0 ; stride /=2){
       stream << "barrier(CLK_LOCAL_MEM_FENCE); " << std::endl;
       stream << "if(lid < " << stride << "){" << std::endl;
       stream.inc_tab();
       for(std::size_t k = 0 ; k < bufs.size() ; ++k){
           std::string acc = bufs[k] + "[lid]";
           std::string str = bufs[k] + "[lid + " + utils::to_string(stride) + "]";
           compute_reduction(stream,acc,str,rops[k]);
       }
       stream.dec_tab();
       stream << "}" << std::endl;
     }
 }
コード例 #5
0
ファイル: saxpy.hpp プロジェクト: KratosCSIC/trunk
        void core(vcl_size_t /*kernel_id*/, utils::kernel_generation_stream& stream, statements_type const & statements, std::vector<detail::mapping_type> const & mapping) const {

          for(std::vector<detail::mapping_type>::const_iterator it = mapping.begin() ; it != mapping.end() ; ++it){
            for(detail::mapping_type::const_iterator iit = it->begin() ; iit != it->end() ; ++iit){
              if(detail::mapped_matrix * p = dynamic_cast<detail::mapped_matrix*>(iit->second.get()))
                p->bind_sizes("M","N");
            }
          }

          stream << "for(unsigned int i = get_global_id(0) ; i < M ; i += get_global_size(0))" << std::endl;
          stream << "{" << std::endl;
          stream.inc_tab();
          stream << "for(unsigned int j = get_global_id(1) ; j < N ; j += get_global_size(1))" << std::endl;
          stream << "{" << std::endl;
          stream.inc_tab();

          //Fetches entries to registers
          std::set<std::string>  fetched;
          for(std::vector<detail::mapping_type>::const_iterator it = mapping.begin() ; it != mapping.end() ; ++it)
            for(detail::mapping_type::const_reverse_iterator it2 = it->rbegin() ; it2 != it->rend() ; ++it2)
              if(detail::mapped_matrix * p = dynamic_cast<detail::mapped_matrix *>(it2->second.get()))
                p->fetch(std::make_pair("i", "j"), vector_size_, fetched, stream);


          vcl_size_t i = 0;
          for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){
            std::string str;
            detail::traverse(it->first, it->second, detail::expression_generation_traversal(std::make_pair("i", "j"), -1, str, mapping[i++]));
            stream << str << ";" << std::endl;
          }

          //Writes back
          for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){
            if(detail::mapped_handle * p = dynamic_cast<detail::mapped_handle *>(at(mapping.at(std::distance(statements.begin(),it)), std::make_pair(&it->second,detail::LHS_NODE_TYPE)).get()))
              p->write_back(std::make_pair("i", "j"), fetched, stream);
          }

          stream.dec_tab();
          stream << "}" << std::endl;
          stream.dec_tab();
          stream << "}" << std::endl;
        }
コード例 #6
0
        virtual void operator()(utils::kernel_generation_stream & stream, std::size_t device_offset, statements_type const & statements) const {
          std::vector<detail::mapping_type> mapping(statements.size());

          ///Get Prototype, initialize mapping
          std::string prototype;
          std::set<std::string> already_generated;
          kernel_arguments(statements, prototype);

          {
            std::map<void *, std::size_t> memory;
            unsigned int current_arg = 0;
            std::size_t i = 0;
            for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it)
              detail::traverse(it->first, it->second, detail::map_functor(memory,current_arg,mapping[i++]));
          }

          for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){
            detail::traverse(it->first, it->second, detail::prototype_generation_traversal(already_generated, prototype, vectorization(), mapping[std::distance(statements.begin(), it)]));
          }

          prototype.erase(prototype.size()-1); //Last comma pruned

          //Generate
          for(std::size_t n = 0 ; n < num_kernels() ; ++n){
            //stream << "__attribute__((vec_type_hint()))" << std::endl;
            stream << " __attribute__((reqd_work_group_size(" << local_size_1_ << "," << local_size_2_ << "," << 1 << ")))" << std::endl;
            stream << "__kernel " << "void " << "kernel_" << device_offset << "_" << n << "(" << std::endl;
            stream << prototype << std::endl;
            stream << ")" << std::endl;

            //core:
            stream << "{" << std::endl;
            stream.inc_tab();
            core(n, stream, statements, mapping);
            stream.dec_tab();
            stream << "}" << std::endl;
          }
        }
コード例 #7
0
ファイル: saxpy.hpp プロジェクト: shanagr/viennacl-dev
    void operator()(utils::kernel_generation_stream& kss) {

        unsigned int n_unroll = profile_->loop_unroll();
        symbolic_vector_base * first_vector =  NULL;
        symbolic_matrix_base * first_matrix = NULL;
        if(vector_expressions_.size()) first_vector = static_cast<symbolic_vector_base*>(&(*vector_expressions_.begin())->lhs());
        if(matrix_expressions_.size()) first_matrix = static_cast<symbolic_matrix_base*>(&(*matrix_expressions_.begin())->lhs());
        if(first_vector) {
            kss << "int i = get_global_id(0)" ;
            if(n_unroll>1) kss << "*" << n_unroll;
            kss << ";" << std::endl;
//            kss << "if(i < " << first_vector->size() << "){" << std::endl;
            kss.inc_tab();


            //Set access indices
            for(typename std::list<symbolic_binary_vector_expression_base*>::iterator it=vector_expressions_.begin() ; it!=vector_expressions_.end(); ++it) {
                for(unsigned int j=0 ; j < n_unroll ; ++j) {
                    if(j==0) (*it)->access_index(j,"i","0");
                    else (*it)->access_index(j,"i + " + utils::to_string(j),"0");
                    (*it)->fetch(j,kss);
                }
            }

            //Compute expressions
            for(typename std::list<symbolic_binary_vector_expression_base*>::iterator it=vector_expressions_.begin() ; it!=vector_expressions_.end(); ++it) {
                for(unsigned int j=0 ; j < n_unroll ; ++j) {
                    kss << (*it)->generate(j) << ";" << std::endl;
                }
            }

            for(typename std::list<symbolic_binary_vector_expression_base*>::iterator it=vector_expressions_.begin() ; it!=vector_expressions_.end(); ++it) {
                for(unsigned int j=0 ; j < n_unroll ; ++j) {
                    (*it)->write_back(j,kss);
                }
            }

            kss.dec_tab();
//            kss << "}" << std::endl;
        }
        if(first_matrix) {
            if(first_matrix->is_rowmajor()) {
                kss << "unsigned int r = get_global_id(0)/" << first_matrix->internal_size2() << ";" << std::endl;
                kss << "unsigned int c = get_global_id(0)%" << first_matrix->internal_size2() << ";" << std::endl;
            }
            else {
                kss << "unsigned int r = get_global_id(0)%" << first_matrix->internal_size1() << ";" << std::endl;
                kss << "unsigned int c = get_global_id(0)/" << first_matrix->internal_size1() << ";" << std::endl;
            }
            kss << "if(r < " << first_matrix->internal_size1() << "){" << std::endl;
            kss.inc_tab();

            //Set access indices
            for(typename std::list<symbolic_binary_matrix_expression_base*>::iterator it=matrix_expressions_.begin() ; it!=matrix_expressions_.end(); ++it) {
                (*it)->access_index(0,"r","c");
                (*it)->fetch(0,kss);
            }

            //Compute expressions
            for(std::list<symbolic_binary_matrix_expression_base*>::iterator it = matrix_expressions_.begin(); it!=matrix_expressions_.end(); ++it)
                kss << (*it)->generate(0) << ";" << std::endl;

            for(typename std::list<symbolic_binary_matrix_expression_base*>::iterator it=matrix_expressions_.begin() ; it!=matrix_expressions_.end(); ++it)
                (*it)->write_back(0,kss);


            kss.dec_tab();
            kss << "}" << std::endl;
        }
        for(unsigned int i=0 ; i < n_unroll ; ++i) {
            for(std::list<symbolic_binary_vector_expression_base*>::iterator it = vector_expressions_.begin(); it != vector_expressions_.end() ; ++it)
                (*it)->clear_private_value(i);
            for(std::list<symbolic_binary_matrix_expression_base*>::iterator it = matrix_expressions_.begin(); it != matrix_expressions_.end() ; ++it)
                (*it)->clear_private_value(i);
            for(std::list<symbolic_binary_scalar_expression_base*>::iterator it = scalar_expressions_.begin() ; it != scalar_expressions_.end(); ++it)
                (*it)->clear_private_value(i);
        }
    }