void GpuRule::generateKernel(Transform& trans, CodeGenerator& o, bool local) { std::string SUFFIX; if(local) SUFFIX = "_local"; else SUFFIX = "_nolocal"; CLCodeGenerator clcodegen(o.cgPtr()); IterationDefinition iterdef(*_rule, _rule->getSelfDependency(), _rule->isSingleCall()); std::vector<std::string> packedargs = iterdef.packedargs(); std::vector<std::string> packedargnames = iterdef.packedargnames(); o.os() << "// GPURULE DECL CODE " << _rule->id() << " " << this << "\n"; // Create variables to hold handles to program, kernel o.os( ) << "cl_program " << "clprog_" << _rule->id() << SUFFIX << " = 0;\n"; o.os( ) << "cl_kernel " << "clkern_" << _rule->id() << SUFFIX << " = 0;\n"; // Create init function call o.beginFunc("void", codename()+"_init"+SUFFIX, std::vector<std::string>(),false); trans.addInitCall(codename()+"_init"+SUFFIX); _rule->generateOpenCLKernel( trans, clcodegen, iterdef, local); //o.os( ) << "/* -- Testing purposes only, to make this easy to read --\n\n"; //clcodegen.outputStringTo( o.os( ) ); //o.os( ) << "\n*/\n"; o.os( ) << "const char* clsrc = "; clcodegen.outputEscapedStringTo( o.os( ) ); o.os( ) << ";\n"; o.os() << "bool rv = OpenCLUtil::buildKernel(clprog_" << _rule->id() << SUFFIX << ", clkern_" << _rule->id() << SUFFIX << ", clsrc);\n"; o.os() << "JASSERT(rv);\n"; o.endFunc(); // Get kernel o.beginFunc("cl_kernel", "get_kernel_" + jalib::XToString(_rule->id()) + SUFFIX); #ifdef DEBUG o.write("JASSERT(clkern_" + jalib::XToString(_rule->id()) + SUFFIX + " != 0);"); #endif o.write("return clkern_" + jalib::XToString(_rule->id()) + SUFFIX + ";"); o.endFunc(); // Get program o.beginFunc("cl_program", "get_program_" + jalib::XToString(_rule->id()) + SUFFIX); #ifdef DEBUG o.write("JASSERT(clprog_" + jalib::XToString(_rule->id()) + SUFFIX + " != 0);"); #endif o.write("return clprog_" + jalib::XToString(_rule->id()) + SUFFIX + ";"); o.endFunc(); }
void petabricks::CodeGenerator::mkCreateGpuSpatialMethodCallTask( const std::string& transname, const std::string& taskname, const std::string& objname, const std::string& methodname, const SimpleRegion& region, std::vector<RegionNodeGroup>& regionNodesGroups, int nodeID, int gpuCopyOut, RegionList to, bool divisible) { std::string taskclass ; int dim_int = region.totalDimensions(); std::string lastdim = jalib::XToString(dim_int - 1); std::string max = region.maxCoord()[dim_int - 1]->toString(); std::string min = region.minCoord()[dim_int - 1]->toString(); if(!divisible) { taskclass = "petabricks::CreateGpuSpatialMethodCallTask<"+objname + ", " + jalib::XToString(region.totalDimensions()) + ", &" + objname + "::" + methodname + TX_OPENCL_POSTFIX + "_createtasks" + ">"; //beginIf(min+"<"+max); comment("MARKER 6"); write("IndexT _tmp_begin[] = {" + region.getIterationLowerBounds() + "};"); write("IndexT _tmp_end[] = {" + region.getIterationUpperBounds() + "};"); write("RegionNodeGroupMapPtr groups = new RegionNodeGroupMap();"); for(std::vector<RegionNodeGroup>::iterator group = regionNodesGroups.begin(); group != regionNodesGroups.end(); ++group){ write("{"); incIndent(); write("std::set<int> ids;"); for(std::vector<int>::iterator id = group->nodeIDs().begin(); id != group->nodeIDs().end(); ++id){ write("ids.insert("+jalib::XToString(*id)+");"); } write("groups->insert(RegionNodeGroup(\""+group->matrixName()+"\",ids));"); decIndent(); write("}"); } write(taskname+" = new "+taskclass+"(this,_tmp_begin, _tmp_end, "+jalib::XToString(nodeID)+", groups, "+jalib::XToString(gpuCopyOut)+");"); //endIf(); return; } std::string n_div = "cont_" + jalib::XToString(_contCounter++); write(taskname + " = new petabricks::MethodCallTask<"+_curClass+", &"+_curClass+"::"+n_div+">( this );"); // Add divider function CodeGenerator helper = forkhelper(); helper.beginFunc("DynamicTaskPtr", n_div); helper.write("DynamicTaskPtr _fini = new NullDynamicTask();"); // Assign the gpu-cpu division point. helper.write("ElementT gpu_ratio = "+transname+"_gpuratio/8.0;"); std::string div = "div"; RegionPtr proxy = to.front(); helper.write("IndexT totalRow = "+proxy->matrix()->name()+".size("+jalib::XToString(dim_int - 1)+");"); helper.write("IndexT div = ceil(gpu_ratio * totalRow);"); helper.beginIf("div > " + max); helper.write("div = "+max+";"); helper.endIf(); // GPU taskclass = "petabricks::CreateGpuSpatialMethodCallTask<"+objname + ", " + jalib::XToString(dim_int) + ", &" + objname + "::" + methodname + TX_OPENCL_POSTFIX + "_createtasks" + ">"; helper.comment("MARKER 6"); //helper.beginIf(min+" < div"); helper.write("IndexT _gpu_begin[] = {" + region.getIterationLowerBounds() + "};"); helper.write("IndexT _gpu_end[] = {" + region.getIterationMiddleEnd(div) + "};"); helper.write("RegionNodeGroupMapPtr groups = new RegionNodeGroupMap();"); for(std::vector<RegionNodeGroup>::iterator group = regionNodesGroups.begin(); group != regionNodesGroups.end(); ++group){ helper.write("{"); helper.incIndent(); helper.write("std::set<int> ids;"); for(std::vector<int>::iterator id = group->nodeIDs().begin(); id != group->nodeIDs().end(); ++id){ helper.write("ids.insert("+jalib::XToString(*id)+");"); } helper.write("groups->insert(RegionNodeGroup(\""+group->matrixName()+"\",ids));"); helper.decIndent(); helper.write("}"); } helper.write("DynamicTaskPtr gpu_task = new "+taskclass+"(this,_gpu_begin, _gpu_end, "+jalib::XToString(nodeID)+", groups, "+jalib::XToString(gpuCopyOut)+");"); helper.write("gpu_task->enqueue();"); helper.write("_fini->dependsOn(gpu_task);"); //helper.endIf(); // CPU taskclass = "petabricks::SpatialMethodCallTask<CLASS" ", " + jalib::XToString(dim_int) + ", &CLASS::" + methodname + "_workstealing_wrap" //+ ", &CLASS::" + methodname + "_workstealing" + ">"; helper.beginIf("div < " + max); helper.beginIf("div < " + min); helper.write("div = "+min+";"); helper.endIf(); helper.comment("MARKER 6"); helper.write("IndexT _cpu_begin[] = {" + region.getIterationMiddleBegin(div) + "};"); helper.write("IndexT _cpu_end[] = {" + region.getIterationUpperBounds() + "};"); helper.write("DynamicTaskPtr cpu_task = new "+taskclass+"(this,_cpu_begin, _cpu_end);"); helper.write("cpu_task->enqueue();"); helper.write("_fini->dependsOn(cpu_task);"); helper.endIf(); helper.write("return _fini;"); helper.endFunc(); }