//note that we keep mint for pragma as well //#pragma mint for //#pragma omp for void MintCudaMidend::replaceMintForWithOmpFor(SgSourceFile* file) { Rose_STL_Container<SgNode*> nodeList = NodeQuery::querySubTree(file, V_SgPragmaDeclaration); Rose_STL_Container<SgNode*>::reverse_iterator nodeListIterator = nodeList.rbegin(); for ( ;nodeListIterator !=nodeList.rend(); ++nodeListIterator) { SgPragmaDeclaration* node = isSgPragmaDeclaration(*nodeListIterator); ROSE_ASSERT(node != NULL); //checks if the syntax is correct and the parallel region is followed by //a basic block if(MintPragmas::isForLoopPragma(node)) { SgStatement* loop = getNextStatement(node); ROSE_ASSERT(loop); if(isSgForStatement(loop)) { removeStatement(loop); SgOmpForStatement* omp_stmt = new SgOmpForStatement(NULL, loop); setOneSourcePositionForTransformation(omp_stmt); loop->set_parent(omp_stmt); insertStatementAfter(node, omp_stmt); } } } }
// Swaps for-loops over the GIDZ / GIDY iterators with the if-statement found
// in their body, inside the body of the given kernel.
//
// Parameters:
//   kernel     - function declaration whose definition body is transformed.
//   clauseList - for-clause settings; `nested` and `chunksize.{y,z}` select
//                which of the two supported shapes is attempted.
//
// Nothing is done unless clauseList.nested > 1. Two shapes are handled:
//
// case 1 (chunksize.z != 1 and chunksize.y != 1): two loops, one if
//   for ()        --- >   if ()
//     for ()      --- >     for ()
//       if ()     --- >       for ()
//         { }     --- >         { }
//
// case 2 (exactly one of chunksize.z / chunksize.y != 1): one loop, 1 or 2 ifs
//   for ()        --- >   if ()
//     if ()       --- >     for ()
//       if ()     --- >       for ()
//         { }     --- >         { }
//
// The actual legality check and rewrite are delegated to isSwappable() and
// swapForWithIf().
void CudaOptimizer::swapLoopAndIf(SgFunctionDeclaration* kernel,
                                  MintForClauses_t clauseList)
{
  SgBasicBlock* kernel_body = kernel->get_definition()->get_body();

  // Swapping is only considered when more than one loop level is nested.
  if (clauseList.nested <= 1)
    return;

  const bool z_chunked = (clauseList.chunksize.z != 1);
  const bool y_chunked = (clauseList.chunksize.y != 1);

  // ---- case 1: two loops, 1 if ------------------------------------------
  if (z_chunked && y_chunked)
  {
    Rose_STL_Container<SgNode*> forloops =
      NodeQuery::querySubTree(kernel_body, V_SgForStatement);

    Rose_STL_Container<SgNode*>::reverse_iterator loopIt;
    for (loopIt = forloops.rbegin(); loopIt != forloops.rend(); ++loopIt)
    {
      SgForStatement* cur_loop = isSgForStatement(*loopIt);

      SgInitializedName* index_var = getLoopIndexVariable(cur_loop);
      if (index_var == NULL)
        continue; // no recognizable index variable: not a gidz/gidy loop

      string index_var_str = index_var->get_name().str();

      // FIX: this used to be `== GIDZ && == GIDY`, which can never hold for
      // two different iterator names, so case 1 was never applied. Either
      // the z loop or the y loop qualifies.
      if (index_var_str == GIDZ || index_var_str == GIDY)
      {
        SgBasicBlock* loop_body = isSgBasicBlock(cur_loop->get_loop_body());
        Rose_STL_Container<SgNode*> ifs =
          NodeQuery::querySubTree(loop_body, V_SgIfStmt);

        // Without an if-statement inside the loop there is nothing to swap
        // (and dereferencing begin() of an empty container is undefined).
        if (ifs.empty())
          continue;

        SgIfStmt* cur_if = isSgIfStmt(*(ifs.begin()));

        SgBasicBlock* bb = buildBasicBlock();
        insertStatementBefore(cur_loop, bb);

        SgForStatement* for_stmt = deepCopy(cur_loop);
        ROSE_ASSERT(for_stmt);

        //add the loop into the new basic block
        appendStatement(for_stmt, bb);

        // NOTE(review): the copy is appended to `bb` before the swappability
        // check, exactly as in the original code; confirm that this is the
        // intended protocol for swapForWithIf() when the loop is not swappable.
        if (isSwappable(cur_loop, cur_if))
        {
          swapForWithIf(cur_loop, cur_if, bb);
        }
      }
    } //end of for loopIt
  } //end of case 1

  // ---- case 2: only one for loop but there may be 1 or 2 ifs --------------
  if ((z_chunked && !y_chunked) || (!z_chunked && y_chunked))
  {
    int swappable = clauseList.nested - 1;

    while (swappable-- > 0)
    {
      // The tree changes after every swap, so the loops are queried again on
      // each round and the first for statement found is examined.
      Rose_STL_Container<SgNode*> forloops =
        NodeQuery::querySubTree(kernel_body, V_SgForStatement);
      ROSE_ASSERT(!forloops.empty());

      SgForStatement* cur_loop = isSgForStatement(*(forloops.begin()));
      ROSE_ASSERT(cur_loop);

      SgInitializedName* index_var = getLoopIndexVariable(cur_loop);
      if (index_var == NULL)
        break; // no recognizable index variable: nothing left to swap

      string index_var_str = index_var->get_name().str();

      if ((index_var_str == GIDZ && z_chunked) ||
          (index_var_str == GIDY && y_chunked))
      {
        SgBasicBlock* loop_body = isSgBasicBlock(cur_loop->get_loop_body());
        Rose_STL_Container<SgNode*> ifs =
          NodeQuery::querySubTree(loop_body, V_SgIfStmt);

        // No if-statement inside the loop: treat as "not swappable" instead
        // of dereferencing an empty container.
        if (ifs.empty())
          break;

        SgIfStmt* cur_if = isSgIfStmt(*(ifs.begin()));

        if (isSwappable(cur_loop, cur_if))
        {
          //the swapped code is generated into a new basic block placed
          //before the loop; the original loop is then removed
          SgBasicBlock* bb = buildBasicBlock();
          insertStatementBefore(cur_loop, bb);
          swapForWithIf(cur_loop, cur_if, bb);
          removeStatement(cur_loop);
        }
        else
        {
          swappable = 0; // stop after the first non-swappable pair
        }
      }
    } //end of while
  } //end of case 2
}
// Lowers the OpenMP nodes of `file` that this generator understands.
//
// All statements of the file are collected and visited from the last one to
// the first. Two node kinds are acted upon:
//   - SgOmpForStatement: translated by transOmpTargetParallelLoop() when
//     isCombinedTargetParallelFor() accepts it.
//   - SgOmpTargetStatement: handled according to isMPIAllBegin() /
//     isMPIMasterBegin(); other target directives are left untouched here.
void MPI_Code_Generator::lower_xomp (SgSourceFile* file)
{
  ROSE_ASSERT(file != NULL);

  Rose_STL_Container<SgNode*> statements = NodeQuery::querySubTree(file, V_SgStatement);

  for (Rose_STL_Container<SgNode*>::reverse_iterator it = statements.rbegin();
       it != statements.rend(); ++it)
  {
    SgStatement* cur_stmt = isSgStatement(*it);
    ROSE_ASSERT(cur_stmt != NULL);
    //debug the order of the statements
    // cout<<"Debug lower_omp(). stmt:"<<cur_stmt<<" "<<cur_stmt->class_name() <<" "<< cur_stmt->get_file_info()->get_line()<<endl;

    switch (cur_stmt->variantT())
    {
#if 0
      case V_SgOmpParallelStatement:
        {
          // check if this parallel region is under "omp target"
          SgNode* parent = cur_stmt->get_parent();
          ROSE_ASSERT (parent != NULL);
          if (isSgBasicBlock(parent)) // skip the padding block in between.
            parent= parent->get_parent();
          if (isSgOmpTargetStatement(parent))
            transOmpTargetParallel(cur_stmt);
          else
            transOmpParallel(cur_stmt);
          break;
        }
      case V_SgOmpForStatement:
      case V_SgOmpDoStatement:
        {
          // check if the loop is part of the combined "omp parallel for" under the "omp target" directive
          // TODO: more robust handling of this logic, not just fixed AST form
          bool is_target_loop = false;
          SgNode* parent = cur_stmt->get_parent();
          ROSE_ASSERT (parent != NULL);
          // skip a possible BB between omp parallel and omp for, especially when the omp parallel has multiple omp for loops
          if (isSgBasicBlock(parent))
            parent = parent->get_parent();
          SgNode* grand_parent = parent->get_parent();
          ROSE_ASSERT (grand_parent != NULL);

          if (isSgOmpParallelStatement (parent) && isSgOmpTargetStatement(grand_parent) )
            is_target_loop = true;

          if (is_target_loop)
          {
            // transOmpTargetLoop (cur_stmt);
            // use round-robin scheduler for larger iteration space and better performance
            transOmpTargetLoop_RoundRobin(cur_stmt);
          }
          else
          {
            transOmpLoop(cur_stmt);
          }
          break;
        }
#endif
      // A combined "omp target parallel for" is represented as three
      // separate directives: omp target, omp parallel and omp for.
      case V_SgOmpForStatement:
        {
          SgOmpForStatement* omp_for = isSgOmpForStatement(cur_stmt);
          // Out-parameters of the query below; they are not used afterwards.
          SgOmpTargetStatement * omp_target;
          SgOmpParallelStatement* omp_parallel;
          if (isCombinedTargetParallelFor (omp_for, &omp_target, &omp_parallel))
            transOmpTargetParallelLoop (omp_for);
          break;
        }
      case V_SgOmpTargetStatement:
        {
          SgOmpTargetStatement* target = isSgOmpTargetStatement(cur_stmt);
          ROSE_ASSERT (target != NULL);
          SgStatement* target_body = target->get_body();
          SgBasicBlock* target_block = isSgBasicBlock (target_body);
          // transOmpTarget(cur_stmt);
          if (isMPIAllBegin (target))
          {
            // move all body statements to be after omp target
            if (target_block == NULL)
            {
              //TODO: ideally, the body should be normalized to be a BB even if it is only a single statement
              removeStatement (target_body);
              insertStatementAfter (target, target_body, false);
            }
            else
            {
              stripOffBasicBlock (target_block, target);
            }
            // remove the pragma stmt after the translation
            removeStatement (target);
          }
          else if (isMPIMasterBegin (target))
          {
            transMPIDeviceMaster (target);
          }
          // Any other target directive followed by an omp parallel for is
          // handled when the parallel for itself is translated.
          // cerr<<"Error. Unhandled target directive:" <<target->unparseToString()<<endl;
          //ROSE_ASSERT (false);
          break;
        }
      default:
        {
          // every other statement kind is ignored
        }
    } // switch
  } // end for
}
//============================================================================== int main (int argc, char** argv) { // Build the AST used by ROSE vector <string> argvList (argv, argv + argc); if (CommandlineProcessing::isOption(argvList,"-help","", false)) { cout<<"---------------------Tool-Specific Help-----------------------------------"<<endl; cout<<"This is a source analysis to estimate FLOPS and Load/store bytes for loops in your C/C++ or Fortran code."<<endl; cout<<"Usage: "<<argvList[0]<<" -c ["<<report_option<<" result.txt] "<< "input.c"<<endl; cout<<endl; cout<<"The optional "<<report_option<<" option is provided for users to specify where to save the results"<<endl; cout<<"By default, the results will be saved into a file named report.txt"<<endl; cout<<"----------------------Generic Help for ROSE tools--------------------------"<<endl; } if (CommandlineProcessing::isOption(argvList,"-static-counting-only","", true)) { running_mode = e_static_counting; } if (CommandlineProcessing::isOption(argvList,"-debug","", true)) { debug = true; } else debug = false; if (CommandlineProcessing::isOptionWithParameter (argvList, report_option,"", report_filename,true)) { if (debug) cout<<"Using user specified file: "<<report_filename<<" for storing results."<<endl; } else { //report_filename="ai_tool_report.txt"; // this is set in src/ai_measurement.cpp already if (debug) cout<<"Using the default file:"<<report_filename<<" for storing results."<<endl; } //Save -debugdep, -annot file .. 
etc, // used internally in ReadAnnotation and Loop transformation CmdOptions::GetInstance()->SetOptions(argvList); bool dumpAnnot = CommandlineProcessing::isOption(argvList,"","-dumpannot",true); //Read in annotation files after -annot ArrayAnnotation* annot = ArrayAnnotation::get_inst(); annot->register_annot(); ReadAnnotation::get_inst()->read(); if (dumpAnnot) annot->Dump(); //Strip off custom options and their values to enable backend compiler CommandlineProcessing::removeArgsWithParameters(argvList,"-annot"); SgProject* project = frontend(argvList); // Insert your own manipulations of the AST here... SgFilePtrList file_ptr_list = project->get_fileList(); //visitorTraversal exampleTraversal; for (size_t i = 0; i<file_ptr_list.size(); i++) { SgFile* cur_file = file_ptr_list[i]; SgSourceFile* s_file = isSgSourceFile(cur_file); if (s_file != NULL) { // Preorder is not friendly for transformation //exampleTraversal.traverseWithinFile(s_file, postorder); Rose_STL_Container<SgNode*> nodeList = NodeQuery::querySubTree(s_file,V_SgStatement); if (running_mode == e_analysis_and_instrument) // reverse of pre-order for transformation mode { for (Rose_STL_Container<SgNode *>::reverse_iterator i = nodeList.rbegin(); i != nodeList.rend(); i++) { SgStatement *stmt= isSgStatement(*i); processStatements (stmt); } } else if (running_mode == e_static_counting) // pre-order traverse for analysis only mode { for (Rose_STL_Container<SgNode *>::iterator i = nodeList.begin(); i != nodeList.end(); i++) { SgStatement *stmt= isSgStatement(*i); processStatements (stmt); } } else { cerr<<"Error. unrecognized execution mode:"<< running_mode<<endl; ROSE_ASSERT (false); } } // endif } // end for // Generate source code from AST and invoke your // desired backend compiler return backend(project); }
// Searches the mint pragmas of `file` and performs the necessary
// transformations; the pragmas are also checked syntactically.
//
// At this point only parallel regions (plus barrier, flush, single and
// master) are processed. For loops are deliberately skipped here (the
// traversal is bottom-up): they are processed when the enclosing parallel
// region pragma is seen, because they are always inside a parallel region.
//
// TODO: Sometimes a forloop is merged with a parallel region. Need to handle these.
void MintCudaMidend::lowerMinttoCuda(SgSourceFile* file)
{
  ROSE_ASSERT(file != NULL);

  // Preparation passes, in this order:
  // replace all the occurrences of mint parallel with omp parallel
  replaceMintParallelWithOmpParallel(file);
  replaceMintForWithOmpFor(file);

  // add the private and first private into private clause explicitly
  patchUpPrivateVariables(file);      //uses ROSE's
  patchUpFirstprivateVariables(file); //uses ROSE's

  //insert openmp specific headers
  //insertRTLHeaders(file);

  // check if mint pragma declarations are correct
  mintPragmasFrontendProcessing(file);

  // Visit all statements from the last one to the first.
  Rose_STL_Container<SgNode*> statements = NodeQuery::querySubTree(file, V_SgStatement);

  for (Rose_STL_Container<SgNode*>::reverse_iterator it = statements.rbegin();
       it != statements.rend(); ++it)
  {
    SgStatement* node = isSgStatement(*it);
    ROSE_ASSERT(node != NULL);

    switch (node->variantT())
    {
      case V_SgOmpParallelStatement:
        {
          // The map holds the mapping from the host variables to the device
          // variables where the data is copied; data transfer pragmas are
          // handled first and fill it.
          MintHostSymToDevInitMap_t hostToDevVars;
          processDataTransferPragmas(node, hostToDevVars);
#ifdef VERBOSE_2
          cout << " INFO:Mint: @ Line " << node->get_file_info()->get_line() << endl;
          cout << " Processing Mint Parallel Statement" << endl << endl;
#endif
          MintCudaMidend::transOmpParallel(node, hostToDevVars);
          break;
        }

      case V_SgOmpBarrierStatement:
        {
#ifdef VERBOSE_2
          cout << " INFO:Mint: @ Line " << node->get_file_info()->get_line() << endl;
          cout << " Processing Omp Barrier Statement" << endl;
#endif
          transOmpBarrierToCudaBarrier(node);
          break;
        }

      case V_SgOmpFlushStatement:
        {
          cout << " INFO:Mint: Processing Omp Flush Statement" << endl;
          transOmpFlushToCudaBarrier(node);
          break;
        }

      case V_SgOmpSingleStatement:
        {
          //TODO: we need to check if the loop body becomes a cuda kernel or not.
          MintCudaMidend::transOmpSingle(node);
          break;
        }

      case V_SgOmpMasterStatement:
        {
          //TODO: we need to check if the loop body becomes a cuda kernel or not.
          MintCudaMidend::transOmpMaster(node);
          break;
        }

      // The following OpenMP constructs are recognized but intentionally not
      // transformed here. The calls that are currently disabled:
      case V_SgOmpTaskStatement:          //transOmpTask(node);
      case V_SgOmpForStatement:           //cout << "INFO-mint: Omp For Statement (skipped processing it)" << endl;
                                          //LoweringToCuda::transOmpFor(node);
                                          //OmpSupport::transOmpFor(node);
      case V_SgOmpThreadprivateStatement: //transOmpThreadprivate(node);
      case V_SgOmpTaskwaitStatement:      //transOmpTaskwait(node);
      case V_SgOmpAtomicStatement:        //transOmpAtomic(node);
      case V_SgOmpOrderedStatement:       //transOmpOrdered(node);
      case V_SgOmpCriticalStatement:      //transOmpCritical(node);
        break;

      default:
        {
          //This is any other statement in the source code which is not omp pragma
          //cout<< node->unparseToString()<<" at line:"<< (node->get_file_info())->get_line()<<endl;
          // do nothing here
        }
    } // switch
  }

#if 0
  //3. Special handling for files with main()
  // rename main() to user_main()
  SgFunctionDeclaration * mainFunc = findMain(cur_file);
  if (mainFunc)
  {
    renameMainToUserMain(mainFunc);
  }
#endif
}
void MintCudaMidend::processLoopsInParallelRegion(SgNode* parallelRegionNode, MintHostSymToDevInitMap_t hostToDevVars, ASTtools::VarSymSet_t& deviceSyms, MintSymSizesMap_t& trfSizes, std::set<SgInitializedName*>& readOnlyVars, const SgVariableSymbol* dev_struct) { Rose_STL_Container<SgNode*> nodeList = NodeQuery::querySubTree(parallelRegionNode, V_SgStatement); Rose_STL_Container<SgNode*>::reverse_iterator nodeListIterator = nodeList.rbegin(); for ( ;nodeListIterator !=nodeList.rend(); ++nodeListIterator) { SgStatement* node = isSgStatement(*nodeListIterator); ROSE_ASSERT(node != NULL); switch (node->variantT()) { case V_SgOmpForStatement: { #ifdef VERBOSE_2 cout << " INFO:Mint: @ Line " << node->get_file_info()->get_line() << endl; cout << " Processing Omp For Statement" << endl << endl; #endif //DataTransferSizes::findTransferSizes(node, trfSizes); bool isBoundaryCond = LoweringToCuda::isBoundaryConditionLoop(node); SgFunctionDeclaration* kernel; MintForClauses_t clauseList; //kernel= LoweringToCuda::transOmpFor(node, hostToDevVars, deviceSyms, readOnlyVars,clauseList, dev_struct) ; kernel= LoweringToCuda::transOmpFor(node, hostToDevVars, deviceSyms, clauseList, dev_struct) ; //swap anyways // x swapping is buggy, need to fix that before allowing x as well if(clauseList.chunksize.x == 1 && ( clauseList.chunksize.z != 1 || clauseList.chunksize.y != 1 )) { //if(MintOptions::GetInstance()->isSwapOpt()) CudaOptimizer::swapLoopAndIf(kernel, clauseList); } if (!isBoundaryCond && MintOptions::GetInstance()->optimize()) //if (MintOptions::GetInstance()->optimize()) { cout << "\n\n INFO:Mint: Optimization is ON. 
Optimizing ...\n\n" ; CudaOptimizer::optimize(kernel, clauseList); } // MintTools::printAllStatements(isSgNode(kernel)); //MintArrayInterface::linearizeArrays(kernel); break; } default: { //cout << " INFO:Mint: @ Line " << node->get_file_info()->get_line() << endl; //cout << " Currently we only handle for loops" << endl << endl; //do nothing //currently we only handle for loops break; } } } for (ASTtools::VarSymSet_t::const_iterator i = deviceSyms.begin (); i!= deviceSyms.end (); ++i) { SgVariableSymbol* sym= const_cast<SgVariableSymbol*> (*i); SgInitializedName* name = sym->get_declaration(); SgType* type = name->get_type(); if(isSgArrayType(type) || isSgPointerType(type)){ //Check if is of the fields of the struct if(hostToDevVars.find(sym) == hostToDevVars.end()) { string name_str = name->get_name().str(); cerr << " ERR:Mint: Ooops! Did you forget to insert a copy pragma for the variable ("<< name_str << ") ?"<< endl; cerr << " ERR:Mint: Please insert the copy pragma and compile again "<< endl; cerr << " INFO:Mint: Note that copy pragmas should appear right before and after a parallel region" << endl; ROSE_ABORT(); } } } }
void StencilAnalysis::performCornerStencilsAnalysis(SgBasicBlock* basicBlock, const SgInitializedName* array, bool& plane_xy /*false*/, bool& plane_xz /*false*/, bool& plane_yz /*false*/ ) { //For example 19 and 27-stencils require upper and lower planes in shared memory but //7-point only needs the center plane in the shared memory. bool cornerXY = false; bool cornerYZ = false; bool cornerXZ = false; size_t dim = MintArrayInterface::getDimension(array); string candidateVarName = array->get_name().str(); Rose_STL_Container<SgNode*> nodeList = NodeQuery::querySubTree(basicBlock, V_SgPntrArrRefExp); Rose_STL_Container<SgNode*>::reverse_iterator arrayNode = nodeList.rbegin(); for(; arrayNode != nodeList.rend(); arrayNode++) { ROSE_ASSERT(*arrayNode); SgExpression* arrayExp; vector<SgExpression*> subscripts; //first index is i if E[j][i] SgExpression* arrRefWithIndices = isSgExpression(*arrayNode); if(MintArrayInterface::isArrayReference(arrRefWithIndices, &arrayExp, &subscripts)) { SgInitializedName *arrayName = SageInterface::convertRefToInitializedName(isSgVarRefExp (arrayExp)); string var_name= arrayName->get_name().str(); //check if array name matches if(var_name == candidateVarName && subscripts.size() == dim){ //check if subsrcipts are _gidx and _gidy std::vector<SgExpression*>::iterator it; bool cornerX = false; bool cornerY = false; bool cornerZ = false; for(it= subscripts.begin(); it != subscripts.end(); it++) { SgExpression* index = *it; Rose_STL_Container<SgNode*> constList = NodeQuery::querySubTree(index, V_SgIntVal); Rose_STL_Container<SgNode*> indexVarExp = NodeQuery::querySubTree(index, V_SgVarRefExp); if(constList.size() > 0 && indexVarExp.size() == 1 ) { string indexVarStr = isSgExpression(*(indexVarExp.begin())) -> unparseToString(); //corner if(indexVarStr == GIDX ) cornerX = true; else if (indexVarStr == GIDY) cornerY = true; else if (indexVarStr == GIDZ) cornerZ = true; } } cornerXY = (cornerX && cornerY) ? 
true : cornerXY; cornerXZ = (cornerX && cornerZ) ? true : cornerXZ; cornerYZ = (cornerZ && cornerY) ? true : cornerYZ; } } } plane_xy = cornerXY; plane_xz = cornerXZ; plane_yz = cornerYZ; if(plane_xy) cout << " INFO:Mint: Corner x y planes are shared" << endl; if(plane_xz) cout << " INFO:Mint: Corner x z planes are shared" << endl; if(plane_yz) cout << " INFO:Mint: Corner y z planes are shared" << endl; }
int StencilAnalysis::performHigherOrderAnalysis(SgBasicBlock* basicBlock, const SgInitializedName* array, int num_planes) { //Didem: rewrote in March 01, 2011 to make it more robust //we are interested in only expressions i,j,k + c where c is a constant //and consider only these kinds of expressions because others cannot //benefit from the on-chip memory optimizations //this affects the shared memory offsets [BLOCKDIM + order] //assumes symmetric //x dim : -/+ order //y dim : -/+ order //z dim : -/+ order int maxOrderX = 0; int maxOrderY = 0; int maxOrderZ = 0; size_t dim = MintArrayInterface::getDimension(array); string candidateVarName = array->get_name().str(); Rose_STL_Container<SgNode*> nodeList = NodeQuery::querySubTree(basicBlock, V_SgPntrArrRefExp); Rose_STL_Container<SgNode*>::reverse_iterator arrayNode = nodeList.rbegin(); for(; arrayNode != nodeList.rend(); arrayNode++) { ROSE_ASSERT(*arrayNode); SgExpression* arrayExp; vector<SgExpression*> subscripts; //first index is i if E[j][i] SgExpression* arrRefWithIndices = isSgExpression(*arrayNode); if(MintArrayInterface::isArrayReference(arrRefWithIndices, &arrayExp, &subscripts)) { SgInitializedName *arrayName = SageInterface::convertRefToInitializedName(isSgVarRefExp (arrayExp)); ROSE_ASSERT(arrayName); string var_name= arrayName->get_name().str(); //check if array name matches if(var_name == candidateVarName && subscripts.size() == dim){ int indexNo = 0; //check if subsrcipts are _gidx and _gidy std::vector<SgExpression*>::iterator it; for(it= subscripts.begin(); it != subscripts.end(); it++) { indexNo++; SgExpression* index = *it; Rose_STL_Container<SgNode*> binOpList = NodeQuery::querySubTree(index, V_SgBinaryOp); if(binOpList.size() == 1) { SgBinaryOp* binOp = isSgBinaryOp(*(binOpList.begin())); ROSE_ASSERT(binOp); if(isSgAddOp(binOp) || isSgSubtractOp(binOp)) { SgExpression* lhs = binOp->get_lhs_operand(); SgExpression* rhs = binOp->get_rhs_operand(); ROSE_ASSERT(lhs); ROSE_ASSERT(rhs); SgExpression* 
varExp = NULL; string varExp_str = ""; int newVal =0; if(isSgIntVal(lhs) && isSgVarRefExp(rhs)) { varExp = isSgVarRefExp(rhs); varExp_str = varExp->unparseToString(); newVal = isSgIntVal(lhs)-> get_value(); } else if (isSgIntVal(rhs) && isSgVarRefExp(lhs)) { varExp = isSgVarRefExp(lhs); varExp_str = varExp->unparseToString(); newVal = isSgIntVal(rhs)-> get_value(); } if(indexNo == 1 && varExp_str == GIDX) maxOrderX = newVal > maxOrderX ? newVal : maxOrderX; if(indexNo == 2 && varExp_str == GIDY) maxOrderY = newVal > maxOrderY ? newVal : maxOrderY; if(indexNo == 3 && varExp_str == GIDZ) maxOrderZ = newVal > maxOrderZ ? newVal : maxOrderZ; }//end of addop subtract op } // end of binary op } //end of for subscripts }//if end of var_name = candidatename }//end of is arrayRef }// end of ptr arr ref loop if(num_planes == 1) return (maxOrderX > maxOrderY) ? maxOrderX : maxOrderY; if(maxOrderX > maxOrderY ) return (maxOrderX > maxOrderZ) ? maxOrderX : maxOrderZ; return (maxOrderY > maxOrderZ) ? maxOrderY : maxOrderZ; } //end of function