/** * Regionize a func, so that each node and each arc in its TransCFG is * "covered". A node is covered if any region contains it. An arc T1->T2 * is covered if either: * * a) T1 and T2 are in the same region R and T2 immediately follows * T1 in R. * b) T2 is the head (first translation) of a region. * * Basic algorithm: * * 1) sort nodes in decreasing weight order * 2) for each node N: * 2.1) if N and all its incoming arcs are covered, then continue * 2.2) select a region starting at this node and mark nodes/arcs as * covered appropriately */ void regionizeFunc(const Func* func, MCGenerator* mcg, RegionVec& regions) { const Timer rf_timer(Timer::regionizeFunc); assert(RuntimeOption::EvalJitPGO); auto const funcId = func->getFuncId(); auto const profData = mcg->tx().profData(); TransCFG cfg(funcId, profData, mcg->tx().getSrcDB(), mcg->getJmpToTransIDMap()); if (Trace::moduleEnabled(HPHP::Trace::pgo, 5)) { auto dotFileName = folly::to<std::string>( "/tmp/func-cfg-", funcId, ".dot"); cfg.print(dotFileName, funcId, profData, nullptr); FTRACE(5, "regionizeFunc: initial CFG for func {} saved to file {}\n", funcId, dotFileName); } TransCFG::ArcPtrVec arcs = cfg.arcs(); std::vector<TransID> nodes = cfg.nodes(); std::sort(nodes.begin(), nodes.end(), [&](TransID tid1, TransID tid2) -> bool { if (RuntimeOption::EvalJitPGORegionSelector == "wholecfg") { auto bcOff1 = profData->transStartBcOff(tid1); auto bcOff2 = profData->transStartBcOff(tid2); if (bcOff1 != bcOff2) return bcOff1 < bcOff2; } if (cfg.weight(tid1) != cfg.weight(tid2)) { return cfg.weight(tid1) > cfg.weight(tid2); } // In case of ties, pick older translations first, in an // attempt to start loops at their headers. return tid1 < tid2; }); TransCFG::ArcPtrSet coveredArcs; TransIDSet coveredNodes; TransIDSet heads; TransIDToRegionMap headToRegion; RegionToTransIDsMap regionToTransIds; regions.clear(); for (auto node : nodes) { if (!coveredNodes.count(node) || !allArcsCovered(cfg.inArcs(node), coveredArcs)) { TransID newHead = node; FTRACE(6, "regionizeFunc: selecting trace to cover node {}\n", newHead); TransIDSet selectedSet; TransIDVec selectedVec; RegionDescPtr region; if (RuntimeOption::EvalJitPGORegionSelector == "hottrace") { region = selectHotTrace(newHead, profData, cfg, selectedSet, &selectedVec); } else if (RuntimeOption::EvalJitPGORegionSelector == "wholecfg") { region = selectWholeCFG(newHead, profData, cfg, selectedSet, &selectedVec); } else { always_assert(0 && "Invalid value for EvalJitPGORegionSelector"); } FTRACE(6, "regionizeFunc: selected region to cover node {}\n{}\n", newHead, show(*region)); profData->setOptimized(profData->transSrcKey(newHead)); assert(selectedVec.size() > 0 && selectedVec[0] == newHead); regions.push_back(region); heads.insert(newHead); markCovered(cfg, region, selectedVec, heads, coveredNodes, coveredArcs); regionToTransIds[region] = selectedVec; headToRegion[newHead] = region; FTRACE(6, "regionizeFunc: selected trace: {}\n", folly::join(", ", selectedVec)); } } assert(coveredNodes.size() == cfg.nodes().size()); assert(coveredArcs.size() == arcs.size()); sortRegions(regions, func, cfg, profData, headToRegion, regionToTransIds); if (debug && Trace::moduleEnabled(HPHP::Trace::pgo, 5)) { FTRACE(5, "\n--------------------------------------------\n" "regionizeFunc({}): computed regions:\n", funcId); for (auto region : regions) { FTRACE(5, "{}\n\n", show(*region)); } } }
std::vector<unsigned int> Preprocessor::isolateRegions () { boost::timer timer; timer.restart(); // Traverse the press clip searching the ink pixels where the flooding process will start from std::vector<PixelCoordinates> seeds(0); seeds.reserve(clip_.size()); for ( unsigned int i = 0; i < clipHeight_; ++i ) { for ( unsigned int j = 0; j < clipWidth_; ++j ) { if ( clip_.at(i * clipWidth_ + j) == 1 ) seeds.push_back( PixelCoordinates(i,j) ); } } // Build the initial list of regions by applying the flooding algorithm regions_.clear(); std::deque<bool> visited(clip_.size(), false); for ( std::vector<PixelCoordinates>::iterator s = seeds.begin(); s != seeds.end(); ++s ) { int row = s->first; int column = s->second; if ( not visited.at(row * clipWidth_ + column) ) { visited.at(row * clipWidth_ + column) = true; // This seed begins a new region Region region; region.addCoordinates( PixelCoordinates(row, column) ); // Explore the immediate neighbourhood for ( int i = row-1; (i <= row+1) && (i < static_cast<int>(clipHeight_)); ++i ) { for ( int j = column-1; (j <= column+1) && (j < static_cast<int>(clipWidth_)); ++j ) { if ( i >= 0 && j >= 0 ) { if ( clip_.at(i * clipWidth_ + j) == 1 && not visited.at(i * clipWidth_ + j) ) { visited.at(i * clipWidth_ + j) = true; region.addCoordinates( PixelCoordinates(i,j) ); } } } } // Explore the neighbours of the neighbours unsigned int k = 1; while ( region.size() > k ) { PixelCoordinates coordinates( region.at(k) ); for ( int i = coordinates.first-1; (i <= static_cast<int>(coordinates.first+1)) && (i < static_cast<int>(clipHeight_)); ++i ) { for ( int j = coordinates.second-1; (j <= static_cast<int>(coordinates.second+1)) && (j < static_cast<int>(clipWidth_)); ++j ) { if ( i >= 0 && j >= 0 ) { if ( clip_.at(i * clipWidth_ + j) == 1 && not visited.at(i * clipWidth_ + j) ) { visited.at(i * clipWidth_ + j) = true; region.addCoordinates( PixelCoordinates(i, j) ); } } } } ++k; } regions_.push_back(region); } } findLineDelimiters(visited); organizeRegionsIntoLines(); mergeVerticallyOverlappedRegions(); averageCharacterHeight_ = std::accumulate (regions_.begin(), regions_.end(), 0.0, accumulateHeightIncrement()) / regions_.size(); averageCharacterWidth_ = std::accumulate (regions_.begin(), regions_.end(), 0.0, accumulateWidthIncrement()) / regions_.size(); for( RegionLines::iterator i = inlineRegions_.begin(); i != inlineRegions_.end(); ++i ) sortRegions(i->second); std::vector<unsigned int> spaceLocations = findSpacesBetweenWords(); statistics_.nRegions(regions_.size()); statistics_.nLines(delimiters_.size()); statistics_.averageCharacterHeight(averageCharacterHeight_); statistics_.averageCharacterWidth(averageCharacterWidth_); statistics_.segmentationTime(timer.elapsed()); return spaceLocations; }