// unpackMetrics: Copy the values stored in 'packedMetrics' into the
// metrics [mBegId, mEndId) of every CCT node of 'profile' (node ids
// 1 .. numNodes-1), then incrementally recompute the derived metrics
// [mDrvdBegId, mDrvdEndId) over the whole tree.
//
//   profile       - profile whose CCT receives the unpacked values
//   packedMetrics - node-by-metric table; must have exactly
//                   cct.maxDenseId() + 1 rows and mEndId - mBegId columns
void
unpackMetrics(Prof::CallPath::Profile& profile,
              const ParallelAnalysis::PackedMetrics& packedMetrics)
{
  Prof::CCT::Tree& cct = *profile.cct();

  // 1. unpack 'packedMetrics' into temporary derived metrics [mBegId,
  //    mEndId) in 'profile'
  uint mBegId = packedMetrics.mBegId(), mEndId = packedMetrics.mEndId();

  DIAG_Assert(packedMetrics.numNodes() == cct.maxDenseId() + 1, "");
  DIAG_Assert(packedMetrics.numMetrics() == mEndId - mBegId, "");

  // NOTE(review): iteration starts at node id 1; presumably id 0 is
  // reserved/unused -- confirm against the CCT id scheme.
  for (uint nodeId = 1; nodeId < packedMetrics.numNodes(); ++nodeId) {
    // The node lookup does not depend on the metric index, so do it once
    // per node rather than once per (node, metric) pair.
    Prof::CCT::ANode* n = cct.findNode(nodeId);
    DIAG_Assert(n, "unpackMetrics: no CCT node with id " << nodeId);

    for (uint mId1 = 0, mId2 = mBegId; mId2 < mEndId; ++mId1, ++mId2) {
      n->demandMetric(mId2) = packedMetrics.idx(nodeId, mId1);
    }
  }

  // 2. update derived metrics [mDrvdBeg, mDrvdEnd) based on new
  //    values in [mBegId, mEndId)
  uint mDrvdBeg = packedMetrics.mDrvdBegId();
  uint mDrvdEnd = packedMetrics.mDrvdEndId();
  cct.root()->computeMetricsIncr(*profile.metricMgr(), mDrvdBeg, mDrvdEnd,
                                 Prof::Metric::AExprIncr::FnCombine);
}
// mergeCilkMain: cilk_main is called from two distinct call sites // within the runtime, resulting in an undesirable bifurcation within // the CCT. The easiest way to fix this is to use a normalization // step. static void mergeCilkMain(Prof::CallPath::Profile& prof) { using namespace Prof; CCT::ProcFrm* mainFrm = NULL; // 1. attempt to find 'CilkNameMgr::cilkmain' for (CCT::ANodeIterator it(prof.cct()->root(), &CCT::ANodeTyFilter[CCT::ANode::TyProcFrm]); it.Current(); ++it) { CCT::ProcFrm* x = static_cast<CCT::ProcFrm*>(it.current()); if (x->procName() == CilkNameMgr::cilkmain) { mainFrm = x; break; } } // 2. merge any sibling 'CilkNameMgr::cilkmain' if (mainFrm) { CCT::ANodeChildIterator it(mainFrm->parent(), &CCT::ANodeTyFilter[CCT::ANode::TyProcFrm]); for ( ; it.Current(); /* */) { CCT::ProcFrm* x = static_cast<CCT::ProcFrm*>(it.current()); it++; // advance iterator -- it is pointing at 'x' if (x->procName() == CilkNameMgr::cilkmain) { mainFrm->merge(x); // deletes 'x' } } } }
// noteStaticStructureOnLeaves: For every leaf of the CCT that is a
// dynamic node (CCT::ADynNode), look up the static structure node
// covering the leaf's load-module-relative IP and attach it to the leaf.
// Asserts if either the load module's structure or the enclosing
// structure node cannot be found.
void
Analysis::CallPath::noteStaticStructureOnLeaves(Prof::CallPath::Profile& prof)
{
  const Prof::Struct::Root* structRoot = prof.structure()->root();

  Prof::CCT::ANodeIterator leafIt(prof.cct()->root(), NULL/*filter*/,
                                  true/*leavesOnly*/,
                                  IteratorStack::PreOrder);

  for (Prof::CCT::ANode* leaf = NULL; (leaf = leafIt.current()) != NULL;
       ++leafIt) {
    Prof::CCT::ADynNode* dynLeaf = dynamic_cast<Prof::CCT::ADynNode*>(leaf);
    if (!dynLeaf) {
      continue; // only dynamic nodes carry a load module id and IP
    }

    // Resolve the leaf's load module by name within the structure tree.
    // (ok if the id is LoadMap::LMId_NULL)
    Prof::LoadMap::LMId_t moduleId = dynLeaf->lmId();
    Prof::LoadMap::LM* module = prof.loadmap()->lm(moduleId);
    const string& moduleName = module->name();

    const Prof::Struct::LM* moduleStrct = structRoot->findLM(moduleName);
    DIAG_Assert(moduleStrct, "failed to find Struct::LM: " << moduleName);

    // Find the structure node enclosing the leaf's IP and record it.
    VMA ip = dynLeaf->lmIP();
    const Prof::Struct::ACodeNode* enclosing = moduleStrct->findByVMA(ip);
    DIAG_Assert(enclosing, "Analysis::CallPath::noteStaticStructureOnLeaves: failed to find structure for: " << dynLeaf->toStringMe(Prof::CCT::Tree::OFlg_DebugAll));

    leaf->structure(enclosing);
  }
}
// overlayStaticStructure: Create frames for CCT::Call and CCT::Stmt // nodes using a preorder walk over the CCT. void Analysis::CallPath:: overlayStaticStructure(Prof::CallPath::Profile& prof, Prof::LoadMap::LM* loadmap_lm, Prof::Struct::LM* lmStrct, BinUtil::LM* lm) { overlayStaticStructure(prof.cct()->root(), loadmap_lm, lmStrct, lm); }
// pruneBySummaryMetrics: Collect the ids of all visible, inclusive
// metrics whose base name contains "Sum" and prune the CCT with respect
// to those metrics, using the CCT root as the reference node and a
// threshold of 0.001.  'prunedNodes' is forwarded unchanged to
// ANode::pruneByMetrics.
void
Analysis::CallPath::pruneBySummaryMetrics(Prof::CallPath::Profile& prof,
                                          uint8_t* prunedNodes)
{
  const Prof::Metric::Mgr& mgr = *(prof.metricMgr());

  // Each selected metric id is recorded as the half-open interval
  // [id, id + 1).
  VMAIntervalSet summaryIds;
  for (uint id = 0; id < mgr.size(); ++id) {
    const Prof::Metric::ADesc* desc = mgr.metric(id);

    if (!desc->isVisible()) {
      continue;
    }
    if (desc->type() != Prof::Metric::ADesc::TyIncl) {
      continue;
    }
    if (desc->nameBase().find("Sum") == string::npos) {
      continue;
    }

    summaryIds.insert(VMAInterval(id, id + 1)); // [ )
  }

  Prof::CCT::ANode* cctRoot = prof.cct()->root();
  cctRoot->pruneByMetrics(*prof.metricMgr(), summaryIds, prof.cct()->root(),
                          0.001, prunedNodes);
}
// noteStaticStructure: Walk every node of the CCT and, for each node
// that has an associated static structure node, add 1.0 to that
// structure node's CallPath::Profile::StructMetricIdFlg metric.
static void
noteStaticStructure(Prof::CallPath::Profile& prof)
{
  const Prof::CCT::ANode* cctRoot = prof.cct()->root();

  Prof::CCT::ANodeIterator nodeIt(cctRoot);
  while (nodeIt.Current()) {
    Prof::CCT::ANode* node = nodeIt.current();

    Prof::Struct::ACodeNode* nodeStrct = node->structure();
    if (nodeStrct) {
      nodeStrct->demandMetric(Prof::CallPath::Profile::StructMetricIdFlg)
        += 1.0;
    }

    ++nodeIt;
  }
}
// Assumes: metrics are of type Metric::SampledDesc and values are // only at leaves (CCT::Stmt) void MetricComponentsFact::make(Prof::CallPath::Profile& prof) { using namespace Prof; // ------------------------------------------------------------ // Create destination metric descriptors and mapping from source // metrics to destination metrics // ------------------------------------------------------------ std::vector<uint> metricSrcIds; std::vector<uint> metricDstIds; Metric::Mgr* metricMgr = prof.metricMgr(); uint numMetrics_orig = metricMgr->size(); for (uint mId = 0; mId < numMetrics_orig; ++mId) { Metric::ADesc* m = metricMgr->metric(mId); if (MetricComponentsFact::isTimeMetric(m)) { DIAG_Assert(typeid(*m) == typeid(Metric::SampledDesc), DIAG_UnexpectedInput << "temporary sanity check"); MetricComponentsFact::convertToWorkMetric(m); metricSrcIds.push_back(m->id()); Metric::ADesc* m_new = m->clone(); m_new->nameBase("overhead"); m_new->description("parallel overhead"); metricMgr->insert(m_new); DIAG_Assert(m_new->id() >= numMetrics_orig, "Currently, we assume new metrics are added at the end of the metric vector."); metricDstIds.push_back(m_new->id()); } } if (metricSrcIds.empty()) { return; } // ------------------------------------------------------------ // Create values for metric components // ------------------------------------------------------------ make(prof.cct()->root(), metricSrcIds, metricDstIds, false); }
// packMetrics: Copy the derived metrics [mDrvdBegId, mDrvdEndId) of
// every CCT node in 'profile' into 'packedMetrics'.  The row is the
// node's id; the column is the metric's offset from mDrvdBegId.
void
packMetrics(const Prof::CallPath::Profile& profile,
            ParallelAnalysis::PackedMetrics& packedMetrics)
{
  Prof::CCT::Tree& cct = *profile.cct();

  const uint drvdBeg = packedMetrics.mDrvdBegId();
  const uint drvdEnd = packedMetrics.mDrvdEndId();

  // The table must match the tree and the metric range exactly.
  DIAG_Assert(packedMetrics.numNodes() == cct.maxDenseId() + 1, "");
  DIAG_Assert(packedMetrics.numMetrics() == drvdEnd - drvdBeg, "");

  Prof::CCT::ANodeIterator nodeIt(cct.root());
  while (nodeIt.Current()) {
    Prof::CCT::ANode* node = nodeIt.current();

    for (uint col = 0; drvdBeg + col < drvdEnd; ++col) {
      packedMetrics.idx(node->id(), col) = node->metric(drvdBeg + col);
    }

    ++nodeIt;
  }
}
// makeReturnCountMetric: A return count refers to the number of times // a given CCT node is called by its parent context. However, when // hpcrun records return counts, there is no structure (e.g. procedure // frames) in the CCT. An an example, in the CCT fragment below, the // return count [3] at 0xc means that 0xc returned to 0xbeef 3 times. // Simlarly, 0xbeef returned to its caller 5 times. // // | | // ip: 0xbeef [5] | // / | \ | // 0xa [1] 0xb [2] 0xc [3] | // | | | | // // To be able to say procedure F is called by procedure G x times // within this context, it is necessary to aggregate these counts at // the newly added procedure frames (Struct::ProcFrm). static void makeReturnCountMetric(Prof::CallPath::Profile& prof) { std::vector<uint> retCntId; // ------------------------------------------------------- // find return count metrics, if any // ------------------------------------------------------- Prof::Metric::Mgr* metricMgr = prof.metricMgr(); for (uint i = 0; i < metricMgr->size(); ++i) { Prof::Metric::ADesc* m = metricMgr->metric(i); if (m->nameBase().find(HPCRUN_METRIC_RetCnt) != string::npos) { retCntId.push_back(m->id()); m->computedType(Prof::Metric::ADesc::ComputedTy_Final); m->type(Prof::Metric::ADesc::TyExcl); } } if (retCntId.empty()) { return; } // ------------------------------------------------------- // propagate and aggregate return counts // ------------------------------------------------------- Prof::CCT::ANode* cct_root = prof.cct()->root(); Prof::CCT::ANodeIterator it(cct_root, NULL/*filter*/, false/*leavesOnly*/, IteratorStack::PostOrder); for (Prof::CCT::ANode* n = NULL; (n = it.current()); ++it) { if (typeid(*n) != typeid(Prof::CCT::ProcFrm) && n != cct_root) { Prof::CCT::ANode* n_parent = n->parent(); for (uint i = 0; i < retCntId.size(); ++i) { uint mId = retCntId[i]; n_parent->demandMetric(mId) += n->demandMetric(mId); n->metric(mId) = 0.0; } } } }
// pruneTrivialNodes: Profile-level entry point; applies the node-level
// pruneTrivialNodes overload to the root of the profile's CCT.
static void
pruneTrivialNodes(Prof::CallPath::Profile& prof)
{
  Prof::CCT::ANode* cctRoot = prof.cct()->root();
  pruneTrivialNodes(cctRoot);
}
// coalesceStmts: Profile-level entry point; applies the node-level
// coalesceStmts overload to the root of the profile's CCT.
static void
coalesceStmts(Prof::CallPath::Profile& prof)
{
  Prof::CCT::ANode* cctRoot = prof.cct()->root();
  coalesceStmts(cctRoot);
}
// make: ...temporary holding pattern... void MPIBlameShiftIdlenessFact::make(Prof::CallPath::Profile& prof) { using namespace Prof; // ------------------------------------------------------------ // Create destination metric descriptors and mapping from source // metrics to destination metrics // ------------------------------------------------------------ std::vector<uint> metricSrcIds; std::vector<uint> metricBalanceIds; std::vector<uint> metricImbalInclIds, metricImbalExclIds; std::vector<uint> metricIdleInclIds; Metric::Mgr* metricMgr = prof.metricMgr(); uint numMetrics_orig = metricMgr->size(); for (uint mId = 0; mId < numMetrics_orig; ++mId) { Metric::ADesc* m = metricMgr->metric(mId); // find main source metric if (MetricComponentsFact::isTimeMetric(m) && MetricComponentsFact::isDerivedMetric(m, s_sum) && m->type() == Metric::ADesc::TyIncl && m->isVisible() /* not a temporary */) { DIAG_Assert(m->computedType() == Prof::Metric::ADesc::ComputedTy_NonFinal, DIAG_UnexpectedInput); metricSrcIds.push_back(m->id()); // FIXME: For now we use only Metric::ADesc::DerivedIncrDesc() // We should also support Metric::ADesc::DerivedDesc() DIAG_Assert(typeid(*m) == typeid(Metric::DerivedIncrDesc), DIAG_UnexpectedInput); Metric::DerivedIncrDesc* m_imbalIncl = static_cast<Metric::DerivedIncrDesc*>(m->clone()); m_imbalIncl->nameBase("imbalance" + s_sum); m_imbalIncl->description("imbalance for MPI SPMD executions"); m_imbalIncl->expr(new Metric::SumIncr(Metric::IData::npos, // FIXME:Sum Metric::IData::npos)); Metric::DerivedIncrDesc* m_imbalExcl = static_cast<Metric::DerivedIncrDesc*>(m_imbalIncl->clone()); m_imbalExcl->type(Metric::ADesc::TyExcl); m_imbalExcl->expr(new Metric::SumIncr(Metric::IData::npos, Metric::IData::npos)); m_imbalIncl->partner(m_imbalExcl); m_imbalExcl->partner(m_imbalIncl); Metric::DerivedIncrDesc* m_idleIncl = static_cast<Metric::DerivedIncrDesc*>(m->clone()); m_idleIncl->nameBase("idleness" + s_sum); m_idleIncl->description("idleness for MPI 
executions"); m_idleIncl->partner(NULL); m_idleIncl->expr(new Metric::SumIncr(Metric::IData::npos, // FIXME:Sum Metric::IData::npos)); metricMgr->insert(m_imbalIncl); metricMgr->insert(m_imbalExcl); metricMgr->insert(m_idleIncl); m_imbalIncl->expr()->accumId(m_imbalIncl->id()); m_imbalExcl->expr()->accumId(m_imbalExcl->id()); m_idleIncl->expr()->accumId(m_idleIncl->id()); DIAG_Assert(m_imbalIncl->id() >= numMetrics_orig && m_imbalExcl->id() >= numMetrics_orig, "Currently, we assume new metrics are added at the end of the metric vector."); metricImbalInclIds.push_back(m_imbalIncl->id()); metricImbalExclIds.push_back(m_imbalExcl->id()); metricIdleInclIds.push_back(m_idleIncl->id()); } // find secondary source metric if (MetricComponentsFact::isTimeMetric(m) && MetricComponentsFact::isDerivedMetric(m, s_cfvar) && m->type() == Metric::ADesc::TyIncl && m->isVisible() /* not a temporary */) { DIAG_Assert(m->computedType() == Prof::Metric::ADesc::ComputedTy_NonFinal, DIAG_UnexpectedInput); metricBalanceIds.push_back(m->id()); } } DIAG_Assert(metricSrcIds.size() == metricBalanceIds.size(), DIAG_UnexpectedInput); if (metricSrcIds.empty()) { return; } // ------------------------------------------------------------ // Create values for metric components // ------------------------------------------------------------ // Note that metrics are non-finalized! 
CCT::ANode* cctRoot = prof.cct()->root(); uint metricBalancedId = metricBalanceIds[0]; Metric::AExprIncr* metricBalancedExpr = dynamic_cast<Metric::DerivedIncrDesc*>(metricMgr->metric(metricBalancedId))->expr(); Metric::IData cctRoot_mdata(*cctRoot); metricBalancedExpr->finalize(cctRoot_mdata); double balancedThreshold = 1.2 * cctRoot_mdata.demandMetric(metricBalancedId); makeMetrics(cctRoot, metricSrcIds, metricImbalInclIds, metricImbalExclIds, metricIdleInclIds, metricBalancedId, metricBalancedExpr, balancedThreshold, NULL, NULL); VMAIntervalSet metricDstInclIdSet; for (uint i = 0; i < metricImbalInclIds.size(); ++i) { uint mId = metricImbalInclIds[i]; metricDstInclIdSet.insert(VMAInterval(mId, mId + 1)); // [ ) mId = metricIdleInclIds[i]; metricDstInclIdSet.insert(VMAInterval(mId, mId + 1)); // [ ) } cctRoot->aggregateMetricsIncl(metricDstInclIdSet); }