Esempio n. 1
0
void Vxls::splitCritEdges() {
  smart::vector<unsigned> preds;
  preds.resize(unit.blocks.size());
  for (auto pred : blocks) {
    auto succlist = succs(unit.blocks[pred]);
    for (auto succ : succlist) {
      preds[succ]++;
    }
  }
  auto resort = false;
  for (auto pred : blocks) {
    auto succlist = succs(unit.blocks[pred]);
    if (succlist.size() <= 1) continue;
    for (auto& succ : succlist) {
      if (preds[succ] <= 1) continue;
      // split the critical edge.
      auto middle = unit.makeBlock(unit.blocks[succ].area);
      unit.blocks[middle].code.emplace_back(jmp{succ});
      succ = middle;
      resort = true;
    }
  }
  if (resort) {
    blocks = sortBlocks(unit);
  }
}
Esempio n. 2
0
// Run every instruction of every reachable block through the lower()
// overload selected by its opcode. The unit is checked before and after,
// and printed at kVasmARMFoldLevel once lowering is done.
void lowerForARM(Vunit& unit) {
    assertx(check(unit));

    // Only reachable blocks are visited; their order is irrelevant here.
    auto order = sortBlocks(unit);

    for (auto label : order) {
        // Take the block's current instructions out before re-emitting.
        // NOTE(review): `out` is presumably what lower() uses to emit the
        // replacement code into the same block -- confirm.
        auto pending = std::move(unit.blocks[label].code);
        Vout out{unit, label};

        for (auto& instr : pending) {
            out.setOrigin(instr.origin);

            // One case per vasm opcode, each forwarding the opcode's payload.
            switch (instr.op) {
#define O(nm, imm, use, def) \
                case Vinstr::nm: lower(instr.nm##_, out); break;
                VASM_OPCODES
#undef O
            }
        }
    }

    assertx(check(unit));
    printUnit(kVasmARMFoldLevel, "after lowerForARM", unit);
}
Esempio n. 3
0
// Dead-code elimination based on a conventional backward liveness analysis.
// An instruction survives if effectful() reports it as such (instructions
// that mutate memory, physical registers, or status flags, and all
// branches), or if it defines a physical register or a vreg that is live.
//
// With SSA a sparse variant would be faster: mark the useful instructions
// in a single pass, then transitively mark the pure instructions defining
// their inputs. That needs a vreg -> defining-instruction map and a way to
// address individual instructions.
//
// Useless branches could also be removed by computing the post-dominator
// tree and RDF(b) per block: a branch is useful only if it decides whether
// a useful block runs, and a useless one can be forwarded to the nearest
// useful post-dominator.
void removeDeadCode(Vunit& unit) {
  auto blocks = sortBlocks(unit);
  jit::vector<LiveSet> livein(unit.blocks.size());
  LiveSet live(unit.next_vr);

  // One backward sweep over `blocks`. When `mutate` is false the sweep only
  // refines livein[] and reports whether any set changed; when true it
  // replaces useless instructions with nop and reports whether it did so.
  auto sweep = [&](bool mutate) {
    auto changed = false;
    auto blockIt = blocks.end();
    while (blockIt != blocks.begin()) {
      --blockIt;
      auto b = *blockIt;
      auto& block = unit.blocks[b];

      // Start from the union of the successors' live-in sets, skipping
      // any set that is still empty().
      live.reset();
      for (auto s : succs(block)) {
        if (livein[s].empty()) continue;
        live |= livein[s];
      }

      // Walk the instructions from last to first.
      auto instIt = block.code.end();
      while (instIt != block.code.begin()) {
        --instIt;
        auto& inst = *instIt;
        auto useful = effectful(inst);
        visitDefs(unit, inst, [&](Vreg r) {
          if (r.isPhys() || live.test(r)) {
            useful = true;
            live.reset(r);
          }
        });
        if (!useful) {
          if (mutate) {
            inst = nop{};
            changed = true;
          }
          continue;
        }
        visitUses(unit, inst, [&](Vreg r) {
          live.set(r);
        });
      }

      if (mutate) {
        // The analysis already converged, so nothing may differ here.
        assert(live == livein[b]);
        continue;
      }
      if (live != livein[b]) {
        livein[b] = live;
        changed = true;
      }
    }
    return changed;
  };

  // Iterate the analysis until livein stops changing.
  while (sweep(false)) {}

  // Nop-out useless instructions; nothing more to do if none were found.
  if (!sweep(true)) return;

  for (auto b : blocks) {
    auto& code = unit.blocks[b].code;
    auto isNop = [&](Vinstr& inst) {
      return inst.op == Vinstr::nop;
    };
    code.erase(std::remove_if(code.begin(), code.end(), isNop), code.end());
  }
  printUnit(kVasmDCELevel, "after vasm-dead", unit);
}
Esempio n. 4
0
 // Construct a clusterizer for `unit` and immediately run the whole
 // pipeline: store the unit and scale, take the block order returned by
 // sortBlocks(unit), then call initClusters(), clusterize(), sortClusters()
 // and splitHotColdClusters() in that order, and finally trace toString()
 // at level 1.
 Clusterizer(Vunit& unit, const Scale& scale)
     : m_unit(unit)
     , m_scale(scale)
     , m_blocks(sortBlocks(unit)) {
   // NOTE(review): each step presumably consumes the state left by the
   // previous one, so the call order should be preserved -- confirm.
   initClusters();
   clusterize();
   sortClusters();
   splitHotColdClusters();
   FTRACE(1, "{}", toString());
 }
Esempio n. 5
0
/*
 * Sort list[start..end] (both bounds inclusive) in place with quicksort,
 * using the middle element as pivot. Elements for which compareBlocks()
 * against the pivot is <= 0 end up before it, the others after it.
 *
 * Only the smaller partition is sorted recursively; the larger one is
 * handled by the enclosing loop. This keeps the recursion depth logarithmic
 * in the range length even for unfavourable inputs (e.g. many equal keys),
 * while producing exactly the same ordering as recursing on both halves.
 *
 * A range with start >= end is left untouched.
 */
static void sortBlocks(SWHirschbergBlock list[], int start, int end) {

    SWHirschbergBlock key;
    int frontIdx;
    int backIdx;
    int pivot;

    while (start < end) {

        /* start + (end - start) / 2 cannot overflow, unlike (start + end) / 2 */
        pivot = start + (end - start) / 2;
        swapBlocks(&list[start], &list[pivot]);
        key = list[start];

        frontIdx = start + 1;
        backIdx = end;

        while (frontIdx <= backIdx) {

            /* skip elements that already belong to the front partition */
            while (
                frontIdx <= end &&
                compareBlocks(&list[frontIdx], &key) <= 0
            ) {
                frontIdx++;
            }

            /* skip elements that already belong to the back partition;
               list[start] holds the key, so this stops at start at the latest */
            while (
                backIdx >= start &&
                compareBlocks(&list[backIdx], &key) > 0
            ) {
                backIdx--;
            }

            if (frontIdx < backIdx) {
                swapBlocks(&list[frontIdx], &list[backIdx]);
            }
        }

        /* move the pivot to its final position */
        swapBlocks(&list[start], &list[backIdx]);

        /* recurse into the smaller side, keep looping over the larger one */
        if (backIdx - start < end - backIdx) {
            sortBlocks(list, start, backIdx - 1);
            start = backIdx + 1;
        } else {
            sortBlocks(list, backIdx + 1, end);
            end = backIdx - 1;
        }
    }
}
Esempio n. 6
0
// Eliminate dead instructions using a traditional liveness analysis.
// Instructions that mutate memory, physical registers, or status flags
// count as useful, and so do all branches.
//
// Under SSA there is a faster sparse formulation: mark useful instructions
// in one pass and then transitively mark the pure instructions that define
// their inputs. It needs a map from vreg numbers to defining instructions
// and a way to address individual instructions.
//
// Useless branches could be removed as well by computing the post-dominator
// tree and RDF(b) for each block; a branch is then useful only if it
// controls whether a useful block executes, and useless branches can be
// forwarded to the nearest useful post-dominator.
void removeDeadCode(Vunit& unit) {
  Timer timer(Timer::vasm_dce);
  auto blocks = sortBlocks(unit);
  jit::vector<LiveSet> livein(unit.blocks.size());
  LiveSet live(unit.next_vr);

  // Backward sweep over every block in `blocks`. In analysis mode
  // (mutate == false) it updates livein[] and returns true if any entry
  // changed. In mutate mode it turns useless instructions into nop and
  // returns true if it rewrote at least one.
  auto scan = [&](bool mutate) {
    auto dirty = false;
    auto labelIt = blocks.end();
    while (labelIt != blocks.begin()) {
      --labelIt;
      auto label = *labelIt;
      auto& blk = unit.blocks[label];

      // Seed `live` with the live-in sets of the successors, ignoring
      // sets that are still empty().
      live.reset();
      for (auto next : succs(blk)) {
        if (livein[next].empty()) continue;
        live |= livein[next];
      }

      // Process the block's instructions in reverse order.
      auto cursor = blk.code.end();
      while (cursor != blk.code.begin()) {
        --cursor;
        auto& instr = *cursor;
        auto keep = effectful(instr);
        visitDefs(unit, instr, [&](Vreg r) {
          if (r.isPhys() || live.test(r)) {
            keep = true;
            live.reset(r);
          }
        });
        if (keep) {
          visitUses(unit, instr, [&](Vreg r) {
            live.set(r);
          });
          continue;
        }
        if (mutate) {
          instr = nop{};
          dirty = true;
        }
      }

      if (mutate) {
        // livein is at its fixed point by now.
        assertx(live == livein[label]);
      } else if (live != livein[label]) {
        livein[label] = live;
        dirty = true;
      }
    }
    return dirty;
  };

  // Analyze until livein reaches a fixed point.
  while (scan(false)) {}

  // Rewrite dead instructions to nop, then let removeTrivialNops() run
  // regardless of whether this pass produced any.
  auto const changed = scan(true);
  removeTrivialNops(unit);
  if (changed) {
    printUnit(kVasmDCELevel, "after vasm-dead", unit);
  }
}
Esempio n. 7
0
void logTranslation(const TransEnv& env, const TransRange& range) {
  auto nanos = HPHP::Timer::GetThreadCPUTimeNanos() - env.unit->startNanos();
  auto& cols = *env.unit->logEntry();
  auto& context = env.unit->context();
  auto kind = show(context.kind);
  cols.setStr("trans_kind", !debug ? kind : kind + "_debug");
  if (context.func) {
    cols.setStr("func", context.func->fullName()->data());
  }
  cols.setInt("jit_sample_rate", RuntimeOption::EvalJitSampleRate);
  // timing info
  cols.setInt("jit_micros", nanos / 1000);
  // hhir stats
  cols.setInt("max_tmps", env.unit->numTmps());
  cols.setInt("max_blocks", env.unit->numBlocks());
  cols.setInt("max_insts", env.unit->numInsts());
  auto hhir_blocks = rpoSortCfg(*env.unit);
  cols.setInt("num_blocks", hhir_blocks.size());
  size_t num_insts = 0;
  for (auto b : hhir_blocks) num_insts += b->instrs().size();
  cols.setInt("num_insts", num_insts);
  // vasm stats
  if (env.vunit) {
    cols.setInt("max_vreg", env.vunit->next_vr);
    cols.setInt("max_vblocks", env.vunit->blocks.size());
    cols.setInt("max_vcalls", env.vunit->vcallArgs.size());
    size_t max_vinstr = 0;
    for (auto& blk : env.vunit->blocks) max_vinstr += blk.code.size();
    cols.setInt("max_vinstr", max_vinstr);
    cols.setInt("num_vconst", env.vunit->constToReg.size());
    auto vblocks = sortBlocks(*env.vunit);
    size_t num_vinstr[kNumAreas] = {0, 0, 0};
    size_t num_vblocks[kNumAreas] = {0, 0, 0};
    for (auto b : vblocks) {
      const auto& block = env.vunit->blocks[b];
      num_vinstr[(int)block.area_idx] += block.code.size();
      num_vblocks[(int)block.area_idx]++;
    }
    cols.setInt("num_vinstr_main", num_vinstr[(int)AreaIndex::Main]);
    cols.setInt("num_vinstr_cold", num_vinstr[(int)AreaIndex::Cold]);
    cols.setInt("num_vinstr_frozen", num_vinstr[(int)AreaIndex::Frozen]);
    cols.setInt("num_vblocks_main", num_vblocks[(int)AreaIndex::Main]);
    cols.setInt("num_vblocks_cold", num_vblocks[(int)AreaIndex::Cold]);
    cols.setInt("num_vblocks_frozen", num_vblocks[(int)AreaIndex::Frozen]);
  }
  // x64 stats
  cols.setInt("main_size", range.main.size());
  cols.setInt("cold_size", range.cold.size());
  cols.setInt("frozen_size", range.frozen.size());

  // finish & log
  StructuredLog::log("hhvm_jit", cols);
}
Esempio n. 8
0
// Top-level driver: computes the block order for `unit` and then runs the
// allocator's phases one after another.
// NOTE(review): the phases presumably build on each other's results, so
// their order should not be changed without checking each one -- confirm.
void Vxls::allocate() {
  // Initial block order; splitCritEdges() is called right after it.
  blocks = sortBlocks(unit);
  splitCritEdges();
  computePositions();
  analyzeRsp();
  buildIntervals();
  walkIntervals();
  resolveSplits();
  lowerCopyargs();
  resolveEdges();
  renameOperands();
  insertCopies();
}
Esempio n. 9
0
File: vasm.cpp Progetto: chregu/hhvm
// Compute the layout order of the unit's blocks. Starting from the order
// returned by sortBlocks(), the blocks are grouped -- without disturbing
// their relative order -- into Main blocks, then Cold blocks, then all
// remaining blocks. A block whose last instruction is fallthru is never
// placed in the Main or Cold group, so it always lands in the final group.
jit::vector<Vlabel> layoutBlocks(const Vunit& unit) {
  auto order = sortBlocks(unit);

  // True when block `b` is in `area` and does not end with fallthru.
  auto const belongsTo = [&](Vlabel b, AreaIndex area) {
    auto const& blk = unit.blocks[b];
    return blk.area == area && blk.code.back().op != Vinstr::fallthru;
  };

  // First the Main group ...
  auto const mainEnd = std::stable_partition(
    order.begin(), order.end(),
    [&](Vlabel b) { return belongsTo(b, AreaIndex::Main); });

  // ... then the Cold group, taken from what is left.
  std::stable_partition(
    mainEnd, order.end(),
    [&](Vlabel b) { return belongsTo(b, AreaIndex::Cold); });

  return order;
}
Esempio n. 10
0
/*
 * Branch fusion:
 * Analyze blocks one at a time, looking for the sequence:
 *
 *   setcc cc, f1 => b
 *   ...
 *   testb b, b => f2
 *   ...
 *   jcc E|NE, f2
 *
 * If found, and f2 is only used by the jcc, then change the code to:
 *
 *   setcc cc, f1 => b
 *   ...
 *   nop
 *   ...
 *   jcc !cc|cc, f1
 *
 * Later, vasm-dead will clean up the nop, and the setcc if b became dead.
 *
 * During the search, any other instruction that has a status flag result
 * will reset the pattern matcher. No instruction can "kill" flags,
 * since flags are SSA variables. However the transformation we want to
 * make extends the setcc flags lifetime, and we don't want it to overlap
 * another flag's lifetime.
 *
 * A new setcc also restarts the match: a testb recorded for an earlier
 * setcc must not be paired with the condition and flags of a later one.
 *
 * The unit is printed at kVasmFusionLevel if at least one branch was fused.
 */
void fuseBranches(Vunit& unit) {
  auto blocks = sortBlocks(unit);

  // Count how many times each vreg is read, so that a testb whose flags
  // have any reader other than the jcc is left alone.
  jit::vector<unsigned> uses(unit.next_vr);
  for (auto b : blocks) {
    for (auto& inst : unit.blocks[b].code) {
      visitUses(unit, inst, [&](Vreg r) {
        uses[r]++;
      });
    }
  }

  bool should_print = false;
  for (auto b : blocks) {
    auto& code = unit.blocks[b].code;
    // Matcher state, reset for every block. `cc` is only meaningful while
    // setcc_flags is valid, `testb_index` only while testb_flags is valid.
    ConditionCode cc;
    Vreg setcc_flags, setcc_dest, testb_flags;
    unsigned testb_index;
    for (unsigned i = 0, n = code.size(); i < n; ++i) {
      if (code[i].op == Vinstr::setcc) {
        cc = code[i].setcc_.cc;
        setcc_flags = code[i].setcc_.sf;
        setcc_dest = code[i].setcc_.d;
        // Forget any testb matched against a previous setcc. Otherwise a
        // later jcc on that testb's flags would be rewritten using this
        // setcc's condition and flags, changing what the branch tests.
        testb_flags = Vreg{};
        continue;
      }
      if (setcc_flags.isValid() &&
          match_testb(code[i], setcc_dest) &&
          uses[code[i].testb_.sf] == 1) {
        testb_flags = code[i].testb_.sf;
        testb_index = i;
        continue;
      }
      if (match_jcc(code[i], testb_flags)) {
        code[testb_index] = nop{}; // erase the testb
        auto& jcc = code[i].jcc_;
        // jcc NE on the testb result means "setcc produced non-zero", i.e.
        // the setcc condition held; jcc E means the opposite.
        jcc.cc = jcc.cc == CC_NE ? cc : ccNegate(cc);
        jcc.sf = setcc_flags;
        should_print = true;
        continue;
      }
      if (setcc_flags.isValid() && sets_flags(code[i])) {
        // Another flags definition: abandon the match so the setcc flags
        // lifetime is not extended across it.
        setcc_flags = testb_flags = Vreg{};
      }
    }
  }
  if (should_print) {
    printUnit(kVasmFusionLevel, "after vasm-fusion", unit);
  }
}
Esempio n. 11
0
// Give the Folder a chance to rewrite every instruction of every reachable
// block, using the constants recorded in unit.constToReg. The unit is
// printed at kVasmImmsLevel afterwards.
void foldImms(Vunit& unit) {
    assertx(check(unit)); // especially, SSA

    // Only reachable blocks are visited; their order is irrelevant.
    auto order = sortBlocks(unit);

    // One "is read somewhere" flag per vreg. When a status register is
    // read, certain optimizations will not fire, because the replacement
    // would not set the condition codes the way the original
    // instruction(s) would.
    jit::vector<bool> isRead(unit.next_vr);
    for (auto label : order) {
        auto& code = unit.blocks[label].code;
        for (auto& instr : code) {
            visitUses(unit, instr, [&](Vreg r) {
                isRead[r] = true;
            });
        }
    }

    Folder folder(std::move(isRead));
    folder.vals.resize(unit.next_vr);
    folder.valid.resize(unit.next_vr);

    // Record which vregs hold constants, together with their values.
    for (auto& entry : unit.constToReg) {
        auto reg = entry.second;
        folder.valid.set(reg);
        folder.vals[reg] = entry.first.val;
    }

    // Rewrite the instructions in place. fold() may replace the whole
    // instruction, so its origin is saved beforehand and restored after.
    for (auto label : order) {
        auto& code = unit.blocks[label].code;
        for (auto& instr : code) {
            switch (instr.op) {
#define O(name, imms, uses, defs)\
                case Vinstr::name: {\
                    auto savedOrigin = instr.origin;\
                    folder.fold(instr.name##_, instr);\
                    instr.origin = savedOrigin;\
                    break;\
                }
                VASM_OPCODES
#undef O
            }
        }
    }
    printUnit(kVasmImmsLevel, "after foldImms", unit);
}
Esempio n. 12
0
/**
 * Chain the retranslation blocks.  This method enforces that, for
 * each region block, all its successors have distinct SrcKeys.
 *
 * The work is done in six steps (see the numbered comments below): build
 * one chain per block, merge the chains of same-SrcKey successors, sort
 * each chain, link the chains via setNextRetrans(), prune successor sets
 * down to one block per chain, and finally re-sort the region's blocks.
 */
void RegionDesc::chainRetransBlocks() {

  jit::vector<Chain> chains;
  BlockToChainMap block2chain;

  // 1. Initially assign each region block to its own chain.
  for (auto b : blocks()) {
    auto bid = b->id();
    auto cid = chains.size();
    chains.push_back({cid, {bid}});
    block2chain[bid] = cid;
  }

  // 2. For each block, if it has 2 successors with the same SrcKey,
  //    then merge the successors' chains into one.
  for (auto b : blocks()) {
    auto bid = b->id();
    const auto& succSet = succs(bid);
    for (auto it1 = succSet.begin(); it1 != succSet.end(); it1++) {
      auto bid1 = *it1;
      for (auto it2 = it1 + 1; it2 != succSet.end(); it2++) {
        auto bid2 = *it2;
        // Read both chain ids immediately before a potential merge.
        // mergeChains() is handed block2chain and may reassign blocks to
        // a different chain, so an id cached across iterations of this
        // loop could be stale.
        auto cid1 = block2chain[bid1];
        auto cid2 = block2chain[bid2];
        if (data(bid1).block->start() == data(bid2).block->start()) {
          mergeChains(chains[cid1], chains[cid2], block2chain);
        }
      }
    }
  }

  // 3. Sort each chain.  In general, we want to sort each chain in
  //    decreasing order of profile weights.  However, note that this
  //    transformation can turn acyclic graphs into cyclic ones (see
  //    example below).  Therefore, if JitLoops are disabled, we
  //    instead sort each chain following the original block order,
  //    which prevents loops from being generated if the region was
  //    originally acyclic.
  //
  //    Here's an example showing how an acyclic CFG can become cyclic
  //    by chaining its retranslation blocks:
  //
  //      - Region before chaining retranslation blocks, where B2' and B2"
  //        are retranslations starting at the same SrcKey:
  //          B1  -> B2'
  //          B1  -> B2"
  //          B2' -> B3
  //          B3  -> B2"
  //
  //      - Region after sorting the chain as B2" -R-> B2':
  //          B1  ->   B2"
  //          B2" -R-> B2'
  //          B2' ->   B3
  //          B3  ->   B2"
  //        Note the cycle: B2" -R-> B2' -> B3 -> B2".
  //
  auto profData = mcg->tx().profData();

  // Blocks without a translation ID count as weight 0.
  auto weight = [&](RegionDesc::BlockId bid) {
    return hasTransID(bid) ? profData->absTransCounter(getTransID(bid)) : 0;
  };

  auto sortGeneral = [&](RegionDesc::BlockId bid1, RegionDesc::BlockId bid2) {
    return weight(bid1) > weight(bid2);
  };

  using SortFun = std::function<bool(RegionDesc::BlockId, RegionDesc::BlockId)>;
  SortFun sortFunc = sortGeneral;

  // Declared at function scope because sortFunc may keep referring to it
  // after the if-block below has ended.
  hphp_hash_map<RegionDesc::BlockId, uint32_t> origBlockOrder;
  if (!RuntimeOption::EvalJitLoops) {
    for (uint32_t i = 0; i < m_blocks.size(); i++) {
      origBlockOrder[m_blocks[i]->id()] = i;
    }
    auto sortAcyclic = [&](RegionDesc::BlockId bid1, RegionDesc::BlockId bid2) {
      return origBlockOrder[bid1] < origBlockOrder[bid2];
    };
    sortFunc = sortAcyclic;
  }

  TRACE(1, "chainRetransBlocks: computed chains:\n");
  for (auto& c : chains) {
    std::sort(c.blocks.begin(), c.blocks.end(), sortFunc);

    if (Trace::moduleEnabled(Trace::region, 1) && c.blocks.size() > 0) {
      FTRACE(1, "  -> {} (w={})", c.blocks[0], weight(c.blocks[0]));
      for (size_t i = 1; i < c.blocks.size(); i++) {
        FTRACE(1, ", {} (w={})", c.blocks[i], weight(c.blocks[i]));
      }
      FTRACE(1, "\n");
    }
  }

  // 4. Set the nextRetrans blocks according to the computed chains.
  for (auto& c : chains) {
    // Skip empty chains; this also keeps size() - 1 from wrapping around.
    if (c.blocks.size() == 0) continue;
    for (size_t i = 0; i < c.blocks.size() - 1; i++) {
      setNextRetrans(c.blocks[i], c.blocks[i + 1]);
    }
  }

  // 5. For each block with multiple successors in the same chain,
  //    only keep the successor that first appears in the chain.
  for (auto b : blocks()) {
    auto& succSet = data(b->id()).succs;
    // Walk a snapshot of the successors: the loop body erases from
    // succSet, which would invalidate iterators into the set itself.
    // Revisiting a chain through an already-erased successor is harmless,
    // since the selected successor is never erased and is found again.
    auto const succSnapshot = succSet;
    for (auto s : succSnapshot) {
      auto& c = chains[block2chain[s]];
      auto selectedSucc = findFirstInSet(c, succSet);
      for (auto other : c.blocks) {
        if (other == selectedSucc) continue;
        succSet.erase(other);
      }
    }
  }

  // 6. Reorder the blocks in the region in topological order (if
  //    region is acyclic), since the previous steps may break it.
  sortBlocks();
}
Esempio n. 13
0
/*
 * Sort data->blocks (data->blockNmr entries) with sortBlocks(), unless
 * data->sorted says this has already been done. After sorting, the flag is
 * set so that subsequent calls return immediately.
 *
 * NOTE(review): this assumes that whatever adds or changes blocks clears
 * data->sorted again -- confirm against the code that fills data->blocks.
 */
static void sort(SWHirschbergData* data) {
    if (!data->sorted) {
        sortBlocks(data->blocks, 0, data->blockNmr - 1);
        /* remember that the blocks are now in order */
        data->sorted = 1;
    }
}