示例#1
0
/*
 * Lower StLocRange: emit a loop that stores src(1) into consecutive frame
 * locals.  The loop starts at the slot of local `start`, steps the slot
 * address down by sizeof(Cell) after each store, and stops once the stepped
 * address equals the slot address of local `end` (which is not written).
 */
void cgStLocRange(IRLS& env, const IRInstruction* inst) {
  auto const locals = inst->extra<StLocRange>();

  // Nothing to emit for an empty range.
  if (locals->start >= locals->end) return;

  auto const frame = srcLoc(env, inst, 0).reg();
  auto const valueLoc = srcLoc(env, inst, 1);
  auto const value = inst->src(1);
  auto& v = vmain(env);

  // Address of the first slot to write, and the address at which to stop.
  auto cursor = v.makeReg();
  auto stop = v.makeReg();
  v << lea{frame[localOffset(locals->start)], cursor};
  v << lea{frame[localOffset(locals->end)], stop};

  doWhile(v, CC_NE, {cursor},
    [&] (const VregList& loopIn, const VregList& loopOut) {
      auto const slot = loopIn[0];
      auto const nextSlot = loopOut[0];
      auto const flags = v.makeReg();

      // Write the value into the current slot, then step to the next one.
      storeTV(v, slot[0], valueLoc, value);
      v << subqi{int32_t{sizeof(Cell)}, slot, nextSlot, v.makeReg()};

      // Keep looping until the stepped address hits the stop address.
      v << cmpq{nextSlot, stop, flags};
      return flags;
    }
  );
}
/**
 * Translate a local vertex number into a linearised vertex index.
 *
 * The number is decomposed, highest dimension first, into one entry per
 * dimension (each asserted to be 0 or 1) by successive division with a
 * halving base. The enumerator's _discreteOffset is then added and the
 * result is handed to lineariseVertexIndex().
 *
 * @param localVertexNumber Vertex number to decompose.
 * @return Result of lineariseVertexIndex() for the shifted offset.
 */
int peano::kernel::spacetreegrid::SingleLevelEnumerator::operator() (int localVertexNumber) const {
    LocalVertexIntegerIndex localOffset;
    int base = TWO_POWER_D_DIVIDED_BY_TWO;
    int d    = DIMENSIONS;

    // Walk from the highest dimension down to dimension 0.
    while (d-- > 0) {
        localOffset(d) = localVertexNumber / base;
        assertion5( localOffset(d)>=0, localOffset, localVertexNumber, d, base, _discreteOffset );
        assertion5( localOffset(d)<=1, localOffset, localVertexNumber, d, base, _discreteOffset );

        // Strip the part just extracted and move on to the next lower digit.
        localVertexNumber -= localOffset(d) * base;
        base /= 2;
    }

    localOffset += _discreteOffset;
    return SingleLevelEnumerator::lineariseVertexIndex( localOffset );
}
示例#3
0
/*
 * Lower CheckLoc: emit a type check of the frame local named by the
 * instruction's extra data against inst->typeParam(), using inst->taken() as
 * the branch target handed to emitTypeCheck().
 */
void cgCheckLoc(IRLS& env, const IRInstruction* inst) {
  auto const locId = inst->extra<CheckLoc>()->locId;

  // Address of the local's slot relative to the frame pointer in src 0.
  auto const slot = srcLoc(env, inst, 0).reg()[localOffset(locId)];
  auto const typeAddr = slot + TVOFF(m_type);
  auto const dataAddr = slot + TVOFF(m_data);

  emitTypeCheck(vmain(env), env, inst->typeParam(),
                typeAddr, dataAddr, inst->taken());
}
示例#4
0
/*
 * Lower LdLocPseudoMain: type-check the frame local named by the extra data
 * against inst->typeParam() (with inst->taken() as the branch target), then
 * load the local's value into the instruction's destination.
 */
void cgLdLocPseudoMain(IRLS& env, const IRInstruction* inst) {
  auto& v = vmain(env);
  auto const frame = srcLoc(env, inst, 0).reg();
  auto const slotOff = localOffset(inst->extra<LdLocPseudoMain>()->locId);

  auto const typeAddr = frame[slotOff + TVOFF(m_type)];
  auto const dataAddr = frame[slotOff + TVOFF(m_data)];

  irlower::emitTypeCheck(v, env, inst->typeParam(), typeAddr, dataAddr,
                         inst->taken());
  loadTV(v, inst->dst(), dstLoc(env, inst, 0), frame[slotOff]);
}
示例#5
0
/*
 * Emit a type test of the frame local named by the GuardLoc extra data.
 * The callback handed to emitTypeTest() emits a fallback jump, on the negated
 * condition code, to the SrcRec for the current function at the unit's
 * bytecode offset (presumably this is the path taken when the type does not
 * match).
 */
void CodeGenerator::cgGuardLoc(IRInstruction* inst) {
  auto const slotOff = localOffset(inst->extra<GuardLoc>()->locId);
  auto const frame = x2a(curOpd(inst->src(0)).reg());

  auto const jumpToFallback = [&] (ConditionCode cc) {
    auto const target = SrcKey(curFunc(), m_unit.bcOff());
    m_tx64->getSrcRec(target)->emitFallbackJump(this->m_mainCode,
                                                ccNegate(cc));
  };

  emitTypeTest(inst->typeParam(),
               frame[slotOff + TVOFF(m_type)],
               frame[slotOff + TVOFF(m_data)],
               jumpToFallback);
}
示例#6
0
    /**
     * Write one scalar per process into a domain data set and, optionally,
     * attach one attribute to that data set.
     *
     * The data set has one element per entry of the grid controller's
     * getGpuNodes() extent; this process writes its element at the index
     * given by the grid controller's getPosition().
     *
     * @param params    thread parameters providing the data collector and the
     *                  current step
     * @param name      name of the data set
     * @param value     scalar written by this process
     * @param attrName  name of the attribute; nothing is attached when empty
     * @param attribute attribute value, only used when attrName is not empty
     */
    void operator()(ThreadParams& params,
            const std::string& name, T_Scalar value,
            const std::string& attrName = "", T_Attribute attribute = T_Attribute())
    {
        log<picLog::INPUT_OUTPUT>("HDF5: write %1%D scalars: %2%") % simDim % name;

        // Every process contributes exactly one element.
        Dimensions localSize(1, 1, 1);
        // Extent of the data set over all processes.
        Dimensions globalSize(1, 1, 1);
        // Where this process writes inside the data set.
        Dimensions localOffset(0, 0, 0);
        // The global domain starts at the origin.
        Dimensions globalOffset(0, 0, 0);

        for (uint32_t dim = 0; dim < simDim; ++dim)
        {
            globalSize[dim] = Environment<simDim>::get().GridController().getGpuNodes()[dim];
            localOffset[dim] = Environment<simDim>::get().GridController().getPosition()[dim];
        }

        // Wait for outstanding pmacc tasks first, so that they cannot
        // deadlock against MPI calls issued by the I/O library below.
        __getTransactionEvent().waitForFinished();

        typename traits::PICToSplash<T_Scalar>::type scalarType;
        params.dataCollector->writeDomain(
            params.currentStep,           /* id == time step */
            globalSize,                   /* total size of dataset over all processes */
            localOffset,                  /* write offset for this process */
            scalarType,                   /* data type */
            simDim,                       /* NDims spatial dimensionality of the field */
            splash::Selection(localSize), /* data size of this process */
            name.c_str(),                 /* data set name */
            splash::Domain(
                globalOffset,             /* offset of the global domain */
                globalSize                /* size of the global domain */
            ),
            DomainCollector::GridType,
            &value);

        // No attribute requested: done.
        if(attrName.empty())
            return;

        /* simulation attribute for data */
        typename traits::PICToSplash<T_Attribute>::type attributeType;

        log<picLog::INPUT_OUTPUT>("HDF5: write attribute %1% for scalars: %2%") % attrName % name;
        params.dataCollector->writeAttribute(params.currentStep,
                                              attributeType, name.c_str(),
                                              attrName.c_str(), &attribute);
    }
示例#7
0
/*
 * Lower CreateAAWH: emit a synced direct call to
 * c_AwaitAllWaitHandle::fromFrameNoCheck, passing the local count as an
 * immediate, src(1), and the address of the frame local `first`.
 */
void cgCreateAAWH(IRLS& env, const IRInstruction* inst) {
  auto const data = inst->extra<CreateAAWHData>();
  auto const frame = srcLoc(env, inst, 0).reg();

  auto args = argGroup(env, inst);
  args.imm(data->count)
      .ssa(1)
      .addr(frame, localOffset(data->first));

  cgCallHelper(
    vmain(env),
    env,
    CallSpec::direct(c_AwaitAllWaitHandle::fromFrameNoCheck),
    callDest(env, inst),
    SyncOptions::Sync,
    args
  );
}
示例#8
0
// Samples a random emission position.
//
// A point is picked on a disk of radius m_radius, each in-plane coordinate is
// perturbed by up to +/- m_inPlaneVariance, and an out-of-plane offset of up to
// +/- m_outOfPlaneVariance is added. The resulting local offset is transformed
// by m_orthonormalBasis, and m_position is added, to give `position`.
//
// Random numbers are drawn in a fixed order: radius, angle, out-of-plane
// offset, then the two in-plane perturbations.
void DiskEmissionUtil::samplePosition(hkVector4& position, hkPseudoRandomGenerator* pseudoRandomGenerator) const
{
	hkPseudoRandomGenerator& rng = *pseudoRandomGenerator;

	// Disk point picking (see http://mathworld.wolfram.com/DiskPointPicking.html):
	// taking the square root of the uniform sample spreads points evenly over the area.
	const hkReal r = m_radius * hkMath::sqrt(rng.getRandReal01());
	const hkReal angle = rng.getRandReal01() * 2.f * HK_REAL_PI;
	const hkReal diskX = r * hkMath::cos(angle);
	const hkReal diskY = r * hkMath::sin(angle);

	const hkReal outOfPlane = rng.getRandRange(-m_outOfPlaneVariance, m_outOfPlaneVariance);
	const hkReal jitterY = rng.getRandRange(-m_inPlaneVariance, m_inPlaneVariance);
	const hkReal jitterZ = rng.getRandRange(-m_inPlaneVariance, m_inPlaneVariance);

	// The disk lies in the local Y/Z plane; X is the out-of-plane axis.
	hkVector4 localOffset(outOfPlane, diskY + jitterY, diskX + jitterZ);

	m_orthonormalBasis.multiplyVector(localOffset, position);
	position.add4(m_position);
}
示例#9
0
/*
 * Lower CountWHNotDone: emit a loop over `extra->count` frame locals starting
 * at local `extra->first`, count the wait handles whose state byte has any of
 * the bits in 0x0E set, and copy that count into the destination register.
 */
void cgCountWHNotDone(IRLS& env, const IRInstruction* inst) {
  auto const fp = srcLoc(env, inst, 0).reg();
  auto const extra = inst->extra<CountWHNotDone>();

  auto& v = vmain(env);
  auto const base = v.makeReg();
  // Loop index, in units of 8 bytes (it is scaled by 8 when used below), so
  // each local advances it by 2.
  auto const loc = v.cns((extra->count - 1) * 2);
  // Running count of matching wait handles.
  auto const cnt = v.cns(0);

  // `base' is the address of the last local in the range; the loop indexes
  // upward from it.
  v << lea{fp[localOffset(extra->first + extra->count - 1)], base};

  // Loop while the flags produced by the index decrement satisfy CC_GE.
  auto out = doWhile(v, CC_GE, {loc, cnt},
    [&] (const VregList& in, const VregList& out) {
      auto const loc_in  = in[0],  cnt_in  = in[1];
      auto const loc_out = out[0], cnt_out = out[1];
      auto const sf1 = v.makeReg();
      auto const sf2 = v.makeReg();
      auto const obj = v.makeReg();

      // We depend on this in the test with 0x0E below: both SUCCEEDED (0) and
      // FAILED (1) have none of the 0x0E bits set.
      static_assert(c_WaitHandle::STATE_SUCCEEDED == 0, "");
      static_assert(c_WaitHandle::STATE_FAILED == 1, "");

      // Load the pointer stored in the current local and test its state byte.
      v << load{base[loc_in * 8], obj};
      v << testbim{0x0E, obj[WH::stateOff()], sf1};
      // cnt_out = cnt_in + 1 if the test was non-zero, otherwise cnt_in.
      cond(v, CC_NZ, sf1, cnt_out,
        [&] (Vout& v) {
          auto ret = v.makeReg();
          v << incq{cnt_in, ret, v.makeReg()};
          return ret;
        },
        [&] (Vout& v) { return cnt_in; }
      );

      // Subtract 2 from the loop variable (rather than 1) because we can only
      // scale by at most 8; two 8-byte steps cover one local (presumably
      // sizeof(TypedValue) == 16 --- TODO confirm).
      v << subqi{2, loc_in, loc_out, sf2};
      return sf2;
    }
  );

  // out[1] is the final value of the counter.
  v << copy{out[1], dstLoc(env, inst, 0).reg()};
}
示例#10
0
// Builds the skeleton from the joints of an HFMModel and records, for every
// mesh, a private copy of each cluster's joint index, inverse bind matrix and
// inverse bind transform. The model itself is never modified.
AnimSkeleton::AnimSkeleton(const HFMModel& hfmModel) {
    // convert to std::vector of joints
    std::vector<HFMJoint> joints(hfmModel.joints.begin(), hfmModel.joints.end());
    buildSkeletonFromJoints(joints, hfmModel.jointRotationOffsets);

    // we make a copy of the inverseBindMatrices in order to prevent mutating the model bind pose
    // when we are dealing with a joint offset in the model
    for (const HFMMesh& mesh : hfmModel.meshes) {
        std::vector<HFMCluster> dummyClustersList;
        dummyClustersList.reserve(mesh.clusters.size());

        for (const HFMCluster& cluster : mesh.clusters) {
            // the model's cluster is only read; all changes go into the local copy
            HFMCluster localCluster;
            localCluster.jointIndex = cluster.jointIndex;
            localCluster.inverseBindMatrix = cluster.inverseBindMatrix;
            localCluster.inverseBindTransform.evalFromRawMatrix(localCluster.inverseBindMatrix);

            // if we have a joint offset in the fst file then multiply its inverse by the
            // model cluster inverse bind matrix
            if (hfmModel.jointRotationOffsets.contains(cluster.jointIndex)) {
                AnimPose localOffset(hfmModel.jointRotationOffsets[cluster.jointIndex], glm::vec3());
                localCluster.inverseBindMatrix = static_cast<glm::mat4>(localOffset.inverse()) * cluster.inverseBindMatrix;
                localCluster.inverseBindTransform.evalFromRawMatrix(localCluster.inverseBindMatrix);
            }
            dummyClustersList.push_back(localCluster);
        }
        _clusterBindMatrixOriginalValues.push_back(dummyClustersList);
    }
}
示例#11
0
void AnimSkeleton::buildSkeletonFromJoints(const std::vector<HFMJoint>& joints, const QMap<int, glm::quat> jointOffsets) {

    _joints = joints;
    _jointsSize = (int)joints.size();
    // build a cache of bind poses

    // build a chache of default poses
    _absoluteDefaultPoses.reserve(_jointsSize);
    _relativeDefaultPoses.reserve(_jointsSize);
    _relativePreRotationPoses.reserve(_jointsSize);
    _relativePostRotationPoses.reserve(_jointsSize);

    // iterate over HFMJoints and extract the bind pose information.
    for (int i = 0; i < _jointsSize; i++) {

        // build pre and post transforms
        glm::mat4 preRotationTransform = _joints[i].preTransform * glm::mat4_cast(_joints[i].preRotation);
        glm::mat4 postRotationTransform = glm::mat4_cast(_joints[i].postRotation) * _joints[i].postTransform;
        _relativePreRotationPoses.push_back(AnimPose(preRotationTransform));
        _relativePostRotationPoses.push_back(AnimPose(postRotationTransform));

        // build relative and absolute default poses
        glm::mat4 relDefaultMat = glm::translate(_joints[i].translation) * preRotationTransform * glm::mat4_cast(_joints[i].rotation) * postRotationTransform;
        AnimPose relDefaultPose(relDefaultMat);

        int parentIndex = getParentIndex(i);
        if (parentIndex >= 0) {
            _absoluteDefaultPoses.push_back(_absoluteDefaultPoses[parentIndex] * relDefaultPose);
        } else {
            _absoluteDefaultPoses.push_back(relDefaultPose);
        }
    }

    for (int k = 0; k < _jointsSize; k++) {
        if (jointOffsets.contains(k)) {
            AnimPose localOffset(jointOffsets[k], glm::vec3());
            _absoluteDefaultPoses[k] = _absoluteDefaultPoses[k] * localOffset;
        }
    }
    // re-compute relative poses
    _relativeDefaultPoses = _absoluteDefaultPoses;
    convertAbsolutePosesToRelative(_relativeDefaultPoses);

    for (int i = 0; i < _jointsSize; i++) {
        _jointIndicesByName[_joints[i].name] = i;
    }

    // build mirror map.
    _nonMirroredIndices.clear();
    _mirrorMap.reserve(_jointsSize);
    for (int i = 0; i < _jointsSize; i++) {
        if (_joints[i].name != "Hips" && _joints[i].name != "Spine" &&
            _joints[i].name != "Spine1" && _joints[i].name != "Spine2" &&
            _joints[i].name != "Neck" && _joints[i].name != "Head" &&
            !((_joints[i].name.startsWith("Left") || _joints[i].name.startsWith("Right")) &&
              _joints[i].name != "LeftEye" && _joints[i].name != "RightEye")) {
            // HACK: we don't want to mirror some joints so we remember their indices
            // so we can restore them after a future mirror operation
            _nonMirroredIndices.push_back(i);
        }
        int mirrorJointIndex = -1;
        if (_joints[i].name.startsWith("Left")) {
            QString mirrorJointName = QString(_joints[i].name).replace(0, 4, "Right");
            mirrorJointIndex = nameToJointIndex(mirrorJointName);
        } else if (_joints[i].name.startsWith("Right")) {
            QString mirrorJointName = QString(_joints[i].name).replace(0, 5, "Left");
            mirrorJointIndex = nameToJointIndex(mirrorJointName);
        }
        if (mirrorJointIndex >= 0) {
            _mirrorMap.push_back(mirrorJointIndex);
        } else {
            _mirrorMap.push_back(i);
        }
    }
}
示例#12
0
//-------------------------------------------------------------------------
// Attaches or detaches the LAM light on the given item.
//
// attach       - true to attach the light for `slot`, false to detach it.
// pLightAttach - item the light is attached to (may be this LAM itself).
// slot         - geometry slot; eIGS_FirstPerson selects parameter set 0,
//                anything else selects parameter set 1.
//
// When attaching, the light for the other parameter set is detached if it is
// currently active. s_lightCount is kept in step with the stored light ids.
void CLam::AttachLAMLight(bool attach, CItem* pLightAttach, eGeometrySlot slot)
{
    //GameWarning("CLam::AttachLight");

    int id = (slot==eIGS_FirstPerson) ? 0 : 1;

    if (attach)
    {
        // A range of zero means there is no light configured for this slot.
        if (m_lamparams.light_range[id] == 0.f)
            return;

        Vec3 color = m_lamparams.light_color[id] * m_lamparams.light_diffuse_mul[id];
        float specular = 1.0f/m_lamparams.light_diffuse_mul[id];

        string helper;
        Vec3 dir(-1,0,0);
        Vec3 localOffset(0.0f,0.0f,0.0f);

        // Attached to another item: use that item's accessory helper and a
        // direction that depends on the weapon type.
        if (this != pLightAttach)
        {
            SAccessoryParams *params = pLightAttach->GetAccessoryParams(GetEntity()->GetClass()->GetName());
            if (!params)
                return;

            helper = params->attach_helper.c_str();
            if(slot==eIGS_FirstPerson)
                helper.append("_light");

            //Assets don't have same orientation for pistol/rifle.. 8/
            dir = (m_lamparams.isLamRifle && id==0) ? Vec3(-0.1f,-1.0f,0.0f) : Vec3(-1.0f,-0.1f,0.0f);
            dir.Normalize();
        }

        bool fakeLight = false;
        bool castShadows = false;

        //Some MP/SP restrictions for lights
        IRenderNode *pCasterException = NULL;
        if(CActor *pOwner = pLightAttach->GetOwnerActor())
        {
            if(gEnv->bMultiplayer)
            {
                if(!pOwner->IsClient())
                    fakeLight = true;
                else
                    castShadows = true;
            }
            else
            {
                if(pOwner->IsPlayer())
                    castShadows = true;
                //castShadows = false; //Not for now
            }

            // The owner's own render node is passed on as the shadow caster exception.
            if(castShadows)
            {
                if(IEntityRenderProxy* pRenderProxy = static_cast<IEntityRenderProxy*>(pOwner->GetEntity()->GetProxy(ENTITY_PROXY_RENDER)))
                    pCasterException = pRenderProxy->GetRenderNode();
            }
        }

        m_lightID[id] = pLightAttach->AttachLightEx(slot, 0, true, fakeLight, castShadows, pCasterException, m_lamparams.light_range[id], color, specular, m_lamparams.light_texture[id], m_lamparams.light_fov[id], helper.c_str(), localOffset, dir, m_lamparams.light_material[id].c_str(), m_lamparams.light_hdr_dyn[id]);

        if (m_lightID[id])
            ++s_lightCount;

        // sounds
        pLightAttach->PlayAction(g_pItemStrings->enable_light);

        if (m_lightSoundId == INVALID_SOUNDID)
            m_lightSoundId = pLightAttach->PlayAction(g_pItemStrings->use_light);

        //Detach the non-needed light
        uint8 other = id^1;
        if (m_lightID[other])
        {
            // Two separate statements: the original joined them with a stray
            // comma operator.
            pLightAttach->AttachLightEx(other, m_lightID[other], false, true);
            m_lightID[other] = 0;
            --s_lightCount;
        }
    }
    else
    {
        if (m_lightID[id])
        {
            pLightAttach->AttachLightEx(slot, m_lightID[id], false, true);
            m_lightID[id] = 0;
            --s_lightCount;

            PlayAction(g_pItemStrings->disable_light);
            StopSound(m_lightSoundId);
            m_lightSoundId = INVALID_SOUNDID;
        }
    }

    //GameWarning("Global light count = %d", s_lightCount);
}
示例#13
0
/*
 * Lower StLocPseudoMain: store src(1) into the frame local named by the
 * instruction's extra data, addressed relative to the frame pointer in src 0.
 */
void cgStLocPseudoMain(IRLS& env, const IRInstruction* inst) {
  auto& v = vmain(env);
  auto const locId = inst->extra<StLocPseudoMain>()->locId;
  auto const slot = srcLoc(env, inst, 0).reg()[localOffset(locId)];

  storeTV(v, slot, srcLoc(env, inst, 1), inst->src(1));
}
示例#14
0
/*
 * Emit the stubs used to decref frame locals and record their entry points in
 * `us':
 *
 *  - a decref helper emitted by emitDecRefHelper() (its address is returned),
 *  - us.freeManyLocalsHelper, which loops over locals until it reaches the
 *    point where the unrolled tail takes over,
 *  - us.freeLocalsHelpers[i], the unrolled tail, emitted from the highest
 *    index down to 0 and followed by a single shared `ret'.
 *
 * The emission order below determines the layout of the stubs in `cb', so the
 * statements must not be reordered.
 */
TCA emitFreeLocalsHelpers(CodeBlock& cb, UniqueStubs& us) {
  // The address of the first local is passed in the second argument register.
  // We use the third and fourth as scratch registers.
  auto const local = rarg(1);
  auto const last = rarg(2);
  auto const type = rarg(3);
  CGMeta fixups;

  // This stub is very hot; keep it cache-aligned.
  align(cb, &fixups, Alignment::CacheLine, AlignContext::Dead);
  auto const release = emitDecRefHelper(cb, fixups, local, type, local | last);

  // Emits: load the type of the TypedValue at `local' and call `release' if
  // the type compares greater than KindOfRefCountThreshold.
  auto const decref_local = [&] (Vout& v) {
    auto const sf = v.makeReg();

    // We can't do a byte load here---we have to sign-extend since we use
    // `type' as a 32-bit array index to the destructor table.
    // NOTE(review): the instruction used is `loadzbl', whose name suggests a
    // zero-extending byte load rather than a sign-extending one --- confirm
    // which is intended.
    v << loadzbl{local[TVOFF(m_type)], type};
    emitCmpTVType(v, sf, KindOfRefCountThreshold, type);

    ifThen(v, CC_G, sf, [&] (Vout& v) {
      v << call{release, arg_regs(3)};
    });
  };

  // Emits: advance `local' by sizeof(TypedValue).
  auto const next_local = [&] (Vout& v) {
    v << addqi{static_cast<int>(sizeof(TypedValue)),
               local, local, v.makeReg()};
  };

  alignJmpTarget(cb);

  us.freeManyLocalsHelper = vwrap(cb, fixups, [&] (Vout& v) {
    // We always unroll the final `kNumFreeLocalsHelpers' decrefs, so only loop
    // until we hit that point.
    v << lea{rvmfp()[localOffset(kNumFreeLocalsHelpers - 1)], last};

    // Decref and advance until `local' equals `last'.
    doWhile(v, CC_NZ, {},
      [&] (const VregList& in, const VregList& out) {
        auto const sf = v.makeReg();

        decref_local(v);
        next_local(v);
        v << cmpq{local, last, sf};
        return sf;
      }
    );
  });

  // Unrolled tail: one entry point per remaining local. Entry 0 is emitted
  // last and does not advance `local'. Presumably each entry falls through
  // into the next one emitted --- confirm against vwrap().
  for (auto i = kNumFreeLocalsHelpers - 1; i >= 0; --i) {
    us.freeLocalsHelpers[i] = vwrap(cb, [&] (Vout& v) {
      decref_local(v);
      if (i != 0) next_local(v);
    });
  }

  // All the stub entrypoints share the same ret.
  vwrap(cb, fixups, [] (Vout& v) { v << ret{}; });

  // This stub is hot, so make sure to keep it small.
  // Alas, we have more work to do in this under Windows,
  // so we can't be this small :(
#ifndef _WIN32
  always_assert(Stats::enabled() ||
                (cb.frontier() - release <= 4 * x64::cache_line_size()));
#endif

  fixups.process(nullptr);
  return release;
}
示例#15
0
/*
 * Emit the stubs used to decref frame locals and record their entry points in
 * `us':
 *
 *  - a decref helper emitted by emitDecRefHelper() (its address is returned),
 *  - us.freeManyLocalsHelper, which loops over locals until it reaches the
 *    point where the unrolled tail takes over,
 *  - us.freeLocalsHelpers[i], the unrolled tail, emitted from the highest
 *    index down to 0 and followed by a single shared `ret'.
 *
 * This variant additionally saves and restores a register around the call to
 * the decref helper (see decref_local). The emission order below determines
 * the layout of the stubs in `cb', so the statements must not be reordered.
 */
TCA emitFreeLocalsHelpers(CodeBlock& cb, DataBlock& data, UniqueStubs& us) {
  // The address of the first local is passed in the second argument register.
  // We use the third and fourth as scratch registers.
  auto const local = rarg(1);
  auto const last = rarg(2);
  auto const type = rarg(3);
  CGMeta fixups;

  // This stub is very hot; keep it cache-aligned.
  align(cb, &fixups, Alignment::CacheLine, AlignContext::Dead);
  auto const release =
    emitDecRefHelper(cb, data, fixups, local, type, local | last);

  // Emits: load the type of the TypedValue at `local' and call `release' if
  // the type compares greater than KindOfRefCountThreshold.
  auto const decref_local = [&] (Vout& v) {
    auto const sf = v.makeReg();

    // We can't do a byte load here---we have to sign-extend since we use
    // `type' as a 32-bit array index to the destructor table.
    // NOTE(review): the instruction used is `loadzbl', whose name suggests a
    // zero-extending byte load rather than a sign-extending one --- confirm
    // which is intended.
    v << loadzbl{local[TVOFF(m_type)], type};
    emitCmpTVType(v, sf, KindOfRefCountThreshold, type);

    ifThen(v, CC_G, sf, [&] (Vout& v) {
      auto const dword_size = sizeof(int64_t);

      // Save rfuncln() on the stack across the call. The stack pointer is
      // moved by two dwords (16 bytes) although only one is stored, to keep
      // it 16-byte aligned. Presumably mflr/mtlr move from/to the link
      // register, i.e. what is preserved is the return address --- confirm.
      v << mflr{rfuncln()};
      v << lea {rsp()[-2 * dword_size], rsp()};
      v << store{rfuncln(), rsp()[0]};

      v << call{release, arg_regs(3)};

      // Restore the saved value from the stack and pop the two dwords.
      v << load{rsp()[0], rfuncln()};
      v << lea {rsp()[2 * dword_size], rsp()};
      v << mtlr{rfuncln()};
    });
  };

  // Emits: advance `local' by sizeof(TypedValue).
  auto const next_local = [&] (Vout& v) {
    v << addqi{static_cast<int>(sizeof(TypedValue)),
               local, local, v.makeReg()};
  };

  alignJmpTarget(cb);

  us.freeManyLocalsHelper = vwrap(cb, data, fixups, [&] (Vout& v) {
    // We always unroll the final `kNumFreeLocalsHelpers' decrefs, so only loop
    // until we hit that point.
    v << lea{rvmfp()[localOffset(kNumFreeLocalsHelpers - 1)], last};

    // Decref and advance until `local' equals `last'.
    doWhile(v, CC_NZ, {},
      [&] (const VregList& in, const VregList& out) {
        auto const sf = v.makeReg();

        decref_local(v);
        next_local(v);
        v << cmpq{local, last, sf};
        return sf;
      }
    );
  });

  // Unrolled tail: one entry point per remaining local. Entry 0 is emitted
  // last and does not advance `local'. Presumably each entry falls through
  // into the next one emitted --- confirm against vwrap().
  for (auto i = kNumFreeLocalsHelpers - 1; i >= 0; --i) {
    us.freeLocalsHelpers[i] = vwrap(cb, data, [&] (Vout& v) {
      decref_local(v);
      if (i != 0) next_local(v);
    });
  }

  // All the stub entrypoints share the same ret.
  vwrap(cb, data, fixups, [] (Vout& v) { v << ret{}; });

  // This stub is hot, so make sure to keep it small.
  // The size check is currently compiled out.
#if 0
  // TODO(gut): Currently this assert fails.
  // Take a closer look when looking at performance
  always_assert(Stats::enabled() ||
                (cb.frontier() - release <= 4 * cache_line_size()));
#endif

  fixups.process(nullptr);
  return release;
}
示例#16
0
/*
 * Emit the stubs used to decref frame locals and record their entry points in
 * `us'.
 *
 * In this variant the loop and the unrolled decref sequences are emitted first
 * and kept in local variables; the entry points published in `us' are small
 * trampolines, emitted afterwards, that push rlr()/rfp() and jump to them.
 * The shared epilogue pops that pair again before returning.
 *
 * Returns the address of the decref helper emitted by emitDecRefHelper().
 * The emission order below determines the layout of the stubs in `cb', so the
 * statements must not be reordered.
 */
TCA emitFreeLocalsHelpers(CodeBlock& cb, DataBlock& data, UniqueStubs& us) {
  // The address of the first local is passed in the second argument register.
  // We use the third and fourth as scratch registers.
  auto const local = rarg(1);
  auto const last = rarg(2);
  auto const type = rarg(3);
  CGMeta fixups;
  // Addresses of the internal stub bodies; the public entry points in `us'
  // jump to these.
  TCA freeLocalsHelpers[kNumFreeLocalsHelpers];
  TCA freeManyLocalsHelper;

  // This stub is very hot; keep it cache-aligned.
  align(cb, &fixups, Alignment::CacheLine, AlignContext::Dead);
  auto const release =
    emitDecRefHelper(cb, data, fixups, local, type, local | last);

  // Emits: load the type of the TypedValue at `local' and call `release' if
  // the type compares greater than KindOfRefCountThreshold.
  auto const decref_local = [&] (Vout& v) {
    auto const sf = v.makeReg();

    // We can't use emitLoadTVType() here because it does a byte load, and we
    // need to sign-extend since we use `type' as a 32-bit array index to the
    // destructor table.
    // NOTE(review): the instruction used is `loadzbl', whose name suggests a
    // zero-extending byte load rather than a sign-extending one --- confirm
    // which is intended.
    v << loadzbl{local[TVOFF(m_type)], type};
    emitCmpTVType(v, sf, KindOfRefCountThreshold, type);

    ifThen(v, CC_G, sf, [&] (Vout& v) {
      v << call{release, arg_regs(3)};
    });
  };

  // Emits: advance `local' by sizeof(TypedValue).
  auto const next_local = [&] (Vout& v) {
    v << addqi{static_cast<int>(sizeof(TypedValue)),
               local, local, v.makeReg()};
  };

  alignJmpTarget(cb);

  freeManyLocalsHelper = vwrap(cb, data, [&] (Vout& v) {
    // We always unroll the final `kNumFreeLocalsHelpers' decrefs, so only loop
    // until we hit that point.
    v << lea{rvmfp()[localOffset(kNumFreeLocalsHelpers - 1)], last};

    // Set up frame linkage to avoid an indirect fixup.
    v << copy{rsp(), rfp()};

    // Decref and advance until `local' equals `last'.
    doWhile(v, CC_NZ, {},
      [&] (const VregList& in, const VregList& out) {
        auto const sf = v.makeReg();

        decref_local(v);
        next_local(v);
        v << cmpq{local, last, sf};
        return sf;
      }
    );
  });

  // Unrolled tail bodies: one per remaining local. Body 0 is emitted last and
  // does not advance `local'. Presumably each body falls through into the
  // next one emitted --- confirm against vwrap().
  for (auto i = kNumFreeLocalsHelpers - 1; i >= 0; --i) {
    freeLocalsHelpers[i] = vwrap(cb, data, [&] (Vout& v) {
      decref_local(v);
      if (i != 0) next_local(v);
    });
  }

  // All the stub entrypoints share the same ret. It first pops the
  // rfp()/rlr() pair pushed by the entry trampolines below.
  vwrap(cb, data, fixups, [] (Vout& v) {
    v << popp{rfp(), rlr()};
    v << ret{};
  });

  // Create a table of branches: these are the entry points published in `us'.
  us.freeManyLocalsHelper = vwrap(cb, data, [&] (Vout& v) {
    v << pushp{rlr(), rfp()};

    // rvmfp() is needed by the freeManyLocalsHelper stub above, so frame
    // linkage setup is deferred until after its use in freeManyLocalsHelper.
    v << jmpi{freeManyLocalsHelper};
  });
  for (auto i = kNumFreeLocalsHelpers - 1; i >= 0; --i) {
    us.freeLocalsHelpers[i] = vwrap(cb, data, [&] (Vout& v) {
      // We set up frame linkage to avoid an indirect fixup.
      v << pushp{rlr(), rfp()};
      v << copy{rsp(), rfp()};
      v << jmpi{freeLocalsHelpers[i]};
    });
  }

  // FIXME: This stub is hot, so make sure to keep it small.
  // The size check is currently compiled out.
#if 0
  always_assert(Stats::enabled() ||
                (cb.frontier() - release <= 4 * x64::cache_line_size()));
#endif

  fixups.process(nullptr);
  return release;
}