void cgStLocRange(IRLS& env, const IRInstruction* inst) {
  auto const range = inst->extra<StLocRange>();

  if (range->start >= range->end) return;

  auto const fp = srcLoc(env, inst, 0).reg();
  auto const loc = srcLoc(env, inst, 1);
  auto const val = inst->src(1);
  auto& v = vmain(env);

  auto ireg = v.makeReg();
  auto nreg = v.makeReg();

  v << lea{fp[localOffset(range->start)], ireg};
  v << lea{fp[localOffset(range->end)], nreg};

  doWhile(v, CC_NE, {ireg},
    [&] (const VregList& in, const VregList& out) {
      auto const i = in[0];
      auto const res = out[0];
      auto const sf = v.makeReg();

      storeTV(v, i[0], loc, val);
      v << subqi{int32_t{sizeof(Cell)}, i, res, v.makeReg()};
      v << cmpq{res, nreg, sf};
      return sf;
    }
  );
}
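// Side note: a minimal C++ analogue (hypothetical names, not the HHVM API) of
// the loop cgStLocRange emits above. Locals live below the frame pointer, so
// walking from `start' to `end' means stepping a Cell pointer *down* until it
// reaches the one-past-the-end slot -- which is why the emitted loop subtracts
// sizeof(Cell) and compares against the lea of localOffset(range->end).
#include <cstdint>

namespace sketch {
struct Cell { int64_t data; uint8_t type; };  // simplified local slot

// Assumes local i lives at fp[-(i + 1)], mirroring localOffset().
void stLocRange(Cell* fp, uint32_t start, uint32_t end, Cell val) {
  if (start >= end) return;
  for (Cell* p = fp - (start + 1); p != fp - (end + 1); --p) {
    *p = val;
  }
}
}  // namespace sketch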
int peano::kernel::spacetreegrid::SingleLevelEnumerator::operator() (int localVertexNumber) const {
  peano::kernel::spacetreegrid::SingleLevelEnumerator::LocalVertexIntegerIndex localOffset;
  int base = TWO_POWER_D_DIVIDED_BY_TWO;
  for (int d=DIMENSIONS-1; d>=0; d--) {
    localOffset(d) = localVertexNumber / base;
    assertion5( localOffset(d)>=0, localOffset, localVertexNumber, d, base, _discreteOffset );
    assertion5( localOffset(d)<=1, localOffset, localVertexNumber, d, base, _discreteOffset );
    localVertexNumber -= localOffset(d) * base;
    base /= 2;
  }
  localOffset += _discreteOffset;
  return peano::kernel::spacetreegrid::SingleLevelEnumerator::lineariseVertexIndex( localOffset );
}
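// A standalone sketch of the same decoding idea (hypothetical names, not part
// of Peano): a local vertex number in D dimensions is read as a D-digit binary
// number, most significant digit first, yielding a 0/1 offset per dimension.
#include <array>
#include <cassert>

template <int D>
std::array<int, D> decodeVertexNumber(int vertexNumber) {
  std::array<int, D> offset{};
  int base = 1 << (D - 1);  // plays the role of TWO_POWER_D_DIVIDED_BY_TWO
  for (int d = D - 1; d >= 0; --d) {
    offset[d] = vertexNumber / base;  // each digit is 0 or 1 for valid input
    assert(offset[d] >= 0 && offset[d] <= 1);
    vertexNumber -= offset[d] * base;
    base /= 2;
  }
  return offset;
}
// Example: in 2D, vertex number 3 decodes to the per-dimension offset (1, 1).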
void cgCheckLoc(IRLS& env, const IRInstruction* inst) {
  auto const baseOff = localOffset(inst->extra<CheckLoc>()->locId);
  auto const base = srcLoc(env, inst, 0).reg()[baseOff];

  emitTypeCheck(vmain(env), env, inst->typeParam(),
                base + TVOFF(m_type), base + TVOFF(m_data), inst->taken());
}
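// For context, a sketch of the layout the TVOFF() offsets above address
// (assumed, simplified from HHVM's actual TypedValue definition): a local slot
// pairs a value word with a type tag, so a type check reads the tag at
// base + TVOFF(m_type) and, when needed, the payload at base + TVOFF(m_data).
#include <cstdint>

struct TypedValueSketch {
  union {
    int64_t num;  // integer payload
    void* ptr;    // pointer payload (strings, objects, ...)
  } m_data;       // value word, at TVOFF(m_data)
  uint8_t m_type; // DataType tag, at TVOFF(m_type)
};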
void cgLdLocPseudoMain(IRLS& env, const IRInstruction* inst) {
  auto const fp = srcLoc(env, inst, 0).reg();
  auto const off = localOffset(inst->extra<LdLocPseudoMain>()->locId);
  auto& v = vmain(env);

  irlower::emitTypeCheck(v, env, inst->typeParam(),
                         fp[off + TVOFF(m_type)], fp[off + TVOFF(m_data)],
                         inst->taken());
  loadTV(v, inst->dst(), dstLoc(env, inst, 0), fp[off]);
}
void CodeGenerator::cgGuardLoc(IRInstruction* inst) {
  auto const rFP = x2a(curOpd(inst->src(0)).reg());
  auto const baseOff = localOffset(inst->extra<GuardLoc>()->locId);
  emitTypeTest(
    inst->typeParam(),
    rFP[baseOff + TVOFF(m_type)],
    rFP[baseOff + TVOFF(m_data)],
    [&] (ConditionCode cc) {
      auto const destSK = SrcKey(curFunc(), m_unit.bcOff());
      auto const destSR = m_tx64->getSrcRec(destSK);
      destSR->emitFallbackJump(this->m_mainCode, ccNegate(cc));
    });
}
void operator()(ThreadParams& params,
                const std::string& name,
                T_Scalar value,
                const std::string& attrName = "",
                T_Attribute attribute = T_Attribute())
{
    log<picLog::INPUT_OUTPUT>("HDF5: write %1%D scalars: %2%") % simDim % name;

    // Size over all processes
    Dimensions globalSize(1, 1, 1);
    // Offset for this process
    Dimensions localOffset(0, 0, 0);
    // Offset for all processes
    Dimensions globalOffset(0, 0, 0);

    for (uint32_t d = 0; d < simDim; ++d)
    {
        globalSize[d] = Environment<simDim>::get().GridController().getGpuNodes()[d];
        localOffset[d] = Environment<simDim>::get().GridController().getPosition()[d];
    }

    Dimensions localSize(1, 1, 1);

    // avoid deadlock between not finished pmacc tasks and mpi calls in adios
    __getTransactionEvent().waitForFinished();

    typename traits::PICToSplash<T_Scalar>::type splashType;
    params.dataCollector->writeDomain(
        params.currentStep,           /* id == time step */
        globalSize,                   /* total size of dataset over all processes */
        localOffset,                  /* write offset for this process */
        splashType,                   /* data type */
        simDim,                       /* NDims spatial dimensionality of the field */
        splash::Selection(localSize), /* data size of this process */
        name.c_str(),                 /* data set name */
        splash::Domain(
            globalOffset,             /* offset of the global domain */
            globalSize                /* size of the global domain */
        ),
        DomainCollector::GridType,
        &value);

    if (!attrName.empty())
    {
        /* simulation attribute for data */
        typename traits::PICToSplash<T_Attribute>::type attType;

        log<picLog::INPUT_OUTPUT>("HDF5: write attribute %1% for scalars: %2%") % attrName % name;
        params.dataCollector->writeAttribute(params.currentStep,
                                             attType,
                                             name.c_str(),
                                             attrName.c_str(),
                                             &attribute);
    }
}
void cgCreateAAWH(IRLS& env, const IRInstruction* inst) {
  auto const fp = srcLoc(env, inst, 0).reg();
  auto const extra = inst->extra<CreateAAWHData>();

  cgCallHelper(
    vmain(env),
    env,
    CallSpec::direct(c_AwaitAllWaitHandle::fromFrameNoCheck),
    callDest(env, inst),
    SyncOptions::Sync,
    argGroup(env, inst)
      .imm(extra->count)
      .ssa(1)
      .addr(fp, localOffset(extra->first))
  );
}
void DiskEmissionUtil::samplePosition(hkVector4& position, hkPseudoRandomGenerator* pseudoRandomGenerator) const {
  // Pick a random point on the disk (see http://mathworld.wolfram.com/DiskPointPicking.html)
  hkReal radius = m_radius * hkMath::sqrt(pseudoRandomGenerator->getRandReal01());
  hkReal theta = pseudoRandomGenerator->getRandReal01() * 2.f * HK_REAL_PI;
  hkReal diskX = radius * hkMath::cos(theta);
  hkReal diskY = radius * hkMath::sin(theta);

  hkReal offsetX = pseudoRandomGenerator->getRandRange(-m_outOfPlaneVariance, m_outOfPlaneVariance);
  hkReal offsetY = diskY + pseudoRandomGenerator->getRandRange(-m_inPlaneVariance, m_inPlaneVariance);
  hkReal offsetZ = diskX + pseudoRandomGenerator->getRandRange(-m_inPlaneVariance, m_inPlaneVariance);

  hkVector4 localOffset(offsetX, offsetY, offsetZ);
  m_orthonormalBasis.multiplyVector(localOffset, position);
  position.add4(m_position);
}
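// Why r = R * sqrt(u) in the sampling above: disk area grows with r^2, so a
// uniform *area* density needs the radial CDF (r/R)^2, i.e. r = R * sqrt(u)
// for uniform u. Sampling r = R * u directly would cluster points near the
// center. A self-contained sketch using <random> in place of Havok's
// generator (hypothetical, not the Havok API):
#include <cmath>
#include <random>
#include <utility>

std::pair<double, double> sampleDiskPoint(double radius, std::mt19937& rng) {
  constexpr double kPi = 3.14159265358979323846;
  std::uniform_real_distribution<double> uni(0.0, 1.0);
  double r = radius * std::sqrt(uni(rng));  // uniform over area
  double theta = 2.0 * kPi * uni(rng);      // uniform angle
  return {r * std::cos(theta), r * std::sin(theta)};
}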
void cgCountWHNotDone(IRLS& env, const IRInstruction* inst) {
  auto const fp = srcLoc(env, inst, 0).reg();
  auto const extra = inst->extra<CountWHNotDone>();
  auto& v = vmain(env);

  auto const base = v.makeReg();
  auto const loc = v.cns((extra->count - 1) * 2);
  auto const cnt = v.cns(0);

  v << lea{fp[localOffset(extra->first + extra->count - 1)], base};

  auto out = doWhile(v, CC_GE, {loc, cnt},
    [&] (const VregList& in, const VregList& out) {
      auto const loc_in  = in[0],  cnt_in  = in[1];
      auto const loc_out = out[0], cnt_out = out[1];
      auto const sf1 = v.makeReg();
      auto const sf2 = v.makeReg();
      auto const obj = v.makeReg();

      // We depend on this in the test with 0x0E below.
      static_assert(c_WaitHandle::STATE_SUCCEEDED == 0, "");
      static_assert(c_WaitHandle::STATE_FAILED == 1, "");

      v << load{base[loc_in * 8], obj};
      v << testbim{0x0E, obj[WH::stateOff()], sf1};
      cond(v, CC_NZ, sf1, cnt_out,
        [&] (Vout& v) {
          auto ret = v.makeReg();
          v << incq{cnt_in, ret, v.makeReg()};
          return ret;
        },
        [&] (Vout& v) { return cnt_in; }
      );

      // Step the loop index down by 2: the slots are 16 bytes apart, but we
      // can only scale an index by at most 8.
      v << subqi{2, loc_in, loc_out, sf2};
      return sf2;
    }
  );

  v << copy{out[1], dstLoc(env, inst, 0).reg()};
}
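// What the emitted loop above computes, as a plain C++ sketch (hypothetical;
// states[i] stands in for loading each wait-handle object and reading its
// state byte at WH::stateOff()): walk the handles from last to first and
// count those whose state has any of the 0x0E bits set, i.e. is neither
// STATE_SUCCEEDED (0) nor STATE_FAILED (1).
#include <cstdint>

int countNotDone(const uint8_t* states, int count) {
  int notDone = 0;
  for (int i = count - 1; i >= 0; --i) {
    // States 0 and 1 have none of the 0x0E bits set; anything else is
    // still running or blocked, hence "not done".
    if (states[i] & 0x0E) ++notDone;
  }
  return notDone;
}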
AnimSkeleton::AnimSkeleton(const HFMModel& hfmModel) {
    // convert to std::vector of joints
    std::vector<HFMJoint> joints;
    joints.reserve(hfmModel.joints.size());
    for (auto& joint : hfmModel.joints) {
        joints.push_back(joint);
    }
    buildSkeletonFromJoints(joints, hfmModel.jointRotationOffsets);

    // we make a copy of the inverseBindMatrices in order to prevent mutating the model bind pose
    // when we are dealing with a joint offset in the model
    for (int i = 0; i < (int)hfmModel.meshes.size(); i++) {
        const HFMMesh& mesh = hfmModel.meshes.at(i);
        std::vector<HFMCluster> dummyClustersList;

        for (int j = 0; j < mesh.clusters.size(); j++) {
            // we only read from the source cluster; the copy below is what we adjust
            const HFMCluster& cluster = mesh.clusters.at(j);

            HFMCluster localCluster;
            localCluster.jointIndex = cluster.jointIndex;
            localCluster.inverseBindMatrix = cluster.inverseBindMatrix;
            localCluster.inverseBindTransform.evalFromRawMatrix(localCluster.inverseBindMatrix);

            // if we have a joint offset in the fst file then multiply its inverse by the
            // model cluster inverse bind matrix
            if (hfmModel.jointRotationOffsets.contains(cluster.jointIndex)) {
                AnimPose localOffset(hfmModel.jointRotationOffsets[cluster.jointIndex], glm::vec3());
                localCluster.inverseBindMatrix = (glm::mat4)localOffset.inverse() * cluster.inverseBindMatrix;
                localCluster.inverseBindTransform.evalFromRawMatrix(localCluster.inverseBindMatrix);
            }
            dummyClustersList.push_back(localCluster);
        }
        _clusterBindMatrixOriginalValues.push_back(dummyClustersList);
    }
}
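// The matrix identity behind the inverseBindMatrix adjustment above: if a
// joint's bind transform B picks up an extra local rotation offset Q (new
// bind pose B * Q), the matrix that undoes it is (B * Q)^-1 = Q^-1 * B^-1,
// which is why the code premultiplies the cluster's inverse bind matrix by
// localOffset.inverse(). A minimal glm sketch (assumed math, not the
// High Fidelity API):
#include <glm/glm.hpp>
#include <glm/gtc/quaternion.hpp>

glm::mat4 adjustedInverseBind(const glm::quat& offset, const glm::mat4& inverseBind) {
  return glm::mat4_cast(glm::inverse(offset)) * inverseBind;
}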
void AnimSkeleton::buildSkeletonFromJoints(const std::vector<HFMJoint>& joints, const QMap<int, glm::quat> jointOffsets) {
    _joints = joints;
    _jointsSize = (int)joints.size();

    // build caches of the default and pre/post rotation poses
    _absoluteDefaultPoses.reserve(_jointsSize);
    _relativeDefaultPoses.reserve(_jointsSize);
    _relativePreRotationPoses.reserve(_jointsSize);
    _relativePostRotationPoses.reserve(_jointsSize);

    // iterate over HFMJoints and extract the bind pose information.
    for (int i = 0; i < _jointsSize; i++) {
        // build pre and post transforms
        glm::mat4 preRotationTransform = _joints[i].preTransform * glm::mat4_cast(_joints[i].preRotation);
        glm::mat4 postRotationTransform = glm::mat4_cast(_joints[i].postRotation) * _joints[i].postTransform;
        _relativePreRotationPoses.push_back(AnimPose(preRotationTransform));
        _relativePostRotationPoses.push_back(AnimPose(postRotationTransform));

        // build relative and absolute default poses
        glm::mat4 relDefaultMat = glm::translate(_joints[i].translation) * preRotationTransform *
                                  glm::mat4_cast(_joints[i].rotation) * postRotationTransform;
        AnimPose relDefaultPose(relDefaultMat);

        int parentIndex = getParentIndex(i);
        if (parentIndex >= 0) {
            _absoluteDefaultPoses.push_back(_absoluteDefaultPoses[parentIndex] * relDefaultPose);
        } else {
            _absoluteDefaultPoses.push_back(relDefaultPose);
        }
    }

    // apply the per-joint rotation offsets to the absolute poses
    for (int k = 0; k < _jointsSize; k++) {
        if (jointOffsets.contains(k)) {
            AnimPose localOffset(jointOffsets[k], glm::vec3());
            _absoluteDefaultPoses[k] = _absoluteDefaultPoses[k] * localOffset;
        }
    }

    // re-compute relative poses
    _relativeDefaultPoses = _absoluteDefaultPoses;
    convertAbsolutePosesToRelative(_relativeDefaultPoses);

    for (int i = 0; i < _jointsSize; i++) {
        _jointIndicesByName[_joints[i].name] = i;
    }

    // build mirror map.
    _nonMirroredIndices.clear();
    _mirrorMap.reserve(_jointsSize);
    for (int i = 0; i < _jointsSize; i++) {
        if (_joints[i].name != "Hips" && _joints[i].name != "Spine" &&
            _joints[i].name != "Spine1" && _joints[i].name != "Spine2" &&
            _joints[i].name != "Neck" && _joints[i].name != "Head" &&
            !((_joints[i].name.startsWith("Left") || _joints[i].name.startsWith("Right")) &&
              _joints[i].name != "LeftEye" && _joints[i].name != "RightEye")) {
            // HACK: we don't want to mirror some joints, so we remember their indices
            // here so we can restore them after a future mirror operation
            _nonMirroredIndices.push_back(i);
        }

        int mirrorJointIndex = -1;
        if (_joints[i].name.startsWith("Left")) {
            QString mirrorJointName = QString(_joints[i].name).replace(0, 4, "Right");
            mirrorJointIndex = nameToJointIndex(mirrorJointName);
        } else if (_joints[i].name.startsWith("Right")) {
            QString mirrorJointName = QString(_joints[i].name).replace(0, 5, "Left");
            mirrorJointIndex = nameToJointIndex(mirrorJointName);
        }

        if (mirrorJointIndex >= 0) {
            _mirrorMap.push_back(mirrorJointIndex);
        } else {
            _mirrorMap.push_back(i);
        }
    }
}
//-------------------------------------------------------------------------
void CLam::AttachLAMLight(bool attach, CItem* pLightAttach, eGeometrySlot slot)
{
    //GameWarning("CLam::AttachLight");

    int id = (slot==eIGS_FirstPerson) ? 0 : 1;

    if (attach)
    {
        if (m_lamparams.light_range[id] == 0.f)
            return;

        Vec3 color = m_lamparams.light_color[id] * m_lamparams.light_diffuse_mul[id];
        float specular = 1.0f/m_lamparams.light_diffuse_mul[id];

        string helper;
        Vec3 dir(-1,0,0);
        Vec3 localOffset(0.0f,0.0f,0.0f);

        if (this != pLightAttach)
        {
            SAccessoryParams *params = pLightAttach->GetAccessoryParams(GetEntity()->GetClass()->GetName());
            if (!params)
                return;

            helper = params->attach_helper.c_str();
            if(slot==eIGS_FirstPerson)
                helper.append("_light");

            //Assets don't have same orientation for pistol/rifle.. 8/
            dir = (m_lamparams.isLamRifle && id==0) ? Vec3(-0.1f,-1.0f,0.0f) : Vec3(-1.0f,-0.1f,0.0f);
            dir.Normalize();
        }

        bool fakeLight = false;
        bool castShadows = false;

        //Some MP/SP restrictions for lights
        IRenderNode *pCasterException = NULL;
        if(CActor *pOwner = pLightAttach->GetOwnerActor())
        {
            if(gEnv->bMultiplayer)
            {
                if(!pOwner->IsClient())
                    fakeLight = true;
                else
                    castShadows = true;
            }
            else
            {
                if(pOwner->IsPlayer())
                    castShadows = true;
                //castShadows = false; //Not for now
            }

            if(castShadows)
            {
                if(IEntityRenderProxy* pRenderProxy = static_cast<IEntityRenderProxy*>(pOwner->GetEntity()->GetProxy(ENTITY_PROXY_RENDER)))
                    pCasterException = pRenderProxy->GetRenderNode();
            }
        }

        m_lightID[id] = pLightAttach->AttachLightEx(slot, 0, true, fakeLight, castShadows, pCasterException,
                                                    m_lamparams.light_range[id], color, specular, m_lamparams.light_texture[id],
                                                    m_lamparams.light_fov[id], helper.c_str(), localOffset, dir,
                                                    m_lamparams.light_material[id].c_str(), m_lamparams.light_hdr_dyn[id]);

        if (m_lightID[id])
            ++s_lightCount;

        // sounds
        pLightAttach->PlayAction(g_pItemStrings->enable_light);

        if (m_lightSoundId == INVALID_SOUNDID)
            m_lightSoundId = pLightAttach->PlayAction(g_pItemStrings->use_light);

        //Detach the non-needed light
        uint8 other = id^1;
        if (m_lightID[other])
        {
            pLightAttach->AttachLightEx(other, m_lightID[other], false, true);
            m_lightID[other] = 0;
            --s_lightCount;
        }
    }
    else
    {
        if (m_lightID[id])
        {
            pLightAttach->AttachLightEx(slot, m_lightID[id], false, true);
            m_lightID[id] = 0;
            --s_lightCount;
            PlayAction(g_pItemStrings->disable_light);
            StopSound(m_lightSoundId);
            m_lightSoundId = INVALID_SOUNDID;
        }
    }

    //GameWarning("Global light count = %d", s_lightCount);
}
void cgStLocPseudoMain(IRLS& env, const IRInstruction* inst) {
  auto const fp = srcLoc(env, inst, 0).reg();
  auto const off = localOffset(inst->extra<StLocPseudoMain>()->locId);
  storeTV(vmain(env), fp[off], srcLoc(env, inst, 1), inst->src(1));
}
TCA emitFreeLocalsHelpers(CodeBlock& cb, UniqueStubs& us) {
  // The address of the first local is passed in the second argument register.
  // We use the third and fourth as scratch registers.
  auto const local = rarg(1);
  auto const last = rarg(2);
  auto const type = rarg(3);

  CGMeta fixups;

  // This stub is very hot; keep it cache-aligned.
  align(cb, &fixups, Alignment::CacheLine, AlignContext::Dead);
  auto const release = emitDecRefHelper(cb, fixups, local, type, local | last);

  auto const decref_local = [&] (Vout& v) {
    auto const sf = v.makeReg();

    // We can't do a byte load here---we have to sign-extend since we use
    // `type' as a 32-bit array index to the destructor table.
    v << loadzbl{local[TVOFF(m_type)], type};
    emitCmpTVType(v, sf, KindOfRefCountThreshold, type);

    ifThen(v, CC_G, sf, [&] (Vout& v) {
      v << call{release, arg_regs(3)};
    });
  };

  auto const next_local = [&] (Vout& v) {
    v << addqi{static_cast<int>(sizeof(TypedValue)), local, local, v.makeReg()};
  };

  alignJmpTarget(cb);

  us.freeManyLocalsHelper = vwrap(cb, fixups, [&] (Vout& v) {
    // We always unroll the final `kNumFreeLocalsHelpers' decrefs, so only
    // loop until we hit that point.
    v << lea{rvmfp()[localOffset(kNumFreeLocalsHelpers - 1)], last};

    doWhile(v, CC_NZ, {},
      [&] (const VregList& in, const VregList& out) {
        auto const sf = v.makeReg();
        decref_local(v);
        next_local(v);
        v << cmpq{local, last, sf};
        return sf;
      }
    );
  });

  for (auto i = kNumFreeLocalsHelpers - 1; i >= 0; --i) {
    us.freeLocalsHelpers[i] = vwrap(cb, [&] (Vout& v) {
      decref_local(v);
      if (i != 0) next_local(v);
    });
  }

  // All the stub entrypoints share the same ret.
  vwrap(cb, fixups, [] (Vout& v) { v << ret{}; });

  // This stub is hot, so make sure to keep it small. Alas, we have more work
  // to do here under Windows, so we can't be quite this small :(
#ifndef _WIN32
  always_assert(Stats::enabled() ||
                (cb.frontier() - release <= 4 * x64::cache_line_size()));
#endif

  fixups.process(nullptr);
  return release;
}
TCA emitFreeLocalsHelpers(CodeBlock& cb, DataBlock& data, UniqueStubs& us) {
  // The address of the first local is passed in the second argument register.
  // We use the third and fourth as scratch registers.
  auto const local = rarg(1);
  auto const last = rarg(2);
  auto const type = rarg(3);

  CGMeta fixups;

  // This stub is very hot; keep it cache-aligned.
  align(cb, &fixups, Alignment::CacheLine, AlignContext::Dead);
  auto const release = emitDecRefHelper(cb, data, fixups, local, type, local | last);

  auto const decref_local = [&] (Vout& v) {
    auto const sf = v.makeReg();

    // We can't do a byte load here---we have to sign-extend since we use
    // `type' as a 32-bit array index to the destructor table.
    v << loadzbl{local[TVOFF(m_type)], type};
    emitCmpTVType(v, sf, KindOfRefCountThreshold, type);

    ifThen(v, CC_G, sf, [&] (Vout& v) {
      auto const dword_size = sizeof(int64_t);

      // Save the return address (link register) on the stack, keeping the
      // stack 16-byte aligned.
      v << mflr{rfuncln()};
      v << lea {rsp()[-2 * dword_size], rsp()};
      v << store{rfuncln(), rsp()[0]};

      v << call{release, arg_regs(3)};

      // Restore the return address from the stack.
      v << load{rsp()[0], rfuncln()};
      v << lea {rsp()[2 * dword_size], rsp()};
      v << mtlr{rfuncln()};
    });
  };

  auto const next_local = [&] (Vout& v) {
    v << addqi{static_cast<int>(sizeof(TypedValue)), local, local, v.makeReg()};
  };

  alignJmpTarget(cb);

  us.freeManyLocalsHelper = vwrap(cb, data, fixups, [&] (Vout& v) {
    // We always unroll the final `kNumFreeLocalsHelpers' decrefs, so only
    // loop until we hit that point.
    v << lea{rvmfp()[localOffset(kNumFreeLocalsHelpers - 1)], last};

    doWhile(v, CC_NZ, {},
      [&] (const VregList& in, const VregList& out) {
        auto const sf = v.makeReg();
        decref_local(v);
        next_local(v);
        v << cmpq{local, last, sf};
        return sf;
      }
    );
  });

  for (auto i = kNumFreeLocalsHelpers - 1; i >= 0; --i) {
    us.freeLocalsHelpers[i] = vwrap(cb, data, [&] (Vout& v) {
      decref_local(v);
      if (i != 0) next_local(v);
    });
  }

  // All the stub entrypoints share the same ret.
  vwrap(cb, data, fixups, [] (Vout& v) { v << ret{}; });

  // This stub is hot, so make sure to keep it small.
#if 0
  // TODO(gut): Currently this assert fails.
  // Take a closer look when looking at performance.
  always_assert(Stats::enabled() ||
                (cb.frontier() - release <= 4 * cache_line_size()));
#endif

  fixups.process(nullptr);
  return release;
}
TCA emitFreeLocalsHelpers(CodeBlock& cb, DataBlock& data, UniqueStubs& us) {
  // The address of the first local is passed in the second argument register.
  // We use the third and fourth as scratch registers.
  auto const local = rarg(1);
  auto const last = rarg(2);
  auto const type = rarg(3);

  CGMeta fixups;
  TCA freeLocalsHelpers[kNumFreeLocalsHelpers];
  TCA freeManyLocalsHelper;

  // This stub is very hot; keep it cache-aligned.
  align(cb, &fixups, Alignment::CacheLine, AlignContext::Dead);
  auto const release = emitDecRefHelper(cb, data, fixups, local, type, local | last);

  auto const decref_local = [&] (Vout& v) {
    auto const sf = v.makeReg();

    // We can't use emitLoadTVType() here because it does a byte load, and we
    // need to sign-extend since we use `type' as a 32-bit array index to the
    // destructor table.
    v << loadzbl{local[TVOFF(m_type)], type};
    emitCmpTVType(v, sf, KindOfRefCountThreshold, type);

    ifThen(v, CC_G, sf, [&] (Vout& v) {
      v << call{release, arg_regs(3)};
    });
  };

  auto const next_local = [&] (Vout& v) {
    v << addqi{static_cast<int>(sizeof(TypedValue)), local, local, v.makeReg()};
  };

  alignJmpTarget(cb);

  freeManyLocalsHelper = vwrap(cb, data, [&] (Vout& v) {
    // We always unroll the final `kNumFreeLocalsHelpers' decrefs, so only
    // loop until we hit that point.
    v << lea{rvmfp()[localOffset(kNumFreeLocalsHelpers - 1)], last};

    // Set up frame linkage to avoid an indirect fixup.
    v << copy{rsp(), rfp()};

    doWhile(v, CC_NZ, {},
      [&] (const VregList& in, const VregList& out) {
        auto const sf = v.makeReg();
        decref_local(v);
        next_local(v);
        v << cmpq{local, last, sf};
        return sf;
      }
    );
  });

  for (auto i = kNumFreeLocalsHelpers - 1; i >= 0; --i) {
    freeLocalsHelpers[i] = vwrap(cb, data, [&] (Vout& v) {
      decref_local(v);
      if (i != 0) next_local(v);
    });
  }

  // All the stub entrypoints share the same ret.
  vwrap(cb, data, fixups, [] (Vout& v) {
    v << popp{rfp(), rlr()};
    v << ret{};
  });

  // Create a table of branches.
  us.freeManyLocalsHelper = vwrap(cb, data, [&] (Vout& v) {
    v << pushp{rlr(), rfp()};

    // rvmfp() is needed by the freeManyLocalsHelper stub above, so frame
    // linkage setup is deferred until after its use in freeManyLocalsHelper.
    v << jmpi{freeManyLocalsHelper};
  });

  for (auto i = kNumFreeLocalsHelpers - 1; i >= 0; --i) {
    us.freeLocalsHelpers[i] = vwrap(cb, data, [&] (Vout& v) {
      // We set up frame linkage to avoid an indirect fixup.
      v << pushp{rlr(), rfp()};
      v << copy{rsp(), rfp()};
      v << jmpi{freeLocalsHelpers[i]};
    });
  }

  // FIXME: This stub is hot, so make sure to keep it small.
#if 0
  always_assert(Stats::enabled() ||
                (cb.frontier() - release <= 4 * x64::cache_line_size()));
#endif

  fixups.process(nullptr);
  return release;
}
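// The shape shared by the three emitFreeLocalsHelpers variants above, as a
// hypothetical C++ analogue: a looping path for large frames
// (freeManyLocalsHelper) that runs until only the unrolled tail remains,
// then a chain of entry points that fall through into each other so that
// entering at freeLocalsHelpers[i] frees i + 1 locals. The names, the
// release() stand-in, and the helper count of 3 are illustrative only.
#include <cstdint>

struct TypedValue { int64_t data; uint8_t type; };    // simplified local slot
inline void release(TypedValue* tv) { tv->type = 0; } // decref stand-in
constexpr int kFreeLocalsHelpersSketch = 3;           // real constant differs

void freeLocals(TypedValue* local, int count) {
  // freeManyLocalsHelper: loop until only the unrolled tail remains.
  while (count > kFreeLocalsHelpersSketch) {
    release(local++);
    --count;
  }
  // Then fall through the unrolled entry points, one decref per case.
  switch (count) {
    case 3: release(local++); [[fallthrough]];
    case 2: release(local++); [[fallthrough]];
    case 1: release(local);   break;
    case 0: break;
  }
}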