bool TargetNVC0::canDualIssue(const Instruction *a, const Instruction *b) const { const OpClass clA = operationClass[a->op]; const OpClass clB = operationClass[b->op]; if (getChipset() >= 0xe4) { // not texturing // not if the 2nd instruction isn't necessarily executed if (clA == OPCLASS_TEXTURE || clA == OPCLASS_FLOW) return false; // Check that a and b don't write to the same sources, nor that b reads // anything that a writes. if (!a->canCommuteDefDef(b) || !a->canCommuteDefSrc(b)) return false; // anything with MOV if (a->op == OP_MOV || b->op == OP_MOV) return true; if (clA == clB) { switch (clA) { // there might be more case OPCLASS_COMPARE: if ((a->op == OP_MIN || a->op == OP_MAX) && (b->op == OP_MIN || b->op == OP_MAX)) break; return false; case OPCLASS_ARITH: break; default: return false; } // only F32 arith or integer additions return (a->dType == TYPE_F32 || a->op == OP_ADD || b->dType == TYPE_F32 || b->op == OP_ADD); } // nothing with TEXBAR if (a->op == OP_TEXBAR || b->op == OP_TEXBAR) return false; // no loads and stores accessing the same space if ((clA == OPCLASS_LOAD && clB == OPCLASS_STORE) || (clB == OPCLASS_LOAD && clA == OPCLASS_STORE)) if (a->src(0).getFile() == b->src(0).getFile()) return false; // no > 32-bit ops if (typeSizeof(a->dType) > 4 || typeSizeof(b->dType) > 4 || typeSizeof(a->sType) > 4 || typeSizeof(b->sType) > 4) return false; return true; } else { return false; // info not needed (yet) } }
// Build an immediate from the prototype @proto, retyped to @ty: the
// prototype's register description is copied wholesale, then its type and
// size are overridden to match @ty.
ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty)
{
   reg = proto->reg;

   // Size always follows from the requested type, not from the prototype.
   reg.size = typeSizeof(ty);
   reg.type = ty;
}
// Report whether a value of type @ty can be accessed in data file @file.
//
// TYPE_NONE and TYPE_B96 are never supported. Constant-memory accesses are
// additionally limited in size on newer chipsets: at most 4 bytes from
// NVISA_GM107_CHIPSET on, at most 8 bytes from NVISA_GK104_CHIPSET on.
bool
TargetNVC0::isAccessSupported(DataFile file, DataType ty) const
{
   if (ty == TYPE_NONE)
      return false;

   const bool constSpace = (file == FILE_MEMORY_CONST);

   if (constSpace && getChipset() >= NVISA_GM107_CHIPSET)
      return typeSizeof(ty) <= 4;

   // wrong encoding ?
   if (constSpace && getChipset() >= NVISA_GK104_CHIPSET)
      return typeSizeof(ty) <= 8;

   return ty != TYPE_B96;
}
// Report whether a value of type @ty can be accessed in data file @file.
//
// TYPE_B96 and TYPE_NONE are never supported. Types of up to 4 bytes are
// supported everywhere; anything larger only in local or global memory.
bool
TargetNV50::isAccessSupported(DataFile file, DataType ty) const
{
   switch (ty) {
   case TYPE_B96:
   case TYPE_NONE:
      return false;
   default:
      break;
   }

   if (typeSizeof(ty) <= 4)
      return true;

   return file == FILE_MEMORY_LOCAL || file == FILE_MEMORY_GLOBAL;
}
// Report whether a value of type @ty can be accessed in data file @file.
//
// TYPE_NONE is never supported. On chipsets >= 0xe0, constant-memory
// accesses are limited to at most 8 bytes. TYPE_B96 is only supported for
// shader inputs and outputs; every other type is accepted.
bool
TargetNVC0::isAccessSupported(DataFile file, DataType ty) const
{
   if (ty == TYPE_NONE)
      return false;

   // wrong encoding ?
   if (file == FILE_MEMORY_CONST && getChipset() >= 0xe0)
      return typeSizeof(ty) <= 8;

   if (ty != TYPE_B96)
      return true;

   return file == FILE_SHADER_INPUT || file == FILE_SHADER_OUTPUT;
}
bool TargetNVC0::canDualIssue(const Instruction *a, const Instruction *b) const { const OpClass clA = operationClass[a->op]; const OpClass clB = operationClass[b->op]; if (getChipset() >= 0xe4) { // not texturing // not if the 2nd instruction isn't necessarily executed if (clA == OPCLASS_TEXTURE || clA == OPCLASS_FLOW) return false; // anything with MOV if (a->op == OP_MOV || b->op == OP_MOV) return true; if (clA == clB) { // only F32 arith or integer additions if (clA != OPCLASS_ARITH) return false; return (a->dType == TYPE_F32 || a->op == OP_ADD || b->dType == TYPE_F32 || b->op == OP_ADD); } // nothing with TEXBAR if (a->op == OP_TEXBAR || b->op == OP_TEXBAR) return false; // no loads and stores accessing the the same space if ((clA == OPCLASS_LOAD && clB == OPCLASS_STORE) || (clB == OPCLASS_LOAD && clA == OPCLASS_STORE)) if (a->src(0).getFile() == b->src(0).getFile()) return false; // no > 32-bit ops if (typeSizeof(a->dType) > 4 || typeSizeof(b->dType) > 4 || typeSizeof(a->sType) > 4 || typeSizeof(b->sType) > 4) return false; return true; } else { return false; // info not needed (yet) } }
// long: rrr, arr, rcr, acr, rrc, arc, gcr, grr // short: rr, ar, rc, gr // immd: ri, gi bool TargetNV50::insnCanLoad(const Instruction *i, int s, const Instruction *ld) const { DataFile sf = ld->src(0).getFile(); if (sf == FILE_IMMEDIATE && (i->predSrc >= 0 || i->flagsDef >= 0)) return false; if (s >= opInfo[i->op].srcNr) return false; if (!(opInfo[i->op].srcFiles[s] & (1 << (int)sf))) return false; if (s == 2 && i->src(1).getFile() != FILE_GPR) return false; // NOTE: don't rely on flagsDef for (int d = 0; i->defExists(d); ++d) if (i->def(d).getFile() == FILE_FLAGS) return false; unsigned mode = 0; for (int z = 0; z < Target::operationSrcNr[i->op]; ++z) { DataFile zf = (z == s) ? sf : i->src(z).getFile(); switch (zf) { case FILE_GPR: break; case FILE_MEMORY_SHARED: case FILE_SHADER_INPUT: mode |= 1 << (z * 2); break; case FILE_MEMORY_CONST: mode |= 2 << (z * 2); break; case FILE_IMMEDIATE: mode |= 3 << (z * 2); default: break; } } switch (mode) { case 0x00: case 0x01: case 0x03: case 0x08: case 0x09: case 0x0c: case 0x20: case 0x21: break; case 0x0d: if (ld->bb->getProgram()->getType() != Program::TYPE_GEOMETRY) return false; default: return false; } uint8_t ldSize; if ((i->op == OP_MUL || i->op == OP_MAD) && !isFloatType(i->dType)) { // 32-bit MUL will be split into 16-bit MULs if (ld->src(0).isIndirect(0)) return false; if (sf == FILE_IMMEDIATE) return false; ldSize = 2; } else { ldSize = typeSizeof(ld->dType); } if (sf == FILE_IMMEDIATE) return true; // Check if memory access is encodable: if (ldSize < 4 && sf == FILE_SHADER_INPUT) // no < 4-byte aligned a[] access return false; if (ld->getSrc(0)->reg.data.offset > (int32_t)(127 * ldSize)) return false; if (ld->src(0).isIndirect(0)) { for (int z = 0; i->srcExists(z); ++z) if (i->src(z).isIndirect(0)) return false; // s[] access only possible in CP, $aX always applies if (sf == FILE_MEMORY_SHARED) return true; if (!ld->bb) // can't check type ... 
return false; Program::Type pt = ld->bb->getProgram()->getType(); // $aX applies to c[] only in VP, FP, GP if p[] is not accessed if (pt == Program::TYPE_COMPUTE) return false; if (pt == Program::TYPE_GEOMETRY) { if (sf == FILE_MEMORY_CONST) return i->src(s).getFile() != FILE_SHADER_INPUT; return sf == FILE_SHADER_INPUT; } return sf == FILE_MEMORY_CONST; } return true; }
bool TargetNVC0::insnCanLoad(const Instruction *i, int s, const Instruction *ld) const { DataFile sf = ld->src(0).getFile(); // immediate 0 can be represented by GPR $r63/$r255 if (sf == FILE_IMMEDIATE && ld->getSrc(0)->reg.data.u64 == 0) return (!i->isPseudo() && !i->asTex() && i->op != OP_EXPORT && i->op != OP_STORE); if (s >= opInfo[i->op].srcNr) return false; if (!(opInfo[i->op].srcFiles[s] & (1 << (int)sf))) return false; // indirect loads can only be done by OP_LOAD/VFETCH/INTERP on nvc0 if (ld->src(0).isIndirect(0)) return false; for (int k = 0; i->srcExists(k); ++k) { if (i->src(k).getFile() == FILE_IMMEDIATE) { if (k == 2 && i->op == OP_SUCLAMP) // special case continue; if (i->getSrc(k)->reg.data.u64 != 0) return false; } else if (i->src(k).getFile() != FILE_GPR && i->src(k).getFile() != FILE_PREDICATE) { return false; } } // not all instructions support full 32 bit immediates if (sf == FILE_IMMEDIATE) { Storage ® = ld->getSrc(0)->asImm()->reg; if (opInfo[i->op].immdBits != 0xffffffff || typeSizeof(i->sType) > 4) { switch (i->sType) { case TYPE_F64: if (reg.data.u64 & 0x00000fffffffffffULL) return false; break; case TYPE_F32: if (reg.data.u32 & 0xfff) return false; break; case TYPE_S32: case TYPE_U32: // with u32, 0xfffff counts as 0xffffffff as well if (reg.data.s32 > 0x7ffff || reg.data.s32 < -0x80000) return false; break; case TYPE_U8: case TYPE_S8: case TYPE_U16: case TYPE_S16: case TYPE_F16: break; default: return false; } } else if (i->op == OP_MAD || i->op == OP_FMA) { // requires src == dst, cannot decide before RA // (except if we implement more constraints) if (ld->getSrc(0)->asImm()->reg.data.u32 & 0xfff) return false; } else if (i->op == OP_ADD && i->sType == TYPE_F32) { // add f32 LIMM cannot saturate if (i->saturate && (reg.data.u32 & 0xfff)) return false; } } return true; }