22#define DEBUG_TYPE "amdgpu-regbanklegalize"
25using namespace AMDGPU;
30 :
B(
B),
MRI(*
B.getMRI()), MUI(MUI), RBI(RBI), RBLRules(RBLRules),
31 SgprRB(&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
32 VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
33 VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {}
50 lower(
MI, Mapping, WaterfallSgprs);
66 unsigned ByteOffset = 0;
67 for (
LLT PartTy : LLTBreakdown) {
69 if (ByteOffset == 0) {
70 BasePlusOffset =
Base;
76 auto LoadPart = B.
buildLoad({DstRB, PartTy}, BasePlusOffset, *OffsetMMO);
77 LoadPartRegs.
push_back(LoadPart.getReg(0));
78 ByteOffset += PartTy.getSizeInBytes();
89 if (MRI.
getType(Reg) == MergeTy) {
93 for (
unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i)
94 MergeTyParts.
push_back(Unmerge.getReg(i));
105 assert(
MI.getNumMemOperands() == 1);
112 auto WideLoad = B.
buildLoad({DstRB, WideTy},
Base, *WideMMO);
118 auto Unmerge = B.
buildUnmerge({DstRB, MergeTy}, WideLoad);
122 for (
unsigned i = 0; i < NumElts; ++i) {
123 MergeTyParts.
push_back(Unmerge.getReg(i));
127 MI.eraseFromParent();
140 MI.getOpcode() == AMDGPU::G_SEXT ? -1 : 1);
145 B.
buildSelect(
MI.getOperand(0).getReg(),
MI.getOperand(1).getReg(), True,
147 MI.eraseFromParent();
154 if (
MI.getOpcode() == AMDGPU::G_ZEXT) {
163 {MI.getOperand(1).getReg(), Hi});
164 MI.eraseFromParent();
168 uint64_t ConstVal =
MI.getOperand(1).getCImm()->getZExtValue();
171 MI.eraseFromParent();
184 auto AndLo = B.
buildAnd(VgprRB_S32, Src64.getReg(0), One);
186 auto AndHi = B.
buildAnd(VgprRB_S32, Src64.getReg(1), Zero);
189 assert(Ty == S32 || Ty == S16);
195 MI.eraseFromParent();
201 unsigned Opc =
MI.getOpcode();
202 auto Lo = B.
buildInstr(Opc, {VgprRB_S32}, {Op1.getReg(0), Op2.getReg(0)});
203 auto Hi = B.
buildInstr(Opc, {VgprRB_S32}, {Op1.getReg(1), Op2.getReg(1)});
205 MI.eraseFromParent();
222 else if (
Size / 128 == 4)
230 else if (DstTy == S96)
231 splitLoad(
MI, {S64, S32}, S32);
232 else if (DstTy == V3S32)
233 splitLoad(
MI, {V2S32, S32}, S32);
234 else if (DstTy == V6S16)
235 splitLoad(
MI, {V4S16, V2S16}, V2S16);
246 else if (DstTy == V3S32)
247 widenLoad(
MI, V4S32, S32);
248 else if (DstTy == V6S16)
249 widenLoad(
MI, V8S16, V2S16);
403void RegBankLegalizeHelper::applyMappingDst(
407 for (; OpIdx < MethodIDs.
size(); ++OpIdx) {
408 if (MethodIDs[OpIdx] ==
None)
415 switch (MethodIDs[OpIdx]) {
433 assert(Ty == getTyFromID(MethodIDs[OpIdx]));
434 assert(RB == getRegBankFromID(MethodIDs[OpIdx]));
450 assert(Ty == getBTyFromID(MethodIDs[OpIdx], Ty));
451 assert(RB == getRegBankFromID(MethodIDs[OpIdx]));
461 B.
buildInstr(AMDGPU::G_AMDGPU_COPY_SCC_VCC, {SgprRB_S32}, {NewDst});
467 assert(Ty == getTyFromID(MethodIDs[OpIdx]));
470 Op.setReg(NewVgprDst);
480 assert(Ty == getBTyFromID(MethodIDs[OpIdx], Ty));
483 Op.setReg(NewVgprDst);
506void RegBankLegalizeHelper::applyMappingSrc(
510 for (
unsigned i = 0; i < MethodIDs.
size(); ++OpIdx, ++i) {
511 if (MethodIDs[i] ==
None || MethodIDs[i] ==
IntrId || MethodIDs[i] ==
Imm)
519 switch (MethodIDs[i]) {
522 assert(RB == VccRB || RB == SgprRB);
526 B.
buildInstr(AMDGPU::G_AMDGPU_COPY_VCC_SCC, {VccRB_S1}, {Aext});
527 Op.setReg(CopyVcc_Scc.getReg(0));
540 assert(Ty == getTyFromID(MethodIDs[i]));
541 assert(RB == getRegBankFromID(MethodIDs[i]));
551 assert(Ty == getBTyFromID(MethodIDs[i], Ty));
552 assert(RB == getRegBankFromID(MethodIDs[i]));
563 assert(Ty == getTyFromID(MethodIDs[i]));
566 Op.setReg(CopyToVgpr.getReg(0));
577 assert(Ty == getBTyFromID(MethodIDs[i], Ty));
580 Op.setReg(CopyToVgpr.getReg(0));
590 Op.setReg(Aext.getReg(0));
601 auto BoolInReg = B.
buildAnd(SgprRB_S32, Aext, Cst1);
602 Op.setReg(BoolInReg.getReg(0));
608 auto Sext = B.
buildSExt(SgprRB_S32, Reg);
609 Op.setReg(Sext.getReg(0));
626 MI.getOperand(0).setReg(NewDst);
629 for (
unsigned i = 1; i <
MI.getNumOperands(); i += 2) {
638 MI.getOperand(i).setReg(NewUse.getReg(0));
650 "before RegBankLegalize to lower lane mask(vcc) phis");
669 for (
unsigned i = StartOpIdx; i <= EndOpIdx; ++i) {
670 if (
MRI.getRegBankOrNull(
MI.getOperand(i).getReg()) != RB)
679 unsigned NumDefs =
MI.getNumDefs();
680 unsigned NumOperands =
MI.getNumOperands();
688 for (
unsigned i = NumDefs; i < NumOperands; ++i) {
692 MI.getOperand(i).setReg(Copy.getReg(0));
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
Provides AMDGPU specific target descriptions.
static bool verifyRegBankOnOperands(MachineInstr &MI, const RegisterBank *RB, MachineRegisterInfo &MRI, unsigned StartOpIdx, unsigned EndOpIdx)
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static Register UseReg(const MachineOperand &MO)
This file declares the MachineIRBuilder class.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void applyMappingTrivial(MachineInstr &MI)
RegBankLegalizeHelper(MachineIRBuilder &B, const MachineUniformityInfo &MUI, const RegisterBankInfo &RBI, const RegBankLegalizeRules &RBLRules)
void findRuleAndApplyMapping(MachineInstr &MI)
void applyMappingPHI(MachineInstr &MI)
const SetOfRulesForOpcode & getRulesForOpc(MachineInstr &MI) const
const RegBankLLTMapping & findMappingForMI(const MachineInstr &MI, const MachineRegisterInfo &MRI, const MachineUniformityInfo &MUI) const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
This class represents an Operation in the Expression.
constexpr bool isScalar() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
iterator SkipPHIsAndLabels(iterator I)
Return the first instruction in MBB after I that is not a PHI or a label.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op.
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
const RegisterBank * getRegBank(Register Reg) const
Return the register bank of Reg.
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
const RegisterBank * getRegBankOrNull(Register Reg) const
Return the register bank of Reg, or null if Reg has not been assigned a register bank or has been ass...
Holds all the information related to register banks.
This class implements the register bank concept.
Wrapper class representing virtual and physical registers.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void buildReadAnyLane(MachineIRBuilder &B, Register SgprDst, Register VgprSrc, const RegisterBankInfo &RBI)
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LoweringMethodID LoweringMethod
SmallVector< RegBankLLTMappingApplyID, 2 > DstOpMapping
SmallVector< RegBankLLTMappingApplyID, 4 > SrcOpMapping