45 #define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass" 76 unsigned LdarOp,
unsigned StlrOp,
unsigned CmpOp,
77 unsigned ExtendImm,
unsigned ZeroReg,
/// Helper function which extracts the specified 16-bit chunk from a
/// 64-bit value.
///
/// \param Imm      the 64-bit immediate to extract from.
/// \param ChunkIdx which 16-bit chunk to extract (0 = bits [15:0],
///                 1 = bits [31:16], 2 = bits [47:32], 3 = bits [63:48]).
/// \returns the selected chunk, zero-extended to 64 bits.
static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
  assert(ChunkIdx < 4 && "Out of range chunk index specified!");

  // Shift the requested chunk down to bit 0 and mask off everything above it.
  return (Imm >> (ChunkIdx * 16)) & 0xFFFF;
}
/// Check whether the given 16-bit chunk, replicated to full 64-bit width,
/// can be materialized with a single ORR instruction; on success the
/// logical-immediate encoding is returned through \p Encoding.
/// NOTE(review): the tail of this function (the logical-immediate check and
/// return) is not visible in this extract — confirm against the full source.
static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
// Replicate the 16-bit chunk into all four 16-bit lanes of a 64-bit value.
Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk;
139 for (
unsigned Idx = 0; Idx < 4; ++Idx)
143 for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end();
144 Chunk != End; ++Chunk) {
145 const uint64_t ChunkVal = Chunk->first;
146 const unsigned Count = Chunk->second;
148 uint64_t Encoding = 0;
152 if ((Count != 2 && Count != 3) || !
canUseOrr(ChunkVal, Encoding))
155 const bool CountThree = Count == 3;
160 .addReg(AArch64::XZR)
166 unsigned ShiftAmt = 0;
169 for (; ShiftAmt < 64; ShiftAmt += 16) {
170 Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
172 if (Imm16 != ChunkVal)
188 transferImpOps(MI, MIB, MIB1);
194 for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) {
195 Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
197 if (Imm16 != ChunkVal)
209 transferImpOps(MI, MIB, MIB2);
/// Clear or set all bits in the chunk at the given index.
///
/// \param Imm   the 64-bit immediate to modify.
/// \param Idx   which 16-bit chunk to modify (0..3, chunk 0 = bits [15:0]).
/// \param Clear if true, zero the chunk; otherwise set it to all ones.
/// \returns the modified immediate.
static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
  const uint64_t Mask = 0xFFFF;

  if (Clear)
    // Clear chunk in the immediate.
    Imm &= ~(Mask << (Idx * 16));
  else
    // Replace chunk with ones.
    Imm |= Mask << (Idx * 16);

  return Imm;
}
268 const int NotSet = -1;
269 const uint64_t
Mask = 0xFFFF;
271 int StartIdx = NotSet;
274 for (
int Idx = 0; Idx < 4; ++Idx) {
275 int64_t Chunk =
getChunk(UImm, Idx);
277 Chunk = (Chunk << 48) >> 48;
286 if (StartIdx == NotSet || EndIdx == NotSet)
290 uint64_t Outside = 0;
292 uint64_t Inside =
Mask;
297 if (StartIdx > EndIdx) {
302 uint64_t OrrImm = UImm;
303 int FirstMovkIdx = NotSet;
304 int SecondMovkIdx = NotSet;
308 for (
int Idx = 0; Idx < 4; ++Idx) {
309 const uint64_t Chunk =
getChunk(UImm, Idx);
313 if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) {
314 OrrImm =
updateImm(OrrImm, Idx, Outside == 0);
317 if (FirstMovkIdx == NotSet)
324 }
else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
325 OrrImm =
updateImm(OrrImm, Idx, Inside != Mask);
328 if (FirstMovkIdx == NotSet)
334 assert(FirstMovkIdx != NotSet &&
"Constant materializable with single ORR!");
337 uint64_t Encoding = 0;
342 .addReg(AArch64::XZR)
348 const bool SingleMovk = SecondMovkIdx == NotSet;
361 transferImpOps(MI, MIB, MIB1);
375 transferImpOps(MI, MIB, MIB2);
388 const unsigned Mask = 0xFFFF;
390 if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
399 unsigned OneChunks = 0;
400 unsigned ZeroChunks = 0;
401 for (
unsigned Shift = 0; Shift < BitSize; Shift += 16) {
402 const unsigned Chunk = (Imm >> Shift) & Mask;
413 uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
416 unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri);
420 .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
422 transferImpOps(MI, MIB, MIB);
431 if (OneChunks >= (BitSize / 16) - 2 || ZeroChunks >= (BitSize / 16) - 2)
432 return expandMOVImmSimple(MBB, MBBI, BitSize, OneChunks, ZeroChunks);
434 assert(BitSize == 64 &&
"All 32-bit immediates can be expanded with a" 445 for (
unsigned Shift = 0; Shift < BitSize; Shift += 16) {
446 uint64_t ShiftedMask = (0xFFFFULL << Shift);
447 uint64_t ZeroChunk = UImm & ~ShiftedMask;
448 uint64_t OneChunk = UImm | ShiftedMask;
449 uint64_t RotatedImm = (UImm << 32) | (UImm >> 32);
450 uint64_t ReplicateChunk = ZeroChunk | (RotatedImm & ShiftedMask);
454 BitSize, Encoding)) {
459 .addReg(AArch64::XZR)
473 transferImpOps(MI, MIB, MIB1);
487 if (OneChunks || ZeroChunks)
488 return expandMOVImmSimple(MBB, MBBI, BitSize, OneChunks, ZeroChunks);
506 return expandMOVImmSimple(MBB, MBBI, BitSize, OneChunks, ZeroChunks);
515 unsigned ZeroChunks) {
519 const unsigned Mask = 0xFFFF;
528 if (OneChunks > ZeroChunks) {
535 Imm &= (1LL << 32) - 1;
536 FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi);
538 FirstOpc = (isNeg ? AArch64::MOVNXi : AArch64::MOVZXi);
541 unsigned LastShift = 0;
545 Shift = (TZ / 16) * 16;
546 LastShift = ((63 - LZ) / 16) * 16;
548 unsigned Imm16 = (Imm >> Shift) & Mask;
562 if (Shift == LastShift) {
563 transferImpOps(MI, MIB1, MIB1);
569 unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi);
570 while (Shift < LastShift) {
572 Imm16 = (Imm >> Shift) & Mask;
573 if (Imm16 == (isNeg ? Mask : 0))
584 transferImpOps(MI, MIB1, MIB2);
589 bool AArch64ExpandPseudo::expandCMP_SWAP(
591 unsigned StlrOp,
unsigned CmpOp,
unsigned ExtendImm,
unsigned ZeroReg,
611 MF->
insert(++LoadCmpBB->getIterator(), StoreBB);
612 MF->
insert(++StoreBB->getIterator(), DoneBB);
620 BuildMI(LoadCmpBB, DL,
TII->get(AArch64::MOVZWi), StatusReg)
624 BuildMI(LoadCmpBB, DL,
TII->get(CmpOp), ZeroReg)
628 BuildMI(LoadCmpBB, DL,
TII->get(AArch64::Bcc))
632 LoadCmpBB->addSuccessor(DoneBB);
633 LoadCmpBB->addSuccessor(StoreBB);
638 BuildMI(StoreBB, DL,
TII->get(StlrOp), StatusReg)
641 BuildMI(StoreBB, DL,
TII->get(AArch64::CBNZW))
644 StoreBB->addSuccessor(LoadCmpBB);
645 StoreBB->addSuccessor(DoneBB);
647 DoneBB->splice(DoneBB->end(), &MBB,
MI, MBB.
end());
648 DoneBB->transferSuccessors(&MBB);
652 NextMBBI = MBB.
end();
653 MI.eraseFromParent();
661 StoreBB->clearLiveIns();
663 LoadCmpBB->clearLiveIns();
669 bool AArch64ExpandPseudo::expandCMP_SWAP_128(
693 MF->
insert(++LoadCmpBB->getIterator(), StoreBB);
694 MF->
insert(++StoreBB->getIterator(), DoneBB);
701 BuildMI(LoadCmpBB, DL,
TII->get(AArch64::LDAXPX))
705 BuildMI(LoadCmpBB, DL,
TII->get(AArch64::SUBSXrs), AArch64::XZR)
709 BuildMI(LoadCmpBB, DL,
TII->get(AArch64::CSINCWr), StatusReg)
710 .addUse(AArch64::WZR)
713 BuildMI(LoadCmpBB, DL,
TII->get(AArch64::SUBSXrs), AArch64::XZR)
717 BuildMI(LoadCmpBB, DL,
TII->get(AArch64::CSINCWr), StatusReg)
721 BuildMI(LoadCmpBB, DL,
TII->get(AArch64::CBNZW))
724 LoadCmpBB->addSuccessor(DoneBB);
725 LoadCmpBB->addSuccessor(StoreBB);
730 BuildMI(StoreBB, DL,
TII->get(AArch64::STLXPX), StatusReg)
734 BuildMI(StoreBB, DL,
TII->get(AArch64::CBNZW))
737 StoreBB->addSuccessor(LoadCmpBB);
738 StoreBB->addSuccessor(DoneBB);
740 DoneBB->splice(DoneBB->end(), &MBB,
MI, MBB.
end());
741 DoneBB->transferSuccessors(&MBB);
745 NextMBBI = MBB.
end();
746 MI.eraseFromParent();
754 StoreBB->clearLiveIns();
756 LoadCmpBB->clearLiveIns();
773 case AArch64::ADDWrr:
774 case AArch64::SUBWrr:
775 case AArch64::ADDXrr:
776 case AArch64::SUBXrr:
777 case AArch64::ADDSWrr:
778 case AArch64::SUBSWrr:
779 case AArch64::ADDSXrr:
780 case AArch64::SUBSXrr:
781 case AArch64::ANDWrr:
782 case AArch64::ANDXrr:
783 case AArch64::BICWrr:
784 case AArch64::BICXrr:
785 case AArch64::ANDSWrr:
786 case AArch64::ANDSXrr:
787 case AArch64::BICSWrr:
788 case AArch64::BICSXrr:
789 case AArch64::EONWrr:
790 case AArch64::EONXrr:
791 case AArch64::EORWrr:
792 case AArch64::EORXrr:
793 case AArch64::ORNWrr:
794 case AArch64::ORNXrr:
795 case AArch64::ORRWrr:
796 case AArch64::ORRXrr: {
801 case AArch64::ADDWrr: Opcode = AArch64::ADDWrs;
break;
802 case AArch64::SUBWrr: Opcode = AArch64::SUBWrs;
break;
803 case AArch64::ADDXrr: Opcode = AArch64::ADDXrs;
break;
804 case AArch64::SUBXrr: Opcode = AArch64::SUBXrs;
break;
805 case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs;
break;
806 case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs;
break;
807 case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs;
break;
808 case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs;
break;
809 case AArch64::ANDWrr: Opcode = AArch64::ANDWrs;
break;
810 case AArch64::ANDXrr: Opcode = AArch64::ANDXrs;
break;
811 case AArch64::BICWrr: Opcode = AArch64::BICWrs;
break;
812 case AArch64::BICXrr: Opcode = AArch64::BICXrs;
break;
813 case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs;
break;
814 case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs;
break;
815 case AArch64::BICSWrr: Opcode = AArch64::BICSWrs;
break;
816 case AArch64::BICSXrr: Opcode = AArch64::BICSXrs;
break;
817 case AArch64::EONWrr: Opcode = AArch64::EONWrs;
break;
818 case AArch64::EONXrr: Opcode = AArch64::EONXrs;
break;
819 case AArch64::EORWrr: Opcode = AArch64::EORWrs;
break;
820 case AArch64::EORXrr: Opcode = AArch64::EORXrs;
break;
821 case AArch64::ORNWrr: Opcode = AArch64::ORNWrs;
break;
822 case AArch64::ORNXrr: Opcode = AArch64::ORNXrs;
break;
823 case AArch64::ORRWrr: Opcode = AArch64::ORRWrs;
break;
824 case AArch64::ORRXrr: Opcode = AArch64::ORRXrs;
break;
832 transferImpOps(MI, MIB1, MIB1);
846 TII->get(AArch64::LDRXl), DstReg);
854 "Only expect globals, externalsymbols, or constant pools");
877 "Only expect globals, externalsymbols, or constant pools");
885 transferImpOps(MI, MIB1, MIB2);
891 case AArch64::MOVaddr:
892 case AArch64::MOVaddrJT:
893 case AArch64::MOVaddrCP:
894 case AArch64::MOVaddrBA:
895 case AArch64::MOVaddrTLS:
896 case AArch64::MOVaddrEXT: {
910 transferImpOps(MI, MIB1, MIB2);
914 case AArch64::ADDlowTLS:
924 case AArch64::MOVbaseTLS: {
926 auto SysReg = AArch64SysReg::TPIDR_EL0;
930 SysReg = AArch64SysReg::TPIDR_EL1;
937 case AArch64::MOVi32imm:
938 return expandMOVImm(MBB, MBBI, 32);
939 case AArch64::MOVi64imm:
940 return expandMOVImm(MBB, MBBI, 64);
941 case AArch64::RET_ReallyLR: {
950 transferImpOps(MI, MIB, MIB);
954 case AArch64::CMP_SWAP_8:
955 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
958 AArch64::WZR, NextMBBI);
959 case AArch64::CMP_SWAP_16:
960 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
963 AArch64::WZR, NextMBBI);
964 case AArch64::CMP_SWAP_32:
965 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
968 AArch64::WZR, NextMBBI);
969 case AArch64::CMP_SWAP_64:
970 return expandCMP_SWAP(MBB, MBBI,
971 AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
973 AArch64::XZR, NextMBBI);
974 case AArch64::CMP_SWAP_128:
975 return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
977 case AArch64::AESMCrrTied:
978 case AArch64::AESIMCrrTied: {
981 TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
985 transferImpOps(MI, MIB, MIB);
1001 Modified |= expandMI(MBB, MBBI, NMBBI);
1012 for (
auto &MBB : MF)
1013 Modified |= expandMBB(MBB);
1019 return new AArch64ExpandPseudo();
unsigned getTargetFlags() const
const MachineInstrBuilder & add(const MachineOperand &MO) const
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application startup.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
This class represents lattice values for constants.
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Describe properties that are true of each instruction in the target description file.
unsigned getReg() const
getReg - Returns the register number.
static bool processLogicalImmediate(uint64_t Imm, unsigned RegSize, uint64_t &Encoding)
processLogicalImmediate - Determine if an immediate value can be encoded as the immediate operand of ...
static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const AArch64InstrInfo *TII)
Check for identical 16-bit chunks within the constant and if so materialize them with a single ORR in...
bool isCPI() const
isCPI - Tests if this is a MO_ConstantPoolIndex operand.
static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding)
Check whether the given 16-bit chunk replicated to full 64-bit width can be materialized with an ORR ...
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned char TargetFlags=0) const
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
const HexagonInstrInfo * TII
INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo", AARCH64_EXPAND_PSEUDO_NAME, false, false) static void transferImpOps(MachineInstr &OldMI
Transfer implicit operands on the pseudo instruction to the instructions created from the expansion...
const MachineInstrBuilder & addUse(unsigned RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const char * getSymbolName() const
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
virtual const TargetInstrInfo * getInstrInfo() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
unsigned getKillRegState(bool B)
static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear)
Clear or set all bits in the chunk at the given index.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
unsigned getDeadRegState(bool B)
static bool isStartChunk(uint64_t Chunk)
Check whether this chunk matches the pattern '1...0...'.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1...
void initializeAArch64ExpandPseudoPass(PassRegistry &)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineInstrBuilder & UseMI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const GlobalValue * getGlobal() const
FunctionPass class - This class is used to implement most global optimizations.
const Triple & getTargetTriple() const
self_iterator getIterator()
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx)
Helper function which extracts the specified 16-bit chunk from a 64-bit value.
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperand class - Representation of each machine instruction operand.
MachineInstrBuilder MachineInstrBuilder & DefMI
FunctionPass * createAArch64ExpandPseudoPass()
Returns an instance of the pseudo instruction expansion pass.
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned char TargetFlags=0) const
static bool isEndChunk(uint64_t Chunk)
Check whether this chunk matches the pattern '0...1...' This pattern ends a contiguous sequence of on...
static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const AArch64InstrInfo *TII)
Check whether the constant contains a sequence of contiguous ones, which might be interrupted by one ...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
CodeModel::Model getCodeModel() const
Returns the code model.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
void computeAndAddLiveIns(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB)
Convenience function combining computeLiveIns() and addLiveIns().
Representation of each machine instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
A set of physical registers with utility functions to track liveness when walking backward/forward th...
int64_t getOffset() const
Return the offset from the symbol in this operand.
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned char TargetFlags=0) const
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page...
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
bool isSymbol() const
isSymbol - Tests if this is a MO_ExternalSymbol operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
#define AARCH64_EXPAND_PSEUDO_NAME
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
StringRef - Represent a constant reference to a string, i.e.
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow...
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
const MachineOperand & getOperand(unsigned i) const