#define DEBUG_TYPE "aarch64-ldst-opt"

STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
          "Number of load/store from unscaled generated");
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");

// The LdStLimit limits how far we search for load/store pairs.
static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
                                   cl::init(20), cl::Hidden);

// The UpdateLimit limits how far we search for update instructions when we
// form pre-/post-index instructions.
static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
                                     cl::Hidden);
#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"

using LdStPairFlags = struct LdStPairFlags {
  // If a matching instruction is found, MergeForward is set to true if the
  // merge is to remove the first instruction and replace the second with a
  // pair-wise instruction, and false if the reverse is true.
  bool MergeForward = false;

  // SExtIdx gives the index of the result of the load pair that must be
  // sign-extended: -1 means none, 0 means the first result, 1 the second.
  int SExtIdx = -1;

  LdStPairFlags() = default;

  void setMergeForward(bool V = true) { MergeForward = V; }
  bool getMergeForward() const { return MergeForward; }

  void setSExtIdx(int V) { SExtIdx = V; }
  int getSExtIdx() const { return SExtIdx; }
};
struct AArch64LoadStoreOpt : public MachineFunctionPass {
  static char ID;

  AArch64LoadStoreOpt() : MachineFunctionPass(ID) {
    initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry());
  }

  AliasAnalysis *AA;
  const AArch64InstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const AArch64Subtarget *Subtarget;

  // Track which register units have been modified and used.
  LiveRegUnits ModifiedRegUnits, UsedRegUnits;

  // Scan the instructions looking for a load/store that can be combined with
  // the current instruction into a wider store or a load/store pair.
  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
                                               LdStPairFlags &Flags,
                                               unsigned Limit,
                                               bool FindNarrowMerge);

  // Merge two adjacent zero stores into a single wider store.
  MachineBasicBlock::iterator
  mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                        MachineBasicBlock::iterator MergeMI,
                        const LdStPairFlags &Flags);

  // Merge the two instructions indicated into a single pair-wise instruction.
  MachineBasicBlock::iterator
  mergePairedInsns(MachineBasicBlock::iterator I,
                   MachineBasicBlock::iterator Paired,
                   const LdStPairFlags &Flags);

  // Scan forward for an instruction that updates the base register of the
  // load/store and can be folded in as a pre-/post-indexed form.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
                                int UnscaledOffset, unsigned Limit);

  // Test whether MI is an add/sub of the base register that can be folded
  // into the memory instruction MemMI.
  bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
                            unsigned BaseReg, int Offset);

  // ... (further member declarations elided in this excerpt)

  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; }
};

char AArch64LoadStoreOpt::ID = 0;
INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
                AARCH64_LOAD_STORE_OPT_NAME, false, false)

static bool isNarrowStore(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
    return true;
  }
}
// Scaling factor for unscaled load or store.
static int getMemScale(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Opcode has unknown scale!");
  case AArch64::LDRBBui:
  case AArch64::LDURBBi:
  case AArch64::LDRSBWui:
  case AArch64::LDURSBWi:
  case AArch64::STRBBui:
  case AArch64::STURBBi:
    return 1;
  case AArch64::LDRHHui:
  case AArch64::LDURHHi:
  case AArch64::LDRSHWui:
  case AArch64::LDURSHWi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
    return 2;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::STRWui:
  case AArch64::STURWi:
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPWi:
  case AArch64::STPSi:
  case AArch64::STPWi:
    return 4;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRXui:
  case AArch64::STURXi:
  case AArch64::LDPDi:
  case AArch64::LDPXi:
  case AArch64::STPDi:
  case AArch64::STPXi:
    return 8;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::LDPQi:
  case AArch64::STPQi:
    return 16;
  }
}
static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                         bool *IsValidLdStrOpc = nullptr) {
  if (IsValidLdStrOpc)
    *IsValidLdStrOpc = true;
  switch (Opc) {
  default:
    if (IsValidLdStrOpc)
      *IsValidLdStrOpc = false;
    return std::numeric_limits<unsigned>::max();
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
  case AArch64::STRWui:
  case AArch64::STURWi:
  case AArch64::STRXui:
  case AArch64::STURXi:
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return Opc;
  case AArch64::LDRSWui:
    return AArch64::LDRWui;
  case AArch64::LDURSWi:
    return AArch64::LDURWi;
  }
}
static unsigned getMatchingWideOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no wide equivalent!");
  case AArch64::STRBBui:
    return AArch64::STRHHui;
  case AArch64::STRHHui:
    return AArch64::STRWui;
  case AArch64::STURBBi:
    return AArch64::STURHHi;
  case AArch64::STURHHi:
    return AArch64::STURWi;
  case AArch64::STURWi:
    return AArch64::STURXi;
  case AArch64::STRWui:
    return AArch64::STRXui;
  }
}
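// For example (illustrative): two adjacent narrow zero stores
//   strh wzr, [x0]
//   strh wzr, [x0, #2]
// are replaced via getMatchingWideOpcode(STRHHui) == STRWui by the single
//   str wzr, [x0]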
static unsigned getMatchingPairOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pairwise equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STPSi;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STPDi;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STPQi;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STPWi;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STPXi;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDPSi;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDPDi;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDPQi;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDPWi;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDPXi;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return AArch64::LDPSWi;
  }
}
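// Illustrative use: two adjacent STRWui stores
//   str w1, [x0]
//   str w2, [x0, #4]
// map through getMatchingPairOpcode(STRWui) == STPWi to
//   stp w1, w2, [x0]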
static unsigned isMatchingStore(MachineInstr &LoadInst,
                                MachineInstr &StoreInst) {
  unsigned LdOpc = LoadInst.getOpcode();
  unsigned StOpc = StoreInst.getOpcode();
  switch (LdOpc) {
  default:
    llvm_unreachable("Unsupported load instruction!");
  case AArch64::LDRBBui:
    return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
           StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURBBi:
    return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
           StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRHHui:
    return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
           StOpc == AArch64::STRXui;
  case AArch64::LDURHHi:
    return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
           StOpc == AArch64::STURXi;
  case AArch64::LDRWui:
    return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURWi:
    return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRXui:
    return StOpc == AArch64::STRXui;
  case AArch64::LDURXi:
    return StOpc == AArch64::STURXi;
  }
}
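// Note the asymmetry in the table above: a load can be forwarded from a store
// of equal or wider width with the same addressing form (scaled vs. unscaled),
// e.g. LDRHHui matches STRHHui/STRWui/STRXui, but a wider load never matches a
// narrower store.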
static unsigned getPreIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pre-indexed equivalent!");
  case AArch64::STRSui:
    return AArch64::STRSpre;
  case AArch64::STRDui:
    return AArch64::STRDpre;
  case AArch64::STRQui:
    return AArch64::STRQpre;
  case AArch64::STRBBui:
    return AArch64::STRBBpre;
  case AArch64::STRHHui:
    return AArch64::STRHHpre;
  case AArch64::STRWui:
    return AArch64::STRWpre;
  case AArch64::STRXui:
    return AArch64::STRXpre;
  case AArch64::LDRSui:
    return AArch64::LDRSpre;
  case AArch64::LDRDui:
    return AArch64::LDRDpre;
  case AArch64::LDRQui:
    return AArch64::LDRQpre;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpre;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpre;
  case AArch64::LDRWui:
    return AArch64::LDRWpre;
  case AArch64::LDRXui:
    return AArch64::LDRXpre;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpre;
  case AArch64::LDPSi:
    return AArch64::LDPSpre;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpre;
  case AArch64::LDPDi:
    return AArch64::LDPDpre;
  case AArch64::LDPQi:
    return AArch64::LDPQpre;
  case AArch64::LDPWi:
    return AArch64::LDPWpre;
  case AArch64::LDPXi:
    return AArch64::LDPXpre;
  case AArch64::STPSi:
    return AArch64::STPSpre;
  case AArch64::STPDi:
    return AArch64::STPDpre;
  case AArch64::STPQi:
    return AArch64::STPQpre;
  case AArch64::STPWi:
    return AArch64::STPWpre;
  case AArch64::STPXi:
    return AArch64::STPXpre;
  }
}
static unsigned getPostIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no post-indexed equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STRSpost;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STRDpost;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STRQpost;
  case AArch64::STRBBui:
    return AArch64::STRBBpost;
  case AArch64::STRHHui:
    return AArch64::STRHHpost;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STRWpost;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STRXpost;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDRSpost;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDRDpost;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDRQpost;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpost;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpost;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDRWpost;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDRXpost;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpost;
  case AArch64::LDPSi:
    return AArch64::LDPSpost;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpost;
  case AArch64::LDPDi:
    return AArch64::LDPDpost;
  case AArch64::LDPQi:
    return AArch64::LDPQpost;
  case AArch64::LDPWi:
    return AArch64::LDPWpost;
  case AArch64::LDPXi:
    return AArch64::LDPXpost;
  case AArch64::STPSi:
    return AArch64::STPSpost;
  case AArch64::STPDi:
    return AArch64::STPDpost;
  case AArch64::STPQi:
    return AArch64::STPQpost;
  case AArch64::STPWi:
    return AArch64::STPWpost;
  case AArch64::STPXi:
    return AArch64::STPXpost;
  }
}
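// Illustrative use: a base-register increment that follows a memory access
//   ldr x0, [x2]
//   add x2, x2, #8
// folds into the post-indexed form
//   ldr x0, [x2], #8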
static bool isPairedLdSt(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:
  case AArch64::LDPWi:
  case AArch64::LDPXi:
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
  case AArch64::STPWi:
  case AArch64::STPXi:
    return true;
  }
}
static const MachineOperand &getLdStRegOp(const MachineInstr &MI,
                                          unsigned PairedRegOp = 0) {
  assert(PairedRegOp < 2 && "Unexpected register operand idx.");
  unsigned Idx = isPairedLdSt(MI) ? PairedRegOp : 0;
  return MI.getOperand(Idx);
}
static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
                                  MachineInstr &StoreInst,
                                  const AArch64InstrInfo *TII) {
  assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
  int LoadSize = getMemScale(LoadInst);
  int StoreSize = getMemScale(StoreInst);
  int UnscaledStOffset = TII->isUnscaledLdSt(StoreInst)
                             ? getLdStOffsetOp(StoreInst).getImm()
                             : getLdStOffsetOp(StoreInst).getImm() * StoreSize;
  int UnscaledLdOffset = TII->isUnscaledLdSt(LoadInst)
                             ? getLdStOffsetOp(LoadInst).getImm()
                             : getLdStOffsetOp(LoadInst).getImm() * LoadSize;
  // The load must read bytes that lie entirely within the stored bytes.
  return (UnscaledStOffset <= UnscaledLdOffset) &&
         (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}
static bool isPromotableZeroStoreInst(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
          isNarrowStore(Opc)) &&
         getLdStRegOp(MI).getReg() == AArch64::WZR;
}
static bool isPromotableLoadFromStore(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  // Unscaled instructions.
  case AArch64::LDURBBi:
  case AArch64::LDURHHi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
    return true;
  }
}
static bool isMergeableLdStUpdate(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::STRHHui:
  case AArch64::STRBBui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
  case AArch64::LDRHHui:
  case AArch64::LDRBBui:
  // Unscaled instructions.
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  // Paired instructions.
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:
  case AArch64::LDPWi:
  case AArch64::LDPXi:
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
  case AArch64::STPWi:
  case AArch64::STPXi:
    // Make sure this is a reg+imm (as opposed to an address reloc).
    if (!getLdStOffsetOp(MI).isImm())
      return false;
    return true;
  }
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                                           MachineBasicBlock::iterator MergeMI,
                                           const LdStPairFlags &Flags) {
  assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
         "Expected promotable zero stores.");

  MachineBasicBlock::iterator NextI = I;
  ++NextI;
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further, since merging invalidates that iterator.
  if (NextI == MergeMI)
    ++NextI;

  unsigned Opc = I->getOpcode();
  bool IsScaled = !TII->isUnscaledLdSt(Opc);
  int OffsetStride = IsScaled ? 1 : getMemScale(*I);

  bool MergeForward = Flags.getMergeForward();
  // Insert the new instruction at whichever of the two stores MergeForward
  // indicates, and copy the base register operand from there so we keep
  // flags compatible with the input code.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
  const MachineOperand &BaseRegOp =
      MergeForward ? getLdStBaseOp(*MergeMI) : getLdStBaseOp(*I);

  // The store with the lower offset determines the offset of the wide store.
  MachineInstr *RtMI;
  if (getLdStOffsetOp(*I).getImm() ==
      getLdStOffsetOp(*MergeMI).getImm() + OffsetStride)
    RtMI = &*MergeMI;
  else
    RtMI = &*I;

  int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
  // Change the scaled offset from the small to the large type.
  if (IsScaled) {
    assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
    OffsetImm /= 2;
  }

  // Construct the new instruction: a single store of wzr/xzr covering both.
  DebugLoc DL = I->getDebugLoc();
  MachineBasicBlock *MBB = I->getParent();
  MachineInstrBuilder MIB;
  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
            .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
            .add(BaseRegOp)
            .addImm(OffsetImm)
            .cloneMergedMemRefs({&*I, &*MergeMI})
            .setMIFlags(I->mergeFlagsWith(*MergeMI));
  (void)MIB;

  LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n    ");
  LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));

  // Erase the old instructions.
  I->eraseFromParent();
  MergeMI->eraseFromParent();
  return NextI;
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator Paired,
                                      const LdStPairFlags &Flags) {
  MachineBasicBlock::iterator NextI = I;
  ++NextI;
  if (NextI == Paired)
    ++NextI;

  int SExtIdx = Flags.getSExtIdx();
  unsigned Opc =
      SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
  bool IsUnscaled = TII->isUnscaledLdSt(Opc);
  int OffsetStride = IsUnscaled ? getMemScale(*I) : 1;

  bool MergeForward = Flags.getMergeForward();
  // Insert the new paired instruction at whichever of the two MergeForward
  // indicates, and copy the base register operand from there.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
  const MachineOperand &BaseRegOp =
      MergeForward ? getLdStBaseOp(*Paired) : getLdStBaseOp(*I);

  int Offset = getLdStOffsetOp(*I).getImm();
  int PairedOffset = getLdStOffsetOp(*Paired).getImm();
  bool PairedIsUnscaled = TII->isUnscaledLdSt(Paired->getOpcode());
  if (IsUnscaled != PairedIsUnscaled) {
    // We're trying to pair instructions that differ in how they are scaled.
    // If FirstMI is scaled then scale the offset of MI accordingly; otherwise,
    // do the opposite (i.e., make Paired's offset unscaled).
    int MemSize = getMemScale(*Paired);
    if (PairedIsUnscaled) {
      assert(!(PairedOffset % MemSize) &&
             "Offset should be a multiple of the stride!");
      PairedOffset /= MemSize;
    } else {
      PairedOffset *= MemSize;
    }
  }

  // Which register is Rt and which is Rt2 depends on the offset order.
  MachineInstr *RtMI, *Rt2MI;
  if (Offset == PairedOffset + OffsetStride) {
    RtMI = &*Paired;
    Rt2MI = &*I;
    // We turn "ldp I, Paired" into "ldp Paired, I"; update the index of the
    // result that needs sign extension accordingly.
    if (SExtIdx != -1)
      SExtIdx = (SExtIdx + 1) % 2;
  } else {
    RtMI = &*I;
    Rt2MI = &*Paired;
  }
  int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
  // Scale the immediate offset, if necessary.
  if (TII->isUnscaledLdSt(RtMI->getOpcode())) {
    assert(!(OffsetImm % getMemScale(*RtMI)) &&
           "Unscaled offset cannot be scaled.");
    OffsetImm /= getMemScale(*RtMI);
  }

  // Kill flags may become invalid when moving stores for pairing.
  MachineOperand RegOp0 = getLdStRegOp(*RtMI);
  MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
  if (RegOp0.isUse()) {
    if (!MergeForward) {
      // Clear kill flags on the stores when moving upwards.
      RegOp0.setIsKill(false);
      RegOp1.setIsKill(false);
    } else {
      // Clear kill flags of the first store's register when moving downwards.
      unsigned Reg = getLdStRegOp(*I).getReg();
      for (MachineInstr &MI : make_range(std::next(I), std::next(Paired)))
        MI.clearRegisterKills(Reg, TRI);
    }
  }

  // Construct the new paired instruction.
  DebugLoc DL = I->getDebugLoc();
  MachineBasicBlock *MBB = I->getParent();
  MachineInstrBuilder MIB =
      BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingPairOpcode(Opc)))
          .add(RegOp0)
          .add(RegOp1)
          .add(BaseRegOp)
          .addImm(OffsetImm)
          .cloneMergedMemRefs({&*I, &*Paired})
          .setMIFlags(I->mergeFlagsWith(*Paired));

  LLVM_DEBUG(
      dbgs() << "Creating pair load/store. Replacing instructions:\n    ");

  if (SExtIdx != -1) {
    // The pair was built with the non-extending opcode, so re-create the
    // sign extension of the affected result explicitly.
    MachineOperand &DstMO = MIB->getOperand(SExtIdx);
    unsigned DstRegX = DstMO.getReg();
    // Get the W variant of that register and make the LDP define it instead.
    unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
    DstMO.setReg(DstRegW);
    // ... (emit a KILL to provide a definition of the X register for the
    //      machine verifier, elided in this excerpt)
    // Create the sign extension.
    BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
        .addReg(DstRegX)
        .addImm(0)
        .addImm(31);
  }

  // Erase the old instructions.
  I->eraseFromParent();
  Paired->eraseFromParent();
  return NextI;
}
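// Illustrative pairing of a sign-extending load (the SExtIdx path above):
//   ldrsw x0, [x2]
//   ldr   w1, [x2, #4]
// becomes
//   ldp   w0, w1, [x2]
//   sbfm  x0, x0, #0, #31   ; i.e. sxtw x0, w0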
MachineBasicBlock::iterator
AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                                          MachineBasicBlock::iterator StoreI) {
  MachineBasicBlock::iterator NextI = LoadI;
  ++NextI;

  int LoadSize = getMemScale(*LoadI);
  int StoreSize = getMemScale(*StoreI);
  unsigned LdRt = getLdStRegOp(*LoadI).getReg();
  const MachineOperand &StMO = getLdStRegOp(*StoreI);
  unsigned StRt = getLdStRegOp(*StoreI).getReg();
  bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);

  assert((IsStoreXReg ||
          TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
         "Unexpected RegClass");

  MachineInstr *BitExtMI;
  if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
    // Remove the load, if the destination register of the load is the same
    // register as the stored value.
    if (StRt == LdRt && LoadSize == 8) {
      for (MachineInstr &MI :
           make_range(StoreI->getIterator(), LoadI->getIterator())) {
        if (MI.killsRegister(StRt, TRI)) {
          MI.clearRegisterKills(StRt, TRI);
          break;
        }
      }
      LoadI->eraseFromParent();
      return NextI;
    }
    // Replace the load with a mov if the load and store are the same size.
    BitExtMI =
        BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
            .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
            .add(StMO)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
            .setMIFlags(LoadI->getFlags());
  } else {
    // FIXME: Currently we disable this transformation in big-endian targets.
    if (!Subtarget->isLittleEndian())
      return NextI;
    bool IsUnscaled = TII->isUnscaledLdSt(*LoadI);
    assert(IsUnscaled == TII->isUnscaledLdSt(*StoreI) &&
           "Unsupported ld/st match");
    assert(LoadSize <= StoreSize && "Invalid load size");
    int UnscaledLdOffset = IsUnscaled
                               ? getLdStOffsetOp(*LoadI).getImm()
                               : getLdStOffsetOp(*LoadI).getImm() * LoadSize;
    int UnscaledStOffset = IsUnscaled
                               ? getLdStOffsetOp(*StoreI).getImm()
                               : getLdStOffsetOp(*StoreI).getImm() * StoreSize;
    int Width = LoadSize * 8;
    unsigned DestReg = IsStoreXReg
                           ? TRI->getMatchingSuperReg(LdRt, AArch64::sub_32,
                                                      &AArch64::GPR64RegClass)
                           : LdRt;

    assert((UnscaledLdOffset >= UnscaledStOffset &&
            (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
           "Invalid offset");

    int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
    int Imms = Immr + Width - 1;
    if (UnscaledLdOffset == UnscaledStOffset) {
      uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
                                | (Immr << 6)                 // immr
                                | (Imms << 0);                // imms

      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
                  DestReg)
              .add(StMO)
              .addImm(AndMaskEncoded)
              .setMIFlags(LoadI->getFlags());
    } else {
      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
                  DestReg)
              .add(StMO)
              .addImm(Immr)
              .addImm(Imms)
              .setMIFlags(LoadI->getFlags());
    }
  }

  // Clear kill flags on the forwarded register between the store and the
  // new instruction.
  for (MachineInstr &MI :
       make_range(StoreI->getIterator(), BitExtMI->getIterator())) {
    if (MI.killsRegister(StRt, TRI)) {
      MI.clearRegisterKills(StRt, TRI);
      break;
    }
  }

  LoadI->eraseFromParent();
  return NextI;
}
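// Illustrative example of the promotion above: for
//   str  w1, [x0, #4]
//   ldrh w2, [x0, #6]
// the load reads bytes 2-3 of the stored word, i.e. bits [16,31] on a
// little-endian target, so it is rewritten as
//   ubfx w2, w1, #16, #16   ; UBFMWri with immr=16, imms=31
// When the load and store offsets coincide, the extract starts at bit 0 and
// degenerates into the AND-mask form emitted by the ANDWri/ANDXri path.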
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair load/store instructions.
  if (IsUnscaled) {
    // If the byte-offset isn't a multiple of the stride, there's no point
    // trying to match it.
    if (Offset % OffsetStride)
      return false;
    Offset /= OffsetStride;
  }
  return Offset <= 63 && Offset >= -64;
}
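// The [-64, 63] bound reflects the 7-bit signed, scaled immediate field of
// the LDP/STP encodings.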
// Round Num up to a multiple of PowOf2; a signed-int variant of alignTo from
// MathExtras.h (which operates on uint64_t and can't be used for negative
// numbers).
static int alignTo(int Num, int PowOf2) {
  return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
}
static bool mayAlias(MachineInstr &MIa, MachineInstr &MIb,
                     AliasAnalysis *AA) {
  return MIa.mayAlias(AA, MIb, /*UseTBAA*/ false);
}

static bool mayAlias(MachineInstr &MIa,
                     SmallVectorImpl<MachineInstr *> &MemInsns,
                     AliasAnalysis *AA) {
  for (MachineInstr *MIb : MemInsns)
    if (mayAlias(MIa, *MIb, AA))
      return true;
  return false;
}
bool AArch64LoadStoreOpt::findMatchingStore(
    MachineBasicBlock::iterator I, unsigned Limit,
    MachineBasicBlock::iterator &StoreI) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator MBBI = I;
  MachineInstr &LoadMI = *I;
  unsigned BaseReg = getLdStBaseOp(LoadMI).getReg();

  // If the load is the first instruction in the block, there's obviously
  // not any matching store.
  if (MBBI == B)
    return false;

  // Track which register units have been modified and used between the first
  // insn and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  unsigned Count = 0;
  do {
    --MBBI;
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    // If the load reads directly from the address the store writes to, and
    // the stored value has not been modified in between, we can promote it.
    if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
        BaseReg == getLdStBaseOp(MI).getReg() &&
        isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
        ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
      StoreI = MBBI;
      return true;
    }

    if (MI.isCall())
      return false;

    // Update modified / used register units.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is modified, we have no match.
    if (!ModifiedRegUnits.available(BaseReg))
      return false;

    // If we encounter a store aliased with the load, return early.
    if (MI.mayStore() && mayAlias(LoadMI, MI, AA))
      return false;
  } while (MBBI != B && Count < Limit);
  return false;
}
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
                                       LdStPairFlags &Flags,
                                       const AArch64InstrInfo *TII) {
  // If this is volatile or if pairing is suppressed, not a candidate.
  if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
    return false;

  // We should have already checked FirstMI for pair suppression and volatility.
  assert(!FirstMI.hasOrderedMemoryRef() &&
         !TII->isLdStPairSuppressed(FirstMI) &&
         "FirstMI shouldn't get here if either of these checks are true.");

  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();

  // Opcodes match: nothing more to check.
  if (OpcA == OpcB)
    return true;

  // Try to match a sign-extended load/store with a zero-extended load/store.
  bool IsValidLdStrOpc, PairIsValidLdStrOpc;
  unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
  assert(IsValidLdStrOpc &&
         "Given Opc should be a Load or Store with an immediate");
  // OpcA will be the first instruction in the pair.
  if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
    Flags.setSExtIdx(NonSExtOpc == (unsigned)OpcA ? 1 : 0);
    return true;
  }

  // If the second instruction isn't even a mergable/pairable load/store, bail.
  if (!PairIsValidLdStrOpc)
    return false;

  // FIXME: We don't support merging narrow stores with mixed scaled/unscaled
  // offsets.
  if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
    return false;

  // Try to match an unscaled load/store with a scaled load/store.
  return TII->isUnscaledLdSt(OpcA) != TII->isUnscaledLdSt(OpcB) &&
         getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB);
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
                                      LdStPairFlags &Flags, unsigned Limit,
                                      bool FindNarrowMerge) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator MBBI = I;
  MachineInstr &FirstMI = *I;
  ++MBBI;

  bool MayLoad = FirstMI.mayLoad();
  bool IsUnscaled = TII->isUnscaledLdSt(FirstMI);
  unsigned Reg = getLdStRegOp(FirstMI).getReg();
  unsigned BaseReg = getLdStBaseOp(FirstMI).getReg();
  int Offset = getLdStOffsetOp(FirstMI).getImm();
  int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1;
  bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn, and remember any instructions that
  // read or write memory in between.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  SmallVector<MachineInstr *, 4> MemInsns;

  for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
    MachineInstr &MI = *MBBI;
    if (!MI.isTransient())
      ++Count;

    Flags.setSExtIdx(-1);
    if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
        getLdStOffsetOp(MI).isImm()) {
      // Found another candidate; check whether base and offset are compatible
      // with the starting instruction, converting everything to a common
      // (unscaled) unit first.
      unsigned MIBaseReg = getLdStBaseOp(MI).getReg();
      int MIOffset = getLdStOffsetOp(MI).getImm();
      bool MIIsUnscaled = TII->isUnscaledLdSt(MI);
      if (IsUnscaled != MIIsUnscaled) {
        int MemSize = getMemScale(MI);
        if (MIIsUnscaled) {
          // If the unscaled offset isn't a multiple of the MemSize, we can't
          // pair the operations together.
          if (MIOffset % MemSize) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
          MIOffset /= MemSize;
        } else {
          MIOffset *= MemSize;
        }
      }

      if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) ||
                                   (Offset + OffsetStride == MIOffset))) {
        int MinOffset = Offset < MIOffset ? Offset : MIOffset;
        if (FindNarrowMerge) {
          // If the wide store can't express the offset of the narrow pair, or
          // the two zero stores use different registers, keep looking.
          if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
              (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
        } else {
          // If the resulting immediate offset is out of range for a pairwise
          // instruction, bail and keep looking.
          if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
        }

        // If the second instruction's register was not modified or used
        // between the two, and nothing in between aliases it, combine the
        // second into the first.
        if (ModifiedRegUnits.available(getLdStRegOp(MI).getReg()) &&
            !(MI.mayLoad() &&
              !UsedRegUnits.available(getLdStRegOp(MI).getReg())) &&
            !mayAlias(MI, MemInsns, AA)) {
          Flags.setMergeForward(false);
          return MBBI;
        }

        // Likewise, try combining the first instruction into the second.
        if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg()) &&
            !(MayLoad &&
              !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) &&
            !mayAlias(FirstMI, MemInsns, AA)) {
          Flags.setMergeForward(true);
          return MBBI;
        }
        // Unable to combine due to interference in between; keep looking.
      }
    }

    // Stop searching at a call that might modify memory.
    if (MI.isCall())
      return E;

    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // If the base register is modified, we have no match.
    if (!ModifiedRegUnits.available(BaseReg))
      return E;

    // Update the list of instructions that read/write memory.
    if (MI.mayLoadOrStore())
      MemInsns.push_back(&MI);
  }
  return E;
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
                                     MachineBasicBlock::iterator Update,
                                     bool IsPreIdx) {
  assert((Update->getOpcode() == AArch64::ADDXri ||
          Update->getOpcode() == AArch64::SUBXri) &&
         "Unexpected base register update instruction to merge!");
  MachineBasicBlock::iterator NextI = I;
  // Return the instruction following the merged instruction, which is
  // the instruction following the update instruction.
  if (++NextI == Update)
    ++NextI;

  int Value = Update->getOperand(2).getImm();
  assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
         "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
  if (Update->getOpcode() == AArch64::SUBXri)
    Value = -Value;

  unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
                             : getPostIndexedOpcode(I->getOpcode());
  MachineInstrBuilder MIB;
  if (!isPairedLdSt(*I)) {
    // Non-paired instruction.
    MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
              .add(getLdStRegOp(*Update))
              .add(getLdStRegOp(*I))
              .add(getLdStBaseOp(*I))
              .addImm(Value)
              .setMemRefs(I->memoperands())
              .setMIFlags(I->mergeFlagsWith(*Update));
  } else {
    // Paired instruction: the writeback immediate is scaled.
    int Scale = getMemScale(*I);
    MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
              .add(getLdStRegOp(*Update))
              .add(getLdStRegOp(*I, 0))
              .add(getLdStRegOp(*I, 1))
              .add(getLdStBaseOp(*I))
              .addImm(Value / Scale)
              .setMemRefs(I->memoperands())
              .setMIFlags(I->mergeFlagsWith(*Update));
  }
  (void)MIB;

  // Erase the old instructions for the block.
  I->eraseFromParent();
  Update->eraseFromParent();
  return NextI;
}
bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
                                               MachineInstr &MI,
                                               unsigned BaseReg, int Offset) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::SUBXri:
  case AArch64::ADDXri: {
    // Make sure it's a vanilla immediate operand, not a relocation or
    // anything else we can't handle.
    if (!MI.getOperand(2).isImm())
      break;
    // Watch out for 1 << 12 shifted values.
    if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
      break;

    // The update instruction source and destination register must be the
    // same as the load/store base register.
    if (MI.getOperand(0).getReg() != BaseReg ||
        MI.getOperand(1).getReg() != BaseReg)
      break;

    bool IsPairedInsn = isPairedLdSt(MemMI);
    int UpdateOffset = MI.getOperand(2).getImm();
    if (MI.getOpcode() == AArch64::SUBXri)
      UpdateOffset = -UpdateOffset;

    // For non-paired load/store instructions, the immediate must fit in a
    // signed 9-bit integer.
    if (!IsPairedInsn && (UpdateOffset > 255 || UpdateOffset < -256))
      break;

    // For paired load/store instructions, the immediate must be a multiple of
    // the scaling factor, and the scaled offset must fit in a signed 7-bit
    // integer.
    if (IsPairedInsn) {
      int Scale = getMemScale(MemMI);
      if (UpdateOffset % Scale != 0)
        break;

      int ScaledOffset = UpdateOffset / Scale;
      if (ScaledOffset > 63 || ScaledOffset < -64)
        break;
    }

    // If we have a non-zero Offset, check that it matches the amount we're
    // adding to the register.
    if (!Offset || Offset == UpdateOffset)
      return true;
    break;
  }
  }
  return false;
}
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
    MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr &MemMI = *I;
  MachineBasicBlock::iterator MBBI = I;

  unsigned BaseReg = getLdStBaseOp(MemMI).getReg();
  int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * getMemScale(MemMI);

  // Scan forward looking for post-index opportunities. Updating instructions
  // can't be formed if the memory instruction doesn't have the offset we're
  // looking for.
  if (MIUnscaledOffset != UnscaledOffset)
    return E;

  // If the base register overlaps a destination register, we can't merge the
  // update.
  bool IsPairedInsn = isPairedLdSt(MemMI);
  for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
    unsigned DestReg = getLdStRegOp(MemMI, i).getReg();
    if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
      return E;
  }

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  ++MBBI;
  for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
    MachineInstr &MI = *MBBI;
    if (!MI.isTransient())
      ++Count;

    // If we found a match, return it.
    if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
      return MBBI;

    // Update the status of what the instruction clobbered and used.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is used or modified, we have no match.
    if (!ModifiedRegUnits.available(BaseReg) ||
        !UsedRegUnits.available(BaseReg))
      return E;
  }
  return E;
}
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
    MachineBasicBlock::iterator I, unsigned Limit) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr &MemMI = *I;
  MachineBasicBlock::iterator MBBI = I;

  unsigned BaseReg = getLdStBaseOp(MemMI).getReg();
  int Offset = getLdStOffsetOp(MemMI).getImm();

  // If the load/store is the first instruction in the block, there's obviously
  // not any matching update. Ditto if the memory offset isn't zero.
  if (MBBI == B || Offset != 0)
    return E;

  // If the base register overlaps a destination register, we can't merge the
  // update.
  bool IsPairedInsn = isPairedLdSt(MemMI);
  for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
    unsigned DestReg = getLdStRegOp(MemMI, i).getReg();
    if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
      return E;
  }

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  unsigned Count = 0;
  do {
    --MBBI;
    MachineInstr &MI = *MBBI;
    if (!MI.isTransient())
      ++Count;

    // If we found a match, return it.
    if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset))
      return MBBI;

    // Update the status of what the instruction clobbered and used.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is used or modified, we have no match.
    if (!ModifiedRegUnits.available(BaseReg) ||
        !UsedRegUnits.available(BaseReg))
      return E;
  } while (MBBI != B && Count < Limit);
  return E;
}
bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
    MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  // If this is a volatile load, don't mess with it.
  if (MI.hasOrderedMemoryRef())
    return false;

  // Make sure this is a reg+imm.
  // FIXME: It is possible to extend it to handle reg+reg cases.
  if (!getLdStOffsetOp(MI).isImm())
    return false;

  // Look backward up to LdStLimit instructions.
  MachineBasicBlock::iterator StoreI;
  if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
    ++NumLoadsFromStoresPromoted;
    // Promote the load. Keeping the iterator straight is a pain, so we let
    // the merge routine tell us what the next instruction is after it's done
    // mucking about.
    MBBI = promoteLoadFromStore(MBBI, StoreI);
    return true;
  }
  return false;
}
bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
    MachineBasicBlock::iterator &MBBI) {
  assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow zero store.");
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();

  if (!TII->isCandidateToMergeOrPair(MI))
    return false;

  // Look ahead up to LdStLimit instructions for a mergable instruction.
  LdStPairFlags Flags;
  MachineBasicBlock::iterator MergeMI =
      findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
  if (MergeMI != E) {
    ++NumZeroStoresPromoted;
    // Keeping the iterator straight is a pain, so we let the merge routine
    // tell us what the next instruction is after it's done mucking about.
    MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
    return true;
  }
  return false;
}
bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();

  if (!TII->isCandidateToMergeOrPair(MI))
    return false;

  // Early exit if the offset is not possible to match; see the note after
  // this function on the extra stride of slack.
  bool IsUnscaled = TII->isUnscaledLdSt(MI);
  int Offset = getLdStOffsetOp(MI).getImm();
  int OffsetStride = IsUnscaled ? getMemScale(MI) : 1;
  // Allow one more for offset.
  if (Offset > 0)
    Offset -= OffsetStride;
  if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
    return false;

  // Look ahead up to LdStLimit instructions for a pairable instruction.
  LdStPairFlags Flags;
  MachineBasicBlock::iterator Paired =
      findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
  if (Paired != E) {
    ++NumPairCreated;
    if (TII->isUnscaledLdSt(MI))
      ++NumUnscaledPairCreated;
    // Keeping the iterator straight is a pain, so we let the merge routine
    // tell us what the next instruction is after it's done mucking about.
    MBBI = mergePairedInsns(MBBI, Paired, Flags);
    return true;
  }
  return false;
}
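// Note the "Offset -= OffsetStride" adjustment above: the matching candidate
// found later may sit one slot below this instruction, so the in-bounds check
// must also hold for a pair anchored one stride lower.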
bool AArch64LoadStoreOpt::tryToMergeLdStUpdate(
    MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();
  MachineBasicBlock::iterator Update;

  // Look forward to try to form a post-index instruction. For example,
  //   ldr x0, [x20]
  //   add x20, x20, #32
  // merged into:
  //   ldr x0, [x20], #32
  Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
  if (Update != E) {
    // Merge the update into the ld/st.
    MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
    return true;
  }

  // Don't know how to handle unscaled pre/post-index versions below, so bail.
  if (TII->isUnscaledLdSt(MI.getOpcode()))
    return false;

  // Look back to try to find a pre-index instruction. For example,
  //   add x0, x0, #8
  //   ldr x1, [x0]
  // merged into:
  //   ldr x1, [x0, #8]!
  Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
  if (Update != E) {
    // Merge the update into the ld/st.
    MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
    return true;
  }

  // The immediate in the load/store is scaled by the size of the memory
  // operation. The immediate in the add we're looking for, however, is not,
  // so adjust here.
  int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);

  // Look forward to try to find a pre-index update. For example,
  //   ldr x1, [x0, #64]
  //   add x0, x0, #64
  // merged into:
  //   ldr x1, [x0, #64]!
  Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
  if (Update != E) {
    // Merge the update into the ld/st.
    MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
    return true;
  }

  return false;
}
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
                                        bool EnableNarrowZeroStOpt) {
  bool Modified = false;
  // Four transformations, in order:
  // 1) Promote loads that directly read from earlier stores.
  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
      Modified = true;
    else
      ++MBBI;
  }
  // 2) Merge adjacent zero stores into a wider store.
  if (EnableNarrowZeroStOpt)
    for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
         MBBI != E;) {
      if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
        Modified = true;
      else
        ++MBBI;
    }
  // 3) Find loads and stores that can be merged into a single load or store
  //    pair instruction.
  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
      Modified = true;
    else
      ++MBBI;
  }
  // 4) Fold base register updates into the load/store as a writeback.
  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    if (isMergeableLdStUpdate(*MBBI) && tryToMergeLdStUpdate(MBBI))
      Modified = true;
    else
      ++MBBI;
  }
  return Modified;
}
bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(Fn.getFunction()))
    return false;

  Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
  TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo());
  TRI = Subtarget->getRegisterInfo();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  // Initialize the register unit trackers.
  ModifiedRegUnits.init(*TRI);
  UsedRegUnits.init(*TRI);

  bool Modified = false;
  bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
  for (auto &MBB : Fn)
    Modified |= optimizeBlock(MBB, enableNarrowZeroStOpt);

  return Modified;
}
FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
  return new AArch64LoadStoreOpt();
}