47 #define DEBUG_TYPE "hexbit" 83 unsigned find_first()
const {
90 unsigned find_next(
unsigned Prev)
const {
98 unsigned Idx = v2x(R);
103 unsigned Idx = v2x(R);
116 reference operator[](
unsigned R) {
117 unsigned Idx = v2x(R);
121 bool operator[](
unsigned R)
const {
122 unsigned Idx = v2x(R);
126 bool has(
unsigned R)
const {
127 unsigned Idx = v2x(R);
138 return !Rs.BitVector::test(*
this);
145 void ensure(
unsigned Idx) {
150 static inline unsigned v2x(
unsigned v) {
154 static inline unsigned x2v(
unsigned x) {
164 const PrintRegSet &
P);
175 for (
unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
181 class Transformation;
190 return "Hexagon bit simplification";
208 uint16_t W, uint64_t &U);
209 static bool replaceReg(
unsigned OldR,
unsigned NewR,
213 static bool replaceRegWithSub(
unsigned OldR,
unsigned NewR,
215 static bool replaceSubWithSub(
unsigned OldR,
unsigned OldSR,
221 static bool getUsedBitsInStore(
unsigned Opc,
BitVector &
Bits,
223 static bool getUsedBits(
unsigned Opc,
unsigned OpN,
BitVector &Bits,
236 unsigned NewSub = Hexagon::NoSubRegister);
239 using HBS = HexagonBitSimplify;
244 class Transformation {
248 Transformation(
bool TD) : TopDown(TD) {}
249 virtual ~Transformation() =
default;
259 "Hexagon bit simplification",
false,
false)
266 bool Changed =
false;
269 Changed = T.processBlock(B, AVs);
273 getInstrDefs(
I, Defs);
277 for (
auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(&B)))
278 Changed |= visitBlock(*(DTN->getBlock()), T, NewAVs);
281 Changed |= T.processBlock(B, AVs);
292 if (!
Op.isReg() || !
Op.isDef())
294 unsigned R =
Op.getReg();
301 void HexagonBitSimplify::getInstrUses(
const MachineInstr &MI,
304 if (!
Op.isReg() || !
Op.isUse())
306 unsigned R =
Op.getReg();
317 for (uint16_t i = 0; i <
W; ++i) {
324 if (RC1[B1+i] != RC2[B2+i])
331 uint16_t
B, uint16_t W) {
333 for (uint16_t i = B; i < B+
W; ++i)
340 uint16_t B, uint16_t W, uint64_t &U) {
343 for (uint16_t i = B+W; i >
B; --i) {
355 bool HexagonBitSimplify::replaceReg(
unsigned OldR,
unsigned NewR,
362 for (
auto I = Begin;
I != End;
I = NextI) {
363 NextI = std::next(
I);
369 bool HexagonBitSimplify::replaceRegWithSub(
unsigned OldR,
unsigned NewR,
374 if (hasTiedUse(OldR, MRI, NewSR))
378 for (
auto I = Begin;
I != End;
I = NextI) {
379 NextI = std::next(
I);
386 bool HexagonBitSimplify::replaceSubWithSub(
unsigned OldR,
unsigned OldSR,
391 if (OldSR != NewSR && hasTiedUse(OldR, MRI, NewSR))
395 for (
auto I = Begin;
I != End;
I = NextI) {
396 NextI = std::next(
I);
397 if (
I->getSubReg() != OldSR)
419 switch (RC->
getID()) {
420 case Hexagon::DoubleRegsRegClassID:
421 case Hexagon::HvxWRRegClassID:
423 if (RR.
Sub == Hexagon::isub_hi || RR.
Sub == Hexagon::vsub_hi)
435 bool HexagonBitSimplify::parseRegSequence(
const MachineInstr &
I,
445 assert((Sub1 == SubLo && Sub2 == SubHi) || (Sub1 == SubHi && Sub2 == SubLo));
446 if (Sub1 == SubLo && Sub2 == SubHi) {
451 if (Sub1 == SubHi && Sub2 == SubLo) {
465 bool HexagonBitSimplify::getUsedBitsInStore(
unsigned Opc,
BitVector &
Bits,
467 using namespace Hexagon;
472 case S2_storerbnew_io:
473 case S2_pstorerbt_io:
474 case S2_pstorerbf_io:
475 case S4_pstorerbtnew_io:
476 case S4_pstorerbfnew_io:
477 case S2_pstorerbnewt_io:
478 case S2_pstorerbnewf_io:
479 case S4_pstorerbnewtnew_io:
480 case S4_pstorerbnewfnew_io:
482 case S2_storerbnew_pi:
483 case S2_pstorerbt_pi:
484 case S2_pstorerbf_pi:
485 case S2_pstorerbtnew_pi:
486 case S2_pstorerbfnew_pi:
487 case S2_pstorerbnewt_pi:
488 case S2_pstorerbnewf_pi:
489 case S2_pstorerbnewtnew_pi:
490 case S2_pstorerbnewfnew_pi:
492 case S4_storerbnew_ap:
494 case S2_storerbnew_pr:
496 case S4_storerbnew_ur:
498 case S2_storerbnew_pbr:
500 case S2_storerbnew_pci:
502 case S2_storerbnew_pcr:
504 case S4_storerbnew_rr:
505 case S4_pstorerbt_rr:
506 case S4_pstorerbf_rr:
507 case S4_pstorerbtnew_rr:
508 case S4_pstorerbfnew_rr:
509 case S4_pstorerbnewt_rr:
510 case S4_pstorerbnewf_rr:
511 case S4_pstorerbnewtnew_rr:
512 case S4_pstorerbnewfnew_rr:
514 case S2_storerbnewgp:
515 case S4_pstorerbt_abs:
516 case S4_pstorerbf_abs:
517 case S4_pstorerbtnew_abs:
518 case S4_pstorerbfnew_abs:
519 case S4_pstorerbnewt_abs:
520 case S4_pstorerbnewf_abs:
521 case S4_pstorerbnewtnew_abs:
522 case S4_pstorerbnewfnew_abs:
523 Bits.
set(Begin, Begin+8);
528 case S2_storerhnew_io:
529 case S2_pstorerht_io:
530 case S2_pstorerhf_io:
531 case S4_pstorerhtnew_io:
532 case S4_pstorerhfnew_io:
533 case S2_pstorerhnewt_io:
534 case S2_pstorerhnewf_io:
535 case S4_pstorerhnewtnew_io:
536 case S4_pstorerhnewfnew_io:
538 case S2_storerhnew_pi:
539 case S2_pstorerht_pi:
540 case S2_pstorerhf_pi:
541 case S2_pstorerhtnew_pi:
542 case S2_pstorerhfnew_pi:
543 case S2_pstorerhnewt_pi:
544 case S2_pstorerhnewf_pi:
545 case S2_pstorerhnewtnew_pi:
546 case S2_pstorerhnewfnew_pi:
548 case S4_storerhnew_ap:
550 case S2_storerhnew_pr:
552 case S4_storerhnew_ur:
554 case S2_storerhnew_pbr:
556 case S2_storerhnew_pci:
558 case S2_storerhnew_pcr:
560 case S4_pstorerht_rr:
561 case S4_pstorerhf_rr:
562 case S4_pstorerhtnew_rr:
563 case S4_pstorerhfnew_rr:
564 case S4_storerhnew_rr:
565 case S4_pstorerhnewt_rr:
566 case S4_pstorerhnewf_rr:
567 case S4_pstorerhnewtnew_rr:
568 case S4_pstorerhnewfnew_rr:
570 case S2_storerhnewgp:
571 case S4_pstorerht_abs:
572 case S4_pstorerhf_abs:
573 case S4_pstorerhtnew_abs:
574 case S4_pstorerhfnew_abs:
575 case S4_pstorerhnewt_abs:
576 case S4_pstorerhnewf_abs:
577 case S4_pstorerhnewtnew_abs:
578 case S4_pstorerhnewfnew_abs:
579 Bits.
set(Begin, Begin+16);
584 case S2_pstorerft_io:
585 case S2_pstorerff_io:
586 case S4_pstorerftnew_io:
587 case S4_pstorerffnew_io:
589 case S2_pstorerft_pi:
590 case S2_pstorerff_pi:
591 case S2_pstorerftnew_pi:
592 case S2_pstorerffnew_pi:
600 case S4_pstorerft_rr:
601 case S4_pstorerff_rr:
602 case S4_pstorerftnew_rr:
603 case S4_pstorerffnew_rr:
605 case S4_pstorerft_abs:
606 case S4_pstorerff_abs:
607 case S4_pstorerftnew_abs:
608 case S4_pstorerffnew_abs:
609 Bits.
set(Begin+16, Begin+32);
624 bool HexagonBitSimplify::getUsedBits(
unsigned Opc,
unsigned OpN,
626 using namespace Hexagon;
631 return getUsedBitsInStore(Opc, Bits, Begin);
643 Bits.
set(Begin, Begin+8);
656 Bits.
set(Begin, Begin+16);
664 Bits.
set(Begin+16, Begin+32);
674 Bits.
set(Begin, Begin+8);
684 case A2_addh_h16_sat_ll:
686 case A2_addh_l16_sat_ll:
689 case A2_subh_h16_sat_ll:
691 case A2_subh_l16_sat_ll:
692 case M2_mpy_acc_ll_s0:
693 case M2_mpy_acc_ll_s1:
694 case M2_mpy_acc_sat_ll_s0:
695 case M2_mpy_acc_sat_ll_s1:
698 case M2_mpy_nac_ll_s0:
699 case M2_mpy_nac_ll_s1:
700 case M2_mpy_nac_sat_ll_s0:
701 case M2_mpy_nac_sat_ll_s1:
702 case M2_mpy_rnd_ll_s0:
703 case M2_mpy_rnd_ll_s1:
704 case M2_mpy_sat_ll_s0:
705 case M2_mpy_sat_ll_s1:
706 case M2_mpy_sat_rnd_ll_s0:
707 case M2_mpy_sat_rnd_ll_s1:
708 case M2_mpyd_acc_ll_s0:
709 case M2_mpyd_acc_ll_s1:
712 case M2_mpyd_nac_ll_s0:
713 case M2_mpyd_nac_ll_s1:
714 case M2_mpyd_rnd_ll_s0:
715 case M2_mpyd_rnd_ll_s1:
716 case M2_mpyu_acc_ll_s0:
717 case M2_mpyu_acc_ll_s1:
720 case M2_mpyu_nac_ll_s0:
721 case M2_mpyu_nac_ll_s1:
722 case M2_mpyud_acc_ll_s0:
723 case M2_mpyud_acc_ll_s1:
726 case M2_mpyud_nac_ll_s0:
727 case M2_mpyud_nac_ll_s1:
728 if (OpN == 1 || OpN == 2) {
729 Bits.
set(Begin, Begin+16);
736 case A2_addh_h16_sat_lh:
739 case A2_subh_h16_sat_lh:
740 case M2_mpy_acc_lh_s0:
741 case M2_mpy_acc_lh_s1:
742 case M2_mpy_acc_sat_lh_s0:
743 case M2_mpy_acc_sat_lh_s1:
746 case M2_mpy_nac_lh_s0:
747 case M2_mpy_nac_lh_s1:
748 case M2_mpy_nac_sat_lh_s0:
749 case M2_mpy_nac_sat_lh_s1:
750 case M2_mpy_rnd_lh_s0:
751 case M2_mpy_rnd_lh_s1:
752 case M2_mpy_sat_lh_s0:
753 case M2_mpy_sat_lh_s1:
754 case M2_mpy_sat_rnd_lh_s0:
755 case M2_mpy_sat_rnd_lh_s1:
756 case M2_mpyd_acc_lh_s0:
757 case M2_mpyd_acc_lh_s1:
760 case M2_mpyd_nac_lh_s0:
761 case M2_mpyd_nac_lh_s1:
762 case M2_mpyd_rnd_lh_s0:
763 case M2_mpyd_rnd_lh_s1:
764 case M2_mpyu_acc_lh_s0:
765 case M2_mpyu_acc_lh_s1:
768 case M2_mpyu_nac_lh_s0:
769 case M2_mpyu_nac_lh_s1:
770 case M2_mpyud_acc_lh_s0:
771 case M2_mpyud_acc_lh_s1:
774 case M2_mpyud_nac_lh_s0:
775 case M2_mpyud_nac_lh_s1:
778 case A2_addh_l16_sat_hl:
780 case A2_subh_l16_sat_hl:
782 Bits.
set(Begin, Begin+16);
786 Bits.
set(Begin+16, Begin+32);
793 case A2_addh_h16_sat_hl:
796 case A2_subh_h16_sat_hl:
797 case M2_mpy_acc_hl_s0:
798 case M2_mpy_acc_hl_s1:
799 case M2_mpy_acc_sat_hl_s0:
800 case M2_mpy_acc_sat_hl_s1:
803 case M2_mpy_nac_hl_s0:
804 case M2_mpy_nac_hl_s1:
805 case M2_mpy_nac_sat_hl_s0:
806 case M2_mpy_nac_sat_hl_s1:
807 case M2_mpy_rnd_hl_s0:
808 case M2_mpy_rnd_hl_s1:
809 case M2_mpy_sat_hl_s0:
810 case M2_mpy_sat_hl_s1:
811 case M2_mpy_sat_rnd_hl_s0:
812 case M2_mpy_sat_rnd_hl_s1:
813 case M2_mpyd_acc_hl_s0:
814 case M2_mpyd_acc_hl_s1:
817 case M2_mpyd_nac_hl_s0:
818 case M2_mpyd_nac_hl_s1:
819 case M2_mpyd_rnd_hl_s0:
820 case M2_mpyd_rnd_hl_s1:
821 case M2_mpyu_acc_hl_s0:
822 case M2_mpyu_acc_hl_s1:
825 case M2_mpyu_nac_hl_s0:
826 case M2_mpyu_nac_hl_s1:
827 case M2_mpyud_acc_hl_s0:
828 case M2_mpyud_acc_hl_s1:
831 case M2_mpyud_nac_hl_s0:
832 case M2_mpyud_nac_hl_s1:
834 Bits.
set(Begin+16, Begin+32);
838 Bits.
set(Begin, Begin+16);
845 case A2_addh_h16_sat_hh:
848 case A2_subh_h16_sat_hh:
849 case M2_mpy_acc_hh_s0:
850 case M2_mpy_acc_hh_s1:
851 case M2_mpy_acc_sat_hh_s0:
852 case M2_mpy_acc_sat_hh_s1:
855 case M2_mpy_nac_hh_s0:
856 case M2_mpy_nac_hh_s1:
857 case M2_mpy_nac_sat_hh_s0:
858 case M2_mpy_nac_sat_hh_s1:
859 case M2_mpy_rnd_hh_s0:
860 case M2_mpy_rnd_hh_s1:
861 case M2_mpy_sat_hh_s0:
862 case M2_mpy_sat_hh_s1:
863 case M2_mpy_sat_rnd_hh_s0:
864 case M2_mpy_sat_rnd_hh_s1:
865 case M2_mpyd_acc_hh_s0:
866 case M2_mpyd_acc_hh_s1:
869 case M2_mpyd_nac_hh_s0:
870 case M2_mpyd_nac_hh_s1:
871 case M2_mpyd_rnd_hh_s0:
872 case M2_mpyd_rnd_hh_s1:
873 case M2_mpyu_acc_hh_s0:
874 case M2_mpyu_acc_hh_s1:
877 case M2_mpyu_nac_hh_s0:
878 case M2_mpyu_nac_hh_s1:
879 case M2_mpyud_acc_hh_s0:
880 case M2_mpyud_acc_hh_s1:
883 case M2_mpyud_nac_hh_s0:
884 case M2_mpyud_nac_hh_s1:
885 if (OpN == 1 || OpN == 2) {
886 Bits.
set(Begin+16, Begin+32);
914 switch (RC->
getID()) {
915 case Hexagon::DoubleRegsRegClassID:
916 VerifySR(RC, RR.
Sub);
917 return &Hexagon::IntRegsRegClass;
918 case Hexagon::HvxWRRegClassID:
919 VerifySR(RC, RR.
Sub);
920 return &Hexagon::HvxVRRegClass;
935 auto *DRC = getFinalVRegClass(RD, MRI);
939 return DRC == getFinalVRegClass(RS, MRI);
948 return Op.getSubReg() != NewSub &&
Op.isTied();
954 class DeadCodeElimination {
961 return runOnNode(MDT.getRootNode());
965 bool isDead(
unsigned R)
const;
976 bool DeadCodeElimination::isDead(
unsigned R)
const {
993 bool Changed =
false;
995 for (
auto *DTN : children<MachineDomTreeNode*>(N))
996 Changed |= runOnNode(DTN);
999 std::vector<MachineInstr*> Instrs;
1001 Instrs.push_back(&*I);
1003 for (
auto MI : Instrs) {
1017 bool AllDead =
true;
1020 if (!
Op.isReg() || !
Op.isDef())
1022 unsigned R =
Op.getReg();
1033 for (
unsigned i = 0, n = Regs.
size(); i != n; ++i)
1053 class RedundantInstrElimination :
public Transformation {
1057 : Transformation(
true), HII(hii), HRI(hri),
MRI(mri),
BT(bt) {}
1062 bool isLossyShiftLeft(
const MachineInstr &MI,
unsigned OpN,
1063 unsigned &LostB,
unsigned &LostE);
1064 bool isLossyShiftRight(
const MachineInstr &MI,
unsigned OpN,
1065 unsigned &LostB,
unsigned &LostE);
1066 bool computeUsedBits(
unsigned Reg,
BitVector &Bits);
1082 bool RedundantInstrElimination::isLossyShiftLeft(
const MachineInstr &MI,
1083 unsigned OpN,
unsigned &LostB,
unsigned &LostE) {
1084 using namespace Hexagon;
1087 unsigned ImN, RegN, Width;
1094 case S2_asl_i_p_acc:
1095 case S2_asl_i_p_and:
1096 case S2_asl_i_p_nac:
1098 case S2_asl_i_p_xacc:
1108 case S2_addasl_rrri:
1109 case S4_andi_asl_ri:
1111 case S4_addi_asl_ri:
1112 case S4_subi_asl_ri:
1113 case S2_asl_i_r_acc:
1114 case S2_asl_i_r_and:
1115 case S2_asl_i_r_nac:
1117 case S2_asl_i_r_sat:
1118 case S2_asl_i_r_xacc:
1142 bool RedundantInstrElimination::isLossyShiftRight(
const MachineInstr &MI,
1143 unsigned OpN,
unsigned &LostB,
unsigned &LostE) {
1144 using namespace Hexagon;
1154 case S2_asr_i_p_acc:
1155 case S2_asr_i_p_and:
1156 case S2_asr_i_p_nac:
1158 case S2_lsr_i_p_acc:
1159 case S2_lsr_i_p_and:
1160 case S2_lsr_i_p_nac:
1162 case S2_lsr_i_p_xacc:
1171 case S4_andi_lsr_ri:
1173 case S4_addi_lsr_ri:
1174 case S4_subi_lsr_ri:
1175 case S2_asr_i_r_acc:
1176 case S2_asr_i_r_and:
1177 case S2_asr_i_r_nac:
1179 case S2_lsr_i_r_acc:
1180 case S2_lsr_i_r_and:
1181 case S2_lsr_i_r_nac:
1183 case S2_lsr_i_r_xacc:
1206 bool RedundantInstrElimination::computeUsedBits(
unsigned Reg,
BitVector &Bits) {
1209 std::vector<unsigned> Pending;
1210 Pending.push_back(Reg);
1212 for (
unsigned i = 0; i < Pending.size(); ++i) {
1213 unsigned R = Pending[i];
1220 if (!HBS::getSubregMask(UR, B, W, MRI))
1227 Pending.push_back(DefR);
1252 bool RedundantInstrElimination::computeUsedBits(
const MachineInstr &MI,
1253 unsigned OpN,
BitVector &Bits, uint16_t Begin) {
1256 bool GotBits = HBS::getUsedBits(Opc, OpN,
T, Begin, HII);
1261 if (isLossyShiftLeft(MI, OpN, LB, LE) || isLossyShiftRight(MI, OpN, LB, LE)) {
1265 uint16_t Width = HRI.getRegSizeInBits(*RC);
1268 T.set(Begin, Begin+Width);
1269 assert(LB <= LE && LB < Width && LE <= Width);
1270 T.reset(Begin+LB, Begin+LE);
1286 if (!HBS::getSubregMask(RD, DB, DW, MRI))
1289 if (!HBS::getSubregMask(RS, SB, SW, MRI))
1295 if (!computeUsedBits(RD.
Reg, Used))
1298 for (
unsigned i = 0; i != DW; ++i)
1299 if (Used[i+DB] && DC[DB+i] != SC[SB+i])
1308 bool Changed =
false;
1310 for (
auto I = B.
begin(),
E = B.
end(), NextI =
I; I !=
E; ++
I) {
1311 NextI = std::next(I);
1314 if (MI->
getOpcode() == TargetOpcode::COPY)
1329 for (
auto &
Op : MI->
uses()) {
1335 if (!HBS::isTransparentCopy(RD, RS, MRI))
1339 if (!HBS::getSubregMask(RS, BN, BW, MRI))
1343 if (!usedBitsEqual(RD, RS) && !
HBS::isEqual(DC, 0, SC, BN, BW))
1351 BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
1352 .addReg(RS.
Reg, 0, RS.
Sub);
1353 HBS::replaceSubWithSub(RD.
Reg, RD.
Sub, NewR, 0, MRI);
1376 class ConstGeneration :
public Transformation {
1380 : Transformation(
true), HII(hii),
MRI(mri),
BT(bt) {}
1396 bool ConstGeneration::isTfrConst(
const MachineInstr &MI) {
1399 case Hexagon::A2_combineii:
1400 case Hexagon::A4_combineii:
1401 case Hexagon::A2_tfrsi:
1402 case Hexagon::A2_tfrpi:
1403 case Hexagon::PS_true:
1404 case Hexagon::PS_false:
1406 case Hexagon::CONST64:
1417 if (RC == &Hexagon::IntRegsRegClass) {
1418 BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi),
Reg)
1419 .addImm(int32_t(C));
1423 if (RC == &Hexagon::DoubleRegsRegClass) {
1425 BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrpi),
Reg)
1432 unsigned Opc =
isInt<8>(
Lo) ? Hexagon::A2_combineii
1433 : Hexagon::A4_combineii;
1435 .addImm(int32_t(
Hi))
1436 .addImm(int32_t(Lo));
1440 BuildMI(B, At, DL, HII.get(Hexagon::CONST64),
Reg)
1445 if (RC == &Hexagon::PredRegsRegClass) {
1448 Opc = Hexagon::PS_false;
1449 else if ((C & 0xFF) == 0xFF)
1450 Opc = Hexagon::PS_true;
1463 bool Changed =
false;
1466 for (
auto I = B.
begin(),
E = B.
end(); I !=
E; ++
I) {
1470 HBS::getInstrDefs(*I, Defs);
1471 if (Defs.count() != 1)
1473 unsigned DR = Defs.find_first();
1478 if (HBS::getConst(DRC, 0, DRC.
width(), U)) {
1482 unsigned ImmReg = genTfrConst(MRI.
getRegClass(DR),
C,
B, At, DL);
1484 HBS::replaceReg(DR, ImmReg, MRI);
1485 BT.
put(ImmReg, DRC);
1498 class CopyGeneration :
public Transformation {
1502 : Transformation(
true), HII(hii), HRI(hri),
MRI(mri),
BT(bt) {}
1519 class CopyPropagation :
public Transformation {
1522 : Transformation(
false), HRI(hri),
MRI(mri) {}
1526 static bool isCopyReg(
unsigned Opc,
bool NoConv);
1544 auto *FRC = HBS::getFinalVRegClass(Inp, MRI);
1546 if (!HBS::getSubregMask(Inp, B, W, MRI))
1549 for (
unsigned R = AVs.find_first(); R; R = AVs.find_next(R)) {
1550 if (!
BT.
has(R) || Forbidden[R])
1553 unsigned RW = RC.
width();
1557 if (!HBS::isTransparentCopy(R, Inp, MRI))
1570 if (MRI.
getRegClass(R) != &Hexagon::DoubleRegsRegClass)
1574 Out.
Sub = Hexagon::isub_lo;
1576 Out.
Sub = Hexagon::isub_hi;
1580 if (HBS::isTransparentCopy(Out, Inp, MRI))
1591 bool Changed =
false;
1594 for (
auto I = B.
begin(),
E = B.
end(), NextI =
I; I !=
E;
1595 ++
I, AVB.insert(Defs)) {
1596 NextI = std::next(I);
1598 HBS::getInstrDefs(*I, Defs);
1601 if (CopyPropagation::isCopyReg(Opc,
false) ||
1602 ConstGeneration::isTfrConst(*I))
1608 for (
unsigned R = Defs.find_first(); R; R = Defs.find_next(R)) {
1610 auto *FRC = HBS::getFinalVRegClass(R, MRI);
1612 if (findMatch(R, MR, AVB)) {
1614 BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
1615 .addReg(MR.
Reg, 0, MR.
Sub);
1617 HBS::replaceReg(R, NewR, MRI);
1618 Forbidden.insert(R);
1622 if (FRC == &Hexagon::DoubleRegsRegClass ||
1623 FRC == &Hexagon::HvxWRRegClass) {
1630 if (findMatch(TL, ML, AVB) && findMatch(TH, MH, AVB)) {
1631 auto *FRC = HBS::getFinalVRegClass(R, MRI);
1633 BuildMI(B, At, DL, HII.get(TargetOpcode::REG_SEQUENCE), NewR)
1634 .addReg(ML.
Reg, 0, ML.
Sub)
1639 HBS::replaceReg(R, NewR, MRI);
1640 Forbidden.insert(R);
1649 bool CopyPropagation::isCopyReg(
unsigned Opc,
bool NoConv) {
1651 case TargetOpcode::COPY:
1652 case TargetOpcode::REG_SEQUENCE:
1653 case Hexagon::A4_combineir:
1654 case Hexagon::A4_combineri:
1656 case Hexagon::A2_tfr:
1657 case Hexagon::A2_tfrp:
1658 case Hexagon::A2_combinew:
1659 case Hexagon::V6_vcombine:
1667 bool CopyPropagation::propagateRegCopy(
MachineInstr &MI) {
1668 bool Changed =
false;
1674 case TargetOpcode::COPY:
1675 case Hexagon::A2_tfr:
1676 case Hexagon::A2_tfrp: {
1678 if (!HBS::isTransparentCopy(RD, RS, MRI))
1681 Changed = HBS::replaceRegWithSub(RD.
Reg, RS.
Reg, RS.
Sub, MRI);
1683 Changed = HBS::replaceReg(RD.
Reg, RS.
Reg, MRI);
1686 case TargetOpcode::REG_SEQUENCE: {
1688 if (HBS::parseRegSequence(MI, SL, SH, MRI)) {
1692 Changed = HBS::replaceSubWithSub(RD.
Reg, SubLo, SL.
Reg, SL.
Sub, MRI);
1693 Changed |= HBS::replaceSubWithSub(RD.
Reg, SubHi, SH.
Reg, SH.
Sub, MRI);
1697 case Hexagon::A2_combinew:
1698 case Hexagon::V6_vcombine: {
1703 Changed = HBS::replaceSubWithSub(RD.
Reg, SubLo, RL.Reg, RL.Sub, MRI);
1704 Changed |= HBS::replaceSubWithSub(RD.
Reg, SubHi, RH.
Reg, RH.
Sub, MRI);
1707 case Hexagon::A4_combineir:
1708 case Hexagon::A4_combineri: {
1709 unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 2 : 1;
1710 unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::isub_lo
1713 Changed = HBS::replaceSubWithSub(RD.
Reg, Sub, RS.
Reg, RS.
Sub, MRI);
1721 std::vector<MachineInstr*> Instrs;
1723 Instrs.push_back(&*I);
1725 bool Changed =
false;
1726 for (
auto I : Instrs) {
1728 if (!CopyPropagation::isCopyReg(Opc,
true))
1730 Changed |= propagateRegCopy(*I);
1741 class BitSimplification :
public Transformation {
1746 : Transformation(
true), MDT(mdt), HII(hii), HRI(hri),
MRI(mri),
1757 unsigned B, RegHalf &RH);
1762 unsigned getCombineOpcode(
bool HLow,
bool LLow);
1784 std::vector<MachineInstr*> NewMIs;
1799 bool BitSimplification::matchHalf(
unsigned SelfR,
1810 while (I < B+16 && RC[I].num())
1815 unsigned Reg = RC[
I].RefI.Reg;
1816 unsigned P = RC[
I].RefI.Pos;
1819 unsigned Pos = P - (I-
B);
1821 if (Reg == 0 || Reg == SelfR)
1829 if (Pos+16 > SC.
width())
1832 for (
unsigned i = 0; i < 16; ++i) {
1841 if (RC[i+B] != SC[i+Pos])
1848 Sub = Hexagon::isub_lo;
1852 Sub = Hexagon::isub_lo;
1856 Sub = Hexagon::isub_hi;
1860 Sub = Hexagon::isub_hi;
1871 if (!HBS::getFinalVRegClass(RH, MRI))
1879 auto *OpRC = HII.getRegClass(HII.get(Opc), OpNum, &HRI, MF);
1880 auto *
RRC = HBS::getFinalVRegClass(R, MRI);
1881 return OpRC->hasSubClassEq(
RRC);
1886 bool BitSimplification::matchPackhl(
unsigned SelfR,
1889 RegHalf L1, H1, L2, H2;
1891 if (!matchHalf(SelfR, RC, 0, L2) || !matchHalf(SelfR, RC, 16, L1))
1893 if (!matchHalf(SelfR, RC, 32, H2) || !matchHalf(SelfR, RC, 48, H1))
1897 if (H1.Reg != L1.Reg || H1.Sub != L1.Sub || H1.Low || !L1.Low)
1899 if (H2.Reg != L2.Reg || H2.Sub != L2.Sub || H2.Low || !L2.Low)
1907 unsigned BitSimplification::getCombineOpcode(
bool HLow,
bool LLow) {
1908 return HLow ? LLow ? Hexagon::A2_combine_ll
1909 : Hexagon::A2_combine_lh
1910 : LLow ? Hexagon::A2_combine_hl
1911 : Hexagon::A2_combine_hh;
1917 bool BitSimplification::genStoreUpperHalf(
MachineInstr *MI) {
1919 if (Opc != Hexagon::S2_storerh_io)
1928 if (!matchHalf(0, RC, 0, H))
1932 MI->
setDesc(HII.get(Hexagon::S2_storerf_io));
1940 bool BitSimplification::genStoreImmediate(
MachineInstr *MI) {
1944 case Hexagon::S2_storeri_io:
1947 case Hexagon::S2_storerh_io:
1950 case Hexagon::S2_storerb_io:
1963 int64_t Off = OffOp.
getImm();
1965 if (!
isUIntN(6+Align, Off) || (Off & ((1<<Align)-1)))
1973 if (!HBS::getConst(RC, 0, RC.
width(), U))
1979 case Hexagon::S2_storerb_io:
1982 case Hexagon::S2_storerh_io:
1985 case Hexagon::S2_storeri_io:
1998 case Hexagon::S2_storerb_io:
1999 MI->
setDesc(HII.get(Hexagon::S4_storeirb_io));
2001 case Hexagon::S2_storerh_io:
2002 MI->
setDesc(HII.get(Hexagon::S4_storeirh_io));
2004 case Hexagon::S2_storeri_io:
2005 MI->
setDesc(HII.get(Hexagon::S4_storeiri_io));
2019 if (Opc == Hexagon::S2_packhl)
2022 if (!matchPackhl(RD.
Reg, RC, Rs, Rt))
2024 if (!validateReg(Rs, Hexagon::S2_packhl, 1) ||
2025 !validateReg(Rt, Hexagon::S2_packhl, 2))
2033 BuildMI(B, At, DL, HII.get(Hexagon::S2_packhl), NewR)
2034 .addReg(Rs.
Reg, 0, Rs.
Sub)
2036 HBS::replaceSubWithSub(RD.
Reg, RD.
Sub, NewR, 0, MRI);
2043 bool BitSimplification::genExtractHalf(
MachineInstr *MI,
2059 if (L.Low && Opc != Hexagon::A2_zxth) {
2060 if (validateReg(L, Hexagon::A2_zxth, 1)) {
2062 BuildMI(B, At, DL, HII.get(Hexagon::A2_zxth), NewR)
2063 .addReg(L.Reg, 0, L.Sub);
2065 }
else if (!L.Low && Opc != Hexagon::S2_lsr_i_r) {
2066 if (validateReg(L, Hexagon::S2_lsr_i_r, 1)) {
2068 BuildMI(B, MI, DL, HII.get(Hexagon::S2_lsr_i_r), NewR)
2069 .addReg(L.Reg, 0, L.Sub)
2075 HBS::replaceSubWithSub(RD.
Reg, RD.
Sub, NewR, 0, MRI);
2082 bool BitSimplification::genCombineHalf(
MachineInstr *MI,
2086 if (!matchHalf(RD.
Reg, RC, 0, L) || !matchHalf(RD.
Reg, RC, 16, H))
2089 if (L.Reg == H.Reg && L.Sub == H.Sub && !H.Low && L.Low)
2093 unsigned COpc = getCombineOpcode(H.Low, L.Low);
2096 if (!validateReg(H, COpc, 1) || !validateReg(L, COpc, 2))
2104 BuildMI(B, At, DL, HII.get(COpc), NewR)
2105 .addReg(H.Reg, 0, H.Sub)
2106 .
addReg(L.Reg, 0, L.Sub);
2107 HBS::replaceSubWithSub(RD.
Reg, RD.
Sub, NewR, 0, MRI);
2114 bool BitSimplification::genExtractLow(
MachineInstr *MI,
2118 case Hexagon::A2_zxtb:
2119 case Hexagon::A2_zxth:
2120 case Hexagon::S2_extractu:
2131 unsigned W = RC.
width();
2132 while (W > 0 && RC[W-1].is(0))
2134 if (W == 0 || W == RC.
width())
2136 unsigned NewOpc = (W == 8) ? Hexagon::A2_zxtb
2137 : (W == 16) ? Hexagon::A2_zxth
2138 : (W < 10) ? Hexagon::A2_andir
2139 : Hexagon::S2_extractu;
2143 for (
auto &
Op : MI->
uses()) {
2151 if (!HBS::getSubregMask(RS, BN, BW, MRI))
2155 if (!validateReg(RS, NewOpc, 1))
2161 auto MIB =
BuildMI(B, At, DL, HII.get(NewOpc), NewR)
2162 .addReg(RS.
Reg, 0, RS.
Sub);
2163 if (NewOpc == Hexagon::A2_andir)
2164 MIB.addImm((1 << W) - 1);
2165 else if (NewOpc == Hexagon::S2_extractu)
2166 MIB.addImm(W).addImm(0);
2167 HBS::replaceSubWithSub(RD.
Reg, RD.
Sub, NewR, 0, MRI);
2186 case Hexagon::A4_bitsplit:
2187 case Hexagon::A4_bitspliti:
2191 unsigned W = RC.
width();
2196 unsigned Z = C.width();
2197 while (Z > 0 && C[Z-1].is(0))
2199 return C.width() -
Z;
2203 unsigned Z =
ctlz(RC);
2204 if (Z == 0 || Z == W)
2218 for (
unsigned i = 1; i < W-
Z; ++i) {
2227 for (
unsigned S = AVs.find_first(); S; S = AVs.find_next(S)) {
2231 if (SRC != Hexagon::IntRegsRegClassID &&
2232 SRC != Hexagon::DoubleRegsRegClassID)
2245 if (Pos <= P && (Pos + W-Z) !=
P)
2247 if (P < Pos && (P + Z) != Pos)
2250 if (std::min(P, Pos) != 0 && std::min(P, Pos) != 32)
2254 for (I = 1; I <
Z; ++
I) {
2274 SrcSR = (std::min(Pos, P) == 32) ? Hexagon::isub_hi : Hexagon::isub_lo;
2275 if (!validateReg({SrcR,SrcSR}, Hexagon::A4_bitspliti, 1))
2277 unsigned ImmOp = Pos <= P ? W-
Z :
Z;
2282 if (
In->getOpcode() != Hexagon::A4_bitspliti)
2287 if (
In->getOperand(2).getImm() != ImmOp)
2293 if (!MDT.dominates(DefI, &*At))
2303 auto NewBS =
BuildMI(B, At, DL, HII.get(Hexagon::A4_bitspliti), NewR)
2304 .addReg(SrcR, 0, SrcSR)
2306 NewMIs.push_back(NewBS);
2309 HBS::replaceRegWithSub(RD.
Reg, NewR, Hexagon::isub_lo, MRI);
2310 HBS::replaceRegWithSub(S, NewR, Hexagon::isub_hi, MRI);
2312 HBS::replaceRegWithSub(S, NewR, Hexagon::isub_lo, MRI);
2313 HBS::replaceRegWithSub(RD.
Reg, NewR, Hexagon::isub_hi, MRI);
2327 bool BitSimplification::simplifyTstbit(
MachineInstr *MI,
2330 if (Opc != Hexagon::S2_tstbit_i)
2337 if (!
BT.
has(RS.
Reg) || !HBS::getSubregMask(RS, F, W, MRI))
2352 if (TC == &Hexagon::DoubleRegsRegClass) {
2354 RR.
Sub = Hexagon::isub_lo;
2357 RR.
Sub = Hexagon::isub_hi;
2359 }
else if (TC == &Hexagon::IntRegsRegClass) {
2364 BuildMI(B, At, DL, HII.get(Hexagon::S2_tstbit_i), NewR)
2365 .addReg(RR.
Reg, 0, RR.
Sub)
2367 HBS::replaceReg(RD.
Reg, NewR, MRI);
2371 }
else if (V.
is(0) || V.
is(1)) {
2373 unsigned NewOpc = V.
is(0) ? Hexagon::PS_false : Hexagon::PS_true;
2374 BuildMI(B, At, DL, HII.get(NewOpc), NewR);
2375 HBS::replaceReg(RD.
Reg, NewR, MRI);
2387 bool BitSimplification::simplifyExtractLow(
MachineInstr *MI,
2398 unsigned W = RC.
width();
2407 if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)
2427 for (
unsigned I = 0; I !=
W; ++
I) {
2431 IsConst = IsConst && (V.
is(0) || V.
is(1));
2436 if (TopV.
is(0) || TopV.
is(1)) {
2437 bool S = TopV.
is(1);
2438 for (--W; W > 0 && RC[W-1].is(S); --
W)
2450 for (--W; W > 0 && RC[W-1] == TopV; --
W)
2465 dbgs() <<
"Cell: " << RC <<
'\n';
2466 dbgs() <<
"Expected bitfield size: " << Len <<
" bits, " 2467 << (Signed ?
"sign" :
"zero") <<
"-extended\n";
2470 bool Changed =
false;
2472 for (
unsigned R = AVs.find_first(); R != 0; R = AVs.find_next(R)) {
2476 unsigned SW = SC.
width();
2482 if (SW < RW || (SW % RW) != 0)
2489 while (Off <= SW-Len) {
2490 unsigned OE = (Off+Len)/RW;
2510 unsigned ExtOpc = 0;
2513 ExtOpc = Signed ? Hexagon::A2_sxtb : Hexagon::A2_zxtb;
2515 ExtOpc = Signed ? Hexagon::A2_sxth : Hexagon::A2_zxth;
2516 else if (Len < 10 && !Signed)
2517 ExtOpc = Hexagon::A2_andir;
2521 Signed ? (RW == 32 ? Hexagon::S4_extract : Hexagon::S4_extractp)
2522 : (RW == 32 ? Hexagon::S2_extractu : Hexagon::S2_extractup);
2526 if (RW != SW && RW*2 != SW)
2529 SR = (Off/RW == 0) ? Hexagon::isub_lo : Hexagon::isub_hi;
2532 if (!validateReg({R,SR}, ExtOpc, 1))
2548 auto MIB =
BuildMI(B, At, DL, HII.get(ExtOpc), NewR)
2551 case Hexagon::A2_sxtb:
2552 case Hexagon::A2_zxtb:
2553 case Hexagon::A2_sxth:
2554 case Hexagon::A2_zxth:
2556 case Hexagon::A2_andir:
2557 MIB.addImm((1u << Len) - 1);
2559 case Hexagon::S4_extract:
2560 case Hexagon::S2_extractu:
2561 case Hexagon::S4_extractp:
2562 case Hexagon::S2_extractup:
2570 HBS::replaceReg(RD.
Reg, NewR, MRI);
2579 bool BitSimplification::simplifyRCmp0(
MachineInstr *MI,
2582 if (Opc != Hexagon::A4_rcmpeqi && Opc != Hexagon::A4_rcmpneqi)
2589 if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)
2598 bool KnownNZ =
false;
2605 if (!HBS::getSubregMask(SR, F, W, MRI))
2608 for (uint16_t I = F; I != F+
W; ++
I) {
2616 auto ReplaceWithConst = [&] (
int C) {
2618 BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), NewR)
2620 HBS::replaceReg(RD.
Reg, NewR, MRI);
2622 for (uint16_t I = 0; I !=
W; ++
I) {
2631 if (
Op.isGlobal() ||
Op.isBlockAddress())
2634 return Op.getImm() != 0;
2636 return !
Op.getCImm()->isZero();
2638 return !
Op.getFPImm()->isZero();
2643 if (
Op.isGlobal() ||
Op.isBlockAddress())
2646 return Op.getImm() == 0;
2648 return Op.getCImm()->isZero();
2650 return Op.getFPImm()->isZero();
2656 if (KnownZ || KnownNZ) {
2657 assert(KnownZ != KnownNZ &&
"Register cannot be both 0 and non-0");
2658 return ReplaceWithConst(KnownZ == (Opc == Hexagon::A4_rcmpeqi));
2666 if (SR.
Sub == 0 && InpDef->
getOpcode() == Hexagon::C2_muxii) {
2670 bool KnownNZ1 = IsNonZero(Src1), KnownNZ2 = IsNonZero(Src2);
2671 if (KnownNZ1 && KnownNZ2)
2672 return ReplaceWithConst(Opc == Hexagon::A4_rcmpneqi);
2674 bool KnownZ1 = IsZero(Src1), KnownZ2 = IsZero(Src2);
2675 if (KnownZ1 && KnownZ2)
2676 return ReplaceWithConst(Opc == Hexagon::A4_rcmpeqi);
2681 if ((KnownZ1 || KnownNZ1) && (KnownZ2 || KnownNZ2)) {
2683 BuildMI(B, At, DL, HII.get(Hexagon::C2_muxii), NewR)
2685 .addImm(KnownZ1 == (Opc == Hexagon::A4_rcmpeqi))
2686 .addImm(KnownZ2 == (Opc == Hexagon::A4_rcmpeqi));
2687 HBS::replaceReg(RD.
Reg, NewR, MRI);
2704 bool Changed =
false;
2708 for (
auto I = B.
begin(),
E = B.
end(); I !=
E; ++
I, AVB.insert(Defs)) {
2711 HBS::getInstrDefs(*MI, Defs);
2714 if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE)
2718 bool T = genStoreUpperHalf(MI);
2719 T = T || genStoreImmediate(MI);
2724 if (Defs.count() != 1)
2735 if (FRC->
getID() == Hexagon::DoubleRegsRegClassID) {
2736 bool T = genPackhl(MI, RD, RC);
2737 T = T || simplifyExtractLow(MI, RD, RC, AVB);
2742 if (FRC->
getID() == Hexagon::IntRegsRegClassID) {
2743 bool T = genBitSplit(MI, RD, RC, AVB);
2744 T = T || simplifyExtractLow(MI, RD, RC, AVB);
2745 T = T || genExtractHalf(MI, RD, RC);
2746 T = T || genCombineHalf(MI, RD, RC);
2747 T = T || genExtractLow(MI, RD, RC);
2748 T = T || simplifyRCmp0(MI, RD);
2753 if (FRC->
getID() == Hexagon::PredRegsRegClassID) {
2754 bool T = simplifyTstbit(MI, RD, RC);
2768 auto &HII = *HST.getInstrInfo();
2770 MDT = &getAnalysis<MachineDominatorTree>();
2774 Changed = DeadCodeElimination(MF, *MDT).run();
2784 ConstGeneration ImmG(BT, HII, MRI);
2785 Changed |= visitBlock(Entry, ImmG, AIG);
2788 RedundantInstrElimination RIE(BT, HII, HRI, MRI);
2789 bool Ried = visitBlock(Entry, RIE, ARE);
2796 CopyGeneration CopyG(BT, HII, HRI, MRI);
2797 Changed |= visitBlock(Entry, CopyG, ACG);
2800 CopyPropagation CopyP(HRI, MRI);
2801 Changed |= visitBlock(Entry, CopyP, ACP);
2803 Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
2807 BitSimplification BitS(BT, *MDT, HII, HRI, MRI, MF);
2808 Changed |= visitBlock(Entry, BitS, ABS);
2810 Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
2816 DeadCodeElimination(MF, *MDT).run();
2914 using InstrList = std::vector<MachineInstr *>;
2928 bool isConst(
unsigned Reg)
const;
2929 bool isBitShuffle(
const MachineInstr *MI,
unsigned DefR)
const;
2930 bool isStoreInput(
const MachineInstr *MI,
unsigned DefR)
const;
2931 bool isShuffleOf(
unsigned OutR,
unsigned InpR)
const;
2932 bool isSameShuffle(
unsigned OutR1,
unsigned InpR1,
unsigned OutR2,
2933 unsigned &InpR2)
const;
2936 bool processLoop(LoopCand &C);
2944 "Hexagon Loop Rescheduling",
false,
false)
2948 DefR = HexagonLoopRescheduling::getDefReg(&P);
2951 for (
unsigned i = 1, n = P.getNumOperands(); i < n; i += 2) {
2954 LR = P.getOperand(i);
2958 PR = P.getOperand(i);
2962 unsigned HexagonLoopRescheduling::getDefReg(
const MachineInstr *MI) {
2964 HBS::getInstrDefs(*MI, Defs);
2965 if (Defs.count() != 1)
2967 return Defs.find_first();
2970 bool HexagonLoopRescheduling::isConst(
unsigned Reg)
const {
2974 for (
unsigned i = 0, w = RC.
width(); i < w; ++i) {
2976 if (!V.
is(0) && !V.
is(1))
2982 bool HexagonLoopRescheduling::isBitShuffle(
const MachineInstr *MI,
2983 unsigned DefR)
const {
2986 case TargetOpcode::COPY:
2987 case Hexagon::S2_lsr_i_r:
2988 case Hexagon::S2_asr_i_r:
2989 case Hexagon::S2_asl_i_r:
2990 case Hexagon::S2_lsr_i_p:
2991 case Hexagon::S2_asr_i_p:
2992 case Hexagon::S2_asl_i_p:
2993 case Hexagon::S2_insert:
2994 case Hexagon::A2_or:
2995 case Hexagon::A2_orp:
2996 case Hexagon::A2_and:
2997 case Hexagon::A2_andp:
2998 case Hexagon::A2_combinew:
2999 case Hexagon::A4_combineri:
3000 case Hexagon::A4_combineir:
3001 case Hexagon::A2_combineii:
3002 case Hexagon::A4_combineii:
3003 case Hexagon::A2_combine_ll:
3004 case Hexagon::A2_combine_lh:
3005 case Hexagon::A2_combine_hl:
3006 case Hexagon::A2_combine_hh:
3012 bool HexagonLoopRescheduling::isStoreInput(
const MachineInstr *MI,
3013 unsigned InpR)
const {
3024 bool HexagonLoopRescheduling::isShuffleOf(
unsigned OutR,
unsigned InpR)
const {
3025 if (!BTP->has(OutR) || !BTP->has(InpR))
3028 for (
unsigned i = 0, w = OutC.
width(); i < w; ++i) {
3038 bool HexagonLoopRescheduling::isSameShuffle(
unsigned OutR1,
unsigned InpR1,
3039 unsigned OutR2,
unsigned &InpR2)
const {
3040 if (!BTP->has(OutR1) || !BTP->has(InpR1) || !BTP->has(OutR2))
3044 unsigned W = OutC1.
width();
3045 unsigned MatchR = 0;
3046 if (W != OutC2.
width())
3048 for (
unsigned i = 0; i <
W; ++i) {
3054 if (V1.
RefI.
Pos != V2.RefI.Pos)
3058 if (V2.RefI.Reg == 0 || V2.RefI.Reg == OutR2)
3061 MatchR = V2.RefI.Reg;
3062 else if (V2.RefI.Reg != MatchR)
3071 unsigned NewPredR) {
3076 BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR)
3081 RegMap.
insert(std::make_pair(G.Inp.Reg, PhiR));
3083 for (
unsigned i = G.Ins.size(); i > 0; --i) {
3085 unsigned DR = getDefReg(SI);
3099 unsigned UseR = RegMap[Op.
getReg()];
3102 RegMap.
insert(std::make_pair(DR, NewDR));
3105 HBS::replaceReg(OldPhiR, RegMap[G.Out.Reg], *MRI);
3108 bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
3111 std::vector<PhiInfo> Phis;
3112 for (
auto &I : *C.LB) {
3115 unsigned PR = getDefReg(&I);
3118 bool BadUse =
false, GoodUse =
false;
3125 if (isBitShuffle(UseI, PR) || isStoreInput(UseI, PR))
3128 if (BadUse || !GoodUse)
3131 Phis.push_back(PhiInfo(I, *C.LB));
3135 dbgs() <<
"Phis: {";
3136 for (
auto &I : Phis) {
3138 <<
printReg(I.PR.Reg, HRI, I.PR.Sub) <<
":b" << I.PB->getNumber()
3139 <<
',' <<
printReg(I.LR.Reg, HRI, I.LR.Sub) <<
":b" 3140 << I.LB->getNumber() <<
')';
3148 bool Changed =
false;
3155 for (
auto I = C.LB->rbegin(),
E = C.LB->rend(); I !=
E; ++
I) {
3162 HBS::getInstrDefs(*I, Defs);
3163 if (Defs.count() != 1)
3165 unsigned DefR = Defs.find_first();
3168 if (!isBitShuffle(&*I, DefR))
3171 bool BadUse =
false;
3172 for (
auto UI = MRI->
use_begin(DefR), UE = MRI->
use_end(); UI != UE; ++UI) {
3175 if (UseI->
isPHI()) {
3178 unsigned Idx = UI.getOperandNo();
3182 auto F =
find(ShufIns, UseI);
3183 if (
F == ShufIns.end())
3189 if (C.EB ==
nullptr)
3198 ShufIns.push_back(&*I);
3210 using InstrGroupList = std::vector<InstrGroup>;
3213 for (
unsigned i = 0, n = ShufIns.size(); i < n; ++i) {
3219 G.Ins.push_back(SI);
3220 G.Out.Reg = getDefReg(SI);
3222 HBS::getInstrUses(*SI, Inputs);
3224 for (
unsigned j = i+1; j < n; ++j) {
3229 HBS::getInstrDefs(*MI, Defs);
3231 if (!Defs.intersects(Inputs))
3235 G.Ins.push_back(MI);
3236 Inputs.remove(Defs);
3238 HBS::getInstrUses(*MI, Inputs);
3239 ShufIns[j] =
nullptr;
3243 if (Inputs.count() > 1)
3245 auto LoopInpEq = [
G] (
const PhiInfo &
P) ->
bool {
3246 return G.Out.Reg ==
P.LR.Reg;
3251 G.Inp.Reg = Inputs.find_first();
3252 Groups.push_back(G);
3256 for (
unsigned i = 0, n = Groups.size(); i < n; ++i) {
3257 InstrGroup &G = Groups[i];
3258 dbgs() <<
"Group[" << i <<
"] inp: " 3259 <<
printReg(G.Inp.Reg, HRI, G.Inp.Sub)
3260 <<
" out: " <<
printReg(G.Out.Reg, HRI, G.Out.Sub) <<
"\n";
3261 for (
unsigned j = 0, m = G.Ins.size(); j < m; ++j)
3262 dbgs() <<
" " << *G.Ins[j];
3266 for (
unsigned i = 0, n = Groups.size(); i < n; ++i) {
3267 InstrGroup &G = Groups[i];
3268 if (!isShuffleOf(G.Out.Reg, G.Inp.Reg))
3270 auto LoopInpEq = [
G] (
const PhiInfo &
P) ->
bool {
3271 return G.Out.Reg ==
P.LR.Reg;
3274 if (
F == Phis.end())
3277 if (!isSameShuffle(G.Out.Reg, G.Inp.Reg,
F->PR.Reg, PrehR)) {
3280 if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi)
3289 unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi
3290 : Hexagon::A2_tfrpi;
3291 auto T = C.PB->getFirstTerminator();
3293 BuildMI(*C.PB,
T, DL, HII->get(TfrI), PrehR)
3306 moveGroup(G, *
F->LB, *
F->PB,
F->LB->getFirstNonPHI(),
F->DefR, PrehR);
3313 bool HexagonLoopRescheduling::runOnMachineFunction(
MachineFunction &MF) {
3318 HII = HST.getInstrInfo();
3319 HRI = HST.getRegisterInfo();
3327 std::vector<LoopCand> Cand;
3329 for (
auto &B : MF) {
3333 bool IsLoop =
false;
3349 if ((*SI)->pred_size() == 1)
3354 Cand.push_back(LoopCand(&B, PB, EB));
3357 bool Changed =
false;
3358 for (
auto &C : Cand)
3359 Changed |= processLoop(C);
3369 return new HexagonLoopRescheduling();
3373 return new HexagonBitSimplify();
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexagon-bit-simplify", "Hexagon bit simplification", false, false) INITIALIZE_PASS_END(HexagonBitSimplify
MachineBasicBlock * getMBB() const
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
RegisterCell & fill(uint16_t B, uint16_t E, const BitValue &V)
void trace(bool On=false)
static unsigned virtReg2Index(unsigned Reg)
Convert a virtual register number to a 0-based index.
This class represents lattice values for constants.
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
static unsigned index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
void push_back(const T &Elt)
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Describe properties that are true of each instruction in the target description file.
unsigned getReg() const
getReg - Returns the register number.
unsigned getOperandNo(const_mop_iterator I) const
Returns the number of the operand iterator I points to.
Y = RRC X, rotate right via carry.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
A global registry used in conjunction with static constructors to make pluggable components (like tar...
void visit(const MachineInstr &MI)
unsigned getSubReg() const
bool test(unsigned Idx) const
constexpr bool isInt< 8 >(int64_t x)
unsigned const TargetRegisterInfo * TRI
iterator_range< mop_iterator > operands()
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void clearKillInfo()
Clears kill flags on all operands.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
void clear()
clear - Removes all bits from the bitvector. Does not change capacity.
static use_iterator use_end()
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
static ManagedStatic< DebugCounter > DC
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
unsigned getNumOperands() const
Returns the total number of operands.
Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
int find_first() const
find_first - Returns the index of the first set bit, -1 if none of the bits are set.
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth...
const HexagonRegisterInfo * getRegisterInfo() const override
std::set< RegisterRef > RegisterSet
int find_next(unsigned Prev) const
find_next - Returns the index of the next set bit following the "Prev" bit.
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction part of the terminator for a basic block.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
unsigned getID() const
Return the register class ID number.
zlib-gnu style compression
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
static bool isEqual(const Function &Caller, const Function &Callee)
BitVector & operator|=(const BitVector &RHS)
FunctionPass * createHexagonBitSimplify()
Base class for the actual dominator tree node.
bool reached(const MachineBasicBlock *B) const
static cl::opt< unsigned > MaxExtract("hexbit-max-extract", cl::Hidden, cl::init(std::numeric_limits< unsigned >::max()))
reverse_iterator rbegin()
This corresponds to the llvm.lifetime.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
initializer< Ty > init(const Ty &Val)
MachineInstrBundleIterator< MachineInstr > iterator
static cl::opt< bool > GenBitSplit("hexbit-bitsplit", cl::Hidden, cl::init(true), cl::desc("Generate bitsplit instructions"))
const TargetRegisterInfo * getTargetRegisterInfo() const
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
unsigned const MachineRegisterInfo * MRI
bool anyCommon(const BitVector &RHS) const
Test if any common bits are set.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
The instances of the Type class are immutable: once they are created, they are never changed...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const RegisterCell & lookup(unsigned Reg) const
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
bool any() const
any - Returns true if any bit is set.
Represent the analysis usage information of a pass.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
FunctionPass class - This class is used to implement most global optimizations.
auto find_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly...
FunctionPass * createHexagonLoopRescheduling()
succ_iterator succ_begin()
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
const MachineBasicBlock & front() const
pred_iterator pred_begin()
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
static cl::opt< unsigned > MaxBitSplit("hexbit-max-bitsplit", cl::Hidden, cl::init(std::numeric_limits< unsigned >::max()))
static cl::opt< bool > GenExtract("hexbit-extract", cl::Hidden, cl::init(true), cl::desc("Generate extract instructions"))
static const X86InstrFMA3Group Groups[]
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
size_type count() const
count - Returns the number of bits which are set.
static BitValue self(const BitRef &Self=BitRef())
void initializeHexagonBitSimplifyPass(PassRegistry &Registry)
static unsigned CountExtract
iterator_range< use_iterator > use_operands(unsigned Reg) const
bool isDebugValue() const
MachineOperand class - Representation of each machine instruction operand.
reference operator[](unsigned Idx)
CHAIN = SC CHAIN, Imm128 - System call.
static cl::opt< bool > PreserveTiedOps("hexbit-keep-tied", cl::Hidden, cl::init(true), cl::desc("Preserve subregisters in tied operands"))
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
bool is(unsigned T) const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned pred_size() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
hexagon bit Hexagon bit simplification
bool mayStore() const
Return true if this instruction could possibly modify memory.
unsigned succ_size() const
INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched", "Hexagon Loop Rescheduling", false, false) HexagonLoopRescheduling
const MachineBasicBlock * getParent() const
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
static unsigned CountBitSplit
Representation of each machine instruction.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
use_iterator use_begin(unsigned RegNo) const
void setReg(unsigned Reg)
Change the register this operand corresponds to.
static MachineOperand CreateImm(int64_t Val)
void setSubReg(unsigned subReg)
bool has(unsigned Reg) const
size_type size() const
size - Returns the number of bits in this bitvector.
void markUsesInDebugValueAsUndef(unsigned Reg) const
markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the specified register as undefined wh...
constexpr char IsConst[]
Key for Kernel::Arg::Metadata::mIsConst.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
void initializeHexagonLoopReschedulingPass(PassRegistry &)
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
RegisterCell get(RegisterRef RR) const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
const HexagonInstrInfo * getInstrInfo() const override
void put(RegisterRef RR, const RegisterCell &RC)
This class implements an extremely fast bulk output stream that can only output to a stream...
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore...
StringRef - Represent a constant reference to a string, i.e.
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
void RemoveOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with...
unsigned getRegSizeInBits(const TargetRegisterClass &RC) const
Return the size in bits of a register from class RC.
#define LLVM_ATTRIBUTE_UNUSED
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
const MachineOperand & getOperand(unsigned i) const
unsigned getHexagonSubRegIndex(const TargetRegisterClass &RC, unsigned GenIdx) const
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool isSafeToMove(AliasAnalysis *AA, bool &SawStore) const
Return true if it is safe to move this instruction.
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...