@@ -2260,23 +2260,24 @@ static unsigned getCommutedVPERMV3Opcode(unsigned Opcode) {
22602260MachineInstr *X86InstrInfo::commuteInstructionImpl (MachineInstr &MI, bool NewMI,
22612261 unsigned OpIdx1,
22622262 unsigned OpIdx2) const {
2263- auto cloneIfNew = [NewMI ](MachineInstr &MI) -> MachineInstr & {
2264- if (NewMI)
2265- return * MI.getParent ()->getParent ()->CloneMachineInstr (&MI);
2266- return MI;
2263+ auto CloneIfNew = [& ](MachineInstr &MI) {
2264+ return std::exchange (NewMI, false )
2265+ ? MI.getParent ()->getParent ()->CloneMachineInstr (&MI)
2266+ : & MI;
22672267 };
2268+ MachineInstr *WorkingMI = nullptr ;
2269+ unsigned Opc = MI.getOpcode ();
22682270
2269- switch (MI.getOpcode ()) {
2270- case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
2271- case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
2272- case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I)
2273- case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I)
2274- case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I)
2275- case X86::SHLD64rri8: { // A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B,
2276- // (64-I)
2277- unsigned Opc;
2271+ switch (Opc) {
2272+ // SHLD B, C, I <-> SHRD C, B, (BitWidth - I)
2273+ case X86::SHRD16rri8:
2274+ case X86::SHLD16rri8:
2275+ case X86::SHRD32rri8:
2276+ case X86::SHLD32rri8:
2277+ case X86::SHRD64rri8:
2278+ case X86::SHLD64rri8: {
22782279 unsigned Size;
2279- switch (MI. getOpcode () ) {
2280+ switch (Opc ) {
22802281 default :
22812282 llvm_unreachable (" Unreachable!" );
22822283 case X86::SHRD16rri8:
@@ -2304,32 +2305,27 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
23042305 Opc = X86::SHRD64rri8;
23052306 break ;
23062307 }
2307- unsigned Amt = MI.getOperand (3 ).getImm ();
2308- auto &WorkingMI = cloneIfNew (MI);
2309- WorkingMI.setDesc (get (Opc));
2310- WorkingMI.getOperand (3 ).setImm (Size - Amt);
2311- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2312- OpIdx1, OpIdx2);
2308+ WorkingMI = CloneIfNew (MI);
2309+ WorkingMI->setDesc (get (Opc));
2310+ WorkingMI->getOperand (3 ).setImm (Size - MI.getOperand (3 ).getImm ());
2311+ break ;
23132312 }
23142313 case X86::PFSUBrr:
2315- case X86::PFSUBRrr: {
2314+ case X86::PFSUBRrr:
23162315 // PFSUB x, y: x = x - y
23172316 // PFSUBR x, y: x = y - x
2318- unsigned Opc =
2319- (X86::PFSUBRrr == MI.getOpcode () ? X86::PFSUBrr : X86::PFSUBRrr);
2320- auto &WorkingMI = cloneIfNew (MI);
2321- WorkingMI.setDesc (get (Opc));
2322- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2323- OpIdx1, OpIdx2);
2324- }
2317+ WorkingMI = CloneIfNew (MI);
2318+ WorkingMI->setDesc (
2319+ get (X86::PFSUBRrr == Opc ? X86::PFSUBrr : X86::PFSUBRrr));
2320+ break ;
23252321 case X86::BLENDPDrri:
23262322 case X86::BLENDPSrri:
23272323 case X86::VBLENDPDrri:
23282324 case X86::VBLENDPSrri:
23292325 // If we're optimizing for size, try to use MOVSD/MOVSS.
23302326 if (MI.getParent ()->getParent ()->getFunction ().hasOptSize ()) {
2331- unsigned Mask, Opc ;
2332- switch (MI. getOpcode () ) {
2327+ unsigned Mask;
2328+ switch (Opc ) {
23332329 default :
23342330 llvm_unreachable (" Unreachable!" );
23352331 case X86::BLENDPDrri:
@@ -2350,12 +2346,10 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
23502346 break ;
23512347 }
23522348 if ((MI.getOperand (3 ).getImm () ^ Mask) == 1 ) {
2353- auto &WorkingMI = cloneIfNew (MI);
2354- WorkingMI.setDesc (get (Opc));
2355- WorkingMI.removeOperand (3 );
2356- return TargetInstrInfo::commuteInstructionImpl (WorkingMI,
2357- /* NewMI=*/ false , OpIdx1,
2358- OpIdx2);
2349+ WorkingMI = CloneIfNew (MI);
2350+ WorkingMI->setDesc (get (Opc));
2351+ WorkingMI->removeOperand (3 );
2352+ break ;
23592353 }
23602354 }
23612355 [[fallthrough]];
@@ -2367,7 +2361,7 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
23672361 case X86::VPBLENDDYrri:
23682362 case X86::VPBLENDWYrri: {
23692363 int8_t Mask;
2370- switch (MI. getOpcode () ) {
2364+ switch (Opc ) {
23712365 default :
23722366 llvm_unreachable (" Unreachable!" );
23732367 case X86::BLENDPDrri:
@@ -2408,10 +2402,9 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
24082402 // Using int8_t to ensure it will be sign extended to the int64_t that
24092403 // setImm takes in order to match isel behavior.
24102404 int8_t Imm = MI.getOperand (3 ).getImm () & Mask;
2411- auto &WorkingMI = cloneIfNew (MI);
2412- WorkingMI.getOperand (3 ).setImm (Mask ^ Imm);
2413- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2414- OpIdx1, OpIdx2);
2405+ WorkingMI = CloneIfNew (MI);
2406+ WorkingMI->getOperand (3 ).setImm (Mask ^ Imm);
2407+ break ;
24152408 }
24162409 case X86::INSERTPSrr:
24172410 case X86::VINSERTPSrr:
@@ -2428,10 +2421,9 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
24282421 unsigned AltIdx = llvm::countr_zero ((ZMask | (1 << DstIdx)) ^ 15 );
24292422 assert (AltIdx < 4 && " Illegal insertion index" );
24302423 unsigned AltImm = (AltIdx << 6 ) | (AltIdx << 4 ) | ZMask;
2431- auto &WorkingMI = cloneIfNew (MI);
2432- WorkingMI.getOperand (MI.getNumOperands () - 1 ).setImm (AltImm);
2433- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2434- OpIdx1, OpIdx2);
2424+ WorkingMI = CloneIfNew (MI);
2425+ WorkingMI->getOperand (MI.getNumOperands () - 1 ).setImm (AltImm);
2426+ break ;
24352427 }
24362428 return nullptr ;
24372429 }
@@ -2441,8 +2433,8 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
24412433 case X86::VMOVSSrr: {
24422434 // On SSE41 or later we can commute a MOVSS/MOVSD to a BLENDPS/BLENDPD.
24432435 if (Subtarget.hasSSE41 ()) {
2444- unsigned Mask, Opc ;
2445- switch (MI. getOpcode () ) {
2436+ unsigned Mask;
2437+ switch (Opc ) {
24462438 default :
24472439 llvm_unreachable (" Unreachable!" );
24482440 case X86::MOVSDrr:
@@ -2463,31 +2455,24 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
24632455 break ;
24642456 }
24652457
2466- auto &WorkingMI = cloneIfNew (MI);
2467- WorkingMI.setDesc (get (Opc));
2468- WorkingMI.addOperand (MachineOperand::CreateImm (Mask));
2469- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2470- OpIdx1, OpIdx2);
2458+ WorkingMI = CloneIfNew (MI);
2459+ WorkingMI->setDesc (get (Opc));
2460+ WorkingMI->addOperand (MachineOperand::CreateImm (Mask));
2461+ break ;
24712462 }
24722463
2473- // Convert to SHUFPD.
2474- assert (MI.getOpcode () == X86::MOVSDrr &&
2475- " Can only commute MOVSDrr without SSE4.1" );
2476-
2477- auto &WorkingMI = cloneIfNew (MI);
2478- WorkingMI.setDesc (get (X86::SHUFPDrri));
2479- WorkingMI.addOperand (MachineOperand::CreateImm (0x02 ));
2480- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2481- OpIdx1, OpIdx2);
2464+ WorkingMI = CloneIfNew (MI);
2465+ WorkingMI->setDesc (get (X86::SHUFPDrri));
2466+ WorkingMI->addOperand (MachineOperand::CreateImm (0x02 ));
2467+ break ;
24822468 }
24832469 case X86::SHUFPDrri: {
24842470 // Commute to MOVSD.
24852471 assert (MI.getOperand (3 ).getImm () == 0x02 && " Unexpected immediate!" );
2486- auto &WorkingMI = cloneIfNew (MI);
2487- WorkingMI.setDesc (get (X86::MOVSDrr));
2488- WorkingMI.removeOperand (3 );
2489- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2490- OpIdx1, OpIdx2);
2472+ WorkingMI = CloneIfNew (MI);
2473+ WorkingMI->setDesc (get (X86::MOVSDrr));
2474+ WorkingMI->removeOperand (3 );
2475+ break ;
24912476 }
24922477 case X86::PCLMULQDQrr:
24932478 case X86::VPCLMULQDQrr:
@@ -2500,10 +2485,9 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
25002485 unsigned Imm = MI.getOperand (3 ).getImm ();
25012486 unsigned Src1Hi = Imm & 0x01 ;
25022487 unsigned Src2Hi = Imm & 0x10 ;
2503- auto &WorkingMI = cloneIfNew (MI);
2504- WorkingMI.getOperand (3 ).setImm ((Src1Hi << 4 ) | (Src2Hi >> 4 ));
2505- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2506- OpIdx1, OpIdx2);
2488+ WorkingMI = CloneIfNew (MI);
2489+ WorkingMI->getOperand (3 ).setImm ((Src1Hi << 4 ) | (Src2Hi >> 4 ));
2490+ break ;
25072491 }
25082492 case X86::VPCMPBZ128rri:
25092493 case X86::VPCMPUBZ128rri:
@@ -2552,31 +2536,26 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
25522536 case X86::VPCMPWZ256rrik:
25532537 case X86::VPCMPUWZ256rrik:
25542538 case X86::VPCMPWZrrik:
2555- case X86::VPCMPUWZrrik: {
2539+ case X86::VPCMPUWZrrik:
2540+ WorkingMI = CloneIfNew (MI);
25562541 // Flip comparison mode immediate (if necessary).
2557- unsigned Imm = MI.getOperand (MI.getNumOperands () - 1 ).getImm () & 0x7 ;
2558- Imm = X86::getSwappedVPCMPImm (Imm);
2559- auto &WorkingMI = cloneIfNew (MI);
2560- WorkingMI.getOperand (MI.getNumOperands () - 1 ).setImm (Imm);
2561- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2562- OpIdx1, OpIdx2);
2563- }
2542+ WorkingMI->getOperand (MI.getNumOperands () - 1 )
2543+ .setImm (X86::getSwappedVPCMPImm (
2544+ MI.getOperand (MI.getNumOperands () - 1 ).getImm () & 0x7 ));
2545+ break ;
25642546 case X86::VPCOMBri:
25652547 case X86::VPCOMUBri:
25662548 case X86::VPCOMDri:
25672549 case X86::VPCOMUDri:
25682550 case X86::VPCOMQri:
25692551 case X86::VPCOMUQri:
25702552 case X86::VPCOMWri:
2571- case X86::VPCOMUWri: {
2553+ case X86::VPCOMUWri:
2554+ WorkingMI = CloneIfNew (MI);
25722555 // Flip comparison mode immediate (if necessary).
2573- unsigned Imm = MI.getOperand (3 ).getImm () & 0x7 ;
2574- Imm = X86::getSwappedVPCOMImm (Imm);
2575- auto &WorkingMI = cloneIfNew (MI);
2576- WorkingMI.getOperand (3 ).setImm (Imm);
2577- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2578- OpIdx1, OpIdx2);
2579- }
2556+ WorkingMI->getOperand (3 ).setImm (
2557+ X86::getSwappedVPCOMImm (MI.getOperand (3 ).getImm () & 0x7 ));
2558+ break ;
25802559 case X86::VCMPSDZrr:
25812560 case X86::VCMPSSZrr:
25822561 case X86::VCMPPDZrri:
@@ -2594,35 +2573,28 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
25942573 case X86::VCMPPDZ128rrik:
25952574 case X86::VCMPPSZ128rrik:
25962575 case X86::VCMPPDZ256rrik:
2597- case X86::VCMPPSZ256rrik: {
2598- unsigned Imm =
2599- MI.getOperand (MI.getNumExplicitOperands () - 1 ).getImm () & 0x1f ;
2600- Imm = X86::getSwappedVCMPImm (Imm);
2601- auto &WorkingMI = cloneIfNew (MI);
2602- WorkingMI.getOperand (MI.getNumExplicitOperands () - 1 ).setImm (Imm);
2603- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2604- OpIdx1, OpIdx2);
2605- }
2576+ case X86::VCMPPSZ256rrik:
2577+ WorkingMI = CloneIfNew (MI);
2578+ WorkingMI->getOperand (MI.getNumExplicitOperands () - 1 )
2579+ .setImm (X86::getSwappedVCMPImm (
2580+ MI.getOperand (MI.getNumExplicitOperands () - 1 ).getImm () & 0x1f ));
2581+ break ;
26062582 case X86::VPERM2F128rr:
2607- case X86::VPERM2I128rr: {
2583+ case X86::VPERM2I128rr:
26082584 // Flip permute source immediate.
26092585 // Imm & 0x02: lo = if set, select Op1.lo/hi else Op0.lo/hi.
26102586 // Imm & 0x20: hi = if set, select Op1.lo/hi else Op0.lo/hi.
2611- int8_t Imm = MI.getOperand (3 ).getImm () & 0xFF ;
2612- auto &WorkingMI = cloneIfNew (MI);
2613- WorkingMI.getOperand (3 ).setImm (Imm ^ 0x22 );
2614- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2615- OpIdx1, OpIdx2);
2616- }
2587+ WorkingMI = CloneIfNew (MI);
2588+ WorkingMI->getOperand (3 ).setImm ((MI.getOperand (3 ).getImm () & 0xFF ) ^ 0x22 );
2589+ break ;
26172590 case X86::MOVHLPSrr:
26182591 case X86::UNPCKHPDrr:
26192592 case X86::VMOVHLPSrr:
26202593 case X86::VUNPCKHPDrr:
26212594 case X86::VMOVHLPSZrr:
2622- case X86::VUNPCKHPDZ128rr: {
2595+ case X86::VUNPCKHPDZ128rr:
26232596 assert (Subtarget.hasSSE2 () && " Commuting MOVHLP/UNPCKHPD requires SSE2!" );
26242597
2625- unsigned Opc = MI.getOpcode ();
26262598 switch (Opc) {
26272599 default :
26282600 llvm_unreachable (" Unreachable!" );
@@ -2645,20 +2617,17 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
26452617 Opc = X86::VMOVHLPSZrr;
26462618 break ;
26472619 }
2648- auto &WorkingMI = cloneIfNew (MI);
2649- WorkingMI.setDesc (get (Opc));
2650- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2651- OpIdx1, OpIdx2);
2652- }
2620+ WorkingMI = CloneIfNew (MI);
2621+ WorkingMI->setDesc (get (Opc));
2622+ break ;
26532623 case X86::CMOV16rr:
26542624 case X86::CMOV32rr:
26552625 case X86::CMOV64rr: {
2656- auto & WorkingMI = cloneIfNew (MI);
2626+ WorkingMI = CloneIfNew (MI);
26572627 unsigned OpNo = MI.getDesc ().getNumOperands () - 1 ;
26582628 X86::CondCode CC = static_cast <X86::CondCode>(MI.getOperand (OpNo).getImm ());
2659- WorkingMI.getOperand (OpNo).setImm (X86::GetOppositeBranchCondition (CC));
2660- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2661- OpIdx1, OpIdx2);
2629+ WorkingMI->getOperand (OpNo).setImm (X86::GetOppositeBranchCondition (CC));
2630+ break ;
26622631 }
26632632 case X86::VPTERNLOGDZrri:
26642633 case X86::VPTERNLOGDZrmi:
@@ -2702,34 +2671,25 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
27022671 case X86::VPTERNLOGQZ128rmbikz:
27032672 case X86::VPTERNLOGQZ256rmbikz:
27042673 case X86::VPTERNLOGQZrmbikz: {
2705- auto &WorkingMI = cloneIfNew (MI);
2706- commuteVPTERNLOG (WorkingMI, OpIdx1, OpIdx2);
2707- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2708- OpIdx1, OpIdx2);
2674+ WorkingMI = CloneIfNew (MI);
2675+ commuteVPTERNLOG (*WorkingMI, OpIdx1, OpIdx2);
2676+ break ;
27092677 }
2710- default : {
2711- if (isCommutableVPERMV3Instruction (MI.getOpcode ())) {
2712- unsigned Opc = getCommutedVPERMV3Opcode (MI.getOpcode ());
2713- auto &WorkingMI = cloneIfNew (MI);
2714- WorkingMI.setDesc (get (Opc));
2715- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2716- OpIdx1, OpIdx2);
2678+ default :
2679+ if (isCommutableVPERMV3Instruction (Opc)) {
2680+ WorkingMI = CloneIfNew (MI);
2681+ WorkingMI->setDesc (get (getCommutedVPERMV3Opcode (Opc)));
2682+ break ;
27172683 }
27182684
2719- const X86InstrFMA3Group *FMA3Group =
2720- getFMA3Group (MI.getOpcode (), MI.getDesc ().TSFlags );
2721- if (FMA3Group) {
2722- unsigned Opc =
2723- getFMA3OpcodeToCommuteOperands (MI, OpIdx1, OpIdx2, *FMA3Group);
2724- auto &WorkingMI = cloneIfNew (MI);
2725- WorkingMI.setDesc (get (Opc));
2726- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2727- OpIdx1, OpIdx2);
2685+ if (auto *FMA3Group = getFMA3Group (Opc, MI.getDesc ().TSFlags )) {
2686+ WorkingMI = CloneIfNew (MI);
2687+ WorkingMI->setDesc (
2688+ get (getFMA3OpcodeToCommuteOperands (MI, OpIdx1, OpIdx2, *FMA3Group)));
2689+ break ;
27282690 }
2729-
2730- return TargetInstrInfo::commuteInstructionImpl (MI, NewMI, OpIdx1, OpIdx2);
2731- }
27322691 }
2692+ return TargetInstrInfo::commuteInstructionImpl (MI, NewMI, OpIdx1, OpIdx2);
27332693}
27342694
27352695bool X86InstrInfo::findThreeSrcCommutedOpIndices (const MachineInstr &MI,
0 commit comments