Source file src/cmd/compile/internal/ssa/_gen/PPC64Ops.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package main
     6  
     7  import "strings"
     8  
     9  // Notes:
    10  //  - Less-than-64-bit integer types live in the low portion of registers.
    11  //    The upper portion is junk.
    12  //  - Boolean types are zero or 1; stored in a byte, with upper bytes of the register containing junk.
    13  //  - *const instructions may use a constant larger than the instruction can encode.
    14  //    In this case the assembler expands to multiple instructions and uses tmp
    15  //    register (R31).
    16  
    17  var regNamesPPC64 = []string{
    18  	"R0", // REGZERO, not used, but simplifies counting in regalloc
    19  	"SP", // REGSP
    20  	"SB", // REGSB
    21  	"R3",
    22  	"R4",
    23  	"R5",
    24  	"R6",
    25  	"R7",
    26  	"R8",
    27  	"R9",
    28  	"R10",
    29  	"R11", // REGCTXT for closures
    30  	"R12",
    31  	"R13", // REGTLS
    32  	"R14",
    33  	"R15",
    34  	"R16",
    35  	"R17",
    36  	"R18",
    37  	"R19",
    38  	"R20",
    39  	"R21",
    40  	"R22",
    41  	"R23",
    42  	"R24",
    43  	"R25",
    44  	"R26",
    45  	"R27",
    46  	"R28",
    47  	"R29",
    48  	"g",   // REGG.  Using name "g" and setting Config.hasGReg makes it "just happen".
    49  	"R31", // REGTMP
    50  
    51  	"F0",
    52  	"F1",
    53  	"F2",
    54  	"F3",
    55  	"F4",
    56  	"F5",
    57  	"F6",
    58  	"F7",
    59  	"F8",
    60  	"F9",
    61  	"F10",
    62  	"F11",
    63  	"F12",
    64  	"F13",
    65  	"F14",
    66  	"F15",
    67  	"F16",
    68  	"F17",
    69  	"F18",
    70  	"F19",
    71  	"F20",
    72  	"F21",
    73  	"F22",
    74  	"F23",
    75  	"F24",
    76  	"F25",
    77  	"F26",
    78  	"F27",
    79  	"F28",
    80  	"F29",
    81  	"F30",
    82  	// "F31", the allocator is limited to 64 entries. We sacrifice this FPR to support XER.
    83  
    84  	"XER",
    85  
    86  	// If you add registers, update asyncPreempt in runtime.
    87  
    88  	// "CR0",
    89  	// "CR1",
    90  	// "CR2",
    91  	// "CR3",
    92  	// "CR4",
    93  	// "CR5",
    94  	// "CR6",
    95  	// "CR7",
    96  
    97  	// "CR",
    98  	// "LR",
    99  	// "CTR",
   100  }
   101  
   102  func init() {
   103  	// Make map from reg names to reg integers.
   104  	if len(regNamesPPC64) > 64 {
   105  		panic("too many registers")
   106  	}
   107  	num := map[string]int{}
   108  	for i, name := range regNamesPPC64 {
   109  		num[name] = i
   110  	}
   111  	buildReg := func(s string) regMask {
   112  		m := regMask(0)
   113  		for _, r := range strings.Split(s, " ") {
   114  			if n, ok := num[r]; ok {
   115  				m |= regMask(1) << uint(n)
   116  				continue
   117  			}
   118  			panic("register " + r + " not found")
   119  		}
   120  		return m
   121  	}
   122  
   123  	var (
   124  		gp  = buildReg("R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29")
   125  		fp  = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30")
   126  		sp  = buildReg("SP")
   127  		sb  = buildReg("SB")
   128  		gr  = buildReg("g")
   129  		xer = buildReg("XER")
   130  		// cr  = buildReg("CR")
   131  		// ctr = buildReg("CTR")
   132  		// lr  = buildReg("LR")
   133  		tmp     = buildReg("R31")
   134  		ctxt    = buildReg("R11")
   135  		callptr = buildReg("R12")
   136  		// tls = buildReg("R13")
   137  		gp01        = regInfo{inputs: nil, outputs: []regMask{gp}}
   138  		gp11        = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
   139  		xergp       = regInfo{inputs: []regMask{xer}, outputs: []regMask{gp}, clobbers: xer}
   140  		gp11cxer    = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}, clobbers: xer}
   141  		gp11xer     = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp, xer}}
   142  		gp1xer1xer  = regInfo{inputs: []regMask{gp | sp | sb, xer}, outputs: []regMask{gp, xer}, clobbers: xer}
   143  		gp21        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
   144  		gp21a0      = regInfo{inputs: []regMask{gp, gp | sp | sb}, outputs: []regMask{gp}}
   145  		gp21cxer    = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}, clobbers: xer}
   146  		gp21xer     = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, xer}, clobbers: xer}
   147  		gp2xer1xer  = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, xer}, outputs: []regMask{gp, xer}, clobbers: xer}
   148  		gp31        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
   149  		gp1cr       = regInfo{inputs: []regMask{gp | sp | sb}}
   150  		gp2cr       = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
   151  		crgp        = regInfo{inputs: nil, outputs: []regMask{gp}}
   152  		crgp11      = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}
   153  		crgp21      = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
   154  		gpload      = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
   155  		gploadidx   = regInfo{inputs: []regMask{gp | sp | sb, gp}, outputs: []regMask{gp}}
   156  		prefreg     = regInfo{inputs: []regMask{gp | sp | sb}}
   157  		gpstore     = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
   158  		gpstoreidx  = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}}
   159  		gpstorezero = regInfo{inputs: []regMask{gp | sp | sb}} // ppc64.REGZERO is reserved zero value
   160  		gpxchg      = regInfo{inputs: []regMask{gp | sp | sb, gp}, outputs: []regMask{gp}}
   161  		gpcas       = regInfo{inputs: []regMask{gp | sp | sb, gp, gp}, outputs: []regMask{gp}}
   162  		fp01        = regInfo{inputs: nil, outputs: []regMask{fp}}
   163  		fp11        = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}}
   164  		fpgp        = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}}
   165  		gpfp        = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}}
   166  		fp21        = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
   167  		fp31        = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}}
   168  		fp2cr       = regInfo{inputs: []regMask{fp, fp}}
   169  		fpload      = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{fp}}
   170  		fploadidx   = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{fp}}
   171  		fpstore     = regInfo{inputs: []regMask{gp | sp | sb, fp}}
   172  		fpstoreidx  = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, fp}}
   173  		callerSave  = regMask(gp | fp | gr | xer)
   174  		first7      = buildReg("R3 R4 R5 R6 R7 R8 R9")
   175  	)
   176  	ops := []opData{
   177  		{name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true},                              // arg0 + arg1
   178  		{name: "ADDCC", argLength: 2, reg: gp21, asm: "ADDCC", commutative: true, typ: "(Int,Flags)"},      // arg0 + arg1
   179  		{name: "ADDconst", argLength: 1, reg: gp11, asm: "ADD", aux: "Int64"},                              // arg0 + auxInt
   180  		{name: "ADDCCconst", argLength: 1, reg: gp11cxer, asm: "ADDCCC", aux: "Int64", typ: "(Int,Flags)"}, // arg0 + auxInt sets CC, clobbers XER
   181  		{name: "FADD", argLength: 2, reg: fp21, asm: "FADD", commutative: true},                            // arg0+arg1
   182  		{name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true},                          // arg0+arg1
   183  		{name: "SUB", argLength: 2, reg: gp21, asm: "SUB"},                                                 // arg0-arg1
   184  		{name: "SUBCC", argLength: 2, reg: gp21, asm: "SUBCC", typ: "(Int,Flags)"},                         // arg0-arg1 sets CC
   185  		{name: "SUBFCconst", argLength: 1, reg: gp11cxer, asm: "SUBC", aux: "Int64"},                       // auxInt - arg0 (carry is ignored)
   186  		{name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"},                                               // arg0-arg1
   187  		{name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"},                                             // arg0-arg1
   188  
   189  		// Note, the FPU works with float64 in register.
   190  		{name: "XSMINJDP", argLength: 2, reg: fp21, asm: "XSMINJDP"}, // fmin(arg0,arg1)
   191  		{name: "XSMAXJDP", argLength: 2, reg: fp21, asm: "XSMAXJDP"}, // fmax(arg0,arg1)
   192  
   193  		{name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit)
   194  		{name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit)
   195  		{name: "MULLDconst", argLength: 1, reg: gp11, asm: "MULLD", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
   196  		{name: "MULLWconst", argLength: 1, reg: gp11, asm: "MULLW", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
   197  		{name: "MADDLD", argLength: 3, reg: gp31, asm: "MADDLD", typ: "Int64"},                  // (arg0*arg1)+arg2 (signed 64-bit)
   198  
   199  		{name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true},                             // (arg0 * arg1) >> 64, signed
   200  		{name: "MULHW", argLength: 2, reg: gp21, asm: "MULHW", commutative: true},                             // (arg0 * arg1) >> 32, signed
   201  		{name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", commutative: true},                           // (arg0 * arg1) >> 64, unsigned
   202  		{name: "MULHDUCC", argLength: 2, reg: gp21, asm: "MULHDUCC", commutative: true, typ: "(Int64,Flags)"}, // (arg0 * arg1) >> 64, unsigned, sets CC
   203  		{name: "MULHWU", argLength: 2, reg: gp21, asm: "MULHWU", commutative: true},                           // (arg0 * arg1) >> 32, unsigned
   204  
   205  		{name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true},   // arg0*arg1
   206  		{name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true}, // arg0*arg1
   207  
   208  		{name: "FMADD", argLength: 3, reg: fp31, asm: "FMADD"},   // arg0*arg1 + arg2
   209  		{name: "FMADDS", argLength: 3, reg: fp31, asm: "FMADDS"}, // arg0*arg1 + arg2
   210  		{name: "FMSUB", argLength: 3, reg: fp31, asm: "FMSUB"},   // arg0*arg1 - arg2
   211  		{name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS"}, // arg0*arg1 - arg2
   212  
   213  		{name: "SRAD", argLength: 2, reg: gp21cxer, asm: "SRAD"}, // signed arg0 >> (arg1&127), 64 bit width (note: 127, not 63!)
   214  		{name: "SRAW", argLength: 2, reg: gp21cxer, asm: "SRAW"}, // signed arg0 >> (arg1&63), 32 bit width
   215  		{name: "SRD", argLength: 2, reg: gp21, asm: "SRD"},       // unsigned arg0 >> (arg1&127), 64 bit width
   216  		{name: "SRW", argLength: 2, reg: gp21, asm: "SRW"},       // unsigned arg0 >> (arg1&63), 32 bit width
   217  		{name: "SLD", argLength: 2, reg: gp21, asm: "SLD"},       // arg0 << (arg1&127), 64 bit width
   218  		{name: "SLW", argLength: 2, reg: gp21, asm: "SLW"},       // arg0 << (arg1&63), 32 bit width
   219  
   220  		{name: "ROTL", argLength: 2, reg: gp21, asm: "ROTL"},   // arg0 rotate left by arg1 mod 64
   221  		{name: "ROTLW", argLength: 2, reg: gp21, asm: "ROTLW"}, // uint32(arg0) rotate left by arg1 mod 32
   222  		// The following are ops to implement the extended mnemonics for shifts as described in section C.8 of the ISA.
   223  		// The constant shift values are packed into the aux int32.
   224  		{name: "CLRLSLWI", argLength: 1, reg: gp11, asm: "CLRLSLWI", aux: "Int32"}, //
   225  		{name: "CLRLSLDI", argLength: 1, reg: gp11, asm: "CLRLSLDI", aux: "Int32"}, //
   226  
   227  		// Operations which consume or generate the CA (xer)
   228  		{name: "ADDC", argLength: 2, reg: gp21xer, asm: "ADDC", commutative: true, typ: "(UInt64, UInt64)"},    // arg0 + arg1 -> out, CA
   229  		{name: "SUBC", argLength: 2, reg: gp21xer, asm: "SUBC", typ: "(UInt64, UInt64)"},                       // arg0 - arg1 -> out, CA
   230  		{name: "ADDCconst", argLength: 1, reg: gp11xer, asm: "ADDC", typ: "(UInt64, UInt64)", aux: "Int64"},    // arg0 + imm16 -> out, CA
   231  		{name: "SUBCconst", argLength: 1, reg: gp11xer, asm: "SUBC", typ: "(UInt64, UInt64)", aux: "Int64"},    // imm16 - arg0 -> out, CA
   232  		{name: "ADDE", argLength: 3, reg: gp2xer1xer, asm: "ADDE", typ: "(UInt64, UInt64)", commutative: true}, // arg0 + arg1 + CA (arg2) -> out, CA
   233  		{name: "ADDZE", argLength: 2, reg: gp1xer1xer, asm: "ADDZE", typ: "(UInt64, UInt64)"},                  // arg0 + CA (arg1) -> out, CA
   234  		{name: "SUBE", argLength: 3, reg: gp2xer1xer, asm: "SUBE", typ: "(UInt64, UInt64)"},                    // arg0 - arg1 - CA (arg2) -> out, CA
   235  		{name: "ADDZEzero", argLength: 1, reg: xergp, asm: "ADDZE", typ: "UInt64"},                             // CA (arg0) + $0 -> out
   236  		{name: "SUBZEzero", argLength: 1, reg: xergp, asm: "SUBZE", typ: "UInt64"},                             // $0 - CA (arg0) -> out
   237  
   238  		{name: "SRADconst", argLength: 1, reg: gp11cxer, asm: "SRAD", aux: "Int64"}, // signed arg0 >> auxInt, 0 <= auxInt < 64, 64 bit width
   239  		{name: "SRAWconst", argLength: 1, reg: gp11cxer, asm: "SRAW", aux: "Int64"}, // signed arg0 >> auxInt, 0 <= auxInt < 32, 32 bit width
   240  		{name: "SRDconst", argLength: 1, reg: gp11, asm: "SRD", aux: "Int64"},       // unsigned arg0 >> auxInt, 0 <= auxInt < 64, 64 bit width
   241  		{name: "SRWconst", argLength: 1, reg: gp11, asm: "SRW", aux: "Int64"},       // unsigned arg0 >> auxInt, 0 <= auxInt < 32, 32 bit width
   242  		{name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "Int64"},       // arg0 << auxInt, 0 <= auxInt < 64, 64 bit width
   243  		{name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "Int64"},       // arg0 << auxInt, 0 <= auxInt < 32, 32 bit width
   244  
   245  		{name: "ROTLconst", argLength: 1, reg: gp11, asm: "ROTL", aux: "Int64"},   // arg0 rotate left by auxInt bits
   246  		{name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits
   247  		{name: "EXTSWSLconst", argLength: 1, reg: gp11, asm: "EXTSWSLI", aux: "Int64"},
   248  
   249  		{name: "RLWINM", argLength: 1, reg: gp11, asm: "RLWNM", aux: "Int64"},                           // Rotate and mask by immediate "rlwinm". encodePPC64RotateMask describes aux
   250  		{name: "RLWNM", argLength: 2, reg: gp21, asm: "RLWNM", aux: "Int64"},                            // Rotate and mask by "rlwnm". encodePPC64RotateMask describes aux
   251  		{name: "RLWMI", argLength: 2, reg: gp21a0, asm: "RLWMI", aux: "Int64", resultInArg0: true},      // "rlwimi" similar aux encoding as above
   252  		{name: "RLDICL", argLength: 1, reg: gp11, asm: "RLDICL", aux: "Int64"},                          // Auxint is encoded similarly to RLWINM, but only MB and SH are valid. ME is always 63.
   253  		{name: "RLDICLCC", argLength: 1, reg: gp11, asm: "RLDICLCC", aux: "Int64", typ: "(Int, Flags)"}, // Auxint is encoded similarly to RLWINM, but only MB and SH are valid. ME is always 63. Sets CC.
   254  		{name: "RLDICR", argLength: 1, reg: gp11, asm: "RLDICR", aux: "Int64"},                          // Likewise, but only ME and SH are valid. MB is always 0.
   255  
   256  		{name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD"},                          // count leading zeros
   257  		{name: "CNTLZDCC", argLength: 1, reg: gp11, asm: "CNTLZDCC", typ: "(Int, Flags)"}, // count leading zeros, sets CC
   258  		{name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW"},                          // count leading zeros (32 bit)
   259  
   260  		{name: "CNTTZD", argLength: 1, reg: gp11, asm: "CNTTZD"}, // count trailing zeros
   261  		{name: "CNTTZW", argLength: 1, reg: gp11, asm: "CNTTZW"}, // count trailing zeros (32 bit)
   262  
   263  		{name: "POPCNTD", argLength: 1, reg: gp11, asm: "POPCNTD"}, // number of set bits in arg0
   264  		{name: "POPCNTW", argLength: 1, reg: gp11, asm: "POPCNTW"}, // number of set bits in each word of arg0 placed in corresponding word
   265  		{name: "POPCNTB", argLength: 1, reg: gp11, asm: "POPCNTB"}, // number of set bits in each byte of arg0 placed in corresponding byte
   266  
   267  		{name: "FDIV", argLength: 2, reg: fp21, asm: "FDIV"},   // arg0/arg1
   268  		{name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS"}, // arg0/arg1
   269  
   270  		{name: "DIVD", argLength: 2, reg: gp21, asm: "DIVD", typ: "Int64"},   // arg0/arg1 (signed 64-bit)
   271  		{name: "DIVW", argLength: 2, reg: gp21, asm: "DIVW", typ: "Int32"},   // arg0/arg1 (signed 32-bit)
   272  		{name: "DIVDU", argLength: 2, reg: gp21, asm: "DIVDU", typ: "Int64"}, // arg0/arg1 (unsigned 64-bit)
   273  		{name: "DIVWU", argLength: 2, reg: gp21, asm: "DIVWU", typ: "Int32"}, // arg0/arg1 (unsigned 32-bit)
   274  
   275  		{name: "MODUD", argLength: 2, reg: gp21, asm: "MODUD", typ: "UInt64"}, // arg0 % arg1 (unsigned 64-bit)
   276  		{name: "MODSD", argLength: 2, reg: gp21, asm: "MODSD", typ: "Int64"},  // arg0 % arg1 (signed 64-bit)
   277  		{name: "MODUW", argLength: 2, reg: gp21, asm: "MODUW", typ: "UInt32"}, // arg0 % arg1 (unsigned 32-bit)
   278  		{name: "MODSW", argLength: 2, reg: gp21, asm: "MODSW", typ: "Int32"},  // arg0 % arg1 (signed 32-bit)
   279  		// MOD is implemented as rem := arg0 - (arg0/arg1) * arg1
   280  
   281  		// Conversions are all float-to-float register operations.  "Integer" refers to encoding in the FP register.
   282  		{name: "FCTIDZ", argLength: 1, reg: fp11, asm: "FCTIDZ", typ: "Float64"}, // convert float to 64-bit int round towards zero
   283  		{name: "FCTIWZ", argLength: 1, reg: fp11, asm: "FCTIWZ", typ: "Float64"}, // convert float to 32-bit int round towards zero
   284  		{name: "FCFID", argLength: 1, reg: fp11, asm: "FCFID", typ: "Float64"},   // convert 64-bit integer to float
   285  		{name: "FCFIDS", argLength: 1, reg: fp11, asm: "FCFIDS", typ: "Float32"}, // convert 32-bit integer to float
   286  		{name: "FRSP", argLength: 1, reg: fp11, asm: "FRSP", typ: "Float64"},     // round float to 32-bit value
   287  
   288  		// Movement between float and integer registers with no change in bits; accomplished with stores+loads on PPC.
   289  		// Because the 32-bit load-literal-bits instructions have impoverished addressability, always widen the
   290  		// data and use FMOVDload and FMOVDstore instead (this will also dodge endianness issues).
   291  		// There are optimizations that should apply -- (Xi2f64 (MOVWload (not-ADD-ptr+offset) ) ) could use
   292  		// the word-load instructions.  (Xi2f64 (MOVDload ptr )) can be (FMOVDload ptr)
   293  
   294  		{name: "MFVSRD", argLength: 1, reg: fpgp, asm: "MFVSRD", typ: "Int64"},   // move 64 bits of F register into G register
   295  		{name: "MTVSRD", argLength: 1, reg: gpfp, asm: "MTVSRD", typ: "Float64"}, // move 64 bits of G register into F register
   296  
   297  		{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true},                           // arg0&arg1
   298  		{name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"},                                            // arg0&^arg1
   299  		{name: "ANDNCC", argLength: 2, reg: gp21, asm: "ANDNCC", typ: "(Int64,Flags)"},                  // arg0&^arg1 sets CC
   300  		{name: "ANDCC", argLength: 2, reg: gp21, asm: "ANDCC", commutative: true, typ: "(Int64,Flags)"}, // arg0&arg1 sets CC
   301  		{name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true},                             // arg0|arg1
   302  		{name: "ORN", argLength: 2, reg: gp21, asm: "ORN"},                                              // arg0|^arg1
   303  		{name: "ORCC", argLength: 2, reg: gp21, asm: "ORCC", commutative: true, typ: "(Int,Flags)"},     // arg0|arg1 sets CC
   304  		{name: "NOR", argLength: 2, reg: gp21, asm: "NOR", commutative: true},                           // ^(arg0|arg1)
   305  		{name: "NORCC", argLength: 2, reg: gp21, asm: "NORCC", commutative: true, typ: "(Int,Flags)"},   // ^(arg0|arg1) sets CC
   306  		{name: "XOR", argLength: 2, reg: gp21, asm: "XOR", typ: "Int64", commutative: true},             // arg0^arg1
   307  		{name: "XORCC", argLength: 2, reg: gp21, asm: "XORCC", commutative: true, typ: "(Int,Flags)"},   // arg0^arg1 sets CC
   308  		{name: "EQV", argLength: 2, reg: gp21, asm: "EQV", typ: "Int64", commutative: true},             // arg0^^arg1
   309  		{name: "NEG", argLength: 1, reg: gp11, asm: "NEG"},                                              // -arg0 (integer)
   310  		{name: "NEGCC", argLength: 1, reg: gp11, asm: "NEGCC", typ: "(Int,Flags)"},                      // -arg0 (integer) sets CC
   311  		{name: "BRD", argLength: 1, reg: gp11, asm: "BRD"},                                              // reversebytes64(arg0)
   312  		{name: "BRW", argLength: 1, reg: gp11, asm: "BRW"},                                              // reversebytes32(arg0)
   313  		{name: "BRH", argLength: 1, reg: gp11, asm: "BRH"},                                              // reversebytes16(arg0)
   314  		{name: "FNEG", argLength: 1, reg: fp11, asm: "FNEG"},                                            // -arg0 (floating point)
   315  		{name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"},                                          // sqrt(arg0) (floating point)
   316  		{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"},                                        // sqrt(arg0) (floating point, single precision)
   317  		{name: "FFLOOR", argLength: 1, reg: fp11, asm: "FRIM"},                                          // floor(arg0), float64
   318  		{name: "FCEIL", argLength: 1, reg: fp11, asm: "FRIP"},                                           // ceil(arg0), float64
   319  		{name: "FTRUNC", argLength: 1, reg: fp11, asm: "FRIZ"},                                          // trunc(arg0), float64
   320  		{name: "FROUND", argLength: 1, reg: fp11, asm: "FRIN"},                                          // round(arg0), float64
   321  		{name: "FABS", argLength: 1, reg: fp11, asm: "FABS"},                                            // abs(arg0), float64
   322  		{name: "FNABS", argLength: 1, reg: fp11, asm: "FNABS"},                                          // -abs(arg0), float64
   323  		{name: "FCPSGN", argLength: 2, reg: fp21, asm: "FCPSGN"},                                        // copysign arg0 -> arg1, float64
   324  
   325  		{name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64"},                                                                                                 // arg0|aux
   326  		{name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64"},                                                                                               // arg0^aux
   327  		{name: "ANDCCconst", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}, asm: "ANDCC", aux: "Int64", typ: "(Int,Flags)"},           // arg0&aux == 0 // and-immediate sets CC on PPC, always.
   328  		{name: "ANDconst", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}, clobberFlags: true, asm: "ANDCC", aux: "Int64", typ: "Int"}, // arg0&aux == 0 // and-immediate sets CC on PPC, always.
   329  
   330  		{name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB", typ: "Int64"},   // sign extend int8 to int64
   331  		{name: "MOVBZreg", argLength: 1, reg: gp11, asm: "MOVBZ", typ: "Int64"}, // zero extend uint8 to uint64
   332  		{name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVH", typ: "Int64"},   // sign extend int16 to int64
   333  		{name: "MOVHZreg", argLength: 1, reg: gp11, asm: "MOVHZ", typ: "Int64"}, // zero extend uint16 to uint64
   334  		{name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW", typ: "Int64"},   // sign extend int32 to int64
   335  		{name: "MOVWZreg", argLength: 1, reg: gp11, asm: "MOVWZ", typ: "Int64"}, // zero extend uint32 to uint64
   336  
   337  		// Load bytes in the endian order of the arch from arg0+aux+auxint into a 64 bit register.
   338  		{name: "MOVBZload", argLength: 2, reg: gpload, asm: "MOVBZ", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"},  // load byte zero extend
   339  		{name: "MOVHload", argLength: 2, reg: gpload, asm: "MOVH", aux: "SymOff", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"},    // load 2 bytes sign extend
   340  		{name: "MOVHZload", argLength: 2, reg: gpload, asm: "MOVHZ", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes zero extend
   341  		{name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVW", aux: "SymOff", typ: "Int32", faultOnNilArg0: true, symEffect: "Read"},    // load 4 bytes sign extend
   342  		{name: "MOVWZload", argLength: 2, reg: gpload, asm: "MOVWZ", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes zero extend
   343  		{name: "MOVDload", argLength: 2, reg: gpload, asm: "MOVD", aux: "SymOff", typ: "Int64", faultOnNilArg0: true, symEffect: "Read"},    // load 8 bytes
   344  
   345  		// Load bytes in reverse endian order of the arch from arg0 into a 64 bit register, all zero extend.
   346  		// The generated instructions are indexed loads with no offset field in the instruction so the aux fields are not used.
   347  		// In these cases the index register field is set to 0 and the full address is in the base register.
   348  		{name: "MOVDBRload", argLength: 2, reg: gpload, asm: "MOVDBR", typ: "UInt64", faultOnNilArg0: true}, // load 8 bytes reverse order
   349  		{name: "MOVWBRload", argLength: 2, reg: gpload, asm: "MOVWBR", typ: "UInt32", faultOnNilArg0: true}, // load 4 bytes zero extend reverse order
   350  		{name: "MOVHBRload", argLength: 2, reg: gpload, asm: "MOVHBR", typ: "UInt16", faultOnNilArg0: true}, // load 2 bytes zero extend reverse order
   351  
   352  		// In these cases an index register is used in addition to a base register
   353  		// Loads from memory location arg[0] + arg[1].
   354  		{name: "MOVBZloadidx", argLength: 3, reg: gploadidx, asm: "MOVBZ", typ: "UInt8"},  // zero extend uint8 to uint64
   355  		{name: "MOVHloadidx", argLength: 3, reg: gploadidx, asm: "MOVH", typ: "Int16"},    // sign extend int16 to int64
   356  		{name: "MOVHZloadidx", argLength: 3, reg: gploadidx, asm: "MOVHZ", typ: "UInt16"}, // zero extend uint16 to uint64
   357  		{name: "MOVWloadidx", argLength: 3, reg: gploadidx, asm: "MOVW", typ: "Int32"},    // sign extend int32 to int64
   358  		{name: "MOVWZloadidx", argLength: 3, reg: gploadidx, asm: "MOVWZ", typ: "UInt32"}, // zero extend uint32 to uint64
   359  		{name: "MOVDloadidx", argLength: 3, reg: gploadidx, asm: "MOVD", typ: "Int64"},
   360  		{name: "MOVHBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVHBR", typ: "Int16"}, // sign extend int16 to int64
   361  		{name: "MOVWBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVWBR", typ: "Int32"}, // sign extend int32 to int64
   362  		{name: "MOVDBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVDBR", typ: "Int64"},
   363  		{name: "FMOVDloadidx", argLength: 3, reg: fploadidx, asm: "FMOVD", typ: "Float64"},
   364  		{name: "FMOVSloadidx", argLength: 3, reg: fploadidx, asm: "FMOVS", typ: "Float32"},
   365  
   366  		// Prefetch instruction
   367  		// Do prefetch of address generated with arg0 and arg1 with option aux. arg0=addr,arg1=memory, aux=option.
   368  		{name: "DCBT", argLength: 2, aux: "Int64", reg: prefreg, asm: "DCBT", hasSideEffects: true},
   369  
   370  		// Store bytes in the reverse endian order of the arch into arg0.
   371  		// These are indexed stores with no offset field in the instruction so the auxint fields are not used.
   372  		{name: "MOVDBRstore", argLength: 3, reg: gpstore, asm: "MOVDBR", typ: "Mem", faultOnNilArg0: true}, // store 8 bytes reverse order
   373  		{name: "MOVWBRstore", argLength: 3, reg: gpstore, asm: "MOVWBR", typ: "Mem", faultOnNilArg0: true}, // store 4 bytes reverse order
   374  		{name: "MOVHBRstore", argLength: 3, reg: gpstore, asm: "MOVHBR", typ: "Mem", faultOnNilArg0: true}, // store 2 bytes reverse order
   375  
   376  		// Floating point loads from arg0+aux+auxint
   377  		{name: "FMOVDload", argLength: 2, reg: fpload, asm: "FMOVD", aux: "SymOff", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load double float
   378  		{name: "FMOVSload", argLength: 2, reg: fpload, asm: "FMOVS", aux: "SymOff", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load single float
   379  
   380  		// Store bytes in the endian order of the arch into arg0+aux+auxint
   381  		{name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store byte
   382  		{name: "MOVHstore", argLength: 3, reg: gpstore, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes
   383  		{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes
   384  		{name: "MOVDstore", argLength: 3, reg: gpstore, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes
   385  
   386  		// Store floating point value into arg0+aux+auxint
   387  		{name: "FMOVDstore", argLength: 3, reg: fpstore, asm: "FMOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store double float
   388  		{name: "FMOVSstore", argLength: 3, reg: fpstore, asm: "FMOVS", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store single float
   389  
   390  		// Stores using index and base registers
   391  		// Stores to arg[0] + arg[1]
   392  		{name: "MOVBstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVB", typ: "Mem"},     // store byte
   393  		{name: "MOVHstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVH", typ: "Mem"},     // store half word
   394  		{name: "MOVWstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVW", typ: "Mem"},     // store word
   395  		{name: "MOVDstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVD", typ: "Mem"},     // store double word
   396  		{name: "FMOVDstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVD", typ: "Mem"},   // store double float
   397  		{name: "FMOVSstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVS", typ: "Mem"},   // store single float
   398  		{name: "MOVHBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVHBR", typ: "Mem"}, // store half word reversed byte using index reg
   399  		{name: "MOVWBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVWBR", typ: "Mem"}, // store word reversed byte using index reg
   400  		{name: "MOVDBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVDBR", typ: "Mem"}, // store double word reversed byte using index reg
   401  
   402  		// The following ops store 0 into arg0+aux+auxint arg1=mem
   403  		{name: "MOVBstorezero", argLength: 2, reg: gpstorezero, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 1 byte
   404  		{name: "MOVHstorezero", argLength: 2, reg: gpstorezero, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 2 bytes
   405  		{name: "MOVWstorezero", argLength: 2, reg: gpstorezero, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 4 bytes
   406  		{name: "MOVDstorezero", argLength: 2, reg: gpstorezero, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 8 bytes
   407  
   408  		{name: "MOVDaddr", argLength: 1, reg: regInfo{inputs: []regMask{sp | sb | gp}, outputs: []regMask{gp}}, aux: "SymOff", asm: "MOVD", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxInt + aux.(*gc.Sym), arg0=SP/SB/GP
   409  
   410  		{name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", typ: "Int64", rematerializeable: true}, //
   411  		{name: "FMOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVD", rematerializeable: true},           //
   412  		{name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float32", asm: "FMOVS", rematerializeable: true},           //
   413  		{name: "FCMPU", argLength: 2, reg: fp2cr, asm: "FCMPU", typ: "Flags"},
   414  
   415  		{name: "CMP", argLength: 2, reg: gp2cr, asm: "CMP", typ: "Flags"},     // arg0 compare to arg1
   416  		{name: "CMPU", argLength: 2, reg: gp2cr, asm: "CMPU", typ: "Flags"},   // arg0 compare to arg1
   417  		{name: "CMPW", argLength: 2, reg: gp2cr, asm: "CMPW", typ: "Flags"},   // arg0 compare to arg1
   418  		{name: "CMPWU", argLength: 2, reg: gp2cr, asm: "CMPWU", typ: "Flags"}, // arg0 compare to arg1
   419  		{name: "CMPconst", argLength: 1, reg: gp1cr, asm: "CMP", aux: "Int64", typ: "Flags"},
   420  		{name: "CMPUconst", argLength: 1, reg: gp1cr, asm: "CMPU", aux: "Int64", typ: "Flags"},
   421  		{name: "CMPWconst", argLength: 1, reg: gp1cr, asm: "CMPW", aux: "Int32", typ: "Flags"},
   422  		{name: "CMPWUconst", argLength: 1, reg: gp1cr, asm: "CMPWU", aux: "Int32", typ: "Flags"},
   423  
   424  		// ISEL  arg2 ? arg0 : arg1
   425  		// ISELZ arg1 ? arg0 : $0
   426  		// auxInt values 0=LT 1=GT 2=EQ 3=SO (summary overflow/unordered) 4=GE 5=LE 6=NE 7=NSO (not summary overflow/not unordered)
   427  		// Note, auxInt^4 inverts the comparison condition. For example, LT^4 becomes GE, and "ISEL [a] x y z" is equivalent to "ISEL [a^4] y x z".
   428  		{name: "ISEL", argLength: 3, reg: crgp21, asm: "ISEL", aux: "Int32", typ: "Int32"},
   429  		{name: "ISELZ", argLength: 2, reg: crgp11, asm: "ISEL", aux: "Int32"},
   430  
   431  		// SETBC auxInt values 0=LT 1=GT 2=EQ     (CRbit=1)? 1 : 0
   432  		{name: "SETBC", argLength: 1, reg: crgp, asm: "SETBC", aux: "Int32", typ: "Int32"},
   433  		// SETBCR auxInt values 0=LT 1=GT 2=EQ     (CRbit=1)? 0 : 1
   434  		{name: "SETBCR", argLength: 1, reg: crgp, asm: "SETBCR", aux: "Int32", typ: "Int32"},
   435  
   436  		// pseudo-ops
   437  		{name: "Equal", argLength: 1, reg: crgp},         // bool, true flags encode x==y false otherwise.
   438  		{name: "NotEqual", argLength: 1, reg: crgp},      // bool, true flags encode x!=y false otherwise.
   439  		{name: "LessThan", argLength: 1, reg: crgp},      // bool, true flags encode  x<y false otherwise.
   440  		{name: "FLessThan", argLength: 1, reg: crgp},     // bool, true flags encode  x<y false otherwise.
   441  		{name: "LessEqual", argLength: 1, reg: crgp},     // bool, true flags encode  x<=y false otherwise.
   442  		{name: "FLessEqual", argLength: 1, reg: crgp},    // bool, true flags encode  x<=y false otherwise; PPC <= === !> which is wrong for NaN
   443  		{name: "GreaterThan", argLength: 1, reg: crgp},   // bool, true flags encode  x>y false otherwise.
   444  		{name: "FGreaterThan", argLength: 1, reg: crgp},  // bool, true flags encode  x>y false otherwise.
   445  		{name: "GreaterEqual", argLength: 1, reg: crgp},  // bool, true flags encode  x>=y false otherwise.
   446  		{name: "FGreaterEqual", argLength: 1, reg: crgp}, // bool, true flags encode  x>=y false otherwise.; PPC >= === !< which is wrong for NaN
   447  
   448  		// Scheduler ensures LoweredGetClosurePtr occurs only in entry block,
   449  		// and sorts it to the very beginning of the block to prevent other
   450  		// use of the closure pointer.
   451  		{name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{ctxt}}, zeroWidth: true},
   452  
   453  		// LoweredGetCallerSP returns the SP of the caller of the current function. arg0=mem.
   454  		{name: "LoweredGetCallerSP", argLength: 1, reg: gp01, rematerializeable: true},
   455  
   456  		// LoweredGetCallerPC evaluates to the PC to which its "caller" will return.
   457  		// I.e., if f calls g and g "calls" sys.GetCallerPC,
   458  		// the result should be the PC within f that g will return to.
   459  		// See runtime/stubs.go for a more detailed discussion.
   460  		{name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true},
   461  
   462  		// arg0=ptr, arg1=mem, returns void.  Faults if ptr is nil.
   463  		{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gp | sp | sb}, clobbers: tmp}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true},
   464  		// Round ops to block fused-multiply-add extraction.
   465  		{name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
   466  		{name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
   467  
   468  		{name: "CALLstatic", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true},                                       // call static function aux.(*obj.LSym).  arg0=mem, auxint=argsize, returns mem
   469  		{name: "CALLtail", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true},                         // tail call static function aux.(*obj.LSym).  arg0=mem, auxint=argsize, returns mem
   470  		{name: "CALLclosure", argLength: -1, reg: regInfo{inputs: []regMask{callptr, ctxt, 0}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call function via closure.  arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
   471  		{name: "CALLinter", argLength: -1, reg: regInfo{inputs: []regMask{callptr}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true},            // call fn by pointer.  arg0=codeptr, arg1=mem, auxint=argsize, returns mem
   472  
   473  		// large or unaligned zeroing
   474  		// arg0 = address of memory to zero (in R20, changed as side effect; the listing below shows it as R3)
   475  		// returns mem
   476  		//
   477  		// a loop is generated when there is more than one iteration
   478  		// needed to clear 4 doublewords
   479  		//
   480  		//	XXLXOR	VS32,VS32,VS32
   481  		// 	MOVD	$len/32,R31
   482  		//	MOVD	R31,CTR
   483  		//	MOVD	$16,R31
   484  		//	loop:
   485  		//	STXVD2X VS32,(R0)(R3)
   486  		//	STXVD2X	VS32,(R31)(R3)
   487  		//	ADD	R3,32
   488  		//	BC	loop
   489  
   490  		// remaining doubleword clears generated as needed
   491  		//	MOVD	R0,(R3)
   492  		//	MOVD	R0,8(R3)
   493  		//	MOVD	R0,16(R3)
   494  		//	MOVD	R0,24(R3)
   495  
   496  		// one or more of these to clear remainder < 8 bytes
   497  		//	MOVW	R0,n1(R3)
   498  		//	MOVH	R0,n2(R3)
   499  		//	MOVB	R0,n3(R3)
   500  		{
   501  			name:      "LoweredZero",
   502  			aux:       "Int64",
   503  			argLength: 2,
   504  			reg: regInfo{
   505  				inputs:   []regMask{buildReg("R20")},
   506  				clobbers: buildReg("R20"),
   507  			},
   508  			clobberFlags:   true,
   509  			typ:            "Mem",
   510  			faultOnNilArg0: true,
   511  			unsafePoint:    true,
   512  		},
   513  		{
   514  			name:      "LoweredZeroShort",
   515  			aux:       "Int64",
   516  			argLength: 2,
   517  			reg: regInfo{
   518  				inputs: []regMask{gp}},
   519  			typ:            "Mem",
   520  			faultOnNilArg0: true,
   521  			unsafePoint:    true,
   522  		},
   523  		{
   524  			name:      "LoweredQuadZeroShort",
   525  			aux:       "Int64",
   526  			argLength: 2,
   527  			reg: regInfo{
   528  				inputs: []regMask{gp},
   529  			},
   530  			typ:            "Mem",
   531  			faultOnNilArg0: true,
   532  			unsafePoint:    true,
   533  		},
   534  		{
   535  			name:      "LoweredQuadZero",
   536  			aux:       "Int64",
   537  			argLength: 2,
   538  			reg: regInfo{
   539  				inputs:   []regMask{buildReg("R20")},
   540  				clobbers: buildReg("R20"),
   541  			},
   542  			clobberFlags:   true,
   543  			typ:            "Mem",
   544  			faultOnNilArg0: true,
   545  			unsafePoint:    true,
   546  		},
   547  
   548  		// R31 is temp register
   549  		// Loop code:
   550  		//	MOVD len/32,R31		set up loop ctr
   551  		//	MOVD R31,CTR
   552  		//	MOVD $16,R31		index register
   553  		// loop:
   554  		//	LXVD2X (R0)(R4),VS32
   555  		//	LXVD2X (R31)(R4),VS33
   556  		//	ADD  R4,$32          increment src
   557  		//	STXVD2X VS32,(R0)(R3)
   558  		//	STXVD2X VS33,(R31)(R3)
   559  		//	ADD  R3,$32          increment dst
   560  		//	BC 16,0,loop         branch ctr
   561  		// For this purpose, VS32 and VS33 are treated as
   562  		// scratch registers. Since regalloc does not
   563  		// track vector registers, even if it could be marked
   564  		// as clobbered it would have no effect.
   565  		// TODO: If vector registers are managed by regalloc
   566  		// mark these as clobbered.
   567  		//
   568  		// Bytes not moved by this loop are moved
   569  		// with a combination of the following instructions,
   570  		// starting with the largest sizes and generating as
   571  		// many as needed, using the appropriate offset value.
   572  		//	MOVD  n(R4),R14
   573  		//	MOVD  R14,n(R3)
   574  		//	MOVW  n1(R4),R14
   575  		//	MOVW  R14,n1(R3)
   576  		//	MOVH  n2(R4),R14
   577  		//	MOVH  R14,n2(R3)
   578  		//	MOVB  n3(R4),R14
   579  		//	MOVB  R14,n3(R3)
   580  
   581  		{
   582  			name:      "LoweredMove",
   583  			aux:       "Int64",
   584  			argLength: 3,
   585  			reg: regInfo{
   586  				inputs:   []regMask{buildReg("R20"), buildReg("R21")},
   587  				clobbers: buildReg("R20 R21"),
   588  			},
   589  			clobberFlags:   true,
   590  			typ:            "Mem",
   591  			faultOnNilArg0: true,
   592  			faultOnNilArg1: true,
   593  			unsafePoint:    true,
   594  		},
   595  		{
   596  			name:      "LoweredMoveShort",
   597  			aux:       "Int64",
   598  			argLength: 3,
   599  			reg: regInfo{
   600  				inputs: []regMask{gp, gp},
   601  			},
   602  			typ:            "Mem",
   603  			faultOnNilArg0: true,
   604  			faultOnNilArg1: true,
   605  			unsafePoint:    true,
   606  		},
   607  
   608  		// The following is similar to the LoweredMove, but uses
   609  		// LXV instead of LXVD2X, which does not require an index
   610  		// register and will do 4 in a loop instead of only 2.
   611  		{
   612  			name:      "LoweredQuadMove",
   613  			aux:       "Int64",
   614  			argLength: 3,
   615  			reg: regInfo{
   616  				inputs:   []regMask{buildReg("R20"), buildReg("R21")},
   617  				clobbers: buildReg("R20 R21"),
   618  			},
   619  			clobberFlags:   true,
   620  			typ:            "Mem",
   621  			faultOnNilArg0: true,
   622  			faultOnNilArg1: true,
   623  			unsafePoint:    true,
   624  		},
   625  
   626  		{
   627  			name:      "LoweredQuadMoveShort",
   628  			aux:       "Int64",
   629  			argLength: 3,
   630  			reg: regInfo{
   631  				inputs: []regMask{gp, gp},
   632  			},
   633  			typ:            "Mem",
   634  			faultOnNilArg0: true,
   635  			faultOnNilArg1: true,
   636  			unsafePoint:    true,
   637  		},
   638  
   639  		{name: "LoweredAtomicStore8", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
   640  		{name: "LoweredAtomicStore32", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
   641  		{name: "LoweredAtomicStore64", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
   642  
   643  		{name: "LoweredAtomicLoad8", argLength: 2, reg: gpload, typ: "UInt8", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   644  		{name: "LoweredAtomicLoad32", argLength: 2, reg: gpload, typ: "UInt32", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   645  		{name: "LoweredAtomicLoad64", argLength: 2, reg: gpload, typ: "Int64", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   646  		{name: "LoweredAtomicLoadPtr", argLength: 2, reg: gpload, typ: "Int64", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   647  
   648  		// atomic add32, 64
   649  		// LWSYNC
   650  		// LDAR         (Rarg0), Rout
   651  		// ADD		Rarg1, Rout
   652  		// STDCCC       Rout, (Rarg0)
   653  		// BNE          -3(PC)
   654  		// return new sum
   655  		{name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   656  		{name: "LoweredAtomicAdd64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   657  
   658  		// atomic exchange8, 32, 64
   659  		// LWSYNC
   660  		// LDAR         (Rarg0), Rout
   661  		// STDCCC       Rarg1, (Rarg0)
   662  		// BNE          -2(PC)
   663  		// ISYNC
   664  		// return old val
   665  		{name: "LoweredAtomicExchange8", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   666  		{name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   667  		{name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   668  
   669  		// atomic compare and swap.
   670  		// arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. auxint must be zero.
   671  		// if *arg0 == arg1 {
   672  		//   *arg0 = arg2
   673  		//   return (true, memory)
   674  		// } else {
   675  		//   return (false, memory)
   676  		// }
   677  		// SYNC
   678  		// LDAR		(Rarg0), Rtmp
   679  		// CMP		Rarg1, Rtmp
   680  		// BNE		3(PC)
   681  		// STDCCC	Rarg2, (Rarg0)
   682  		// BNE		-4(PC)
   683  		// CBNZ         Rtmp, -4(PC)
   684  		// CSET         EQ, Rout
   685  		{name: "LoweredAtomicCas64", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   686  		{name: "LoweredAtomicCas32", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   687  
   688  		// atomic 8/32 and/or.
   689  		// *arg0 &= (|=) arg1. arg2=mem. returns memory. auxint must be zero.
   690  		// LBAR/LWAT	(Rarg0), Rtmp
   691  		// AND/OR	Rarg1, Rtmp
   692  		// STBCCC/STWCCC Rtmp, (Rarg0), Rtmp
   693  		// BNE		Rtmp, -3(PC)
   694  		{name: "LoweredAtomicAnd8", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
   695  		{name: "LoweredAtomicAnd32", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
   696  		{name: "LoweredAtomicOr8", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
   697  		{name: "LoweredAtomicOr32", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
   698  
   699  		// LoweredWB invokes runtime.gcWriteBarrier. arg0=mem, auxint=# of buffer entries needed
   700  		// It preserves R0 through R17 (except special registers R1, R2, R11, R12, R13), g, and R20 and R21,
   701  		// but may clobber anything else, including R31 (REGTMP).
   702  		// Returns a pointer to a write barrier buffer in R29.
   703  		{name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: (callerSave &^ buildReg("R0 R3 R4 R5 R6 R7 R8 R9 R10 R14 R15 R16 R17 R20 R21 g")) | buildReg("R31"), outputs: []regMask{buildReg("R29")}}, clobberFlags: true, aux: "Int64"},
   704  
   705  		{name: "LoweredPubBarrier", argLength: 1, asm: "LWSYNC", hasSideEffects: true}, // Do data barrier. arg0=memory
   706  
   707  		// LoweredPanicBoundsRR takes x and y, two values that caused a bounds check to fail.
   708  		// The RC and CR versions are used when one of the arguments is a constant. CC is used
   709  		// when both are constant (normally both 0, as prove derives the fact that a [0] bounds
   710  		// failure means the length must have also been 0).
   711  		// AuxInt contains a report code (see PanicBounds in genericOps.go).
   712  		{name: "LoweredPanicBoundsRR", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{first7, first7}}, typ: "Mem", call: true}, // arg0=x, arg1=y, arg2=mem, returns memory.
   713  		{name: "LoweredPanicBoundsRC", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{first7}}, typ: "Mem", call: true},  // arg0=x, arg1=mem, returns memory.
   714  		{name: "LoweredPanicBoundsCR", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{first7}}, typ: "Mem", call: true},  // arg0=y, arg1=mem, returns memory.
   715  		{name: "LoweredPanicBoundsCC", argLength: 1, aux: "PanicBoundsCC", reg: regInfo{}, typ: "Mem", call: true},                          // arg0=mem, returns memory.
   716  
   717  		// (InvertFlags (CMP a b)) == (CMP b a)
   718  		// So if we want (LessThan (CMP a b)) but we can't do that because a is a constant,
   719  		// then we do (LessThan (InvertFlags (CMP b a))) instead.
   720  		// Rewrites will convert this to (GreaterThan (CMP b a)).
   721  		// InvertFlags is a pseudo-op which can't appear in assembly output.
   722  		{name: "InvertFlags", argLength: 1}, // reverse direction of arg0
   723  
   724  		// Constant flag values. For any comparison, there are 3 possible
   725  		// outcomes: either the three from the signed total order (<,==,>)
   726  		// or the three from the unsigned total order, depending on which
   727  		// comparison operation was used (CMP or CMPU -- PPC is different from
   728  		// the other architectures, which have a single comparison producing
   729  		// both signed and unsigned comparison results.)
   730  
   731  		// These ops are for temporary use by rewrite rules. They
   732  		// cannot appear in the generated assembly.
   733  		{name: "FlagEQ"}, // equal
   734  		{name: "FlagLT"}, // signed < or unsigned <
   735  		{name: "FlagGT"}, // signed > or unsigned >
   736  	}
   737  
   738  	blocks := []blockData{
   739  		{name: "EQ", controls: 1},
   740  		{name: "NE", controls: 1},
   741  		{name: "LT", controls: 1},
   742  		{name: "LE", controls: 1},
   743  		{name: "GT", controls: 1},
   744  		{name: "GE", controls: 1},
   745  		{name: "FLT", controls: 1},
   746  		{name: "FLE", controls: 1},
   747  		{name: "FGT", controls: 1},
   748  		{name: "FGE", controls: 1},
   749  	}
   750  
   751  	archs = append(archs, arch{
   752  		name:               "PPC64",
   753  		pkg:                "cmd/internal/obj/ppc64",
   754  		genfile:            "../../ppc64/ssa.go",
   755  		ops:                ops,
   756  		blocks:             blocks,
   757  		regnames:           regNamesPPC64,
   758  		ParamIntRegNames:   "R3 R4 R5 R6 R7 R8 R9 R10 R14 R15 R16 R17",
   759  		ParamFloatRegNames: "F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12",
   760  		gpregmask:          gp,
   761  		fpregmask:          fp,
   762  		specialregmask:     xer,
   763  		framepointerreg:    -1,
   764  		linkreg:            -1, // not used
   765  	})
   766  }
   767  

View as plain text