Source file src/cmd/compile/internal/ssa/_gen/S390XOps.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package main
     6  
     7  import "strings"
     8  
     9  // Notes:
    10  //  - Integer types live in the low portion of registers. Upper portions are junk.
    11  //  - Boolean types use the low-order byte of a register. 0=false, 1=true.
    12  //    Upper bytes are junk.
    13  //  - When doing sub-register operations, we try to write the whole
    14  //    destination register to avoid a partial-register write.
    15  //  - Unused portions of AuxInt (or the Val portion of ValAndOff) are
    16  //    filled by sign-extending the used portion. Users of AuxInt which interpret
    17  //    AuxInt as unsigned (e.g. shifts) must be careful.
    18  //  - The SB 'register' is implemented using instruction-relative addressing. This
    19  //    places some limitations on when and how memory operands that are addressed
    20  //    relative to SB can be used:
    21  //
    22  //     1. Pseudo-instructions do not always map to a single machine instruction when
    23  //        using the SB 'register' to address data. This is because many machine
    24  //        instructions do not have relative long (RL suffix) equivalents. For example,
    25  //        ADDload, which is assembled as AG.
    26  //
    27  //     2. Loads and stores using relative addressing require the data be aligned
    28  //        according to its size (8-bytes for double words, 4-bytes for words
    29  //        and so on).
    30  //
    31  //    We can always work around these by inserting LARL instructions (load address
    32  //    relative long) in the assembler, but typically this results in worse code
    33  //    generation because the address can't be re-used. Inserting instructions in the
    34  //    assembler also means clobbering the temp register and it is a long-term goal
    35  //    to prevent the compiler doing this so that it can be allocated as a normal
    36  //    register.
    37  //
    38  // For more information about the z/Architecture, the instruction set and the
    39  // addressing modes it supports take a look at the z/Architecture Principles of
    40  // Operation: http://publibfp.boulder.ibm.com/epubs/pdf/dz9zr010.pdf
    41  //
    42  // Suffixes encode the bit width of pseudo-instructions.
    43  // D (double word)  = 64 bit (frequently omitted)
    44  // W (word)         = 32 bit
    45  // H (half word)    = 16 bit
    46  // B (byte)         = 8 bit
    47  // S (single prec.) = 32 bit (double precision is omitted)
    48  
    49  // copied from ../../s390x/reg.go
    50  var regNamesS390X = []string{
    51  	"R0",
    52  	"R1",
    53  	"R2",
    54  	"R3",
    55  	"R4",
    56  	"R5",
    57  	"R6",
    58  	"R7",
    59  	"R8",
    60  	"R9",
    61  	"R10",
    62  	"R11",
    63  	"R12",
    64  	"g", // R13
    65  	"R14",
    66  	"SP", // R15
    67  	"F0",
    68  	"F1",
    69  	"F2",
    70  	"F3",
    71  	"F4",
    72  	"F5",
    73  	"F6",
    74  	"F7",
    75  	"F8",
    76  	"F9",
    77  	"F10",
    78  	"F11",
    79  	"F12",
    80  	"F13",
    81  	"F14",
    82  	"F15",
    83  
    84  	// If you add registers, update asyncPreempt in runtime.
    85  
    86  	//pseudo-registers
    87  	"SB",
    88  }
    89  
    90  func init() {
    91  	// Make map from reg names to reg integers.
    92  	if len(regNamesS390X) > 64 {
    93  		panic("too many registers")
    94  	}
    95  	num := map[string]int{}
    96  	for i, name := range regNamesS390X {
    97  		num[name] = i
    98  	}
    99  	buildReg := func(s string) regMask {
   100  		m := regMask(0)
   101  		for _, r := range strings.Split(s, " ") {
   102  			if n, ok := num[r]; ok {
   103  				m |= regMask(1) << uint(n)
   104  				continue
   105  			}
   106  			panic("register " + r + " not found")
   107  		}
   108  		return m
   109  	}
   110  
   111  	// Common individual register masks
   112  	var (
   113  		sp  = buildReg("SP")
   114  		sb  = buildReg("SB")
   115  		r0  = buildReg("R0")
   116  		tmp = buildReg("R11") // R11 is used as a temporary in a small number of instructions.
   117  		lr  = buildReg("R14")
   118  
   119  		// R10 is reserved by the assembler.
   120  		gp   = buildReg("R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14")
   121  		gpg  = gp | buildReg("g")
   122  		gpsp = gp | sp
   123  
   124  		// R0 is considered to contain the value 0 in address calculations.
   125  		ptr     = gp &^ r0
   126  		ptrsp   = ptr | sp
   127  		ptrspsb = ptrsp | sb
   128  
   129  		fp         = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15")
   130  		callerSave = gp | fp | buildReg("g") // runtime.setg (and anything calling it) may clobber g
   131  		r1         = buildReg("R1")
   132  		r2         = buildReg("R2")
   133  		r3         = buildReg("R3")
   134  		r9         = buildReg("R9")
   135  	)
   136  	// Common slices of register masks
   137  	var (
   138  		gponly = []regMask{gp}
   139  		fponly = []regMask{fp}
   140  	)
   141  
   142  	// Common regInfo
   143  	var (
   144  		gp01    = regInfo{inputs: []regMask{}, outputs: gponly}
   145  		gp11    = regInfo{inputs: []regMask{gp}, outputs: gponly}
   146  		gp11sp  = regInfo{inputs: []regMask{gpsp}, outputs: gponly}
   147  		gp21    = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
   148  		gp21sp  = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly}
   149  		gp21tmp = regInfo{inputs: []regMask{gp &^ tmp, gp &^ tmp}, outputs: []regMask{gp &^ tmp}, clobbers: tmp}
   150  
   151  		// R0 evaluates to 0 when used as the number of bits to shift
   152  		// so we need to exclude it from that operand.
   153  		sh21 = regInfo{inputs: []regMask{gp, ptr}, outputs: gponly}
   154  
   155  		addr    = regInfo{inputs: []regMask{sp | sb}, outputs: gponly}
   156  		addridx = regInfo{inputs: []regMask{sp | sb, ptrsp}, outputs: gponly}
   157  
   158  		gp2flags       = regInfo{inputs: []regMask{gpsp, gpsp}}
   159  		gp1flags       = regInfo{inputs: []regMask{gpsp}}
   160  		gp2flags1      = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
   161  		gp11flags      = regInfo{inputs: []regMask{gp}, outputs: gponly}
   162  		gp21flags      = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
   163  		gp2flags1flags = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
   164  
   165  		gpload       = regInfo{inputs: []regMask{ptrspsb, 0}, outputs: gponly}
   166  		gploadidx    = regInfo{inputs: []regMask{ptrspsb, ptrsp, 0}, outputs: gponly}
   167  		gpopload     = regInfo{inputs: []regMask{gp, ptrsp, 0}, outputs: gponly}
   168  		gpstore      = regInfo{inputs: []regMask{ptrspsb, gpsp, 0}}
   169  		gpstoreconst = regInfo{inputs: []regMask{ptrspsb, 0}}
   170  		gpstoreidx   = regInfo{inputs: []regMask{ptrsp, ptrsp, gpsp, 0}}
   171  		gpstorebr    = regInfo{inputs: []regMask{ptrsp, gpsp, 0}}
   172  		gpstorelaa   = regInfo{inputs: []regMask{ptrspsb, gpsp, 0}, outputs: gponly}
   173  		gpstorelab   = regInfo{inputs: []regMask{r1, gpsp, 0}, clobbers: r1}
   174  
   175  		gpmvc = regInfo{inputs: []regMask{ptrsp, ptrsp, 0}}
   176  
   177  		fp01        = regInfo{inputs: []regMask{}, outputs: fponly}
   178  		fp21        = regInfo{inputs: []regMask{fp, fp}, outputs: fponly}
   179  		fp31        = regInfo{inputs: []regMask{fp, fp, fp}, outputs: fponly}
   180  		fp21clobber = regInfo{inputs: []regMask{fp, fp}, outputs: fponly}
   181  		fpgp        = regInfo{inputs: fponly, outputs: gponly}
   182  		gpfp        = regInfo{inputs: gponly, outputs: fponly}
   183  		fp11        = regInfo{inputs: fponly, outputs: fponly}
   184  		fp1flags    = regInfo{inputs: []regMask{fp}}
   185  		fp11clobber = regInfo{inputs: fponly, outputs: fponly}
   186  		fp2flags    = regInfo{inputs: []regMask{fp, fp}}
   187  
   188  		fpload    = regInfo{inputs: []regMask{ptrspsb, 0}, outputs: fponly}
   189  		fploadidx = regInfo{inputs: []regMask{ptrsp, ptrsp, 0}, outputs: fponly}
   190  
   191  		fpstore    = regInfo{inputs: []regMask{ptrspsb, fp, 0}}
   192  		fpstoreidx = regInfo{inputs: []regMask{ptrsp, ptrsp, fp, 0}}
   193  
   194  		sync = regInfo{inputs: []regMask{0}}
   195  
   196  		// LoweredAtomicCas may overwrite arg1, so force it to R0 for now.
   197  		cas = regInfo{inputs: []regMask{ptrsp, r0, gpsp, 0}, outputs: []regMask{gp, 0}, clobbers: r0}
   198  
   199  		// LoweredAtomicExchange overwrites the output before executing
   200  		// CS{,G}, so the output register must not be the same as the
   201  		// input register. For now we just force the output register to
   202  		// R0.
   203  		exchange = regInfo{inputs: []regMask{ptrsp, gpsp &^ r0, 0}, outputs: []regMask{r0, 0}}
   204  	)
   205  
   206  	var S390Xops = []opData{
   207  		// fp ops
   208  		{name: "FADDS", argLength: 2, reg: fp21clobber, typ: "(Float32,Flags)", asm: "FADDS", commutative: true, resultInArg0: true}, // fp32 arg0 + arg1
   209  		{name: "FADD", argLength: 2, reg: fp21clobber, typ: "(Float64,Flags)", asm: "FADD", commutative: true, resultInArg0: true},   // fp64 arg0 + arg1
   210  		{name: "FSUBS", argLength: 2, reg: fp21clobber, typ: "(Float32,Flags)", asm: "FSUBS", resultInArg0: true},                    // fp32 arg0 - arg1
   211  		{name: "FSUB", argLength: 2, reg: fp21clobber, typ: "(Float64,Flags)", asm: "FSUB", resultInArg0: true},                      // fp64 arg0 - arg1
   212  		{name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true, resultInArg0: true},                                // fp32 arg0 * arg1
   213  		{name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true, resultInArg0: true},                                  // fp64 arg0 * arg1
   214  		{name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS", resultInArg0: true},                                                   // fp32 arg0 / arg1
   215  		{name: "FDIV", argLength: 2, reg: fp21, asm: "FDIV", resultInArg0: true},                                                     // fp64 arg0 / arg1
   216  		{name: "FNEGS", argLength: 1, reg: fp11clobber, asm: "FNEGS", clobberFlags: true},                                            // fp32 -arg0
   217  		{name: "FNEG", argLength: 1, reg: fp11clobber, asm: "FNEG", clobberFlags: true},                                              // fp64 -arg0
   218  		{name: "FMADDS", argLength: 3, reg: fp31, asm: "FMADDS", resultInArg0: true},                                                 // fp32 arg1 * arg2 + arg0
   219  		{name: "FMADD", argLength: 3, reg: fp31, asm: "FMADD", resultInArg0: true},                                                   // fp64 arg1 * arg2 + arg0
   220  		{name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS", resultInArg0: true},                                                 // fp32 arg1 * arg2 - arg0
   221  		{name: "FMSUB", argLength: 3, reg: fp31, asm: "FMSUB", resultInArg0: true},                                                   // fp64 arg1 * arg2 - arg0
   222  		{name: "LPDFR", argLength: 1, reg: fp11, asm: "LPDFR"},                                                                       // fp64/fp32 clear sign bit (load positive: |arg0|)
   223  		{name: "LNDFR", argLength: 1, reg: fp11, asm: "LNDFR"},                                                                       // fp64/fp32 set sign bit (load negative: -|arg0|)
   224  		{name: "CPSDR", argLength: 2, reg: fp21, asm: "CPSDR"},                                                                       // fp64/fp32 copy arg1 sign bit to arg0
   225  
   226  		// Single element vector floating point min / max instructions
   227  		{name: "WFMAXDB", argLength: 2, reg: fp21, asm: "WFMAXDB", typ: "Float64"}, // max[float64](arg0, arg1)
   228  		{name: "WFMAXSB", argLength: 2, reg: fp21, asm: "WFMAXSB", typ: "Float32"}, // max[float32](arg0, arg1)
   229  		{name: "WFMINDB", argLength: 2, reg: fp21, asm: "WFMINDB", typ: "Float64"}, // min[float64](arg0, arg1)
   230  		{name: "WFMINSB", argLength: 2, reg: fp21, asm: "WFMINSB", typ: "Float32"}, // min[float32](arg0, arg1)
   231  
   232  		// Round to integer, float64 only.
   233  		//
   234  		// aux | rounding mode
   235  		// ----+-----------------------------------
   236  		//   1 | round to nearest, ties away from 0
   237  		//   4 | round to nearest, ties to even
   238  		//   5 | round toward 0
   239  		//   6 | round toward +∞
   240  		//   7 | round toward -∞
   241  		{name: "FIDBR", argLength: 1, reg: fp11, asm: "FIDBR", aux: "Int8"},
   242  
   243  		{name: "FMOVSload", argLength: 2, reg: fpload, asm: "FMOVS", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // fp32 load
   244  		{name: "FMOVDload", argLength: 2, reg: fpload, asm: "FMOVD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // fp64 load
   245  		{name: "FMOVSconst", reg: fp01, asm: "FMOVS", aux: "Float32", rematerializeable: true},                               // fp32 constant
   246  		{name: "FMOVDconst", reg: fp01, asm: "FMOVD", aux: "Float64", rematerializeable: true},                               // fp64 constant
   247  		{name: "FMOVSloadidx", argLength: 3, reg: fploadidx, asm: "FMOVS", aux: "SymOff", symEffect: "Read"},                 // fp32 load indexed by i
   248  		{name: "FMOVDloadidx", argLength: 3, reg: fploadidx, asm: "FMOVD", aux: "SymOff", symEffect: "Read"},                 // fp64 load indexed by i
   249  
   250  		{name: "FMOVSstore", argLength: 3, reg: fpstore, asm: "FMOVS", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // fp32 store
   251  		{name: "FMOVDstore", argLength: 3, reg: fpstore, asm: "FMOVD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // fp64 store
   252  		{name: "FMOVSstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVS", aux: "SymOff", symEffect: "Write"},                 // fp32 indexed by i store
   253  		{name: "FMOVDstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVD", aux: "SymOff", symEffect: "Write"},                 // fp64 indexed by i store
   254  
   255  		// binary ops
   256  		{name: "ADD", argLength: 2, reg: gp21sp, asm: "ADD", commutative: true, clobberFlags: true},                                                                  // arg0 + arg1
   257  		{name: "ADDW", argLength: 2, reg: gp21sp, asm: "ADDW", commutative: true, clobberFlags: true},                                                                // arg0 + arg1
   258  		{name: "ADDconst", argLength: 1, reg: gp11sp, asm: "ADD", aux: "Int32", typ: "UInt64", clobberFlags: true},                                                   // arg0 + auxint
   259  		{name: "ADDWconst", argLength: 1, reg: gp11sp, asm: "ADDW", aux: "Int32", clobberFlags: true},                                                                // arg0 + auxint
   260  		{name: "ADDload", argLength: 3, reg: gpopload, asm: "ADD", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"},   // arg0 + *arg1. arg2=mem
   261  		{name: "ADDWload", argLength: 3, reg: gpopload, asm: "ADDW", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 + *arg1. arg2=mem
   262  
   263  		{name: "SUB", argLength: 2, reg: gp21, asm: "SUB", clobberFlags: true},                                                                                       // arg0 - arg1
   264  		{name: "SUBW", argLength: 2, reg: gp21, asm: "SUBW", clobberFlags: true},                                                                                     // arg0 - arg1
   265  		{name: "SUBconst", argLength: 1, reg: gp11, asm: "SUB", aux: "Int32", resultInArg0: true, clobberFlags: true},                                                // arg0 - auxint
   266  		{name: "SUBWconst", argLength: 1, reg: gp11, asm: "SUBW", aux: "Int32", resultInArg0: true, clobberFlags: true},                                              // arg0 - auxint
   267  		{name: "SUBload", argLength: 3, reg: gpopload, asm: "SUB", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"},   // arg0 - *arg1. arg2=mem
   268  		{name: "SUBWload", argLength: 3, reg: gpopload, asm: "SUBW", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 - *arg1. arg2=mem
   269  
   270  		{name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true, resultInArg0: true, clobberFlags: true},                                // arg0 * arg1
   271  		{name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true, resultInArg0: true, clobberFlags: true},                                // arg0 * arg1
   272  		{name: "MULLDconst", argLength: 1, reg: gp11, asm: "MULLD", aux: "Int32", typ: "Int64", resultInArg0: true, clobberFlags: true},                                // arg0 * auxint
   273  		{name: "MULLWconst", argLength: 1, reg: gp11, asm: "MULLW", aux: "Int32", typ: "Int32", resultInArg0: true, clobberFlags: true},                                // arg0 * auxint
   274  		{name: "MULLDload", argLength: 3, reg: gpopload, asm: "MULLD", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 * *arg1. arg2=mem
   275  		{name: "MULLWload", argLength: 3, reg: gpopload, asm: "MULLW", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 * *arg1. arg2=mem
   276  
   277  		{name: "MULHD", argLength: 2, reg: gp21tmp, asm: "MULHD", typ: "Int64", commutative: true, resultInArg0: true, clobberFlags: true},   // (arg0 * arg1) >> width
   278  		{name: "MULHDU", argLength: 2, reg: gp21tmp, asm: "MULHDU", typ: "Int64", commutative: true, resultInArg0: true, clobberFlags: true}, // (arg0 * arg1) >> width
   279  
   280  		{name: "DIVD", argLength: 2, reg: gp21tmp, asm: "DIVD", resultInArg0: true, clobberFlags: true},   // arg0 / arg1
   281  		{name: "DIVW", argLength: 2, reg: gp21tmp, asm: "DIVW", resultInArg0: true, clobberFlags: true},   // arg0 / arg1
   282  		{name: "DIVDU", argLength: 2, reg: gp21tmp, asm: "DIVDU", resultInArg0: true, clobberFlags: true}, // arg0 / arg1
   283  		{name: "DIVWU", argLength: 2, reg: gp21tmp, asm: "DIVWU", resultInArg0: true, clobberFlags: true}, // arg0 / arg1
   284  
   285  		{name: "MODD", argLength: 2, reg: gp21tmp, asm: "MODD", resultInArg0: true, clobberFlags: true}, // arg0 % arg1
   286  		{name: "MODW", argLength: 2, reg: gp21tmp, asm: "MODW", resultInArg0: true, clobberFlags: true}, // arg0 % arg1
   287  
   288  		{name: "MODDU", argLength: 2, reg: gp21tmp, asm: "MODDU", resultInArg0: true, clobberFlags: true}, // arg0 % arg1
   289  		{name: "MODWU", argLength: 2, reg: gp21tmp, asm: "MODWU", resultInArg0: true, clobberFlags: true}, // arg0 % arg1
   290  
   291  		{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true, clobberFlags: true},                                                                    // arg0 & arg1
   292  		{name: "ANDW", argLength: 2, reg: gp21, asm: "ANDW", commutative: true, clobberFlags: true},                                                                  // arg0 & arg1
   293  		{name: "ANDconst", argLength: 1, reg: gp11, asm: "AND", aux: "Int64", resultInArg0: true, clobberFlags: true},                                                // arg0 & auxint
   294  		{name: "ANDWconst", argLength: 1, reg: gp11, asm: "ANDW", aux: "Int32", resultInArg0: true, clobberFlags: true},                                              // arg0 & auxint
   295  		{name: "ANDload", argLength: 3, reg: gpopload, asm: "AND", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"},   // arg0 & *arg1. arg2=mem
   296  		{name: "ANDWload", argLength: 3, reg: gpopload, asm: "ANDW", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 & *arg1. arg2=mem
   297  
   298  		{name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true, clobberFlags: true},                                                                    // arg0 | arg1
   299  		{name: "ORW", argLength: 2, reg: gp21, asm: "ORW", commutative: true, clobberFlags: true},                                                                  // arg0 | arg1
   300  		{name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64", resultInArg0: true, clobberFlags: true},                                                // arg0 | auxint
   301  		{name: "ORWconst", argLength: 1, reg: gp11, asm: "ORW", aux: "Int32", resultInArg0: true, clobberFlags: true},                                              // arg0 | auxint
   302  		{name: "ORload", argLength: 3, reg: gpopload, asm: "OR", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"},   // arg0 | *arg1. arg2=mem
   303  		{name: "ORWload", argLength: 3, reg: gpopload, asm: "ORW", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 | *arg1. arg2=mem
   304  
   305  		{name: "XOR", argLength: 2, reg: gp21, asm: "XOR", commutative: true, clobberFlags: true},                                                                    // arg0 ^ arg1
   306  		{name: "XORW", argLength: 2, reg: gp21, asm: "XORW", commutative: true, clobberFlags: true},                                                                  // arg0 ^ arg1
   307  		{name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64", resultInArg0: true, clobberFlags: true},                                                // arg0 ^ auxint
   308  		{name: "XORWconst", argLength: 1, reg: gp11, asm: "XORW", aux: "Int32", resultInArg0: true, clobberFlags: true},                                              // arg0 ^ auxint
   309  		{name: "XORload", argLength: 3, reg: gpopload, asm: "XOR", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"},   // arg0 ^ *arg1. arg2=mem
   310  		{name: "XORWload", argLength: 3, reg: gpopload, asm: "XORW", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 ^ *arg1. arg2=mem
   311  
   312  		// Arithmetic ops with carry/borrow chain.
   313  		//
   314  		// A carry is represented by a condition code of 2 or 3 (GT or OV).
   315  		// A borrow is represented by a condition code of 0 or 1 (EQ or LT).
   316  		{name: "ADDC", argLength: 2, reg: gp21flags, asm: "ADDC", typ: "(UInt64,Flags)", commutative: true},                          // (arg0 + arg1, carry out)
   317  		{name: "ADDCconst", argLength: 1, reg: gp11flags, asm: "ADDC", typ: "(UInt64,Flags)", aux: "Int16"},                          // (arg0 + auxint, carry out)
   318  		{name: "ADDE", argLength: 3, reg: gp2flags1flags, asm: "ADDE", typ: "(UInt64,Flags)", commutative: true, resultInArg0: true}, // (arg0 + arg1 + arg2 (carry in), carry out)
   319  		{name: "SUBC", argLength: 2, reg: gp21flags, asm: "SUBC", typ: "(UInt64,Flags)"},                                             // (arg0 - arg1, borrow out)
   320  		{name: "SUBE", argLength: 3, reg: gp2flags1flags, asm: "SUBE", typ: "(UInt64,Flags)", resultInArg0: true},                    // (arg0 - arg1 - arg2 (borrow in), borrow out)
   321  
   322  		// Comparisons.
   323  		{name: "CMP", argLength: 2, reg: gp2flags, asm: "CMP", typ: "Flags"},   // arg0 compare to arg1
   324  		{name: "CMPW", argLength: 2, reg: gp2flags, asm: "CMPW", typ: "Flags"}, // arg0 compare to arg1
   325  
   326  		{name: "CMPU", argLength: 2, reg: gp2flags, asm: "CMPU", typ: "Flags"},   // arg0 compare to arg1
   327  		{name: "CMPWU", argLength: 2, reg: gp2flags, asm: "CMPWU", typ: "Flags"}, // arg0 compare to arg1
   328  
   329  		{name: "CMPconst", argLength: 1, reg: gp1flags, asm: "CMP", typ: "Flags", aux: "Int32"},     // arg0 compare to auxint
   330  		{name: "CMPWconst", argLength: 1, reg: gp1flags, asm: "CMPW", typ: "Flags", aux: "Int32"},   // arg0 compare to auxint
   331  		{name: "CMPUconst", argLength: 1, reg: gp1flags, asm: "CMPU", typ: "Flags", aux: "Int32"},   // arg0 compare to auxint
   332  		{name: "CMPWUconst", argLength: 1, reg: gp1flags, asm: "CMPWU", typ: "Flags", aux: "Int32"}, // arg0 compare to auxint
   333  
   334  		{name: "FCMPS", argLength: 2, reg: fp2flags, asm: "CEBR", typ: "Flags"},  // arg0 compare to arg1, f32
   335  		{name: "FCMP", argLength: 2, reg: fp2flags, asm: "FCMPU", typ: "Flags"},  // arg0 compare to arg1, f64
   336  		{name: "LTDBR", argLength: 1, reg: fp1flags, asm: "LTDBR", typ: "Flags"}, // arg0 compare to 0, f64
   337  		{name: "LTEBR", argLength: 1, reg: fp1flags, asm: "LTEBR", typ: "Flags"}, // arg0 compare to 0, f32
   338  
   339  		{name: "SLD", argLength: 2, reg: sh21, asm: "SLD"},                    // arg0 << arg1, shift amount is mod 64
   340  		{name: "SLW", argLength: 2, reg: sh21, asm: "SLW"},                    // arg0 << arg1, shift amount is mod 64
   341  		{name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "UInt8"}, // arg0 << auxint, shift amount 0-63
   342  		{name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "UInt8"}, // arg0 << auxint, shift amount 0-31
   343  
   344  		{name: "SRD", argLength: 2, reg: sh21, asm: "SRD"},                    // unsigned arg0 >> arg1, shift amount is mod 64
   345  		{name: "SRW", argLength: 2, reg: sh21, asm: "SRW"},                    // unsigned uint32(arg0) >> arg1, shift amount is mod 64
   346  		{name: "SRDconst", argLength: 1, reg: gp11, asm: "SRD", aux: "UInt8"}, // unsigned arg0 >> auxint, shift amount 0-63
   347  		{name: "SRWconst", argLength: 1, reg: gp11, asm: "SRW", aux: "UInt8"}, // unsigned uint32(arg0) >> auxint, shift amount 0-31
   348  
   349  		// Arithmetic shifts clobber flags.
   350  		{name: "SRAD", argLength: 2, reg: sh21, asm: "SRAD", clobberFlags: true},                    // signed arg0 >> arg1, shift amount is mod 64
   351  		{name: "SRAW", argLength: 2, reg: sh21, asm: "SRAW", clobberFlags: true},                    // signed int32(arg0) >> arg1, shift amount is mod 64
   352  		{name: "SRADconst", argLength: 1, reg: gp11, asm: "SRAD", aux: "UInt8", clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-63
   353  		{name: "SRAWconst", argLength: 1, reg: gp11, asm: "SRAW", aux: "UInt8", clobberFlags: true}, // signed int32(arg0) >> auxint, shift amount 0-31
   354  
   355  		// Rotate instructions.
   356  		// Note: no RLLGconst - use RISBGZ instead.
   357  		{name: "RLLG", argLength: 2, reg: sh21, asm: "RLLG"},                  // arg0 rotate left arg1, rotate amount 0-63
   358  		{name: "RLL", argLength: 2, reg: sh21, asm: "RLL"},                    // arg0 rotate left arg1, rotate amount 0-31
   359  		{name: "RLLconst", argLength: 1, reg: gp11, asm: "RLL", aux: "UInt8"}, // arg0 rotate left auxint, rotate amount 0-31
   360  
   361  		// Rotate then (and|or|xor|insert) selected bits instructions.
   362  		//
   363  		// Aux is an s390x.RotateParams struct containing Start, End and rotation
   364  		// Amount fields.
   365  		//
   366  		// arg1 is rotated left by the rotation amount then the bits from the start
   367  		// bit to the end bit (inclusive) are combined with arg0 using the logical
   368  		// operation specified. Bit indices are specified from left to right - the
   369  		// MSB is 0 and the LSB is 63.
   370  		//
   371  		// Examples:
   372  		//               |          aux         |
   373  		// | instruction | start | end | amount |          arg0         |          arg1         |         result        |
   374  		// +-------------+-------+-----+--------+-----------------------+-----------------------+-----------------------+
   375  		// | RXSBG (XOR) |     0 |   1 |      0 | 0xffff_ffff_ffff_ffff | 0xffff_ffff_ffff_ffff | 0x3fff_ffff_ffff_ffff |
   376  		// | RXSBG (XOR) |    62 |  63 |      0 | 0xffff_ffff_ffff_ffff | 0xffff_ffff_ffff_ffff | 0xffff_ffff_ffff_fffc |
   377  		// | RXSBG (XOR) |     0 |  47 |     16 | 0xffff_ffff_ffff_ffff | 0x0000_0000_0000_ffff | 0xffff_ffff_0000_ffff |
   378  		// +-------------+-------+-----+--------+-----------------------+-----------------------+-----------------------+
   379  		//
   380  		{name: "RXSBG", argLength: 2, reg: gp21, asm: "RXSBG", resultInArg0: true, aux: "S390XRotateParams", clobberFlags: true}, // rotate then xor selected bits
   381  		{name: "RISBGZ", argLength: 1, reg: gp11, asm: "RISBGZ", aux: "S390XRotateParams", clobberFlags: true},                   // rotate then insert selected bits [into zero]
   382  
   383  		// unary ops
   384  		{name: "NEG", argLength: 1, reg: gp11, asm: "NEG", clobberFlags: true},   // -arg0
   385  		{name: "NEGW", argLength: 1, reg: gp11, asm: "NEGW", clobberFlags: true}, // -arg0
   386  
   387  		{name: "NOT", argLength: 1, reg: gp11, resultInArg0: true, clobberFlags: true},  // ^arg0
   388  		{name: "NOTW", argLength: 1, reg: gp11, resultInArg0: true, clobberFlags: true}, // ^arg0
   389  
   390  		{name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"},   // sqrt(arg0)
   391  		{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"}, // sqrt(arg0), float32
   392  
   393  		// Conditional register-register moves.
   394  		// The aux for these values is an s390x.CCMask value representing the condition code mask.
   395  		{name: "LOCGR", argLength: 3, reg: gp2flags1, resultInArg0: true, asm: "LOCGR", aux: "S390XCCMask"}, // load arg1 into arg0 if the condition code in arg2 matches a masked bit in aux.
   396  
   397  		{name: "MOVBreg", argLength: 1, reg: gp11sp, asm: "MOVB", typ: "Int64"},    // sign extend arg0 from int8 to int64
   398  		{name: "MOVBZreg", argLength: 1, reg: gp11sp, asm: "MOVBZ", typ: "UInt64"}, // zero extend arg0 from int8 to int64
   399  		{name: "MOVHreg", argLength: 1, reg: gp11sp, asm: "MOVH", typ: "Int64"},    // sign extend arg0 from int16 to int64
   400  		{name: "MOVHZreg", argLength: 1, reg: gp11sp, asm: "MOVHZ", typ: "UInt64"}, // zero extend arg0 from int16 to int64
   401  		{name: "MOVWreg", argLength: 1, reg: gp11sp, asm: "MOVW", typ: "Int64"},    // sign extend arg0 from int32 to int64
   402  		{name: "MOVWZreg", argLength: 1, reg: gp11sp, asm: "MOVWZ", typ: "UInt64"}, // zero extend arg0 from int32 to int64
   403  
   404  		{name: "MOVDconst", reg: gp01, asm: "MOVD", typ: "UInt64", aux: "Int64", rematerializeable: true}, // auxint
   405  
   406  		{name: "LDGR", argLength: 1, reg: gpfp, asm: "LDGR"}, // move int64 to float64 (no conversion)
   407  		{name: "LGDR", argLength: 1, reg: fpgp, asm: "LGDR"}, // move float64 to int64 (no conversion)
   408  
   409  		{name: "CFDBRA", argLength: 1, reg: fpgp, asm: "CFDBRA", clobberFlags: true}, // convert float64 to int32
   410  		{name: "CGDBRA", argLength: 1, reg: fpgp, asm: "CGDBRA", clobberFlags: true}, // convert float64 to int64
   411  		{name: "CFEBRA", argLength: 1, reg: fpgp, asm: "CFEBRA", clobberFlags: true}, // convert float32 to int32
   412  		{name: "CGEBRA", argLength: 1, reg: fpgp, asm: "CGEBRA", clobberFlags: true}, // convert float32 to int64
   413  		{name: "CEFBRA", argLength: 1, reg: gpfp, asm: "CEFBRA", clobberFlags: true}, // convert int32 to float32
   414  		{name: "CDFBRA", argLength: 1, reg: gpfp, asm: "CDFBRA", clobberFlags: true}, // convert int32 to float64
   415  		{name: "CEGBRA", argLength: 1, reg: gpfp, asm: "CEGBRA", clobberFlags: true}, // convert int64 to float32
   416  		{name: "CDGBRA", argLength: 1, reg: gpfp, asm: "CDGBRA", clobberFlags: true}, // convert int64 to float64
   417  		{name: "CLFEBR", argLength: 1, reg: fpgp, asm: "CLFEBR", clobberFlags: true}, // convert float32 to uint32
   418  		{name: "CLFDBR", argLength: 1, reg: fpgp, asm: "CLFDBR", clobberFlags: true}, // convert float64 to uint32
   419  		{name: "CLGEBR", argLength: 1, reg: fpgp, asm: "CLGEBR", clobberFlags: true}, // convert float32 to uint64
   420  		{name: "CLGDBR", argLength: 1, reg: fpgp, asm: "CLGDBR", clobberFlags: true}, // convert float64 to uint64
   421  		{name: "CELFBR", argLength: 1, reg: gpfp, asm: "CELFBR", clobberFlags: true}, // convert uint32 to float32
   422  		{name: "CDLFBR", argLength: 1, reg: gpfp, asm: "CDLFBR", clobberFlags: true}, // convert uint32 to float64
   423  		{name: "CELGBR", argLength: 1, reg: gpfp, asm: "CELGBR", clobberFlags: true}, // convert uint64 to float32
   424  		{name: "CDLGBR", argLength: 1, reg: gpfp, asm: "CDLGBR", clobberFlags: true}, // convert uint64 to float64
   425  
   426  		{name: "LEDBR", argLength: 1, reg: fp11, asm: "LEDBR"}, // convert float64 to float32
   427  		{name: "LDEBR", argLength: 1, reg: fp11, asm: "LDEBR"}, // convert float32 to float64
   428  
   429  		{name: "MOVDaddr", argLength: 1, reg: addr, aux: "SymOff", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxint + offset encoded in aux
   430  		{name: "MOVDaddridx", argLength: 2, reg: addridx, aux: "SymOff", symEffect: "Addr"},                    // arg0 + arg1 + auxint + aux
   431  
   432  		// auxint+aux == add auxint and the offset of the symbol in aux (if any) to the effective address
   433  		{name: "MOVBZload", argLength: 2, reg: gpload, asm: "MOVBZ", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"},  // load byte from arg0+auxint+aux. arg1=mem.  Zero extend.
   434  		{name: "MOVBload", argLength: 2, reg: gpload, asm: "MOVB", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},                  // ditto, sign extend to int64
   435  		{name: "MOVHZload", argLength: 2, reg: gpload, asm: "MOVHZ", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes from arg0+auxint+aux. arg1=mem.  Zero extend.
   436  		{name: "MOVHload", argLength: 2, reg: gpload, asm: "MOVH", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},                  // ditto, sign extend to int64
   437  		{name: "MOVWZload", argLength: 2, reg: gpload, asm: "MOVWZ", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes from arg0+auxint+aux. arg1=mem.  Zero extend.
   438  		{name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVW", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},                  // ditto, sign extend to int64
   439  		{name: "MOVDload", argLength: 2, reg: gpload, asm: "MOVD", aux: "SymOff", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"},   // load 8 bytes from arg0+auxint+aux. arg1=mem
   440  
   441  		{name: "MOVWBR", argLength: 1, reg: gp11, asm: "MOVWBR"}, // arg0 swap bytes
   442  		{name: "MOVDBR", argLength: 1, reg: gp11, asm: "MOVDBR"}, // arg0 swap bytes
   443  
   444  		{name: "MOVHBRload", argLength: 2, reg: gpload, asm: "MOVHBR", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes from arg0+auxint+aux. arg1=mem. Reverse bytes.
   445  		{name: "MOVWBRload", argLength: 2, reg: gpload, asm: "MOVWBR", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes from arg0+auxint+aux. arg1=mem. Reverse bytes.
   446  		{name: "MOVDBRload", argLength: 2, reg: gpload, asm: "MOVDBR", aux: "SymOff", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"}, // load 8 bytes from arg0+auxint+aux. arg1=mem. Reverse bytes.
   447  
   448  		{name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},       // store byte in arg1 to arg0+auxint+aux. arg2=mem
   449  		{name: "MOVHstore", argLength: 3, reg: gpstore, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},       // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem
   450  		{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},       // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem
   451  		{name: "MOVDstore", argLength: 3, reg: gpstore, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},       // store 8 bytes in arg1 to arg0+auxint+aux. arg2=mem
   452  		{name: "MOVHBRstore", argLength: 3, reg: gpstorebr, asm: "MOVHBR", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem. Reverse bytes.
   453  		{name: "MOVWBRstore", argLength: 3, reg: gpstorebr, asm: "MOVWBR", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem. Reverse bytes.
   454  		{name: "MOVDBRstore", argLength: 3, reg: gpstorebr, asm: "MOVDBR", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes in arg1 to arg0+auxint+aux. arg2=mem. Reverse bytes.
   455  
   456  		{name: "MVC", argLength: 3, reg: gpmvc, asm: "MVC", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, faultOnNilArg1: true, symEffect: "None"}, // arg0=destptr, arg1=srcptr, arg2=mem, auxint=size,off
   457  
   458  		// indexed loads/stores
   459  		{name: "MOVBZloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVBZ", aux: "SymOff", typ: "UInt8", symEffect: "Read"},   // load a byte from arg0+arg1+auxint+aux. arg2=mem. Zero extend.
   460  		{name: "MOVBloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVB", aux: "SymOff", typ: "Int8", symEffect: "Read"},      // load a byte from arg0+arg1+auxint+aux. arg2=mem. Sign extend.
   461  		{name: "MOVHZloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVHZ", aux: "SymOff", typ: "UInt16", symEffect: "Read"},  // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem. Zero extend.
   462  		{name: "MOVHloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVH", aux: "SymOff", typ: "Int16", symEffect: "Read"},     // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem. Sign extend.
   463  		{name: "MOVWZloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVWZ", aux: "SymOff", typ: "UInt32", symEffect: "Read"},  // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem. Zero extend.
   464  		{name: "MOVWloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVW", aux: "SymOff", typ: "Int32", symEffect: "Read"},     // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem. Sign extend.
   465  		{name: "MOVDloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVD", aux: "SymOff", typ: "UInt64", symEffect: "Read"},    // load 8 bytes from arg0+arg1+auxint+aux. arg2=mem
   466  		{name: "MOVHBRloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVHBR", aux: "SymOff", typ: "Int16", symEffect: "Read"}, // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
   467  		{name: "MOVWBRloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVWBR", aux: "SymOff", typ: "Int32", symEffect: "Read"}, // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
   468  		{name: "MOVDBRloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVDBR", aux: "SymOff", typ: "Int64", symEffect: "Read"}, // load 8 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
   469  		{name: "MOVBstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVB", aux: "SymOff", symEffect: "Write"},                // store byte in arg2 to arg0+arg1+auxint+aux. arg3=mem
   470  		{name: "MOVHstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVH", aux: "SymOff", symEffect: "Write"},                // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
   471  		{name: "MOVWstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVW", aux: "SymOff", symEffect: "Write"},                // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
   472  		{name: "MOVDstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVD", aux: "SymOff", symEffect: "Write"},                // store 8 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
   473  		{name: "MOVHBRstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVHBR", aux: "SymOff", symEffect: "Write"},            // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes.
   474  		{name: "MOVWBRstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVWBR", aux: "SymOff", symEffect: "Write"},            // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes.
   475  		{name: "MOVDBRstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVDBR", aux: "SymOff", symEffect: "Write"},            // store 8 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes.
   476  
   477  		// For storeconst ops, the AuxInt field encodes both
   478  		// the value to store and an address offset of the store.
   479  		// Cast AuxInt to a ValAndOff to extract Val and Off fields.
   480  		{name: "MOVBstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVB", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store low byte of ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux.  arg1=mem
   481  		{name: "MOVHstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVH", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store low 2 bytes of ...
   482  		{name: "MOVWstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVW", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store low 4 bytes of ...
   483  		{name: "MOVDstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVD", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of ...
   484  
   485  		{name: "CLEAR", argLength: 2, reg: regInfo{inputs: []regMask{ptr, 0}}, asm: "CLEAR", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Write"},
   486  
   487  		{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true},                                                // call static function aux.(*obj.LSym).  arg0=mem, auxint=argsize, returns mem
   488  		{name: "CALLtail", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true},                                  // tail call static function aux.(*obj.LSym).  arg0=mem, auxint=argsize, returns mem
   489  		{name: "CALLclosure", argLength: 3, reg: regInfo{inputs: []regMask{ptrsp, buildReg("R12"), 0}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call function via closure.  arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
   490  		{name: "CALLinter", argLength: 2, reg: regInfo{inputs: []regMask{ptr}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true},                         // call fn by pointer.  arg0=codeptr, arg1=mem, auxint=argsize, returns mem
   491  
   492  		// (InvertFlags (CMP a b)) == (CMP b a)
   493  		// InvertFlags is a pseudo-op which can't appear in assembly output.
   494  		{name: "InvertFlags", argLength: 1}, // reverse direction of arg0
   495  
   496  		// Pseudo-ops
   497  		{name: "LoweredGetG", argLength: 1, reg: gp01}, // arg0=mem
   498  		// Scheduler ensures LoweredGetClosurePtr occurs only in entry block,
   499  		// and sorts it to the very beginning of the block to prevent other
   500  		// use of R12 (the closure pointer)
   501  		{name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{buildReg("R12")}}, zeroWidth: true},
   502  		// LoweredNilCheck (defined below): arg0=ptr, arg1=mem, returns void.  Faults if ptr is nil.
   503  		// LoweredGetCallerSP returns the SP of the caller of the current function. arg0=mem.
   504  		{name: "LoweredGetCallerSP", argLength: 1, reg: gp01, rematerializeable: true},
   505  		// LoweredGetCallerPC evaluates to the PC to which its "caller" will return.
   506  		// I.e., if f calls g and g "calls" sys.GetCallerPC,
   507  		// the result should be the PC within f that g will return to.
   508  		// See runtime/stubs.go for a more detailed discussion.
   509  		{name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true},
   510  		{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{ptrsp}}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true},
   511  		// Round ops to block fused-multiply-add extraction.
   512  		{name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
   513  		{name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
   514  
   515  		// LoweredWB invokes runtime.gcWriteBarrier. arg0=mem, auxint=# of buffer entries needed
   516  		// It saves all GP registers if necessary,
   517  		// but clobbers R14 (LR) because it's a call,
   518  		// and also clobbers R1 as the PLT stub does.
   519  		// Returns a pointer to a write barrier buffer in R9.
   520  		{name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: (callerSave &^ gpg) | buildReg("R14") | r1, outputs: []regMask{r9}}, clobberFlags: true, aux: "Int64"},
   521  
   522  		// LoweredPanicBoundsRR takes x and y, two values that caused a bounds check to fail.
   523  		// The RC and CR versions are used when one of the arguments is a constant. CC is used
   524  		// when both are constant (normally both 0, as prove derives the fact that a [0] bounds
   525  		// failure means the length must have also been 0).
   526  		// AuxInt contains a report code (see PanicBounds in genericOps.go).
   527  		{name: "LoweredPanicBoundsRR", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{gp &^ lr, gp &^ lr}}, typ: "Mem", call: true}, // arg0=x, arg1=y, arg2=mem, returns memory.
   528  		{name: "LoweredPanicBoundsRC", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{gp &^ lr}}, typ: "Mem", call: true},    // arg0=x, arg1=mem, returns memory.
   529  		{name: "LoweredPanicBoundsCR", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{gp &^ lr}}, typ: "Mem", call: true},    // arg0=y, arg1=mem, returns memory.
   530  		{name: "LoweredPanicBoundsCC", argLength: 1, aux: "PanicBoundsCC", reg: regInfo{}, typ: "Mem", call: true},                              // arg0=mem, returns memory.
   531  
   532  		// Constant condition code values. The condition code can be 0, 1, 2 or 3.
   533  		{name: "FlagEQ"}, // CC=0 (equal)
   534  		{name: "FlagLT"}, // CC=1 (less than)
   535  		{name: "FlagGT"}, // CC=2 (greater than)
   536  		{name: "FlagOV"}, // CC=3 (overflow)
   537  
   538  		// Fast-BCR-serialization to ensure store-load ordering.
   539  		{name: "SYNC", argLength: 1, reg: sync, asm: "SYNC", typ: "Mem"},
   540  
   541  		// Atomic loads. These are just normal loads but return <value,memory> tuples
   542  		// so they can be properly ordered with other loads.
   543  		// load from arg0+auxint+aux.  arg1=mem.
   544  		{name: "MOVBZatomicload", argLength: 2, reg: gpload, asm: "MOVBZ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},
   545  		{name: "MOVWZatomicload", argLength: 2, reg: gpload, asm: "MOVWZ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},
   546  		{name: "MOVDatomicload", argLength: 2, reg: gpload, asm: "MOVD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},
   547  
   548  		// Atomic stores. These are just normal stores.
   549  		// store arg1 to arg0+auxint+aux. arg2=mem.
   550  		{name: "MOVBatomicstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "Write"},
   551  		{name: "MOVWatomicstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "Write"},
   552  		{name: "MOVDatomicstore", argLength: 3, reg: gpstore, asm: "MOVD", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "Write"},
   553  
   554  		// Atomic adds.
   555  		// *(arg0+auxint+aux) += arg1.  arg2=mem.
   556  		// Returns a tuple of <old contents of *(arg0+auxint+aux), memory>.
   557  		{name: "LAA", argLength: 3, reg: gpstorelaa, asm: "LAA", typ: "(UInt32,Mem)", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"},
   558  		{name: "LAAG", argLength: 3, reg: gpstorelaa, asm: "LAAG", typ: "(UInt64,Mem)", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"},
   559  		{name: "AddTupleFirst32", argLength: 2}, // arg1=tuple <x,y>.  Returns <x+arg0,y>.
   560  		{name: "AddTupleFirst64", argLength: 2}, // arg1=tuple <x,y>.  Returns <x+arg0,y>.
   561  
   562  		// Atomic bitwise operations.
   563  		// Note: 'floor' operations round the pointer down to the nearest word boundary
   564  		// which reflects how they are used in the runtime.
   565  		{name: "LAN", argLength: 3, reg: gpstore, asm: "LAN", typ: "Mem", clobberFlags: true, hasSideEffects: true},         // *arg0 &= arg1. arg2 = mem.
   566  		{name: "LANfloor", argLength: 3, reg: gpstorelab, asm: "LAN", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *(floor(arg0, 4)) &= arg1. arg2 = mem.
   567  		{name: "LAO", argLength: 3, reg: gpstore, asm: "LAO", typ: "Mem", clobberFlags: true, hasSideEffects: true},         // *arg0 |= arg1. arg2 = mem.
   568  		{name: "LAOfloor", argLength: 3, reg: gpstorelab, asm: "LAO", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *(floor(arg0, 4)) |= arg1. arg2 = mem.
   569  
   570  		// Compare and swap.
   571  		// arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory.
   572  		// if *(arg0+auxint+aux) == arg1 {
   573  		//   *(arg0+auxint+aux) = arg2
   574  		//   return (true, memory)
   575  		// } else {
   576  		//   return (false, memory)
   577  		// }
   578  		// Note that these instructions also return the old value in arg1, but we ignore it.
   579  		// TODO: have these return flags instead of bool.  The current system generates:
   580  		//    CS ...
   581  		//    MOVD  $0, ret
   582  		//    BNE   2(PC)
   583  		//    MOVD  $1, ret
   584  		//    CMPW  ret, $0
   585  		//    BNE ...
   586  		// instead of just
   587  		//    CS ...
   588  		//    BEQ ...
   589  		// but we can't do that because memory-using ops can't generate flags yet
   590  		// (flagalloc wants to move flag-generating instructions around).
   591  		{name: "LoweredAtomicCas32", argLength: 4, reg: cas, asm: "CS", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"},
   592  		{name: "LoweredAtomicCas64", argLength: 4, reg: cas, asm: "CSG", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"},
   593  
   594  		// Lowered atomic swaps, emulated using compare-and-swap.
   595  		// store arg1 to arg0+auxint+aux, arg2=mem.
   596  		{name: "LoweredAtomicExchange32", argLength: 3, reg: exchange, asm: "CS", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"},
   597  		{name: "LoweredAtomicExchange64", argLength: 3, reg: exchange, asm: "CSG", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"},
   598  
   599  		// find leftmost one
   600  		{
   601  			name:         "FLOGR",
   602  			argLength:    1,
   603  			reg:          regInfo{inputs: gponly, outputs: []regMask{buildReg("R0")}, clobbers: buildReg("R1")},
   604  			asm:          "FLOGR",
   605  			typ:          "UInt64",
   606  			clobberFlags: true,
   607  		},
   608  
   609  		// population count
   610  		//
   611  		// Counts the number of ones in each byte of arg0
   612  		// and places the result into the corresponding byte
   613  		// of the result.
   614  		{
   615  			name:         "POPCNT",
   616  			argLength:    1,
   617  			reg:          gp11,
   618  			asm:          "POPCNT",
   619  			typ:          "UInt64",
   620  			clobberFlags: true,
   621  		},
   622  
   623  		// unsigned multiplication (64x64 → 128)
   624  		//
   625  		// Multiply the two 64-bit input operands together and place the 128-bit result into
   626  		// an even-odd register pair. The second register in the target pair also contains
   627  		// one of the input operands. Since we don't currently have a way to specify an
   628  		// even-odd register pair we hardcode this register pair as R2:R3.
   629  		{
   630  			name:      "MLGR",
   631  			argLength: 2,
   632  			reg:       regInfo{inputs: []regMask{gp, r3}, outputs: []regMask{r2, r3}},
   633  			asm:       "MLGR",
   634  		},
   635  
   636  		// pseudo operations to sum the output of the POPCNT instruction
   637  		{name: "SumBytes2", argLength: 1, typ: "UInt8"}, // sum the rightmost 2 bytes in arg0 ignoring overflow
   638  		{name: "SumBytes4", argLength: 1, typ: "UInt8"}, // sum the rightmost 4 bytes in arg0 ignoring overflow
   639  		{name: "SumBytes8", argLength: 1, typ: "UInt8"}, // sum all the bytes in arg0 ignoring overflow
   640  
   641  		// store multiple
   642  		{
   643  			name:           "STMG2",
   644  			argLength:      4,
   645  			reg:            regInfo{inputs: []regMask{ptrsp, buildReg("R1"), buildReg("R2"), 0}},
   646  			aux:            "SymOff",
   647  			typ:            "Mem",
   648  			asm:            "STMG",
   649  			faultOnNilArg0: true,
   650  			symEffect:      "Write",
   651  			clobberFlags:   true, // TODO(mundaym): currently uses AGFI to handle large offsets
   652  		},
   653  		{
   654  			name:           "STMG3",
   655  			argLength:      5,
   656  			reg:            regInfo{inputs: []regMask{ptrsp, buildReg("R1"), buildReg("R2"), buildReg("R3"), 0}},
   657  			aux:            "SymOff",
   658  			typ:            "Mem",
   659  			asm:            "STMG",
   660  			faultOnNilArg0: true,
   661  			symEffect:      "Write",
   662  			clobberFlags:   true, // TODO(mundaym): currently uses AGFI to handle large offsets
   663  		},
   664  		{
   665  			name:      "STMG4",
   666  			argLength: 6,
   667  			reg: regInfo{inputs: []regMask{
   668  				ptrsp,
   669  				buildReg("R1"),
   670  				buildReg("R2"),
   671  				buildReg("R3"),
   672  				buildReg("R4"),
   673  				0,
   674  			}},
   675  			aux:            "SymOff",
   676  			typ:            "Mem",
   677  			asm:            "STMG",
   678  			faultOnNilArg0: true,
   679  			symEffect:      "Write",
   680  			clobberFlags:   true, // TODO(mundaym): currently uses AGFI to handle large offsets
   681  		},
   682  		{
   683  			name:           "STM2",
   684  			argLength:      4,
   685  			reg:            regInfo{inputs: []regMask{ptrsp, buildReg("R1"), buildReg("R2"), 0}},
   686  			aux:            "SymOff",
   687  			typ:            "Mem",
   688  			asm:            "STMY",
   689  			faultOnNilArg0: true,
   690  			symEffect:      "Write",
   691  			clobberFlags:   true, // TODO(mundaym): currently uses AGFI to handle large offsets
   692  		},
   693  		{
   694  			name:           "STM3",
   695  			argLength:      5,
   696  			reg:            regInfo{inputs: []regMask{ptrsp, buildReg("R1"), buildReg("R2"), buildReg("R3"), 0}},
   697  			aux:            "SymOff",
   698  			typ:            "Mem",
   699  			asm:            "STMY",
   700  			faultOnNilArg0: true,
   701  			symEffect:      "Write",
   702  			clobberFlags:   true, // TODO(mundaym): currently uses AGFI to handle large offsets
   703  		},
   704  		{
   705  			name:      "STM4",
   706  			argLength: 6,
   707  			reg: regInfo{inputs: []regMask{
   708  				ptrsp,
   709  				buildReg("R1"),
   710  				buildReg("R2"),
   711  				buildReg("R3"),
   712  				buildReg("R4"),
   713  				0,
   714  			}},
   715  			aux:            "SymOff",
   716  			typ:            "Mem",
   717  			asm:            "STMY",
   718  			faultOnNilArg0: true,
   719  			symEffect:      "Write",
   720  			clobberFlags:   true, // TODO(mundaym): currently uses AGFI to handle large offsets
   721  		},
   722  
   723  		// large move
   724  		// auxint = remaining bytes after loop (rem)
   725  		// arg0 = address of dst memory (in R1, changed as a side effect)
   726  		// arg1 = address of src memory (in R2, changed as a side effect)
   727  		// arg2 = pointer to last address to move in loop + 256
   728  		// arg3 = mem
   729  		// returns mem
   730  		//
   731  		// mvc: MVC  $256, 0(R2), 0(R1)
   732  		//      MOVD $256(R1), R1
   733  		//      MOVD $256(R2), R2
   734  		//      CMP  R2, Rarg2
   735  		//      BNE  mvc
   736  		//	MVC  $rem, 0(R2), 0(R1) // if rem > 0
   737  		{
   738  			name:      "LoweredMove",
   739  			aux:       "Int64",
   740  			argLength: 4,
   741  			reg: regInfo{
   742  				inputs:   []regMask{buildReg("R1"), buildReg("R2"), gpsp},
   743  				clobbers: buildReg("R1 R2"),
   744  			},
   745  			clobberFlags:   true,
   746  			typ:            "Mem",
   747  			faultOnNilArg0: true,
   748  			faultOnNilArg1: true,
   749  		},
   750  
   751  		// large clear
   752  		// auxint = remaining bytes after loop (rem)
   753  		// arg0 = address of dst memory (in R1, changed as a side effect)
   754  		// arg1 = pointer to last address to zero in loop + 256
   755  		// arg2 = mem
   756  		// returns mem
   757  		//
   758  		// clear: CLEAR $256, 0(R1)
   759  		//        MOVD  $256(R1), R1
   760  		//        CMP   R1, Rarg1
   761  		//        BNE   clear
   762  		//	  CLEAR $rem, 0(R1) // if rem > 0
   763  		{
   764  			name:      "LoweredZero",
   765  			aux:       "Int64",
   766  			argLength: 3,
   767  			reg: regInfo{
   768  				inputs:   []regMask{buildReg("R1"), gpsp},
   769  				clobbers: buildReg("R1"),
   770  			},
   771  			clobberFlags:   true,
   772  			typ:            "Mem",
   773  			faultOnNilArg0: true,
   774  		},
   775  	}
   776  
   777  	// All blocks on s390x have their condition code mask (s390x.CCMask) as the Aux value.
   778  	// The condition code mask is a 4-bit mask where each bit corresponds to a condition
   779  	// code value. If the value of the condition code matches a bit set in the condition
   780  	// code mask then the first successor is executed. Otherwise the second successor is
   781  	// executed.
   782  	//
   783  	// | condition code value |  mask bit  |
   784  	// +----------------------+------------+
   785  	// | 0 (equal)            | 0b1000 (8) |
   786  	// | 1 (less than)        | 0b0100 (4) |
   787  	// | 2 (greater than)     | 0b0010 (2) |
   788  	// | 3 (unordered)        | 0b0001 (1) |
   789  	//
   790  	// Note that compare-and-branch instructions must not have bit 3 (0b0001) set.
   791  	var S390Xblocks = []blockData{
   792  		// branch on condition
   793  		{name: "BRC", controls: 1, aux: "S390XCCMask"}, // condition code value (flags) is Controls[0]
   794  
   795  		// compare-and-branch (register-register)
   796  		//  - integrates comparison of Controls[0] with Controls[1]
   797  		//  - both control values must be in general purpose registers
   798  		{name: "CRJ", controls: 2, aux: "S390XCCMask"},   // signed 32-bit integer comparison
   799  		{name: "CGRJ", controls: 2, aux: "S390XCCMask"},  // signed 64-bit integer comparison
   800  		{name: "CLRJ", controls: 2, aux: "S390XCCMask"},  // unsigned 32-bit integer comparison
   801  		{name: "CLGRJ", controls: 2, aux: "S390XCCMask"}, // unsigned 64-bit integer comparison
   802  
   803  		// compare-and-branch (register-immediate)
   804  		//  - integrates comparison of Controls[0] with AuxInt
   805  		//  - control value must be in a general purpose register
   806  		//  - the AuxInt value is sign-extended for signed comparisons
   807  		//    and zero-extended for unsigned comparisons
   808  		{name: "CIJ", controls: 1, aux: "S390XCCMaskInt8"},    // signed 32-bit integer comparison
   809  		{name: "CGIJ", controls: 1, aux: "S390XCCMaskInt8"},   // signed 64-bit integer comparison
   810  		{name: "CLIJ", controls: 1, aux: "S390XCCMaskUint8"},  // unsigned 32-bit integer comparison
   811  		{name: "CLGIJ", controls: 1, aux: "S390XCCMaskUint8"}, // unsigned 64-bit integer comparison
   812  	}
   813  
   814  	archs = append(archs, arch{
   815  		name:            "S390X",
   816  		pkg:             "cmd/internal/obj/s390x",
   817  		genfile:         "../../s390x/ssa.go",
   818  		ops:             S390Xops,
   819  		blocks:          S390Xblocks,
   820  		regnames:        regNamesS390X,
   821  		gpregmask:       gp,
   822  		fpregmask:       fp,
   823  		framepointerreg: -1, // not used
   824  		linkreg:         int8(num["R14"]),
   825  		imports: []string{
   826  			"cmd/internal/obj/s390x",
   827  		},
   828  	})
   829  }
   830  

View as plain text