Source file src/cmd/internal/obj/x86/asm6.go

     1  // Inferno utils/6l/span.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"cmd/internal/objabi"
    36  	"cmd/internal/sys"
    37  	"encoding/binary"
    38  	"fmt"
    39  	"internal/buildcfg"
    40  	"log"
    41  	"strings"
    42  )
    43  
    44  var (
    45  	plan9privates *obj.LSym
    46  )
    47  
    48  // Instruction layout.
    49  
    50  // Loop alignment constants:
    51  // want to align loop entry to loopAlign-byte boundary,
    52  // and willing to insert at most maxLoopPad bytes of NOP to do so.
    53  // We define a loop entry as the target of a backward jump.
    54  //
    55  // gcc uses maxLoopPad = 10 for its 'generic x86-64' config,
    56  // and it aligns all jump targets, not just backward jump targets.
    57  //
    58  // As of 6/1/2012, the effect of setting maxLoopPad = 10 here
    59  // is very slight but negative, so the alignment is disabled by
    60  // setting MaxLoopPad = 0. The code is here for reference and
    61  // for future experiments.
    62  const (
    63  	loopAlign  = 16
    64  	maxLoopPad = 0
    65  )
    66  
    67  // Bit flags that are used to express jump target properties.
    68  const (
    69  	// branchBackwards marks targets that are located behind.
    70  	// Used to express jumps to loop headers.
    71  	branchBackwards = (1 << iota)
    72  	// branchShort marks branches those target is close,
    73  	// with offset is in -128..127 range.
    74  	branchShort
    75  	// branchLoopHead marks loop entry.
    76  	// Used to insert padding for misaligned loops.
    77  	branchLoopHead
    78  )
    79  
    80  // opBytes holds optab encoding bytes.
    81  // Each ytab reserves fixed amount of bytes in this array.
    82  //
    83  // The size should be the minimal number of bytes that
    84  // are enough to hold biggest optab op lines.
    85  type opBytes [31]uint8
    86  
    87  type Optab struct {
    88  	as     obj.As
    89  	ytab   []ytab
    90  	prefix uint8
    91  	op     opBytes
    92  }
    93  
    94  type movtab struct {
    95  	as   obj.As
    96  	ft   uint8
    97  	f3t  uint8
    98  	tt   uint8
    99  	code uint8
   100  	op   [4]uint8
   101  }
   102  
   103  const (
   104  	Yxxx = iota
   105  	Ynone
   106  	Yi0 // $0
   107  	Yi1 // $1
   108  	Yu2 // $x, x fits in uint2
   109  	Yi8 // $x, x fits in int8
   110  	Yu8 // $x, x fits in uint8
   111  	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
   112  	Ys32
   113  	Yi32
   114  	Yi64
   115  	Yiauto
   116  	Yal
   117  	Ycl
   118  	Yax
   119  	Ycx
   120  	Yrb
   121  	Yrl
   122  	Yrl32 // Yrl on 32-bit system
   123  	Yrf
   124  	Yf0
   125  	Yrx
   126  	Ymb
   127  	Yml
   128  	Ym
   129  	Ybr
   130  	Ycs
   131  	Yss
   132  	Yds
   133  	Yes
   134  	Yfs
   135  	Ygs
   136  	Ygdtr
   137  	Yidtr
   138  	Yldtr
   139  	Ymsw
   140  	Ytask
   141  	Ycr0
   142  	Ycr1
   143  	Ycr2
   144  	Ycr3
   145  	Ycr4
   146  	Ycr5
   147  	Ycr6
   148  	Ycr7
   149  	Ycr8
   150  	Ydr0
   151  	Ydr1
   152  	Ydr2
   153  	Ydr3
   154  	Ydr4
   155  	Ydr5
   156  	Ydr6
   157  	Ydr7
   158  	Ytr0
   159  	Ytr1
   160  	Ytr2
   161  	Ytr3
   162  	Ytr4
   163  	Ytr5
   164  	Ytr6
   165  	Ytr7
   166  	Ymr
   167  	Ymm
   168  	Yxr0          // X0 only. "<XMM0>" notation in Intel manual.
   169  	YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex
   170  	Yxr           // X0..X15
   171  	YxrEvex       // X0..X31
   172  	Yxm
   173  	YxmEvex       // YxrEvex+Ym
   174  	Yxvm          // VSIB vector array; vm32x/vm64x
   175  	YxvmEvex      // Yxvm which permits High-16 X register as index.
   176  	YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex
   177  	Yyr           // Y0..Y15
   178  	YyrEvex       // Y0..Y31
   179  	Yym
   180  	YymEvex   // YyrEvex+Ym
   181  	Yyvm      // VSIB vector array; vm32y/vm64y
   182  	YyvmEvex  // Yyvm which permits High-16 Y register as index.
   183  	YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource YzrEvex
   184  	Yzr       // Z0..Z31
   185  	Yzm       // Yzr+Ym
   186  	Yzvm      // VSIB vector array; vm32z/vm64z
   187  	Yk0       // K0
   188  	Yknot0    // K1..K7; write mask
   189  	Yk        // K0..K7; used for KOP
   190  	Ykm       // Yk+Ym; used for KOP
   191  	Ytls
   192  	Ytextsize
   193  	Yindir
   194  	Ymax
   195  )
   196  
   197  const (
   198  	Zxxx = iota
   199  	Zlit
   200  	Zlitm_r
   201  	Zlitr_m
   202  	Zlit_m_r
   203  	Z_rp
   204  	Zbr
   205  	Zcall
   206  	Zcallcon
   207  	Zcallduff
   208  	Zcallind
   209  	Zcallindreg
   210  	Zib_
   211  	Zib_rp
   212  	Zibo_m
   213  	Zibo_m_xm
   214  	Zil_
   215  	Zil_rp
   216  	Ziq_rp
   217  	Zilo_m
   218  	Zjmp
   219  	Zjmpcon
   220  	Zloop
   221  	Zo_iw
   222  	Zm_o
   223  	Zm_r
   224  	Z_m_r
   225  	Zm2_r
   226  	Zm_r_xm
   227  	Zm_r_i_xm
   228  	Zm_r_xm_nr
   229  	Zr_m_xm_nr
   230  	Zibm_r // mmx1,mmx2/mem64,imm8
   231  	Zibr_m
   232  	Zmb_r
   233  	Zaut_r
   234  	Zo_m
   235  	Zo_m64
   236  	Zpseudo
   237  	Zr_m
   238  	Zr_m_xm
   239  	Zrp_
   240  	Z_ib
   241  	Z_il
   242  	Zm_ibo
   243  	Zm_ilo
   244  	Zib_rr
   245  	Zil_rr
   246  	Zbyte
   247  
   248  	Zvex_rm_v_r
   249  	Zvex_rm_v_ro
   250  	Zvex_r_v_rm
   251  	Zvex_i_rm_vo
   252  	Zvex_v_rm_r
   253  	Zvex_i_rm_r
   254  	Zvex_i_r_v
   255  	Zvex_i_rm_v_r
   256  	Zvex
   257  	Zvex_rm_r_vo
   258  	Zvex_i_r_rm
   259  	Zvex_hr_rm_v_r
   260  
   261  	Zevex_first
   262  	Zevex_i_r_k_rm
   263  	Zevex_i_r_rm
   264  	Zevex_i_rm_k_r
   265  	Zevex_i_rm_k_vo
   266  	Zevex_i_rm_r
   267  	Zevex_i_rm_v_k_r
   268  	Zevex_i_rm_v_r
   269  	Zevex_i_rm_vo
   270  	Zevex_k_rmo
   271  	Zevex_r_k_rm
   272  	Zevex_r_v_k_rm
   273  	Zevex_r_v_rm
   274  	Zevex_rm_k_r
   275  	Zevex_rm_v_k_r
   276  	Zevex_rm_v_r
   277  	Zevex_last
   278  
   279  	Zmax
   280  )
   281  
   282  const (
   283  	Px   = 0
   284  	Px1  = 1    // symbolic; exact value doesn't matter
   285  	P32  = 0x32 // 32-bit only
   286  	Pe   = 0x66 // operand escape
   287  	Pm   = 0x0f // 2byte opcode escape
   288  	Pq   = 0xff // both escapes: 66 0f
   289  	Pb   = 0xfe // byte operands
   290  	Pf2  = 0xf2 // xmm escape 1: f2 0f
   291  	Pf3  = 0xf3 // xmm escape 2: f3 0f
   292  	Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f
   293  	Pq3  = 0x67 // xmm escape 3: 66 48 0f
   294  	Pq4  = 0x68 // xmm escape 4: 66 0F 38
   295  	Pq4w = 0x69 // Pq4 with Rex.w 66 0F 38
   296  	Pq5  = 0x6a // xmm escape 5: F3 0F 38
   297  	Pq5w = 0x6b // Pq5 with Rex.w F3 0F 38
   298  	Pfw  = 0xf4 // Pf3 with Rex.w: f3 48 0f
   299  	Pw   = 0x48 // Rex.w
   300  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   301  	Py   = 0x80 // defaults to 64-bit mode
   302  	Py1  = 0x81 // symbolic; exact value doesn't matter
   303  	Py3  = 0x83 // symbolic; exact value doesn't matter
   304  	Pavx = 0x84 // symbolic; exact value doesn't matter
   305  
   306  	RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R
   307  	Rxw     = 1 << 3 // =1, 64-bit operand size
   308  	Rxr     = 1 << 2 // extend modrm reg
   309  	Rxx     = 1 << 1 // extend sib index
   310  	Rxb     = 1 << 0 // extend modrm r/m, sib base, or opcode reg
   311  )
   312  
   313  const (
   314  	// Encoding for VEX prefix in tables.
   315  	// The P, L, and W fields are chosen to match
   316  	// their eventual locations in the VEX prefix bytes.
   317  
   318  	// Encoding for VEX prefix in tables.
   319  	// The P, L, and W fields are chosen to match
   320  	// their eventual locations in the VEX prefix bytes.
   321  
   322  	// Using spare bit to make leading [E]VEX encoding byte different from
   323  	// 0x0f even if all other VEX fields are 0.
   324  	avxEscape = 1 << 6
   325  
   326  	// P field - 2 bits
   327  	vex66 = 1 << 0
   328  	vexF3 = 2 << 0
   329  	vexF2 = 3 << 0
   330  	// L field - 1 bit
   331  	vexLZ  = 0 << 2
   332  	vexLIG = 0 << 2
   333  	vex128 = 0 << 2
   334  	vex256 = 1 << 2
   335  	// W field - 1 bit
   336  	vexWIG = 0 << 7
   337  	vexW0  = 0 << 7
   338  	vexW1  = 1 << 7
   339  	// M field - 5 bits, but mostly reserved; we can store up to 3
   340  	vex0F   = 1 << 3
   341  	vex0F38 = 2 << 3
   342  	vex0F3A = 3 << 3
   343  )
   344  
   345  var ycover [Ymax * Ymax]uint8
   346  
   347  var reg [MAXREG]int
   348  
   349  var regrex [MAXREG + 1]int
   350  
   351  var ynone = []ytab{
   352  	{Zlit, 1, argList{}},
   353  }
   354  
   355  var ytext = []ytab{
   356  	{Zpseudo, 0, argList{Ymb, Ytextsize}},
   357  	{Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
   358  }
   359  
   360  var ynop = []ytab{
   361  	{Zpseudo, 0, argList{}},
   362  	{Zpseudo, 0, argList{Yiauto}},
   363  	{Zpseudo, 0, argList{Yml}},
   364  	{Zpseudo, 0, argList{Yrf}},
   365  	{Zpseudo, 0, argList{Yxr}},
   366  	{Zpseudo, 0, argList{Yiauto}},
   367  	{Zpseudo, 0, argList{Yml}},
   368  	{Zpseudo, 0, argList{Yrf}},
   369  	{Zpseudo, 1, argList{Yxr}},
   370  }
   371  
   372  var yfuncdata = []ytab{
   373  	{Zpseudo, 0, argList{Yi32, Ym}},
   374  }
   375  
   376  var ypcdata = []ytab{
   377  	{Zpseudo, 0, argList{Yi32, Yi32}},
   378  }
   379  
   380  var yxorb = []ytab{
   381  	{Zib_, 1, argList{Yi32, Yal}},
   382  	{Zibo_m, 2, argList{Yi32, Ymb}},
   383  	{Zr_m, 1, argList{Yrb, Ymb}},
   384  	{Zm_r, 1, argList{Ymb, Yrb}},
   385  }
   386  
   387  var yaddl = []ytab{
   388  	{Zibo_m, 2, argList{Yi8, Yml}},
   389  	{Zil_, 1, argList{Yi32, Yax}},
   390  	{Zilo_m, 2, argList{Yi32, Yml}},
   391  	{Zr_m, 1, argList{Yrl, Yml}},
   392  	{Zm_r, 1, argList{Yml, Yrl}},
   393  }
   394  
   395  var yincl = []ytab{
   396  	{Z_rp, 1, argList{Yrl}},
   397  	{Zo_m, 2, argList{Yml}},
   398  }
   399  
   400  var yincq = []ytab{
   401  	{Zo_m, 2, argList{Yml}},
   402  }
   403  
   404  var ycmpb = []ytab{
   405  	{Z_ib, 1, argList{Yal, Yi32}},
   406  	{Zm_ibo, 2, argList{Ymb, Yi32}},
   407  	{Zm_r, 1, argList{Ymb, Yrb}},
   408  	{Zr_m, 1, argList{Yrb, Ymb}},
   409  }
   410  
   411  var ycmpl = []ytab{
   412  	{Zm_ibo, 2, argList{Yml, Yi8}},
   413  	{Z_il, 1, argList{Yax, Yi32}},
   414  	{Zm_ilo, 2, argList{Yml, Yi32}},
   415  	{Zm_r, 1, argList{Yml, Yrl}},
   416  	{Zr_m, 1, argList{Yrl, Yml}},
   417  }
   418  
   419  var yshb = []ytab{
   420  	{Zo_m, 2, argList{Yi1, Ymb}},
   421  	{Zibo_m, 2, argList{Yu8, Ymb}},
   422  	{Zo_m, 2, argList{Ycx, Ymb}},
   423  }
   424  
   425  var yshl = []ytab{
   426  	{Zo_m, 2, argList{Yi1, Yml}},
   427  	{Zibo_m, 2, argList{Yu8, Yml}},
   428  	{Zo_m, 2, argList{Ycl, Yml}},
   429  	{Zo_m, 2, argList{Ycx, Yml}},
   430  }
   431  
   432  var ytestl = []ytab{
   433  	{Zil_, 1, argList{Yi32, Yax}},
   434  	{Zilo_m, 2, argList{Yi32, Yml}},
   435  	{Zr_m, 1, argList{Yrl, Yml}},
   436  	{Zm_r, 1, argList{Yml, Yrl}},
   437  }
   438  
   439  var ymovb = []ytab{
   440  	{Zr_m, 1, argList{Yrb, Ymb}},
   441  	{Zm_r, 1, argList{Ymb, Yrb}},
   442  	{Zib_rp, 1, argList{Yi32, Yrb}},
   443  	{Zibo_m, 2, argList{Yi32, Ymb}},
   444  }
   445  
   446  var ybtl = []ytab{
   447  	{Zibo_m, 2, argList{Yi8, Yml}},
   448  	{Zr_m, 1, argList{Yrl, Yml}},
   449  }
   450  
   451  var ymovw = []ytab{
   452  	{Zr_m, 1, argList{Yrl, Yml}},
   453  	{Zm_r, 1, argList{Yml, Yrl}},
   454  	{Zil_rp, 1, argList{Yi32, Yrl}},
   455  	{Zilo_m, 2, argList{Yi32, Yml}},
   456  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   457  }
   458  
   459  var ymovl = []ytab{
   460  	{Zr_m, 1, argList{Yrl, Yml}},
   461  	{Zm_r, 1, argList{Yml, Yrl}},
   462  	{Zil_rp, 1, argList{Yi32, Yrl}},
   463  	{Zilo_m, 2, argList{Yi32, Yml}},
   464  	{Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
   465  	{Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
   466  	{Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
   467  	{Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
   468  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   469  }
   470  
   471  var yret = []ytab{
   472  	{Zo_iw, 1, argList{}},
   473  	{Zo_iw, 1, argList{Yi32}},
   474  }
   475  
   476  var ymovq = []ytab{
   477  	// valid in 32-bit mode
   478  	{Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
   479  	{Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
   480  	{Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
   481  	{Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   482  	{Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   483  
   484  	// valid only in 64-bit mode, usually with 64-bit prefix
   485  	{Zr_m, 1, argList{Yrl, Yml}},      // 0x89
   486  	{Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
   487  	{Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
   488  	{Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
   489  	{Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
   490  	{Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
   491  	{Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
   492  	{Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
   493  	{Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
   494  	{Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
   495  }
   496  
   497  var ymovbe = []ytab{
   498  	{Zlitm_r, 3, argList{Ym, Yrl}},
   499  	{Zlitr_m, 3, argList{Yrl, Ym}},
   500  }
   501  
   502  var ym_rl = []ytab{
   503  	{Zm_r, 1, argList{Ym, Yrl}},
   504  }
   505  
   506  var yrl_m = []ytab{
   507  	{Zr_m, 1, argList{Yrl, Ym}},
   508  }
   509  
   510  var ymb_rl = []ytab{
   511  	{Zmb_r, 1, argList{Ymb, Yrl}},
   512  }
   513  
   514  var yml_rl = []ytab{
   515  	{Zm_r, 1, argList{Yml, Yrl}},
   516  }
   517  
   518  var yrl_ml = []ytab{
   519  	{Zr_m, 1, argList{Yrl, Yml}},
   520  }
   521  
   522  var yml_mb = []ytab{
   523  	{Zr_m, 1, argList{Yrb, Ymb}},
   524  	{Zm_r, 1, argList{Ymb, Yrb}},
   525  }
   526  
   527  var yrb_mb = []ytab{
   528  	{Zr_m, 1, argList{Yrb, Ymb}},
   529  }
   530  
   531  var yxchg = []ytab{
   532  	{Z_rp, 1, argList{Yax, Yrl}},
   533  	{Zrp_, 1, argList{Yrl, Yax}},
   534  	{Zr_m, 1, argList{Yrl, Yml}},
   535  	{Zm_r, 1, argList{Yml, Yrl}},
   536  }
   537  
   538  var ydivl = []ytab{
   539  	{Zm_o, 2, argList{Yml}},
   540  }
   541  
   542  var ydivb = []ytab{
   543  	{Zm_o, 2, argList{Ymb}},
   544  }
   545  
   546  var yimul = []ytab{
   547  	{Zm_o, 2, argList{Yml}},
   548  	{Zib_rr, 1, argList{Yi8, Yrl}},
   549  	{Zil_rr, 1, argList{Yi32, Yrl}},
   550  	{Zm_r, 2, argList{Yml, Yrl}},
   551  }
   552  
   553  var yimul3 = []ytab{
   554  	{Zibm_r, 2, argList{Yi8, Yml, Yrl}},
   555  	{Zibm_r, 2, argList{Yi32, Yml, Yrl}},
   556  }
   557  
   558  var ybyte = []ytab{
   559  	{Zbyte, 1, argList{Yi64}},
   560  }
   561  
   562  var yin = []ytab{
   563  	{Zib_, 1, argList{Yi32}},
   564  	{Zlit, 1, argList{}},
   565  }
   566  
   567  var yint = []ytab{
   568  	{Zib_, 1, argList{Yi32}},
   569  }
   570  
   571  var ypushl = []ytab{
   572  	{Zrp_, 1, argList{Yrl}},
   573  	{Zm_o, 2, argList{Ym}},
   574  	{Zib_, 1, argList{Yi8}},
   575  	{Zil_, 1, argList{Yi32}},
   576  }
   577  
   578  var ypopl = []ytab{
   579  	{Z_rp, 1, argList{Yrl}},
   580  	{Zo_m, 2, argList{Ym}},
   581  }
   582  
   583  var ywrfsbase = []ytab{
   584  	{Zm_o, 2, argList{Yrl}},
   585  }
   586  
   587  var yrdrand = []ytab{
   588  	{Zo_m, 2, argList{Yrl}},
   589  }
   590  
   591  var yclflush = []ytab{
   592  	{Zo_m, 2, argList{Ym}},
   593  }
   594  
   595  var ybswap = []ytab{
   596  	{Z_rp, 2, argList{Yrl}},
   597  }
   598  
   599  var yscond = []ytab{
   600  	{Zo_m, 2, argList{Ymb}},
   601  }
   602  
   603  var yjcond = []ytab{
   604  	{Zbr, 0, argList{Ybr}},
   605  	{Zbr, 0, argList{Yi0, Ybr}},
   606  	{Zbr, 1, argList{Yi1, Ybr}},
   607  }
   608  
   609  var yloop = []ytab{
   610  	{Zloop, 1, argList{Ybr}},
   611  }
   612  
   613  var ycall = []ytab{
   614  	{Zcallindreg, 0, argList{Yml}},
   615  	{Zcallindreg, 2, argList{Yrx, Yrx}},
   616  	{Zcallind, 2, argList{Yindir}},
   617  	{Zcall, 0, argList{Ybr}},
   618  	{Zcallcon, 1, argList{Yi32}},
   619  }
   620  
   621  var yduff = []ytab{
   622  	{Zcallduff, 1, argList{Yi32}},
   623  }
   624  
   625  var yjmp = []ytab{
   626  	{Zo_m64, 2, argList{Yml}},
   627  	{Zjmp, 0, argList{Ybr}},
   628  	{Zjmpcon, 1, argList{Yi32}},
   629  }
   630  
   631  var yfmvd = []ytab{
   632  	{Zm_o, 2, argList{Ym, Yf0}},
   633  	{Zo_m, 2, argList{Yf0, Ym}},
   634  	{Zm_o, 2, argList{Yrf, Yf0}},
   635  	{Zo_m, 2, argList{Yf0, Yrf}},
   636  }
   637  
   638  var yfmvdp = []ytab{
   639  	{Zo_m, 2, argList{Yf0, Ym}},
   640  	{Zo_m, 2, argList{Yf0, Yrf}},
   641  }
   642  
   643  var yfmvf = []ytab{
   644  	{Zm_o, 2, argList{Ym, Yf0}},
   645  	{Zo_m, 2, argList{Yf0, Ym}},
   646  }
   647  
   648  var yfmvx = []ytab{
   649  	{Zm_o, 2, argList{Ym, Yf0}},
   650  }
   651  
   652  var yfmvp = []ytab{
   653  	{Zo_m, 2, argList{Yf0, Ym}},
   654  }
   655  
   656  var yfcmv = []ytab{
   657  	{Zm_o, 2, argList{Yrf, Yf0}},
   658  }
   659  
   660  var yfadd = []ytab{
   661  	{Zm_o, 2, argList{Ym, Yf0}},
   662  	{Zm_o, 2, argList{Yrf, Yf0}},
   663  	{Zo_m, 2, argList{Yf0, Yrf}},
   664  }
   665  
   666  var yfxch = []ytab{
   667  	{Zo_m, 2, argList{Yf0, Yrf}},
   668  	{Zm_o, 2, argList{Yrf, Yf0}},
   669  }
   670  
   671  var ycompp = []ytab{
   672  	{Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1
   673  }
   674  
   675  var ystsw = []ytab{
   676  	{Zo_m, 2, argList{Ym}},
   677  	{Zlit, 1, argList{Yax}},
   678  }
   679  
   680  var ysvrs_mo = []ytab{
   681  	{Zm_o, 2, argList{Ym}},
   682  }
   683  
   684  // unaryDst version of "ysvrs_mo".
   685  var ysvrs_om = []ytab{
   686  	{Zo_m, 2, argList{Ym}},
   687  }
   688  
   689  var ymm = []ytab{
   690  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   691  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   692  }
   693  
   694  var yxm = []ytab{
   695  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   696  }
   697  
   698  var yxm_q4 = []ytab{
   699  	{Zm_r, 1, argList{Yxm, Yxr}},
   700  }
   701  
   702  var yxcvm1 = []ytab{
   703  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   704  	{Zm_r_xm, 2, argList{Yxm, Ymr}},
   705  }
   706  
   707  var yxcvm2 = []ytab{
   708  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   709  	{Zm_r_xm, 2, argList{Ymm, Yxr}},
   710  }
   711  
   712  var yxr = []ytab{
   713  	{Zm_r_xm, 1, argList{Yxr, Yxr}},
   714  }
   715  
   716  var yxr_ml = []ytab{
   717  	{Zr_m_xm, 1, argList{Yxr, Yml}},
   718  }
   719  
   720  var ymr = []ytab{
   721  	{Zm_r, 1, argList{Ymr, Ymr}},
   722  }
   723  
   724  var ymr_ml = []ytab{
   725  	{Zr_m_xm, 1, argList{Ymr, Yml}},
   726  }
   727  
   728  var yxcmpi = []ytab{
   729  	{Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
   730  }
   731  
   732  var yxmov = []ytab{
   733  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   734  	{Zr_m_xm, 1, argList{Yxr, Yxm}},
   735  }
   736  
   737  var yxcvfl = []ytab{
   738  	{Zm_r_xm, 1, argList{Yxm, Yrl}},
   739  }
   740  
   741  var yxcvlf = []ytab{
   742  	{Zm_r_xm, 1, argList{Yml, Yxr}},
   743  }
   744  
   745  var yxcvfq = []ytab{
   746  	{Zm_r_xm, 2, argList{Yxm, Yrl}},
   747  }
   748  
   749  var yxcvqf = []ytab{
   750  	{Zm_r_xm, 2, argList{Yml, Yxr}},
   751  }
   752  
   753  var yps = []ytab{
   754  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   755  	{Zibo_m_xm, 2, argList{Yi8, Ymr}},
   756  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   757  	{Zibo_m_xm, 3, argList{Yi8, Yxr}},
   758  }
   759  
   760  var yxrrl = []ytab{
   761  	{Zm_r, 1, argList{Yxr, Yrl}},
   762  }
   763  
   764  var ymrxr = []ytab{
   765  	{Zm_r, 1, argList{Ymr, Yxr}},
   766  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   767  }
   768  
   769  var ymshuf = []ytab{
   770  	{Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
   771  }
   772  
   773  var ymshufb = []ytab{
   774  	{Zm2_r, 2, argList{Yxm, Yxr}},
   775  }
   776  
   777  // It should never have more than 1 entry,
   778  // because some optab entries have opcode sequences that
   779  // are longer than 2 bytes (zoffset=2 here),
   780  // ROUNDPD and ROUNDPS and recently added BLENDPD,
   781  // to name a few.
   782  var yxshuf = []ytab{
   783  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   784  }
   785  
   786  var yextrw = []ytab{
   787  	{Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
   788  	{Zibr_m, 2, argList{Yu8, Yxr, Yml}},
   789  }
   790  
   791  var yextr = []ytab{
   792  	{Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
   793  }
   794  
   795  var yinsrw = []ytab{
   796  	{Zibm_r, 2, argList{Yu8, Yml, Yxr}},
   797  }
   798  
   799  var yinsr = []ytab{
   800  	{Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
   801  }
   802  
   803  var ypsdq = []ytab{
   804  	{Zibo_m, 2, argList{Yi8, Yxr}},
   805  }
   806  
   807  var ymskb = []ytab{
   808  	{Zm_r_xm, 2, argList{Yxr, Yrl}},
   809  	{Zm_r_xm, 1, argList{Ymr, Yrl}},
   810  }
   811  
   812  var ycrc32l = []ytab{
   813  	{Zlitm_r, 0, argList{Yml, Yrl}},
   814  }
   815  
   816  var ycrc32b = []ytab{
   817  	{Zlitm_r, 0, argList{Ymb, Yrl}},
   818  }
   819  
   820  var yprefetch = []ytab{
   821  	{Zm_o, 2, argList{Ym}},
   822  }
   823  
   824  var yaes = []ytab{
   825  	{Zlitm_r, 2, argList{Yxm, Yxr}},
   826  }
   827  
   828  var yxbegin = []ytab{
   829  	{Zjmp, 1, argList{Ybr}},
   830  }
   831  
   832  var yxabort = []ytab{
   833  	{Zib_, 1, argList{Yu8}},
   834  }
   835  
   836  var ylddqu = []ytab{
   837  	{Zm_r, 1, argList{Ym, Yxr}},
   838  }
   839  
   840  var ypalignr = []ytab{
   841  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   842  }
   843  
   844  var ysha256rnds2 = []ytab{
   845  	{Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}},
   846  }
   847  
   848  var yblendvpd = []ytab{
   849  	{Z_m_r, 1, argList{Yxr0, Yxm, Yxr}},
   850  }
   851  
   852  var ymmxmm0f38 = []ytab{
   853  	{Zlitm_r, 3, argList{Ymm, Ymr}},
   854  	{Zlitm_r, 5, argList{Yxm, Yxr}},
   855  }
   856  
   857  var yextractps = []ytab{
   858  	{Zibr_m, 2, argList{Yu2, Yxr, Yml}},
   859  }
   860  
   861  var ysha1rnds4 = []ytab{
   862  	{Zibm_r, 2, argList{Yu2, Yxm, Yxr}},
   863  }
   864  
   865  // You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
   866  // ACRC32, and p.From and p.To as operands (obj.Addr).  The linker scans optab
   867  // to find the entry with the given p.As and then looks through the ytable for
   868  // that instruction (the second field in the optab struct) for a line whose
   869  // first two values match the Ytypes of the p.From and p.To operands.  The
   870  // function oclass computes the specific Ytype of an operand and then the set
   871  // of more general Ytypes that it satisfies is implied by the ycover table, set
   872  // up in instinit.  For example, oclass distinguishes the constants 0 and 1
   873  // from the more general 8-bit constants, but instinit says
   874  //
   875  //	ycover[Yi0*Ymax+Ys32] = 1
   876  //	ycover[Yi1*Ymax+Ys32] = 1
   877  //	ycover[Yi8*Ymax+Ys32] = 1
   878  //
   879  // which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   880  // if that's what an instruction can handle.
   881  //
   882  // In parallel with the scan through the ytable for the appropriate line, there
   883  // is a z pointer that starts out pointing at the strange magic byte list in
   884  // the Optab struct.  With each step past a non-matching ytable line, z
   885  // advances by the 4th entry in the line.  When a matching line is found, that
   886  // z pointer has the extra data to use in laying down the instruction bytes.
   887  // The actual bytes laid down are a function of the 3rd entry in the line (that
   888  // is, the Ztype) and the z bytes.
   889  //
   890  // For example, let's look at AADDL.  The optab line says:
   891  //
   892  //	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   893  //
   894  // and yaddl says
   895  //
   896  //	var yaddl = []ytab{
   897  //	        {Yi8, Ynone, Yml, Zibo_m, 2},
   898  //	        {Yi32, Ynone, Yax, Zil_, 1},
   899  //	        {Yi32, Ynone, Yml, Zilo_m, 2},
   900  //	        {Yrl, Ynone, Yml, Zr_m, 1},
   901  //	        {Yml, Ynone, Yrl, Zm_r, 1},
   902  //	}
   903  //
   904  // so there are 5 possible types of ADDL instruction that can be laid down, and
   905  // possible states used to lay them down (Ztype and z pointer, assuming z
   906  // points at opBytes{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
   907  //
   908  //	Yi8, Yml -> Zibo_m, z (0x83, 00)
   909  //	Yi32, Yax -> Zil_, z+2 (0x05)
   910  //	Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   911  //	Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   912  //	Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   913  //
   914  // The Pconstant in the optab line controls the prefix bytes to emit.  That's
   915  // relatively straightforward as this program goes.
   916  //
   917  // The switch on yt.zcase in doasm implements the various Z cases.  Zibo_m, for
   918  // example, is an opcode byte (z[0]) then an asmando (which is some kind of
   919  // encoded addressing mode for the Yml arg), and then a single immediate byte.
   920  // Zilo_m is the same but a long (32-bit) immediate.
   921  var optab =
   922  // as, ytab, andproto, opcode
   923  [...]Optab{
   924  	{obj.AXXX, nil, 0, opBytes{}},
   925  	{AAAA, ynone, P32, opBytes{0x37}},
   926  	{AAAD, ynone, P32, opBytes{0xd5, 0x0a}},
   927  	{AAAM, ynone, P32, opBytes{0xd4, 0x0a}},
   928  	{AAAS, ynone, P32, opBytes{0x3f}},
   929  	{AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}},
   930  	{AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   931  	{AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   932  	{AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   933  	{AADCXL, yml_rl, Pq4, opBytes{0xf6}},
   934  	{AADCXQ, yml_rl, Pq4w, opBytes{0xf6}},
   935  	{AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}},
   936  	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   937  	{AADDPD, yxm, Pq, opBytes{0x58}},
   938  	{AADDPS, yxm, Pm, opBytes{0x58}},
   939  	{AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   940  	{AADDSD, yxm, Pf2, opBytes{0x58}},
   941  	{AADDSS, yxm, Pf3, opBytes{0x58}},
   942  	{AADDSUBPD, yxm, Pq, opBytes{0xd0}},
   943  	{AADDSUBPS, yxm, Pf2, opBytes{0xd0}},
   944  	{AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   945  	{AADOXL, yml_rl, Pq5, opBytes{0xf6}},
   946  	{AADOXQ, yml_rl, Pq5w, opBytes{0xf6}},
   947  	{AADJSP, nil, 0, opBytes{}},
   948  	{AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}},
   949  	{AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   950  	{AANDNPD, yxm, Pq, opBytes{0x55}},
   951  	{AANDNPS, yxm, Pm, opBytes{0x55}},
   952  	{AANDPD, yxm, Pq, opBytes{0x54}},
   953  	{AANDPS, yxm, Pm, opBytes{0x54}},
   954  	{AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   955  	{AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   956  	{AARPL, yrl_ml, P32, opBytes{0x63}},
   957  	{ABOUNDL, yrl_m, P32, opBytes{0x62}},
   958  	{ABOUNDW, yrl_m, Pe, opBytes{0x62}},
   959  	{ABSFL, yml_rl, Pm, opBytes{0xbc}},
   960  	{ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}},
   961  	{ABSFW, yml_rl, Pq, opBytes{0xbc}},
   962  	{ABSRL, yml_rl, Pm, opBytes{0xbd}},
   963  	{ABSRQ, yml_rl, Pw, opBytes{0x0f, 0xbd}},
   964  	{ABSRW, yml_rl, Pq, opBytes{0xbd}},
   965  	{ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}},
   966  	{ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}},
   967  	{ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}},
   968  	{ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}},
   969  	{ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}},
   970  	{ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}},
   971  	{ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}},
   972  	{ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}},
   973  	{ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}},
   974  	{ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}},
   975  	{ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}},
   976  	{ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}},
   977  	{ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}},
   978  	{ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}},
   979  	{ABYTE, ybyte, Px, opBytes{1}},
   980  	{obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}},
   981  	{ACBW, ynone, Pe, opBytes{0x98}},
   982  	{ACDQ, ynone, Px, opBytes{0x99}},
   983  	{ACDQE, ynone, Pw, opBytes{0x98}},
   984  	{ACLAC, ynone, Pm, opBytes{01, 0xca}},
   985  	{ACLC, ynone, Px, opBytes{0xf8}},
   986  	{ACLD, ynone, Px, opBytes{0xfc}},
   987  	{ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}},
   988  	{ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}},
   989  	{ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}},
   990  	{ACLI, ynone, Px, opBytes{0xfa}},
   991  	{ACLTS, ynone, Pm, opBytes{0x06}},
   992  	{ACLWB, yclflush, Pq, opBytes{0xae, 06}},
   993  	{ACMC, ynone, Px, opBytes{0xf5}},
   994  	{ACMOVLCC, yml_rl, Pm, opBytes{0x43}},
   995  	{ACMOVLCS, yml_rl, Pm, opBytes{0x42}},
   996  	{ACMOVLEQ, yml_rl, Pm, opBytes{0x44}},
   997  	{ACMOVLGE, yml_rl, Pm, opBytes{0x4d}},
   998  	{ACMOVLGT, yml_rl, Pm, opBytes{0x4f}},
   999  	{ACMOVLHI, yml_rl, Pm, opBytes{0x47}},
  1000  	{ACMOVLLE, yml_rl, Pm, opBytes{0x4e}},
  1001  	{ACMOVLLS, yml_rl, Pm, opBytes{0x46}},
  1002  	{ACMOVLLT, yml_rl, Pm, opBytes{0x4c}},
  1003  	{ACMOVLMI, yml_rl, Pm, opBytes{0x48}},
  1004  	{ACMOVLNE, yml_rl, Pm, opBytes{0x45}},
  1005  	{ACMOVLOC, yml_rl, Pm, opBytes{0x41}},
  1006  	{ACMOVLOS, yml_rl, Pm, opBytes{0x40}},
  1007  	{ACMOVLPC, yml_rl, Pm, opBytes{0x4b}},
  1008  	{ACMOVLPL, yml_rl, Pm, opBytes{0x49}},
  1009  	{ACMOVLPS, yml_rl, Pm, opBytes{0x4a}},
  1010  	{ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}},
  1011  	{ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}},
  1012  	{ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}},
  1013  	{ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}},
  1014  	{ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}},
  1015  	{ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}},
  1016  	{ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}},
  1017  	{ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}},
  1018  	{ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}},
  1019  	{ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}},
  1020  	{ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}},
  1021  	{ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}},
  1022  	{ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}},
  1023  	{ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}},
  1024  	{ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}},
  1025  	{ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}},
  1026  	{ACMOVWCC, yml_rl, Pq, opBytes{0x43}},
  1027  	{ACMOVWCS, yml_rl, Pq, opBytes{0x42}},
  1028  	{ACMOVWEQ, yml_rl, Pq, opBytes{0x44}},
  1029  	{ACMOVWGE, yml_rl, Pq, opBytes{0x4d}},
  1030  	{ACMOVWGT, yml_rl, Pq, opBytes{0x4f}},
  1031  	{ACMOVWHI, yml_rl, Pq, opBytes{0x47}},
  1032  	{ACMOVWLE, yml_rl, Pq, opBytes{0x4e}},
  1033  	{ACMOVWLS, yml_rl, Pq, opBytes{0x46}},
  1034  	{ACMOVWLT, yml_rl, Pq, opBytes{0x4c}},
  1035  	{ACMOVWMI, yml_rl, Pq, opBytes{0x48}},
  1036  	{ACMOVWNE, yml_rl, Pq, opBytes{0x45}},
  1037  	{ACMOVWOC, yml_rl, Pq, opBytes{0x41}},
  1038  	{ACMOVWOS, yml_rl, Pq, opBytes{0x40}},
  1039  	{ACMOVWPC, yml_rl, Pq, opBytes{0x4b}},
  1040  	{ACMOVWPL, yml_rl, Pq, opBytes{0x49}},
  1041  	{ACMOVWPS, yml_rl, Pq, opBytes{0x4a}},
  1042  	{ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}},
  1043  	{ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1044  	{ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}},
  1045  	{ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}},
  1046  	{ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1047  	{ACMPSB, ynone, Pb, opBytes{0xa6}},
  1048  	{ACMPSD, yxcmpi, Px, opBytes{Pf2, 0xc2}},
  1049  	{ACMPSL, ynone, Px, opBytes{0xa7}},
  1050  	{ACMPSQ, ynone, Pw, opBytes{0xa7}},
  1051  	{ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}},
  1052  	{ACMPSW, ynone, Pe, opBytes{0xa7}},
  1053  	{ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1054  	{ACOMISD, yxm, Pe, opBytes{0x2f}},
  1055  	{ACOMISS, yxm, Pm, opBytes{0x2f}},
  1056  	{ACPUID, ynone, Pm, opBytes{0xa2}},
  1057  	{ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}},
  1058  	{ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}},
  1059  	{ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}},
  1060  	{ACVTPD2PS, yxm, Pe, opBytes{0x5a}},
  1061  	{ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}},
  1062  	{ACVTPS2PD, yxm, Pm, opBytes{0x5a}},
  1063  	{ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}},
  1064  	{ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}},
  1065  	{ACVTSD2SS, yxm, Pf2, opBytes{0x5a}},
  1066  	{ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}},
  1067  	{ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}},
  1068  	{ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}},
  1069  	{ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}},
  1070  	{ACVTSS2SD, yxm, Pf3, opBytes{0x5a}},
  1071  	{ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}},
  1072  	{ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}},
  1073  	{ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}},
  1074  	{ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}},
  1075  	{ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}},
  1076  	{ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}},
  1077  	{ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}},
  1078  	{ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}},
  1079  	{ACWD, ynone, Pe, opBytes{0x99}},
  1080  	{ACWDE, ynone, Px, opBytes{0x98}},
  1081  	{ACQO, ynone, Pw, opBytes{0x99}},
  1082  	{ADAA, ynone, P32, opBytes{0x27}},
  1083  	{ADAS, ynone, P32, opBytes{0x2f}},
  1084  	{ADECB, yscond, Pb, opBytes{0xfe, 01}},
  1085  	{ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}},
  1086  	{ADECQ, yincq, Pw, opBytes{0xff, 01}},
  1087  	{ADECW, yincq, Pe, opBytes{0xff, 01}},
  1088  	{ADIVB, ydivb, Pb, opBytes{0xf6, 06}},
  1089  	{ADIVL, ydivl, Px, opBytes{0xf7, 06}},
  1090  	{ADIVPD, yxm, Pe, opBytes{0x5e}},
  1091  	{ADIVPS, yxm, Pm, opBytes{0x5e}},
  1092  	{ADIVQ, ydivl, Pw, opBytes{0xf7, 06}},
  1093  	{ADIVSD, yxm, Pf2, opBytes{0x5e}},
  1094  	{ADIVSS, yxm, Pf3, opBytes{0x5e}},
  1095  	{ADIVW, ydivl, Pe, opBytes{0xf7, 06}},
  1096  	{ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}},
  1097  	{ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}},
  1098  	{AEMMS, ynone, Pm, opBytes{0x77}},
  1099  	{AENDBR64, ynone, Pf3, opBytes{0x1e, 0xfa}},
  1100  	{AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}},
  1101  	{AENTER, nil, 0, opBytes{}}, // botch
  1102  	{AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}},
  1103  	{AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}},
  1104  	{AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1105  	{AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1106  	{AHLT, ynone, Px, opBytes{0xf4}},
  1107  	{AIDIVB, ydivb, Pb, opBytes{0xf6, 07}},
  1108  	{AIDIVL, ydivl, Px, opBytes{0xf7, 07}},
  1109  	{AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}},
  1110  	{AIDIVW, ydivl, Pe, opBytes{0xf7, 07}},
  1111  	{AIMULB, ydivb, Pb, opBytes{0xf6, 05}},
  1112  	{AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1113  	{AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1114  	{AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1115  	{AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}},
  1116  	{AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}},
  1117  	{AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}},
  1118  	{AINB, yin, Pb, opBytes{0xe4, 0xec}},
  1119  	{AINW, yin, Pe, opBytes{0xe5, 0xed}},
  1120  	{AINL, yin, Px, opBytes{0xe5, 0xed}},
  1121  	{AINCB, yscond, Pb, opBytes{0xfe, 00}},
  1122  	{AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}},
  1123  	{AINCQ, yincq, Pw, opBytes{0xff, 00}},
  1124  	{AINCW, yincq, Pe, opBytes{0xff, 00}},
  1125  	{AINSB, ynone, Pb, opBytes{0x6c}},
  1126  	{AINSL, ynone, Px, opBytes{0x6d}},
  1127  	{AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}},
  1128  	{AINSW, ynone, Pe, opBytes{0x6d}},
  1129  	{AICEBP, ynone, Px, opBytes{0xf1}},
  1130  	{AINT, yint, Px, opBytes{0xcd}},
  1131  	{AINTO, ynone, P32, opBytes{0xce}},
  1132  	{AIRETL, ynone, Px, opBytes{0xcf}},
  1133  	{AIRETQ, ynone, Pw, opBytes{0xcf}},
  1134  	{AIRETW, ynone, Pe, opBytes{0xcf}},
  1135  	{AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}},
  1136  	{AJCS, yjcond, Px, opBytes{0x72, 0x82}},
  1137  	{AJCXZL, yloop, Px, opBytes{0xe3}},
  1138  	{AJCXZW, yloop, Px, opBytes{0xe3}},
  1139  	{AJCXZQ, yloop, Px, opBytes{0xe3}},
  1140  	{AJEQ, yjcond, Px, opBytes{0x74, 0x84}},
  1141  	{AJGE, yjcond, Px, opBytes{0x7d, 0x8d}},
  1142  	{AJGT, yjcond, Px, opBytes{0x7f, 0x8f}},
  1143  	{AJHI, yjcond, Px, opBytes{0x77, 0x87}},
  1144  	{AJLE, yjcond, Px, opBytes{0x7e, 0x8e}},
  1145  	{AJLS, yjcond, Px, opBytes{0x76, 0x86}},
  1146  	{AJLT, yjcond, Px, opBytes{0x7c, 0x8c}},
  1147  	{AJMI, yjcond, Px, opBytes{0x78, 0x88}},
  1148  	{obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}},
  1149  	{AJNE, yjcond, Px, opBytes{0x75, 0x85}},
  1150  	{AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}},
  1151  	{AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}},
  1152  	{AJPC, yjcond, Px, opBytes{0x7b, 0x8b}},
  1153  	{AJPL, yjcond, Px, opBytes{0x79, 0x89}},
  1154  	{AJPS, yjcond, Px, opBytes{0x7a, 0x8a}},
  1155  	{AHADDPD, yxm, Pq, opBytes{0x7c}},
  1156  	{AHADDPS, yxm, Pf2, opBytes{0x7c}},
  1157  	{AHSUBPD, yxm, Pq, opBytes{0x7d}},
  1158  	{AHSUBPS, yxm, Pf2, opBytes{0x7d}},
  1159  	{ALAHF, ynone, Px, opBytes{0x9f}},
  1160  	{ALARL, yml_rl, Pm, opBytes{0x02}},
  1161  	{ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}},
  1162  	{ALARW, yml_rl, Pq, opBytes{0x02}},
  1163  	{ALDDQU, ylddqu, Pf2, opBytes{0xf0}},
  1164  	{ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}},
  1165  	{ALEAL, ym_rl, Px, opBytes{0x8d}},
  1166  	{ALEAQ, ym_rl, Pw, opBytes{0x8d}},
  1167  	{ALEAVEL, ynone, P32, opBytes{0xc9}},
  1168  	{ALEAVEQ, ynone, Py, opBytes{0xc9}},
  1169  	{ALEAVEW, ynone, Pe, opBytes{0xc9}},
  1170  	{ALEAW, ym_rl, Pe, opBytes{0x8d}},
  1171  	{ALOCK, ynone, Px, opBytes{0xf0}},
  1172  	{ALODSB, ynone, Pb, opBytes{0xac}},
  1173  	{ALODSL, ynone, Px, opBytes{0xad}},
  1174  	{ALODSQ, ynone, Pw, opBytes{0xad}},
  1175  	{ALODSW, ynone, Pe, opBytes{0xad}},
  1176  	{ALONG, ybyte, Px, opBytes{4}},
  1177  	{ALOOP, yloop, Px, opBytes{0xe2}},
  1178  	{ALOOPEQ, yloop, Px, opBytes{0xe1}},
  1179  	{ALOOPNE, yloop, Px, opBytes{0xe0}},
  1180  	{ALTR, ydivl, Pm, opBytes{0x00, 03}},
  1181  	{ALZCNTL, yml_rl, Pf3, opBytes{0xbd}},
  1182  	{ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}},
  1183  	{ALZCNTW, yml_rl, Pef3, opBytes{0xbd}},
  1184  	{ALSLL, yml_rl, Pm, opBytes{0x03}},
  1185  	{ALSLW, yml_rl, Pq, opBytes{0x03}},
  1186  	{ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}},
  1187  	{AMASKMOVOU, yxr, Pe, opBytes{0xf7}},
  1188  	{AMASKMOVQ, ymr, Pm, opBytes{0xf7}},
  1189  	{AMAXPD, yxm, Pe, opBytes{0x5f}},
  1190  	{AMAXPS, yxm, Pm, opBytes{0x5f}},
  1191  	{AMAXSD, yxm, Pf2, opBytes{0x5f}},
  1192  	{AMAXSS, yxm, Pf3, opBytes{0x5f}},
  1193  	{AMINPD, yxm, Pe, opBytes{0x5d}},
  1194  	{AMINPS, yxm, Pm, opBytes{0x5d}},
  1195  	{AMINSD, yxm, Pf2, opBytes{0x5d}},
  1196  	{AMINSS, yxm, Pf3, opBytes{0x5d}},
  1197  	{AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}},
  1198  	{AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}},
  1199  	{AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}},
  1200  	{AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}},
  1201  	{AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1202  	{AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}},
  1203  	{AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}},
  1204  	{AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}},
  1205  	{AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}},
  1206  	{AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}},
  1207  	{AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}},
  1208  	{AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}},
  1209  	{AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}},
  1210  	{AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}},
  1211  	{AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}},
  1212  	{AMOVHLPS, yxr, Pm, opBytes{0x12}},
  1213  	{AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}},
  1214  	{AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}},
  1215  	{AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1216  	{AMOVLHPS, yxr, Pm, opBytes{0x16}},
  1217  	{AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}},
  1218  	{AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}},
  1219  	{AMOVLQSX, yml_rl, Pw, opBytes{0x63}},
  1220  	{AMOVLQZX, yml_rl, Px, opBytes{0x8b}},
  1221  	{AMOVMSKPD, yxrrl, Pq, opBytes{0x50}},
  1222  	{AMOVMSKPS, yxrrl, Pm, opBytes{0x50}},
  1223  	{AMOVNTO, yxr_ml, Pe, opBytes{0xe7}},
  1224  	{AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}},
  1225  	{AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}},
  1226  	{AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}},
  1227  	{AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}},
  1228  	{AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1229  	{AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}},
  1230  	{AMOVSB, ynone, Pb, opBytes{0xa4}},
  1231  	{AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}},
  1232  	{AMOVSL, ynone, Px, opBytes{0xa5}},
  1233  	{AMOVSQ, ynone, Pw, opBytes{0xa5}},
  1234  	{AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}},
  1235  	{AMOVSW, ynone, Pe, opBytes{0xa5}},
  1236  	{AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}},
  1237  	{AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}},
  1238  	{AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}},
  1239  	{AMOVWLSX, yml_rl, Pm, opBytes{0xbf}},
  1240  	{AMOVWLZX, yml_rl, Pm, opBytes{0xb7}},
  1241  	{AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}},
  1242  	{AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}},
  1243  	{AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}},
  1244  	{AMULB, ydivb, Pb, opBytes{0xf6, 04}},
  1245  	{AMULL, ydivl, Px, opBytes{0xf7, 04}},
  1246  	{AMULPD, yxm, Pe, opBytes{0x59}},
  1247  	{AMULPS, yxm, Ym, opBytes{0x59}},
  1248  	{AMULQ, ydivl, Pw, opBytes{0xf7, 04}},
  1249  	{AMULSD, yxm, Pf2, opBytes{0x59}},
  1250  	{AMULSS, yxm, Pf3, opBytes{0x59}},
  1251  	{AMULW, ydivl, Pe, opBytes{0xf7, 04}},
  1252  	{ANEGB, yscond, Pb, opBytes{0xf6, 03}},
  1253  	{ANEGL, yscond, Px, opBytes{0xf7, 03}},
  1254  	{ANEGQ, yscond, Pw, opBytes{0xf7, 03}},
  1255  	{ANEGW, yscond, Pe, opBytes{0xf7, 03}},
  1256  	{obj.ANOP, ynop, Px, opBytes{0, 0}},
  1257  	{ANOTB, yscond, Pb, opBytes{0xf6, 02}},
  1258  	{ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1259  	{ANOTQ, yscond, Pw, opBytes{0xf7, 02}},
  1260  	{ANOTW, yscond, Pe, opBytes{0xf7, 02}},
  1261  	{AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}},
  1262  	{AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1263  	{AORPD, yxm, Pq, opBytes{0x56}},
  1264  	{AORPS, yxm, Pm, opBytes{0x56}},
  1265  	{AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1266  	{AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1267  	{AOUTB, yin, Pb, opBytes{0xe6, 0xee}},
  1268  	{AOUTL, yin, Px, opBytes{0xe7, 0xef}},
  1269  	{AOUTW, yin, Pe, opBytes{0xe7, 0xef}},
  1270  	{AOUTSB, ynone, Pb, opBytes{0x6e}},
  1271  	{AOUTSL, ynone, Px, opBytes{0x6f}},
  1272  	{AOUTSW, ynone, Pe, opBytes{0x6f}},
  1273  	{APABSB, yxm_q4, Pq4, opBytes{0x1c}},
  1274  	{APABSD, yxm_q4, Pq4, opBytes{0x1e}},
  1275  	{APABSW, yxm_q4, Pq4, opBytes{0x1d}},
  1276  	{APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}},
  1277  	{APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}},
  1278  	{APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}},
  1279  	{APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}},
  1280  	{APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}},
  1281  	{APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}},
  1282  	{APADDQ, yxm, Pe, opBytes{0xd4}},
  1283  	{APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}},
  1284  	{APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}},
  1285  	{APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}},
  1286  	{APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}},
  1287  	{APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}},
  1288  	{APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}},
  1289  	{APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}},
  1290  	{APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}},
  1291  	{APAUSE, ynone, Px, opBytes{0xf3, 0x90}},
  1292  	{APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}},
  1293  	{APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}},
  1294  	{APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}},
  1295  	{APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}},
  1296  	{APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}},
  1297  	{APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}},
  1298  	{APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}},
  1299  	{APCMPGTB, ymm, Py1, opBytes{0x64, Pe, 0x64}},
  1300  	{APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}},
  1301  	{APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}},
  1302  	{APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}},
  1303  	{APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}},
  1304  	{APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}},
  1305  	{APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}},
  1306  	{APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}},
  1307  	{APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}},
  1308  	{APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}},
  1309  	{APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1310  	{APHADDSW, yxm_q4, Pq4, opBytes{0x03}},
  1311  	{APHADDW, yxm_q4, Pq4, opBytes{0x01}},
  1312  	{APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}},
  1313  	{APHSUBD, yxm_q4, Pq4, opBytes{0x06}},
  1314  	{APHSUBSW, yxm_q4, Pq4, opBytes{0x07}},
  1315  	{APHSUBW, yxm_q4, Pq4, opBytes{0x05}},
  1316  	{APINSRW, yinsrw, Pq, opBytes{0xc4, 00}},
  1317  	{APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}},
  1318  	{APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}},
  1319  	{APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}},
  1320  	{APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}},
  1321  	{APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}},
  1322  	{APMAXSB, yxm_q4, Pq4, opBytes{0x3c}},
  1323  	{APMAXSD, yxm_q4, Pq4, opBytes{0x3d}},
  1324  	{APMAXSW, yxm, Pe, opBytes{0xee}},
  1325  	{APMAXUB, yxm, Pe, opBytes{0xde}},
  1326  	{APMAXUD, yxm_q4, Pq4, opBytes{0x3f}},
  1327  	{APMAXUW, yxm_q4, Pq4, opBytes{0x3e}},
  1328  	{APMINSB, yxm_q4, Pq4, opBytes{0x38}},
  1329  	{APMINSD, yxm_q4, Pq4, opBytes{0x39}},
  1330  	{APMINSW, yxm, Pe, opBytes{0xea}},
  1331  	{APMINUB, yxm, Pe, opBytes{0xda}},
  1332  	{APMINUD, yxm_q4, Pq4, opBytes{0x3b}},
  1333  	{APMINUW, yxm_q4, Pq4, opBytes{0x3a}},
  1334  	{APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}},
  1335  	{APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}},
  1336  	{APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}},
  1337  	{APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}},
  1338  	{APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}},
  1339  	{APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}},
  1340  	{APMOVSXWQ, yxm_q4, Pq4, opBytes{0x24}},
  1341  	{APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}},
  1342  	{APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}},
  1343  	{APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}},
  1344  	{APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}},
  1345  	{APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}},
  1346  	{APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}},
  1347  	{APMULDQ, yxm_q4, Pq4, opBytes{0x28}},
  1348  	{APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}},
  1349  	{APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}},
  1350  	{APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}},
  1351  	{APMULLD, yxm_q4, Pq4, opBytes{0x40}},
  1352  	{APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}},
  1353  	{APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}},
  1354  	{APOPAL, ynone, P32, opBytes{0x61}},
  1355  	{APOPAW, ynone, Pe, opBytes{0x61}},
  1356  	{APOPCNTW, yml_rl, Pef3, opBytes{0xb8}},
  1357  	{APOPCNTL, yml_rl, Pf3, opBytes{0xb8}},
  1358  	{APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}},
  1359  	{APOPFL, ynone, P32, opBytes{0x9d}},
  1360  	{APOPFQ, ynone, Py, opBytes{0x9d}},
  1361  	{APOPFW, ynone, Pe, opBytes{0x9d}},
  1362  	{APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}},
  1363  	{APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}},
  1364  	{APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}},
  1365  	{APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}},
  1366  	{APSADBW, yxm, Pq, opBytes{0xf6}},
  1367  	{APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}},
  1368  	{APSHUFL, yxshuf, Pq, opBytes{0x70, 00}},
  1369  	{APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}},
  1370  	{APSHUFW, ymshuf, Pm, opBytes{0x70, 00}},
  1371  	{APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}},
  1372  	{APSIGNB, yxm_q4, Pq4, opBytes{0x08}},
  1373  	{APSIGND, yxm_q4, Pq4, opBytes{0x0a}},
  1374  	{APSIGNW, yxm_q4, Pq4, opBytes{0x09}},
  1375  	{APSLLO, ypsdq, Pq, opBytes{0x73, 07}},
  1376  	{APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1377  	{APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1378  	{APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1379  	{APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1380  	{APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1381  	{APSRLO, ypsdq, Pq, opBytes{0x73, 03}},
  1382  	{APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1383  	{APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1384  	{APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1385  	{APSUBB, yxm, Pe, opBytes{0xf8}},
  1386  	{APSUBL, yxm, Pe, opBytes{0xfa}},
  1387  	{APSUBQ, yxm, Pe, opBytes{0xfb}},
  1388  	{APSUBSB, yxm, Pe, opBytes{0xe8}},
  1389  	{APSUBSW, yxm, Pe, opBytes{0xe9}},
  1390  	{APSUBUSB, yxm, Pe, opBytes{0xd8}},
  1391  	{APSUBUSW, yxm, Pe, opBytes{0xd9}},
  1392  	{APSUBW, yxm, Pe, opBytes{0xf9}},
  1393  	{APTEST, yxm_q4, Pq4, opBytes{0x17}},
  1394  	{APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}},
  1395  	{APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}},
  1396  	{APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}},
  1397  	{APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}},
  1398  	{APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}},
  1399  	{APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}},
  1400  	{APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}},
  1401  	{APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}},
  1402  	{APUSHAL, ynone, P32, opBytes{0x60}},
  1403  	{APUSHAW, ynone, Pe, opBytes{0x60}},
  1404  	{APUSHFL, ynone, P32, opBytes{0x9c}},
  1405  	{APUSHFQ, ynone, Py, opBytes{0x9c}},
  1406  	{APUSHFW, ynone, Pe, opBytes{0x9c}},
  1407  	{APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1408  	{APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1409  	{APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1410  	{APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}},
  1411  	{AQUAD, ybyte, Px, opBytes{8}},
  1412  	{ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1413  	{ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1414  	{ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1415  	{ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1416  	{ARCPPS, yxm, Pm, opBytes{0x53}},
  1417  	{ARCPSS, yxm, Pf3, opBytes{0x53}},
  1418  	{ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1419  	{ARCRL, yshl, Px, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1420  	{ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1421  	{ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1422  	{AREP, ynone, Px, opBytes{0xf3}},
  1423  	{AREPN, ynone, Px, opBytes{0xf2}},
  1424  	{obj.ARET, ynone, Px, opBytes{0xc3}},
  1425  	{ARETFW, yret, Pe, opBytes{0xcb, 0xca}},
  1426  	{ARETFL, yret, Px, opBytes{0xcb, 0xca}},
  1427  	{ARETFQ, yret, Pw, opBytes{0xcb, 0xca}},
  1428  	{AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1429  	{AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1430  	{AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1431  	{AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1432  	{ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1433  	{ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1434  	{ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1435  	{ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1436  	{ARSQRTPS, yxm, Pm, opBytes{0x52}},
  1437  	{ARSQRTSS, yxm, Pf3, opBytes{0x52}},
  1438  	{ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL
  1439  	{ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1440  	{ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1441  	{ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1442  	{ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1443  	{ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1444  	{ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1445  	{ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1446  	{ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1447  	{ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}},
  1448  	{ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1449  	{ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1450  	{ASBBW, yaddl, Pe, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1451  	{ASCASB, ynone, Pb, opBytes{0xae}},
  1452  	{ASCASL, ynone, Px, opBytes{0xaf}},
  1453  	{ASCASQ, ynone, Pw, opBytes{0xaf}},
  1454  	{ASCASW, ynone, Pe, opBytes{0xaf}},
  1455  	{ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}},
  1456  	{ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}},
  1457  	{ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}},
  1458  	{ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}},
  1459  	{ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}},
  1460  	{ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}},
  1461  	{ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}},
  1462  	{ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}},
  1463  	{ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}},
  1464  	{ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}},
  1465  	{ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}},
  1466  	{ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}},
  1467  	{ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}},
  1468  	{ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}},
  1469  	{ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}},
  1470  	{ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}},
  1471  	{ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1472  	{ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1473  	{ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1474  	{ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1475  	{ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1476  	{ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1477  	{ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1478  	{ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1479  	{ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}},
  1480  	{ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}},
  1481  	{ASQRTPD, yxm, Pe, opBytes{0x51}},
  1482  	{ASQRTPS, yxm, Pm, opBytes{0x51}},
  1483  	{ASQRTSD, yxm, Pf2, opBytes{0x51}},
  1484  	{ASQRTSS, yxm, Pf3, opBytes{0x51}},
  1485  	{ASTC, ynone, Px, opBytes{0xf9}},
  1486  	{ASTD, ynone, Px, opBytes{0xfd}},
  1487  	{ASTI, ynone, Px, opBytes{0xfb}},
  1488  	{ASTMXCSR, ysvrs_om, Pm, opBytes{0xae, 03, 0xae, 03}},
  1489  	{ASTOSB, ynone, Pb, opBytes{0xaa}},
  1490  	{ASTOSL, ynone, Px, opBytes{0xab}},
  1491  	{ASTOSQ, ynone, Pw, opBytes{0xab}},
  1492  	{ASTOSW, ynone, Pe, opBytes{0xab}},
  1493  	{ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}},
  1494  	{ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1495  	{ASUBPD, yxm, Pe, opBytes{0x5c}},
  1496  	{ASUBPS, yxm, Pm, opBytes{0x5c}},
  1497  	{ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1498  	{ASUBSD, yxm, Pf2, opBytes{0x5c}},
  1499  	{ASUBSS, yxm, Pf3, opBytes{0x5c}},
  1500  	{ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1501  	{ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}},
  1502  	{ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall
  1503  	{ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}},
  1504  	{ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1505  	{ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1506  	{ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1507  	{ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}},
  1508  	{obj.ATEXT, ytext, Px, opBytes{}},
  1509  	{AUCOMISD, yxm, Pe, opBytes{0x2e}},
  1510  	{AUCOMISS, yxm, Pm, opBytes{0x2e}},
  1511  	{AUNPCKHPD, yxm, Pe, opBytes{0x15}},
  1512  	{AUNPCKHPS, yxm, Pm, opBytes{0x15}},
  1513  	{AUNPCKLPD, yxm, Pe, opBytes{0x14}},
  1514  	{AUNPCKLPS, yxm, Pm, opBytes{0x14}},
  1515  	{AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}},
  1516  	{AVERR, ydivl, Pm, opBytes{0x00, 04}},
  1517  	{AVERW, ydivl, Pm, opBytes{0x00, 05}},
  1518  	{AWAIT, ynone, Px, opBytes{0x9b}},
  1519  	{AWORD, ybyte, Px, opBytes{2}},
  1520  	{AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}},
  1521  	{AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}},
  1522  	{AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}},
  1523  	{AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}},
  1524  	{AXLAT, ynone, Px, opBytes{0xd7}},
  1525  	{AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}},
  1526  	{AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1527  	{AXORPD, yxm, Pe, opBytes{0x57}},
  1528  	{AXORPS, yxm, Pm, opBytes{0x57}},
  1529  	{AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1530  	{AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1531  	{AFMOVB, yfmvx, Px, opBytes{0xdf, 04}},
  1532  	{AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}},
  1533  	{AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1534  	{AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}},
  1535  	{AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}},
  1536  	{AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}},
  1537  	{AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}},
  1538  	{AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}},
  1539  	{AFMOVV, yfmvx, Px, opBytes{0xdf, 05}},
  1540  	{AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}},
  1541  	{AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}},
  1542  	{AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}},
  1543  	{AFMOVX, yfmvx, Px, opBytes{0xdb, 05}},
  1544  	{AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}},
  1545  	{AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}},
  1546  	{AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}},
  1547  	{AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}},
  1548  	{AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}},
  1549  	{AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}},
  1550  	{AFCMOVB, yfcmv, Px, opBytes{0xda, 00}},
  1551  	{AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}},
  1552  	{AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}},
  1553  	{AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}},
  1554  	{AFCMOVE, yfcmv, Px, opBytes{0xda, 01}},
  1555  	{AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}},
  1556  	{AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}},
  1557  	{AFCMOVU, yfcmv, Px, opBytes{0xda, 03}},
  1558  	{AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}},
  1559  	{AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}},  // botch
  1560  	{AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch
  1561  	{AFCOMDPP, ycompp, Px, opBytes{0xde, 03}},
  1562  	{AFCOMF, yfmvx, Px, opBytes{0xd8, 02}},
  1563  	{AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}},
  1564  	{AFCOMI, yfcmv, Px, opBytes{0xdb, 06}},
  1565  	{AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}},
  1566  	{AFCOML, yfmvx, Px, opBytes{0xda, 02}},
  1567  	{AFCOMLP, yfmvx, Px, opBytes{0xda, 03}},
  1568  	{AFCOMW, yfmvx, Px, opBytes{0xde, 02}},
  1569  	{AFCOMWP, yfmvx, Px, opBytes{0xde, 03}},
  1570  	{AFUCOM, ycompp, Px, opBytes{0xdd, 04}},
  1571  	{AFUCOMI, ycompp, Px, opBytes{0xdb, 05}},
  1572  	{AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}},
  1573  	{AFUCOMP, ycompp, Px, opBytes{0xdd, 05}},
  1574  	{AFUCOMPP, ycompp, Px, opBytes{0xda, 13}},
  1575  	{AFADDDP, ycompp, Px, opBytes{0xde, 00}},
  1576  	{AFADDW, yfmvx, Px, opBytes{0xde, 00}},
  1577  	{AFADDL, yfmvx, Px, opBytes{0xda, 00}},
  1578  	{AFADDF, yfmvx, Px, opBytes{0xd8, 00}},
  1579  	{AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1580  	{AFMULDP, ycompp, Px, opBytes{0xde, 01}},
  1581  	{AFMULW, yfmvx, Px, opBytes{0xde, 01}},
  1582  	{AFMULL, yfmvx, Px, opBytes{0xda, 01}},
  1583  	{AFMULF, yfmvx, Px, opBytes{0xd8, 01}},
  1584  	{AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1585  	{AFSUBDP, ycompp, Px, opBytes{0xde, 05}},
  1586  	{AFSUBW, yfmvx, Px, opBytes{0xde, 04}},
  1587  	{AFSUBL, yfmvx, Px, opBytes{0xda, 04}},
  1588  	{AFSUBF, yfmvx, Px, opBytes{0xd8, 04}},
  1589  	{AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1590  	{AFSUBRDP, ycompp, Px, opBytes{0xde, 04}},
  1591  	{AFSUBRW, yfmvx, Px, opBytes{0xde, 05}},
  1592  	{AFSUBRL, yfmvx, Px, opBytes{0xda, 05}},
  1593  	{AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}},
  1594  	{AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1595  	{AFDIVDP, ycompp, Px, opBytes{0xde, 07}},
  1596  	{AFDIVW, yfmvx, Px, opBytes{0xde, 06}},
  1597  	{AFDIVL, yfmvx, Px, opBytes{0xda, 06}},
  1598  	{AFDIVF, yfmvx, Px, opBytes{0xd8, 06}},
  1599  	{AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1600  	{AFDIVRDP, ycompp, Px, opBytes{0xde, 06}},
  1601  	{AFDIVRW, yfmvx, Px, opBytes{0xde, 07}},
  1602  	{AFDIVRL, yfmvx, Px, opBytes{0xda, 07}},
  1603  	{AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}},
  1604  	{AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1605  	{AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}},
  1606  	{AFFREE, nil, 0, opBytes{}},
  1607  	{AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}},
  1608  	{AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}},
  1609  	{AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}},
  1610  	{AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}},
  1611  	{AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}},
  1612  	{AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}},
  1613  	{AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}},
  1614  	{AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}},
  1615  	{AFABS, ynone, Px, opBytes{0xd9, 0xe1}},
  1616  	{AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}},
  1617  	{AFBSTP, yclflush, Px, opBytes{0xdf, 06}},
  1618  	{AFCHS, ynone, Px, opBytes{0xd9, 0xe0}},
  1619  	{AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}},
  1620  	{AFCOS, ynone, Px, opBytes{0xd9, 0xff}},
  1621  	{AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}},
  1622  	{AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}},
  1623  	{AFINIT, ynone, Px, opBytes{0xdb, 0xe3}},
  1624  	{AFLD1, ynone, Px, opBytes{0xd9, 0xe8}},
  1625  	{AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}},
  1626  	{AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}},
  1627  	{AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}},
  1628  	{AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}},
  1629  	{AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}},
  1630  	{AFLDZ, ynone, Px, opBytes{0xd9, 0xee}},
  1631  	{AFNOP, ynone, Px, opBytes{0xd9, 0xd0}},
  1632  	{AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}},
  1633  	{AFPREM, ynone, Px, opBytes{0xd9, 0xf8}},
  1634  	{AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}},
  1635  	{AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}},
  1636  	{AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}},
  1637  	{AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}},
  1638  	{AFSIN, ynone, Px, opBytes{0xd9, 0xfe}},
  1639  	{AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}},
  1640  	{AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}},
  1641  	{AFTST, ynone, Px, opBytes{0xd9, 0xe4}},
  1642  	{AFXAM, ynone, Px, opBytes{0xd9, 0xe5}},
  1643  	{AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}},
  1644  	{AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}},
  1645  	{AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}},
  1646  	{ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}},
  1647  	{ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}},
  1648  	{ACMPXCHGW, yrl_ml, Pe, opBytes{0x0f, 0xb1}},
  1649  	{ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}},
  1650  	{ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}},
  1651  	{ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}},
  1652  	{AINVD, ynone, Pm, opBytes{0x08}},
  1653  	{AINVLPG, ydivb, Pm, opBytes{0x01, 07}},
  1654  	{AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}},
  1655  	{ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}},
  1656  	{AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}},
  1657  	{AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}},
  1658  	{AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}},
  1659  	{ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}},
  1660  	{ARDMSR, ynone, Pm, opBytes{0x32}},
  1661  	{ARDPMC, ynone, Pm, opBytes{0x33}},
  1662  	{ARDTSC, ynone, Pm, opBytes{0x31}},
  1663  	{ARSM, ynone, Pm, opBytes{0xaa}},
  1664  	{ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}},
  1665  	{ASYSRET, ynone, Pm, opBytes{0x07}},
  1666  	{AWBINVD, ynone, Pm, opBytes{0x09}},
  1667  	{AWRMSR, ynone, Pm, opBytes{0x30}},
  1668  	{AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}},
  1669  	{AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}},
  1670  	{AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}},
  1671  	{AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}},
  1672  	{AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}},
  1673  	{ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1674  	{ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1675  	{ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1676  	{ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1677  	{APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}},
  1678  	{APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}},
  1679  	{APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}},
  1680  	{APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}},
  1681  	{AMOVQL, yrl_ml, Px, opBytes{0x89}},
  1682  	{obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}},
  1683  	{AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}},
  1684  	{AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}},
  1685  	{AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}},
  1686  	{AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}},
  1687  	{AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}},
  1688  	{AAESKEYGENASSIST, yxshuf, Pq, opBytes{0x3a, 0xdf, 0}},
  1689  	{AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}},
  1690  	{AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}},
  1691  	{AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}},
  1692  	{AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}},
  1693  	{APSHUFD, yxshuf, Pq, opBytes{0x70, 0}},
  1694  	{APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}},
  1695  	{APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}},
  1696  	{APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}},
  1697  	{AMOVDDUP, yxm, Pf2, opBytes{0x12}},
  1698  	{AMOVSHDUP, yxm, Pf3, opBytes{0x16}},
  1699  	{AMOVSLDUP, yxm, Pf3, opBytes{0x12}},
  1700  	{ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}},
  1701  	{ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}},
  1702  	{AUD1, ynone, Pm, opBytes{0xb9, 0}},
  1703  	{AUD2, ynone, Pm, opBytes{0x0b, 0}},
  1704  	{AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}},
  1705  	{ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}},
  1706  	{ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}},
  1707  	{ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}},
  1708  	{ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}},
  1709  	{ALMSW, ydivl, Pm, opBytes{0x01, 06}},
  1710  	{ALLDT, ydivl, Pm, opBytes{0x00, 02}},
  1711  	{ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}},
  1712  	{ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}},
  1713  	{ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1714  	{ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1715  	{ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1716  	{AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}},
  1717  	{AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}},
  1718  	{AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}},
  1719  	{AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}},
  1720  	{AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}},
  1721  	{AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}},
  1722  	{AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}},
  1723  	{AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}},
  1724  	{AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}},
  1725  	{AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}},
  1726  	{AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 05}},
  1727  	{AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}},
  1728  	{ASGDT, yclflush, Pm, opBytes{0x01, 00}},
  1729  	{ASIDT, yclflush, Pm, opBytes{0x01, 01}},
  1730  	{ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}},
  1731  	{ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}},
  1732  	{ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}},
  1733  	{ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}},
  1734  	{ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}},
  1735  	{ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}},
  1736  	{ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}},
  1737  	{ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}},
  1738  	{ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}},
  1739  	{AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}},
  1740  	{AMOVBEW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1741  	{AMOVBEL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1742  	{AMOVBEQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}},
  1743  	{ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}},
  1744  	{ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}},
  1745  	{ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}},
  1746  	{ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}},
  1747  	{ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}},
  1748  	{ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}},
  1749  	{ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}},
  1750  	{ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}},
  1751  	{ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}},
  1752  	{ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}},
  1753  	{APBLENDVB, yblendvpd, Pq4, opBytes{0x10}},
  1754  	{ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}},
  1755  	{ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}},
  1756  	{ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}},
  1757  	{ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}},
  1758  	{ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}},
  1759  	{ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}},
  1760  	{ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}},
  1761  	{ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}},
  1762  	{ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}},
  1763  	{ARDGSBASEL, yrdrand, Pf3, opBytes{0xae, 01}},
  1764  	{ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}},
  1765  	{AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}},
  1766  	{AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}},
  1767  	{AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}},
  1768  	{AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}},
  1769  	{ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}},
  1770  	{ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}},
  1771  	{ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}},
  1772  	{ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}},
  1773  	{ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}},
  1774  	{ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}},
  1775  	{ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}},
  1776  	{ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}},
  1777  	{ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}},
  1778  	{ARDPID, yrdrand, Pf3, opBytes{0xc7, 07}},
  1779  
  1780  	{ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}},
  1781  	{ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}},
  1782  	{AXACQUIRE, ynone, Px, opBytes{0xf2}},
  1783  	{AXRELEASE, ynone, Px, opBytes{0xf3}},
  1784  	{AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}},
  1785  	{AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}},
  1786  	{AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}},
  1787  	{AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}},
  1788  	{AXGETBV, ynone, Pm, opBytes{01, 0xd0}},
  1789  	{obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}},
  1790  	{obj.APCDATA, ypcdata, Px, opBytes{0, 0}},
  1791  	{obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}},
  1792  	{obj.ADUFFZERO, yduff, Px, opBytes{0xe8}},
  1793  
  1794  	{obj.AEND, nil, 0, opBytes{}},
  1795  	{0, nil, 0, opBytes{}},
  1796  }
  1797  
  1798  var opindex [(ALAST + 1) & obj.AMask]*Optab
  1799  
  1800  // useAbs reports whether s describes a symbol that must avoid pc-relative addressing.
  1801  // This happens on systems like Solaris that call .so functions instead of system calls.
  1802  // It does not seem to be necessary for any other systems. This is probably working
  1803  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1804  // what that bug is. And this does fix it.
  1805  func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
  1806  	if ctxt.Headtype == objabi.Hsolaris {
  1807  		// All the Solaris dynamic imports from libc.so begin with "libc_".
  1808  		return strings.HasPrefix(s.Name, "libc_")
  1809  	}
  1810  	return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
  1811  }
  1812  
  1813  // single-instruction no-ops of various lengths.
  1814  // constructed by hand and disassembled with gdb to verify.
  1815  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1816  var nop = [][16]uint8{
  1817  	{0x90},
  1818  	{0x66, 0x90},
  1819  	{0x0F, 0x1F, 0x00},
  1820  	{0x0F, 0x1F, 0x40, 0x00},
  1821  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1822  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1823  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1824  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1825  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1826  }
  1827  
  1828  // Native Client rejects the repeated 0x66 prefix.
  1829  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1830  func fillnop(p []byte, n int) {
  1831  	var m int
  1832  
  1833  	for n > 0 {
  1834  		m = n
  1835  		if m > len(nop) {
  1836  			m = len(nop)
  1837  		}
  1838  		copy(p[:m], nop[m-1][:m])
  1839  		p = p[m:]
  1840  		n -= m
  1841  	}
  1842  }
  1843  
  1844  func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1845  	s.Grow(int64(c) + int64(pad))
  1846  	fillnop(s.P[c:], int(pad))
  1847  	return c + pad
  1848  }
  1849  
  1850  func spadjop(ctxt *obj.Link, l, q obj.As) obj.As {
  1851  	if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
  1852  		return l
  1853  	}
  1854  	return q
  1855  }
  1856  
  1857  // isJump returns whether p is a jump instruction.
  1858  // It is used to ensure that no standalone or macro-fused jump will straddle
  1859  // or end on a 32 byte boundary by inserting NOPs before the jumps.
  1860  func isJump(p *obj.Prog) bool {
  1861  	return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL ||
  1862  		p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO
  1863  }
  1864  
  1865  // lookForJCC returns the first real instruction starting from p, if that instruction is a conditional
  1866  // jump. Otherwise, nil is returned.
  1867  func lookForJCC(p *obj.Prog) *obj.Prog {
  1868  	// Skip any PCDATA, FUNCDATA or NOP instructions
  1869  	var q *obj.Prog
  1870  	for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link {
  1871  	}
  1872  
  1873  	if q == nil || q.To.Target() == nil || p.As == obj.AJMP || p.As == obj.ACALL {
  1874  		return nil
  1875  	}
  1876  
  1877  	switch q.As {
  1878  	case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI,
  1879  		AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT:
  1880  	default:
  1881  		return nil
  1882  	}
  1883  
  1884  	return q
  1885  }
  1886  
  1887  // fusedJump determines whether p can be fused with a subsequent conditional jump instruction.
  1888  // If it can, we return true followed by the total size of the fused jump. If it can't, we return false.
  1889  // Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2.
  1890  func fusedJump(p *obj.Prog) (bool, uint8) {
  1891  	var fusedSize uint8
  1892  
  1893  	// The first instruction in a macro fused pair may be preceded by the LOCK prefix,
  1894  	// or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we
  1895  	// need to be careful to insert any padding before the locks rather than directly after them.
  1896  
  1897  	if p.As == AXRELEASE || p.As == AXACQUIRE {
  1898  		fusedSize += p.Isize
  1899  		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
  1900  		}
  1901  		if p == nil {
  1902  			return false, 0
  1903  		}
  1904  	}
  1905  	if p.As == ALOCK {
  1906  		fusedSize += p.Isize
  1907  		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
  1908  		}
  1909  		if p == nil {
  1910  			return false, 0
  1911  		}
  1912  	}
  1913  	cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW
  1914  
  1915  	cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ ||
  1916  		p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp
  1917  
  1918  	testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW ||
  1919  		p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW
  1920  
  1921  	incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW ||
  1922  		p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW
  1923  
  1924  	if !cmpAddSub && !testAnd && !incDec {
  1925  		return false, 0
  1926  	}
  1927  
  1928  	if !incDec {
  1929  		var argOne obj.AddrType
  1930  		var argTwo obj.AddrType
  1931  		if cmp {
  1932  			argOne = p.From.Type
  1933  			argTwo = p.To.Type
  1934  		} else {
  1935  			argOne = p.To.Type
  1936  			argTwo = p.From.Type
  1937  		}
  1938  		if argOne == obj.TYPE_REG {
  1939  			if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM {
  1940  				return false, 0
  1941  			}
  1942  		} else if argOne == obj.TYPE_MEM {
  1943  			if argTwo != obj.TYPE_REG {
  1944  				return false, 0
  1945  			}
  1946  		} else {
  1947  			return false, 0
  1948  		}
  1949  	}
  1950  
  1951  	fusedSize += p.Isize
  1952  	jmp := lookForJCC(p)
  1953  	if jmp == nil {
  1954  		return false, 0
  1955  	}
  1956  
  1957  	fusedSize += jmp.Isize
  1958  
  1959  	if testAnd {
  1960  		return true, fusedSize
  1961  	}
  1962  
  1963  	if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI ||
  1964  		jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC {
  1965  		return false, 0
  1966  	}
  1967  
  1968  	if cmpAddSub {
  1969  		return true, fusedSize
  1970  	}
  1971  
  1972  	if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS {
  1973  		return false, 0
  1974  	}
  1975  
  1976  	return true, fusedSize
  1977  }
  1978  
  1979  type padJumpsCtx int32
  1980  
  1981  func makePjcCtx(ctxt *obj.Link) padJumpsCtx {
  1982  	// Disable jump padding on 32 bit builds by setting
  1983  	// padJumps to 0.
  1984  	if ctxt.Arch.Family == sys.I386 {
  1985  		return padJumpsCtx(0)
  1986  	}
  1987  
  1988  	// Disable jump padding for hand written assembly code.
  1989  	if ctxt.IsAsm {
  1990  		return padJumpsCtx(0)
  1991  	}
  1992  
  1993  	return padJumpsCtx(32)
  1994  }
  1995  
  1996  // padJump detects whether the instruction being assembled is a standalone or a macro-fused
  1997  // jump that needs to be padded. If it is, NOPs are inserted to ensure that the jump does
  1998  // not cross or end on a 32 byte boundary.
  1999  func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 {
  2000  	if pjc == 0 {
  2001  		return c
  2002  	}
  2003  
  2004  	var toPad int32
  2005  	fj, fjSize := fusedJump(p)
  2006  	mask := int32(pjc - 1)
  2007  	if fj {
  2008  		if (c&mask)+int32(fjSize) >= int32(pjc) {
  2009  			toPad = int32(pjc) - (c & mask)
  2010  		}
  2011  	} else if isJump(p) {
  2012  		if (c&mask)+int32(p.Isize) >= int32(pjc) {
  2013  			toPad = int32(pjc) - (c & mask)
  2014  		}
  2015  	}
  2016  	if toPad <= 0 {
  2017  		return c
  2018  	}
  2019  
  2020  	return noppad(ctxt, s, c, toPad)
  2021  }
  2022  
  2023  // reAssemble is called if an instruction's size changes during assembly. If
  2024  // it does and the instruction is a standalone or a macro-fused jump we need to
  2025  // reassemble.
  2026  func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool {
  2027  	if pjc == 0 {
  2028  		return false
  2029  	}
  2030  
  2031  	fj, _ := fusedJump(p)
  2032  	return fj || isJump(p)
  2033  }
  2034  
  2035  type nopPad struct {
  2036  	p *obj.Prog // Instruction before the pad
  2037  	n int32     // Size of the pad
  2038  }
  2039  
  2040  func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
  2041  	if ctxt.Retpoline && ctxt.Arch.Family == sys.I386 {
  2042  		ctxt.Diag("-spectre=ret not supported on 386")
  2043  		ctxt.Retpoline = false // don't keep printing
  2044  	}
  2045  
  2046  	pjc := makePjcCtx(ctxt)
  2047  
  2048  	if s.P != nil {
  2049  		return
  2050  	}
  2051  
  2052  	if ycover[0] == 0 {
  2053  		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
  2054  	}
  2055  
  2056  	for p := s.Func().Text; p != nil; p = p.Link {
  2057  		if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil {
  2058  			p.To.SetTarget(p)
  2059  		}
  2060  		if p.As == AADJSP {
  2061  			p.To.Type = obj.TYPE_REG
  2062  			p.To.Reg = REG_SP
  2063  			// Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive.
  2064  			// One exception: It is smaller to encode $-0x80 than $0x80.
  2065  			// For that case, flip the sign and the op:
  2066  			// Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'.
  2067  			switch v := p.From.Offset; {
  2068  			case v == 0:
  2069  				p.As = obj.ANOP
  2070  			case v == 0x80 || (v < 0 && v != -0x80):
  2071  				p.As = spadjop(ctxt, AADDL, AADDQ)
  2072  				p.From.Offset *= -1
  2073  			default:
  2074  				p.As = spadjop(ctxt, ASUBL, ASUBQ)
  2075  			}
  2076  		}
  2077  		if ctxt.Retpoline && (p.As == obj.ACALL || p.As == obj.AJMP) && (p.To.Type == obj.TYPE_REG || p.To.Type == obj.TYPE_MEM) {
  2078  			if p.To.Type != obj.TYPE_REG {
  2079  				ctxt.Diag("non-retpoline-compatible: %v", p)
  2080  				continue
  2081  			}
  2082  			p.To.Type = obj.TYPE_BRANCH
  2083  			p.To.Name = obj.NAME_EXTERN
  2084  			p.To.Sym = ctxt.Lookup("runtime.retpoline" + obj.Rconv(int(p.To.Reg)))
  2085  			p.To.Reg = 0
  2086  			p.To.Offset = 0
  2087  		}
  2088  	}
  2089  
  2090  	var count int64 // rough count of number of instructions
  2091  	for p := s.Func().Text; p != nil; p = p.Link {
  2092  		count++
  2093  		p.Back = branchShort // use short branches first time through
  2094  		if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) {
  2095  			p.Back |= branchBackwards
  2096  			q.Back |= branchLoopHead
  2097  		}
  2098  	}
  2099  	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction
  2100  
  2101  	var ab AsmBuf
  2102  	var n int
  2103  	var c int32
  2104  	errors := ctxt.Errors
  2105  	var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies)
  2106  	nrelocs0 := len(s.R)
  2107  	for {
  2108  		// This loop continues while there are reasons to re-assemble
  2109  		// whole block, like the presence of long forward jumps.
  2110  		reAssemble := false
  2111  		for i := range s.R[nrelocs0:] {
  2112  			s.R[nrelocs0+i] = obj.Reloc{}
  2113  		}
  2114  		s.R = s.R[:nrelocs0] // preserve marker relocations generated by the compiler
  2115  		s.P = s.P[:0]
  2116  		c = 0
  2117  		var pPrev *obj.Prog
  2118  		nops = nops[:0]
  2119  		for p := s.Func().Text; p != nil; p = p.Link {
  2120  			c0 := c
  2121  			c = pjc.padJump(ctxt, s, p, c)
  2122  
  2123  			if p.As == obj.APCALIGN || p.As == obj.APCALIGNMAX {
  2124  				v := obj.AlignmentPadding(c, p, ctxt, s)
  2125  				if v > 0 {
  2126  					s.Grow(int64(c) + int64(v))
  2127  					fillnop(s.P[c:], int(v))
  2128  				}
  2129  				p.Pc = int64(c)
  2130  				c += int32(v)
  2131  				pPrev = p
  2132  				continue
  2133  
  2134  			}
  2135  
  2136  			if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 {
  2137  				// pad with NOPs
  2138  				v := -c & (loopAlign - 1)
  2139  
  2140  				if v <= maxLoopPad {
  2141  					s.Grow(int64(c) + int64(v))
  2142  					fillnop(s.P[c:], int(v))
  2143  					c += v
  2144  				}
  2145  			}
  2146  
  2147  			p.Pc = int64(c)
  2148  
  2149  			// process forward jumps to p
  2150  			for q := p.Rel; q != nil; q = q.Forwd {
  2151  				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
  2152  				if q.Back&branchShort != 0 {
  2153  					if v > 127 {
  2154  						reAssemble = true
  2155  						q.Back ^= branchShort
  2156  					}
  2157  
  2158  					if q.As == AJCXZL || q.As == AXBEGIN {
  2159  						s.P[q.Pc+2] = byte(v)
  2160  					} else {
  2161  						s.P[q.Pc+1] = byte(v)
  2162  					}
  2163  				} else {
  2164  					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
  2165  				}
  2166  			}
  2167  
  2168  			p.Rel = nil
  2169  
  2170  			p.Pc = int64(c)
  2171  			ab.asmins(ctxt, s, p)
  2172  			m := ab.Len()
  2173  			if int(p.Isize) != m {
  2174  				p.Isize = uint8(m)
  2175  				if pjc.reAssemble(p) {
  2176  					// We need to re-assemble here to check for jumps and fused jumps
  2177  					// that span or end on 32 byte boundaries.
  2178  					reAssemble = true
  2179  				}
  2180  			}
  2181  
  2182  			s.Grow(p.Pc + int64(m))
  2183  			copy(s.P[p.Pc:], ab.Bytes())
  2184  			// If there was padding, remember it.
  2185  			if pPrev != nil && !ctxt.IsAsm && c > c0 {
  2186  				nops = append(nops, nopPad{p: pPrev, n: c - c0})
  2187  			}
  2188  			c += int32(m)
  2189  			pPrev = p
  2190  		}
  2191  
  2192  		n++
  2193  		if n > 1000 {
  2194  			ctxt.Diag("span must be looping")
  2195  			log.Fatalf("loop")
  2196  		}
  2197  		if !reAssemble {
  2198  			break
  2199  		}
  2200  		if ctxt.Errors > errors {
  2201  			return
  2202  		}
  2203  	}
  2204  	// splice padding nops into Progs
  2205  	for _, n := range nops {
  2206  		pp := n.p
  2207  		np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)}
  2208  		pp.Link = np
  2209  	}
  2210  
  2211  	s.Size = int64(c)
  2212  
  2213  	if false { /* debug['a'] > 1 */
  2214  		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
  2215  		var i int
  2216  		for i = 0; i < len(s.P); i++ {
  2217  			fmt.Printf(" %.2x", s.P[i])
  2218  			if i%16 == 15 {
  2219  				fmt.Printf("\n  %.6x", uint(i+1))
  2220  			}
  2221  		}
  2222  
  2223  		if i%16 != 0 {
  2224  			fmt.Printf("\n")
  2225  		}
  2226  
  2227  		for i := 0; i < len(s.R); i++ {
  2228  			r := &s.R[i]
  2229  			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
  2230  		}
  2231  	}
  2232  
  2233  	// Mark nonpreemptible instruction sequences.
  2234  	// The 2-instruction TLS access sequence
  2235  	//	MOVQ TLS, BX
  2236  	//	MOVQ 0(BX)(TLS*1), BX
  2237  	// is not async preemptible, as if it is preempted and resumed on
  2238  	// a different thread, the TLS address may become invalid.
  2239  	if !CanUse1InsnTLS(ctxt) {
  2240  		useTLS := func(p *obj.Prog) bool {
  2241  			// Only need to mark the second instruction, which has
  2242  			// REG_TLS as Index. (It is okay to interrupt and restart
  2243  			// the first instruction.)
  2244  			return p.From.Index == REG_TLS
  2245  		}
  2246  		obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil)
  2247  	}
  2248  
  2249  	// Now that we know byte offsets, we can generate jump table entries.
  2250  	// TODO: could this live in obj instead of obj/$ARCH?
  2251  	for _, jt := range s.Func().JumpTables {
  2252  		for i, p := range jt.Targets {
  2253  			// The ith jumptable entry points to the p.Pc'th
  2254  			// byte in the function symbol s.
  2255  			jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, s, p.Pc)
  2256  		}
  2257  	}
  2258  }
  2259  
  2260  func instinit(ctxt *obj.Link) {
  2261  	if ycover[0] != 0 {
  2262  		// Already initialized; stop now.
  2263  		// This happens in the cmd/asm tests,
  2264  		// each of which re-initializes the arch.
  2265  		return
  2266  	}
  2267  
  2268  	switch ctxt.Headtype {
  2269  	case objabi.Hplan9:
  2270  		plan9privates = ctxt.Lookup("_privates")
  2271  	}
  2272  
  2273  	for i := range avxOptab {
  2274  		c := avxOptab[i].as
  2275  		if opindex[c&obj.AMask] != nil {
  2276  			ctxt.Diag("phase error in avxOptab: %d (%v)", i, c)
  2277  		}
  2278  		opindex[c&obj.AMask] = &avxOptab[i]
  2279  	}
  2280  	for i := 1; optab[i].as != 0; i++ {
  2281  		c := optab[i].as
  2282  		if opindex[c&obj.AMask] != nil {
  2283  			ctxt.Diag("phase error in optab: %d (%v)", i, c)
  2284  		}
  2285  		opindex[c&obj.AMask] = &optab[i]
  2286  	}
  2287  
  2288  	for i := 0; i < Ymax; i++ {
  2289  		ycover[i*Ymax+i] = 1
  2290  	}
  2291  
  2292  	ycover[Yi0*Ymax+Yu2] = 1
  2293  	ycover[Yi1*Ymax+Yu2] = 1
  2294  
  2295  	ycover[Yi0*Ymax+Yi8] = 1
  2296  	ycover[Yi1*Ymax+Yi8] = 1
  2297  	ycover[Yu2*Ymax+Yi8] = 1
  2298  	ycover[Yu7*Ymax+Yi8] = 1
  2299  
  2300  	ycover[Yi0*Ymax+Yu7] = 1
  2301  	ycover[Yi1*Ymax+Yu7] = 1
  2302  	ycover[Yu2*Ymax+Yu7] = 1
  2303  
  2304  	ycover[Yi0*Ymax+Yu8] = 1
  2305  	ycover[Yi1*Ymax+Yu8] = 1
  2306  	ycover[Yu2*Ymax+Yu8] = 1
  2307  	ycover[Yu7*Ymax+Yu8] = 1
  2308  
  2309  	ycover[Yi0*Ymax+Ys32] = 1
  2310  	ycover[Yi1*Ymax+Ys32] = 1
  2311  	ycover[Yu2*Ymax+Ys32] = 1
  2312  	ycover[Yu7*Ymax+Ys32] = 1
  2313  	ycover[Yu8*Ymax+Ys32] = 1
  2314  	ycover[Yi8*Ymax+Ys32] = 1
  2315  
  2316  	ycover[Yi0*Ymax+Yi32] = 1
  2317  	ycover[Yi1*Ymax+Yi32] = 1
  2318  	ycover[Yu2*Ymax+Yi32] = 1
  2319  	ycover[Yu7*Ymax+Yi32] = 1
  2320  	ycover[Yu8*Ymax+Yi32] = 1
  2321  	ycover[Yi8*Ymax+Yi32] = 1
  2322  	ycover[Ys32*Ymax+Yi32] = 1
  2323  
  2324  	ycover[Yi0*Ymax+Yi64] = 1
  2325  	ycover[Yi1*Ymax+Yi64] = 1
  2326  	ycover[Yu7*Ymax+Yi64] = 1
  2327  	ycover[Yu2*Ymax+Yi64] = 1
  2328  	ycover[Yu8*Ymax+Yi64] = 1
  2329  	ycover[Yi8*Ymax+Yi64] = 1
  2330  	ycover[Ys32*Ymax+Yi64] = 1
  2331  	ycover[Yi32*Ymax+Yi64] = 1
  2332  
  2333  	ycover[Yal*Ymax+Yrb] = 1
  2334  	ycover[Ycl*Ymax+Yrb] = 1
  2335  	ycover[Yax*Ymax+Yrb] = 1
  2336  	ycover[Ycx*Ymax+Yrb] = 1
  2337  	ycover[Yrx*Ymax+Yrb] = 1
  2338  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  2339  
  2340  	ycover[Ycl*Ymax+Ycx] = 1
  2341  
  2342  	ycover[Yax*Ymax+Yrx] = 1
  2343  	ycover[Ycx*Ymax+Yrx] = 1
  2344  
  2345  	ycover[Yax*Ymax+Yrl] = 1
  2346  	ycover[Ycx*Ymax+Yrl] = 1
  2347  	ycover[Yrx*Ymax+Yrl] = 1
  2348  	ycover[Yrl32*Ymax+Yrl] = 1
  2349  
  2350  	ycover[Yf0*Ymax+Yrf] = 1
  2351  
  2352  	ycover[Yal*Ymax+Ymb] = 1
  2353  	ycover[Ycl*Ymax+Ymb] = 1
  2354  	ycover[Yax*Ymax+Ymb] = 1
  2355  	ycover[Ycx*Ymax+Ymb] = 1
  2356  	ycover[Yrx*Ymax+Ymb] = 1
  2357  	ycover[Yrb*Ymax+Ymb] = 1
  2358  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  2359  	ycover[Ym*Ymax+Ymb] = 1
  2360  
  2361  	ycover[Yax*Ymax+Yml] = 1
  2362  	ycover[Ycx*Ymax+Yml] = 1
  2363  	ycover[Yrx*Ymax+Yml] = 1
  2364  	ycover[Yrl*Ymax+Yml] = 1
  2365  	ycover[Yrl32*Ymax+Yml] = 1
  2366  	ycover[Ym*Ymax+Yml] = 1
  2367  
  2368  	ycover[Yax*Ymax+Ymm] = 1
  2369  	ycover[Ycx*Ymax+Ymm] = 1
  2370  	ycover[Yrx*Ymax+Ymm] = 1
  2371  	ycover[Yrl*Ymax+Ymm] = 1
  2372  	ycover[Yrl32*Ymax+Ymm] = 1
  2373  	ycover[Ym*Ymax+Ymm] = 1
  2374  	ycover[Ymr*Ymax+Ymm] = 1
  2375  
  2376  	ycover[Yxr0*Ymax+Yxr] = 1
  2377  
  2378  	ycover[Ym*Ymax+Yxm] = 1
  2379  	ycover[Yxr0*Ymax+Yxm] = 1
  2380  	ycover[Yxr*Ymax+Yxm] = 1
  2381  
  2382  	ycover[Ym*Ymax+Yym] = 1
  2383  	ycover[Yyr*Ymax+Yym] = 1
  2384  
  2385  	ycover[Yxr0*Ymax+YxrEvex] = 1
  2386  	ycover[Yxr*Ymax+YxrEvex] = 1
  2387  
  2388  	ycover[Ym*Ymax+YxmEvex] = 1
  2389  	ycover[Yxr0*Ymax+YxmEvex] = 1
  2390  	ycover[Yxr*Ymax+YxmEvex] = 1
  2391  	ycover[YxrEvex*Ymax+YxmEvex] = 1
  2392  
  2393  	ycover[Yyr*Ymax+YyrEvex] = 1
  2394  
  2395  	ycover[Ym*Ymax+YymEvex] = 1
  2396  	ycover[Yyr*Ymax+YymEvex] = 1
  2397  	ycover[YyrEvex*Ymax+YymEvex] = 1
  2398  
  2399  	ycover[Ym*Ymax+Yzm] = 1
  2400  	ycover[Yzr*Ymax+Yzm] = 1
  2401  
  2402  	ycover[Yk0*Ymax+Yk] = 1
  2403  	ycover[Yknot0*Ymax+Yk] = 1
  2404  
  2405  	ycover[Yk0*Ymax+Ykm] = 1
  2406  	ycover[Yknot0*Ymax+Ykm] = 1
  2407  	ycover[Yk*Ymax+Ykm] = 1
  2408  	ycover[Ym*Ymax+Ykm] = 1
  2409  
  2410  	ycover[Yxvm*Ymax+YxvmEvex] = 1
  2411  
  2412  	ycover[Yyvm*Ymax+YyvmEvex] = 1
  2413  
  2414  	for i := 0; i < MAXREG; i++ {
  2415  		reg[i] = -1
  2416  		if i >= REG_AL && i <= REG_R15B {
  2417  			reg[i] = (i - REG_AL) & 7
  2418  			if i >= REG_SPB && i <= REG_DIB {
  2419  				regrex[i] = 0x40
  2420  			}
  2421  			if i >= REG_R8B && i <= REG_R15B {
  2422  				regrex[i] = Rxr | Rxx | Rxb
  2423  			}
  2424  		}
  2425  
  2426  		if i >= REG_AH && i <= REG_BH {
  2427  			reg[i] = 4 + ((i - REG_AH) & 7)
  2428  		}
  2429  		if i >= REG_AX && i <= REG_R15 {
  2430  			reg[i] = (i - REG_AX) & 7
  2431  			if i >= REG_R8 {
  2432  				regrex[i] = Rxr | Rxx | Rxb
  2433  			}
  2434  		}
  2435  
  2436  		if i >= REG_F0 && i <= REG_F0+7 {
  2437  			reg[i] = (i - REG_F0) & 7
  2438  		}
  2439  		if i >= REG_M0 && i <= REG_M0+7 {
  2440  			reg[i] = (i - REG_M0) & 7
  2441  		}
  2442  		if i >= REG_K0 && i <= REG_K0+7 {
  2443  			reg[i] = (i - REG_K0) & 7
  2444  		}
  2445  		if i >= REG_X0 && i <= REG_X0+15 {
  2446  			reg[i] = (i - REG_X0) & 7
  2447  			if i >= REG_X0+8 {
  2448  				regrex[i] = Rxr | Rxx | Rxb
  2449  			}
  2450  		}
  2451  		if i >= REG_X16 && i <= REG_X16+15 {
  2452  			reg[i] = (i - REG_X16) & 7
  2453  			if i >= REG_X16+8 {
  2454  				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2455  			} else {
  2456  				regrex[i] = RxrEvex
  2457  			}
  2458  		}
  2459  		if i >= REG_Y0 && i <= REG_Y0+15 {
  2460  			reg[i] = (i - REG_Y0) & 7
  2461  			if i >= REG_Y0+8 {
  2462  				regrex[i] = Rxr | Rxx | Rxb
  2463  			}
  2464  		}
  2465  		if i >= REG_Y16 && i <= REG_Y16+15 {
  2466  			reg[i] = (i - REG_Y16) & 7
  2467  			if i >= REG_Y16+8 {
  2468  				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2469  			} else {
  2470  				regrex[i] = RxrEvex
  2471  			}
  2472  		}
  2473  		if i >= REG_Z0 && i <= REG_Z0+15 {
  2474  			reg[i] = (i - REG_Z0) & 7
  2475  			if i > REG_Z0+7 {
  2476  				regrex[i] = Rxr | Rxx | Rxb
  2477  			}
  2478  		}
  2479  		if i >= REG_Z16 && i <= REG_Z16+15 {
  2480  			reg[i] = (i - REG_Z16) & 7
  2481  			if i >= REG_Z16+8 {
  2482  				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2483  			} else {
  2484  				regrex[i] = RxrEvex
  2485  			}
  2486  		}
  2487  
  2488  		if i >= REG_CR+8 && i <= REG_CR+15 {
  2489  			regrex[i] = Rxr
  2490  		}
  2491  	}
  2492  }
  2493  
  2494  var isAndroid = buildcfg.GOOS == "android"
  2495  
  2496  func prefixof(ctxt *obj.Link, a *obj.Addr) int {
  2497  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2498  		return 0
  2499  	}
  2500  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2501  		switch a.Reg {
  2502  		case REG_CS:
  2503  			return 0x2e
  2504  
  2505  		case REG_DS:
  2506  			return 0x3e
  2507  
  2508  		case REG_ES:
  2509  			return 0x26
  2510  
  2511  		case REG_FS:
  2512  			return 0x64
  2513  
  2514  		case REG_GS:
  2515  			return 0x65
  2516  
  2517  		case REG_TLS:
  2518  			// NOTE: Systems listed here should be only systems that
  2519  			// support direct TLS references like 8(TLS) implemented as
  2520  			// direct references from FS or GS. Systems that require
  2521  			// the initial-exec model, where you load the TLS base into
  2522  			// a register and then index from that register, do not reach
  2523  			// this code and should not be listed.
  2524  			if ctxt.Arch.Family == sys.I386 {
  2525  				switch ctxt.Headtype {
  2526  				default:
  2527  					if isAndroid {
  2528  						return 0x65 // GS
  2529  					}
  2530  					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2531  
  2532  				case objabi.Hdarwin,
  2533  					objabi.Hdragonfly,
  2534  					objabi.Hfreebsd,
  2535  					objabi.Hnetbsd,
  2536  					objabi.Hopenbsd:
  2537  					return 0x65 // GS
  2538  				}
  2539  			}
  2540  
  2541  			switch ctxt.Headtype {
  2542  			default:
  2543  				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2544  
  2545  			case objabi.Hlinux:
  2546  				if isAndroid {
  2547  					return 0x64 // FS
  2548  				}
  2549  
  2550  				if ctxt.Flag_shared {
  2551  					log.Fatalf("unknown TLS base register for linux with -shared")
  2552  				} else {
  2553  					return 0x64 // FS
  2554  				}
  2555  
  2556  			case objabi.Hdragonfly,
  2557  				objabi.Hfreebsd,
  2558  				objabi.Hnetbsd,
  2559  				objabi.Hopenbsd,
  2560  				objabi.Hsolaris:
  2561  				return 0x64 // FS
  2562  
  2563  			case objabi.Hdarwin:
  2564  				return 0x65 // GS
  2565  			}
  2566  		}
  2567  	}
  2568  
  2569  	switch a.Index {
  2570  	case REG_CS:
  2571  		return 0x2e
  2572  
  2573  	case REG_DS:
  2574  		return 0x3e
  2575  
  2576  	case REG_ES:
  2577  		return 0x26
  2578  
  2579  	case REG_TLS:
  2580  		if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
  2581  			// When building for inclusion into a shared library, an instruction of the form
  2582  			//     MOV off(CX)(TLS*1), AX
  2583  			// becomes
  2584  			//     mov %gs:off(%ecx), %eax // on i386
  2585  			//     mov %fs:off(%rcx), %rax // on amd64
  2586  			// which assumes that the correct TLS offset has been loaded into CX (today
  2587  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2588  			// a shared library the instruction it becomes
  2589  			//     mov 0x0(%ecx), %eax // on i386
  2590  			//     mov 0x0(%rcx), %rax // on amd64
  2591  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2592  			if ctxt.Arch.Family == sys.I386 {
  2593  				return 0x65 // GS
  2594  			}
  2595  			return 0x64 // FS
  2596  		}
  2597  
  2598  	case REG_FS:
  2599  		return 0x64
  2600  
  2601  	case REG_GS:
  2602  		return 0x65
  2603  	}
  2604  
  2605  	return 0
  2606  }
  2607  
  2608  // oclassRegList returns multisource operand class for addr.
  2609  func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int {
  2610  	// TODO(quasilyte): when oclass register case is refactored into
  2611  	// lookup table, use it here to get register kind more easily.
  2612  	// Helper functions like regIsXmm should go away too (they will become redundant).
  2613  
  2614  	regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 }
  2615  	regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 }
  2616  	regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 }
  2617  
  2618  	reg0, reg1 := decodeRegisterRange(addr.Offset)
  2619  	low := regIndex(int16(reg0))
  2620  	high := regIndex(int16(reg1))
  2621  
  2622  	if ctxt.Arch.Family == sys.I386 {
  2623  		if low >= 8 || high >= 8 {
  2624  			return Yxxx
  2625  		}
  2626  	}
  2627  
  2628  	switch high - low {
  2629  	case 3:
  2630  		switch {
  2631  		case regIsXmm(reg0) && regIsXmm(reg1):
  2632  			return YxrEvexMulti4
  2633  		case regIsYmm(reg0) && regIsYmm(reg1):
  2634  			return YyrEvexMulti4
  2635  		case regIsZmm(reg0) && regIsZmm(reg1):
  2636  			return YzrMulti4
  2637  		default:
  2638  			return Yxxx
  2639  		}
  2640  	default:
  2641  		return Yxxx
  2642  	}
  2643  }
  2644  
  2645  // oclassVMem returns V-mem (vector memory with VSIB) operand class.
  2646  // For addr that is not V-mem returns (Yxxx, false).
  2647  func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) {
  2648  	switch addr.Index {
  2649  	case REG_X0 + 0,
  2650  		REG_X0 + 1,
  2651  		REG_X0 + 2,
  2652  		REG_X0 + 3,
  2653  		REG_X0 + 4,
  2654  		REG_X0 + 5,
  2655  		REG_X0 + 6,
  2656  		REG_X0 + 7:
  2657  		return Yxvm, true
  2658  	case REG_X8 + 0,
  2659  		REG_X8 + 1,
  2660  		REG_X8 + 2,
  2661  		REG_X8 + 3,
  2662  		REG_X8 + 4,
  2663  		REG_X8 + 5,
  2664  		REG_X8 + 6,
  2665  		REG_X8 + 7:
  2666  		if ctxt.Arch.Family == sys.I386 {
  2667  			return Yxxx, true
  2668  		}
  2669  		return Yxvm, true
  2670  	case REG_X16 + 0,
  2671  		REG_X16 + 1,
  2672  		REG_X16 + 2,
  2673  		REG_X16 + 3,
  2674  		REG_X16 + 4,
  2675  		REG_X16 + 5,
  2676  		REG_X16 + 6,
  2677  		REG_X16 + 7,
  2678  		REG_X16 + 8,
  2679  		REG_X16 + 9,
  2680  		REG_X16 + 10,
  2681  		REG_X16 + 11,
  2682  		REG_X16 + 12,
  2683  		REG_X16 + 13,
  2684  		REG_X16 + 14,
  2685  		REG_X16 + 15:
  2686  		if ctxt.Arch.Family == sys.I386 {
  2687  			return Yxxx, true
  2688  		}
  2689  		return YxvmEvex, true
  2690  
  2691  	case REG_Y0 + 0,
  2692  		REG_Y0 + 1,
  2693  		REG_Y0 + 2,
  2694  		REG_Y0 + 3,
  2695  		REG_Y0 + 4,
  2696  		REG_Y0 + 5,
  2697  		REG_Y0 + 6,
  2698  		REG_Y0 + 7:
  2699  		return Yyvm, true
  2700  	case REG_Y8 + 0,
  2701  		REG_Y8 + 1,
  2702  		REG_Y8 + 2,
  2703  		REG_Y8 + 3,
  2704  		REG_Y8 + 4,
  2705  		REG_Y8 + 5,
  2706  		REG_Y8 + 6,
  2707  		REG_Y8 + 7:
  2708  		if ctxt.Arch.Family == sys.I386 {
  2709  			return Yxxx, true
  2710  		}
  2711  		return Yyvm, true
  2712  	case REG_Y16 + 0,
  2713  		REG_Y16 + 1,
  2714  		REG_Y16 + 2,
  2715  		REG_Y16 + 3,
  2716  		REG_Y16 + 4,
  2717  		REG_Y16 + 5,
  2718  		REG_Y16 + 6,
  2719  		REG_Y16 + 7,
  2720  		REG_Y16 + 8,
  2721  		REG_Y16 + 9,
  2722  		REG_Y16 + 10,
  2723  		REG_Y16 + 11,
  2724  		REG_Y16 + 12,
  2725  		REG_Y16 + 13,
  2726  		REG_Y16 + 14,
  2727  		REG_Y16 + 15:
  2728  		if ctxt.Arch.Family == sys.I386 {
  2729  			return Yxxx, true
  2730  		}
  2731  		return YyvmEvex, true
  2732  
  2733  	case REG_Z0 + 0,
  2734  		REG_Z0 + 1,
  2735  		REG_Z0 + 2,
  2736  		REG_Z0 + 3,
  2737  		REG_Z0 + 4,
  2738  		REG_Z0 + 5,
  2739  		REG_Z0 + 6,
  2740  		REG_Z0 + 7:
  2741  		return Yzvm, true
  2742  	case REG_Z8 + 0,
  2743  		REG_Z8 + 1,
  2744  		REG_Z8 + 2,
  2745  		REG_Z8 + 3,
  2746  		REG_Z8 + 4,
  2747  		REG_Z8 + 5,
  2748  		REG_Z8 + 6,
  2749  		REG_Z8 + 7,
  2750  		REG_Z8 + 8,
  2751  		REG_Z8 + 9,
  2752  		REG_Z8 + 10,
  2753  		REG_Z8 + 11,
  2754  		REG_Z8 + 12,
  2755  		REG_Z8 + 13,
  2756  		REG_Z8 + 14,
  2757  		REG_Z8 + 15,
  2758  		REG_Z8 + 16,
  2759  		REG_Z8 + 17,
  2760  		REG_Z8 + 18,
  2761  		REG_Z8 + 19,
  2762  		REG_Z8 + 20,
  2763  		REG_Z8 + 21,
  2764  		REG_Z8 + 22,
  2765  		REG_Z8 + 23:
  2766  		if ctxt.Arch.Family == sys.I386 {
  2767  			return Yxxx, true
  2768  		}
  2769  		return Yzvm, true
  2770  	}
  2771  
  2772  	return Yxxx, false
  2773  }
  2774  
  2775  func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2776  	switch a.Type {
  2777  	case obj.TYPE_REGLIST:
  2778  		return oclassRegList(ctxt, a)
  2779  
  2780  	case obj.TYPE_NONE:
  2781  		return Ynone
  2782  
  2783  	case obj.TYPE_BRANCH:
  2784  		return Ybr
  2785  
  2786  	case obj.TYPE_INDIR:
  2787  		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2788  			return Yindir
  2789  		}
  2790  		return Yxxx
  2791  
  2792  	case obj.TYPE_MEM:
  2793  		// Pseudo registers have negative index, but SP is
  2794  		// not pseudo on x86, hence REG_SP check is not redundant.
  2795  		if a.Index == REG_SP || a.Index < 0 {
  2796  			// Can't use FP/SB/PC/SP as the index register.
  2797  			return Yxxx
  2798  		}
  2799  
  2800  		if vmem, ok := oclassVMem(ctxt, a); ok {
  2801  			return vmem
  2802  		}
  2803  
  2804  		if ctxt.Arch.Family == sys.AMD64 {
  2805  			switch a.Name {
  2806  			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
  2807  				// Global variables can't use index registers and their
  2808  				// base register is %rip (%rip is encoded as REG_NONE).
  2809  				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
  2810  					return Yxxx
  2811  				}
  2812  			case obj.NAME_AUTO, obj.NAME_PARAM:
  2813  				// These names must have a base of SP.  The old compiler
  2814  				// uses 0 for the base register. SSA uses REG_SP.
  2815  				if a.Reg != REG_SP && a.Reg != 0 {
  2816  					return Yxxx
  2817  				}
  2818  			case obj.NAME_NONE:
  2819  				// everything is ok
  2820  			default:
  2821  				// unknown name
  2822  				return Yxxx
  2823  			}
  2824  		}
  2825  		return Ym
  2826  
  2827  	case obj.TYPE_ADDR:
  2828  		switch a.Name {
  2829  		case obj.NAME_GOTREF:
  2830  			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
  2831  			return Yxxx
  2832  
  2833  		case obj.NAME_EXTERN,
  2834  			obj.NAME_STATIC:
  2835  			if a.Sym != nil && useAbs(ctxt, a.Sym) {
  2836  				return Yi32
  2837  			}
  2838  			return Yiauto // use pc-relative addressing
  2839  
  2840  		case obj.NAME_AUTO,
  2841  			obj.NAME_PARAM:
  2842  			return Yiauto
  2843  		}
  2844  
  2845  		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2846  		// and got Yi32 in an earlier version of this code.
  2847  		// Keep doing that until we fix yduff etc.
  2848  		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2849  			return Yi32
  2850  		}
  2851  
  2852  		if a.Sym != nil || a.Name != obj.NAME_NONE {
  2853  			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2854  		}
  2855  		fallthrough
  2856  
  2857  	case obj.TYPE_CONST:
  2858  		if a.Sym != nil {
  2859  			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2860  		}
  2861  
  2862  		v := a.Offset
  2863  		if ctxt.Arch.Family == sys.I386 {
  2864  			v = int64(int32(v))
  2865  		}
  2866  		switch {
  2867  		case v == 0:
  2868  			return Yi0
  2869  		case v == 1:
  2870  			return Yi1
  2871  		case v >= 0 && v <= 3:
  2872  			return Yu2
  2873  		case v >= 0 && v <= 127:
  2874  			return Yu7
  2875  		case v >= 0 && v <= 255:
  2876  			return Yu8
  2877  		case v >= -128 && v <= 127:
  2878  			return Yi8
  2879  		}
  2880  		if ctxt.Arch.Family == sys.I386 {
  2881  			return Yi32
  2882  		}
  2883  		l := int32(v)
  2884  		if int64(l) == v {
  2885  			return Ys32 // can sign extend
  2886  		}
  2887  		if v>>32 == 0 {
  2888  			return Yi32 // unsigned
  2889  		}
  2890  		return Yi64
  2891  
  2892  	case obj.TYPE_TEXTSIZE:
  2893  		return Ytextsize
  2894  	}
  2895  
  2896  	if a.Type != obj.TYPE_REG {
  2897  		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2898  		return Yxxx
  2899  	}
  2900  
  2901  	switch a.Reg {
  2902  	case REG_AL:
  2903  		return Yal
  2904  
  2905  	case REG_AX:
  2906  		return Yax
  2907  
  2908  		/*
  2909  			case REG_SPB:
  2910  		*/
  2911  	case REG_BPB,
  2912  		REG_SIB,
  2913  		REG_DIB,
  2914  		REG_R8B,
  2915  		REG_R9B,
  2916  		REG_R10B,
  2917  		REG_R11B,
  2918  		REG_R12B,
  2919  		REG_R13B,
  2920  		REG_R14B,
  2921  		REG_R15B:
  2922  		if ctxt.Arch.Family == sys.I386 {
  2923  			return Yxxx
  2924  		}
  2925  		fallthrough
  2926  
  2927  	case REG_DL,
  2928  		REG_BL,
  2929  		REG_AH,
  2930  		REG_CH,
  2931  		REG_DH,
  2932  		REG_BH:
  2933  		return Yrb
  2934  
  2935  	case REG_CL:
  2936  		return Ycl
  2937  
  2938  	case REG_CX:
  2939  		return Ycx
  2940  
  2941  	case REG_DX, REG_BX:
  2942  		return Yrx
  2943  
  2944  	case REG_R8, // not really Yrl
  2945  		REG_R9,
  2946  		REG_R10,
  2947  		REG_R11,
  2948  		REG_R12,
  2949  		REG_R13,
  2950  		REG_R14,
  2951  		REG_R15:
  2952  		if ctxt.Arch.Family == sys.I386 {
  2953  			return Yxxx
  2954  		}
  2955  		fallthrough
  2956  
  2957  	case REG_SP, REG_BP, REG_SI, REG_DI:
  2958  		if ctxt.Arch.Family == sys.I386 {
  2959  			return Yrl32
  2960  		}
  2961  		return Yrl
  2962  
  2963  	case REG_F0 + 0:
  2964  		return Yf0
  2965  
  2966  	case REG_F0 + 1,
  2967  		REG_F0 + 2,
  2968  		REG_F0 + 3,
  2969  		REG_F0 + 4,
  2970  		REG_F0 + 5,
  2971  		REG_F0 + 6,
  2972  		REG_F0 + 7:
  2973  		return Yrf
  2974  
  2975  	case REG_M0 + 0,
  2976  		REG_M0 + 1,
  2977  		REG_M0 + 2,
  2978  		REG_M0 + 3,
  2979  		REG_M0 + 4,
  2980  		REG_M0 + 5,
  2981  		REG_M0 + 6,
  2982  		REG_M0 + 7:
  2983  		return Ymr
  2984  
  2985  	case REG_X0:
  2986  		return Yxr0
  2987  
  2988  	case REG_X0 + 1,
  2989  		REG_X0 + 2,
  2990  		REG_X0 + 3,
  2991  		REG_X0 + 4,
  2992  		REG_X0 + 5,
  2993  		REG_X0 + 6,
  2994  		REG_X0 + 7,
  2995  		REG_X0 + 8,
  2996  		REG_X0 + 9,
  2997  		REG_X0 + 10,
  2998  		REG_X0 + 11,
  2999  		REG_X0 + 12,
  3000  		REG_X0 + 13,
  3001  		REG_X0 + 14,
  3002  		REG_X0 + 15:
  3003  		return Yxr
  3004  
  3005  	case REG_X0 + 16,
  3006  		REG_X0 + 17,
  3007  		REG_X0 + 18,
  3008  		REG_X0 + 19,
  3009  		REG_X0 + 20,
  3010  		REG_X0 + 21,
  3011  		REG_X0 + 22,
  3012  		REG_X0 + 23,
  3013  		REG_X0 + 24,
  3014  		REG_X0 + 25,
  3015  		REG_X0 + 26,
  3016  		REG_X0 + 27,
  3017  		REG_X0 + 28,
  3018  		REG_X0 + 29,
  3019  		REG_X0 + 30,
  3020  		REG_X0 + 31:
  3021  		return YxrEvex
  3022  
  3023  	case REG_Y0 + 0,
  3024  		REG_Y0 + 1,
  3025  		REG_Y0 + 2,
  3026  		REG_Y0 + 3,
  3027  		REG_Y0 + 4,
  3028  		REG_Y0 + 5,
  3029  		REG_Y0 + 6,
  3030  		REG_Y0 + 7,
  3031  		REG_Y0 + 8,
  3032  		REG_Y0 + 9,
  3033  		REG_Y0 + 10,
  3034  		REG_Y0 + 11,
  3035  		REG_Y0 + 12,
  3036  		REG_Y0 + 13,
  3037  		REG_Y0 + 14,
  3038  		REG_Y0 + 15:
  3039  		return Yyr
  3040  
  3041  	case REG_Y0 + 16,
  3042  		REG_Y0 + 17,
  3043  		REG_Y0 + 18,
  3044  		REG_Y0 + 19,
  3045  		REG_Y0 + 20,
  3046  		REG_Y0 + 21,
  3047  		REG_Y0 + 22,
  3048  		REG_Y0 + 23,
  3049  		REG_Y0 + 24,
  3050  		REG_Y0 + 25,
  3051  		REG_Y0 + 26,
  3052  		REG_Y0 + 27,
  3053  		REG_Y0 + 28,
  3054  		REG_Y0 + 29,
  3055  		REG_Y0 + 30,
  3056  		REG_Y0 + 31:
  3057  		return YyrEvex
  3058  
  3059  	case REG_Z0 + 0,
  3060  		REG_Z0 + 1,
  3061  		REG_Z0 + 2,
  3062  		REG_Z0 + 3,
  3063  		REG_Z0 + 4,
  3064  		REG_Z0 + 5,
  3065  		REG_Z0 + 6,
  3066  		REG_Z0 + 7:
  3067  		return Yzr
  3068  
  3069  	case REG_Z0 + 8,
  3070  		REG_Z0 + 9,
  3071  		REG_Z0 + 10,
  3072  		REG_Z0 + 11,
  3073  		REG_Z0 + 12,
  3074  		REG_Z0 + 13,
  3075  		REG_Z0 + 14,
  3076  		REG_Z0 + 15,
  3077  		REG_Z0 + 16,
  3078  		REG_Z0 + 17,
  3079  		REG_Z0 + 18,
  3080  		REG_Z0 + 19,
  3081  		REG_Z0 + 20,
  3082  		REG_Z0 + 21,
  3083  		REG_Z0 + 22,
  3084  		REG_Z0 + 23,
  3085  		REG_Z0 + 24,
  3086  		REG_Z0 + 25,
  3087  		REG_Z0 + 26,
  3088  		REG_Z0 + 27,
  3089  		REG_Z0 + 28,
  3090  		REG_Z0 + 29,
  3091  		REG_Z0 + 30,
  3092  		REG_Z0 + 31:
  3093  		if ctxt.Arch.Family == sys.I386 {
  3094  			return Yxxx
  3095  		}
  3096  		return Yzr
  3097  
  3098  	case REG_K0:
  3099  		return Yk0
  3100  
  3101  	case REG_K0 + 1,
  3102  		REG_K0 + 2,
  3103  		REG_K0 + 3,
  3104  		REG_K0 + 4,
  3105  		REG_K0 + 5,
  3106  		REG_K0 + 6,
  3107  		REG_K0 + 7:
  3108  		return Yknot0
  3109  
  3110  	case REG_CS:
  3111  		return Ycs
  3112  	case REG_SS:
  3113  		return Yss
  3114  	case REG_DS:
  3115  		return Yds
  3116  	case REG_ES:
  3117  		return Yes
  3118  	case REG_FS:
  3119  		return Yfs
  3120  	case REG_GS:
  3121  		return Ygs
  3122  	case REG_TLS:
  3123  		return Ytls
  3124  
  3125  	case REG_GDTR:
  3126  		return Ygdtr
  3127  	case REG_IDTR:
  3128  		return Yidtr
  3129  	case REG_LDTR:
  3130  		return Yldtr
  3131  	case REG_MSW:
  3132  		return Ymsw
  3133  	case REG_TASK:
  3134  		return Ytask
  3135  
  3136  	case REG_CR + 0:
  3137  		return Ycr0
  3138  	case REG_CR + 1:
  3139  		return Ycr1
  3140  	case REG_CR + 2:
  3141  		return Ycr2
  3142  	case REG_CR + 3:
  3143  		return Ycr3
  3144  	case REG_CR + 4:
  3145  		return Ycr4
  3146  	case REG_CR + 5:
  3147  		return Ycr5
  3148  	case REG_CR + 6:
  3149  		return Ycr6
  3150  	case REG_CR + 7:
  3151  		return Ycr7
  3152  	case REG_CR + 8:
  3153  		return Ycr8
  3154  
  3155  	case REG_DR + 0:
  3156  		return Ydr0
  3157  	case REG_DR + 1:
  3158  		return Ydr1
  3159  	case REG_DR + 2:
  3160  		return Ydr2
  3161  	case REG_DR + 3:
  3162  		return Ydr3
  3163  	case REG_DR + 4:
  3164  		return Ydr4
  3165  	case REG_DR + 5:
  3166  		return Ydr5
  3167  	case REG_DR + 6:
  3168  		return Ydr6
  3169  	case REG_DR + 7:
  3170  		return Ydr7
  3171  
  3172  	case REG_TR + 0:
  3173  		return Ytr0
  3174  	case REG_TR + 1:
  3175  		return Ytr1
  3176  	case REG_TR + 2:
  3177  		return Ytr2
  3178  	case REG_TR + 3:
  3179  		return Ytr3
  3180  	case REG_TR + 4:
  3181  		return Ytr4
  3182  	case REG_TR + 5:
  3183  		return Ytr5
  3184  	case REG_TR + 6:
  3185  		return Ytr6
  3186  	case REG_TR + 7:
  3187  		return Ytr7
  3188  	}
  3189  
  3190  	return Yxxx
  3191  }
  3192  
  3193  // AsmBuf is a simple buffer to assemble variable-length x86 instructions into
  3194  // and hold assembly state.
  3195  type AsmBuf struct {
  3196  	buf      [100]byte
  3197  	off      int
  3198  	rexflag  int
  3199  	vexflag  bool // Per inst: true for VEX-encoded
  3200  	evexflag bool // Per inst: true for EVEX-encoded
  3201  	rep      bool
  3202  	repn     bool
  3203  	lock     bool
  3204  
  3205  	evex evexBits // Initialized when evexflag is true
  3206  }
  3207  
  3208  // Put1 appends one byte to the end of the buffer.
  3209  func (ab *AsmBuf) Put1(x byte) {
  3210  	ab.buf[ab.off] = x
  3211  	ab.off++
  3212  }
  3213  
  3214  // Put2 appends two bytes to the end of the buffer.
  3215  func (ab *AsmBuf) Put2(x, y byte) {
  3216  	ab.buf[ab.off+0] = x
  3217  	ab.buf[ab.off+1] = y
  3218  	ab.off += 2
  3219  }
  3220  
  3221  // Put3 appends three bytes to the end of the buffer.
  3222  func (ab *AsmBuf) Put3(x, y, z byte) {
  3223  	ab.buf[ab.off+0] = x
  3224  	ab.buf[ab.off+1] = y
  3225  	ab.buf[ab.off+2] = z
  3226  	ab.off += 3
  3227  }
  3228  
  3229  // Put4 appends four bytes to the end of the buffer.
  3230  func (ab *AsmBuf) Put4(x, y, z, w byte) {
  3231  	ab.buf[ab.off+0] = x
  3232  	ab.buf[ab.off+1] = y
  3233  	ab.buf[ab.off+2] = z
  3234  	ab.buf[ab.off+3] = w
  3235  	ab.off += 4
  3236  }
  3237  
  3238  // PutInt16 writes v into the buffer using little-endian encoding.
  3239  func (ab *AsmBuf) PutInt16(v int16) {
  3240  	ab.buf[ab.off+0] = byte(v)
  3241  	ab.buf[ab.off+1] = byte(v >> 8)
  3242  	ab.off += 2
  3243  }
  3244  
  3245  // PutInt32 writes v into the buffer using little-endian encoding.
  3246  func (ab *AsmBuf) PutInt32(v int32) {
  3247  	ab.buf[ab.off+0] = byte(v)
  3248  	ab.buf[ab.off+1] = byte(v >> 8)
  3249  	ab.buf[ab.off+2] = byte(v >> 16)
  3250  	ab.buf[ab.off+3] = byte(v >> 24)
  3251  	ab.off += 4
  3252  }
  3253  
  3254  // PutInt64 writes v into the buffer using little-endian encoding.
  3255  func (ab *AsmBuf) PutInt64(v int64) {
  3256  	ab.buf[ab.off+0] = byte(v)
  3257  	ab.buf[ab.off+1] = byte(v >> 8)
  3258  	ab.buf[ab.off+2] = byte(v >> 16)
  3259  	ab.buf[ab.off+3] = byte(v >> 24)
  3260  	ab.buf[ab.off+4] = byte(v >> 32)
  3261  	ab.buf[ab.off+5] = byte(v >> 40)
  3262  	ab.buf[ab.off+6] = byte(v >> 48)
  3263  	ab.buf[ab.off+7] = byte(v >> 56)
  3264  	ab.off += 8
  3265  }
  3266  
  3267  // Put copies b into the buffer.
  3268  func (ab *AsmBuf) Put(b []byte) {
  3269  	copy(ab.buf[ab.off:], b)
  3270  	ab.off += len(b)
  3271  }
  3272  
  3273  // PutOpBytesLit writes zero terminated sequence of bytes from op,
  3274  // starting at specified offset (e.g. z counter value).
  3275  // Trailing 0 is not written.
  3276  //
  3277  // Intended to be used for literal Z cases.
  3278  // Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r).
  3279  func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) {
  3280  	for int(op[offset]) != 0 {
  3281  		ab.Put1(byte(op[offset]))
  3282  		offset++
  3283  	}
  3284  }
  3285  
  3286  // Insert inserts b at offset i.
  3287  func (ab *AsmBuf) Insert(i int, b byte) {
  3288  	ab.off++
  3289  	copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1])
  3290  	ab.buf[i] = b
  3291  }
  3292  
  3293  // Last returns the byte at the end of the buffer.
  3294  func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] }
  3295  
  3296  // Len returns the length of the buffer.
  3297  func (ab *AsmBuf) Len() int { return ab.off }
  3298  
  3299  // Bytes returns the contents of the buffer.
  3300  func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] }
  3301  
  3302  // Reset empties the buffer.
  3303  func (ab *AsmBuf) Reset() { ab.off = 0 }
  3304  
  3305  // At returns the byte at offset i.
  3306  func (ab *AsmBuf) At(i int) byte { return ab.buf[i] }
  3307  
  3308  // asmidx emits SIB byte.
  3309  func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
  3310  	var i int
  3311  
  3312  	// X/Y index register is used in VSIB.
  3313  	switch index {
  3314  	default:
  3315  		goto bad
  3316  
  3317  	case REG_NONE:
  3318  		i = 4 << 3
  3319  		goto bas
  3320  
  3321  	case REG_R8,
  3322  		REG_R9,
  3323  		REG_R10,
  3324  		REG_R11,
  3325  		REG_R12,
  3326  		REG_R13,
  3327  		REG_R14,
  3328  		REG_R15,
  3329  		REG_X8,
  3330  		REG_X9,
  3331  		REG_X10,
  3332  		REG_X11,
  3333  		REG_X12,
  3334  		REG_X13,
  3335  		REG_X14,
  3336  		REG_X15,
  3337  		REG_X16,
  3338  		REG_X17,
  3339  		REG_X18,
  3340  		REG_X19,
  3341  		REG_X20,
  3342  		REG_X21,
  3343  		REG_X22,
  3344  		REG_X23,
  3345  		REG_X24,
  3346  		REG_X25,
  3347  		REG_X26,
  3348  		REG_X27,
  3349  		REG_X28,
  3350  		REG_X29,
  3351  		REG_X30,
  3352  		REG_X31,
  3353  		REG_Y8,
  3354  		REG_Y9,
  3355  		REG_Y10,
  3356  		REG_Y11,
  3357  		REG_Y12,
  3358  		REG_Y13,
  3359  		REG_Y14,
  3360  		REG_Y15,
  3361  		REG_Y16,
  3362  		REG_Y17,
  3363  		REG_Y18,
  3364  		REG_Y19,
  3365  		REG_Y20,
  3366  		REG_Y21,
  3367  		REG_Y22,
  3368  		REG_Y23,
  3369  		REG_Y24,
  3370  		REG_Y25,
  3371  		REG_Y26,
  3372  		REG_Y27,
  3373  		REG_Y28,
  3374  		REG_Y29,
  3375  		REG_Y30,
  3376  		REG_Y31,
  3377  		REG_Z8,
  3378  		REG_Z9,
  3379  		REG_Z10,
  3380  		REG_Z11,
  3381  		REG_Z12,
  3382  		REG_Z13,
  3383  		REG_Z14,
  3384  		REG_Z15,
  3385  		REG_Z16,
  3386  		REG_Z17,
  3387  		REG_Z18,
  3388  		REG_Z19,
  3389  		REG_Z20,
  3390  		REG_Z21,
  3391  		REG_Z22,
  3392  		REG_Z23,
  3393  		REG_Z24,
  3394  		REG_Z25,
  3395  		REG_Z26,
  3396  		REG_Z27,
  3397  		REG_Z28,
  3398  		REG_Z29,
  3399  		REG_Z30,
  3400  		REG_Z31:
  3401  		if ctxt.Arch.Family == sys.I386 {
  3402  			goto bad
  3403  		}
  3404  		fallthrough
  3405  
  3406  	case REG_AX,
  3407  		REG_CX,
  3408  		REG_DX,
  3409  		REG_BX,
  3410  		REG_BP,
  3411  		REG_SI,
  3412  		REG_DI,
  3413  		REG_X0,
  3414  		REG_X1,
  3415  		REG_X2,
  3416  		REG_X3,
  3417  		REG_X4,
  3418  		REG_X5,
  3419  		REG_X6,
  3420  		REG_X7,
  3421  		REG_Y0,
  3422  		REG_Y1,
  3423  		REG_Y2,
  3424  		REG_Y3,
  3425  		REG_Y4,
  3426  		REG_Y5,
  3427  		REG_Y6,
  3428  		REG_Y7,
  3429  		REG_Z0,
  3430  		REG_Z1,
  3431  		REG_Z2,
  3432  		REG_Z3,
  3433  		REG_Z4,
  3434  		REG_Z5,
  3435  		REG_Z6,
  3436  		REG_Z7:
  3437  		i = reg[index] << 3
  3438  	}
  3439  
  3440  	switch scale {
  3441  	default:
  3442  		goto bad
  3443  
  3444  	case 1:
  3445  		break
  3446  
  3447  	case 2:
  3448  		i |= 1 << 6
  3449  
  3450  	case 4:
  3451  		i |= 2 << 6
  3452  
  3453  	case 8:
  3454  		i |= 3 << 6
  3455  	}
  3456  
  3457  bas:
  3458  	switch base {
  3459  	default:
  3460  		goto bad
  3461  
  3462  	case REG_NONE: // must be mod=00
  3463  		i |= 5
  3464  
  3465  	case REG_R8,
  3466  		REG_R9,
  3467  		REG_R10,
  3468  		REG_R11,
  3469  		REG_R12,
  3470  		REG_R13,
  3471  		REG_R14,
  3472  		REG_R15:
  3473  		if ctxt.Arch.Family == sys.I386 {
  3474  			goto bad
  3475  		}
  3476  		fallthrough
  3477  
  3478  	case REG_AX,
  3479  		REG_CX,
  3480  		REG_DX,
  3481  		REG_BX,
  3482  		REG_SP,
  3483  		REG_BP,
  3484  		REG_SI,
  3485  		REG_DI:
  3486  		i |= reg[base]
  3487  	}
  3488  
  3489  	ab.Put1(byte(i))
  3490  	return
  3491  
  3492  bad:
  3493  	ctxt.Diag("asmidx: bad address %d/%s/%s", scale, rconv(index), rconv(base))
  3494  	ab.Put1(0)
  3495  }
  3496  
  3497  func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
  3498  	var rel obj.Reloc
  3499  
  3500  	v := vaddr(ctxt, p, a, &rel)
  3501  	if rel.Siz != 0 {
  3502  		if rel.Siz != 4 {
  3503  			ctxt.Diag("bad reloc")
  3504  		}
  3505  		rel.Off = int32(p.Pc + int64(ab.Len()))
  3506  		cursym.AddRel(ctxt, rel)
  3507  	}
  3508  
  3509  	ab.PutInt32(int32(v))
  3510  }
  3511  
  3512  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  3513  	if r != nil {
  3514  		*r = obj.Reloc{}
  3515  	}
  3516  
  3517  	switch a.Name {
  3518  	case obj.NAME_STATIC,
  3519  		obj.NAME_GOTREF,
  3520  		obj.NAME_EXTERN:
  3521  		s := a.Sym
  3522  		if r == nil {
  3523  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3524  			log.Fatalf("reloc")
  3525  		}
  3526  
  3527  		if a.Name == obj.NAME_GOTREF {
  3528  			r.Siz = 4
  3529  			r.Type = objabi.R_GOTPCREL
  3530  		} else if useAbs(ctxt, s) {
  3531  			r.Siz = 4
  3532  			r.Type = objabi.R_ADDR
  3533  		} else {
  3534  			r.Siz = 4
  3535  			r.Type = objabi.R_PCREL
  3536  		}
  3537  
  3538  		r.Off = -1 // caller must fill in
  3539  		r.Sym = s
  3540  		r.Add = a.Offset
  3541  
  3542  		return 0
  3543  	}
  3544  
  3545  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  3546  		if r == nil {
  3547  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3548  			log.Fatalf("reloc")
  3549  		}
  3550  
  3551  		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
  3552  			r.Type = objabi.R_TLS_LE
  3553  			r.Siz = 4
  3554  			r.Off = -1 // caller must fill in
  3555  			r.Add = a.Offset
  3556  		}
  3557  		return 0
  3558  	}
  3559  
  3560  	return a.Offset
  3561  }
  3562  
  3563  func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
  3564  	var base int
  3565  	var rel obj.Reloc
  3566  
  3567  	rex &= 0x40 | Rxr
  3568  	if a.Offset != int64(int32(a.Offset)) {
  3569  		// The rules are slightly different for 386 and AMD64,
  3570  		// mostly for historical reasons. We may unify them later,
  3571  		// but it must be discussed beforehand.
  3572  		//
  3573  		// For 64bit mode only LEAL is allowed to overflow.
  3574  		// It's how https://golang.org/cl/59630 made it.
  3575  		// crypto/sha1/sha1block_amd64.s depends on this feature.
  3576  		//
  3577  		// For 32bit mode rules are more permissive.
  3578  		// If offset fits uint32, it's permitted.
  3579  		// This is allowed for assembly that wants to use 32-bit hex
  3580  		// constants, e.g. LEAL 0x99999999(AX), AX.
  3581  		overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
  3582  			(ctxt.Arch.Family != sys.AMD64 &&
  3583  				int64(uint32(a.Offset)) == a.Offset &&
  3584  				ab.rexflag&Rxw == 0)
  3585  		if !overflowOK {
  3586  			ctxt.Diag("offset too large in %s", p)
  3587  		}
  3588  	}
  3589  	v := int32(a.Offset)
  3590  	rel.Siz = 0
  3591  
  3592  	switch a.Type {
  3593  	case obj.TYPE_ADDR:
  3594  		if a.Name == obj.NAME_NONE {
  3595  			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
  3596  		}
  3597  		if a.Index == REG_TLS {
  3598  			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
  3599  		}
  3600  		goto bad
  3601  
  3602  	case obj.TYPE_REG:
  3603  		const regFirst = REG_AL
  3604  		const regLast = REG_Z31
  3605  		if a.Reg < regFirst || regLast < a.Reg {
  3606  			goto bad
  3607  		}
  3608  		if v != 0 {
  3609  			goto bad
  3610  		}
  3611  		ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
  3612  		ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
  3613  		return
  3614  	}
  3615  
  3616  	if a.Type != obj.TYPE_MEM {
  3617  		goto bad
  3618  	}
  3619  
  3620  	if a.Index != REG_NONE && a.Index != REG_TLS && !(REG_CS <= a.Index && a.Index <= REG_GS) {
  3621  		base := int(a.Reg)
  3622  		switch a.Name {
  3623  		case obj.NAME_EXTERN,
  3624  			obj.NAME_GOTREF,
  3625  			obj.NAME_STATIC:
  3626  			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
  3627  				goto bad
  3628  			}
  3629  			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
  3630  				// The base register has already been set. It holds the PC
  3631  				// of this instruction returned by a PC-reading thunk.
  3632  				// See obj6.go:rewriteToPcrel.
  3633  			} else {
  3634  				base = REG_NONE
  3635  			}
  3636  			v = int32(vaddr(ctxt, p, a, &rel))
  3637  
  3638  		case obj.NAME_AUTO,
  3639  			obj.NAME_PARAM:
  3640  			base = REG_SP
  3641  		}
  3642  
  3643  		ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
  3644  		if base == REG_NONE {
  3645  			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
  3646  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3647  			goto putrelv
  3648  		}
  3649  
  3650  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  3651  			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
  3652  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3653  			return
  3654  		}
  3655  
  3656  		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
  3657  			ab.Put1(byte(1<<6 | 4<<0 | r<<3))
  3658  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3659  			ab.Put1(disp8)
  3660  			return
  3661  		}
  3662  
  3663  		ab.Put1(byte(2<<6 | 4<<0 | r<<3))
  3664  		ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3665  		goto putrelv
  3666  	}
  3667  
  3668  	base = int(a.Reg)
  3669  	switch a.Name {
  3670  	case obj.NAME_STATIC,
  3671  		obj.NAME_GOTREF,
  3672  		obj.NAME_EXTERN:
  3673  		if a.Sym == nil {
  3674  			ctxt.Diag("bad addr: %v", p)
  3675  		}
  3676  		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
  3677  			// The base register has already been set. It holds the PC
  3678  			// of this instruction returned by a PC-reading thunk.
  3679  			// See obj6.go:rewriteToPcrel.
  3680  		} else {
  3681  			base = REG_NONE
  3682  		}
  3683  		v = int32(vaddr(ctxt, p, a, &rel))
  3684  
  3685  	case obj.NAME_AUTO,
  3686  		obj.NAME_PARAM:
  3687  		base = REG_SP
  3688  	}
  3689  
  3690  	if base == REG_TLS {
  3691  		v = int32(vaddr(ctxt, p, a, &rel))
  3692  	}
  3693  
  3694  	ab.rexflag |= regrex[base]&Rxb | rex
  3695  	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
  3696  		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
  3697  			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
  3698  				ctxt.Diag("%v has offset against gotref", p)
  3699  			}
  3700  			ab.Put1(byte(0<<6 | 5<<0 | r<<3))
  3701  			goto putrelv
  3702  		}
  3703  
  3704  		// temporary
  3705  		ab.Put2(
  3706  			byte(0<<6|4<<0|r<<3), // sib present
  3707  			0<<6|4<<3|5<<0,       // DS:d32
  3708  		)
  3709  		goto putrelv
  3710  	}
  3711  
  3712  	if base == REG_SP || base == REG_R12 {
  3713  		if v == 0 {
  3714  			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
  3715  			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3716  			return
  3717  		}
  3718  
  3719  		if disp8, ok := toDisp8(v, p, ab); ok {
  3720  			ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
  3721  			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3722  			ab.Put1(disp8)
  3723  			return
  3724  		}
  3725  
  3726  		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
  3727  		ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3728  		goto putrelv
  3729  	}
  3730  
  3731  	if REG_AX <= base && base <= REG_R15 {
  3732  		if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid &&
  3733  			ctxt.Headtype != objabi.Hwindows {
  3734  			rel = obj.Reloc{}
  3735  			rel.Type = objabi.R_TLS_LE
  3736  			rel.Siz = 4
  3737  			rel.Sym = nil
  3738  			rel.Add = int64(v)
  3739  			v = 0
  3740  		}
  3741  
  3742  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  3743  			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
  3744  			return
  3745  		}
  3746  
  3747  		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
  3748  			ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
  3749  			return
  3750  		}
  3751  
  3752  		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
  3753  		goto putrelv
  3754  	}
  3755  
  3756  	goto bad
  3757  
  3758  putrelv:
  3759  	if rel.Siz != 0 {
  3760  		if rel.Siz != 4 {
  3761  			ctxt.Diag("bad rel")
  3762  			goto bad
  3763  		}
  3764  
  3765  		rel.Off = int32(p.Pc + int64(ab.Len()))
  3766  		cursym.AddRel(ctxt, rel)
  3767  	}
  3768  
  3769  	ab.PutInt32(v)
  3770  	return
  3771  
  3772  bad:
  3773  	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
  3774  }
  3775  
  3776  func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  3777  	ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  3778  }
  3779  
  3780  func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
  3781  	ab.asmandsz(ctxt, cursym, p, a, o, 0, 0)
  3782  }
  3783  
  3784  func bytereg(a *obj.Addr, t *uint8) {
  3785  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  3786  		a.Reg += REG_AL - REG_AX
  3787  		*t = 0
  3788  	}
  3789  }
  3790  
  3791  func unbytereg(a *obj.Addr, t *uint8) {
  3792  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  3793  		a.Reg += REG_AX - REG_AL
  3794  		*t = 0
  3795  	}
  3796  }
  3797  
  3798  const (
  3799  	movLit uint8 = iota // Like Zlit
  3800  	movRegMem
  3801  	movMemReg
  3802  	movRegMem2op
  3803  	movMemReg2op
  3804  	movFullPtr // Load full pointer, trash heap (unsupported)
  3805  	movDoubleShift
  3806  	movTLSReg
  3807  )
  3808  
  3809  var ymovtab = []movtab{
  3810  	// push
  3811  	{APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
  3812  	{APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
  3813  	{APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
  3814  	{APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
  3815  	{APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
  3816  	{APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
  3817  	{APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
  3818  	{APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
  3819  	{APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
  3820  	{APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
  3821  	{APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
  3822  	{APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
  3823  	{APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
  3824  	{APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},
  3825  
  3826  	// pop
  3827  	{APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
  3828  	{APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
  3829  	{APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
  3830  	{APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
  3831  	{APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
  3832  	{APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
  3833  	{APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
  3834  	{APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
  3835  	{APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
  3836  	{APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
  3837  	{APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
  3838  	{APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},
  3839  
  3840  	// mov seg
  3841  	{AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
  3842  	{AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
  3843  	{AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
  3844  	{AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
  3845  	{AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
  3846  	{AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
  3847  	{AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
  3848  	{AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
  3849  	{AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
  3850  	{AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
  3851  	{AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
  3852  	{AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},
  3853  
  3854  	// mov cr
  3855  	{AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
  3856  	{AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
  3857  	{AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
  3858  	{AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
  3859  	{AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
  3860  	{AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
  3861  	{AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
  3862  	{AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
  3863  	{AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
  3864  	{AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
  3865  	{AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
  3866  	{AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
  3867  	{AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
  3868  	{AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
  3869  	{AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
  3870  	{AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
  3871  	{AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
  3872  	{AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
  3873  	{AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
  3874  	{AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
  3875  
  3876  	// mov dr
  3877  	{AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
  3878  	{AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
  3879  	{AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
  3880  	{AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
  3881  	{AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
  3882  	{AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
  3883  	{AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
  3884  	{AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
  3885  	{AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
  3886  	{AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
  3887  	{AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
  3888  	{AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
  3889  	{AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
  3890  	{AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
  3891  	{AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
  3892  	{AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
  3893  
  3894  	// mov tr
  3895  	{AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
  3896  	{AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
  3897  	{AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
  3898  	{AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},
  3899  
  3900  	// lgdt, sgdt, lidt, sidt
  3901  	{AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
  3902  	{AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
  3903  	{AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
  3904  	{AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
  3905  	{AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
  3906  	{AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
  3907  	{AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
  3908  	{AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
  3909  
  3910  	// lldt, sldt
  3911  	{AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
  3912  	{AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},
  3913  
  3914  	// lmsw, smsw
  3915  	{AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
  3916  	{AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},
  3917  
  3918  	// ltr, str
  3919  	{AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
  3920  	{AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},
  3921  
  3922  	/* load full pointer - unsupported
  3923  	{AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
  3924  	{AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
  3925  	*/
  3926  
  3927  	// double shift
  3928  	{ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
  3929  	{ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
  3930  	{ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
  3931  	{ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
  3932  	{ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
  3933  	{ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
  3934  	{ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
  3935  	{ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
  3936  	{ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
  3937  	{ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
  3938  	{ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
  3939  	{ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
  3940  	{ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
  3941  	{ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
  3942  	{ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
  3943  	{ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
  3944  	{ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
  3945  	{ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
  3946  
  3947  	// load TLS base
  3948  	{AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
  3949  	{AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
  3950  	{0, 0, 0, 0, 0, [4]uint8{}},
  3951  }
  3952  
  3953  func isax(a *obj.Addr) bool {
  3954  	switch a.Reg {
  3955  	case REG_AX, REG_AL, REG_AH:
  3956  		return true
  3957  	}
  3958  
  3959  	return a.Index == REG_AX
  3960  }
  3961  
  3962  func subreg(p *obj.Prog, from int, to int) {
  3963  	if false { /* debug['Q'] */
  3964  		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
  3965  	}
  3966  
  3967  	if int(p.From.Reg) == from {
  3968  		p.From.Reg = int16(to)
  3969  		p.Ft = 0
  3970  	}
  3971  
  3972  	if int(p.To.Reg) == from {
  3973  		p.To.Reg = int16(to)
  3974  		p.Tt = 0
  3975  	}
  3976  
  3977  	if int(p.From.Index) == from {
  3978  		p.From.Index = int16(to)
  3979  		p.Ft = 0
  3980  	}
  3981  
  3982  	if int(p.To.Index) == from {
  3983  		p.To.Index = int16(to)
  3984  		p.Tt = 0
  3985  	}
  3986  
  3987  	if false { /* debug['Q'] */
  3988  		fmt.Printf("%v\n", p)
  3989  	}
  3990  }
  3991  
  3992  func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  3993  	switch op {
  3994  	case Pm, Pe, Pf2, Pf3:
  3995  		if osize != 1 {
  3996  			if op != Pm {
  3997  				ab.Put1(byte(op))
  3998  			}
  3999  			ab.Put1(Pm)
  4000  			z++
  4001  			op = int(o.op[z])
  4002  			break
  4003  		}
  4004  		fallthrough
  4005  
  4006  	default:
  4007  		if ab.Len() == 0 || ab.Last() != Pm {
  4008  			ab.Put1(Pm)
  4009  		}
  4010  	}
  4011  
  4012  	ab.Put1(byte(op))
  4013  	return z
  4014  }
  4015  
  4016  var bpduff1 = []byte{
  4017  	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
  4018  	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
  4019  }
  4020  
  4021  var bpduff2 = []byte{
  4022  	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
  4023  }
  4024  
  4025  // asmevex emits EVEX pregis and opcode byte.
  4026  // In addition to asmvex r/m, vvvv and reg fields also requires optional
  4027  // K-masking register.
  4028  //
  4029  // Expects asmbuf.evex to be properly initialized.
  4030  func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
  4031  	ab.evexflag = true
  4032  	evex := ab.evex
  4033  
  4034  	rexR := byte(1)
  4035  	evexR := byte(1)
  4036  	rexX := byte(1)
  4037  	rexB := byte(1)
  4038  	if r != nil {
  4039  		if regrex[r.Reg]&Rxr != 0 {
  4040  			rexR = 0 // "ModR/M.reg" selector 4th bit.
  4041  		}
  4042  		if regrex[r.Reg]&RxrEvex != 0 {
  4043  			evexR = 0 // "ModR/M.reg" selector 5th bit.
  4044  		}
  4045  	}
  4046  	if rm != nil {
  4047  		if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
  4048  			rexX = 0
  4049  		} else if regrex[rm.Index]&Rxx != 0 {
  4050  			rexX = 0
  4051  		}
  4052  		if regrex[rm.Reg]&Rxb != 0 {
  4053  			rexB = 0
  4054  		}
  4055  	}
  4056  	// P0 = [R][X][B][R'][00][mm]
  4057  	p0 := (rexR << 7) |
  4058  		(rexX << 6) |
  4059  		(rexB << 5) |
  4060  		(evexR << 4) |
  4061  		(0 << 2) |
  4062  		(evex.M() << 0)
  4063  
  4064  	vexV := byte(0)
  4065  	if v != nil {
  4066  		// 4bit-wide reg index.
  4067  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  4068  	}
  4069  	vexV ^= 0x0F
  4070  	// P1 = [W][vvvv][1][pp]
  4071  	p1 := (evex.W() << 7) |
  4072  		(vexV << 3) |
  4073  		(1 << 2) |
  4074  		(evex.P() << 0)
  4075  
  4076  	suffix := evexSuffixMap[p.Scond]
  4077  	evexZ := byte(0)
  4078  	evexLL := evex.L()
  4079  	evexB := byte(0)
  4080  	evexV := byte(1)
  4081  	evexA := byte(0)
  4082  	if suffix.zeroing {
  4083  		if !evex.ZeroingEnabled() {
  4084  			ctxt.Diag("unsupported zeroing: %v", p)
  4085  		}
  4086  		if k == nil {
  4087  			// When you request zeroing you must specify a mask register.
  4088  			// See issue 57952.
  4089  			ctxt.Diag("mask register must be specified for .Z instructions: %v", p)
  4090  		} else if k.Reg == REG_K0 {
  4091  			// The mask register must not be K0. That restriction is already
  4092  			// handled by the Yknot0 restriction in the opcode tables, so we
  4093  			// won't ever reach here. But put something sensible here just in case.
  4094  			ctxt.Diag("mask register must not be K0 for .Z instructions: %v", p)
  4095  		}
  4096  		evexZ = 1
  4097  	}
  4098  	switch {
  4099  	case suffix.rounding != rcUnset:
  4100  		if rm != nil && rm.Type == obj.TYPE_MEM {
  4101  			ctxt.Diag("illegal rounding with memory argument: %v", p)
  4102  		} else if !evex.RoundingEnabled() {
  4103  			ctxt.Diag("unsupported rounding: %v", p)
  4104  		}
  4105  		evexB = 1
  4106  		evexLL = suffix.rounding
  4107  	case suffix.broadcast:
  4108  		if rm == nil || rm.Type != obj.TYPE_MEM {
  4109  			ctxt.Diag("illegal broadcast without memory argument: %v", p)
  4110  		} else if !evex.BroadcastEnabled() {
  4111  			ctxt.Diag("unsupported broadcast: %v", p)
  4112  		}
  4113  		evexB = 1
  4114  	case suffix.sae:
  4115  		if rm != nil && rm.Type == obj.TYPE_MEM {
  4116  			ctxt.Diag("illegal SAE with memory argument: %v", p)
  4117  		} else if !evex.SaeEnabled() {
  4118  			ctxt.Diag("unsupported SAE: %v", p)
  4119  		}
  4120  		evexB = 1
  4121  	}
  4122  	if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
  4123  		evexV = 0
  4124  	} else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
  4125  		evexV = 0 // VSR selector 5th bit.
  4126  	}
  4127  	if k != nil {
  4128  		evexA = byte(reg[k.Reg])
  4129  	}
  4130  	// P2 = [z][L'L][b][V'][aaa]
  4131  	p2 := (evexZ << 7) |
  4132  		(evexLL << 5) |
  4133  		(evexB << 4) |
  4134  		(evexV << 3) |
  4135  		(evexA << 0)
  4136  
  4137  	const evexEscapeByte = 0x62
  4138  	ab.Put4(evexEscapeByte, p0, p1, p2)
  4139  	ab.Put1(evex.opcode)
  4140  }
  4141  
  4142  // Emit VEX prefix and opcode byte.
  4143  // The three addresses are the r/m, vvvv, and reg fields.
  4144  // The reg and rm arguments appear in the same order as the
  4145  // arguments to asmand, which typically follows the call to asmvex.
  4146  // The final two arguments are the VEX prefix (see encoding above)
  4147  // and the opcode byte.
  4148  // For details about vex prefix see:
  4149  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  4150  func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  4151  	ab.vexflag = true
  4152  	rexR := 0
  4153  	if r != nil {
  4154  		rexR = regrex[r.Reg] & Rxr
  4155  	}
  4156  	rexB := 0
  4157  	rexX := 0
  4158  	if rm != nil {
  4159  		rexB = regrex[rm.Reg] & Rxb
  4160  		rexX = regrex[rm.Index] & Rxx
  4161  	}
  4162  	vexM := (vex >> 3) & 0x7
  4163  	vexWLP := vex & 0x87
  4164  	vexV := byte(0)
  4165  	if v != nil {
  4166  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  4167  	}
  4168  	vexV ^= 0xF
  4169  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  4170  		// Can use 2-byte encoding.
  4171  		ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  4172  	} else {
  4173  		// Must use 3-byte encoding.
  4174  		ab.Put3(0xc4,
  4175  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  4176  			vexV<<3|vexWLP,
  4177  		)
  4178  	}
  4179  	ab.Put1(opcode)
  4180  }
  4181  
  4182  // regIndex returns register index that fits in 5 bits.
  4183  //
  4184  //	R         : 3 bit | legacy instructions     | N/A
  4185  //	[R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr
  4186  //	EVEX.R    : 1 bit | EVEX extension bit      | RxrEvex
  4187  //
  4188  // Examples:
  4189  //
  4190  //	REG_Z30 => 30
  4191  //	REG_X15 => 15
  4192  //	REG_R9  => 9
  4193  //	REG_AX  => 0
  4194  func regIndex(r int16) int {
  4195  	lower3bits := reg[r]
  4196  	high4bit := regrex[r] & Rxr << 1
  4197  	high5bit := regrex[r] & RxrEvex << 0
  4198  	return lower3bits | high4bit | high5bit
  4199  }
  4200  
  4201  // avx2gatherValid reports whether p satisfies AVX2 gather constraints.
  4202  // Reports errors via ctxt.
  4203  func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  4204  	// If any pair of the index, mask, or destination registers
  4205  	// are the same, illegal instruction trap (#UD) is triggered.
  4206  	index := regIndex(p.GetFrom3().Index)
  4207  	mask := regIndex(p.From.Reg)
  4208  	dest := regIndex(p.To.Reg)
  4209  	if dest == mask || dest == index || mask == index {
  4210  		ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
  4211  		return false
  4212  	}
  4213  
  4214  	return true
  4215  }
  4216  
  4217  // avx512gatherValid reports whether p satisfies AVX512 gather constraints.
  4218  // Reports errors via ctxt.
  4219  func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  4220  	// Illegal instruction trap (#UD) is triggered if the destination vector
  4221  	// register is the same as index vector in VSIB.
  4222  	index := regIndex(p.From.Index)
  4223  	dest := regIndex(p.To.Reg)
  4224  	if dest == index {
  4225  		ctxt.Diag("index and destination registers should be distinct: %v", p)
  4226  		return false
  4227  	}
  4228  
  4229  	return true
  4230  }
  4231  
  4232  func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  4233  	o := opindex[p.As&obj.AMask]
  4234  
  4235  	if o == nil {
  4236  		ctxt.Diag("asmins: missing op %v", p)
  4237  		return
  4238  	}
  4239  
  4240  	if pre := prefixof(ctxt, &p.From); pre != 0 {
  4241  		ab.Put1(byte(pre))
  4242  	}
  4243  	if pre := prefixof(ctxt, &p.To); pre != 0 {
  4244  		ab.Put1(byte(pre))
  4245  	}
  4246  
  4247  	// Checks to warn about instruction/arguments combinations that
  4248  	// will unconditionally trigger illegal instruction trap (#UD).
  4249  	switch p.As {
  4250  	case AVGATHERDPD,
  4251  		AVGATHERQPD,
  4252  		AVGATHERDPS,
  4253  		AVGATHERQPS,
  4254  		AVPGATHERDD,
  4255  		AVPGATHERQD,
  4256  		AVPGATHERDQ,
  4257  		AVPGATHERQQ:
  4258  		if p.GetFrom3() == nil {
  4259  			// gathers need a 3rd arg. See issue 58822.
  4260  			ctxt.Diag("need a third arg for gather instruction: %v", p)
  4261  			return
  4262  		}
  4263  		// AVX512 gather requires explicit K mask.
  4264  		if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 {
  4265  			if !avx512gatherValid(ctxt, p) {
  4266  				return
  4267  			}
  4268  		} else {
  4269  			if !avx2gatherValid(ctxt, p) {
  4270  				return
  4271  			}
  4272  		}
  4273  	}
  4274  
  4275  	if p.Ft == 0 {
  4276  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  4277  	}
  4278  	if p.Tt == 0 {
  4279  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  4280  	}
  4281  
  4282  	ft := int(p.Ft) * Ymax
  4283  	tt := int(p.Tt) * Ymax
  4284  
  4285  	xo := obj.Bool2int(o.op[0] == 0x0f)
  4286  	z := 0
  4287  
  4288  	args := make([]int, 0, argListMax)
  4289  	if ft != Ynone*Ymax {
  4290  		args = append(args, ft)
  4291  	}
  4292  	for i := range p.RestArgs {
  4293  		args = append(args, oclass(ctxt, p, &p.RestArgs[i].Addr)*Ymax)
  4294  	}
  4295  	if tt != Ynone*Ymax {
  4296  		args = append(args, tt)
  4297  	}
  4298  
  4299  	var f3t int
  4300  	for _, yt := range o.ytab {
  4301  		// ytab matching is purely args-based,
  4302  		// but AVX512 suffixes like "Z" or "RU_SAE" will
  4303  		// add EVEX-only filter that will reject non-EVEX matches.
  4304  		//
  4305  		// Consider "VADDPD.BCST 2032(DX), X0, X0".
  4306  		// Without this rule, operands will lead to VEX-encoded form
  4307  		// and produce "c5b15813" encoding.
  4308  		if !yt.match(args) {
  4309  			// "xo" is always zero for VEX/EVEX encoded insts.
  4310  			z += int(yt.zoffset) + xo
  4311  		} else {
  4312  			if p.Scond != 0 && !evexZcase(yt.zcase) {
  4313  				// Do not signal error and continue to search
  4314  				// for matching EVEX-encoded form.
  4315  				z += int(yt.zoffset)
  4316  				continue
  4317  			}
  4318  
  4319  			switch o.prefix {
  4320  			case Px1: // first option valid only in 32-bit mode
  4321  				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
  4322  					z += int(yt.zoffset) + xo
  4323  					continue
  4324  				}
  4325  			case Pq: // 16 bit escape and opcode escape
  4326  				ab.Put2(Pe, Pm)
  4327  
  4328  			case Pq3: // 16 bit escape and opcode escape + REX.W
  4329  				ab.rexflag |= Pw
  4330  				ab.Put2(Pe, Pm)
  4331  
  4332  			case Pq4: // 66 0F 38
  4333  				ab.Put3(0x66, 0x0F, 0x38)
  4334  
  4335  			case Pq4w: // 66 0F 38 + REX.W
  4336  				ab.rexflag |= Pw
  4337  				ab.Put3(0x66, 0x0F, 0x38)
  4338  
  4339  			case Pq5: // F3 0F 38
  4340  				ab.Put3(0xF3, 0x0F, 0x38)
  4341  
  4342  			case Pq5w: //  F3 0F 38 + REX.W
  4343  				ab.rexflag |= Pw
  4344  				ab.Put3(0xF3, 0x0F, 0x38)
  4345  
  4346  			case Pf2, // xmm opcode escape
  4347  				Pf3:
  4348  				ab.Put2(o.prefix, Pm)
  4349  
  4350  			case Pef3:
  4351  				ab.Put3(Pe, Pf3, Pm)
  4352  
  4353  			case Pfw: // xmm opcode escape + REX.W
  4354  				ab.rexflag |= Pw
  4355  				ab.Put2(Pf3, Pm)
  4356  
  4357  			case Pm: // opcode escape
  4358  				ab.Put1(Pm)
  4359  
  4360  			case Pe: // 16 bit escape
  4361  				ab.Put1(Pe)
  4362  
  4363  			case Pw: // 64-bit escape
  4364  				if ctxt.Arch.Family != sys.AMD64 {
  4365  					ctxt.Diag("asmins: illegal 64: %v", p)
  4366  				}
  4367  				ab.rexflag |= Pw
  4368  
  4369  			case Pw8: // 64-bit escape if z >= 8
  4370  				if z >= 8 {
  4371  					if ctxt.Arch.Family != sys.AMD64 {
  4372  						ctxt.Diag("asmins: illegal 64: %v", p)
  4373  					}
  4374  					ab.rexflag |= Pw
  4375  				}
  4376  
  4377  			case Pb: // botch
  4378  				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  4379  					goto bad
  4380  				}
  4381  				// NOTE(rsc): This is probably safe to do always,
  4382  				// but when enabled it chooses different encodings
  4383  				// than the old cmd/internal/obj/i386 code did,
  4384  				// which breaks our "same bits out" checks.
  4385  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  4386  				// in the original obj/i386, and it would encode
  4387  				// (using a valid, shorter form) as 3c 00 if we enabled
  4388  				// the call to bytereg here.
  4389  				if ctxt.Arch.Family == sys.AMD64 {
  4390  					bytereg(&p.From, &p.Ft)
  4391  					bytereg(&p.To, &p.Tt)
  4392  				}
  4393  
  4394  			case P32: // 32 bit but illegal if 64-bit mode
  4395  				if ctxt.Arch.Family == sys.AMD64 {
  4396  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  4397  				}
  4398  
  4399  			case Py: // 64-bit only, no prefix
  4400  				if ctxt.Arch.Family != sys.AMD64 {
  4401  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4402  				}
  4403  
  4404  			case Py1: // 64-bit only if z < 1, no prefix
  4405  				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
  4406  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4407  				}
  4408  
  4409  			case Py3: // 64-bit only if z < 3, no prefix
  4410  				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
  4411  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4412  				}
  4413  			}
  4414  
  4415  			if z >= len(o.op) {
  4416  				log.Fatalf("asmins bad table %v", p)
  4417  			}
  4418  			op := int(o.op[z])
  4419  			if op == 0x0f {
  4420  				ab.Put1(byte(op))
  4421  				z++
  4422  				op = int(o.op[z])
  4423  			}
  4424  
  4425  			switch yt.zcase {
  4426  			default:
  4427  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  4428  				return
  4429  
  4430  			case Zpseudo:
  4431  				break
  4432  
  4433  			case Zlit:
  4434  				ab.PutOpBytesLit(z, &o.op)
  4435  
  4436  			case Zlitr_m:
  4437  				ab.PutOpBytesLit(z, &o.op)
  4438  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4439  
  4440  			case Zlitm_r:
  4441  				ab.PutOpBytesLit(z, &o.op)
  4442  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4443  
  4444  			case Zlit_m_r:
  4445  				ab.PutOpBytesLit(z, &o.op)
  4446  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4447  
  4448  			case Zmb_r:
  4449  				bytereg(&p.From, &p.Ft)
  4450  				fallthrough
  4451  
  4452  			case Zm_r:
  4453  				ab.Put1(byte(op))
  4454  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4455  
  4456  			case Z_m_r:
  4457  				ab.Put1(byte(op))
  4458  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4459  
  4460  			case Zm2_r:
  4461  				ab.Put2(byte(op), o.op[z+1])
  4462  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4463  
  4464  			case Zm_r_xm:
  4465  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4466  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4467  
  4468  			case Zm_r_xm_nr:
  4469  				ab.rexflag = 0
  4470  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4471  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4472  
  4473  			case Zm_r_i_xm:
  4474  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4475  				ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
  4476  				ab.Put1(byte(p.To.Offset))
  4477  
  4478  			case Zibm_r, Zibr_m:
  4479  				ab.PutOpBytesLit(z, &o.op)
  4480  				if yt.zcase == Zibr_m {
  4481  					ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4482  				} else {
  4483  					ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4484  				}
  4485  				switch {
  4486  				default:
  4487  					ab.Put1(byte(p.From.Offset))
  4488  				case yt.args[0] == Yi32 && o.prefix == Pe:
  4489  					ab.PutInt16(int16(p.From.Offset))
  4490  				case yt.args[0] == Yi32:
  4491  					ab.PutInt32(int32(p.From.Offset))
  4492  				}
  4493  
  4494  			case Zaut_r:
  4495  				ab.Put1(0x8d) // leal
  4496  				if p.From.Type != obj.TYPE_ADDR {
  4497  					ctxt.Diag("asmins: Zaut sb type ADDR")
  4498  				}
  4499  				p.From.Type = obj.TYPE_MEM
  4500  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4501  				p.From.Type = obj.TYPE_ADDR
  4502  
  4503  			case Zm_o:
  4504  				ab.Put1(byte(op))
  4505  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4506  
  4507  			case Zr_m:
  4508  				ab.Put1(byte(op))
  4509  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4510  
  4511  			case Zvex:
  4512  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4513  
  4514  			case Zvex_rm_v_r:
  4515  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4516  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4517  
  4518  			case Zvex_rm_v_ro:
  4519  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4520  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4521  
  4522  			case Zvex_i_rm_vo:
  4523  				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4524  				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2]))
  4525  				ab.Put1(byte(p.From.Offset))
  4526  
  4527  			case Zvex_i_r_v:
  4528  				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4529  				regnum := byte(0x7)
  4530  				if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
  4531  					regnum &= byte(p.GetFrom3().Reg - REG_X0)
  4532  				} else {
  4533  					regnum &= byte(p.GetFrom3().Reg - REG_Y0)
  4534  				}
  4535  				ab.Put1(o.op[z+2] | regnum)
  4536  				ab.Put1(byte(p.From.Offset))
  4537  
  4538  			case Zvex_i_rm_v_r:
  4539  				imm, from, from3, to := unpackOps4(p)
  4540  				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4541  				ab.asmand(ctxt, cursym, p, from, to)
  4542  				ab.Put1(byte(imm.Offset))
  4543  
  4544  			case Zvex_i_rm_r:
  4545  				ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
  4546  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4547  				ab.Put1(byte(p.From.Offset))
  4548  
  4549  			case Zvex_v_rm_r:
  4550  				ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
  4551  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4552  
  4553  			case Zvex_r_v_rm:
  4554  				ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
  4555  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4556  
  4557  			case Zvex_rm_r_vo:
  4558  				ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
  4559  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4560  
  4561  			case Zvex_i_r_rm:
  4562  				ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
  4563  				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4564  				ab.Put1(byte(p.From.Offset))
  4565  
  4566  			case Zvex_hr_rm_v_r:
  4567  				hr, from, from3, to := unpackOps4(p)
  4568  				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4569  				ab.asmand(ctxt, cursym, p, from, to)
  4570  				ab.Put1(byte(regIndex(hr.Reg) << 4))
  4571  
  4572  			case Zevex_k_rmo:
  4573  				ab.evex = newEVEXBits(z, &o.op)
  4574  				ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From)
  4575  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3]))
  4576  
  4577  			case Zevex_i_rm_vo:
  4578  				ab.evex = newEVEXBits(z, &o.op)
  4579  				ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil)
  4580  				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3]))
  4581  				ab.Put1(byte(p.From.Offset))
  4582  
  4583  			case Zevex_i_rm_k_vo:
  4584  				imm, from, kmask, to := unpackOps4(p)
  4585  				ab.evex = newEVEXBits(z, &o.op)
  4586  				ab.asmevex(ctxt, p, from, to, nil, kmask)
  4587  				ab.asmando(ctxt, cursym, p, from, int(o.op[z+3]))
  4588  				ab.Put1(byte(imm.Offset))
  4589  
  4590  			case Zevex_i_r_rm:
  4591  				ab.evex = newEVEXBits(z, &o.op)
  4592  				ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil)
  4593  				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4594  				ab.Put1(byte(p.From.Offset))
  4595  
  4596  			case Zevex_i_r_k_rm:
  4597  				imm, from, kmask, to := unpackOps4(p)
  4598  				ab.evex = newEVEXBits(z, &o.op)
  4599  				ab.asmevex(ctxt, p, to, nil, from, kmask)
  4600  				ab.asmand(ctxt, cursym, p, to, from)
  4601  				ab.Put1(byte(imm.Offset))
  4602  
  4603  			case Zevex_i_rm_r:
  4604  				ab.evex = newEVEXBits(z, &o.op)
  4605  				ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil)
  4606  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4607  				ab.Put1(byte(p.From.Offset))
  4608  
  4609  			case Zevex_i_rm_k_r:
  4610  				imm, from, kmask, to := unpackOps4(p)
  4611  				ab.evex = newEVEXBits(z, &o.op)
  4612  				ab.asmevex(ctxt, p, from, nil, to, kmask)
  4613  				ab.asmand(ctxt, cursym, p, from, to)
  4614  				ab.Put1(byte(imm.Offset))
  4615  
  4616  			case Zevex_i_rm_v_r:
  4617  				imm, from, from3, to := unpackOps4(p)
  4618  				ab.evex = newEVEXBits(z, &o.op)
  4619  				ab.asmevex(ctxt, p, from, from3, to, nil)
  4620  				ab.asmand(ctxt, cursym, p, from, to)
  4621  				ab.Put1(byte(imm.Offset))
  4622  
  4623  			case Zevex_i_rm_v_k_r:
  4624  				imm, from, from3, kmask, to := unpackOps5(p)
  4625  				ab.evex = newEVEXBits(z, &o.op)
  4626  				ab.asmevex(ctxt, p, from, from3, to, kmask)
  4627  				ab.asmand(ctxt, cursym, p, from, to)
  4628  				ab.Put1(byte(imm.Offset))
  4629  
  4630  			case Zevex_r_v_rm:
  4631  				ab.evex = newEVEXBits(z, &o.op)
  4632  				ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil)
  4633  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4634  
  4635  			case Zevex_rm_v_r:
  4636  				ab.evex = newEVEXBits(z, &o.op)
  4637  				ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil)
  4638  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4639  
  4640  			case Zevex_rm_k_r:
  4641  				ab.evex = newEVEXBits(z, &o.op)
  4642  				ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3())
  4643  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4644  
  4645  			case Zevex_r_k_rm:
  4646  				ab.evex = newEVEXBits(z, &o.op)
  4647  				ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3())
  4648  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4649  
  4650  			case Zevex_rm_v_k_r:
  4651  				from, from3, kmask, to := unpackOps4(p)
  4652  				ab.evex = newEVEXBits(z, &o.op)
  4653  				ab.asmevex(ctxt, p, from, from3, to, kmask)
  4654  				ab.asmand(ctxt, cursym, p, from, to)
  4655  
  4656  			case Zevex_r_v_k_rm:
  4657  				from, from3, kmask, to := unpackOps4(p)
  4658  				ab.evex = newEVEXBits(z, &o.op)
  4659  				ab.asmevex(ctxt, p, to, from3, from, kmask)
  4660  				ab.asmand(ctxt, cursym, p, to, from)
  4661  
  4662  			case Zr_m_xm:
  4663  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4664  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4665  
  4666  			case Zr_m_xm_nr:
  4667  				ab.rexflag = 0
  4668  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4669  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4670  
  4671  			case Zo_m:
  4672  				ab.Put1(byte(op))
  4673  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4674  
  4675  			case Zcallindreg:
  4676  				cursym.AddRel(ctxt, obj.Reloc{
  4677  					Type: objabi.R_CALLIND,
  4678  					Off:  int32(p.Pc),
  4679  				})
  4680  				fallthrough
  4681  
  4682  			case Zo_m64:
  4683  				ab.Put1(byte(op))
  4684  				ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
  4685  
  4686  			case Zm_ibo:
  4687  				ab.Put1(byte(op))
  4688  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4689  				ab.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  4690  
  4691  			case Zibo_m:
  4692  				ab.Put1(byte(op))
  4693  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4694  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4695  
  4696  			case Zibo_m_xm:
  4697  				z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4698  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4699  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4700  
  4701  			case Z_ib, Zib_:
  4702  				var a *obj.Addr
  4703  				if yt.zcase == Zib_ {
  4704  					a = &p.From
  4705  				} else {
  4706  					a = &p.To
  4707  				}
  4708  				ab.Put1(byte(op))
  4709  				if p.As == AXABORT {
  4710  					ab.Put1(o.op[z+1])
  4711  				}
  4712  				ab.Put1(byte(vaddr(ctxt, p, a, nil)))
  4713  
  4714  			case Zib_rp:
  4715  				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4716  				ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  4717  
  4718  			case Zil_rp:
  4719  				ab.rexflag |= regrex[p.To.Reg] & Rxb
  4720  				ab.Put1(byte(op + reg[p.To.Reg]))
  4721  				if o.prefix == Pe {
  4722  					v := vaddr(ctxt, p, &p.From, nil)
  4723  					ab.PutInt16(int16(v))
  4724  				} else {
  4725  					ab.relput4(ctxt, cursym, p, &p.From)
  4726  				}
  4727  
  4728  			case Zo_iw:
  4729  				ab.Put1(byte(op))
  4730  				if p.From.Type != obj.TYPE_NONE {
  4731  					v := vaddr(ctxt, p, &p.From, nil)
  4732  					ab.PutInt16(int16(v))
  4733  				}
  4734  
  4735  			case Ziq_rp:
  4736  				var rel obj.Reloc
  4737  				v := vaddr(ctxt, p, &p.From, &rel)
  4738  				l := int(v >> 32)
  4739  				if l == 0 && rel.Siz != 8 {
  4740  					ab.rexflag &^= (0x40 | Rxw)
  4741  
  4742  					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4743  					ab.Put1(byte(0xb8 + reg[p.To.Reg]))
  4744  					if rel.Type != 0 {
  4745  						rel.Off = int32(p.Pc + int64(ab.Len()))
  4746  						cursym.AddRel(ctxt, rel)
  4747  					}
  4748  
  4749  					ab.PutInt32(int32(v))
  4750  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend
  4751  					ab.Put1(0xc7)
  4752  					ab.asmando(ctxt, cursym, p, &p.To, 0)
  4753  
  4754  					ab.PutInt32(int32(v)) // need all 8
  4755  				} else {
  4756  					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4757  					ab.Put1(byte(op + reg[p.To.Reg]))
  4758  					if rel.Type != 0 {
  4759  						rel.Off = int32(p.Pc + int64(ab.Len()))
  4760  						cursym.AddRel(ctxt, rel)
  4761  					}
  4762  
  4763  					ab.PutInt64(v)
  4764  				}
  4765  
  4766  			case Zib_rr:
  4767  				ab.Put1(byte(op))
  4768  				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4769  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4770  
  4771  			case Z_il, Zil_:
  4772  				var a *obj.Addr
  4773  				if yt.zcase == Zil_ {
  4774  					a = &p.From
  4775  				} else {
  4776  					a = &p.To
  4777  				}
  4778  				ab.Put1(byte(op))
  4779  				if o.prefix == Pe {
  4780  					v := vaddr(ctxt, p, a, nil)
  4781  					ab.PutInt16(int16(v))
  4782  				} else {
  4783  					ab.relput4(ctxt, cursym, p, a)
  4784  				}
  4785  
  4786  			case Zm_ilo, Zilo_m:
  4787  				var a *obj.Addr
  4788  				ab.Put1(byte(op))
  4789  				if yt.zcase == Zilo_m {
  4790  					a = &p.From
  4791  					ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4792  				} else {
  4793  					a = &p.To
  4794  					ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4795  				}
  4796  
  4797  				if o.prefix == Pe {
  4798  					v := vaddr(ctxt, p, a, nil)
  4799  					ab.PutInt16(int16(v))
  4800  				} else {
  4801  					ab.relput4(ctxt, cursym, p, a)
  4802  				}
  4803  
  4804  			case Zil_rr:
  4805  				ab.Put1(byte(op))
  4806  				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4807  				if o.prefix == Pe {
  4808  					v := vaddr(ctxt, p, &p.From, nil)
  4809  					ab.PutInt16(int16(v))
  4810  				} else {
  4811  					ab.relput4(ctxt, cursym, p, &p.From)
  4812  				}
  4813  
  4814  			case Z_rp:
  4815  				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4816  				ab.Put1(byte(op + reg[p.To.Reg]))
  4817  
  4818  			case Zrp_:
  4819  				ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  4820  				ab.Put1(byte(op + reg[p.From.Reg]))
  4821  
  4822  			case Zcallcon, Zjmpcon:
  4823  				if yt.zcase == Zcallcon {
  4824  					ab.Put1(byte(op))
  4825  				} else {
  4826  					ab.Put1(o.op[z+1])
  4827  				}
  4828  				cursym.AddRel(ctxt, obj.Reloc{
  4829  					Type: objabi.R_PCREL,
  4830  					Off:  int32(p.Pc + int64(ab.Len())),
  4831  					Siz:  4,
  4832  					Add:  p.To.Offset,
  4833  				})
  4834  				ab.PutInt32(0)
  4835  
  4836  			case Zcallind:
  4837  				ab.Put2(byte(op), o.op[z+1])
  4838  				typ := objabi.R_ADDR
  4839  				if ctxt.Arch.Family == sys.AMD64 {
  4840  					typ = objabi.R_PCREL
  4841  				}
  4842  				cursym.AddRel(ctxt, obj.Reloc{
  4843  					Type: typ,
  4844  					Off:  int32(p.Pc + int64(ab.Len())),
  4845  					Siz:  4,
  4846  					Sym:  p.To.Sym,
  4847  					Add:  p.To.Offset,
  4848  				})
  4849  				ab.PutInt32(0)
  4850  
  4851  			case Zcall, Zcallduff:
  4852  				if p.To.Sym == nil {
  4853  					ctxt.Diag("call without target")
  4854  					ctxt.DiagFlush()
  4855  					log.Fatalf("bad code")
  4856  				}
  4857  
  4858  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  4859  					ctxt.Diag("directly calling duff when dynamically linking Go")
  4860  				}
  4861  
  4862  				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4863  					// Maintain BP around call, since duffcopy/duffzero can't do it
  4864  					// (the call jumps into the middle of the function).
  4865  					// This makes it possible to see call sites for duffcopy/duffzero in
  4866  					// BP-based profiling tools like Linux perf (which is the
  4867  					// whole point of maintaining frame pointers in Go).
  4868  					// MOVQ BP, -16(SP)
  4869  					// LEAQ -16(SP), BP
  4870  					ab.Put(bpduff1)
  4871  				}
  4872  				ab.Put1(byte(op))
  4873  				cursym.AddRel(ctxt, obj.Reloc{
  4874  					Type: objabi.R_CALL,
  4875  					Off:  int32(p.Pc + int64(ab.Len())),
  4876  					Siz:  4,
  4877  					Sym:  p.To.Sym,
  4878  					Add:  p.To.Offset,
  4879  				})
  4880  				ab.PutInt32(0)
  4881  
  4882  				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4883  					// Pop BP pushed above.
  4884  					// MOVQ 0(BP), BP
  4885  					ab.Put(bpduff2)
  4886  				}
  4887  
  4888  			// TODO: jump across functions needs reloc
  4889  			case Zbr, Zjmp, Zloop:
  4890  				if p.As == AXBEGIN {
  4891  					ab.Put1(byte(op))
  4892  				}
  4893  				if p.To.Sym != nil {
  4894  					if yt.zcase != Zjmp {
  4895  						ctxt.Diag("branch to ATEXT")
  4896  						ctxt.DiagFlush()
  4897  						log.Fatalf("bad code")
  4898  					}
  4899  
  4900  					ab.Put1(o.op[z+1])
  4901  					cursym.AddRel(ctxt, obj.Reloc{
  4902  						// Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that
  4903  						// it can point to a trampoline instead of the destination itself.
  4904  						Type: objabi.R_CALL,
  4905  						Off:  int32(p.Pc + int64(ab.Len())),
  4906  						Siz:  4,
  4907  						Sym:  p.To.Sym,
  4908  					})
  4909  					ab.PutInt32(0)
  4910  					break
  4911  				}
  4912  
  4913  				// Assumes q is in this function.
  4914  				// TODO: Check in input, preserve in brchain.
  4915  
  4916  				// Fill in backward jump now.
  4917  				q := p.To.Target()
  4918  
  4919  				if q == nil {
  4920  					ctxt.Diag("jmp/branch/loop without target")
  4921  					ctxt.DiagFlush()
  4922  					log.Fatalf("bad code")
  4923  				}
  4924  
  4925  				if p.Back&branchBackwards != 0 {
  4926  					v := q.Pc - (p.Pc + 2)
  4927  					if v >= -128 && p.As != AXBEGIN {
  4928  						if p.As == AJCXZL {
  4929  							ab.Put1(0x67)
  4930  						}
  4931  						ab.Put2(byte(op), byte(v))
  4932  					} else if yt.zcase == Zloop {
  4933  						ctxt.Diag("loop too far: %v", p)
  4934  					} else {
  4935  						v -= 5 - 2
  4936  						if p.As == AXBEGIN {
  4937  							v--
  4938  						}
  4939  						if yt.zcase == Zbr {
  4940  							ab.Put1(0x0f)
  4941  							v--
  4942  						}
  4943  
  4944  						ab.Put1(o.op[z+1])
  4945  						ab.PutInt32(int32(v))
  4946  					}
  4947  
  4948  					break
  4949  				}
  4950  
  4951  				// Annotate target; will fill in later.
  4952  				p.Forwd = q.Rel
  4953  
  4954  				q.Rel = p
  4955  				if p.Back&branchShort != 0 && p.As != AXBEGIN {
  4956  					if p.As == AJCXZL {
  4957  						ab.Put1(0x67)
  4958  					}
  4959  					ab.Put2(byte(op), 0)
  4960  				} else if yt.zcase == Zloop {
  4961  					ctxt.Diag("loop too far: %v", p)
  4962  				} else {
  4963  					if yt.zcase == Zbr {
  4964  						ab.Put1(0x0f)
  4965  					}
  4966  					ab.Put1(o.op[z+1])
  4967  					ab.PutInt32(0)
  4968  				}
  4969  
  4970  			case Zbyte:
  4971  				var rel obj.Reloc
  4972  				v := vaddr(ctxt, p, &p.From, &rel)
  4973  				if rel.Siz != 0 {
  4974  					rel.Siz = uint8(op)
  4975  					rel.Off = int32(p.Pc + int64(ab.Len()))
  4976  					cursym.AddRel(ctxt, rel)
  4977  				}
  4978  
  4979  				ab.Put1(byte(v))
  4980  				if op > 1 {
  4981  					ab.Put1(byte(v >> 8))
  4982  					if op > 2 {
  4983  						ab.PutInt16(int16(v >> 16))
  4984  						if op > 4 {
  4985  							ab.PutInt32(int32(v >> 32))
  4986  						}
  4987  					}
  4988  				}
  4989  			}
  4990  
  4991  			return
  4992  		}
  4993  	}
  4994  	f3t = Ynone * Ymax
  4995  	if p.GetFrom3() != nil {
  4996  		f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
  4997  	}
  4998  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  4999  		var pp obj.Prog
  5000  		var t []byte
  5001  		if p.As == mo[0].as {
  5002  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  5003  				t = mo[0].op[:]
  5004  				switch mo[0].code {
  5005  				default:
  5006  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  5007  
  5008  				case movLit:
  5009  					for z = 0; t[z] != 0; z++ {
  5010  						ab.Put1(t[z])
  5011  					}
  5012  
  5013  				case movRegMem:
  5014  					ab.Put1(t[0])
  5015  					ab.asmando(ctxt, cursym, p, &p.To, int(t[1]))
  5016  
  5017  				case movMemReg:
  5018  					ab.Put1(t[0])
  5019  					ab.asmando(ctxt, cursym, p, &p.From, int(t[1]))
  5020  
  5021  				case movRegMem2op: // r,m - 2op
  5022  					ab.Put2(t[0], t[1])
  5023  					ab.asmando(ctxt, cursym, p, &p.To, int(t[2]))
  5024  					ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  5025  
  5026  				case movMemReg2op:
  5027  					ab.Put2(t[0], t[1])
  5028  					ab.asmando(ctxt, cursym, p, &p.From, int(t[2]))
  5029  					ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  5030  
  5031  				case movFullPtr:
  5032  					if t[0] != 0 {
  5033  						ab.Put1(t[0])
  5034  					}
  5035  					switch p.To.Index {
  5036  					default:
  5037  						goto bad
  5038  
  5039  					case REG_DS:
  5040  						ab.Put1(0xc5)
  5041  
  5042  					case REG_SS:
  5043  						ab.Put2(0x0f, 0xb2)
  5044  
  5045  					case REG_ES:
  5046  						ab.Put1(0xc4)
  5047  
  5048  					case REG_FS:
  5049  						ab.Put2(0x0f, 0xb4)
  5050  
  5051  					case REG_GS:
  5052  						ab.Put2(0x0f, 0xb5)
  5053  					}
  5054  
  5055  					ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  5056  
  5057  				case movDoubleShift:
  5058  					if t[0] == Pw {
  5059  						if ctxt.Arch.Family != sys.AMD64 {
  5060  							ctxt.Diag("asmins: illegal 64: %v", p)
  5061  						}
  5062  						ab.rexflag |= Pw
  5063  						t = t[1:]
  5064  					} else if t[0] == Pe {
  5065  						ab.Put1(Pe)
  5066  						t = t[1:]
  5067  					}
  5068  
  5069  					switch p.From.Type {
  5070  					default:
  5071  						goto bad
  5072  
  5073  					case obj.TYPE_CONST:
  5074  						ab.Put2(0x0f, t[0])
  5075  						ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  5076  						ab.Put1(byte(p.From.Offset))
  5077  
  5078  					case obj.TYPE_REG:
  5079  						switch p.From.Reg {
  5080  						default:
  5081  							goto bad
  5082  
  5083  						case REG_CL, REG_CX:
  5084  							ab.Put2(0x0f, t[1])
  5085  							ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  5086  						}
  5087  					}
  5088  
  5089  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  5090  				// where you load the TLS base register into a register and then index off that
  5091  				// register to access the actual TLS variables. Systems that allow direct TLS access
  5092  				// are handled in prefixof above and should not be listed here.
  5093  				case movTLSReg:
  5094  					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
  5095  						ctxt.Diag("invalid load of TLS: %v", p)
  5096  					}
  5097  
  5098  					if ctxt.Arch.Family == sys.I386 {
  5099  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  5100  						// where you load the TLS base register into a register and then index off that
  5101  						// register to access the actual TLS variables. Systems that allow direct TLS access
  5102  						// are handled in prefixof above and should not be listed here.
  5103  						switch ctxt.Headtype {
  5104  						default:
  5105  							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  5106  
  5107  						case objabi.Hlinux, objabi.Hfreebsd:
  5108  							if ctxt.Flag_shared {
  5109  								// Note that this is not generating the same insns as the other cases.
  5110  								//     MOV TLS, dst
  5111  								// becomes
  5112  								//     call __x86.get_pc_thunk.dst
  5113  								//     movl (gotpc + g@gotntpoff)(dst), dst
  5114  								// which is encoded as
  5115  								//     call __x86.get_pc_thunk.dst
  5116  								//     movq 0(dst), dst
  5117  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  5118  								// is g, which we can't check here, but will when we assemble the second
  5119  								// instruction.
  5120  								dst := p.To.Reg
  5121  								ab.Put1(0xe8)
  5122  								cursym.AddRel(ctxt, obj.Reloc{
  5123  									Type: objabi.R_CALL,
  5124  									Off:  int32(p.Pc + int64(ab.Len())),
  5125  									Siz:  4,
  5126  									Sym:  ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst)))),
  5127  								})
  5128  								ab.PutInt32(0)
  5129  
  5130  								ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
  5131  								cursym.AddRel(ctxt, obj.Reloc{
  5132  									Type: objabi.R_TLS_IE,
  5133  									Off:  int32(p.Pc + int64(ab.Len())),
  5134  									Siz:  4,
  5135  									Add:  2,
  5136  								})
  5137  								ab.PutInt32(0)
  5138  							} else {
  5139  								// ELF TLS base is 0(GS).
  5140  								pp.From = p.From
  5141  
  5142  								pp.From.Type = obj.TYPE_MEM
  5143  								pp.From.Reg = REG_GS
  5144  								pp.From.Offset = 0
  5145  								pp.From.Index = REG_NONE
  5146  								pp.From.Scale = 0
  5147  								ab.Put2(0x65, // GS
  5148  									0x8B)
  5149  								ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5150  							}
  5151  						case objabi.Hplan9:
  5152  							pp.From = obj.Addr{}
  5153  							pp.From.Type = obj.TYPE_MEM
  5154  							pp.From.Name = obj.NAME_EXTERN
  5155  							pp.From.Sym = plan9privates
  5156  							pp.From.Offset = 0
  5157  							pp.From.Index = REG_NONE
  5158  							ab.Put1(0x8B)
  5159  							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5160  						}
  5161  						break
  5162  					}
  5163  
  5164  					switch ctxt.Headtype {
  5165  					default:
  5166  						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  5167  
  5168  					case objabi.Hlinux, objabi.Hfreebsd:
  5169  						if !ctxt.Flag_shared {
  5170  							log.Fatalf("unknown TLS base location for linux/freebsd without -shared")
  5171  						}
  5172  						// Note that this is not generating the same insn as the other cases.
  5173  						//     MOV TLS, R_to
  5174  						// becomes
  5175  						//     movq g@gottpoff(%rip), R_to
  5176  						// which is encoded as
  5177  						//     movq 0(%rip), R_to
  5178  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  5179  						// is g, which we can't check here, but will when we assemble the second
  5180  						// instruction.
  5181  						ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  5182  
  5183  						ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  5184  						cursym.AddRel(ctxt, obj.Reloc{
  5185  							Type: objabi.R_TLS_IE,
  5186  							Off:  int32(p.Pc + int64(ab.Len())),
  5187  							Siz:  4,
  5188  							Add:  -4,
  5189  						})
  5190  						ab.PutInt32(0)
  5191  
  5192  					case objabi.Hplan9:
  5193  						pp.From = obj.Addr{}
  5194  						pp.From.Type = obj.TYPE_MEM
  5195  						pp.From.Name = obj.NAME_EXTERN
  5196  						pp.From.Sym = plan9privates
  5197  						pp.From.Offset = 0
  5198  						pp.From.Index = REG_NONE
  5199  						ab.rexflag |= Pw
  5200  						ab.Put1(0x8B)
  5201  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5202  
  5203  					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  5204  						// TLS base is 0(FS).
  5205  						pp.From = p.From
  5206  
  5207  						pp.From.Type = obj.TYPE_MEM
  5208  						pp.From.Name = obj.NAME_NONE
  5209  						pp.From.Reg = REG_NONE
  5210  						pp.From.Offset = 0
  5211  						pp.From.Index = REG_NONE
  5212  						pp.From.Scale = 0
  5213  						ab.rexflag |= Pw
  5214  						ab.Put2(0x64, // FS
  5215  							0x8B)
  5216  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5217  					}
  5218  				}
  5219  				return
  5220  			}
  5221  		}
  5222  	}
  5223  	goto bad
  5224  
  5225  bad:
  5226  	if ctxt.Arch.Family != sys.AMD64 {
  5227  		// here, the assembly has failed.
  5228  		// if it's a byte instruction that has
  5229  		// unaddressable registers, try to
  5230  		// exchange registers and reissue the
  5231  		// instruction with the operands renamed.
  5232  		pp := *p
  5233  
  5234  		unbytereg(&pp.From, &pp.Ft)
  5235  		unbytereg(&pp.To, &pp.Tt)
  5236  
  5237  		z := int(p.From.Reg)
  5238  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5239  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5240  			// For now, different to keep bit-for-bit compatibility.
  5241  			if ctxt.Arch.Family == sys.I386 {
  5242  				breg := byteswapreg(ctxt, &p.To)
  5243  				if breg != REG_AX {
  5244  					ab.Put1(0x87) // xchg lhs,bx
  5245  					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5246  					subreg(&pp, z, breg)
  5247  					ab.doasm(ctxt, cursym, &pp)
  5248  					ab.Put1(0x87) // xchg lhs,bx
  5249  					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5250  				} else {
  5251  					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5252  					subreg(&pp, z, REG_AX)
  5253  					ab.doasm(ctxt, cursym, &pp)
  5254  					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5255  				}
  5256  				return
  5257  			}
  5258  
  5259  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  5260  				// We certainly don't want to exchange
  5261  				// with AX if the op is MUL or DIV.
  5262  				ab.Put1(0x87) // xchg lhs,bx
  5263  				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5264  				subreg(&pp, z, REG_BX)
  5265  				ab.doasm(ctxt, cursym, &pp)
  5266  				ab.Put1(0x87) // xchg lhs,bx
  5267  				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5268  			} else {
  5269  				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5270  				subreg(&pp, z, REG_AX)
  5271  				ab.doasm(ctxt, cursym, &pp)
  5272  				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5273  			}
  5274  			return
  5275  		}
  5276  
  5277  		z = int(p.To.Reg)
  5278  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5279  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5280  			// For now, different to keep bit-for-bit compatibility.
  5281  			if ctxt.Arch.Family == sys.I386 {
  5282  				breg := byteswapreg(ctxt, &p.From)
  5283  				if breg != REG_AX {
  5284  					ab.Put1(0x87) //xchg rhs,bx
  5285  					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5286  					subreg(&pp, z, breg)
  5287  					ab.doasm(ctxt, cursym, &pp)
  5288  					ab.Put1(0x87) // xchg rhs,bx
  5289  					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5290  				} else {
  5291  					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5292  					subreg(&pp, z, REG_AX)
  5293  					ab.doasm(ctxt, cursym, &pp)
  5294  					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5295  				}
  5296  				return
  5297  			}
  5298  
  5299  			if isax(&p.From) {
  5300  				ab.Put1(0x87) // xchg rhs,bx
  5301  				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5302  				subreg(&pp, z, REG_BX)
  5303  				ab.doasm(ctxt, cursym, &pp)
  5304  				ab.Put1(0x87) // xchg rhs,bx
  5305  				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5306  			} else {
  5307  				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5308  				subreg(&pp, z, REG_AX)
  5309  				ab.doasm(ctxt, cursym, &pp)
  5310  				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5311  			}
  5312  			return
  5313  		}
  5314  	}
  5315  
  5316  	ctxt.Diag("%s: invalid instruction: %v", cursym.Name, p)
  5317  }
  5318  
  5319  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  5320  // which is not referenced in a.
  5321  // If a is empty, it returns BX to account for MULB-like instructions
  5322  // that might use DX and AX.
  5323  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  5324  	cana, canb, canc, cand := true, true, true, true
  5325  	if a.Type == obj.TYPE_NONE {
  5326  		cana, cand = false, false
  5327  	}
  5328  
  5329  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  5330  		switch a.Reg {
  5331  		case REG_NONE:
  5332  			cana, cand = false, false
  5333  		case REG_AX, REG_AL, REG_AH:
  5334  			cana = false
  5335  		case REG_BX, REG_BL, REG_BH:
  5336  			canb = false
  5337  		case REG_CX, REG_CL, REG_CH:
  5338  			canc = false
  5339  		case REG_DX, REG_DL, REG_DH:
  5340  			cand = false
  5341  		}
  5342  	}
  5343  
  5344  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  5345  		switch a.Index {
  5346  		case REG_AX:
  5347  			cana = false
  5348  		case REG_BX:
  5349  			canb = false
  5350  		case REG_CX:
  5351  			canc = false
  5352  		case REG_DX:
  5353  			cand = false
  5354  		}
  5355  	}
  5356  
  5357  	switch {
  5358  	case cana:
  5359  		return REG_AX
  5360  	case canb:
  5361  		return REG_BX
  5362  	case canc:
  5363  		return REG_CX
  5364  	case cand:
  5365  		return REG_DX
  5366  	default:
  5367  		ctxt.Diag("impossible byte register")
  5368  		ctxt.DiagFlush()
  5369  		log.Fatalf("bad code")
  5370  		return 0
  5371  	}
  5372  }
  5373  
  5374  func isbadbyte(a *obj.Addr) bool {
  5375  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  5376  }
  5377  
  5378  func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  5379  	ab.Reset()
  5380  
  5381  	ab.rexflag = 0
  5382  	ab.vexflag = false
  5383  	ab.evexflag = false
  5384  	mark := ab.Len()
  5385  	ab.doasm(ctxt, cursym, p)
  5386  	if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5387  		// as befits the whole approach of the architecture,
  5388  		// the rex prefix must appear before the first opcode byte
  5389  		// (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  5390  		// before the 0f opcode escape!), or it might be ignored.
  5391  		// note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  5392  		if ctxt.Arch.Family != sys.AMD64 {
  5393  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
  5394  		}
  5395  		n := ab.Len()
  5396  		var np int
  5397  		for np = mark; np < n; np++ {
  5398  			c := ab.At(np)
  5399  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  5400  				break
  5401  			}
  5402  		}
  5403  		ab.Insert(np, byte(0x40|ab.rexflag))
  5404  	}
  5405  
  5406  	n := ab.Len()
  5407  	for i := len(cursym.R) - 1; i >= 0; i-- {
  5408  		r := &cursym.R[i]
  5409  		if int64(r.Off) < p.Pc {
  5410  			break
  5411  		}
  5412  		if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5413  			r.Off++
  5414  		}
  5415  		if r.Type == objabi.R_PCREL {
  5416  			if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
  5417  				// PC-relative addressing is relative to the end of the instruction,
  5418  				// but the relocations applied by the linker are relative to the end
  5419  				// of the relocation. Because immediate instruction
  5420  				// arguments can follow the PC-relative memory reference in the
  5421  				// instruction encoding, the two may not coincide. In this case,
  5422  				// adjust addend so that linker can keep relocating relative to the
  5423  				// end of the relocation.
  5424  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  5425  			} else if ctxt.Arch.Family == sys.I386 {
  5426  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  5427  				// assumes that the previous instruction loaded the PC of the end
  5428  				// of that instruction into CX, so the adjustment is relative to
  5429  				// that.
  5430  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5431  			}
  5432  		}
  5433  		if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
  5434  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  5435  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5436  		}
  5437  
  5438  	}
  5439  }
  5440  
  5441  // unpackOps4 extracts 4 operands from p.
  5442  func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) {
  5443  	return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.To
  5444  }
  5445  
  5446  // unpackOps5 extracts 5 operands from p.
  5447  func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) {
  5448  	return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.RestArgs[2].Addr, &p.To
  5449  }
  5450  

View as plain text