Source file src/cmd/internal/obj/x86/obj6.go

     1  // Inferno utils/6l/pass.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/pass.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"cmd/internal/objabi"
    36  	"cmd/internal/src"
    37  	"cmd/internal/sys"
    38  	"internal/abi"
    39  	"log"
    40  	"math"
    41  	"path"
    42  	"strings"
    43  )
    44  
    45  func CanUse1InsnTLS(ctxt *obj.Link) bool {
    46  	if isAndroid {
    47  		// Android uses a global variable for the tls offset.
    48  		return false
    49  	}
    50  
    51  	if ctxt.Arch.Family == sys.I386 {
    52  		switch ctxt.Headtype {
    53  		case objabi.Hlinux,
    54  			objabi.Hplan9,
    55  			objabi.Hwindows:
    56  			return false
    57  		}
    58  
    59  		return true
    60  	}
    61  
    62  	switch ctxt.Headtype {
    63  	case objabi.Hplan9, objabi.Hwindows:
    64  		return false
    65  	case objabi.Hlinux, objabi.Hfreebsd:
    66  		return !ctxt.Flag_shared
    67  	}
    68  
    69  	return true
    70  }
    71  
// progedit rewrites the single instruction p in place so that it is in the
// form the rest of the x86 back end expects. When a rewrite needs extra
// instructions they are allocated with newprog and linked in directly after p.
//
// The rewrites are applied in this order:
//   - TLS accesses are converted between the 1- and 2-instruction forms
//     according to CanUse1InsnTLS, then adapted for Android, Windows and Plan 9;
//   - a bare constant written as a memory operand in the destination of
//     CMPPD/CMPPS/CMPSD/CMPSS becomes a TYPE_CONST operand;
//   - CALL/JMP/RET to an external or static symbol becomes a TYPE_BRANCH operand;
//   - MOVL/MOVQ of an address becomes LEAL/LEAQ;
//   - floating-point constant sources are replaced by XORPS (for +0) or by a
//     reference to the symbol returned by ctxt.Float32Sym/ctxt.Float64Sym;
//   - with ctxt.Flag_dynlink, rewriteToUseGot runs; with ctxt.Flag_shared on
//     386, rewriteToPcrel runs.
func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
	// Thread-local storage references use the TLS pseudo-register.
	// As a register, TLS refers to the thread-local storage base, and it
	// can only be loaded into another register:
	//
	//         MOVQ TLS, AX
	//
	// An offset from the thread-local storage base is written off(reg)(TLS*1).
	// Semantically it is off(reg), but the (TLS*1) annotation marks this as
	// indexing from the loaded TLS base. This emits a relocation so that
	// if the linker needs to adjust the offset, it can. For example:
	//
	//         MOVQ TLS, AX
	//         MOVQ 0(AX)(TLS*1), CX // load g into CX
	//
	// On systems that support direct access to the TLS memory, this
	// pair of instructions can be reduced to a direct TLS memory reference:
	//
	//         MOVQ 0(TLS), CX // load g into CX
	//
	// The 2-instruction and 1-instruction forms correspond to the two code
	// sequences for loading a TLS variable in the local exec model given in "ELF
	// Handling For Thread-Local Storage".
	//
	// We apply this rewrite on systems that support the 1-instruction form.
	// The decision is made using only the operating system and the -shared flag,
	// not the link mode. If some link modes on a particular operating system
	// require the 2-instruction form, then all builds for that operating system
	// will use the 2-instruction form, so that the link mode decision can be
	// delayed to link time.
	//
	// In this way, all supported systems use identical instructions to
	// access TLS, and they are rewritten appropriately first here in
	// liblink and then finally using relocations in the linker.
	//
	// When -shared is passed, we leave the code in the 2-instruction form but
	// assemble (and relocate) them in different ways to generate the initial
	// exec code sequence. It's a bit of a fluke that this is possible without
	// rewriting the instructions more comprehensively, and it only works because
	// we only support a single TLS variable (g).

	if CanUse1InsnTLS(ctxt) {
		// Reduce 2-instruction sequence to 1-instruction sequence.
		// Sequences like
		//	MOVQ TLS, BX
		//	... off(BX)(TLS*1) ...
		// become
		//	NOP
		//	... off(TLS) ...
		//
		// TODO(rsc): Remove the Hsolaris special case. It exists only to
		// guarantee we are producing byte-identical binaries as before this code.
		// But it should be unnecessary.
		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 && ctxt.Headtype != objabi.Hsolaris {
			obj.Nopout(p)
		}
		// Fold a source operand of the form off(reg)(TLS*1) into off(TLS).
		if p.From.Type == obj.TYPE_MEM && p.From.Index == REG_TLS && REG_AX <= p.From.Reg && p.From.Reg <= REG_R15 {
			p.From.Reg = REG_TLS
			p.From.Scale = 0
			p.From.Index = REG_NONE
		}

		// Likewise for a destination operand of the form off(reg)(TLS*1).
		if p.To.Type == obj.TYPE_MEM && p.To.Index == REG_TLS && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
			p.To.Reg = REG_TLS
			p.To.Scale = 0
			p.To.Index = REG_NONE
		}
	} else {
		// loadG, below, always inserts the 1-instruction sequence. Rewrite it
		// as the 2-instruction sequence if necessary.
		//	MOVQ 0(TLS), BX
		// becomes
		//	MOVQ TLS, BX
		//	MOVQ 0(BX)(TLS*1), BX
		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
			// q is the new second instruction: it starts as a copy of the
			// original source operand, re-based on the destination register
			// and indexed by TLS.
			q := obj.Appendp(p, newprog)
			q.As = p.As
			q.From = p.From
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			q.From.Index = REG_TLS
			q.From.Scale = 2 // TODO: use 1
			q.To = p.To
			// p itself becomes the first instruction: MOVx TLS, reg.
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REG_TLS
			p.From.Index = REG_NONE
			p.From.Offset = 0
		}
	}

	// Android and Windows use a tls offset determined at runtime. Rewrite
	//	MOVQ TLS, BX
	// to
	//	MOVQ runtime.tls_g(SB), BX
	if (isAndroid || ctxt.Headtype == objabi.Hwindows) &&
		(p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_EXTERN
		p.From.Reg = REG_NONE
		p.From.Sym = ctxt.Lookup("runtime.tls_g")
		p.From.Index = REG_NONE
		if ctxt.Headtype == objabi.Hwindows {
			// Windows requires an additional indirection
			// to retrieve the TLS pointer,
			// as runtime.tls_g contains the TLS offset from GS or FS.
			// on AMD64 add
			//	MOVQ 0(BX)(GS*1), BX
			// on 386 add
			//	MOVQ 0(BX)(FS*1), BX
			// The appended instruction uses the same opcode as p.
			q := obj.Appendp(p, newprog)
			q.As = p.As
			q.From = obj.Addr{}
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			if ctxt.Arch.Family == sys.AMD64 {
				q.From.Index = REG_GS
			} else {
				q.From.Index = REG_FS
			}
			q.From.Scale = 1
			q.From.Offset = 0
			q.To = p.To
		}
	}

	// On Windows/AMD64 and Plan 9, a TLS index with scale 1 is changed to
	// scale 2 on either operand.
	// TODO: Remove.
	if ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64 || ctxt.Headtype == objabi.Hplan9 {
		if p.From.Scale == 1 && p.From.Index == REG_TLS {
			p.From.Scale = 2
		}
		if p.To.Scale == 1 && p.To.Index == REG_TLS {
			p.To.Scale = 2
		}
	}

	// Rewrite 0 to $0 in 3rd argument to CMPPS etc.
	// That's what the tables expect.
	switch p.As {
	case ACMPPD, ACMPPS, ACMPSD, ACMPSS:
		// Only a memory operand with no name, base, index or symbol
		// (i.e. a bare offset) is treated as a constant.
		if p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_NONE && p.To.Reg == REG_NONE && p.To.Index == REG_NONE && p.To.Sym == nil {
			p.To.Type = obj.TYPE_CONST
		}
	}

	// Rewrite CALL/JMP/RET to symbol as TYPE_BRANCH.
	switch p.As {
	case obj.ACALL, obj.AJMP, obj.ARET:
		if p.To.Type == obj.TYPE_MEM && (p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC) && p.To.Sym != nil {
			p.To.Type = obj.TYPE_BRANCH
		}
	}

	// Rewrite MOVL/MOVQ $XXX(FP/SP) as LEAL/LEAQ.
	// On AMD64 this applies to every address operand; elsewhere addresses of
	// external and static symbols are left as MOV.
	if p.From.Type == obj.TYPE_ADDR && (ctxt.Arch.Family == sys.AMD64 || p.From.Name != obj.NAME_EXTERN && p.From.Name != obj.NAME_STATIC) {
		switch p.As {
		case AMOVL:
			p.As = ALEAL
			p.From.Type = obj.TYPE_MEM
		case AMOVQ:
			p.As = ALEAQ
			p.From.Type = obj.TYPE_MEM
		}
	}

	// Rewrite float constants to values stored in memory.
	switch p.As {
	// Convert AMOVSS $(0), Xx to AXORPS Xx, Xx
	case AMOVSS:
		if p.From.Type == obj.TYPE_FCONST {
			//  f == 0 can't be used here due to -0, so use Float64bits
			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
					p.As = AXORPS
					p.From = p.To
					// Leave the switch: the XORPS form needs no memory constant.
					break
				}
			}
		}
		// Any other MOVSS is handled like the single-precision ops below.
		fallthrough

	case AFMOVF,
		AFADDF,
		AFSUBF,
		AFSUBRF,
		AFMULF,
		AFDIVF,
		AFDIVRF,
		AFCOMF,
		AFCOMFP,
		AADDSS,
		ASUBSS,
		AMULSS,
		ADIVSS,
		ACOMISS,
		AUCOMISS:
		// Single-precision: replace the constant with a reference to the
		// symbol returned by ctxt.Float32Sym for its float32 value.
		if p.From.Type == obj.TYPE_FCONST {
			f32 := float32(p.From.Val.(float64))
			p.From.Type = obj.TYPE_MEM
			p.From.Name = obj.NAME_EXTERN
			p.From.Sym = ctxt.Float32Sym(f32)
			p.From.Offset = 0
		}

	case AMOVSD:
		// Convert AMOVSD $(0), Xx to AXORPS Xx, Xx
		if p.From.Type == obj.TYPE_FCONST {
			//  f == 0 can't be used here due to -0, so use Float64bits
			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
					p.As = AXORPS
					p.From = p.To
					// Leave the switch: the XORPS form needs no memory constant.
					break
				}
			}
		}
		// Any other MOVSD is handled like the double-precision ops below.
		fallthrough

	case AFMOVD,
		AFADDD,
		AFSUBD,
		AFSUBRD,
		AFMULD,
		AFDIVD,
		AFDIVRD,
		AFCOMD,
		AFCOMDP,
		AADDSD,
		ASUBSD,
		AMULSD,
		ADIVSD,
		ACOMISD,
		AUCOMISD:
		// Double-precision: replace the constant with a reference to the
		// symbol returned by ctxt.Float64Sym.
		if p.From.Type == obj.TYPE_FCONST {
			f64 := p.From.Val.(float64)
			p.From.Type = obj.TYPE_MEM
			p.From.Name = obj.NAME_EXTERN
			p.From.Sym = ctxt.Float64Sym(f64)
			p.From.Offset = 0
		}
	}

	if ctxt.Flag_dynlink {
		rewriteToUseGot(ctxt, p, newprog)
	}

	if ctxt.Flag_shared && ctxt.Arch.Family == sys.I386 {
		rewriteToPcrel(ctxt, p, newprog)
	}
}
   321  
// rewriteToUseGot rewrites p, if necessary, to access global data via the
// global offset table. It is called from progedit when ctxt.Flag_dynlink is set.
//
// Instructions that refer to a non-local NAME_EXTERN symbol are replaced by a
// load of the symbol's GOT entry into a scratch register followed by the
// original operation performed through that register. Replacement
// instructions are allocated with newprog and linked in after p; when p is
// fully replaced it is turned into a NOP.
func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
	// lea and mov are the pointer-sized LEA/MOV opcodes for the target and
	// reg is the scratch register that receives the GOT entry.
	var lea, mov obj.As
	var reg int16
	if ctxt.Arch.Family == sys.AMD64 {
		lea = ALEAQ
		mov = AMOVQ
		reg = REG_R15
	} else {
		lea = ALEAL
		mov = AMOVL
		reg = REG_CX
		if p.As == ALEAL && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
			// Special case: clobber the destination register with
			// the PC so we don't have to clobber CX.
			// The SSA backend depends on CX not being clobbered across LEAL.
			// See cmd/compile/internal/ssa/gen/386.rules (search for Flag_shared).
			reg = p.To.Reg
		}
	}

	if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO {
		//     ADUFFxxx $offset
		// becomes
		//     $MOV runtime.duffxxx@GOT, $reg
		//     $LEA $offset($reg), $reg
		//     CALL $reg
		// (we use LEAx rather than ADDx because ADDx clobbers
		// flags and duffzero on 386 does not otherwise do so).
		var sym *obj.LSym
		if p.As == obj.ADUFFZERO {
			sym = ctxt.LookupABI("runtime.duffzero", obj.ABIInternal)
		} else {
			sym = ctxt.LookupABI("runtime.duffcopy", obj.ABIInternal)
		}
		// Save the entry offset before p.To is overwritten.
		offset := p.To.Offset
		p.As = mov
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_GOTREF
		p.From.Sym = sym
		p.To.Type = obj.TYPE_REG
		p.To.Reg = reg
		p.To.Offset = 0
		p.To.Sym = nil
		p1 := obj.Appendp(p, newprog)
		p1.As = lea
		p1.From.Type = obj.TYPE_MEM
		p1.From.Offset = offset
		p1.From.Reg = reg
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = reg
		p2 := obj.Appendp(p1, newprog)
		p2.As = obj.ACALL
		p2.To.Type = obj.TYPE_REG
		p2.To.Reg = reg
	}

	// We only care about global data: NAME_EXTERN means a global
	// symbol in the Go sense, and p.Sym.Local is true for a few
	// internally defined symbols.
	if p.As == lea && p.From.Type == obj.TYPE_MEM && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		// $LEA sym, Rx becomes $MOV $sym, Rx which will be rewritten below
		p.As = mov
		p.From.Type = obj.TYPE_ADDR
	}
	if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		// $MOV $sym, Rx becomes $MOV sym@GOT, Rx
		// $MOV $sym+<off>, Rx becomes $MOV sym@GOT, Rx; $LEA <off>(Rx), Rx
		// On 386 only, more complicated things like PUSHL $sym become $MOV sym@GOT, CX; PUSHL CX
		cmplxdest := false
		pAs := p.As
		var dest obj.Addr
		if p.To.Type != obj.TYPE_REG || pAs != mov {
			if ctxt.Arch.Family == sys.AMD64 {
				ctxt.Diag("do not know how to handle LEA-type insn to non-register in %v with -dynlink", p)
			}
			// Remember the original opcode and destination; p is turned
			// into a plain load of the GOT entry into reg, and the original
			// operation is re-issued from reg further down.
			cmplxdest = true
			dest = p.To
			p.As = mov
			p.To.Type = obj.TYPE_REG
			p.To.Reg = reg
			p.To.Sym = nil
			p.To.Name = obj.NAME_NONE
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_GOTREF
		// q tracks the last instruction of the replacement sequence.
		q := p
		if p.From.Offset != 0 {
			// The offset is applied with a separate LEA once the GOT entry
			// has been loaded; the GOT reference itself gets offset 0.
			q = obj.Appendp(p, newprog)
			q.As = lea
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			q.From.Offset = p.From.Offset
			q.To = p.To
			p.From.Offset = 0
		}
		if cmplxdest {
			// Perform the original operation with reg as its source.
			q = obj.Appendp(q, newprog)
			q.As = pAs
			q.To = dest
			q.From.Type = obj.TYPE_REG
			q.From.Reg = reg
		}
	}
	if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN {
		ctxt.Diag("don't know how to handle %v with -dynlink", p)
	}
	// source is the operand of p (From or To) that names the global symbol.
	var source *obj.Addr
	// MOVx sym, Ry becomes $MOV sym@GOT, R15; MOVx (R15), Ry
	// MOVx Ry, sym becomes $MOV sym@GOT, R15; MOVx Ry, (R15)
	// An addition may be inserted between the two MOVs if there is an offset.
	if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
			ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p)
		}
		source = &p.From
	} else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
		source = &p.To
	} else {
		// Neither operand refers to a non-local global: nothing more to do.
		return
	}
	if p.As == obj.ACALL {
		// When dynlinking on 386, almost any call might end up being a call
		// to a PLT, so make sure the GOT pointer is loaded into BX.
		// RegTo2 is set on the replacement call insn to stop it being
		// processed when it is in turn passed to progedit.
		//
		// We disable open-coded defers in buildssa() on 386 ONLY with shared
		// libraries because of this extra code added before deferreturn calls.
		//
		// computeDeferReturn in cmd/link/internal/ld/pcln.go depends
		// on the size of these instructions.
		if ctxt.Arch.Family == sys.AMD64 || (p.To.Sym != nil && p.To.Sym.Local()) || p.RegTo2 != 0 {
			return
		}
		p1 := obj.Appendp(p, newprog)
		p2 := obj.Appendp(p1, newprog)

		// p1: LEAL _GLOBAL_OFFSET_TABLE_, BX
		p1.As = ALEAL
		p1.From.Type = obj.TYPE_MEM
		p1.From.Name = obj.NAME_STATIC
		p1.From.Sym = ctxt.Lookup("_GLOBAL_OFFSET_TABLE_")
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = REG_BX

		// p2: a copy of the original call.
		p2.As = p.As
		p2.Scond = p.Scond
		p2.From = p.From
		if p.RestArgs != nil {
			p2.RestArgs = append(p2.RestArgs, p.RestArgs...)
		}
		p2.Reg = p.Reg
		p2.To = p.To
		// p.To.Type was set to TYPE_BRANCH above, but that makes checkaddr
		// in ../pass.go complain, so set it back to TYPE_MEM here, until p2
		// itself gets passed to progedit.
		p2.To.Type = obj.TYPE_MEM
		p2.RegTo2 = 1

		obj.Nopout(p)
		return

	}
	// These instructions are left referring to the symbol directly.
	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
		return
	}
	if source.Type != obj.TYPE_MEM {
		ctxt.Diag("don't know how to handle %v with -dynlink", p)
	}
	p1 := obj.Appendp(p, newprog)
	p2 := obj.Appendp(p1, newprog)

	// p1: $MOV sym@GOT, reg
	p1.As = mov
	p1.From.Type = obj.TYPE_MEM
	p1.From.Sym = source.Sym
	p1.From.Name = obj.NAME_GOTREF
	p1.To.Type = obj.TYPE_REG
	p1.To.Reg = reg

	// p2: the original instruction, with the symbol operand replaced by a
	// reference through reg (any offset on the operand is kept).
	p2.As = p.As
	p2.From = p.From
	p2.To = p.To
	if from3 := p.GetFrom3(); from3 != nil {
		p2.AddRestSource(*from3)
	}
	if p.From.Name == obj.NAME_EXTERN {
		p2.From.Reg = reg
		p2.From.Name = obj.NAME_NONE
		p2.From.Sym = nil
	} else if p.To.Name == obj.NAME_EXTERN {
		p2.To.Reg = reg
		p2.To.Name = obj.NAME_NONE
		p2.To.Sym = nil
	} else {
		return
	}
	obj.Nopout(p)
}
   520  
   521  func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
   522  	// RegTo2 is set on the instructions we insert here so they don't get
   523  	// processed twice.
   524  	if p.RegTo2 != 0 {
   525  		return
   526  	}
   527  	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP {
   528  		return
   529  	}
   530  	// Any Prog (aside from the above special cases) with an Addr with Name ==
   531  	// NAME_EXTERN, NAME_STATIC or NAME_GOTREF has a CALL __x86.get_pc_thunk.XX
   532  	// inserted before it.
   533  	isName := func(a *obj.Addr) bool {
   534  		if a.Sym == nil || (a.Type != obj.TYPE_MEM && a.Type != obj.TYPE_ADDR) || a.Reg != 0 {
   535  			return false
   536  		}
   537  		if a.Sym.Type == objabi.STLSBSS {
   538  			return false
   539  		}
   540  		return a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_STATIC || a.Name == obj.NAME_GOTREF
   541  	}
   542  
   543  	if isName(&p.From) && p.From.Type == obj.TYPE_ADDR {
   544  		// Handle things like "MOVL $sym, (SP)" or "PUSHL $sym" by rewriting
   545  		// to "MOVL $sym, CX; MOVL CX, (SP)" or "MOVL $sym, CX; PUSHL CX"
   546  		// respectively.
   547  		if p.To.Type != obj.TYPE_REG {
   548  			q := obj.Appendp(p, newprog)
   549  			q.As = p.As
   550  			q.From.Type = obj.TYPE_REG
   551  			q.From.Reg = REG_CX
   552  			q.To = p.To
   553  			p.As = AMOVL
   554  			p.To.Type = obj.TYPE_REG
   555  			p.To.Reg = REG_CX
   556  			p.To.Sym = nil
   557  			p.To.Name = obj.NAME_NONE
   558  		}
   559  	}
   560  
   561  	if !isName(&p.From) && !isName(&p.To) && (p.GetFrom3() == nil || !isName(p.GetFrom3())) {
   562  		return
   563  	}
   564  	var dst int16 = REG_CX
   565  	if (p.As == ALEAL || p.As == AMOVL) && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
   566  		dst = p.To.Reg
   567  		// Why? See the comment near the top of rewriteToUseGot above.
   568  		// AMOVLs might be introduced by the GOT rewrites.
   569  	}
   570  	q := obj.Appendp(p, newprog)
   571  	q.RegTo2 = 1
   572  	r := obj.Appendp(q, newprog)
   573  	r.RegTo2 = 1
   574  	q.As = obj.ACALL
   575  	thunkname := "__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst)))
   576  	q.To.Sym = ctxt.LookupInit(thunkname, func(s *obj.LSym) { s.Set(obj.AttrLocal, true) })
   577  	q.To.Type = obj.TYPE_MEM
   578  	q.To.Name = obj.NAME_EXTERN
   579  	r.As = p.As
   580  	r.Scond = p.Scond
   581  	r.From = p.From
   582  	r.RestArgs = p.RestArgs
   583  	r.Reg = p.Reg
   584  	r.To = p.To
   585  	if isName(&p.From) {
   586  		r.From.Reg = dst
   587  	}
   588  	if isName(&p.To) {
   589  		r.To.Reg = dst
   590  	}
   591  	if p.GetFrom3() != nil && isName(p.GetFrom3()) {
   592  		r.GetFrom3().Reg = dst
   593  	}
   594  	obj.Nopout(p)
   595  }
   596  
   597  // Prog.mark
   598  const (
   599  	markBit = 1 << 0 // used in errorCheck to avoid duplicate work
   600  )
   601  
// preprocess finalizes the stack frame handling of the function cursym.
// Starting from the function's TEXT instruction it:
//   - determines the frame size and whether a frame pointer (BP) is saved;
//   - records the argument and local sizes on cursym.Func();
//   - on AMD64, marks small functions that make no calls other than a few
//     zero-argument runtime calls and duffcopy/duffzero as nosplit;
//   - emits the prologue: the stack split check (unless nosplit), the BP
//     save, and the stack pointer adjustment;
//   - walks every instruction, rebasing NAME_AUTO/NAME_PARAM offsets,
//     recording stack pointer adjustments in Spadj, and expanding each RET
//     into the matching epilogue.
//
// New instructions are allocated with newprog. Functions with no Text or with
// nothing after the TEXT instruction are left untouched.
func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
	if cursym.Func().Text == nil || cursym.Func().Text.Link == nil {
		return
	}

	// p starts at the TEXT instruction; its To.Offset holds the declared
	// frame size. A negative size is treated as 0 for autoffset.
	p := cursym.Func().Text
	autoffset := int32(p.To.Offset)
	if autoffset < 0 {
		autoffset = 0
	}

	// hasCall records whether the function contains any CALL, DUFFCOPY or
	// DUFFZERO instruction.
	hasCall := false
	for q := p; q != nil; q = q.Link {
		if q.As == obj.ACALL || q.As == obj.ADUFFCOPY || q.As == obj.ADUFFZERO {
			hasCall = true
			break
		}
	}

	var bpsize int
	if ctxt.Arch.Family == sys.AMD64 &&
		!p.From.Sym.NoFrame() && // (1) below
		!(autoffset == 0 && !hasCall) { // (2) below
		// Make room to save a base pointer.
		// There is one case we must avoid:
		// 1) If noframe is set (which we do for functions which tail call).
		// For performance, we also want to avoid:
		// 2) Frameless leaf functions
		bpsize = ctxt.Arch.PtrSize
		autoffset += int32(bpsize)
		p.To.Offset += int64(bpsize)
	} else {
		// No base pointer is saved; mark the symbol NoFrame.
		bpsize = 0
		p.From.Sym.Set(obj.AttrNoFrame, true)
	}

	// The argument size is carried in the TEXT instruction's To.Val.
	textarg := int64(p.To.Val.(int32))
	cursym.Func().Args = int32(textarg)
	cursym.Func().Locals = int32(p.To.Offset)

	// TODO(rsc): Remove.
	if ctxt.Arch.Family == sys.I386 && cursym.Func().Locals < 0 {
		cursym.Func().Locals = 0
	}

	// TODO(rsc): Remove 'ctxt.Arch.Family == sys.AMD64 &&'.
	if ctxt.Arch.Family == sys.AMD64 && autoffset < abi.StackSmall && !p.From.Sym.NoSplit() {
		// Decide whether this small-frame function can skip the stack
		// split check: it must make no calls other than the ones handled
		// in the switch below.
		leaf := true
	LeafSearch:
		for q := p; q != nil; q = q.Link {
			switch q.As {
			case obj.ACALL:
				// Treat common runtime calls that take no arguments
				// the same as duffcopy and duffzero.

				// Note that of these functions, panicBounds does
				// use some stack, but its stack together with the
				// < StackSmall used by this function is still
				// less than stackNosplit. See issue 31219.
				if !isZeroArgRuntimeCall(q.To.Sym) {
					leaf = false
					break LeafSearch
				}
				fallthrough
			case obj.ADUFFCOPY, obj.ADUFFZERO:
				// These are tolerated only when the frame is smaller
				// than StackSmall-8.
				if autoffset >= abi.StackSmall-8 {
					leaf = false
					break LeafSearch
				}
			}
		}

		if leaf {
			p.From.Sym.Set(obj.AttrNoSplit, true)
		}
	}

	if !p.From.Sym.NoSplit() {
		// Emit split check.
		p = stacksplit(ctxt, cursym, p, newprog, autoffset, int32(textarg))
	}

	if bpsize > 0 {
		// Save caller's BP
		p = obj.Appendp(p, newprog)

		p.As = APUSHQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_BP

		// Move current frame to BP
		p = obj.Appendp(p, newprog)

		p.As = AMOVQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_BP
	}

	if autoffset%int32(ctxt.Arch.RegSize) != 0 {
		ctxt.Diag("unaligned stack size %d", autoffset)
	}

	// localoffset is autoffset discounting the frame pointer,
	// which has already been allocated in the stack.
	localoffset := autoffset - int32(bpsize)
	if localoffset != 0 {
		p = obj.Appendp(p, newprog)
		p.As = AADJSP
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(localoffset)
		p.Spadj = localoffset
	}

	// Delve debugger would like the next instruction to be noted as the end of the function prologue.
	// TODO: are there other cases (e.g., wrapper functions) that need marking?
	if autoffset != 0 {
		p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
	}

	// Second pass over the whole function. deltasp is the running total of
	// the stack pointer adjustments seen so far (pushes, pops and ADJSP,
	// including the ones emitted by the prologue above).
	var deltasp int32
	for p = cursym.Func().Text; p != nil; p = p.Link {
		pcsize := ctxt.Arch.RegSize
		// Rebase symbolic frame offsets: autos are shifted by deltasp less
		// the saved-BP slot, params by deltasp plus pcsize.
		switch p.From.Name {
		case obj.NAME_AUTO:
			p.From.Offset += int64(deltasp) - int64(bpsize)
		case obj.NAME_PARAM:
			p.From.Offset += int64(deltasp) + int64(pcsize)
		}
		if p.GetFrom3() != nil {
			switch p.GetFrom3().Name {
			case obj.NAME_AUTO:
				p.GetFrom3().Offset += int64(deltasp) - int64(bpsize)
			case obj.NAME_PARAM:
				p.GetFrom3().Offset += int64(deltasp) + int64(pcsize)
			}
		}
		switch p.To.Name {
		case obj.NAME_AUTO:
			p.To.Offset += int64(deltasp) - int64(bpsize)
		case obj.NAME_PARAM:
			p.To.Offset += int64(deltasp) + int64(pcsize)
		}

		// Every case except ARET ends with continue, so the code after
		// this switch runs only for RET instructions.
		switch p.As {
		default:
			// An instruction (other than CMPL/CMPQ) with SP as its
			// destination register sets FuncFlagSPWrite on the function.
			// This is reported as a fatal error unless ctxt.IsAsm is set.
			if p.To.Type == obj.TYPE_REG && p.To.Reg == REG_SP && p.As != ACMPL && p.As != ACMPQ {
				f := cursym.Func()
				if f.FuncFlag&abi.FuncFlagSPWrite == 0 {
					f.FuncFlag |= abi.FuncFlagSPWrite
					if ctxt.Debugvlog || !ctxt.IsAsm {
						ctxt.Logf("auto-SPWRITE: %s %v\n", cursym.Name, p)
						if !ctxt.IsAsm {
							ctxt.Diag("invalid auto-SPWRITE in non-assembly")
							ctxt.DiagFlush()
							log.Fatalf("bad SPWRITE")
						}
					}
				}
			}
			continue

		case APUSHL, APUSHFL:
			deltasp += 4
			p.Spadj = 4
			continue

		case APUSHQ, APUSHFQ:
			deltasp += 8
			p.Spadj = 8
			continue

		case APUSHW, APUSHFW:
			deltasp += 2
			p.Spadj = 2
			continue

		case APOPL, APOPFL:
			deltasp -= 4
			p.Spadj = -4
			continue

		case APOPQ, APOPFQ:
			deltasp -= 8
			p.Spadj = -8
			continue

		case APOPW, APOPFW:
			deltasp -= 2
			p.Spadj = -2
			continue

		case AADJSP:
			p.Spadj = int32(p.From.Offset)
			deltasp += int32(p.From.Offset)
			continue

		case obj.ARET:
			// do nothing
		}

		// At a RET the accumulated adjustment must equal the frame size.
		if autoffset != deltasp {
			ctxt.Diag("%s: unbalanced PUSH/POP", cursym)
		}

		if autoffset != 0 {
			// Expand the RET into the epilogue, reusing p for the first
			// epilogue instruction and appending the rest after it:
			//	ADJSP $-localoffset   (if localoffset != 0)
			//	POPQ BP               (if a base pointer was saved)
			//	RET
			to := p.To // Keep To attached to RET for retjmp below
			p.To = obj.Addr{}
			if localoffset != 0 {
				p.As = AADJSP
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = int64(-localoffset)
				p.Spadj = -localoffset
				p = obj.Appendp(p, newprog)
			}

			if bpsize > 0 {
				// Restore caller's BP
				p.As = APOPQ
				p.To.Type = obj.TYPE_REG
				p.To.Reg = REG_BP
				p.Spadj = -int32(bpsize)
				p = obj.Appendp(p, newprog)
			}

			p.As = obj.ARET
			p.To = to

			// If there are instructions following
			// this ARET, they come from a branch
			// with the same stackframe, so undo
			// the cleanup.
			p.Spadj = +autoffset
		}

		// A RET that names a symbol is turned into a JMP to that symbol.
		if p.To.Sym != nil { // retjmp
			p.As = obj.AJMP
		}
	}
}
   843  
   844  func isZeroArgRuntimeCall(s *obj.LSym) bool {
   845  	if s == nil {
   846  		return false
   847  	}
   848  	switch s.Name {
   849  	case "runtime.panicdivide", "runtime.panicwrap", "runtime.panicshift", "runtime.panicBounds", "runtime.panicExtend":
   850  		return true
   851  	}
   852  	return false
   853  }
   854  
   855  // loadG ensures the G is loaded into a register (either CX or REGG),
   856  // appending instructions to p if necessary. It returns the new last
   857  // instruction and the G register.
   858  func loadG(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc) (*obj.Prog, int16) {
   859  	if ctxt.Arch.Family == sys.AMD64 && cursym.ABI() == obj.ABIInternal {
   860  		// Use the G register directly in ABIInternal
   861  		return p, REGG
   862  	}
   863  
   864  	var regg int16 = REG_CX
   865  	if ctxt.Arch.Family == sys.AMD64 {
   866  		regg = REGG // == REG_R14
   867  	}
   868  
   869  	p = obj.Appendp(p, newprog)
   870  	p.As = AMOVQ
   871  	if ctxt.Arch.PtrSize == 4 {
   872  		p.As = AMOVL
   873  	}
   874  	p.From.Type = obj.TYPE_MEM
   875  	p.From.Reg = REG_TLS
   876  	p.From.Offset = 0
   877  	p.To.Type = obj.TYPE_REG
   878  	p.To.Reg = regg
   879  
   880  	// Rewrite TLS instruction if necessary.
   881  	next := p.Link
   882  	progedit(ctxt, p, newprog)
   883  	for p.Link != next {
   884  		p = p.Link
   885  		progedit(ctxt, p, newprog)
   886  	}
   887  
   888  	if p.From.Index == REG_TLS {
   889  		p.From.Scale = 2
   890  	}
   891  
   892  	return p, regg
   893  }
   894  
// stacksplit appends code to p to check for stack split.
// Appends to (does not overwrite) p.
//
// If ctxt.Flag_maymorestack is set, a call to that function (bracketed by
// register-argument spill/unspill and, when the function needs it, a
// push/pop of REGCTXT) is emitted first. The check itself loads g via loadG,
// compares SP (adjusted by framesize when framesize > abi.StackSmall)
// against the stack guard stored in g, and conditionally branches to an
// out-of-line sequence appended after the function's current last
// instruction. That sequence spills register arguments, calls the
// appropriate runtime.morestack variant, unspills, and jumps back to the
// instruction following startPred (the start of the check).
//
// textarg is not referenced in the body.
//
// Returns last new instruction of the inline sequence (the result of
// ctxt.EndUnsafePoint after the conditional branch).
func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc, framesize int32, textarg int32) *obj.Prog {
	// Pointer-sized opcodes: 64-bit forms by default, 32-bit forms on 386.
	cmp := ACMPQ
	lea := ALEAQ
	mov := AMOVQ
	sub := ASUBQ
	push, pop := APUSHQ, APOPQ

	if ctxt.Arch.Family == sys.I386 {
		cmp = ACMPL
		lea = ALEAL
		mov = AMOVL
		sub = ASUBL
		push, pop = APUSHL, APOPL
	}

	tmp := int16(REG_AX) // use AX for 32-bit
	if ctxt.Arch.Family == sys.AMD64 {
		// Avoid register parameters.
		tmp = int16(REGENTRYTMP0)
	}

	if ctxt.Flag_maymorestack != "" {
		p = cursym.Func().SpillRegisterArgs(p, newprog)

		// Save the context register across the call when this function needs it.
		if cursym.Func().Text.From.Sym.NeedCtxt() {
			p = obj.Appendp(p, newprog)
			p.As = push
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REGCTXT
		}

		// We call maymorestack with an ABI matching the
		// caller's ABI. Since this is the first thing that
		// happens in the function, we have to be consistent
		// with the caller about CPU state (notably,
		// fixed-meaning registers).

		p = obj.Appendp(p, newprog)
		p.As = obj.ACALL
		p.To.Type = obj.TYPE_BRANCH
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = ctxt.LookupABI(ctxt.Flag_maymorestack, cursym.ABI())

		// Restore the context register saved above.
		if cursym.Func().Text.From.Sym.NeedCtxt() {
			p = obj.Appendp(p, newprog)
			p.As = pop
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REGCTXT
		}

		p = cursym.Func().UnspillRegisterArgs(p, newprog)
	}

	// Jump back to here after morestack returns.
	// (The jump target is startPred.Link, i.e. the first instruction
	// appended after this point.)
	startPred := p

	// Load G register
	var rg int16
	p, rg = loadG(ctxt, cursym, p, newprog)

	// q1 is the underflow branch (JCS) emitted only in the huge-frame case
	// below; it stays nil otherwise.
	var q1 *obj.Prog
	if framesize <= abi.StackSmall {
		// small stack: SP <= stackguard
		//	CMPQ SP, stackguard
		p = obj.Appendp(p, newprog)

		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}

		// Mark the stack bound check and morestack call async nonpreemptible.
		// If we get preempted here, when resumed the preemption request is
		// cleared, but we'll still call morestack, which will double the stack
		// unnecessarily. See issue #35470.
		p = ctxt.StartUnsafePoint(p, newprog)
	} else if framesize <= abi.StackBig {
		// large stack: SP-framesize <= stackguard-StackSmall
		//	LEAQ -xxx(SP), tmp
		//	CMPQ tmp, stackguard
		p = obj.Appendp(p, newprog)

		p.As = lea
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = -(int64(framesize) - abi.StackSmall)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = tmp
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}

		p = ctxt.StartUnsafePoint(p, newprog) // see the comment above
	} else {
		// Such a large stack we need to protect against underflow.
		// The runtime guarantees SP > abi.StackBig, but
		// framesize is large enough that SP-framesize may
		// underflow, causing a direct comparison with the
		// stack guard to incorrectly succeed. We explicitly
		// guard against underflow.
		//
		//	MOVQ	SP, tmp
		//	SUBQ	$(framesize - StackSmall), tmp
		//	// If subtraction wrapped (carry set), morestack.
		//	JCS	label-of-call-to-morestack
		//	CMPQ	tmp, stackguard

		p = obj.Appendp(p, newprog)

		p.As = mov
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		p = ctxt.StartUnsafePoint(p, newprog) // see the comment above

		p = obj.Appendp(p, newprog)
		p.As = sub
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(framesize) - abi.StackSmall
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		p = obj.Appendp(p, newprog)
		p.As = AJCS
		p.To.Type = obj.TYPE_BRANCH
		q1 = p

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = tmp
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}
	}

	// common
	// Conditional branch to the morestack call; its target is filled in
	// once the out-of-line sequence exists.
	jls := obj.Appendp(p, newprog)
	jls.As = AJLS
	jls.To.Type = obj.TYPE_BRANCH

	end := ctxt.EndUnsafePoint(jls, newprog, -1)

	// Find the current last instruction of the function.
	var last *obj.Prog
	for last = cursym.Func().Text; last.Link != nil; last = last.Link {
	}

	// Now we are at the end of the function, but logically
	// we are still in function prologue. We need to fix the
	// SP data and PCDATA.
	spfix := obj.Appendp(last, newprog)
	spfix.As = obj.ANOP
	spfix.Spadj = -framesize

	pcdata := ctxt.EmitEntryStackMap(cursym, spfix, newprog)
	spill := ctxt.StartUnsafePoint(pcdata, newprog)
	pcdata = cursym.Func().SpillRegisterArgs(spill, newprog)

	call := obj.Appendp(pcdata, newprog)
	call.Pos = cursym.Func().Text.Pos
	call.As = obj.ACALL
	call.To.Type = obj.TYPE_BRANCH
	call.To.Name = obj.NAME_EXTERN
	// Choose the morestack variant: morestackc for C functions,
	// morestack_noctxt when the function does not need the context register.
	morestack := "runtime.morestack"
	switch {
	case cursym.CFunc():
		morestack = "runtime.morestackc"
	case !cursym.Func().Text.From.Sym.NeedCtxt():
		morestack = "runtime.morestack_noctxt"
	}
	call.To.Sym = ctxt.Lookup(morestack)
	// When compiling 386 code for dynamic linking, the call needs to be adjusted
	// to follow PIC rules. This in turn can insert more instructions, so we need
	// to keep track of the start of the call (where the jump will be to) and the
	// end (which following instructions are appended to).
	callend := call
	progedit(ctxt, callend, newprog)
	for ; callend.Link != nil; callend = callend.Link {
		progedit(ctxt, callend.Link, newprog)
	}

	// The instructions which unspill regs should be preemptible.
	pcdata = ctxt.EndUnsafePoint(callend, newprog, -1)
	unspill := cursym.Func().UnspillRegisterArgs(pcdata, newprog)

	// Jump back to the start of the check and undo the SP adjustment
	// recorded on spfix.
	jmp := obj.Appendp(unspill, newprog)
	jmp.As = obj.AJMP
	jmp.To.Type = obj.TYPE_BRANCH
	jmp.To.SetTarget(startPred.Link)
	jmp.Spadj = +framesize

	// Both the guard comparison branch and (if present) the underflow
	// branch go to the start of the out-of-line sequence.
	jls.To.SetTarget(spill)
	if q1 != nil {
		q1.To.SetTarget(spill)
	}

	return end
}
  1115  
  1116  func isR15(r int16) bool {
  1117  	return r == REG_R15 || r == REG_R15B
  1118  }
  1119  func addrMentionsR15(a *obj.Addr) bool {
  1120  	if a == nil {
  1121  		return false
  1122  	}
  1123  	return isR15(a.Reg) || isR15(a.Index)
  1124  }
  1125  func progMentionsR15(p *obj.Prog) bool {
  1126  	return addrMentionsR15(&p.From) || addrMentionsR15(&p.To) || isR15(p.Reg) || addrMentionsR15(p.GetFrom3())
  1127  }
  1128  
  1129  func addrUsesGlobal(a *obj.Addr) bool {
  1130  	if a == nil {
  1131  		return false
  1132  	}
  1133  	return a.Name == obj.NAME_EXTERN && !a.Sym.Local()
  1134  }
  1135  func progUsesGlobal(p *obj.Prog) bool {
  1136  	if p.As == obj.ACALL || p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
  1137  		// These opcodes don't use a GOT to access their argument (see rewriteToUseGot),
  1138  		// or R15 would be dead at them anyway.
  1139  		return false
  1140  	}
  1141  	if p.As == ALEAQ {
  1142  		// The GOT entry is placed directly in the destination register; R15 is not used.
  1143  		return false
  1144  	}
  1145  	return addrUsesGlobal(&p.From) || addrUsesGlobal(&p.To) || addrUsesGlobal(p.GetFrom3())
  1146  }
  1147  
// rwMask is a bit set describing which operands of an instruction are read
// and which are written. It is produced by progRW.
type rwMask int

// Bits of rwMask: one read bit and one write bit for each of the From, To,
// Reg, and From3 operands of an obj.Prog.
const (
	readFrom rwMask = 1 << iota
	readTo
	readReg
	readFrom3
	writeFrom
	writeTo
	writeReg
	writeFrom3
)
  1160  
  1161  // progRW returns a mask describing the effects of the instruction p.
  1162  // Note: this isn't exhaustively accurate. It is only currently used for detecting
  1163  // reads/writes to R15, so SSE register behavior isn't fully correct, and
  1164  // other weird cases (e.g. writes to DX by CLD) also aren't captured.
  1165  func progRW(p *obj.Prog) rwMask {
  1166  	var m rwMask
  1167  	// Default for most instructions
  1168  	if p.From.Type != obj.TYPE_NONE {
  1169  		m |= readFrom
  1170  	}
  1171  	if p.To.Type != obj.TYPE_NONE {
  1172  		// Most x86 instructions update the To value
  1173  		m |= readTo | writeTo
  1174  	}
  1175  	if p.Reg != 0 {
  1176  		m |= readReg
  1177  	}
  1178  	if p.GetFrom3() != nil {
  1179  		m |= readFrom3
  1180  	}
  1181  
  1182  	// Lots of exceptions to the above defaults.
  1183  	name := p.As.String()
  1184  	if strings.HasPrefix(name, "MOV") || strings.HasPrefix(name, "PMOV") {
  1185  		// MOV instructions don't read To.
  1186  		m &^= readTo
  1187  	}
  1188  	switch p.As {
  1189  	case APOPW, APOPL, APOPQ,
  1190  		ALEAL, ALEAQ,
  1191  		AIMUL3W, AIMUL3L, AIMUL3Q,
  1192  		APEXTRB, APEXTRW, APEXTRD, APEXTRQ, AVPEXTRB, AVPEXTRW, AVPEXTRD, AVPEXTRQ, AEXTRACTPS,
  1193  		ABSFW, ABSFL, ABSFQ, ABSRW, ABSRL, ABSRQ, APOPCNTW, APOPCNTL, APOPCNTQ, ALZCNTW, ALZCNTL, ALZCNTQ,
  1194  		ASHLXL, ASHLXQ, ASHRXL, ASHRXQ, ASARXL, ASARXQ:
  1195  		// These instructions are pure writes to To. They don't use its old value.
  1196  		m &^= readTo
  1197  	case AXORL, AXORQ:
  1198  		// Register-clearing idiom doesn't read previous value.
  1199  		if p.From.Type == obj.TYPE_REG && p.To.Type == obj.TYPE_REG && p.From.Reg == p.To.Reg {
  1200  			m &^= readFrom | readTo
  1201  		}
  1202  	case AMULXL, AMULXQ:
  1203  		// These are write-only to both To and From3.
  1204  		m &^= readTo | readFrom3
  1205  		m |= writeFrom3
  1206  	}
  1207  	return m
  1208  }
  1209  
  1210  // progReadsR15 reports whether p reads the register R15.
  1211  func progReadsR15(p *obj.Prog) bool {
  1212  	m := progRW(p)
  1213  	if m&readFrom != 0 && p.From.Type == obj.TYPE_REG && isR15(p.From.Reg) {
  1214  		return true
  1215  	}
  1216  	if m&readTo != 0 && p.To.Type == obj.TYPE_REG && isR15(p.To.Reg) {
  1217  		return true
  1218  	}
  1219  	if m&readReg != 0 && isR15(p.Reg) {
  1220  		return true
  1221  	}
  1222  	if m&readFrom3 != 0 && p.GetFrom3().Type == obj.TYPE_REG && isR15(p.GetFrom3().Reg) {
  1223  		return true
  1224  	}
  1225  	// reads of the index registers
  1226  	if p.From.Type == obj.TYPE_MEM && (isR15(p.From.Reg) || isR15(p.From.Index)) {
  1227  		return true
  1228  	}
  1229  	if p.To.Type == obj.TYPE_MEM && (isR15(p.To.Reg) || isR15(p.To.Index)) {
  1230  		return true
  1231  	}
  1232  	if f3 := p.GetFrom3(); f3 != nil && f3.Type == obj.TYPE_MEM && (isR15(f3.Reg) || isR15(f3.Index)) {
  1233  		return true
  1234  	}
  1235  	return false
  1236  }
  1237  
  1238  // progWritesR15 reports whether p writes the register R15.
  1239  func progWritesR15(p *obj.Prog) bool {
  1240  	m := progRW(p)
  1241  	if m&writeFrom != 0 && p.From.Type == obj.TYPE_REG && isR15(p.From.Reg) {
  1242  		return true
  1243  	}
  1244  	if m&writeTo != 0 && p.To.Type == obj.TYPE_REG && isR15(p.To.Reg) {
  1245  		return true
  1246  	}
  1247  	if m&writeReg != 0 && isR15(p.Reg) {
  1248  		return true
  1249  	}
  1250  	if m&writeFrom3 != 0 && p.GetFrom3().Type == obj.TYPE_REG && isR15(p.GetFrom3().Reg) {
  1251  		return true
  1252  	}
  1253  	return false
  1254  }
  1255  
  1256  func errorCheck(ctxt *obj.Link, s *obj.LSym) {
  1257  	// When dynamic linking, R15 is used to access globals. Reject code that
  1258  	// uses R15 after a global variable access.
  1259  	if !ctxt.Flag_dynlink {
  1260  		return
  1261  	}
  1262  
  1263  	// Flood fill all the instructions where R15's value is junk.
  1264  	// If there are any uses of R15 in that set, report an error.
  1265  	var work []*obj.Prog
  1266  	var mentionsR15 bool
  1267  	for p := s.Func().Text; p != nil; p = p.Link {
  1268  		if progUsesGlobal(p) {
  1269  			work = append(work, p)
  1270  			p.Mark |= markBit
  1271  		}
  1272  		if progMentionsR15(p) {
  1273  			mentionsR15 = true
  1274  		}
  1275  	}
  1276  	if mentionsR15 {
  1277  		for len(work) > 0 {
  1278  			p := work[len(work)-1]
  1279  			work = work[:len(work)-1]
  1280  			if progReadsR15(p) {
  1281  				pos := ctxt.PosTable.Pos(p.Pos)
  1282  				ctxt.Diag("%s:%s: when dynamic linking, R15 is clobbered by a global variable access and is used here: %v", path.Base(pos.Filename()), pos.LineNumber(), p)
  1283  				break // only report one error
  1284  			}
  1285  			if progWritesR15(p) {
  1286  				// R15 is overwritten by this instruction. Its value is not junk any more.
  1287  				continue
  1288  			}
  1289  			if q := p.To.Target(); q != nil && q.Mark&markBit == 0 {
  1290  				q.Mark |= markBit
  1291  				work = append(work, q)
  1292  			}
  1293  			if p.As == obj.AJMP || p.As == obj.ARET {
  1294  				continue // no fallthrough
  1295  			}
  1296  			if q := p.Link; q != nil && q.Mark&markBit == 0 {
  1297  				q.Mark |= markBit
  1298  				work = append(work, q)
  1299  			}
  1300  		}
  1301  	}
  1302  
  1303  	// Clean up.
  1304  	for p := s.Func().Text; p != nil; p = p.Link {
  1305  		p.Mark &^= markBit
  1306  	}
  1307  }
  1308  
// unaryDst is the opcode set installed as the UnaryDst field of both
// Linkamd64 and Link386.
// NOTE(review): presumably these are the single-operand instructions whose
// one operand is a destination rather than a source — confirm against the
// documentation of obj.LinkArch.UnaryDst.
var unaryDst = map[obj.As]bool{
	ABSWAPL:     true,
	ABSWAPQ:     true,
	ACLDEMOTE:   true,
	ACLFLUSH:    true,
	ACLFLUSHOPT: true,
	ACLWB:       true,
	ACMPXCHG16B: true,
	ACMPXCHG8B:  true,
	ADECB:       true,
	ADECL:       true,
	ADECQ:       true,
	ADECW:       true,
	AFBSTP:      true,
	AFFREE:      true,
	AFLDENV:     true,
	AFSAVE:      true,
	AFSTCW:      true,
	AFSTENV:     true,
	AFSTSW:      true,
	AFXSAVE64:   true,
	AFXSAVE:     true,
	AINCB:       true,
	AINCL:       true,
	AINCQ:       true,
	AINCW:       true,
	ANEGB:       true,
	ANEGL:       true,
	ANEGQ:       true,
	ANEGW:       true,
	ANOTB:       true,
	ANOTL:       true,
	ANOTQ:       true,
	ANOTW:       true,
	APOPL:       true,
	APOPQ:       true,
	APOPW:       true,
	ARDFSBASEL:  true,
	ARDFSBASEQ:  true,
	ARDGSBASEL:  true,
	ARDGSBASEQ:  true,
	ARDPID:      true,
	ARDRANDL:    true,
	ARDRANDQ:    true,
	ARDRANDW:    true,
	ARDSEEDL:    true,
	ARDSEEDQ:    true,
	ARDSEEDW:    true,
	ASETCC:      true,
	ASETCS:      true,
	ASETEQ:      true,
	ASETGE:      true,
	ASETGT:      true,
	ASETHI:      true,
	ASETLE:      true,
	ASETLS:      true,
	ASETLT:      true,
	ASETMI:      true,
	ASETNE:      true,
	ASETOC:      true,
	ASETOS:      true,
	ASETPC:      true,
	ASETPL:      true,
	ASETPS:      true,
	ASGDT:       true,
	ASIDT:       true,
	ASLDTL:      true,
	ASLDTQ:      true,
	ASLDTW:      true,
	ASMSWL:      true,
	ASMSWQ:      true,
	ASMSWW:      true,
	ASTMXCSR:    true,
	ASTRL:       true,
	ASTRQ:       true,
	ASTRW:       true,
	AXSAVE64:    true,
	AXSAVE:      true,
	AXSAVEC64:   true,
	AXSAVEC:     true,
	AXSAVEOPT64: true,
	AXSAVEOPT:   true,
	AXSAVES64:   true,
	AXSAVES:     true,
}
  1394  
// Linkamd64 is the obj.LinkArch for sys.ArchAMD64. It wires this package's
// instinit, errorCheck, preprocess, span6, progedit, and populateSeh
// functions, the unaryDst table, and the AMD64 DWARF register mapping into
// the corresponding LinkArch hooks.
var Linkamd64 = obj.LinkArch{
	Arch:           sys.ArchAMD64,
	Init:           instinit,
	ErrorCheck:     errorCheck,
	Preprocess:     preprocess,
	Assemble:       span6,
	Progedit:       progedit,
	SEH:            populateSeh,
	UnaryDst:       unaryDst,
	DWARFRegisters: AMD64DWARFRegisters,
}
  1406  
// Link386 is the obj.LinkArch for sys.Arch386. It uses the same instinit,
// preprocess, span6, and progedit functions and the same unaryDst table as
// Linkamd64, with the X86 DWARF register mapping; it sets no ErrorCheck or
// SEH hook.
var Link386 = obj.LinkArch{
	Arch:           sys.Arch386,
	Init:           instinit,
	Preprocess:     preprocess,
	Assemble:       span6,
	Progedit:       progedit,
	UnaryDst:       unaryDst,
	DWARFRegisters: X86DWARFRegisters,
}
  1416  

View as plain text