Source file src/cmd/compile/internal/ssa/rewrite.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/base"
     9  	"cmd/compile/internal/logopt"
    10  	"cmd/compile/internal/reflectdata"
    11  	"cmd/compile/internal/types"
    12  	"cmd/internal/obj"
    13  	"cmd/internal/obj/s390x"
    14  	"cmd/internal/objabi"
    15  	"cmd/internal/src"
    16  	"encoding/binary"
    17  	"fmt"
    18  	"internal/buildcfg"
    19  	"io"
    20  	"math"
    21  	"math/bits"
    22  	"os"
    23  	"path/filepath"
    24  	"strings"
    25  )
    26  
    27  type deadValueChoice bool
    28  
    29  const (
    30  	leaveDeadValues  deadValueChoice = false
    31  	removeDeadValues                 = true
    32  
    33  	repZeroThreshold = 1408 // size beyond which we use REP STOS for zeroing
    34  )
    35  
     36  // applyRewrite repeatedly applies the block rewriter rb and the value rewriter rv to f
         // until no further rewrites occur.
         // deadcode indicates whether rewrite should try to remove any values that become dead.
    37  func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
    38  	// repeat rewrites until we find no more rewrites
    39  	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
    40  	pendingLines.clear()
    41  	debug := f.pass.debug
    42  	if debug > 1 {
    43  		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
    44  	}
    45  	// if the number of rewrite iterations reaches itersLimit we will
    46  	// at that point turn on cycle detection. Instead of a fixed limit,
    47  	// size the limit according to func size to allow for cases such
    48  	// as the one in issue #66773.
    49  	itersLimit := f.NumBlocks()
    50  	if itersLimit < 20 {
    51  		itersLimit = 20
    52  	}
    53  	var iters int
    54  	var states map[string]bool
    55  	for {
    56  		change := false
    57  		deadChange := false
    58  		for _, b := range f.Blocks {
    59  			var b0 *Block
    60  			if debug > 1 {
    61  				b0 = new(Block)
    62  				*b0 = *b
    63  				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
    64  			}
    65  			for i, c := range b.ControlValues() {
    66  				for c.Op == OpCopy {
    67  					c = c.Args[0]
    68  					b.ReplaceControl(i, c)
    69  				}
    70  			}
    71  			if rb(b) {
    72  				change = true
    73  				if debug > 1 {
    74  					fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
    75  				}
    76  			}
    77  			for j, v := range b.Values {
    78  				var v0 *Value
    79  				if debug > 1 {
    80  					v0 = new(Value)
    81  					*v0 = *v
    82  					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
    83  				}
    84  				if v.Uses == 0 && v.removeable() {
    85  					if v.Op != OpInvalid && deadcode == removeDeadValues {
    86  						// Reset any values that are now unused, so that we decrement
    87  						// the use count of all of its arguments.
    88  						// Not quite a deadcode pass, because it does not handle cycles.
    89  						// But it should help Uses==1 rules to fire.
    90  						v.reset(OpInvalid)
    91  						deadChange = true
    92  					}
    93  					// No point rewriting values which aren't used.
    94  					continue
    95  				}
    96  
    97  				vchange := phielimValue(v)
    98  				if vchange && debug > 1 {
    99  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   100  				}
   101  
   102  				// Eliminate copy inputs.
   103  				// If any copy input becomes unused, mark it
   104  				// as invalid and discard its argument. Repeat
   105  				// recursively on the discarded argument.
   106  				// This phase helps remove phantom "dead copy" uses
    107  				// of a value so that an x.Uses==1 rule condition
   108  				// fires reliably.
   109  				for i, a := range v.Args {
   110  					if a.Op != OpCopy {
   111  						continue
   112  					}
   113  					aa := copySource(a)
   114  					v.SetArg(i, aa)
   115  					// If a, a copy, has a line boundary indicator, attempt to find a new value
   116  					// to hold it.  The first candidate is the value that will replace a (aa),
   117  					// if it shares the same block and line and is eligible.
   118  					// The second option is v, which has a as an input.  Because aa is earlier in
   119  					// the data flow, it is the better choice.
   120  					if a.Pos.IsStmt() == src.PosIsStmt {
   121  						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
   122  							aa.Pos = aa.Pos.WithIsStmt()
   123  						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
   124  							v.Pos = v.Pos.WithIsStmt()
   125  						} else {
   126  							// Record the lost line and look for a new home after all rewrites are complete.
   127  							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
   128  							// line to appear in more than one block, but only one block is stored, so if both end
   129  							// up here, then one will be lost.
   130  							pendingLines.set(a.Pos, int32(a.Block.ID))
   131  						}
   132  						a.Pos = a.Pos.WithNotStmt()
   133  					}
   134  					vchange = true
   135  					for a.Uses == 0 {
   136  						b := a.Args[0]
   137  						a.reset(OpInvalid)
   138  						a = b
   139  					}
   140  				}
   141  				if vchange && debug > 1 {
   142  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   143  				}
   144  
   145  				// apply rewrite function
   146  				if rv(v) {
   147  					vchange = true
   148  					// If value changed to a poor choice for a statement boundary, move the boundary
   149  					if v.Pos.IsStmt() == src.PosIsStmt {
   150  						if k := nextGoodStatementIndex(v, j, b); k != j {
   151  							v.Pos = v.Pos.WithNotStmt()
   152  							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
   153  						}
   154  					}
   155  				}
   156  
   157  				change = change || vchange
   158  				if vchange && debug > 1 {
   159  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   160  				}
   161  			}
   162  		}
   163  		if !change && !deadChange {
   164  			break
   165  		}
   166  		iters++
   167  		if (iters > itersLimit || debug >= 2) && change {
   168  			// We've done a suspiciously large number of rewrites (or we're in debug mode).
   169  			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
   170  			// and the maximum value encountered during make.bash is 12.
   171  			// Start checking for cycles. (This is too expensive to do routinely.)
   172  			// Note: we avoid this path for deadChange-only iterations, to fix #51639.
   173  			if states == nil {
   174  				states = make(map[string]bool)
   175  			}
   176  			h := f.rewriteHash()
   177  			if _, ok := states[h]; ok {
   178  				// We've found a cycle.
   179  				// To diagnose it, set debug to 2 and start again,
   180  				// so that we'll print all rules applied until we complete another cycle.
   181  				// If debug is already >= 2, we've already done that, so it's time to crash.
   182  				if debug < 2 {
   183  					debug = 2
   184  					states = make(map[string]bool)
   185  				} else {
   186  					f.Fatalf("rewrite cycle detected")
   187  				}
   188  			}
   189  			states[h] = true
   190  		}
   191  	}
   192  	// remove clobbered values
   193  	for _, b := range f.Blocks {
   194  		j := 0
   195  		for i, v := range b.Values {
   196  			vl := v.Pos
   197  			if v.Op == OpInvalid {
   198  				if v.Pos.IsStmt() == src.PosIsStmt {
   199  					pendingLines.set(vl, int32(b.ID))
   200  				}
   201  				f.freeValue(v)
   202  				continue
   203  			}
   204  			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) {
   205  				if pl, ok := pendingLines.get(vl); ok && pl == int32(b.ID) {
   206  					pendingLines.remove(vl)
   207  					v.Pos = v.Pos.WithIsStmt()
   208  				}
   209  			}
   210  			if i != j {
   211  				b.Values[j] = v
   212  			}
   213  			j++
   214  		}
   215  		if pl, ok := pendingLines.get(b.Pos); ok && pl == int32(b.ID) {
   216  			b.Pos = b.Pos.WithIsStmt()
   217  			pendingLines.remove(b.Pos)
   218  		}
   219  		b.truncateValues(j)
   220  	}
   221  }
   222  
   223  // Common functions called from rewriting rules
   224  
   225  func is64BitFloat(t *types.Type) bool {
   226  	return t.Size() == 8 && t.IsFloat()
   227  }
   228  
   229  func is32BitFloat(t *types.Type) bool {
   230  	return t.Size() == 4 && t.IsFloat()
   231  }
   232  
   233  func is64BitInt(t *types.Type) bool {
   234  	return t.Size() == 8 && t.IsInteger()
   235  }
   236  
   237  func is32BitInt(t *types.Type) bool {
   238  	return t.Size() == 4 && t.IsInteger()
   239  }
   240  
   241  func is16BitInt(t *types.Type) bool {
   242  	return t.Size() == 2 && t.IsInteger()
   243  }
   244  
   245  func is8BitInt(t *types.Type) bool {
   246  	return t.Size() == 1 && t.IsInteger()
   247  }
   248  
   249  func isPtr(t *types.Type) bool {
   250  	return t.IsPtrShaped()
   251  }
   252  
   253  func copyCompatibleType(t1, t2 *types.Type) bool {
   254  	if t1.Size() != t2.Size() {
   255  		return false
   256  	}
   257  	if t1.IsInteger() {
   258  		return t2.IsInteger()
   259  	}
   260  	if isPtr(t1) {
   261  		return isPtr(t2)
   262  	}
   263  	return t1.Compare(t2) == types.CMPeq
   264  }
   265  
   266  // mergeSym merges two symbolic offsets. There is no real merging of
    267  // offsets; we just pick the non-nil one.
   268  func mergeSym(x, y Sym) Sym {
   269  	if x == nil {
   270  		return y
   271  	}
   272  	if y == nil {
   273  		return x
   274  	}
   275  	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
   276  }
   277  
   278  func canMergeSym(x, y Sym) bool {
   279  	return x == nil || y == nil
   280  }
   281  
   282  // canMergeLoadClobber reports whether the load can be merged into target without
   283  // invalidating the schedule.
   284  // It also checks that the other non-load argument x is something we
   285  // are ok with clobbering.
   286  func canMergeLoadClobber(target, load, x *Value) bool {
   287  	// The register containing x is going to get clobbered.
   288  	// Don't merge if we still need the value of x.
   289  	// We don't have liveness information here, but we can
   290  	// approximate x dying with:
   291  	//  1) target is x's only use.
   292  	//  2) target is not in a deeper loop than x.
   293  	switch {
   294  	case x.Uses == 2 && x.Op == OpPhi && len(x.Args) == 2 && (x.Args[0] == target || x.Args[1] == target) && target.Uses == 1:
   295  		// This is a simple detector to determine that x is probably
   296  		// not live after target. (It does not need to be perfect,
   297  		// regalloc will issue a reg-reg move to save it if we are wrong.)
   298  		// We have:
   299  		//   x = Phi(?, target)
   300  		//   target = Op(load, x)
   301  		// Because target has only one use as a Phi argument, we can schedule it
   302  		// very late. Hopefully, later than the other use of x. (The other use died
   303  		// between x and target, or exists on another branch entirely).
   304  	case x.Uses > 1:
   305  		return false
   306  	}
   307  	loopnest := x.Block.Func.loopnest()
   308  	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
   309  		return false
   310  	}
   311  	return canMergeLoad(target, load)
   312  }
   313  
   314  // canMergeLoad reports whether the load can be merged into target without
   315  // invalidating the schedule.
   316  func canMergeLoad(target, load *Value) bool {
   317  	if target.Block.ID != load.Block.ID {
   318  		// If the load is in a different block do not merge it.
   319  		return false
   320  	}
   321  
   322  	// We can't merge the load into the target if the load
   323  	// has more than one use.
   324  	if load.Uses != 1 {
   325  		return false
   326  	}
   327  
   328  	mem := load.MemoryArg()
   329  
   330  	// We need the load's memory arg to still be alive at target. That
   331  	// can't be the case if one of target's args depends on a memory
   332  	// state that is a successor of load's memory arg.
   333  	//
   334  	// For example, it would be invalid to merge load into target in
   335  	// the following situation because newmem has killed oldmem
   336  	// before target is reached:
   337  	//     load = read ... oldmem
   338  	//   newmem = write ... oldmem
   339  	//     arg0 = read ... newmem
   340  	//   target = add arg0 load
   341  	//
   342  	// If the argument comes from a different block then we can exclude
   343  	// it immediately because it must dominate load (which is in the
   344  	// same block as target).
   345  	var args []*Value
   346  	for _, a := range target.Args {
   347  		if a != load && a.Block.ID == target.Block.ID {
   348  			args = append(args, a)
   349  		}
   350  	}
   351  
   352  	// memPreds contains memory states known to be predecessors of load's
   353  	// memory state. It is lazily initialized.
   354  	var memPreds map[*Value]bool
   355  	for i := 0; len(args) > 0; i++ {
   356  		const limit = 100
   357  		if i >= limit {
   358  			// Give up if we have done a lot of iterations.
   359  			return false
   360  		}
   361  		v := args[len(args)-1]
   362  		args = args[:len(args)-1]
   363  		if target.Block.ID != v.Block.ID {
   364  			// Since target and load are in the same block
   365  			// we can stop searching when we leave the block.
   366  			continue
   367  		}
   368  		if v.Op == OpPhi {
   369  			// A Phi implies we have reached the top of the block.
   370  			// The memory phi, if it exists, is always
   371  			// the first logical store in the block.
   372  			continue
   373  		}
   374  		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
   375  			// We could handle this situation however it is likely
   376  			// to be very rare.
   377  			return false
   378  		}
   379  		if v.Op.SymEffect()&SymAddr != 0 {
   380  			// This case prevents an operation that calculates the
   381  			// address of a local variable from being forced to schedule
   382  			// before its corresponding VarDef.
   383  			// See issue 28445.
   384  			//   v1 = LOAD ...
   385  			//   v2 = VARDEF
   386  			//   v3 = LEAQ
   387  			//   v4 = CMPQ v1 v3
   388  			// We don't want to combine the CMPQ with the load, because
   389  			// that would force the CMPQ to schedule before the VARDEF, which
   390  			// in turn requires the LEAQ to schedule before the VARDEF.
   391  			return false
   392  		}
   393  		if v.Type.IsMemory() {
   394  			if memPreds == nil {
   395  				// Initialise a map containing memory states
   396  				// known to be predecessors of load's memory
   397  				// state.
   398  				memPreds = make(map[*Value]bool)
   399  				m := mem
   400  				const limit = 50
   401  				for i := 0; i < limit; i++ {
   402  					if m.Op == OpPhi {
   403  						// The memory phi, if it exists, is always
   404  						// the first logical store in the block.
   405  						break
   406  					}
   407  					if m.Block.ID != target.Block.ID {
   408  						break
   409  					}
   410  					if !m.Type.IsMemory() {
   411  						break
   412  					}
   413  					memPreds[m] = true
   414  					if len(m.Args) == 0 {
   415  						break
   416  					}
   417  					m = m.MemoryArg()
   418  				}
   419  			}
   420  
   421  			// We can merge if v is a predecessor of mem.
   422  			//
   423  			// For example, we can merge load into target in the
   424  			// following scenario:
   425  			//      x = read ... v
   426  			//    mem = write ... v
   427  			//   load = read ... mem
   428  			// target = add x load
   429  			if memPreds[v] {
   430  				continue
   431  			}
   432  			return false
   433  		}
   434  		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
   435  			// If v takes mem as an input then we know mem
   436  			// is valid at this point.
   437  			continue
   438  		}
   439  		for _, a := range v.Args {
   440  			if target.Block.ID == a.Block.ID {
   441  				args = append(args, a)
   442  			}
   443  		}
   444  	}
   445  
   446  	return true
   447  }
   448  
    449  // isSameCall reports whether aux refers to a call to the function with the given name.
   450  func isSameCall(aux Aux, name string) bool {
   451  	fn := aux.(*AuxCall).Fn
   452  	return fn != nil && fn.String() == name
   453  }
   454  
    455  // canLoadUnaligned reports whether the architecture supports unaligned load operations.
   456  func canLoadUnaligned(c *Config) bool {
   457  	return c.ctxt.Arch.Alignment == 1
   458  }
   459  
   460  // nlzX returns the number of leading zeros.
   461  func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
   462  func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
   463  func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
   464  func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
   465  
   466  // ntzX returns the number of trailing zeros.
   467  func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
   468  func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
   469  func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
   470  func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
   471  
   472  func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
   473  func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
   474  func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
   475  func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
   476  func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
   477  
   478  // nto returns the number of trailing ones.
   479  func nto(x int64) int64 {
   480  	return int64(ntz64(^x))
   481  }
   482  
    483  // logX returns the base-2 logarithm of n.
    484  // n must be a positive power of 2 (isPowerOfTwo returns true).
   485  func log8(n int8) int64   { return log8u(uint8(n)) }
   486  func log16(n int16) int64 { return log16u(uint16(n)) }
   487  func log32(n int32) int64 { return log32u(uint32(n)) }
   488  func log64(n int64) int64 { return log64u(uint64(n)) }
   489  
    490  // logXu returns the base-2 logarithm of n.
    491  // n must be a power of 2 (isUnsignedPowerOfTwo returns true).
   492  func log8u(n uint8) int64   { return int64(bits.Len8(n)) - 1 }
   493  func log16u(n uint16) int64 { return int64(bits.Len16(n)) - 1 }
   494  func log32u(n uint32) int64 { return int64(bits.Len32(n)) - 1 }
   495  func log64u(n uint64) int64 { return int64(bits.Len64(n)) - 1 }
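
         // Illustrative examples (informal): log8u(1) == 0, log32u(8) == 3, and
         // log64u(1<<40) == 40; the signed wrappers give the same results for
         // positive powers of two, e.g. log16(256) == 8.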
   496  
    497  // isPowerOfTwo reports whether n is a positive power of 2.
   498  func isPowerOfTwo[T int8 | int16 | int32 | int64](n T) bool {
   499  	return n > 0 && n&(n-1) == 0
   500  }
   501  
   502  // isUnsignedPowerOfTwo reports whether n is an unsigned power of 2.
   503  func isUnsignedPowerOfTwo[T uint8 | uint16 | uint32 | uint64](n T) bool {
   504  	return n != 0 && n&(n-1) == 0
   505  }
   506  
   507  // is32Bit reports whether n can be represented as a signed 32 bit integer.
   508  func is32Bit(n int64) bool {
   509  	return n == int64(int32(n))
   510  }
   511  
   512  // is16Bit reports whether n can be represented as a signed 16 bit integer.
   513  func is16Bit(n int64) bool {
   514  	return n == int64(int16(n))
   515  }
   516  
   517  // is8Bit reports whether n can be represented as a signed 8 bit integer.
   518  func is8Bit(n int64) bool {
   519  	return n == int64(int8(n))
   520  }
   521  
   522  // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
   523  func isU8Bit(n int64) bool {
   524  	return n == int64(uint8(n))
   525  }
   526  
   527  // is12Bit reports whether n can be represented as a signed 12 bit integer.
   528  func is12Bit(n int64) bool {
   529  	return -(1<<11) <= n && n < (1<<11)
   530  }
   531  
   532  // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
   533  func isU12Bit(n int64) bool {
   534  	return 0 <= n && n < (1<<12)
   535  }
   536  
   537  // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
   538  func isU16Bit(n int64) bool {
   539  	return n == int64(uint16(n))
   540  }
   541  
   542  // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
   543  func isU32Bit(n int64) bool {
   544  	return n == int64(uint32(n))
   545  }
   546  
   547  // is20Bit reports whether n can be represented as a signed 20 bit integer.
   548  func is20Bit(n int64) bool {
   549  	return -(1<<19) <= n && n < (1<<19)
   550  }
   551  
   552  // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
   553  func b2i(b bool) int64 {
   554  	if b {
   555  		return 1
   556  	}
   557  	return 0
   558  }
   559  
   560  // b2i32 translates a boolean value to 0 or 1.
   561  func b2i32(b bool) int32 {
   562  	if b {
   563  		return 1
   564  	}
   565  	return 0
   566  }
   567  
   568  func canMulStrengthReduce(config *Config, x int64) bool {
   569  	_, ok := config.mulRecipes[x]
   570  	return ok
   571  }
   572  func canMulStrengthReduce32(config *Config, x int32) bool {
   573  	_, ok := config.mulRecipes[int64(x)]
   574  	return ok
   575  }
   576  
   577  // mulStrengthReduce returns v*x evaluated at the location
   578  // (block and source position) of m.
   579  // canMulStrengthReduce must have returned true.
   580  func mulStrengthReduce(m *Value, v *Value, x int64) *Value {
   581  	return v.Block.Func.Config.mulRecipes[x].build(m, v)
   582  }
   583  
   584  // mulStrengthReduce32 returns v*x evaluated at the location
   585  // (block and source position) of m.
   586  // canMulStrengthReduce32 must have returned true.
    587  // The upper 32 bits of the result might be set to junk.
   588  func mulStrengthReduce32(m *Value, v *Value, x int32) *Value {
   589  	return v.Block.Func.Config.mulRecipes[int64(x)].build(m, v)
   590  }
   591  
   592  // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
   593  // A shift is bounded if it is shifting by less than the width of the shifted value.
   594  func shiftIsBounded(v *Value) bool {
   595  	return v.AuxInt != 0
   596  }
   597  
    598  // canonLessThan reports whether x is "ordered" less than y, for purposes of normalizing
   599  // generated code as much as possible.
   600  func canonLessThan(x, y *Value) bool {
   601  	if x.Op != y.Op {
   602  		return x.Op < y.Op
   603  	}
   604  	if !x.Pos.SameFileAndLine(y.Pos) {
   605  		return x.Pos.Before(y.Pos)
   606  	}
   607  	return x.ID < y.ID
   608  }
   609  
   610  // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
   611  // of the mantissa. It will panic if the truncation results in lost information.
   612  func truncate64Fto32F(f float64) float32 {
   613  	if !isExactFloat32(f) {
   614  		panic("truncate64Fto32F: truncation is not exact")
   615  	}
   616  	if !math.IsNaN(f) {
   617  		return float32(f)
   618  	}
   619  	// NaN bit patterns aren't necessarily preserved across conversion
   620  	// instructions so we need to do the conversion manually.
   621  	b := math.Float64bits(f)
   622  	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
   623  	//          | sign                  | exponent   | mantissa       |
   624  	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
   625  	return math.Float32frombits(r)
   626  }
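
         // Illustrative example of the manual NaN path above: for the quiet NaN with
         // float64 bits 0x7FF8000000000000, the sign bit is 0, the exponent field maps
         // to 0x7f800000, and the top 23 mantissa bits map to 0x400000, giving a
         // float32 with bits 0x7FC00000.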
   627  
   628  // DivisionNeedsFixUp reports whether the division needs fix-up code.
   629  func DivisionNeedsFixUp(v *Value) bool {
   630  	return v.AuxInt == 0
   631  }
   632  
   633  // auxTo32F decodes a float32 from the AuxInt value provided.
   634  func auxTo32F(i int64) float32 {
   635  	return truncate64Fto32F(math.Float64frombits(uint64(i)))
   636  }
   637  
   638  func auxIntToBool(i int64) bool {
   639  	if i == 0 {
   640  		return false
   641  	}
   642  	return true
   643  }
   644  func auxIntToInt8(i int64) int8 {
   645  	return int8(i)
   646  }
   647  func auxIntToInt16(i int64) int16 {
   648  	return int16(i)
   649  }
   650  func auxIntToInt32(i int64) int32 {
   651  	return int32(i)
   652  }
   653  func auxIntToInt64(i int64) int64 {
   654  	return i
   655  }
   656  func auxIntToUint8(i int64) uint8 {
   657  	return uint8(i)
   658  }
   659  func auxIntToFloat32(i int64) float32 {
   660  	return float32(math.Float64frombits(uint64(i)))
   661  }
   662  func auxIntToFloat64(i int64) float64 {
   663  	return math.Float64frombits(uint64(i))
   664  }
   665  func auxIntToValAndOff(i int64) ValAndOff {
   666  	return ValAndOff(i)
   667  }
   668  func auxIntToArm64BitField(i int64) arm64BitField {
   669  	return arm64BitField(i)
   670  }
   671  func auxIntToFlagConstant(x int64) flagConstant {
   672  	return flagConstant(x)
   673  }
   674  
   675  func auxIntToOp(cc int64) Op {
   676  	return Op(cc)
   677  }
   678  
   679  func boolToAuxInt(b bool) int64 {
   680  	if b {
   681  		return 1
   682  	}
   683  	return 0
   684  }
   685  func int8ToAuxInt(i int8) int64 {
   686  	return int64(i)
   687  }
   688  func int16ToAuxInt(i int16) int64 {
   689  	return int64(i)
   690  }
   691  func int32ToAuxInt(i int32) int64 {
   692  	return int64(i)
   693  }
   694  func int64ToAuxInt(i int64) int64 {
   695  	return int64(i)
   696  }
   697  func uint8ToAuxInt(i uint8) int64 {
   698  	return int64(int8(i))
   699  }
   700  func float32ToAuxInt(f float32) int64 {
   701  	return int64(math.Float64bits(float64(f)))
   702  }
   703  func float64ToAuxInt(f float64) int64 {
   704  	return int64(math.Float64bits(f))
   705  }
   706  func valAndOffToAuxInt(v ValAndOff) int64 {
   707  	return int64(v)
   708  }
   709  func arm64BitFieldToAuxInt(v arm64BitField) int64 {
   710  	return int64(v)
   711  }
   712  func flagConstantToAuxInt(x flagConstant) int64 {
   713  	return int64(x)
   714  }
   715  
   716  func opToAuxInt(o Op) int64 {
   717  	return int64(o)
   718  }
   719  
   720  // Aux is an interface to hold miscellaneous data in Blocks and Values.
   721  type Aux interface {
   722  	CanBeAnSSAAux()
   723  }
   724  
    725  // auxMark is, for now, only used to mark moves that need to avoid clobbering flags.
   726  type auxMark bool
   727  
   728  func (auxMark) CanBeAnSSAAux() {}
   729  
   730  var AuxMark auxMark
   731  
   732  // stringAux wraps string values for use in Aux.
   733  type stringAux string
   734  
   735  func (stringAux) CanBeAnSSAAux() {}
   736  
   737  func auxToString(i Aux) string {
   738  	return string(i.(stringAux))
   739  }
   740  func auxToSym(i Aux) Sym {
   741  	// TODO: kind of a hack - allows nil interface through
   742  	s, _ := i.(Sym)
   743  	return s
   744  }
   745  func auxToType(i Aux) *types.Type {
   746  	return i.(*types.Type)
   747  }
   748  func auxToCall(i Aux) *AuxCall {
   749  	return i.(*AuxCall)
   750  }
   751  func auxToS390xCCMask(i Aux) s390x.CCMask {
   752  	return i.(s390x.CCMask)
   753  }
   754  func auxToS390xRotateParams(i Aux) s390x.RotateParams {
   755  	return i.(s390x.RotateParams)
   756  }
   757  
   758  func StringToAux(s string) Aux {
   759  	return stringAux(s)
   760  }
   761  func symToAux(s Sym) Aux {
   762  	return s
   763  }
   764  func callToAux(s *AuxCall) Aux {
   765  	return s
   766  }
   767  func typeToAux(t *types.Type) Aux {
   768  	return t
   769  }
   770  func s390xCCMaskToAux(c s390x.CCMask) Aux {
   771  	return c
   772  }
   773  func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
   774  	return r
   775  }
   776  
   777  // uaddOvf reports whether unsigned a+b would overflow.
   778  func uaddOvf(a, b int64) bool {
   779  	return uint64(a)+uint64(b) < uint64(a)
   780  }
   781  
   782  func devirtLECall(v *Value, sym *obj.LSym) *Value {
   783  	v.Op = OpStaticLECall
   784  	auxcall := v.Aux.(*AuxCall)
   785  	auxcall.Fn = sym
   786  	// Remove first arg
   787  	v.Args[0].Uses--
   788  	copy(v.Args[0:], v.Args[1:])
   789  	v.Args[len(v.Args)-1] = nil // aid GC
   790  	v.Args = v.Args[:len(v.Args)-1]
   791  	if f := v.Block.Func; f.pass.debug > 0 {
   792  		f.Warnl(v.Pos, "de-virtualizing call")
   793  	}
   794  	return v
   795  }
   796  
   797  // isSamePtr reports whether p1 and p2 point to the same address.
   798  func isSamePtr(p1, p2 *Value) bool {
   799  	if p1 == p2 {
   800  		return true
   801  	}
   802  	if p1.Op != p2.Op {
   803  		for p1.Op == OpOffPtr && p1.AuxInt == 0 {
   804  			p1 = p1.Args[0]
   805  		}
   806  		for p2.Op == OpOffPtr && p2.AuxInt == 0 {
   807  			p2 = p2.Args[0]
   808  		}
   809  		if p1 == p2 {
   810  			return true
   811  		}
   812  		if p1.Op != p2.Op {
   813  			return false
   814  		}
   815  	}
   816  	switch p1.Op {
   817  	case OpOffPtr:
   818  		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
   819  	case OpAddr, OpLocalAddr:
   820  		return p1.Aux == p2.Aux
   821  	case OpAddPtr:
   822  		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
   823  	}
   824  	return false
   825  }
   826  
   827  func isStackPtr(v *Value) bool {
   828  	for v.Op == OpOffPtr || v.Op == OpAddPtr {
   829  		v = v.Args[0]
   830  	}
   831  	return v.Op == OpSP || v.Op == OpLocalAddr
   832  }
   833  
   834  // disjoint reports whether the memory region specified by [p1:p1+n1)
   835  // does not overlap with [p2:p2+n2).
   836  // A return value of false does not imply the regions overlap.
   837  func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
   838  	if n1 == 0 || n2 == 0 {
   839  		return true
   840  	}
   841  	if p1 == p2 {
   842  		return false
   843  	}
   844  	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
   845  		base, offset = ptr, 0
   846  		for base.Op == OpOffPtr {
   847  			offset += base.AuxInt
   848  			base = base.Args[0]
   849  		}
   850  		if opcodeTable[base.Op].nilCheck {
   851  			base = base.Args[0]
   852  		}
   853  		return base, offset
   854  	}
   855  
   856  	// Run types-based analysis
   857  	if disjointTypes(p1.Type, p2.Type) {
   858  		return true
   859  	}
   860  
   861  	p1, off1 := baseAndOffset(p1)
   862  	p2, off2 := baseAndOffset(p2)
   863  	if isSamePtr(p1, p2) {
   864  		return !overlap(off1, n1, off2, n2)
   865  	}
   866  	// p1 and p2 are not the same, so if they are both OpAddrs then
   867  	// they point to different variables.
   868  	// If one pointer is on the stack and the other is an argument
   869  	// then they can't overlap.
   870  	switch p1.Op {
   871  	case OpAddr, OpLocalAddr:
   872  		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
   873  			return true
   874  		}
   875  		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
   876  	case OpArg, OpArgIntReg:
   877  		if p2.Op == OpSP || p2.Op == OpLocalAddr {
   878  			return true
   879  		}
   880  	case OpSP:
   881  		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
   882  	}
   883  	return false
   884  }
   885  
   886  // disjointTypes reports whether a memory region pointed to by a pointer of type
   887  // t1 does not overlap with a memory region pointed to by a pointer of type t2 --
   888  // based on type aliasing rules.
   889  func disjointTypes(t1 *types.Type, t2 *types.Type) bool {
   890  	// Unsafe pointer can alias with anything.
   891  	if t1.IsUnsafePtr() || t2.IsUnsafePtr() {
   892  		return false
   893  	}
   894  
   895  	if !t1.IsPtr() || !t2.IsPtr() {
   896  		panic("disjointTypes: one of arguments is not a pointer")
   897  	}
   898  
   899  	t1 = t1.Elem()
   900  	t2 = t2.Elem()
   901  
    902  	// Not-in-heap types are not supported -- they are rare and unimportant; also,
    903  	// the type.HasPointers check doesn't work correctly for them.
   904  	if t1.NotInHeap() || t2.NotInHeap() {
   905  		return false
   906  	}
   907  
   908  	isPtrShaped := func(t *types.Type) bool { return int(t.Size()) == types.PtrSize && t.HasPointers() }
   909  
   910  	// Pointers and non-pointers are disjoint (https://pkg.go.dev/unsafe#Pointer).
   911  	if (isPtrShaped(t1) && !t2.HasPointers()) ||
   912  		(isPtrShaped(t2) && !t1.HasPointers()) {
   913  		return true
   914  	}
   915  
   916  	return false
   917  }
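
         // For example (informal): for arguments **T and *int64, the pointed-to types are
         // *T (pointer-shaped, has pointers) and int64 (no pointers), so disjointTypes
         // reports true; for *int32 and *int64 neither element is pointer-shaped, so it
         // falls through and reports false, leaving overlap detection to other analysis.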
   918  
   919  // moveSize returns the number of bytes an aligned MOV instruction moves.
   920  func moveSize(align int64, c *Config) int64 {
   921  	switch {
   922  	case align%8 == 0 && c.PtrSize == 8:
   923  		return 8
   924  	case align%4 == 0:
   925  		return 4
   926  	case align%2 == 0:
   927  		return 2
   928  	}
   929  	return 1
   930  }
   931  
   932  // mergePoint finds a block among a's blocks which dominates b and is itself
   933  // dominated by all of a's blocks. Returns nil if it can't find one.
   934  // Might return nil even if one does exist.
   935  func mergePoint(b *Block, a ...*Value) *Block {
   936  	// Walk backward from b looking for one of the a's blocks.
   937  
   938  	// Max distance
   939  	d := 100
   940  
   941  	for d > 0 {
   942  		for _, x := range a {
   943  			if b == x.Block {
   944  				goto found
   945  			}
   946  		}
   947  		if len(b.Preds) > 1 {
   948  			// Don't know which way to go back. Abort.
   949  			return nil
   950  		}
   951  		b = b.Preds[0].b
   952  		d--
   953  	}
   954  	return nil // too far away
   955  found:
    956  	// At this point, b is the block of the first value in a that we found by walking backwards.
    957  	// If we return anything, r will be it.
   958  	r := b
   959  
   960  	// Keep going, counting the other a's that we find. They must all dominate r.
   961  	na := 0
   962  	for d > 0 {
   963  		for _, x := range a {
   964  			if b == x.Block {
   965  				na++
   966  			}
   967  		}
   968  		if na == len(a) {
   969  			// Found all of a in a backwards walk. We can return r.
   970  			return r
   971  		}
   972  		if len(b.Preds) > 1 {
   973  			return nil
   974  		}
   975  		b = b.Preds[0].b
   976  		d--
   977  
   978  	}
   979  	return nil // too far away
   980  }
   981  
   982  // clobber invalidates values. Returns true.
   983  // clobber is used by rewrite rules to:
   984  //
   985  //	A) make sure the values are really dead and never used again.
   986  //	B) decrement use counts of the values' args.
   987  func clobber(vv ...*Value) bool {
   988  	for _, v := range vv {
   989  		v.reset(OpInvalid)
   990  		// Note: leave v.Block intact.  The Block field is used after clobber.
   991  	}
   992  	return true
   993  }
   994  
   995  // resetCopy resets v to be a copy of arg.
   996  // Always returns true.
   997  func resetCopy(v *Value, arg *Value) bool {
   998  	v.reset(OpCopy)
   999  	v.AddArg(arg)
  1000  	return true
  1001  }
  1002  
  1003  // clobberIfDead resets v when use count is 1. Returns true.
  1004  // clobberIfDead is used by rewrite rules to decrement
  1005  // use counts of v's args when v is dead and never used.
  1006  func clobberIfDead(v *Value) bool {
  1007  	if v.Uses == 1 {
  1008  		v.reset(OpInvalid)
  1009  	}
  1010  	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
  1011  	return true
  1012  }
  1013  
  1014  // noteRule is an easy way to track if a rule is matched when writing
  1015  // new ones.  Make the rule of interest also conditional on
  1016  //
  1017  //	noteRule("note to self: rule of interest matched")
  1018  //
  1019  // and that message will print when the rule matches.
  1020  func noteRule(s string) bool {
  1021  	fmt.Println(s)
  1022  	return true
  1023  }
  1024  
  1025  // countRule increments Func.ruleMatches[key].
  1026  // If Func.ruleMatches is non-nil at the end
  1027  // of compilation, it will be printed to stdout.
  1028  // This is intended to make it easier to find which functions
   1029  // contain lots of rule matches when developing new rules.
  1030  func countRule(v *Value, key string) bool {
  1031  	f := v.Block.Func
  1032  	if f.ruleMatches == nil {
  1033  		f.ruleMatches = make(map[string]int)
  1034  	}
  1035  	f.ruleMatches[key]++
  1036  	return true
  1037  }
  1038  
  1039  // warnRule generates compiler debug output with string s when
  1040  // v is not in autogenerated code, cond is true and the rule has fired.
  1041  func warnRule(cond bool, v *Value, s string) bool {
  1042  	if pos := v.Pos; pos.Line() > 1 && cond {
  1043  		v.Block.Func.Warnl(pos, s)
  1044  	}
  1045  	return true
  1046  }
  1047  
   1048  // flagArg extracts x from a pseudo-op like (LessThan x); it returns nil otherwise.
  1049  func flagArg(v *Value) *Value {
  1050  	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
  1051  		return nil
  1052  	}
  1053  	return v.Args[0]
  1054  }
  1055  
  1056  // arm64Negate finds the complement to an ARM64 condition code,
  1057  // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
  1058  //
  1059  // For floating point, it's more subtle because NaN is unordered. We do
  1060  // !LessThanF -> NotLessThanF, the latter takes care of NaNs.
  1061  func arm64Negate(op Op) Op {
  1062  	switch op {
  1063  	case OpARM64LessThan:
  1064  		return OpARM64GreaterEqual
  1065  	case OpARM64LessThanU:
  1066  		return OpARM64GreaterEqualU
  1067  	case OpARM64GreaterThan:
  1068  		return OpARM64LessEqual
  1069  	case OpARM64GreaterThanU:
  1070  		return OpARM64LessEqualU
  1071  	case OpARM64LessEqual:
  1072  		return OpARM64GreaterThan
  1073  	case OpARM64LessEqualU:
  1074  		return OpARM64GreaterThanU
  1075  	case OpARM64GreaterEqual:
  1076  		return OpARM64LessThan
  1077  	case OpARM64GreaterEqualU:
  1078  		return OpARM64LessThanU
  1079  	case OpARM64Equal:
  1080  		return OpARM64NotEqual
  1081  	case OpARM64NotEqual:
  1082  		return OpARM64Equal
  1083  	case OpARM64LessThanF:
  1084  		return OpARM64NotLessThanF
  1085  	case OpARM64NotLessThanF:
  1086  		return OpARM64LessThanF
  1087  	case OpARM64LessEqualF:
  1088  		return OpARM64NotLessEqualF
  1089  	case OpARM64NotLessEqualF:
  1090  		return OpARM64LessEqualF
  1091  	case OpARM64GreaterThanF:
  1092  		return OpARM64NotGreaterThanF
  1093  	case OpARM64NotGreaterThanF:
  1094  		return OpARM64GreaterThanF
  1095  	case OpARM64GreaterEqualF:
  1096  		return OpARM64NotGreaterEqualF
  1097  	case OpARM64NotGreaterEqualF:
  1098  		return OpARM64GreaterEqualF
  1099  	default:
  1100  		panic("unreachable")
  1101  	}
  1102  }
  1103  
  1104  // arm64Invert evaluates (InvertFlags op), which
  1105  // is the same as altering the condition codes such
  1106  // that the same result would be produced if the arguments
  1107  // to the flag-generating instruction were reversed, e.g.
  1108  // (InvertFlags (CMP x y)) -> (CMP y x)
  1109  func arm64Invert(op Op) Op {
  1110  	switch op {
  1111  	case OpARM64LessThan:
  1112  		return OpARM64GreaterThan
  1113  	case OpARM64LessThanU:
  1114  		return OpARM64GreaterThanU
  1115  	case OpARM64GreaterThan:
  1116  		return OpARM64LessThan
  1117  	case OpARM64GreaterThanU:
  1118  		return OpARM64LessThanU
  1119  	case OpARM64LessEqual:
  1120  		return OpARM64GreaterEqual
  1121  	case OpARM64LessEqualU:
  1122  		return OpARM64GreaterEqualU
  1123  	case OpARM64GreaterEqual:
  1124  		return OpARM64LessEqual
  1125  	case OpARM64GreaterEqualU:
  1126  		return OpARM64LessEqualU
  1127  	case OpARM64Equal, OpARM64NotEqual:
  1128  		return op
  1129  	case OpARM64LessThanF:
  1130  		return OpARM64GreaterThanF
  1131  	case OpARM64GreaterThanF:
  1132  		return OpARM64LessThanF
  1133  	case OpARM64LessEqualF:
  1134  		return OpARM64GreaterEqualF
  1135  	case OpARM64GreaterEqualF:
  1136  		return OpARM64LessEqualF
  1137  	case OpARM64NotLessThanF:
  1138  		return OpARM64NotGreaterThanF
  1139  	case OpARM64NotGreaterThanF:
  1140  		return OpARM64NotLessThanF
  1141  	case OpARM64NotLessEqualF:
  1142  		return OpARM64NotGreaterEqualF
  1143  	case OpARM64NotGreaterEqualF:
  1144  		return OpARM64NotLessEqualF
  1145  	default:
  1146  		panic("unreachable")
  1147  	}
  1148  }
  1149  
   1150  // ccARM64Eval evaluates an ARM64 condition op against a flags value
   1151  // that is potentially constant; it returns 1 for true,
   1152  // -1 for false, and 0 for not constant.
  1153  func ccARM64Eval(op Op, flags *Value) int {
  1154  	fop := flags.Op
  1155  	if fop == OpARM64InvertFlags {
  1156  		return -ccARM64Eval(op, flags.Args[0])
  1157  	}
  1158  	if fop != OpARM64FlagConstant {
  1159  		return 0
  1160  	}
  1161  	fc := flagConstant(flags.AuxInt)
  1162  	b2i := func(b bool) int {
  1163  		if b {
  1164  			return 1
  1165  		}
  1166  		return -1
  1167  	}
  1168  	switch op {
  1169  	case OpARM64Equal:
  1170  		return b2i(fc.eq())
  1171  	case OpARM64NotEqual:
  1172  		return b2i(fc.ne())
  1173  	case OpARM64LessThan:
  1174  		return b2i(fc.lt())
  1175  	case OpARM64LessThanU:
  1176  		return b2i(fc.ult())
  1177  	case OpARM64GreaterThan:
  1178  		return b2i(fc.gt())
  1179  	case OpARM64GreaterThanU:
  1180  		return b2i(fc.ugt())
  1181  	case OpARM64LessEqual:
  1182  		return b2i(fc.le())
  1183  	case OpARM64LessEqualU:
  1184  		return b2i(fc.ule())
  1185  	case OpARM64GreaterEqual:
  1186  		return b2i(fc.ge())
  1187  	case OpARM64GreaterEqualU:
  1188  		return b2i(fc.uge())
  1189  	}
  1190  	return 0
  1191  }
  1192  
  1193  // logRule logs the use of the rule s. This will only be enabled if
  1194  // rewrite rules were generated with the -log option, see _gen/rulegen.go.
  1195  func logRule(s string) {
  1196  	if ruleFile == nil {
  1197  		// Open a log file to write log to. We open in append
  1198  		// mode because all.bash runs the compiler lots of times,
  1199  		// and we want the concatenation of all of those logs.
  1200  		// This means, of course, that users need to rm the old log
  1201  		// to get fresh data.
  1202  		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
  1203  		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
  1204  			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
  1205  		if err != nil {
  1206  			panic(err)
  1207  		}
  1208  		ruleFile = w
  1209  	}
  1210  	_, err := fmt.Fprintln(ruleFile, s)
  1211  	if err != nil {
  1212  		panic(err)
  1213  	}
  1214  }
  1215  
  1216  var ruleFile io.Writer
  1217  
  1218  func isConstZero(v *Value) bool {
  1219  	switch v.Op {
  1220  	case OpConstNil:
  1221  		return true
  1222  	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
  1223  		return v.AuxInt == 0
  1224  	case OpStringMake, OpIMake, OpComplexMake:
  1225  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1])
  1226  	case OpSliceMake:
  1227  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1]) && isConstZero(v.Args[2])
  1228  	case OpStringPtr, OpStringLen, OpSlicePtr, OpSliceLen, OpSliceCap, OpITab, OpIData, OpComplexReal, OpComplexImag:
  1229  		return isConstZero(v.Args[0])
  1230  	}
  1231  	return false
  1232  }
  1233  
  1234  // reciprocalExact64 reports whether 1/c is exactly representable.
  1235  func reciprocalExact64(c float64) bool {
  1236  	b := math.Float64bits(c)
  1237  	man := b & (1<<52 - 1)
  1238  	if man != 0 {
  1239  		return false // not a power of 2, denormal, or NaN
  1240  	}
  1241  	exp := b >> 52 & (1<<11 - 1)
  1242  	// exponent bias is 0x3ff.  So taking the reciprocal of a number
  1243  	// changes the exponent to 0x7fe-exp.
  1244  	switch exp {
  1245  	case 0:
  1246  		return false // ±0
  1247  	case 0x7ff:
  1248  		return false // ±inf
  1249  	case 0x7fe:
  1250  		return false // exponent is not representable
  1251  	default:
  1252  		return true
  1253  	}
  1254  }
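
         // For example: reciprocalExact64(4) is true (1/4 == 0.25 exactly),
         // reciprocalExact64(3) is false (non-zero mantissa), and
         // reciprocalExact64(0) is false (zero exponent field).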
  1255  
  1256  // reciprocalExact32 reports whether 1/c is exactly representable.
  1257  func reciprocalExact32(c float32) bool {
  1258  	b := math.Float32bits(c)
  1259  	man := b & (1<<23 - 1)
  1260  	if man != 0 {
  1261  		return false // not a power of 2, denormal, or NaN
  1262  	}
  1263  	exp := b >> 23 & (1<<8 - 1)
  1264  	// exponent bias is 0x7f.  So taking the reciprocal of a number
  1265  	// changes the exponent to 0xfe-exp.
  1266  	switch exp {
  1267  	case 0:
  1268  		return false // ±0
  1269  	case 0xff:
  1270  		return false // ±inf
  1271  	case 0xfe:
  1272  		return false // exponent is not representable
  1273  	default:
  1274  		return true
  1275  	}
  1276  }
  1277  
   1278  // isARMImmRot reports whether an immediate can be directly encoded into an ARM instruction
   1279  // (an 8-bit value rotated by an even number of bits).
  1279  func isARMImmRot(v uint32) bool {
  1280  	for i := 0; i < 16; i++ {
  1281  		if v&^0xff == 0 {
  1282  			return true
  1283  		}
  1284  		v = v<<2 | v>>30
  1285  	}
  1286  
  1287  	return false
  1288  }
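
         // For example: 0xFF, 0xFF000000, and 0xF000000F are encodable (each is an
         // 8-bit value rotated by an even amount, the last one wrapping around), while
         // 0x101 is not (its two set bits never fit in a single byte under rotation).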
  1289  
   1290  // overlap reports whether the ranges specified by the given offset and
   1291  // size pairs overlap.
  1292  func overlap(offset1, size1, offset2, size2 int64) bool {
  1293  	if offset1 >= offset2 && offset2+size2 > offset1 {
  1294  		return true
  1295  	}
  1296  	if offset2 >= offset1 && offset1+size1 > offset2 {
  1297  		return true
  1298  	}
  1299  	return false
  1300  }
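
         // For example, overlap(0, 8, 4, 4) is true (the second range lies inside the
         // first), while overlap(0, 4, 4, 4) is false (the ranges are merely adjacent).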
  1301  
   1302  // zeroUpper32Bits reports whether the value zeroes out the upper 32 bits of a 64-bit register.
   1303  // depth limits the recursion depth. In AMD64.rules, 3 is used as the limit,
   1304  // because it catches the same number of cases as 4.
  1305  func zeroUpper32Bits(x *Value, depth int) bool {
  1306  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1307  		// If the value is signed, it might get re-sign-extended
  1308  		// during spill and restore. See issue 68227.
  1309  		return false
  1310  	}
  1311  	switch x.Op {
  1312  	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
  1313  		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
  1314  		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
  1315  		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
  1316  		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
  1317  		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
  1318  		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
  1319  		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
  1320  		OpAMD64SHLL, OpAMD64SHLLconst:
  1321  		return true
  1322  	case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
  1323  		OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
  1324  		OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
  1325  		return true
  1326  	case OpArg: // note: but not ArgIntReg
  1327  		// amd64 always loads args from the stack unsigned.
  1328  		// most other architectures load them sign/zero extended based on the type.
  1329  		return x.Type.Size() == 4 && x.Block.Func.Config.arch == "amd64"
  1330  	case OpPhi, OpSelect0, OpSelect1:
   1331  		// Phis can use each other as arguments; instead of tracking visited values,
   1332  		// just limit the recursion depth.
  1333  		if depth <= 0 {
  1334  			return false
  1335  		}
  1336  		for i := range x.Args {
  1337  			if !zeroUpper32Bits(x.Args[i], depth-1) {
  1338  				return false
  1339  			}
  1340  		}
  1341  		return true
  1342  
  1343  	}
  1344  	return false
  1345  }
  1346  
  1347  // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
  1348  func zeroUpper48Bits(x *Value, depth int) bool {
  1349  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1350  		return false
  1351  	}
  1352  	switch x.Op {
  1353  	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
  1354  		return true
  1355  	case OpArg: // note: but not ArgIntReg
  1356  		return x.Type.Size() == 2 && x.Block.Func.Config.arch == "amd64"
  1357  	case OpPhi, OpSelect0, OpSelect1:
   1358  		// Phis can use each other as arguments; instead of tracking visited values,
   1359  		// just limit the recursion depth.
  1360  		if depth <= 0 {
  1361  			return false
  1362  		}
  1363  		for i := range x.Args {
  1364  			if !zeroUpper48Bits(x.Args[i], depth-1) {
  1365  				return false
  1366  			}
  1367  		}
  1368  		return true
  1369  
  1370  	}
  1371  	return false
  1372  }
  1373  
  1374  // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
  1375  func zeroUpper56Bits(x *Value, depth int) bool {
  1376  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1377  		return false
  1378  	}
  1379  	switch x.Op {
  1380  	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
  1381  		return true
  1382  	case OpArg: // note: but not ArgIntReg
  1383  		return x.Type.Size() == 1 && x.Block.Func.Config.arch == "amd64"
  1384  	case OpPhi, OpSelect0, OpSelect1:
   1385  		// Phis can use each other as arguments; instead of tracking visited values,
   1386  		// just limit the recursion depth.
  1387  		if depth <= 0 {
  1388  			return false
  1389  		}
  1390  		for i := range x.Args {
  1391  			if !zeroUpper56Bits(x.Args[i], depth-1) {
  1392  				return false
  1393  			}
  1394  		}
  1395  		return true
  1396  
  1397  	}
  1398  	return false
  1399  }
  1400  
  1401  func isInlinableMemclr(c *Config, sz int64) bool {
  1402  	if sz < 0 {
  1403  		return false
  1404  	}
  1405  	// TODO: expand this check to allow other architectures
  1406  	// see CL 454255 and issue 56997
  1407  	switch c.arch {
  1408  	case "amd64", "arm64":
  1409  		return true
  1410  	case "ppc64le", "ppc64", "loong64":
  1411  		return sz < 512
  1412  	}
  1413  	return false
  1414  }
  1415  
  1416  // isInlinableMemmove reports whether the given arch performs a Move of the given size
  1417  // faster than memmove. It will only return true if replacing the memmove with a Move is
  1418  // safe, either because Move will do all of its loads before any of its stores, or
  1419  // because the arguments are known to be disjoint.
  1420  // This is used as a check for replacing memmove with Move ops.
  1421  func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1422  	// It is always safe to convert memmove into Move when its arguments are disjoint.
  1423  	// Move ops may or may not be faster for large sizes depending on how the platform
  1424  	// lowers them, so we only perform this optimization on platforms that we know to
  1425  	// have fast Move ops.
  1426  	switch c.arch {
  1427  	case "amd64":
  1428  		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
  1429  	case "arm64":
  1430  		return sz <= 64 || (sz <= 1024 && disjoint(dst, sz, src, sz))
  1431  	case "386":
  1432  		return sz <= 8
  1433  	case "s390x", "ppc64", "ppc64le":
  1434  		return sz <= 8 || disjoint(dst, sz, src, sz)
  1435  	case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
  1436  		return sz <= 4
  1437  	}
  1438  	return false
  1439  }
  1440  func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1441  	return isInlinableMemmove(dst, src, sz, c)
  1442  }
  1443  
  1444  // logLargeCopy logs the occurrence of a large copy.
  1445  // The best place to do this is in the rewrite rules where the size of the move is easy to find.
  1446  // "Large" is arbitrarily chosen to be 128 bytes; this may change.
  1447  func logLargeCopy(v *Value, s int64) bool {
  1448  	if s < 128 {
  1449  		return true
  1450  	}
  1451  	if logopt.Enabled() {
  1452  		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
  1453  	}
  1454  	return true
  1455  }
  1456  func LogLargeCopy(funcName string, pos src.XPos, s int64) {
  1457  	if s < 128 {
  1458  		return
  1459  	}
  1460  	if logopt.Enabled() {
  1461  		logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
  1462  	}
  1463  }
  1464  
  1465  // hasSmallRotate reports whether the architecture has rotate instructions
  1466  // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
  1467  func hasSmallRotate(c *Config) bool {
  1468  	switch c.arch {
  1469  	case "amd64", "386":
  1470  		return true
  1471  	default:
  1472  		return false
  1473  	}
  1474  }
  1475  
  1476  func supportsPPC64PCRel() bool {
  1477  	// PCRel is currently supported for >= power10, linux only
  1478  	// Internal and external linking supports this on ppc64le; internal linking on ppc64.
  1479  	return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
  1480  }
  1481  
  1482  func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
  1483  	if sh < 0 || sh >= sz {
  1484  		panic("PPC64 shift arg sh out of range")
  1485  	}
  1486  	if mb < 0 || mb >= sz {
  1487  		panic("PPC64 shift arg mb out of range")
  1488  	}
  1489  	if me < 0 || me >= sz {
  1490  		panic("PPC64 shift arg me out of range")
  1491  	}
  1492  	return int32(sh<<16 | mb<<8 | me)
  1493  }
  1494  
  1495  func GetPPC64Shiftsh(auxint int64) int64 {
  1496  	return int64(int8(auxint >> 16))
  1497  }
  1498  
  1499  func GetPPC64Shiftmb(auxint int64) int64 {
  1500  	return int64(int8(auxint >> 8))
  1501  }
  1502  
   1503  // isPPC64WordRotateMask reports whether this value can be encoded as a mask for an rlwinm-like
   1504  // operation.  Masks can also extend from the msb and wrap around to
   1505  // the lsb.  That is, the valid masks are 32-bit strings
   1506  // of the form 0..01..10..0, 1..10..01..1, or 1...1.
   1507  //
   1508  // Note: This ignores the upper 32 bits of the input. When a
   1509  // zero-extended result is desired (e.g. a 64-bit result), the
   1510  // user must verify the upper 32 bits are 0 and the mask is
   1511  // contiguous (that is, non-wrapping).
  1512  func isPPC64WordRotateMask(v64 int64) bool {
  1513  	// Isolate rightmost 1 (if none 0) and add.
  1514  	v := uint32(v64)
  1515  	vp := (v & -v) + v
  1516  	// Likewise, for the wrapping case.
  1517  	vn := ^v
  1518  	vpn := (vn & -vn) + vn
  1519  	return (v&vp == 0 || vn&vpn == 0) && v != 0
  1520  }
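
         // For example: 0x00FF0000 (contiguous) and 0xFF0000FF (wrapping) are valid,
         // while 0x0F0F0000 (two runs of bits) and 0 are not.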
  1521  
   1522  // isPPC64WordRotateMaskNonWrapping reports whether this mask is a valid, contiguous bitmask that can be
   1523  // represented by an RLWNM mask and also clears the upper 32 bits
   1524  // of the register.
  1525  func isPPC64WordRotateMaskNonWrapping(v64 int64) bool {
  1526  	// Isolate rightmost 1 (if none 0) and add.
  1527  	v := uint32(v64)
  1528  	vp := (v & -v) + v
  1529  	return (v&vp == 0) && v != 0 && uint64(uint32(v64)) == uint64(v64)
  1530  }
  1531  
   1532  // encodePPC64RotateMask compresses the rotate and mask into a single value of the form
   1533  // me | mb<<8 | rotate<<16 | nbits<<24, where me and mb can
   1534  // be used to regenerate the input mask.
  1535  func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
  1536  	var mb, me, mbn, men int
  1537  
  1538  	// Determine boundaries and then decode them
  1539  	if mask == 0 || ^mask == 0 || rotate >= nbits {
  1540  		panic(fmt.Sprintf("invalid PPC64 rotate mask: %x %d %d", uint64(mask), rotate, nbits))
  1541  	} else if nbits == 32 {
  1542  		mb = bits.LeadingZeros32(uint32(mask))
  1543  		me = 32 - bits.TrailingZeros32(uint32(mask))
  1544  		mbn = bits.LeadingZeros32(^uint32(mask))
  1545  		men = 32 - bits.TrailingZeros32(^uint32(mask))
  1546  	} else {
  1547  		mb = bits.LeadingZeros64(uint64(mask))
  1548  		me = 64 - bits.TrailingZeros64(uint64(mask))
  1549  		mbn = bits.LeadingZeros64(^uint64(mask))
  1550  		men = 64 - bits.TrailingZeros64(^uint64(mask))
  1551  	}
   1552  	// Check for a wrapping mask (e.g. bits at 0 and 63).
  1553  	if mb == 0 && me == int(nbits) {
  1554  		// swap the inverted values
  1555  		mb, me = men, mbn
  1556  	}
  1557  
  1558  	return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
  1559  }
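
         // Worked example (informal): for a 32-bit mask with no rotation,
         //
         //	encodePPC64RotateMask(0, 0x00FF0000, 32) == 0x20000810
         //
         // since me=16, mb=8, rotate=0, and nbits=32 pack as 0x10 | 0x8<<8 | 0<<16 | 0x20<<24.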
  1560  
   1561  // mergePPC64RLDICLandSRDconst merges (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x).
   1562  // SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
   1563  // RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
   1564  // operations can be combined. This function assumes the two opcodes can
   1565  // be merged, and returns an encoded rotate+mask value of the combined RLDICL.
  1566  func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
  1567  	mb := s
  1568  	r := 64 - s
  1569  	// A larger mb is a smaller mask.
  1570  	if (encoded>>8)&0xFF < mb {
  1571  		encoded = (encoded &^ 0xFF00) | mb<<8
  1572  	}
  1573  	// The rotate is expected to be 0.
  1574  	if (encoded & 0xFF0000) != 0 {
  1575  		panic("non-zero rotate")
  1576  	}
  1577  	return encoded | r<<16
  1578  }
  1579  
  1580  // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.  The values returned as
  1581  // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
  1582  func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
  1583  	auxint := uint64(sauxint)
  1584  	rotate = int64((auxint >> 16) & 0xFF)
  1585  	mb = int64((auxint >> 8) & 0xFF)
  1586  	me = int64((auxint >> 0) & 0xFF)
  1587  	nbits := int64((auxint >> 24) & 0xFF)
  1588  	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
  1589  	if mb > me {
  1590  		mask = ^mask
  1591  	}
  1592  	if nbits == 32 {
  1593  		mask = uint64(uint32(mask))
  1594  	}
  1595  
  1596  	// Fixup ME to match ISA definition.  The second argument to MASK(..,me)
  1597  	// is inclusive.
  1598  	me = (me - 1) & (nbits - 1)
  1599  	return
  1600  }
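
// Continuing the example above (illustrative): DecodePPC64RotateMask(0x201D1B20)
// returns rotate=29, mb=27, me=31 (me adjusted to the inclusive ISA form) and
// mask=0x1F, recovering the values passed to encodePPC64RotateMask.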
  1601  
  1602  // This verifies that the mask is a set of
  1603  // consecutive bits including the least
  1604  // significant bit.
  1605  func isPPC64ValidShiftMask(v int64) bool {
  1606  	if (v != 0) && ((v+1)&v) == 0 {
  1607  		return true
  1608  	}
  1609  	return false
  1610  }
  1611  
  1612  func getPPC64ShiftMaskLength(v int64) int64 {
  1613  	return int64(bits.Len64(uint64(v)))
  1614  }
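
// For example (illustrative): 0x0F is a valid shift mask (four consecutive low
// bits) with getPPC64ShiftMaskLength(0x0F) == 4, while 0x0C is rejected by
// isPPC64ValidShiftMask because it does not include the least significant bit.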
  1615  
  1616  // Decompose a shift right into an equivalent rotate/mask,
  1617  // and return mask & m.
  1618  func mergePPC64RShiftMask(m, s, nbits int64) int64 {
  1619  	smask := uint64((1<<uint(nbits))-1) >> uint(s)
  1620  	return m & int64(smask)
  1621  }
  1622  
  1623  // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
  1624  func mergePPC64AndSrwi(m, s int64) int64 {
  1625  	mask := mergePPC64RShiftMask(m, s, 32)
  1626  	if !isPPC64WordRotateMask(mask) {
  1627  		return 0
  1628  	}
  1629  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1630  }
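
// Worked example (illustrative): mergePPC64AndSrwi(0x1F, 3) keeps the mask 0x1F
// (it survives the right shift by 3) and uses rotate (32-3)&31 = 29, giving the
// 0x201D1B20 encoding from the encodePPC64RotateMask example above, so
// (ANDconst [0x1F] (SRWconst [3] x)) can become (RLWINM [0x201D1B20] x).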
  1631  
  1632  // Combine (ANDconst [m] (SRDconst [s])) into (RLWINM [y]) or return 0
  1633  func mergePPC64AndSrdi(m, s int64) int64 {
  1634  	mask := mergePPC64RShiftMask(m, s, 64)
  1635  
  1636  	// Verify the rotate and mask result only uses the lower 32 bits.
  1637  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, -int(s))
  1638  	if rv&uint64(mask) != 0 {
  1639  		return 0
  1640  	}
  1641  	if !isPPC64WordRotateMaskNonWrapping(mask) {
  1642  		return 0
  1643  	}
  1644  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1645  }
  1646  
  1647  // Combine (ANDconst [m] (SLDconst [s])) into (RLWINM [y]) or return 0
  1648  func mergePPC64AndSldi(m, s int64) int64 {
  1649  	mask := -1 << s & m
  1650  
  1651  	// Verify the rotate and mask result only uses the lower 32 bits.
  1652  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, int(s))
  1653  	if rv&uint64(mask) != 0 {
  1654  		return 0
  1655  	}
  1656  	if !isPPC64WordRotateMaskNonWrapping(mask) {
  1657  		return 0
  1658  	}
  1659  	return encodePPC64RotateMask(s&31, mask, 32)
  1660  }
  1661  
  1662  // Test if a word shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1663  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1664  func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
  1665  	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
  1666  	// For CLRLSLDI, it is more convenient to think of it as masking the leftmost bits, then rotating left.
  1667  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1668  
  1669  	// Rewrite mask to apply after the final left shift.
  1670  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1671  
  1672  	r_1 := 32 - srw
  1673  	r_2 := GetPPC64Shiftsh(sld)
  1674  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1675  
  1676  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1677  		return 0
  1678  	}
  1679  	return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
  1680  }
  1681  
  1682  // Test if a doubleword shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1683  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1684  func mergePPC64ClrlsldiSrd(sld, srd int64) int64 {
  1685  	mask_1 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(srd)
  1686  	// For CLRLSLDI, it is more convenient to think of it as masking the leftmost bits, then rotating left.
  1687  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1688  
  1689  	// Rewrite mask to apply after the final left shift.
  1690  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1691  
  1692  	r_1 := 64 - srd
  1693  	r_2 := GetPPC64Shiftsh(sld)
  1694  	r_3 := (r_1 + r_2) & 63 // This can wrap.
  1695  
  1696  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1697  		return 0
  1698  	}
  1699  	// This combine only works when selecting and shifting the lower 32 bits.
  1700  	v1 := bits.RotateLeft64(0xFFFFFFFF00000000, int(r_3))
  1701  	if v1&mask_3 != 0 {
  1702  		return 0
  1703  	}
  1704  	return encodePPC64RotateMask(int64(r_3&31), int64(mask_3), 32)
  1705  }
  1706  
  1707  // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
  1708  // the encoded RLWINM constant, or 0 if they cannot be merged.
  1709  func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
  1710  	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
  1711  	// For CLRLSLDI, it is more convenient to think of it as masking the leftmost bits, then rotating left.
  1712  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1713  
  1714  	// combine the masks, and adjust for the final left shift.
  1715  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
  1716  	r_2 := GetPPC64Shiftsh(int64(sld))
  1717  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1718  
  1719  	// Verify the result is still a valid bitmask of <= 32 bits.
  1720  	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
  1721  		return 0
  1722  	}
  1723  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1724  }
  1725  
  1726  // Test if RLWINM feeding into an ANDconst can be merged. Return the encoded RLWINM constant,
  1727  // or 0 if they cannot be merged.
  1728  func mergePPC64AndRlwinm(mask uint32, rlw int64) int64 {
  1729  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1730  	mask_out := (mask_rlw & uint64(mask))
  1731  
  1732  	// Verify the result is still a valid bitmask of <= 32 bits.
  1733  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1734  		return 0
  1735  	}
  1736  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1737  }
  1738  
  1739  // Test if RLWINM opcode rlw clears the upper 32 bits of the
  1740  // result. Return rlw if it does, 0 otherwise.
  1741  func mergePPC64MovwzregRlwinm(rlw int64) int64 {
  1742  	_, mb, me, _ := DecodePPC64RotateMask(rlw)
  1743  	if mb > me {
  1744  		return 0
  1745  	}
  1746  	return rlw
  1747  }
  1748  
  1749  // Test if an AND feeding into a RLWINM can be merged. Return the encoded RLWINM constant,
  1750  // or 0 if they cannot be merged.
  1751  func mergePPC64RlwinmAnd(rlw int64, mask uint32) int64 {
  1752  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1753  
  1754  	// Rotate the input mask, combine with the rlwnm mask, and test if it is still a valid rlwinm mask.
  1755  	r_mask := bits.RotateLeft32(mask, int(r))
  1756  
  1757  	mask_out := (mask_rlw & uint64(r_mask))
  1758  
  1759  	// Verify the result is still a valid bitmask of <= 32 bits.
  1760  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1761  		return 0
  1762  	}
  1763  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1764  }
  1765  
  1766  // Test if a RLWINM feeding into SLDconst can be merged. Return the encoded RLWINM constant,
  1767  // or 0 if they cannot be merged.
  1768  func mergePPC64SldiRlwinm(sldi, rlw int64) int64 {
  1769  	r_1, mb, me, mask_1 := DecodePPC64RotateMask(rlw)
  1770  	if mb > me || mb < sldi {
  1771  		// Wrapping masks cannot be merged as the upper 32 bits are effectively undefined in this case.
  1772  		// Likewise, if mb is less than the shift amount, it cannot be merged.
  1773  		return 0
  1774  	}
  1775  	// combine the masks, and adjust for the final left shift.
  1776  	mask_3 := mask_1 << sldi
  1777  	r_3 := (r_1 + sldi) & 31 // This can wrap.
  1778  
  1779  	// Verify the result is still a valid bitmask of <= 32 bits.
  1780  	if uint64(uint32(mask_3)) != mask_3 {
  1781  		return 0
  1782  	}
  1783  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1784  }
  1785  
  1786  // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
  1787  // or return 0 if they cannot be combined.
  1788  func mergePPC64SldiSrw(sld, srw int64) int64 {
  1789  	if sld > srw || srw >= 32 {
  1790  		return 0
  1791  	}
  1792  	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
  1793  	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
  1794  	mask := (mask_r & mask_l) << uint(sld)
  1795  	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
  1796  }
  1797  
  1798  // Convert a PPC64 opcode from the Op to OpCC form. This converts (op x y)
  1799  // to (Select0 (opCC x y)) without having to explicitly fixup every user
  1800  // of op.
  1801  //
  1802  // E.g. consider the case:
  1803  // a = (ADD x y)
  1804  // b = (CMPconst [0] a)
  1805  // c = (OR a z)
  1806  //
  1807  // A rule like (CMPconst [0] (ADD x y)) => (CMPconst [0] (Select0 (ADDCC x y)))
  1808  // would produce:
  1809  // a  = (ADD x y)
  1810  // a' = (ADDCC x y)
  1811  // a” = (Select0 a')
  1812  // b  = (CMPconst [0] a”)
  1813  // c  = (OR a z)
  1814  //
  1815  // which makes it impossible to rewrite the second user. Instead the result
  1816  // of this conversion is:
  1817  // a' = (ADDCC x y)
  1818  // a  = (Select0 a')
  1819  // b  = (CMPconst [0] a)
  1820  // c  = (OR a z)
  1821  //
  1822  // Which makes it trivial to rewrite b using a lowering rule.
  1823  func convertPPC64OpToOpCC(op *Value) *Value {
  1824  	ccOpMap := map[Op]Op{
  1825  		OpPPC64ADD:      OpPPC64ADDCC,
  1826  		OpPPC64ADDconst: OpPPC64ADDCCconst,
  1827  		OpPPC64AND:      OpPPC64ANDCC,
  1828  		OpPPC64ANDN:     OpPPC64ANDNCC,
  1829  		OpPPC64ANDconst: OpPPC64ANDCCconst,
  1830  		OpPPC64CNTLZD:   OpPPC64CNTLZDCC,
  1831  		OpPPC64MULHDU:   OpPPC64MULHDUCC,
  1832  		OpPPC64NEG:      OpPPC64NEGCC,
  1833  		OpPPC64NOR:      OpPPC64NORCC,
  1834  		OpPPC64OR:       OpPPC64ORCC,
  1835  		OpPPC64RLDICL:   OpPPC64RLDICLCC,
  1836  		OpPPC64SUB:      OpPPC64SUBCC,
  1837  		OpPPC64XOR:      OpPPC64XORCC,
  1838  	}
  1839  	b := op.Block
  1840  	opCC := b.NewValue0I(op.Pos, ccOpMap[op.Op], types.NewTuple(op.Type, types.TypeFlags), op.AuxInt)
  1841  	opCC.AddArgs(op.Args...)
  1842  	op.reset(OpSelect0)
  1843  	op.AddArgs(opCC)
  1844  	return op
  1845  }
  1846  
  1847  // Try converting a RLDICL to ANDCC. If successful, return the mask; otherwise return 0.
  1848  func convertPPC64RldiclAndccconst(sauxint int64) int64 {
  1849  	r, _, _, mask := DecodePPC64RotateMask(sauxint)
  1850  	if r != 0 || mask&0xFFFF != mask {
  1851  		return 0
  1852  	}
  1853  	return int64(mask)
  1854  }
  1855  
  1856  // Convenience function to rotate a 32 bit constant value by another constant.
  1857  func rotateLeft32(v, rotate int64) int64 {
  1858  	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
  1859  }
  1860  
  1861  func rotateRight64(v, rotate int64) int64 {
  1862  	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
  1863  }
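
// For example (illustrative): rotateLeft32(0xF0000000, 4) wraps the top nibble
// around to give 0x0000000F, and rotateRight64(0xF, 4) gives the bit pattern
// 0xF000000000000000 (returned as the corresponding int64 value).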
  1864  
  1865  // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
  1866  func armBFAuxInt(lsb, width int64) arm64BitField {
  1867  	if lsb < 0 || lsb > 63 {
  1868  		panic("ARM(64) bit field lsb constant out of range")
  1869  	}
  1870  	if width < 1 || lsb+width > 64 {
  1871  		panic("ARM(64) bit field width constant out of range")
  1872  	}
  1873  	return arm64BitField(width | lsb<<8)
  1874  }
  1875  
  1876  // returns the lsb part of the auxInt field of arm64 bitfield ops.
  1877  func (bfc arm64BitField) lsb() int64 {
  1878  	return int64(uint64(bfc) >> 8)
  1879  }
  1880  
  1881  // returns the width part of the auxInt field of arm64 bitfield ops.
  1882  func (bfc arm64BitField) width() int64 {
  1883  	return int64(bfc) & 0xff
  1884  }
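
// For example (illustrative): armBFAuxInt(8, 16) packs to 16 | 8<<8 = 0x0810,
// from which lsb() recovers 8 and width() recovers 16.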
  1885  
  1886  // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
  1887  func isARM64BFMask(lsb, mask, rshift int64) bool {
  1888  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1889  	return shiftedMask != 0 && isPowerOfTwo(shiftedMask+1) && nto(shiftedMask)+lsb < 64
  1890  }
  1891  
  1892  // returns the bitfield width of mask >> rshift for arm64 bitfield ops.
  1893  func arm64BFWidth(mask, rshift int64) int64 {
  1894  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1895  	if shiftedMask == 0 {
  1896  		panic("ARM64 BF mask is zero")
  1897  	}
  1898  	return nto(shiftedMask)
  1899  }
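
// For example (illustrative): with mask=0xFF0 and rshift=4 the shifted mask is
// 0xFF, so arm64BFWidth(0xFF0, 4) is 8 (the length of the run of ones) and
// isARM64BFMask(8, 0xFF0, 4) is true since lsb+width = 16 stays below 64.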
  1900  
  1901  // registerizable reports whether t is a primitive type that fits in
  1902  // a register. It assumes float64 values will always fit into registers
  1903  // even if that isn't strictly true.
  1904  func registerizable(b *Block, typ *types.Type) bool {
  1905  	if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
  1906  		return true
  1907  	}
  1908  	if typ.IsInteger() {
  1909  		return typ.Size() <= b.Func.Config.RegSize
  1910  	}
  1911  	return false
  1912  }
  1913  
  1914  // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
  1915  func needRaceCleanup(sym *AuxCall, v *Value) bool {
  1916  	f := v.Block.Func
  1917  	if !f.Config.Race {
  1918  		return false
  1919  	}
  1920  	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
  1921  		return false
  1922  	}
  1923  	for _, b := range f.Blocks {
  1924  		for _, v := range b.Values {
  1925  			switch v.Op {
  1926  			case OpStaticCall, OpStaticLECall:
  1927  				// The check for racefuncenter will also encounter racefuncexit and vice versa;
  1928  				// those are fine. Also allow calls to panic*.
  1929  				s := v.Aux.(*AuxCall).Fn.String()
  1930  				switch s {
  1931  				case "runtime.racefuncenter", "runtime.racefuncexit",
  1932  					"runtime.panicdivide", "runtime.panicwrap",
  1933  					"runtime.panicshift":
  1934  					continue
  1935  				}
  1936  				// If we encountered any call, we need to keep racefunc*,
  1937  				// for accurate stacktraces.
  1938  				return false
  1939  			case OpPanicBounds, OpPanicExtend:
  1940  				// Note: these are panic generators that are ok (like the static calls above).
  1941  			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
  1942  				// We must keep the race functions if there are any other call types.
  1943  				return false
  1944  			}
  1945  		}
  1946  	}
  1947  	if isSameCall(sym, "runtime.racefuncenter") {
  1948  		// TODO REGISTER ABI this needs to be cleaned up.
  1949  		// If we're removing racefuncenter, remove its argument as well.
  1950  		if v.Args[0].Op != OpStore {
  1951  			if v.Op == OpStaticLECall {
  1952  				// there is no store, yet.
  1953  				return true
  1954  			}
  1955  			return false
  1956  		}
  1957  		mem := v.Args[0].Args[2]
  1958  		v.Args[0].reset(OpCopy)
  1959  		v.Args[0].AddArg(mem)
  1960  	}
  1961  	return true
  1962  }
  1963  
  1964  // symIsRO reports whether sym is a read-only global.
  1965  func symIsRO(sym Sym) bool {
  1966  	lsym := sym.(*obj.LSym)
  1967  	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
  1968  }
  1969  
  1970  // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
  1971  func symIsROZero(sym Sym) bool {
  1972  	lsym := sym.(*obj.LSym)
  1973  	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
  1974  		return false
  1975  	}
  1976  	for _, b := range lsym.P {
  1977  		if b != 0 {
  1978  			return false
  1979  		}
  1980  	}
  1981  	return true
  1982  }
  1983  
  1984  // isFixed32 returns true if the int32 at offset off in symbol sym
  1985  // is known and constant.
  1986  func isFixed32(c *Config, sym Sym, off int64) bool {
  1987  	return isFixed(c, sym, off, 4)
  1988  }
  1989  
  1990  // isFixed returns true if the range [off,off+size] of the symbol sym
  1991  // is known and constant.
  1992  func isFixed(c *Config, sym Sym, off, size int64) bool {
  1993  	lsym := sym.(*obj.LSym)
  1994  	if lsym.Extra == nil {
  1995  		return false
  1996  	}
  1997  	if _, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  1998  		if off == 2*c.PtrSize && size == 4 {
  1999  			return true // type hash field
  2000  		}
  2001  	}
  2002  	return false
  2003  }
  2004  func fixed32(c *Config, sym Sym, off int64) int32 {
  2005  	lsym := sym.(*obj.LSym)
  2006  	if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  2007  		if off == 2*c.PtrSize {
  2008  			return int32(types.TypeHash(ti.Type.(*types.Type)))
  2009  		}
  2010  	}
  2011  	base.Fatalf("fixed32 data not known for %s:%d", sym, off)
  2012  	return 0
  2013  }
  2014  
  2015  // isFixedSym returns true if the content of sym at the given offset
  2016  // is known and is the constant address of another symbol.
  2017  func isFixedSym(sym Sym, off int64) bool {
  2018  	lsym := sym.(*obj.LSym)
  2019  	switch {
  2020  	case lsym.Type == objabi.SRODATA:
  2021  		// itabs, dictionaries
  2022  	default:
  2023  		return false
  2024  	}
  2025  	for _, r := range lsym.R {
  2026  		if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
  2027  			return true
  2028  		}
  2029  	}
  2030  	return false
  2031  }
  2032  func fixedSym(f *Func, sym Sym, off int64) Sym {
  2033  	lsym := sym.(*obj.LSym)
  2034  	for _, r := range lsym.R {
  2035  		if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off {
  2036  			if strings.HasPrefix(r.Sym.Name, "type:") {
  2037  				// In case we're loading a type out of a dictionary, we need to record
  2038  				// that the containing function might put that type in an interface.
  2039  				// That information is currently recorded in relocations in the dictionary,
  2040  				// but if we perform this load at compile time then the dictionary
  2041  				// might be dead.
  2042  				reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2043  			} else if strings.HasPrefix(r.Sym.Name, "go:itab") {
  2044  				// Same, but if we're using an itab we need to record that the
  2045  				// itab._type might be put in an interface.
  2046  				reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2047  			}
  2048  			return r.Sym
  2049  		}
  2050  	}
  2051  	base.Fatalf("fixedSym data not known for %s:%d", sym, off)
  2052  	return nil
  2053  }
  2054  
  2055  // read8 reads one byte from the read-only global sym at offset off.
  2056  func read8(sym Sym, off int64) uint8 {
  2057  	lsym := sym.(*obj.LSym)
  2058  	if off >= int64(len(lsym.P)) || off < 0 {
  2059  		// Invalid index into the global sym.
  2060  		// This can happen in dead code, so we don't want to panic.
  2061  		// Just return any value, it will eventually get ignored.
  2062  		// See issue 29215.
  2063  		return 0
  2064  	}
  2065  	return lsym.P[off]
  2066  }
  2067  
  2068  // read16 reads two bytes from the read-only global sym at offset off.
  2069  func read16(sym Sym, off int64, byteorder binary.ByteOrder) uint16 {
  2070  	lsym := sym.(*obj.LSym)
  2071  	// lsym.P is written lazily.
  2072  	// Bytes requested after the end of lsym.P are 0.
  2073  	var src []byte
  2074  	if 0 <= off && off < int64(len(lsym.P)) {
  2075  		src = lsym.P[off:]
  2076  	}
  2077  	buf := make([]byte, 2)
  2078  	copy(buf, src)
  2079  	return byteorder.Uint16(buf)
  2080  }
  2081  
  2082  // read32 reads four bytes from the read-only global sym at offset off.
  2083  func read32(sym Sym, off int64, byteorder binary.ByteOrder) uint32 {
  2084  	lsym := sym.(*obj.LSym)
  2085  	var src []byte
  2086  	if 0 <= off && off < int64(len(lsym.P)) {
  2087  		src = lsym.P[off:]
  2088  	}
  2089  	buf := make([]byte, 4)
  2090  	copy(buf, src)
  2091  	return byteorder.Uint32(buf)
  2092  }
  2093  
  2094  // read64 reads eight bytes from the read-only global sym at offset off.
  2095  func read64(sym Sym, off int64, byteorder binary.ByteOrder) uint64 {
  2096  	lsym := sym.(*obj.LSym)
  2097  	var src []byte
  2098  	if 0 <= off && off < int64(len(lsym.P)) {
  2099  		src = lsym.P[off:]
  2100  	}
  2101  	buf := make([]byte, 8)
  2102  	copy(buf, src)
  2103  	return byteorder.Uint64(buf)
  2104  }
  2105  
  2106  // sequentialAddresses reports whether it can prove that x + n == y.
  2107  func sequentialAddresses(x, y *Value, n int64) bool {
  2108  	if x == y && n == 0 {
  2109  		return true
  2110  	}
  2111  	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
  2112  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2113  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2114  		return true
  2115  	}
  2116  	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2117  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2118  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2119  		return true
  2120  	}
  2121  	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
  2122  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2123  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2124  		return true
  2125  	}
  2126  	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2127  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2128  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2129  		return true
  2130  	}
  2131  	return false
  2132  }
  2133  
  2134  // flagConstant represents the result of a compile-time comparison.
  2135  // The sense of these flags does not necessarily represent the hardware's notion
  2136  // of a flags register - these are just a compile-time construct.
  2137  // We happen to match the semantics to those of arm/arm64.
  2138  // Note that these semantics differ from x86: the carry flag has the opposite
  2139  // sense on a subtraction!
  2140  //
  2141  //	On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
  2142  //	On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
  2143  //	 (because it does x + ^y + C).
  2144  //
  2145  // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
  2146  type flagConstant uint8
  2147  
  2148  // N reports whether the result of an operation is negative (high bit set).
  2149  func (fc flagConstant) N() bool {
  2150  	return fc&1 != 0
  2151  }
  2152  
  2153  // Z reports whether the result of an operation is 0.
  2154  func (fc flagConstant) Z() bool {
  2155  	return fc&2 != 0
  2156  }
  2157  
  2158  // C reports whether an unsigned add overflowed (carry), or an
  2159  // unsigned subtract did not underflow (borrow).
  2160  func (fc flagConstant) C() bool {
  2161  	return fc&4 != 0
  2162  }
  2163  
  2164  // V reports whether a signed operation overflowed or underflowed.
  2165  func (fc flagConstant) V() bool {
  2166  	return fc&8 != 0
  2167  }
  2168  
  2169  func (fc flagConstant) eq() bool {
  2170  	return fc.Z()
  2171  }
  2172  func (fc flagConstant) ne() bool {
  2173  	return !fc.Z()
  2174  }
  2175  func (fc flagConstant) lt() bool {
  2176  	return fc.N() != fc.V()
  2177  }
  2178  func (fc flagConstant) le() bool {
  2179  	return fc.Z() || fc.lt()
  2180  }
  2181  func (fc flagConstant) gt() bool {
  2182  	return !fc.Z() && fc.ge()
  2183  }
  2184  func (fc flagConstant) ge() bool {
  2185  	return fc.N() == fc.V()
  2186  }
  2187  func (fc flagConstant) ult() bool {
  2188  	return !fc.C()
  2189  }
  2190  func (fc flagConstant) ule() bool {
  2191  	return fc.Z() || fc.ult()
  2192  }
  2193  func (fc flagConstant) ugt() bool {
  2194  	return !fc.Z() && fc.uge()
  2195  }
  2196  func (fc flagConstant) uge() bool {
  2197  	return fc.C()
  2198  }
  2199  
  2200  func (fc flagConstant) ltNoov() bool {
  2201  	return fc.lt() && !fc.V()
  2202  }
  2203  func (fc flagConstant) leNoov() bool {
  2204  	return fc.le() && !fc.V()
  2205  }
  2206  func (fc flagConstant) gtNoov() bool {
  2207  	return fc.gt() && !fc.V()
  2208  }
  2209  func (fc flagConstant) geNoov() bool {
  2210  	return fc.ge() && !fc.V()
  2211  }
  2212  
  2213  func (fc flagConstant) String() string {
  2214  	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
  2215  }
  2216  
  2217  type flagConstantBuilder struct {
  2218  	N bool
  2219  	Z bool
  2220  	C bool
  2221  	V bool
  2222  }
  2223  
  2224  func (fcs flagConstantBuilder) encode() flagConstant {
  2225  	var fc flagConstant
  2226  	if fcs.N {
  2227  		fc |= 1
  2228  	}
  2229  	if fcs.Z {
  2230  		fc |= 2
  2231  	}
  2232  	if fcs.C {
  2233  		fc |= 4
  2234  	}
  2235  	if fcs.V {
  2236  		fc |= 8
  2237  	}
  2238  	return fc
  2239  }
  2240  
  2241  // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
  2242  //  - the results of the C flag are different
  2243  //  - the results of the V flag when y==minint are different
  2244  
  2245  // addFlags64 returns the flags that would be set from computing x+y.
  2246  func addFlags64(x, y int64) flagConstant {
  2247  	var fcb flagConstantBuilder
  2248  	fcb.Z = x+y == 0
  2249  	fcb.N = x+y < 0
  2250  	fcb.C = uint64(x+y) < uint64(x)
  2251  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2252  	return fcb.encode()
  2253  }
  2254  
  2255  // subFlags64 returns the flags that would be set from computing x-y.
  2256  func subFlags64(x, y int64) flagConstant {
  2257  	var fcb flagConstantBuilder
  2258  	fcb.Z = x-y == 0
  2259  	fcb.N = x-y < 0
  2260  	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
  2261  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2262  	return fcb.encode()
  2263  }
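
// Worked example (illustrative): subFlags64(3, 3) sets Z (the result is zero)
// and C (no borrow, following the arm model), leaving N and V clear, so eq()
// and uge() report true while lt() and ult() report false.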
  2264  
  2265  // addFlags32 returns the flags that would be set from computing x+y.
  2266  func addFlags32(x, y int32) flagConstant {
  2267  	var fcb flagConstantBuilder
  2268  	fcb.Z = x+y == 0
  2269  	fcb.N = x+y < 0
  2270  	fcb.C = uint32(x+y) < uint32(x)
  2271  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2272  	return fcb.encode()
  2273  }
  2274  
  2275  // subFlags32 returns the flags that would be set from computing x-y.
  2276  func subFlags32(x, y int32) flagConstant {
  2277  	var fcb flagConstantBuilder
  2278  	fcb.Z = x-y == 0
  2279  	fcb.N = x-y < 0
  2280  	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
  2281  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2282  	return fcb.encode()
  2283  }
  2284  
  2285  // logicFlags64 returns flags set to the sign/zeroness of x.
  2286  // C and V are set to false.
  2287  func logicFlags64(x int64) flagConstant {
  2288  	var fcb flagConstantBuilder
  2289  	fcb.Z = x == 0
  2290  	fcb.N = x < 0
  2291  	return fcb.encode()
  2292  }
  2293  
  2294  // logicFlags32 returns flags set to the sign/zeroness of x.
  2295  // C and V are set to false.
  2296  func logicFlags32(x int32) flagConstant {
  2297  	var fcb flagConstantBuilder
  2298  	fcb.Z = x == 0
  2299  	fcb.N = x < 0
  2300  	return fcb.encode()
  2301  }
  2302  
  2303  func makeJumpTableSym(b *Block) *obj.LSym {
  2304  	s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
  2305  	// The jump table symbol is accessed only from the function symbol.
  2306  	s.Set(obj.AttrStatic, true)
  2307  	return s
  2308  }
  2309  
  2310  // canRotate reports whether the architecture supports
  2311  // rotates of integer registers with the given number of bits.
  2312  func canRotate(c *Config, bits int64) bool {
  2313  	if bits > c.PtrSize*8 {
  2314  		// Don't rewrite to rotates bigger than the machine word.
  2315  		return false
  2316  	}
  2317  	switch c.arch {
  2318  	case "386", "amd64", "arm64", "loong64", "riscv64":
  2319  		return true
  2320  	case "arm", "s390x", "ppc64", "ppc64le", "wasm":
  2321  		return bits >= 32
  2322  	default:
  2323  		return false
  2324  	}
  2325  }
  2326  
  2327  // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
  2328  func isARM64bitcon(x uint64) bool {
  2329  	if x == 1<<64-1 || x == 0 {
  2330  		return false
  2331  	}
  2332  	// determine the period and sign-extend a unit to 64 bits
  2333  	switch {
  2334  	case x != x>>32|x<<32:
  2335  		// period is 64
  2336  		// nothing to do
  2337  	case x != x>>16|x<<48:
  2338  		// period is 32
  2339  		x = uint64(int64(int32(x)))
  2340  	case x != x>>8|x<<56:
  2341  		// period is 16
  2342  		x = uint64(int64(int16(x)))
  2343  	case x != x>>4|x<<60:
  2344  		// period is 8
  2345  		x = uint64(int64(int8(x)))
  2346  	default:
  2347  		// period is 4 or 2, always true
  2348  		// 0001, 0010, 0100, 1000 -- 0001 rotate
  2349  		// 0011, 0110, 1100, 1001 -- 0011 rotate
  2350  		// 0111, 1011, 1101, 1110 -- 0111 rotate
  2351  		// 0101, 1010             -- 01   rotate, repeat
  2352  		return true
  2353  	}
  2354  	return sequenceOfOnes(x) || sequenceOfOnes(^x)
  2355  }
  2356  
  2357  // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
  2358  func sequenceOfOnes(x uint64) bool {
  2359  	y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
  2360  	y += x
  2361  	return (y-1)&y == 0
  2362  }
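
// For example (illustrative): sequenceOfOnes(0x0FF0) is true (a single run of
// ones), while sequenceOfOnes(0x0F0F) is false because adding the lowest set
// bit does not produce a power of two.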
  2363  
  2364  // isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
  2365  func isARM64addcon(v int64) bool {
  2366  	/* uimm12 or uimm24? */
  2367  	if v < 0 {
  2368  		return false
  2369  	}
  2370  	if (v & 0xFFF) == 0 {
  2371  		v >>= 12
  2372  	}
  2373  	return v <= 0xFFF
  2374  }
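
// For example (illustrative): isARM64addcon accepts 0xFFF (a plain uimm12) and
// 0xFFF000 (a uimm12 shifted left by 12), but rejects 0x1001, which has bits
// both inside and outside the low 12 bits.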
  2375  
  2376  // setPos sets the position of v to pos, then returns true.
  2377  // Useful for setting the result of a rewrite's position to
  2378  // something other than the default.
  2379  func setPos(v *Value, pos src.XPos) bool {
  2380  	v.Pos = pos
  2381  	return true
  2382  }
  2383  
  2384  // isNonNegative reports whether v is known to be greater or equal to zero.
  2385  // Note that this is pretty simplistic. The prove pass generates more detailed
  2386  // nonnegative information about values.
  2387  func isNonNegative(v *Value) bool {
  2388  	if !v.Type.IsInteger() {
  2389  		v.Fatalf("isNonNegative bad type: %v", v.Type)
  2390  	}
  2391  	// TODO: return true if !v.Type.IsSigned()
  2392  	// SSA isn't type-safe enough to do that now (issue 37753).
  2393  	// The checks below depend only on the pattern of bits.
  2394  
  2395  	switch v.Op {
  2396  	case OpConst64:
  2397  		return v.AuxInt >= 0
  2398  
  2399  	case OpConst32:
  2400  		return int32(v.AuxInt) >= 0
  2401  
  2402  	case OpConst16:
  2403  		return int16(v.AuxInt) >= 0
  2404  
  2405  	case OpConst8:
  2406  		return int8(v.AuxInt) >= 0
  2407  
  2408  	case OpStringLen, OpSliceLen, OpSliceCap,
  2409  		OpZeroExt8to64, OpZeroExt16to64, OpZeroExt32to64,
  2410  		OpZeroExt8to32, OpZeroExt16to32, OpZeroExt8to16,
  2411  		OpCtz64, OpCtz32, OpCtz16, OpCtz8,
  2412  		OpCtz64NonZero, OpCtz32NonZero, OpCtz16NonZero, OpCtz8NonZero,
  2413  		OpBitLen64, OpBitLen32, OpBitLen16, OpBitLen8:
  2414  		return true
  2415  
  2416  	case OpRsh64Ux64, OpRsh32Ux64:
  2417  		by := v.Args[1]
  2418  		return by.Op == OpConst64 && by.AuxInt > 0
  2419  
  2420  	case OpRsh64x64, OpRsh32x64, OpRsh8x64, OpRsh16x64, OpRsh32x32, OpRsh64x32,
  2421  		OpSignExt32to64, OpSignExt16to64, OpSignExt8to64, OpSignExt16to32, OpSignExt8to32:
  2422  		return isNonNegative(v.Args[0])
  2423  
  2424  	case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  2425  		return isNonNegative(v.Args[0]) || isNonNegative(v.Args[1])
  2426  
  2427  	case OpMod64, OpMod32, OpMod16, OpMod8,
  2428  		OpDiv64, OpDiv32, OpDiv16, OpDiv8,
  2429  		OpOr64, OpOr32, OpOr16, OpOr8,
  2430  		OpXor64, OpXor32, OpXor16, OpXor8:
  2431  		return isNonNegative(v.Args[0]) && isNonNegative(v.Args[1])
  2432  
  2433  		// We could handle OpPhi here, but the improvements from doing
  2434  		// so are very minor, and it is neither simple nor cheap.
  2435  	}
  2436  	return false
  2437  }
  2438  
  2439  func rewriteStructLoad(v *Value) *Value {
  2440  	b := v.Block
  2441  	ptr := v.Args[0]
  2442  	mem := v.Args[1]
  2443  
  2444  	t := v.Type
  2445  	args := make([]*Value, t.NumFields())
  2446  	for i := range args {
  2447  		ft := t.FieldType(i)
  2448  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), ptr)
  2449  		args[i] = b.NewValue2(v.Pos, OpLoad, ft, addr, mem)
  2450  	}
  2451  
  2452  	v.reset(OpStructMake)
  2453  	v.AddArgs(args...)
  2454  	return v
  2455  }
  2456  
  2457  func rewriteStructStore(v *Value) *Value {
  2458  	b := v.Block
  2459  	dst := v.Args[0]
  2460  	x := v.Args[1]
  2461  	if x.Op != OpStructMake {
  2462  		base.Fatalf("invalid struct store: %v", x)
  2463  	}
  2464  	mem := v.Args[2]
  2465  
  2466  	t := x.Type
  2467  	for i, arg := range x.Args {
  2468  		ft := t.FieldType(i)
  2469  
  2470  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), dst)
  2471  		mem = b.NewValue3A(v.Pos, OpStore, types.TypeMem, typeToAux(ft), addr, arg, mem)
  2472  	}
  2473  
  2474  	return mem
  2475  }
  2476  
  2477  // isDirectType reports whether v represents a type
  2478  // (a *runtime._type) whose value is stored directly in an
  2479  // interface (i.e., is pointer or pointer-like).
  2480  func isDirectType(v *Value) bool {
  2481  	return isDirectType1(v)
  2482  }
  2483  
  2484  // v is a type
  2485  func isDirectType1(v *Value) bool {
  2486  	switch v.Op {
  2487  	case OpITab:
  2488  		return isDirectType2(v.Args[0])
  2489  	case OpAddr:
  2490  		lsym := v.Aux.(*obj.LSym)
  2491  		if lsym.Extra == nil {
  2492  			return false
  2493  		}
  2494  		if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  2495  			return types.IsDirectIface(ti.Type.(*types.Type))
  2496  		}
  2497  	}
  2498  	return false
  2499  }
  2500  
  2501  // v is an empty interface
  2502  func isDirectType2(v *Value) bool {
  2503  	switch v.Op {
  2504  	case OpIMake:
  2505  		return isDirectType1(v.Args[0])
  2506  	}
  2507  	return false
  2508  }
  2509  
  2510  // isDirectIface reports whether v represents an itab
  2511  // (a *runtime._itab) for a type whose value is stored directly
  2512  // in an interface (i.e., is pointer or pointer-like).
  2513  func isDirectIface(v *Value) bool {
  2514  	return isDirectIface1(v, 9)
  2515  }
  2516  
  2517  // v is an itab
  2518  func isDirectIface1(v *Value, depth int) bool {
  2519  	if depth == 0 {
  2520  		return false
  2521  	}
  2522  	switch v.Op {
  2523  	case OpITab:
  2524  		return isDirectIface2(v.Args[0], depth-1)
  2525  	case OpAddr:
  2526  		lsym := v.Aux.(*obj.LSym)
  2527  		if lsym.Extra == nil {
  2528  			return false
  2529  		}
  2530  		if ii, ok := (*lsym.Extra).(*obj.ItabInfo); ok {
  2531  			return types.IsDirectIface(ii.Type.(*types.Type))
  2532  		}
  2533  	case OpConstNil:
  2534  		// We can treat this as direct, because if the itab is
  2535  		// nil, the data field must be nil also.
  2536  		return true
  2537  	}
  2538  	return false
  2539  }
  2540  
  2541  // v is an interface
  2542  func isDirectIface2(v *Value, depth int) bool {
  2543  	if depth == 0 {
  2544  		return false
  2545  	}
  2546  	switch v.Op {
  2547  	case OpIMake:
  2548  		return isDirectIface1(v.Args[0], depth-1)
  2549  	case OpPhi:
  2550  		for _, a := range v.Args {
  2551  			if !isDirectIface2(a, depth-1) {
  2552  				return false
  2553  			}
  2554  		}
  2555  		return true
  2556  	}
  2557  	return false
  2558  }
  2559  
  2560  func bitsAdd64(x, y, carry int64) (r struct{ sum, carry int64 }) {
  2561  	s, c := bits.Add64(uint64(x), uint64(y), uint64(carry))
  2562  	r.sum, r.carry = int64(s), int64(c)
  2563  	return
  2564  }
  2565  
  2566  func bitsMulU64(x, y int64) (r struct{ hi, lo int64 }) {
  2567  	hi, lo := bits.Mul64(uint64(x), uint64(y))
  2568  	r.hi, r.lo = int64(hi), int64(lo)
  2569  	return
  2570  }
  2571  func bitsMulU32(x, y int32) (r struct{ hi, lo int32 }) {
  2572  	hi, lo := bits.Mul32(uint32(x), uint32(y))
  2573  	r.hi, r.lo = int32(hi), int32(lo)
  2574  	return
  2575  }
  2576  
  2577  // flagify rewrites v which is (X ...) to (Select0 (Xflags ...)).
  2578  func flagify(v *Value) bool {
  2579  	var flagVersion Op
  2580  	switch v.Op {
  2581  	case OpAMD64ADDQconst:
  2582  		flagVersion = OpAMD64ADDQconstflags
  2583  	case OpAMD64ADDLconst:
  2584  		flagVersion = OpAMD64ADDLconstflags
  2585  	default:
  2586  		base.Fatalf("can't flagify op %s", v.Op)
  2587  	}
  2588  	inner := v.copyInto(v.Block)
  2589  	inner.Op = flagVersion
  2590  	inner.Type = types.NewTuple(v.Type, types.TypeFlags)
  2591  	v.reset(OpSelect0)
  2592  	v.AddArg(inner)
  2593  	return true
  2594  }
  2595  
  2596  // PanicBoundsC contains a constant for a bounds failure.
  2597  type PanicBoundsC struct {
  2598  	C int64
  2599  }
  2600  
  2601  // PanicBoundsCC contains 2 constants for a bounds failure.
  2602  type PanicBoundsCC struct {
  2603  	Cx int64
  2604  	Cy int64
  2605  }
  2606  
  2607  func (p PanicBoundsC) CanBeAnSSAAux() {
  2608  }
  2609  func (p PanicBoundsCC) CanBeAnSSAAux() {
  2610  }
  2611  
  2612  func auxToPanicBoundsC(i Aux) PanicBoundsC {
  2613  	return i.(PanicBoundsC)
  2614  }
  2615  func auxToPanicBoundsCC(i Aux) PanicBoundsCC {
  2616  	return i.(PanicBoundsCC)
  2617  }
  2618  func panicBoundsCToAux(p PanicBoundsC) Aux {
  2619  	return p
  2620  }
  2621  func panicBoundsCCToAux(p PanicBoundsCC) Aux {
  2622  	return p
  2623  }
  2624  
  2625  // When v is (IMake typ (StructMake ...)), convert to
  2626  // (IMake typ arg) where arg is the pointer-y argument to
  2627  // the StructMake (there must be exactly one).
  2628  func imakeOfStructMake(v *Value) *Value {
  2629  	var arg *Value
  2630  	for _, a := range v.Args[1].Args {
  2631  		if a.Type.Size() > 0 {
  2632  			arg = a
  2633  			break
  2634  		}
  2635  	}
  2636  	return v.Block.NewValue2(v.Pos, OpIMake, v.Type, v.Args[0], arg)
  2637  }
  2638  
