Source file src/cmd/compile/internal/ssa/prove.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/types"
     9  	"cmd/internal/src"
    10  	"fmt"
    11  	"math"
    12  	"math/bits"
    13  )
    14  
    15  type branch int
    16  
    17  const (
    18  	unknown branch = iota
    19  	positive
    20  	negative
    21  	// The outedges from a jump table are jumpTable0,
    22  	// jumpTable0+1, jumpTable0+2, etc. There could be an
    23  	// arbitrary number so we can't list them all here.
    24  	jumpTable0
    25  )
    26  
    27  func (b branch) String() string {
    28  	switch b {
    29  	case unknown:
    30  		return "unk"
    31  	case positive:
    32  		return "pos"
    33  	case negative:
    34  		return "neg"
    35  	default:
    36  		return fmt.Sprintf("jmp%d", b-jumpTable0)
    37  	}
    38  }
    39  
    40  // relation represents the set of possible relations between
    41  // pairs of variables (v, w). Without a priori knowledge the
    42  // mask is lt | eq | gt meaning v can be less than, equal to or
    43  // greater than w. When the execution path branches on the condition
    44  // `v op w` the set of relations is updated to exclude any
    45  // relation not possible due to `v op w` being true (or false).
    46  //
    47  // E.g.
    48  //
    49  //	r := relation(...)
    50  //
    51  //	if v < w {
    52  //	  newR := r & lt
    53  //	}
    54  //	if v >= w {
    55  //	  newR := r & (eq|gt)
    56  //	}
    57  //	if v != w {
    58  //	  newR := r & (lt|gt)
    59  //	}
    60  type relation uint
    61  
    62  const (
    63  	lt relation = 1 << iota
    64  	eq
    65  	gt
    66  )
    67  
    68  var relationStrings = [...]string{
    69  	0: "none", lt: "<", eq: "==", lt | eq: "<=",
    70  	gt: ">", gt | lt: "!=", gt | eq: ">=", gt | eq | lt: "any",
    71  }
    72  
    73  func (r relation) String() string {
    74  	if r < relation(len(relationStrings)) {
    75  		return relationStrings[r]
    76  	}
    77  	return fmt.Sprintf("relation(%d)", uint(r))
    78  }
    79  
    80  // domain represents the domain of a variable pair in which a set
    81  // of relations is known. For example, relations learned for unsigned
    82  // pairs cannot be transferred to signed pairs because the same bit
    83  // representation can mean something else.
    84  type domain uint
    85  
    86  const (
    87  	signed domain = 1 << iota
    88  	unsigned
    89  	pointer
    90  	boolean
    91  )
    92  
    93  var domainStrings = [...]string{
    94  	"signed", "unsigned", "pointer", "boolean",
    95  }
    96  
    97  func (d domain) String() string {
    98  	s := ""
    99  	for i, ds := range domainStrings {
   100  		if d&(1<<uint(i)) != 0 {
   101  			if len(s) != 0 {
   102  				s += "|"
   103  			}
   104  			s += ds
   105  			d &^= 1 << uint(i)
   106  		}
   107  	}
   108  	if d != 0 {
   109  		if len(s) != 0 {
   110  			s += "|"
   111  		}
   112  		s += fmt.Sprintf("0x%x", uint(d))
   113  	}
   114  	return s
   115  }
   116  
   117  // a limit records known upper and lower bounds for a value.
   118  //
   119  // If we have min>max or umin>umax, then this limit is
   120  // called "unsatisfiable". When we encounter such a limit, we
   121  // know that any code for which that limit applies is unreachable.
   122  // We don't particularly care how unsatisfiable limits propagate,
   123  // including becoming satisfiable, because any optimization
   124  // decisions based on those limits only apply to unreachable code.
   125  type limit struct {
   126  	min, max   int64  // min <= value <= max, signed
   127  	umin, umax uint64 // umin <= value <= umax, unsigned
   128  	// For booleans, we use 0==false, 1==true for both ranges
   129  	// For pointers, we use 0,0,0,0 for nil and minInt64,maxInt64,1,maxUint64 for nonnil
   130  }
   131  
   132  func (l limit) String() string {
   133  	return fmt.Sprintf("sm,SM,um,UM=%d,%d,%d,%d", l.min, l.max, l.umin, l.umax)
   134  }
   135  
   136  func (l limit) intersect(l2 limit) limit {
   137  	l.min = max(l.min, l2.min)
   138  	l.umin = max(l.umin, l2.umin)
   139  	l.max = min(l.max, l2.max)
   140  	l.umax = min(l.umax, l2.umax)
   141  	return l
   142  }
   143  
   144  func (l limit) signedMin(m int64) limit {
   145  	l.min = max(l.min, m)
   146  	return l
   147  }
   148  
   149  func (l limit) signedMinMax(minimum, maximum int64) limit {
   150  	l.min = max(l.min, minimum)
   151  	l.max = min(l.max, maximum)
   152  	return l
   153  }
   154  
   155  func (l limit) unsignedMin(m uint64) limit {
   156  	l.umin = max(l.umin, m)
   157  	return l
   158  }
   159  func (l limit) unsignedMax(m uint64) limit {
   160  	l.umax = min(l.umax, m)
   161  	return l
   162  }
   163  func (l limit) unsignedMinMax(minimum, maximum uint64) limit {
   164  	l.umin = max(l.umin, minimum)
   165  	l.umax = min(l.umax, maximum)
   166  	return l
   167  }
   168  
   169  func (l limit) nonzero() bool {
   170  	return l.min > 0 || l.umin > 0 || l.max < 0
   171  }
   172  func (l limit) nonnegative() bool {
   173  	return l.min >= 0
   174  }
   175  func (l limit) unsat() bool {
   176  	return l.min > l.max || l.umin > l.umax
   177  }
   178  
   179  // If x and y can add without overflow or underflow
   180  // (using b bits), safeAdd returns x+y, true.
   181  // Otherwise, returns 0, false.
   182  func safeAdd(x, y int64, b uint) (int64, bool) {
   183  	s := x + y
   184  	if x >= 0 && y >= 0 && s < 0 {
   185  		return 0, false // 64-bit overflow
   186  	}
   187  	if x < 0 && y < 0 && s >= 0 {
   188  		return 0, false // 64-bit underflow
   189  	}
   190  	if !fitsInBits(s, b) {
   191  		return 0, false
   192  	}
   193  	return s, true
   194  }
   195  
   196  // same as safeAdd for unsigned arithmetic.
   197  func safeAddU(x, y uint64, b uint) (uint64, bool) {
   198  	s := x + y
   199  	if s < x || s < y {
   200  		return 0, false // 64-bit overflow
   201  	}
   202  	if !fitsInBitsU(s, b) {
   203  		return 0, false
   204  	}
   205  	return s, true
   206  }
   207  
   208  // same as safeAdd but for subtraction.
   209  func safeSub(x, y int64, b uint) (int64, bool) {
   210  	if y == math.MinInt64 {
   211  		if x == math.MaxInt64 {
   212  			return 0, false // 64-bit overflow
   213  		}
   214  		x++
   215  		y++
   216  	}
   217  	return safeAdd(x, -y, b)
   218  }
   219  
   220  // same as safeAddU but for subtraction.
   221  func safeSubU(x, y uint64, b uint) (uint64, bool) {
   222  	if x < y {
   223  		return 0, false // 64-bit underflow
   224  	}
   225  	s := x - y
   226  	if !fitsInBitsU(s, b) {
   227  		return 0, false
   228  	}
   229  	return s, true
   230  }
   231  
   232  // fitsInBits reports whether x fits in b bits (signed).
   233  func fitsInBits(x int64, b uint) bool {
   234  	if b == 64 {
   235  		return true
   236  	}
   237  	m := int64(-1) << (b - 1)
   238  	M := -m - 1
   239  	return x >= m && x <= M
   240  }
   241  
   242  // fitsInBitsU reports whether x fits in b bits (unsigned).
   243  func fitsInBitsU(x uint64, b uint) bool {
   244  	return x>>b == 0
   245  }
   246  
   247  // add returns the limit obtained by adding a value with limit l
   248  // to a value with limit l2. The result must fit in b bits.
   249  func (l limit) add(l2 limit, b uint) limit {
   250  	r := noLimit
   251  	min, minOk := safeAdd(l.min, l2.min, b)
   252  	max, maxOk := safeAdd(l.max, l2.max, b)
   253  	if minOk && maxOk {
   254  		r.min = min
   255  		r.max = max
   256  	}
   257  	umin, uminOk := safeAddU(l.umin, l2.umin, b)
   258  	umax, umaxOk := safeAddU(l.umax, l2.umax, b)
   259  	if uminOk && umaxOk {
   260  		r.umin = umin
   261  		r.umax = umax
   262  	}
   263  	return r
   264  }
   265  
   266  // same as add but for subtraction.
   267  func (l limit) sub(l2 limit, b uint) limit {
   268  	r := noLimit
   269  	min, minOk := safeSub(l.min, l2.max, b)
   270  	max, maxOk := safeSub(l.max, l2.min, b)
   271  	if minOk && maxOk {
   272  		r.min = min
   273  		r.max = max
   274  	}
   275  	umin, uminOk := safeSubU(l.umin, l2.umax, b)
   276  	umax, umaxOk := safeSubU(l.umax, l2.umin, b)
   277  	if uminOk && umaxOk {
   278  		r.umin = umin
   279  		r.umax = umax
   280  	}
   281  	return r
   282  }
   283  
   284  // same as add but for multiplication.
   285  func (l limit) mul(l2 limit, b uint) limit {
   286  	r := noLimit
   287  	umaxhi, umaxlo := bits.Mul64(l.umax, l2.umax)
   288  	if umaxhi == 0 && fitsInBitsU(umaxlo, b) {
   289  		r.umax = umaxlo
   290  		r.umin = l.umin * l2.umin
   291  		// Note: if the code containing this multiply is
   292  		// unreachable, then we may have umin>umax, and this
   293  		// multiply may overflow.  But that's ok for
   294  		// unreachable code. If this code is reachable, we
   295  		// know umin<=umax, so this multiply will not overflow
   296  		// because the max multiply didn't.
   297  	}
   298  	// Signed is harder, so don't bother. The only useful
   299  	// case is when we know both multiplicands are nonnegative,
   300  	// but that case is handled above because we would have then
   301  	// previously propagated signed info to the unsigned domain,
   302  	// and will propagate it back after the multiply.
   303  	return r
   304  }
   305  
   306  // Similar to add, but compute 1 << l if it fits without overflow in b bits.
   307  func (l limit) exp2(b uint) limit {
   308  	r := noLimit
   309  	if l.umax < uint64(b) {
   310  		r.umin = 1 << l.umin
   311  		r.umax = 1 << l.umax
   312  		// Same as above in mul, signed<->unsigned propagation
   313  		// will handle the signed case for us.
   314  	}
   315  	return r
   316  }
   317  
   318  // Similar to add, but computes the complement of the limit for bitsize b.
   319  func (l limit) com(b uint) limit {
   320  	switch b {
   321  	case 64:
   322  		return limit{
   323  			min:  ^l.max,
   324  			max:  ^l.min,
   325  			umin: ^l.umax,
   326  			umax: ^l.umin,
   327  		}
   328  	case 32:
   329  		return limit{
   330  			min:  int64(^int32(l.max)),
   331  			max:  int64(^int32(l.min)),
   332  			umin: uint64(^uint32(l.umax)),
   333  			umax: uint64(^uint32(l.umin)),
   334  		}
   335  	case 16:
   336  		return limit{
   337  			min:  int64(^int16(l.max)),
   338  			max:  int64(^int16(l.min)),
   339  			umin: uint64(^uint16(l.umax)),
   340  			umax: uint64(^uint16(l.umin)),
   341  		}
   342  	case 8:
   343  		return limit{
   344  			min:  int64(^int8(l.max)),
   345  			max:  int64(^int8(l.min)),
   346  			umin: uint64(^uint8(l.umax)),
   347  			umax: uint64(^uint8(l.umin)),
   348  		}
   349  	default:
   350  		panic("unreachable")
   351  	}
   352  }
   353  
   354  var noLimit = limit{math.MinInt64, math.MaxInt64, 0, math.MaxUint64}
   355  
   356  // a limitFact is a limit known for a particular value.
   357  type limitFact struct {
   358  	vid   ID
   359  	limit limit
   360  }
   361  
   362  // An ordering encodes facts like v < w.
   363  type ordering struct {
   364  	next *ordering // linked list of all known orderings for v.
   365  	// Note: v is implicit here, determined by which linked list it is in.
   366  	w *Value
   367  	d domain
   368  	r relation // one of ==,!=,<,<=,>,>=
   369  	// if d is boolean or pointer, r can only be ==, !=
   370  }
   371  
   372  // factsTable keeps track of relations between pairs of values.
   373  //
   374  // The fact table logic is sound, but incomplete. Outside of a few
   375  // special cases, it performs no deduction or arithmetic. While there
   376  // are known decision procedures for this, the ad hoc approach taken
   377  // by the facts table is effective for real code while remaining very
   378  // efficient.
   379  type factsTable struct {
   380  	// unsat is true if facts contains a contradiction.
   381  	//
   382  	// Note that the factsTable logic is incomplete, so if unsat
   383  	// is false, the assertions in factsTable could be satisfiable
   384  	// *or* unsatisfiable.
   385  	unsat      bool // true if facts contains a contradiction
   386  	unsatDepth int  // number of unsat checkpoints
   387  
   388  	// order* is a couple of partial order sets that record information
   389  	// about relations between SSA values in the signed and unsigned
   390  	// domain.
   391  	orderS *poset
   392  	orderU *poset
   393  
   394  	// orderings contains a list of known orderings between values.
   395  	// These lists are indexed by v.ID.
   396  	// We do not record transitive orderings. Only explicitly learned
   397  	// orderings are recorded. Transitive orderings can be obtained
   398  	// by walking along the individual orderings.
   399  	orderings map[ID]*ordering
   400  	// stack of IDs which have had an entry added in orderings.
   401  	// In addition, ID==0 are checkpoint markers.
   402  	orderingsStack []ID
   403  	orderingCache  *ordering // unused ordering records
   404  
   405  	// known lower and upper constant bounds on individual values.
   406  	limits       []limit     // indexed by value ID
   407  	limitStack   []limitFact // previous entries
   408  	recurseCheck []bool      // recursion detector for limit propagation
   409  }
   410  
   411  // checkpointBound is an invalid value used for checkpointing
   412  // and restoring factsTable.
   413  var checkpointBound = limitFact{}
   414  
   415  func newFactsTable(f *Func) *factsTable {
   416  	ft := &factsTable{}
   417  	ft.orderS = f.newPoset()
   418  	ft.orderU = f.newPoset()
   419  	ft.orderS.SetUnsigned(false)
   420  	ft.orderU.SetUnsigned(true)
   421  	ft.orderings = make(map[ID]*ordering)
   422  	ft.limits = f.Cache.allocLimitSlice(f.NumValues())
   423  	for _, b := range f.Blocks {
   424  		for _, v := range b.Values {
   425  			ft.limits[v.ID] = initLimit(v)
   426  		}
   427  	}
   428  	ft.limitStack = make([]limitFact, 4)
   429  	ft.recurseCheck = f.Cache.allocBoolSlice(f.NumValues())
   430  	return ft
   431  }
   432  
   433  // initLimitForNewValue initializes the limits for newly created values,
   434  // possibly needing to expand the limits slice. Currently used by
   435  // simplifyBlock when certain provably constant results are folded.
   436  func (ft *factsTable) initLimitForNewValue(v *Value) {
   437  	if int(v.ID) >= len(ft.limits) {
   438  		f := v.Block.Func
   439  		n := f.NumValues()
   440  		if cap(ft.limits) >= n {
   441  			ft.limits = ft.limits[:n]
   442  		} else {
   443  			old := ft.limits
   444  			ft.limits = f.Cache.allocLimitSlice(n)
   445  			copy(ft.limits, old)
   446  			f.Cache.freeLimitSlice(old)
   447  		}
   448  	}
   449  	ft.limits[v.ID] = initLimit(v)
   450  }
   451  
   452  // signedMin records the fact that we know v is at least
   453  // min in the signed domain.
   454  func (ft *factsTable) signedMin(v *Value, min int64) bool {
   455  	return ft.newLimit(v, limit{min: min, max: math.MaxInt64, umin: 0, umax: math.MaxUint64})
   456  }
   457  
   458  // signedMax records the fact that we know v is at most
   459  // max in the signed domain.
   460  func (ft *factsTable) signedMax(v *Value, max int64) bool {
   461  	return ft.newLimit(v, limit{min: math.MinInt64, max: max, umin: 0, umax: math.MaxUint64})
   462  }
   463  func (ft *factsTable) signedMinMax(v *Value, min, max int64) bool {
   464  	return ft.newLimit(v, limit{min: min, max: max, umin: 0, umax: math.MaxUint64})
   465  }
   466  
   467  // setNonNegative records the fact that v is known to be non-negative.
   468  func (ft *factsTable) setNonNegative(v *Value) bool {
   469  	return ft.signedMin(v, 0)
   470  }
   471  
   472  // unsignedMin records the fact that we know v is at least
   473  // min in the unsigned domain.
   474  func (ft *factsTable) unsignedMin(v *Value, min uint64) bool {
   475  	return ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: math.MaxUint64})
   476  }
   477  
   478  // unsignedMax records the fact that we know v is at most
   479  // max in the unsigned domain.
   480  func (ft *factsTable) unsignedMax(v *Value, max uint64) bool {
   481  	return ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: 0, umax: max})
   482  }
   483  func (ft *factsTable) unsignedMinMax(v *Value, min, max uint64) bool {
   484  	return ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: max})
   485  }
   486  
   487  func (ft *factsTable) booleanFalse(v *Value) bool {
   488  	return ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0})
   489  }
   490  func (ft *factsTable) booleanTrue(v *Value) bool {
   491  	return ft.newLimit(v, limit{min: 1, max: 1, umin: 1, umax: 1})
   492  }
   493  func (ft *factsTable) pointerNil(v *Value) bool {
   494  	return ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0})
   495  }
   496  func (ft *factsTable) pointerNonNil(v *Value) bool {
   497  	l := noLimit
   498  	l.umin = 1
   499  	return ft.newLimit(v, l)
   500  }
   501  
   502  // newLimit adds new limiting information for v.
   503  // Returns true if the new limit added any new information.
   504  func (ft *factsTable) newLimit(v *Value, newLim limit) bool {
   505  	oldLim := ft.limits[v.ID]
   506  
   507  	// Merge old and new information.
   508  	lim := oldLim.intersect(newLim)
   509  
   510  	// signed <-> unsigned propagation
   511  	if lim.min >= 0 {
   512  		lim = lim.unsignedMinMax(uint64(lim.min), uint64(lim.max))
   513  	}
   514  	if fitsInBitsU(lim.umax, uint(8*v.Type.Size()-1)) {
   515  		lim = lim.signedMinMax(int64(lim.umin), int64(lim.umax))
   516  	}
   517  
   518  	if lim == oldLim {
   519  		return false // nothing new to record
   520  	}
   521  
   522  	if lim.unsat() {
   523  		r := !ft.unsat
   524  		ft.unsat = true
   525  		return r
   526  	}
   527  
   528  	// Check for recursion. This normally happens because in unsatisfiable
   529  	// cases we have a < b < a, and every update to a's limits returns
   530  	// here again with the limit increased by 2.
   531  	// Normally this is caught early by the orderS/orderU posets, but in
   532  	// cases where the comparisons jump between signed and unsigned domains,
   533  	// the posets will not notice.
   534  	if ft.recurseCheck[v.ID] {
   535  		// This should only happen for unsatisfiable cases. TODO: check
   536  		return false
   537  	}
   538  	ft.recurseCheck[v.ID] = true
   539  	defer func() {
   540  		ft.recurseCheck[v.ID] = false
   541  	}()
   542  
   543  	// Record undo information.
   544  	ft.limitStack = append(ft.limitStack, limitFact{v.ID, oldLim})
   545  	// Record new information.
   546  	ft.limits[v.ID] = lim
   547  	if v.Block.Func.pass.debug > 2 {
   548  		// TODO: pos is probably wrong. This is the position where v is defined,
   549  		// not the position where we learned the fact about it (which was
   550  		// probably some subsequent compare+branch).
   551  		v.Block.Func.Warnl(v.Pos, "new limit %s %s unsat=%v", v, lim.String(), ft.unsat)
   552  	}
   553  
   554  	// Propagate this new constant range to other values
   555  	// that we know are ordered with respect to this one.
   556  	// Note overflow/underflow in the arithmetic below is ok,
   557  	// it will just lead to imprecision (undetected unsatisfiability).
   558  	for o := ft.orderings[v.ID]; o != nil; o = o.next {
   559  		switch o.d {
   560  		case signed:
   561  			switch o.r {
   562  			case eq: // v == w
   563  				ft.signedMinMax(o.w, lim.min, lim.max)
   564  			case lt | eq: // v <= w
   565  				ft.signedMin(o.w, lim.min)
   566  			case lt: // v < w
   567  				ft.signedMin(o.w, lim.min+1)
   568  			case gt | eq: // v >= w
   569  				ft.signedMax(o.w, lim.max)
   570  			case gt: // v > w
   571  				ft.signedMax(o.w, lim.max-1)
   572  			case lt | gt: // v != w
   573  				if lim.min == lim.max { // v is a constant
   574  					c := lim.min
   575  					if ft.limits[o.w.ID].min == c {
   576  						ft.signedMin(o.w, c+1)
   577  					}
   578  					if ft.limits[o.w.ID].max == c {
   579  						ft.signedMax(o.w, c-1)
   580  					}
   581  				}
   582  			}
   583  		case unsigned:
   584  			switch o.r {
   585  			case eq: // v == w
   586  				ft.unsignedMinMax(o.w, lim.umin, lim.umax)
   587  			case lt | eq: // v <= w
   588  				ft.unsignedMin(o.w, lim.umin)
   589  			case lt: // v < w
   590  				ft.unsignedMin(o.w, lim.umin+1)
   591  			case gt | eq: // v >= w
   592  				ft.unsignedMax(o.w, lim.umax)
   593  			case gt: // v > w
   594  				ft.unsignedMax(o.w, lim.umax-1)
   595  			case lt | gt: // v != w
   596  				if lim.umin == lim.umax { // v is a constant
   597  					c := lim.umin
   598  					if ft.limits[o.w.ID].umin == c {
   599  						ft.unsignedMin(o.w, c+1)
   600  					}
   601  					if ft.limits[o.w.ID].umax == c {
   602  						ft.unsignedMax(o.w, c-1)
   603  					}
   604  				}
   605  			}
   606  		case boolean:
   607  			switch o.r {
   608  			case eq:
   609  				if lim.min == 0 && lim.max == 0 { // constant false
   610  					ft.booleanFalse(o.w)
   611  				}
   612  				if lim.min == 1 && lim.max == 1 { // constant true
   613  					ft.booleanTrue(o.w)
   614  				}
   615  			case lt | gt:
   616  				if lim.min == 0 && lim.max == 0 { // constant false
   617  					ft.booleanTrue(o.w)
   618  				}
   619  				if lim.min == 1 && lim.max == 1 { // constant true
   620  					ft.booleanFalse(o.w)
   621  				}
   622  			}
   623  		case pointer:
   624  			switch o.r {
   625  			case eq:
   626  				if lim.umax == 0 { // nil
   627  					ft.pointerNil(o.w)
   628  				}
   629  				if lim.umin > 0 { // non-nil
   630  					ft.pointerNonNil(o.w)
   631  				}
   632  			case lt | gt:
   633  				if lim.umax == 0 { // nil
   634  					ft.pointerNonNil(o.w)
   635  				}
   636  				// note: not equal to non-nil doesn't tell us anything.
   637  			}
   638  		}
   639  	}
   640  
   641  	// If this is new known constant for a boolean value,
   642  	// extract relation between its args. For example, if
   643  	// We learn v is false, and v is defined as a<b, then we learn a>=b.
   644  	if v.Type.IsBoolean() {
   645  		// If we reach here, it is because we have a more restrictive
   646  		// value for v than the default. The only two such values
   647  		// are constant true or constant false.
   648  		if lim.min != lim.max {
   649  			v.Block.Func.Fatalf("boolean not constant %v", v)
   650  		}
   651  		isTrue := lim.min == 1
   652  		if dr, ok := domainRelationTable[v.Op]; ok && v.Op != OpIsInBounds && v.Op != OpIsSliceInBounds {
   653  			d := dr.d
   654  			r := dr.r
   655  			if d == signed && ft.isNonNegative(v.Args[0]) && ft.isNonNegative(v.Args[1]) {
   656  				d |= unsigned
   657  			}
   658  			if !isTrue {
   659  				r ^= lt | gt | eq
   660  			} else if d == unsigned && (r == lt || r == lt|eq) && ft.isNonNegative(v.Args[1]) {
   661  				// Since every representation of a non-negative signed number is the same
   662  				// as in the unsigned domain, we can transfer x <= y to the signed domain,
   663  				// but only for the true branch.
   664  				d |= signed
   665  			}
   666  			// TODO: v.Block is wrong?
   667  			addRestrictions(v.Block, ft, d, v.Args[0], v.Args[1], r)
   668  		}
   669  		switch v.Op {
   670  		case OpIsNonNil:
   671  			if isTrue {
   672  				ft.pointerNonNil(v.Args[0])
   673  			} else {
   674  				ft.pointerNil(v.Args[0])
   675  			}
   676  		case OpIsInBounds, OpIsSliceInBounds:
   677  			// 0 <= a0 < a1 (or 0 <= a0 <= a1)
   678  			r := lt
   679  			if v.Op == OpIsSliceInBounds {
   680  				r |= eq
   681  			}
   682  			if isTrue {
   683  				// On the positive branch, we learn:
   684  				//   signed: 0 <= a0 < a1 (or 0 <= a0 <= a1)
   685  				//   unsigned:    a0 < a1 (or a0 <= a1)
   686  				ft.setNonNegative(v.Args[0])
   687  				ft.update(v.Block, v.Args[0], v.Args[1], signed, r)
   688  				ft.update(v.Block, v.Args[0], v.Args[1], unsigned, r)
   689  			} else {
   690  				// On the negative branch, we learn (0 > a0 ||
   691  				// a0 >= a1). In the unsigned domain, this is
   692  				// simply a0 >= a1 (which is the reverse of the
   693  				// positive branch, so nothing surprising).
   694  				// But in the signed domain, we can't express the ||
   695  				// condition, so check if a0 is non-negative instead,
   696  				// to be able to learn something.
   697  				r ^= lt | gt | eq // >= (index) or > (slice)
   698  				if ft.isNonNegative(v.Args[0]) {
   699  					ft.update(v.Block, v.Args[0], v.Args[1], signed, r)
   700  				}
   701  				ft.update(v.Block, v.Args[0], v.Args[1], unsigned, r)
   702  				// TODO: v.Block is wrong here
   703  			}
   704  		}
   705  	}
   706  
   707  	return true
   708  }
   709  
   710  func (ft *factsTable) addOrdering(v, w *Value, d domain, r relation) {
   711  	o := ft.orderingCache
   712  	if o == nil {
   713  		o = &ordering{}
   714  	} else {
   715  		ft.orderingCache = o.next
   716  	}
   717  	o.w = w
   718  	o.d = d
   719  	o.r = r
   720  	o.next = ft.orderings[v.ID]
   721  	ft.orderings[v.ID] = o
   722  	ft.orderingsStack = append(ft.orderingsStack, v.ID)
   723  }
   724  
   725  // update updates the set of relations between v and w in domain d
   726  // restricting it to r.
   727  func (ft *factsTable) update(parent *Block, v, w *Value, d domain, r relation) {
   728  	if parent.Func.pass.debug > 2 {
   729  		parent.Func.Warnl(parent.Pos, "parent=%s, update %s %s %s %s", parent, d, v, w, r)
   730  	}
   731  	// No need to do anything else if we already found unsat.
   732  	if ft.unsat {
   733  		return
   734  	}
   735  
   736  	// Self-fact. It's wasteful to register it into the facts
   737  	// table, so just note whether it's satisfiable
   738  	if v == w {
   739  		if r&eq == 0 {
   740  			ft.unsat = true
   741  		}
   742  		return
   743  	}
   744  
   745  	if d == signed || d == unsigned {
   746  		var ok bool
   747  		order := ft.orderS
   748  		if d == unsigned {
   749  			order = ft.orderU
   750  		}
   751  		switch r {
   752  		case lt:
   753  			ok = order.SetOrder(v, w)
   754  		case gt:
   755  			ok = order.SetOrder(w, v)
   756  		case lt | eq:
   757  			ok = order.SetOrderOrEqual(v, w)
   758  		case gt | eq:
   759  			ok = order.SetOrderOrEqual(w, v)
   760  		case eq:
   761  			ok = order.SetEqual(v, w)
   762  		case lt | gt:
   763  			ok = order.SetNonEqual(v, w)
   764  		default:
   765  			panic("unknown relation")
   766  		}
   767  		ft.addOrdering(v, w, d, r)
   768  		ft.addOrdering(w, v, d, reverseBits[r])
   769  
   770  		if !ok {
   771  			if parent.Func.pass.debug > 2 {
   772  				parent.Func.Warnl(parent.Pos, "unsat %s %s %s", v, w, r)
   773  			}
   774  			ft.unsat = true
   775  			return
   776  		}
   777  	}
   778  	if d == boolean || d == pointer {
   779  		for o := ft.orderings[v.ID]; o != nil; o = o.next {
   780  			if o.d == d && o.w == w {
   781  				// We already know a relationship between v and w.
   782  				// Either it is a duplicate, or it is a contradiction,
   783  				// as we only allow eq and lt|gt for these domains,
   784  				if o.r != r {
   785  					ft.unsat = true
   786  				}
   787  				return
   788  			}
   789  		}
   790  		// TODO: this does not do transitive equality.
   791  		// We could use a poset like above, but somewhat degenerate (==,!= only).
   792  		ft.addOrdering(v, w, d, r)
   793  		ft.addOrdering(w, v, d, r) // note: reverseBits unnecessary for eq and lt|gt.
   794  	}
   795  
   796  	// Extract new constant limits based on the comparison.
   797  	vLimit := ft.limits[v.ID]
   798  	wLimit := ft.limits[w.ID]
   799  	// Note: all the +1/-1 below could overflow/underflow. Either will
   800  	// still generate correct results, it will just lead to imprecision.
   801  	// In fact if there is overflow/underflow, the corresponding
   802  	// code is unreachable because the known range is outside the range
   803  	// of the value's type.
   804  	switch d {
   805  	case signed:
   806  		switch r {
   807  		case eq: // v == w
   808  			ft.signedMinMax(v, wLimit.min, wLimit.max)
   809  			ft.signedMinMax(w, vLimit.min, vLimit.max)
   810  		case lt: // v < w
   811  			ft.signedMax(v, wLimit.max-1)
   812  			ft.signedMin(w, vLimit.min+1)
   813  		case lt | eq: // v <= w
   814  			ft.signedMax(v, wLimit.max)
   815  			ft.signedMin(w, vLimit.min)
   816  		case gt: // v > w
   817  			ft.signedMin(v, wLimit.min+1)
   818  			ft.signedMax(w, vLimit.max-1)
   819  		case gt | eq: // v >= w
   820  			ft.signedMin(v, wLimit.min)
   821  			ft.signedMax(w, vLimit.max)
   822  		case lt | gt: // v != w
   823  			if vLimit.min == vLimit.max { // v is a constant
   824  				c := vLimit.min
   825  				if wLimit.min == c {
   826  					ft.signedMin(w, c+1)
   827  				}
   828  				if wLimit.max == c {
   829  					ft.signedMax(w, c-1)
   830  				}
   831  			}
   832  			if wLimit.min == wLimit.max { // w is a constant
   833  				c := wLimit.min
   834  				if vLimit.min == c {
   835  					ft.signedMin(v, c+1)
   836  				}
   837  				if vLimit.max == c {
   838  					ft.signedMax(v, c-1)
   839  				}
   840  			}
   841  		}
   842  	case unsigned:
   843  		switch r {
   844  		case eq: // v == w
   845  			ft.unsignedMinMax(v, wLimit.umin, wLimit.umax)
   846  			ft.unsignedMinMax(w, vLimit.umin, vLimit.umax)
   847  		case lt: // v < w
   848  			ft.unsignedMax(v, wLimit.umax-1)
   849  			ft.unsignedMin(w, vLimit.umin+1)
   850  		case lt | eq: // v <= w
   851  			ft.unsignedMax(v, wLimit.umax)
   852  			ft.unsignedMin(w, vLimit.umin)
   853  		case gt: // v > w
   854  			ft.unsignedMin(v, wLimit.umin+1)
   855  			ft.unsignedMax(w, vLimit.umax-1)
   856  		case gt | eq: // v >= w
   857  			ft.unsignedMin(v, wLimit.umin)
   858  			ft.unsignedMax(w, vLimit.umax)
   859  		case lt | gt: // v != w
   860  			if vLimit.umin == vLimit.umax { // v is a constant
   861  				c := vLimit.umin
   862  				if wLimit.umin == c {
   863  					ft.unsignedMin(w, c+1)
   864  				}
   865  				if wLimit.umax == c {
   866  					ft.unsignedMax(w, c-1)
   867  				}
   868  			}
   869  			if wLimit.umin == wLimit.umax { // w is a constant
   870  				c := wLimit.umin
   871  				if vLimit.umin == c {
   872  					ft.unsignedMin(v, c+1)
   873  				}
   874  				if vLimit.umax == c {
   875  					ft.unsignedMax(v, c-1)
   876  				}
   877  			}
   878  		}
   879  	case boolean:
   880  		switch r {
   881  		case eq: // v == w
   882  			if vLimit.min == 1 { // v is true
   883  				ft.booleanTrue(w)
   884  			}
   885  			if vLimit.max == 0 { // v is false
   886  				ft.booleanFalse(w)
   887  			}
   888  			if wLimit.min == 1 { // w is true
   889  				ft.booleanTrue(v)
   890  			}
   891  			if wLimit.max == 0 { // w is false
   892  				ft.booleanFalse(v)
   893  			}
   894  		case lt | gt: // v != w
   895  			if vLimit.min == 1 { // v is true
   896  				ft.booleanFalse(w)
   897  			}
   898  			if vLimit.max == 0 { // v is false
   899  				ft.booleanTrue(w)
   900  			}
   901  			if wLimit.min == 1 { // w is true
   902  				ft.booleanFalse(v)
   903  			}
   904  			if wLimit.max == 0 { // w is false
   905  				ft.booleanTrue(v)
   906  			}
   907  		}
   908  	case pointer:
   909  		switch r {
   910  		case eq: // v == w
   911  			if vLimit.umax == 0 { // v is nil
   912  				ft.pointerNil(w)
   913  			}
   914  			if vLimit.umin > 0 { // v is non-nil
   915  				ft.pointerNonNil(w)
   916  			}
   917  			if wLimit.umax == 0 { // w is nil
   918  				ft.pointerNil(v)
   919  			}
   920  			if wLimit.umin > 0 { // w is non-nil
   921  				ft.pointerNonNil(v)
   922  			}
   923  		case lt | gt: // v != w
   924  			if vLimit.umax == 0 { // v is nil
   925  				ft.pointerNonNil(w)
   926  			}
   927  			if wLimit.umax == 0 { // w is nil
   928  				ft.pointerNonNil(v)
   929  			}
   930  			// Note: the other direction doesn't work.
   931  			// Being not equal to a non-nil pointer doesn't
   932  			// make you (necessarily) a nil pointer.
   933  		}
   934  	}
   935  
   936  	// Derived facts below here are only about numbers.
   937  	if d != signed && d != unsigned {
   938  		return
   939  	}
   940  
   941  	// Process fence-post implications.
   942  	//
   943  	// First, make the condition > or >=.
   944  	if r == lt || r == lt|eq {
   945  		v, w = w, v
   946  		r = reverseBits[r]
   947  	}
   948  	switch r {
   949  	case gt:
   950  		if x, delta := isConstDelta(v); x != nil && delta == 1 {
   951  			// x+1 > w  ⇒  x >= w
   952  			//
   953  			// This is useful for eliminating the
   954  			// growslice branch of append.
   955  			ft.update(parent, x, w, d, gt|eq)
   956  		} else if x, delta := isConstDelta(w); x != nil && delta == -1 {
   957  			// v > x-1  ⇒  v >= x
   958  			ft.update(parent, v, x, d, gt|eq)
   959  		}
   960  	case gt | eq:
   961  		if x, delta := isConstDelta(v); x != nil && delta == -1 {
   962  			// x-1 >= w && x > min  ⇒  x > w
   963  			//
   964  			// Useful for i > 0; s[i-1].
   965  			lim := ft.limits[x.ID]
   966  			if (d == signed && lim.min > opMin[v.Op]) || (d == unsigned && lim.umin > 0) {
   967  				ft.update(parent, x, w, d, gt)
   968  			}
   969  		} else if x, delta := isConstDelta(w); x != nil && delta == 1 {
   970  			// v >= x+1 && x < max  ⇒  v > x
   971  			lim := ft.limits[x.ID]
   972  			if (d == signed && lim.max < opMax[w.Op]) || (d == unsigned && lim.umax < opUMax[w.Op]) {
   973  				ft.update(parent, v, x, d, gt)
   974  			}
   975  		}
   976  	}
   977  
   978  	// Process: x+delta > w (with delta constant)
   979  	// Only signed domain for now (useful for accesses to slices in loops).
   980  	if r == gt || r == gt|eq {
   981  		if x, delta := isConstDelta(v); x != nil && d == signed {
   982  			if parent.Func.pass.debug > 1 {
   983  				parent.Func.Warnl(parent.Pos, "x+d %s w; x:%v %v delta:%v w:%v d:%v", r, x, parent.String(), delta, w.AuxInt, d)
   984  			}
   985  			underflow := true
   986  			if delta < 0 {
   987  				l := ft.limits[x.ID]
   988  				if (x.Type.Size() == 8 && l.min >= math.MinInt64-delta) ||
   989  					(x.Type.Size() == 4 && l.min >= math.MinInt32-delta) {
   990  					underflow = false
   991  				}
   992  			}
   993  			if delta < 0 && !underflow {
   994  				// If delta < 0 and x+delta cannot underflow then x > x+delta (that is, x > v)
   995  				ft.update(parent, x, v, signed, gt)
   996  			}
   997  			if !w.isGenericIntConst() {
   998  				// If we know that x+delta > w but w is not constant, we can derive:
   999  				//    if delta < 0 and x+delta cannot underflow, then x > w
  1000  				// This is useful for loops with bounds "len(slice)-K" (delta = -K)
  1001  				if delta < 0 && !underflow {
  1002  					ft.update(parent, x, w, signed, r)
  1003  				}
  1004  			} else {
  1005  				// With w,delta constants, we want to derive: x+delta > w  ⇒  x > w-delta
  1006  				//
  1007  				// We compute (using integers of the correct size):
  1008  				//    min = w - delta
  1009  				//    max = MaxInt - delta
  1010  				//
  1011  				// And we prove that:
  1012  				//    if min<max: min < x AND x <= max
  1013  				//    if min>max: min < x OR  x <= max
  1014  				//
  1015  				// This is always correct, even in case of overflow.
  1016  				//
  1017  				// If the initial fact is x+delta >= w instead, the derived conditions are:
  1018  				//    if min<max: min <= x AND x <= max
  1019  				//    if min>max: min <= x OR  x <= max
  1020  				//
  1021  				// Notice the conditions for max are still <=, as they handle overflows.
  1022  				var min, max int64
  1023  				switch x.Type.Size() {
  1024  				case 8:
  1025  					min = w.AuxInt - delta
  1026  					max = int64(^uint64(0)>>1) - delta
  1027  				case 4:
  1028  					min = int64(int32(w.AuxInt) - int32(delta))
  1029  					max = int64(int32(^uint32(0)>>1) - int32(delta))
  1030  				case 2:
  1031  					min = int64(int16(w.AuxInt) - int16(delta))
  1032  					max = int64(int16(^uint16(0)>>1) - int16(delta))
  1033  				case 1:
  1034  					min = int64(int8(w.AuxInt) - int8(delta))
  1035  					max = int64(int8(^uint8(0)>>1) - int8(delta))
  1036  				default:
  1037  					panic("unimplemented")
  1038  				}
  1039  
  1040  				if min < max {
  1041  					// Record that x > min and max >= x
  1042  					if r == gt {
  1043  						min++
  1044  					}
  1045  					ft.signedMinMax(x, min, max)
  1046  				} else {
  1047  					// We know that either x>min OR x<=max. factsTable cannot record OR conditions,
  1048  					// so let's see if we can already prove that one of them is false, in which case
  1049  					// the other must be true
  1050  					l := ft.limits[x.ID]
  1051  					if l.max <= min {
  1052  						if r&eq == 0 || l.max < min {
  1053  							// x>min (x>=min) is impossible, so it must be x<=max
  1054  							ft.signedMax(x, max)
  1055  						}
  1056  					} else if l.min > max {
  1057  						// x<=max is impossible, so it must be x>min
  1058  						if r == gt {
  1059  							min++
  1060  						}
  1061  						ft.signedMin(x, min)
  1062  					}
  1063  				}
  1064  			}
  1065  		}
  1066  	}
  1067  
  1068  	// Look through value-preserving extensions.
  1069  	// If the domain is appropriate for the pre-extension Type,
  1070  	// repeat the update with the pre-extension Value.
  1071  	if isCleanExt(v) {
  1072  		switch {
  1073  		case d == signed && v.Args[0].Type.IsSigned():
  1074  			fallthrough
  1075  		case d == unsigned && !v.Args[0].Type.IsSigned():
  1076  			ft.update(parent, v.Args[0], w, d, r)
  1077  		}
  1078  	}
  1079  	if isCleanExt(w) {
  1080  		switch {
  1081  		case d == signed && w.Args[0].Type.IsSigned():
  1082  			fallthrough
  1083  		case d == unsigned && !w.Args[0].Type.IsSigned():
  1084  			ft.update(parent, v, w.Args[0], d, r)
  1085  		}
  1086  	}
  1087  }
  1088  
  1089  var opMin = map[Op]int64{
  1090  	OpAdd64: math.MinInt64, OpSub64: math.MinInt64,
  1091  	OpAdd32: math.MinInt32, OpSub32: math.MinInt32,
  1092  }
  1093  
  1094  var opMax = map[Op]int64{
  1095  	OpAdd64: math.MaxInt64, OpSub64: math.MaxInt64,
  1096  	OpAdd32: math.MaxInt32, OpSub32: math.MaxInt32,
  1097  }
  1098  
  1099  var opUMax = map[Op]uint64{
  1100  	OpAdd64: math.MaxUint64, OpSub64: math.MaxUint64,
  1101  	OpAdd32: math.MaxUint32, OpSub32: math.MaxUint32,
  1102  }
  1103  
  1104  // isNonNegative reports whether v is known to be non-negative.
  1105  func (ft *factsTable) isNonNegative(v *Value) bool {
  1106  	return ft.limits[v.ID].min >= 0
  1107  }
  1108  
  1109  // checkpoint saves the current state of known relations.
  1110  // Called when descending on a branch.
  1111  func (ft *factsTable) checkpoint() {
  1112  	if ft.unsat {
  1113  		ft.unsatDepth++
  1114  	}
  1115  	ft.limitStack = append(ft.limitStack, checkpointBound)
  1116  	ft.orderS.Checkpoint()
  1117  	ft.orderU.Checkpoint()
  1118  	ft.orderingsStack = append(ft.orderingsStack, 0)
  1119  }
  1120  
  1121  // restore restores known relation to the state just
  1122  // before the previous checkpoint.
  1123  // Called when backing up on a branch.
  1124  func (ft *factsTable) restore() {
  1125  	if ft.unsatDepth > 0 {
  1126  		ft.unsatDepth--
  1127  	} else {
  1128  		ft.unsat = false
  1129  	}
  1130  	for {
  1131  		old := ft.limitStack[len(ft.limitStack)-1]
  1132  		ft.limitStack = ft.limitStack[:len(ft.limitStack)-1]
  1133  		if old.vid == 0 { // checkpointBound
  1134  			break
  1135  		}
  1136  		ft.limits[old.vid] = old.limit
  1137  	}
  1138  	ft.orderS.Undo()
  1139  	ft.orderU.Undo()
  1140  	for {
  1141  		id := ft.orderingsStack[len(ft.orderingsStack)-1]
  1142  		ft.orderingsStack = ft.orderingsStack[:len(ft.orderingsStack)-1]
  1143  		if id == 0 { // checkpoint marker
  1144  			break
  1145  		}
  1146  		o := ft.orderings[id]
  1147  		ft.orderings[id] = o.next
  1148  		o.next = ft.orderingCache
  1149  		ft.orderingCache = o
  1150  	}
  1151  }
  1152  
  1153  var (
  1154  	reverseBits = [...]relation{0, 4, 2, 6, 1, 5, 3, 7}
  1155  
  1156  	// maps what we learn when the positive branch is taken.
  1157  	// For example:
  1158  	//      OpLess8:   {signed, lt},
  1159  	//	v1 = (OpLess8 v2 v3).
  1160  	// If we learn that v1 is true, then we can deduce that v2<v3
  1161  	// in the signed domain.
  1162  	domainRelationTable = map[Op]struct {
  1163  		d domain
  1164  		r relation
  1165  	}{
  1166  		OpEq8:   {signed | unsigned, eq},
  1167  		OpEq16:  {signed | unsigned, eq},
  1168  		OpEq32:  {signed | unsigned, eq},
  1169  		OpEq64:  {signed | unsigned, eq},
  1170  		OpEqPtr: {pointer, eq},
  1171  		OpEqB:   {boolean, eq},
  1172  
  1173  		OpNeq8:   {signed | unsigned, lt | gt},
  1174  		OpNeq16:  {signed | unsigned, lt | gt},
  1175  		OpNeq32:  {signed | unsigned, lt | gt},
  1176  		OpNeq64:  {signed | unsigned, lt | gt},
  1177  		OpNeqPtr: {pointer, lt | gt},
  1178  		OpNeqB:   {boolean, lt | gt},
  1179  
  1180  		OpLess8:   {signed, lt},
  1181  		OpLess8U:  {unsigned, lt},
  1182  		OpLess16:  {signed, lt},
  1183  		OpLess16U: {unsigned, lt},
  1184  		OpLess32:  {signed, lt},
  1185  		OpLess32U: {unsigned, lt},
  1186  		OpLess64:  {signed, lt},
  1187  		OpLess64U: {unsigned, lt},
  1188  
  1189  		OpLeq8:   {signed, lt | eq},
  1190  		OpLeq8U:  {unsigned, lt | eq},
  1191  		OpLeq16:  {signed, lt | eq},
  1192  		OpLeq16U: {unsigned, lt | eq},
  1193  		OpLeq32:  {signed, lt | eq},
  1194  		OpLeq32U: {unsigned, lt | eq},
  1195  		OpLeq64:  {signed, lt | eq},
  1196  		OpLeq64U: {unsigned, lt | eq},
  1197  	}
  1198  )
  1199  
  1200  // cleanup returns the posets to the free list
  1201  func (ft *factsTable) cleanup(f *Func) {
  1202  	for _, po := range []*poset{ft.orderS, ft.orderU} {
  1203  		// Make sure it's empty as it should be. A non-empty poset
  1204  		// might cause errors and miscompilations if reused.
  1205  		if checkEnabled {
  1206  			if err := po.CheckEmpty(); err != nil {
  1207  				f.Fatalf("poset not empty after function %s: %v", f.Name, err)
  1208  			}
  1209  		}
  1210  		f.retPoset(po)
  1211  	}
  1212  	f.Cache.freeLimitSlice(ft.limits)
  1213  	f.Cache.freeBoolSlice(ft.recurseCheck)
  1214  }
  1215  
  1216  // prove removes redundant BlockIf branches that can be inferred
  1217  // from previous dominating comparisons.
  1218  //
  1219  // By far, the most common redundant pair are generated by bounds checking.
  1220  // For example for the code:
  1221  //
  1222  //	a[i] = 4
  1223  //	foo(a[i])
  1224  //
  1225  // The compiler will generate the following code:
  1226  //
  1227  //	if i >= len(a) {
  1228  //	    panic("not in bounds")
  1229  //	}
  1230  //	a[i] = 4
  1231  //	if i >= len(a) {
  1232  //	    panic("not in bounds")
  1233  //	}
  1234  //	foo(a[i])
  1235  //
  1236  // The second comparison i >= len(a) is clearly redundant because if the
  1237  // else branch of the first comparison is executed, we already know that i < len(a).
  1238  // The code for the second panic can be removed.
  1239  //
  1240  // prove works by finding contradictions and trimming branches whose
  1241  // conditions are unsatisfiable given the branches leading up to them.
  1242  // It tracks a "fact table" of branch conditions. For each branching
  1243  // block, it asserts the branch conditions that uniquely dominate that
  1244  // block, and then separately asserts the block's branch condition and
  1245  // its negation. If either leads to a contradiction, it can trim that
  1246  // successor.
  1247  func prove(f *Func) {
  1248  	// Find induction variables. Currently, findIndVars
  1249  	// is limited to one induction variable per block.
  1250  	var indVars map[*Block]indVar
  1251  	for _, v := range findIndVar(f) {
  1252  		ind := v.ind
  1253  		if len(ind.Args) != 2 {
  1254  			// the rewrite code assumes there is only ever two parents to loops
  1255  			panic("unexpected induction with too many parents")
  1256  		}
  1257  
  1258  		nxt := v.nxt
  1259  		if !(ind.Uses == 2 && // 2 used by comparison and next
  1260  			nxt.Uses == 1) { // 1 used by induction
  1261  			// ind or nxt is used inside the loop, add it for the facts table
  1262  			if indVars == nil {
  1263  				indVars = make(map[*Block]indVar)
  1264  			}
  1265  			indVars[v.entry] = v
  1266  			continue
  1267  		} else {
  1268  			// Since this induction variable is not used for anything but counting the iterations,
  1269  			// no point in putting it into the facts table.
  1270  		}
  1271  
  1272  		// try to rewrite to a downward counting loop checking against start if the
  1273  		// loop body does not depend on ind or nxt and end is known before the loop.
  1274  		// This reduces pressure on the register allocator because this does not need
  1275  		// to use end on each iteration anymore. We compare against the start constant instead.
  1276  		// That means this code:
  1277  		//
  1278  		//	loop:
  1279  		//		ind = (Phi (Const [x]) nxt),
  1280  		//		if ind < end
  1281  		//		then goto enter_loop
  1282  		//		else goto exit_loop
  1283  		//
  1284  		//	enter_loop:
  1285  		//		do something without using ind nor nxt
  1286  		//		nxt = inc + ind
  1287  		//		goto loop
  1288  		//
  1289  		//	exit_loop:
  1290  		//
  1291  		// is rewritten to:
  1292  		//
  1293  		//	loop:
  1294  		//		ind = (Phi end nxt)
  1295  		//		if (Const [x]) < ind
  1296  		//		then goto enter_loop
  1297  		//		else goto exit_loop
  1298  		//
  1299  		//	enter_loop:
  1300  		//		do something without using ind nor nxt
  1301  		//		nxt = ind - inc
  1302  		//		goto loop
  1303  		//
  1304  		//	exit_loop:
  1305  		//
  1306  		// this is better because it only requires to keep ind then nxt alive while looping,
  1307  		// while the original form keeps ind then nxt and end alive
  1308  		start, end := v.min, v.max
  1309  		if v.flags&indVarCountDown != 0 {
  1310  			start, end = end, start
  1311  		}
  1312  
  1313  		if !start.isGenericIntConst() {
  1314  			// if start is not a constant we would be winning nothing from inverting the loop
  1315  			continue
  1316  		}
  1317  		if end.isGenericIntConst() {
  1318  			// TODO: if both start and end are constants we should rewrite such that the comparison
  1319  			// is against zero and nxt is ++ or -- operation
  1320  			// That means:
  1321  			//	for i := 2; i < 11; i += 2 {
  1322  			// should be rewritten to:
  1323  			//	for i := 5; 0 < i; i-- {
  1324  			continue
  1325  		}
  1326  
  1327  		if end.Block == ind.Block {
  1328  			// we can't rewrite loops where the condition depends on the loop body
  1329  			// this simple check is forced to work because if this is true a Phi in ind.Block must exist
  1330  			continue
  1331  		}
  1332  
  1333  		check := ind.Block.Controls[0]
  1334  		// invert the check
  1335  		check.Args[0], check.Args[1] = check.Args[1], check.Args[0]
  1336  
  1337  		// swap start and end in the loop
  1338  		for i, v := range check.Args {
  1339  			if v != end {
  1340  				continue
  1341  			}
  1342  
  1343  			check.SetArg(i, start)
  1344  			goto replacedEnd
  1345  		}
  1346  		panic(fmt.Sprintf("unreachable, ind: %v, start: %v, end: %v", ind, start, end))
  1347  	replacedEnd:
  1348  
  1349  		for i, v := range ind.Args {
  1350  			if v != start {
  1351  				continue
  1352  			}
  1353  
  1354  			ind.SetArg(i, end)
  1355  			goto replacedStart
  1356  		}
  1357  		panic(fmt.Sprintf("unreachable, ind: %v, start: %v, end: %v", ind, start, end))
  1358  	replacedStart:
  1359  
  1360  		if nxt.Args[0] != ind {
  1361  			// unlike additions subtractions are not commutative so be sure we get it right
  1362  			nxt.Args[0], nxt.Args[1] = nxt.Args[1], nxt.Args[0]
  1363  		}
  1364  
  1365  		switch nxt.Op {
  1366  		case OpAdd8:
  1367  			nxt.Op = OpSub8
  1368  		case OpAdd16:
  1369  			nxt.Op = OpSub16
  1370  		case OpAdd32:
  1371  			nxt.Op = OpSub32
  1372  		case OpAdd64:
  1373  			nxt.Op = OpSub64
  1374  		case OpSub8:
  1375  			nxt.Op = OpAdd8
  1376  		case OpSub16:
  1377  			nxt.Op = OpAdd16
  1378  		case OpSub32:
  1379  			nxt.Op = OpAdd32
  1380  		case OpSub64:
  1381  			nxt.Op = OpAdd64
  1382  		default:
  1383  			panic("unreachable")
  1384  		}
  1385  
  1386  		if f.pass.debug > 0 {
  1387  			f.Warnl(ind.Pos, "Inverted loop iteration")
  1388  		}
  1389  	}
  1390  
  1391  	ft := newFactsTable(f)
  1392  	ft.checkpoint()
  1393  
  1394  	var lens map[ID]*Value
  1395  	var caps map[ID]*Value
  1396  	// Find length and capacity ops.
  1397  	for _, b := range f.Blocks {
  1398  		for _, v := range b.Values {
  1399  			if v.Uses == 0 {
  1400  				// We don't care about dead values.
  1401  				// (There can be some that are CSEd but not removed yet.)
  1402  				continue
  1403  			}
  1404  			switch v.Op {
  1405  			case OpSliceLen:
  1406  				if lens == nil {
  1407  					lens = map[ID]*Value{}
  1408  				}
  1409  				// Set all len Values for the same slice as equal in the poset.
  1410  				// The poset handles transitive relations, so Values related to
  1411  				// any OpSliceLen for this slice will be correctly related to others.
  1412  				//
  1413  				// Since we know that lens/caps are non-negative, their relation
  1414  				// can be added in both the signed and unsigned domain.
  1415  				if l, ok := lens[v.Args[0].ID]; ok {
  1416  					ft.update(b, v, l, signed, eq)
  1417  					ft.update(b, v, l, unsigned, eq)
  1418  				} else {
  1419  					lens[v.Args[0].ID] = v
  1420  				}
  1421  				if c, ok := caps[v.Args[0].ID]; ok {
  1422  					ft.update(b, v, c, signed, lt|eq)
  1423  					ft.update(b, v, c, unsigned, lt|eq)
  1424  				}
  1425  			case OpSliceCap:
  1426  				if caps == nil {
  1427  					caps = map[ID]*Value{}
  1428  				}
  1429  				// Same as case OpSliceLen above, but for slice cap.
  1430  				if c, ok := caps[v.Args[0].ID]; ok {
  1431  					ft.update(b, v, c, signed, eq)
  1432  					ft.update(b, v, c, unsigned, eq)
  1433  				} else {
  1434  					caps[v.Args[0].ID] = v
  1435  				}
  1436  				if l, ok := lens[v.Args[0].ID]; ok {
  1437  					ft.update(b, v, l, signed, gt|eq)
  1438  					ft.update(b, v, l, unsigned, gt|eq)
  1439  				}
  1440  			}
  1441  		}
  1442  	}
  1443  
  1444  	// current node state
  1445  	type walkState int
  1446  	const (
  1447  		descend walkState = iota
  1448  		simplify
  1449  	)
  1450  	// work maintains the DFS stack.
  1451  	type bp struct {
  1452  		block *Block    // current handled block
  1453  		state walkState // what's to do
  1454  	}
  1455  	work := make([]bp, 0, 256)
  1456  	work = append(work, bp{
  1457  		block: f.Entry,
  1458  		state: descend,
  1459  	})
  1460  
  1461  	idom := f.Idom()
  1462  	sdom := f.Sdom()
  1463  
  1464  	// DFS on the dominator tree.
  1465  	//
  1466  	// For efficiency, we consider only the dominator tree rather
  1467  	// than the entire flow graph. On the way down, we consider
  1468  	// incoming branches and accumulate conditions that uniquely
  1469  	// dominate the current block. If we discover a contradiction,
  1470  	// we can eliminate the entire block and all of its children.
  1471  	// On the way back up, we consider outgoing branches that
  1472  	// haven't already been considered. This way we consider each
  1473  	// branch condition only once.
  1474  	for len(work) > 0 {
  1475  		node := work[len(work)-1]
  1476  		work = work[:len(work)-1]
  1477  		parent := idom[node.block.ID]
  1478  		branch := getBranch(sdom, parent, node.block)
  1479  
  1480  		switch node.state {
  1481  		case descend:
  1482  			ft.checkpoint()
  1483  
  1484  			// Entering the block, add facts about the induction variable
  1485  			// that is bound to this block.
  1486  			if iv, ok := indVars[node.block]; ok {
  1487  				addIndVarRestrictions(ft, parent, iv)
  1488  			}
  1489  
  1490  			// Add results of reaching this block via a branch from
  1491  			// its immediate dominator (if any).
  1492  			if branch != unknown {
  1493  				addBranchRestrictions(ft, parent, branch)
  1494  			}
  1495  
  1496  			if ft.unsat {
  1497  				// node.block is unreachable.
  1498  				// Remove it and don't visit
  1499  				// its children.
  1500  				removeBranch(parent, branch)
  1501  				ft.restore()
  1502  				break
  1503  			}
  1504  			// Otherwise, we can now commit to
  1505  			// taking this branch. We'll restore
  1506  			// ft when we unwind.
  1507  
  1508  			// Add facts about the values in the current block.
  1509  			addLocalFacts(ft, node.block)
  1510  
  1511  			work = append(work, bp{
  1512  				block: node.block,
  1513  				state: simplify,
  1514  			})
  1515  			for s := sdom.Child(node.block); s != nil; s = sdom.Sibling(s) {
  1516  				work = append(work, bp{
  1517  					block: s,
  1518  					state: descend,
  1519  				})
  1520  			}
  1521  
  1522  		case simplify:
  1523  			simplifyBlock(sdom, ft, node.block)
  1524  			ft.restore()
  1525  		}
  1526  	}
  1527  
  1528  	ft.restore()
  1529  
  1530  	ft.cleanup(f)
  1531  }
  1532  
  1533  // initLimit sets initial constant limit for v.  This limit is based
  1534  // only on the operation itself, not any of its input arguments. This
  1535  // method is only used in two places, once when the prove pass startup
  1536  // and the other when a new ssa value is created, both for init. (unlike
  1537  // flowLimit, below, which computes additional constraints based on
  1538  // ranges of opcode arguments).
  1539  func initLimit(v *Value) limit {
  1540  	if v.Type.IsBoolean() {
  1541  		switch v.Op {
  1542  		case OpConstBool:
  1543  			b := v.AuxInt
  1544  			return limit{min: b, max: b, umin: uint64(b), umax: uint64(b)}
  1545  		default:
  1546  			return limit{min: 0, max: 1, umin: 0, umax: 1}
  1547  		}
  1548  	}
  1549  	if v.Type.IsPtrShaped() { // These are the types that EqPtr/NeqPtr operate on, except uintptr.
  1550  		switch v.Op {
  1551  		case OpConstNil:
  1552  			return limit{min: 0, max: 0, umin: 0, umax: 0}
  1553  		case OpAddr, OpLocalAddr: // TODO: others?
  1554  			l := noLimit
  1555  			l.umin = 1
  1556  			return l
  1557  		default:
  1558  			return noLimit
  1559  		}
  1560  	}
  1561  	if !v.Type.IsInteger() {
  1562  		return noLimit
  1563  	}
  1564  
  1565  	// Default limits based on type.
  1566  	bitsize := v.Type.Size() * 8
  1567  	lim := limit{min: -(1 << (bitsize - 1)), max: 1<<(bitsize-1) - 1, umin: 0, umax: 1<<bitsize - 1}
  1568  
  1569  	// Tighter limits on some opcodes.
  1570  	switch v.Op {
  1571  	// constants
  1572  	case OpConst64:
  1573  		lim = limit{min: v.AuxInt, max: v.AuxInt, umin: uint64(v.AuxInt), umax: uint64(v.AuxInt)}
  1574  	case OpConst32:
  1575  		lim = limit{min: v.AuxInt, max: v.AuxInt, umin: uint64(uint32(v.AuxInt)), umax: uint64(uint32(v.AuxInt))}
  1576  	case OpConst16:
  1577  		lim = limit{min: v.AuxInt, max: v.AuxInt, umin: uint64(uint16(v.AuxInt)), umax: uint64(uint16(v.AuxInt))}
  1578  	case OpConst8:
  1579  		lim = limit{min: v.AuxInt, max: v.AuxInt, umin: uint64(uint8(v.AuxInt)), umax: uint64(uint8(v.AuxInt))}
  1580  
  1581  	// extensions
  1582  	case OpZeroExt8to64, OpZeroExt8to32, OpZeroExt8to16:
  1583  		lim = lim.signedMinMax(0, 1<<8-1)
  1584  		lim = lim.unsignedMax(1<<8 - 1)
  1585  	case OpZeroExt16to64, OpZeroExt16to32:
  1586  		lim = lim.signedMinMax(0, 1<<16-1)
  1587  		lim = lim.unsignedMax(1<<16 - 1)
  1588  	case OpZeroExt32to64:
  1589  		lim = lim.signedMinMax(0, 1<<32-1)
  1590  		lim = lim.unsignedMax(1<<32 - 1)
  1591  	case OpSignExt8to64, OpSignExt8to32, OpSignExt8to16:
  1592  		lim = lim.signedMinMax(math.MinInt8, math.MaxInt8)
  1593  	case OpSignExt16to64, OpSignExt16to32:
  1594  		lim = lim.signedMinMax(math.MinInt16, math.MaxInt16)
  1595  	case OpSignExt32to64:
  1596  		lim = lim.signedMinMax(math.MinInt32, math.MaxInt32)
  1597  
  1598  	// math/bits intrinsics
  1599  	case OpCtz64, OpBitLen64, OpPopCount64,
  1600  		OpCtz32, OpBitLen32, OpPopCount32,
  1601  		OpCtz16, OpBitLen16, OpPopCount16,
  1602  		OpCtz8, OpBitLen8, OpPopCount8:
  1603  		lim = lim.unsignedMax(uint64(v.Args[0].Type.Size() * 8))
  1604  
  1605  	// bool to uint8 conversion
  1606  	case OpCvtBoolToUint8:
  1607  		lim = lim.unsignedMax(1)
  1608  
  1609  	// length operations
  1610  	case OpStringLen, OpSliceLen, OpSliceCap:
  1611  		lim = lim.signedMin(0)
  1612  	}
  1613  
  1614  	// signed <-> unsigned propagation
  1615  	if lim.min >= 0 {
  1616  		lim = lim.unsignedMinMax(uint64(lim.min), uint64(lim.max))
  1617  	}
  1618  	if fitsInBitsU(lim.umax, uint(8*v.Type.Size()-1)) {
  1619  		lim = lim.signedMinMax(int64(lim.umin), int64(lim.umax))
  1620  	}
  1621  
  1622  	return lim
  1623  }
  1624  
  1625  // flowLimit updates the known limits of v in ft. Returns true if anything changed.
  1626  // flowLimit can use the ranges of input arguments.
  1627  //
  1628  // Note: this calculation only happens at the point the value is defined. We do not reevaluate
  1629  // it later. So for example:
  1630  //
  1631  //	v := x + y
  1632  //	if 0 <= x && x < 5 && 0 <= y && y < 5 { ... use v ... }
  1633  //
  1634  // we don't discover that the range of v is bounded in the conditioned
  1635  // block. We could recompute the range of v once we enter the block so
  1636  // we know that it is 0 <= v <= 8, but we don't have a mechanism to do
  1637  // that right now.
  1638  func (ft *factsTable) flowLimit(v *Value) bool {
  1639  	if !v.Type.IsInteger() {
  1640  		// TODO: boolean?
  1641  		return false
  1642  	}
  1643  
  1644  	// Additional limits based on opcode and argument.
  1645  	// No need to repeat things here already done in initLimit.
  1646  	switch v.Op {
  1647  
  1648  	// extensions
  1649  	case OpZeroExt8to64, OpZeroExt8to32, OpZeroExt8to16, OpZeroExt16to64, OpZeroExt16to32, OpZeroExt32to64:
  1650  		a := ft.limits[v.Args[0].ID]
  1651  		return ft.unsignedMinMax(v, a.umin, a.umax)
  1652  	case OpSignExt8to64, OpSignExt8to32, OpSignExt8to16, OpSignExt16to64, OpSignExt16to32, OpSignExt32to64:
  1653  		a := ft.limits[v.Args[0].ID]
  1654  		return ft.signedMinMax(v, a.min, a.max)
  1655  	case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
  1656  		a := ft.limits[v.Args[0].ID]
  1657  		if a.umax <= 1<<(uint64(v.Type.Size())*8)-1 {
  1658  			return ft.unsignedMinMax(v, a.umin, a.umax)
  1659  		}
  1660  
  1661  	// math/bits
  1662  	case OpCtz64:
  1663  		a := ft.limits[v.Args[0].ID]
  1664  		if a.nonzero() {
  1665  			return ft.unsignedMax(v, uint64(bits.Len64(a.umax)-1))
  1666  		}
  1667  	case OpCtz32:
  1668  		a := ft.limits[v.Args[0].ID]
  1669  		if a.nonzero() {
  1670  			return ft.unsignedMax(v, uint64(bits.Len32(uint32(a.umax))-1))
  1671  		}
  1672  	case OpCtz16:
  1673  		a := ft.limits[v.Args[0].ID]
  1674  		if a.nonzero() {
  1675  			return ft.unsignedMax(v, uint64(bits.Len16(uint16(a.umax))-1))
  1676  		}
  1677  	case OpCtz8:
  1678  		a := ft.limits[v.Args[0].ID]
  1679  		if a.nonzero() {
  1680  			return ft.unsignedMax(v, uint64(bits.Len8(uint8(a.umax))-1))
  1681  		}
  1682  
  1683  	case OpPopCount64, OpPopCount32, OpPopCount16, OpPopCount8:
  1684  		a := ft.limits[v.Args[0].ID]
  1685  		changingBitsCount := uint64(bits.Len64(a.umax ^ a.umin))
  1686  		sharedLeadingMask := ^(uint64(1)<<changingBitsCount - 1)
  1687  		fixedBits := a.umax & sharedLeadingMask
  1688  		min := uint64(bits.OnesCount64(fixedBits))
  1689  		return ft.unsignedMinMax(v, min, min+changingBitsCount)
  1690  
  1691  	case OpBitLen64:
  1692  		a := ft.limits[v.Args[0].ID]
  1693  		return ft.unsignedMinMax(v,
  1694  			uint64(bits.Len64(a.umin)),
  1695  			uint64(bits.Len64(a.umax)))
  1696  	case OpBitLen32:
  1697  		a := ft.limits[v.Args[0].ID]
  1698  		return ft.unsignedMinMax(v,
  1699  			uint64(bits.Len32(uint32(a.umin))),
  1700  			uint64(bits.Len32(uint32(a.umax))))
  1701  	case OpBitLen16:
  1702  		a := ft.limits[v.Args[0].ID]
  1703  		return ft.unsignedMinMax(v,
  1704  			uint64(bits.Len16(uint16(a.umin))),
  1705  			uint64(bits.Len16(uint16(a.umax))))
  1706  	case OpBitLen8:
  1707  		a := ft.limits[v.Args[0].ID]
  1708  		return ft.unsignedMinMax(v,
  1709  			uint64(bits.Len8(uint8(a.umin))),
  1710  			uint64(bits.Len8(uint8(a.umax))))
  1711  
  1712  	// Masks.
  1713  
  1714  	// TODO: if y.umax and y.umin share a leading bit pattern, y also has that leading bit pattern.
  1715  	// we could compare the patterns of always set bits in a and b and learn more about minimum and maximum.
  1716  	// But I doubt this help any real world code.
  1717  	case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  1718  		// AND can only make the value smaller.
  1719  		a := ft.limits[v.Args[0].ID]
  1720  		b := ft.limits[v.Args[1].ID]
  1721  		return ft.unsignedMax(v, min(a.umax, b.umax))
  1722  	case OpOr64, OpOr32, OpOr16, OpOr8:
  1723  		// OR can only make the value bigger and can't flip bits proved to be zero in both inputs.
  1724  		a := ft.limits[v.Args[0].ID]
  1725  		b := ft.limits[v.Args[1].ID]
  1726  		return ft.unsignedMinMax(v,
  1727  			max(a.umin, b.umin),
  1728  			1<<bits.Len64(a.umax|b.umax)-1)
  1729  	case OpXor64, OpXor32, OpXor16, OpXor8:
  1730  		// XOR can't flip bits that are proved to be zero in both inputs.
  1731  		a := ft.limits[v.Args[0].ID]
  1732  		b := ft.limits[v.Args[1].ID]
  1733  		return ft.unsignedMax(v, 1<<bits.Len64(a.umax|b.umax)-1)
  1734  	case OpCom64, OpCom32, OpCom16, OpCom8:
  1735  		a := ft.limits[v.Args[0].ID]
  1736  		return ft.newLimit(v, a.com(uint(v.Type.Size())*8))
  1737  
  1738  	// Arithmetic.
  1739  	case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
  1740  		a := ft.limits[v.Args[0].ID]
  1741  		b := ft.limits[v.Args[1].ID]
  1742  		return ft.newLimit(v, a.add(b, uint(v.Type.Size())*8))
  1743  	case OpSub64, OpSub32, OpSub16, OpSub8:
  1744  		a := ft.limits[v.Args[0].ID]
  1745  		b := ft.limits[v.Args[1].ID]
  1746  		sub := ft.newLimit(v, a.sub(b, uint(v.Type.Size())*8))
  1747  		mod := ft.detectSignedMod(v)
  1748  		return sub || mod
  1749  	case OpNeg64, OpNeg32, OpNeg16, OpNeg8:
  1750  		a := ft.limits[v.Args[0].ID]
  1751  		bitsize := uint(v.Type.Size()) * 8
  1752  		return ft.newLimit(v, a.com(bitsize).add(limit{min: 1, max: 1, umin: 1, umax: 1}, bitsize))
  1753  	case OpMul64, OpMul32, OpMul16, OpMul8:
  1754  		a := ft.limits[v.Args[0].ID]
  1755  		b := ft.limits[v.Args[1].ID]
  1756  		return ft.newLimit(v, a.mul(b, uint(v.Type.Size())*8))
  1757  	case OpLsh64x64, OpLsh64x32, OpLsh64x16, OpLsh64x8,
  1758  		OpLsh32x64, OpLsh32x32, OpLsh32x16, OpLsh32x8,
  1759  		OpLsh16x64, OpLsh16x32, OpLsh16x16, OpLsh16x8,
  1760  		OpLsh8x64, OpLsh8x32, OpLsh8x16, OpLsh8x8:
  1761  		a := ft.limits[v.Args[0].ID]
  1762  		b := ft.limits[v.Args[1].ID]
  1763  		bitsize := uint(v.Type.Size()) * 8
  1764  		return ft.newLimit(v, a.mul(b.exp2(bitsize), bitsize))
  1765  	case OpMod64, OpMod32, OpMod16, OpMod8:
  1766  		a := ft.limits[v.Args[0].ID]
  1767  		b := ft.limits[v.Args[1].ID]
  1768  		if !(a.nonnegative() && b.nonnegative()) {
  1769  			// TODO: we could handle signed limits but I didn't bother.
  1770  			break
  1771  		}
  1772  		fallthrough
  1773  	case OpMod64u, OpMod32u, OpMod16u, OpMod8u:
  1774  		a := ft.limits[v.Args[0].ID]
  1775  		b := ft.limits[v.Args[1].ID]
  1776  		// Underflow in the arithmetic below is ok, it gives to MaxUint64 which does nothing to the limit.
  1777  		return ft.unsignedMax(v, min(a.umax, b.umax-1))
  1778  	case OpDiv64, OpDiv32, OpDiv16, OpDiv8:
  1779  		a := ft.limits[v.Args[0].ID]
  1780  		b := ft.limits[v.Args[1].ID]
  1781  		if !(a.nonnegative() && b.nonnegative()) {
  1782  			// TODO: we could handle signed limits but I didn't bother.
  1783  			break
  1784  		}
  1785  		fallthrough
  1786  	case OpDiv64u, OpDiv32u, OpDiv16u, OpDiv8u:
  1787  		a := ft.limits[v.Args[0].ID]
  1788  		b := ft.limits[v.Args[1].ID]
  1789  		lim := noLimit
  1790  		if b.umax > 0 {
  1791  			lim = lim.unsignedMin(a.umin / b.umax)
  1792  		}
  1793  		if b.umin > 0 {
  1794  			lim = lim.unsignedMax(a.umax / b.umin)
  1795  		}
  1796  		return ft.newLimit(v, lim)
  1797  
  1798  	case OpPhi:
  1799  		{
  1800  			// Work around for go.dev/issue/68857, look for min(x, y) and max(x, y).
  1801  			b := v.Block
  1802  			if len(b.Preds) != 2 {
  1803  				goto notMinNorMax
  1804  			}
  1805  			// FIXME: this code searches for the following losange pattern
  1806  			// because that what ssagen produce for min and max builtins:
  1807  			// conditionBlock → (firstBlock, secondBlock) → v.Block
  1808  			// there are three non losange equivalent constructions
  1809  			// we could match for, but I didn't bother:
  1810  			// conditionBlock → (v.Block, secondBlock → v.Block)
  1811  			// conditionBlock → (firstBlock → v.Block, v.Block)
  1812  			// conditionBlock → (v.Block, v.Block)
  1813  			firstBlock, secondBlock := b.Preds[0].b, b.Preds[1].b
  1814  			if firstBlock.Kind != BlockPlain || secondBlock.Kind != BlockPlain {
  1815  				goto notMinNorMax
  1816  			}
  1817  			if len(firstBlock.Preds) != 1 || len(secondBlock.Preds) != 1 {
  1818  				goto notMinNorMax
  1819  			}
  1820  			conditionBlock := firstBlock.Preds[0].b
  1821  			if conditionBlock != secondBlock.Preds[0].b {
  1822  				goto notMinNorMax
  1823  			}
  1824  			if conditionBlock.Kind != BlockIf {
  1825  				goto notMinNorMax
  1826  			}
  1827  
  1828  			less := conditionBlock.Controls[0]
  1829  			var unsigned bool
  1830  			switch less.Op {
  1831  			case OpLess64U, OpLess32U, OpLess16U, OpLess8U,
  1832  				OpLeq64U, OpLeq32U, OpLeq16U, OpLeq8U:
  1833  				unsigned = true
  1834  			case OpLess64, OpLess32, OpLess16, OpLess8,
  1835  				OpLeq64, OpLeq32, OpLeq16, OpLeq8:
  1836  			default:
  1837  				goto notMinNorMax
  1838  			}
  1839  			small, big := less.Args[0], less.Args[1]
  1840  			truev, falsev := v.Args[0], v.Args[1]
  1841  			if conditionBlock.Succs[0].b == secondBlock {
  1842  				truev, falsev = falsev, truev
  1843  			}
  1844  
  1845  			bigl, smalll := ft.limits[big.ID], ft.limits[small.ID]
  1846  			if truev == big {
  1847  				if falsev == small {
  1848  					// v := big if small <¿=? big else small
  1849  					if unsigned {
  1850  						maximum := max(bigl.umax, smalll.umax)
  1851  						minimum := max(bigl.umin, smalll.umin)
  1852  						return ft.unsignedMinMax(v, minimum, maximum)
  1853  					} else {
  1854  						maximum := max(bigl.max, smalll.max)
  1855  						minimum := max(bigl.min, smalll.min)
  1856  						return ft.signedMinMax(v, minimum, maximum)
  1857  					}
  1858  				} else {
  1859  					goto notMinNorMax
  1860  				}
  1861  			} else if truev == small {
  1862  				if falsev == big {
  1863  					// v := small if small <¿=? big else big
  1864  					if unsigned {
  1865  						maximum := min(bigl.umax, smalll.umax)
  1866  						minimum := min(bigl.umin, smalll.umin)
  1867  						return ft.unsignedMinMax(v, minimum, maximum)
  1868  					} else {
  1869  						maximum := min(bigl.max, smalll.max)
  1870  						minimum := min(bigl.min, smalll.min)
  1871  						return ft.signedMinMax(v, minimum, maximum)
  1872  					}
  1873  				} else {
  1874  					goto notMinNorMax
  1875  				}
  1876  			} else {
  1877  				goto notMinNorMax
  1878  			}
  1879  		}
  1880  	notMinNorMax:
  1881  
  1882  		// Compute the union of all the input phis.
  1883  		// Often this will convey no information, because the block
  1884  		// is not dominated by its predecessors and hence the
  1885  		// phi arguments might not have been processed yet. But if
  1886  		// the values are declared earlier, it may help. e.g., for
  1887  		//    v = phi(c3, c5)
  1888  		// where c3 = OpConst [3] and c5 = OpConst [5] are
  1889  		// defined in the entry block, we can derive [3,5]
  1890  		// as the limit for v.
  1891  		l := ft.limits[v.Args[0].ID]
  1892  		for _, a := range v.Args[1:] {
  1893  			l2 := ft.limits[a.ID]
  1894  			l.min = min(l.min, l2.min)
  1895  			l.max = max(l.max, l2.max)
  1896  			l.umin = min(l.umin, l2.umin)
  1897  			l.umax = max(l.umax, l2.umax)
  1898  		}
  1899  		return ft.newLimit(v, l)
  1900  	}
  1901  	return false
  1902  }
  1903  
  1904  // See if we can get any facts because v is the result of signed mod by a constant.
  1905  // The mod operation has already been rewritten, so we have to try and reconstruct it.
  1906  //
  1907  //	x % d
  1908  //
  1909  // is rewritten as
  1910  //
  1911  //	x - (x / d) * d
  1912  //
  1913  // furthermore, the divide itself gets rewritten. If d is a power of 2 (d == 1<<k), we do
  1914  //
  1915  //	(x / d) * d = ((x + adj) >> k) << k
  1916  //	            = (x + adj) & (-1<<k)
  1917  //
  1918  // with adj being an adjustment in case x is negative (see below).
  1919  // if d is not a power of 2, we do
  1920  //
  1921  //	x / d = ... TODO ...
  1922  func (ft *factsTable) detectSignedMod(v *Value) bool {
  1923  	if ft.detectSignedModByPowerOfTwo(v) {
  1924  		return true
  1925  	}
  1926  	// TODO: non-powers-of-2
  1927  	return false
  1928  }
  1929  func (ft *factsTable) detectSignedModByPowerOfTwo(v *Value) bool {
  1930  	// We're looking for:
  1931  	//
  1932  	//   x % d ==
  1933  	//   x - (x / d) * d
  1934  	//
  1935  	// which for d a power of 2, d == 1<<k, is done as
  1936  	//
  1937  	//   x - ((x + (x>>(w-1))>>>(w-k)) & (-1<<k))
  1938  	//
  1939  	// w = bit width of x.
  1940  	// (>> = signed shift, >>> = unsigned shift).
  1941  	// See ./_gen/generic.rules, search for "Signed divide by power of 2".
  1942  
  1943  	var w int64
  1944  	var addOp, andOp, constOp, sshiftOp, ushiftOp Op
  1945  	switch v.Op {
  1946  	case OpSub64:
  1947  		w = 64
  1948  		addOp = OpAdd64
  1949  		andOp = OpAnd64
  1950  		constOp = OpConst64
  1951  		sshiftOp = OpRsh64x64
  1952  		ushiftOp = OpRsh64Ux64
  1953  	case OpSub32:
  1954  		w = 32
  1955  		addOp = OpAdd32
  1956  		andOp = OpAnd32
  1957  		constOp = OpConst32
  1958  		sshiftOp = OpRsh32x64
  1959  		ushiftOp = OpRsh32Ux64
  1960  	case OpSub16:
  1961  		w = 16
  1962  		addOp = OpAdd16
  1963  		andOp = OpAnd16
  1964  		constOp = OpConst16
  1965  		sshiftOp = OpRsh16x64
  1966  		ushiftOp = OpRsh16Ux64
  1967  	case OpSub8:
  1968  		w = 8
  1969  		addOp = OpAdd8
  1970  		andOp = OpAnd8
  1971  		constOp = OpConst8
  1972  		sshiftOp = OpRsh8x64
  1973  		ushiftOp = OpRsh8Ux64
  1974  	default:
  1975  		return false
  1976  	}
  1977  
  1978  	x := v.Args[0]
  1979  	and := v.Args[1]
  1980  	if and.Op != andOp {
  1981  		return false
  1982  	}
  1983  	var add, mask *Value
  1984  	if and.Args[0].Op == addOp && and.Args[1].Op == constOp {
  1985  		add = and.Args[0]
  1986  		mask = and.Args[1]
  1987  	} else if and.Args[1].Op == addOp && and.Args[0].Op == constOp {
  1988  		add = and.Args[1]
  1989  		mask = and.Args[0]
  1990  	} else {
  1991  		return false
  1992  	}
  1993  	var ushift *Value
  1994  	if add.Args[0] == x {
  1995  		ushift = add.Args[1]
  1996  	} else if add.Args[1] == x {
  1997  		ushift = add.Args[0]
  1998  	} else {
  1999  		return false
  2000  	}
  2001  	if ushift.Op != ushiftOp {
  2002  		return false
  2003  	}
  2004  	if ushift.Args[1].Op != OpConst64 {
  2005  		return false
  2006  	}
  2007  	k := w - ushift.Args[1].AuxInt // Now we know k!
  2008  	d := int64(1) << k             // divisor
  2009  	sshift := ushift.Args[0]
  2010  	if sshift.Op != sshiftOp {
  2011  		return false
  2012  	}
  2013  	if sshift.Args[0] != x {
  2014  		return false
  2015  	}
  2016  	if sshift.Args[1].Op != OpConst64 || sshift.Args[1].AuxInt != w-1 {
  2017  		return false
  2018  	}
  2019  	if mask.AuxInt != -d {
  2020  		return false
  2021  	}
  2022  
  2023  	// All looks ok. x % d is at most +/- d-1.
  2024  	return ft.signedMinMax(v, -d+1, d-1)
  2025  }
  2026  
  2027  // getBranch returns the range restrictions added by p
  2028  // when reaching b. p is the immediate dominator of b.
  2029  func getBranch(sdom SparseTree, p *Block, b *Block) branch {
  2030  	if p == nil {
  2031  		return unknown
  2032  	}
  2033  	switch p.Kind {
  2034  	case BlockIf:
  2035  		// If p and p.Succs[0] are dominators it means that every path
  2036  		// from entry to b passes through p and p.Succs[0]. We care that
  2037  		// no path from entry to b passes through p.Succs[1]. If p.Succs[0]
  2038  		// has one predecessor then (apart from the degenerate case),
  2039  		// there is no path from entry that can reach b through p.Succs[1].
  2040  		// TODO: how about p->yes->b->yes, i.e. a loop in yes.
  2041  		if sdom.IsAncestorEq(p.Succs[0].b, b) && len(p.Succs[0].b.Preds) == 1 {
  2042  			return positive
  2043  		}
  2044  		if sdom.IsAncestorEq(p.Succs[1].b, b) && len(p.Succs[1].b.Preds) == 1 {
  2045  			return negative
  2046  		}
  2047  	case BlockJumpTable:
  2048  		// TODO: this loop can lead to quadratic behavior, as
  2049  		// getBranch can be called len(p.Succs) times.
  2050  		for i, e := range p.Succs {
  2051  			if sdom.IsAncestorEq(e.b, b) && len(e.b.Preds) == 1 {
  2052  				return jumpTable0 + branch(i)
  2053  			}
  2054  		}
  2055  	}
  2056  	return unknown
  2057  }
  2058  
  2059  // addIndVarRestrictions updates the factsTables ft with the facts
  2060  // learned from the induction variable indVar which drives the loop
  2061  // starting in Block b.
  2062  func addIndVarRestrictions(ft *factsTable, b *Block, iv indVar) {
  2063  	d := signed
  2064  	if ft.isNonNegative(iv.min) && ft.isNonNegative(iv.max) {
  2065  		d |= unsigned
  2066  	}
  2067  
  2068  	if iv.flags&indVarMinExc == 0 {
  2069  		addRestrictions(b, ft, d, iv.min, iv.ind, lt|eq)
  2070  	} else {
  2071  		addRestrictions(b, ft, d, iv.min, iv.ind, lt)
  2072  	}
  2073  
  2074  	if iv.flags&indVarMaxInc == 0 {
  2075  		addRestrictions(b, ft, d, iv.ind, iv.max, lt)
  2076  	} else {
  2077  		addRestrictions(b, ft, d, iv.ind, iv.max, lt|eq)
  2078  	}
  2079  }
  2080  
  2081  // addBranchRestrictions updates the factsTables ft with the facts learned when
  2082  // branching from Block b in direction br.
  2083  func addBranchRestrictions(ft *factsTable, b *Block, br branch) {
  2084  	c := b.Controls[0]
  2085  	switch {
  2086  	case br == negative:
  2087  		ft.booleanFalse(c)
  2088  	case br == positive:
  2089  		ft.booleanTrue(c)
  2090  	case br >= jumpTable0:
  2091  		idx := br - jumpTable0
  2092  		val := int64(idx)
  2093  		if v, off := isConstDelta(c); v != nil {
  2094  			// Establish the bound on the underlying value we're switching on,
  2095  			// not on the offset-ed value used as the jump table index.
  2096  			c = v
  2097  			val -= off
  2098  		}
  2099  		ft.newLimit(c, limit{min: val, max: val, umin: uint64(val), umax: uint64(val)})
  2100  	default:
  2101  		panic("unknown branch")
  2102  	}
  2103  }
  2104  
  2105  // addRestrictions updates restrictions from the immediate
  2106  // dominating block (p) using r.
  2107  func addRestrictions(parent *Block, ft *factsTable, t domain, v, w *Value, r relation) {
  2108  	if t == 0 {
  2109  		// Trivial case: nothing to do.
  2110  		// Should not happen, but just in case.
  2111  		return
  2112  	}
  2113  	for i := domain(1); i <= t; i <<= 1 {
  2114  		if t&i == 0 {
  2115  			continue
  2116  		}
  2117  		ft.update(parent, v, w, i, r)
  2118  	}
  2119  }
  2120  
  2121  func unsignedAddOverflows(a, b uint64, t *types.Type) bool {
  2122  	switch t.Size() {
  2123  	case 8:
  2124  		return a+b < a
  2125  	case 4:
  2126  		return a+b > math.MaxUint32
  2127  	case 2:
  2128  		return a+b > math.MaxUint16
  2129  	case 1:
  2130  		return a+b > math.MaxUint8
  2131  	default:
  2132  		panic("unreachable")
  2133  	}
  2134  }
  2135  
  2136  func signedAddOverflowsOrUnderflows(a, b int64, t *types.Type) bool {
  2137  	r := a + b
  2138  	switch t.Size() {
  2139  	case 8:
  2140  		return (a >= 0 && b >= 0 && r < 0) || (a < 0 && b < 0 && r >= 0)
  2141  	case 4:
  2142  		return r < math.MinInt32 || math.MaxInt32 < r
  2143  	case 2:
  2144  		return r < math.MinInt16 || math.MaxInt16 < r
  2145  	case 1:
  2146  		return r < math.MinInt8 || math.MaxInt8 < r
  2147  	default:
  2148  		panic("unreachable")
  2149  	}
  2150  }
  2151  
  2152  func unsignedSubUnderflows(a, b uint64) bool {
  2153  	return a < b
  2154  }
  2155  
  2156  func addLocalFacts(ft *factsTable, b *Block) {
  2157  	// Propagate constant ranges among values in this block.
  2158  	// We do this before the second loop so that we have the
  2159  	// most up-to-date constant bounds for isNonNegative calls.
  2160  	for {
  2161  		changed := false
  2162  		for _, v := range b.Values {
  2163  			changed = ft.flowLimit(v) || changed
  2164  		}
  2165  		if !changed {
  2166  			break
  2167  		}
  2168  	}
  2169  
  2170  	// Add facts about individual operations.
  2171  	for _, v := range b.Values {
  2172  		// FIXME(go.dev/issue/68857): this loop only set up limits properly when b.Values is in topological order.
  2173  		// flowLimit can also depend on limits given by this loop which right now is not handled.
  2174  		switch v.Op {
  2175  		case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
  2176  			x := ft.limits[v.Args[0].ID]
  2177  			y := ft.limits[v.Args[1].ID]
  2178  			if !unsignedAddOverflows(x.umax, y.umax, v.Type) {
  2179  				r := gt
  2180  				if !x.nonzero() {
  2181  					r |= eq
  2182  				}
  2183  				ft.update(b, v, v.Args[1], unsigned, r)
  2184  				r = gt
  2185  				if !y.nonzero() {
  2186  					r |= eq
  2187  				}
  2188  				ft.update(b, v, v.Args[0], unsigned, r)
  2189  			}
  2190  			if x.min >= 0 && !signedAddOverflowsOrUnderflows(x.max, y.max, v.Type) {
  2191  				r := gt
  2192  				if !x.nonzero() {
  2193  					r |= eq
  2194  				}
  2195  				ft.update(b, v, v.Args[1], signed, r)
  2196  			}
  2197  			if y.min >= 0 && !signedAddOverflowsOrUnderflows(x.max, y.max, v.Type) {
  2198  				r := gt
  2199  				if !y.nonzero() {
  2200  					r |= eq
  2201  				}
  2202  				ft.update(b, v, v.Args[0], signed, r)
  2203  			}
  2204  			if x.max <= 0 && !signedAddOverflowsOrUnderflows(x.min, y.min, v.Type) {
  2205  				r := lt
  2206  				if !x.nonzero() {
  2207  					r |= eq
  2208  				}
  2209  				ft.update(b, v, v.Args[1], signed, r)
  2210  			}
  2211  			if y.max <= 0 && !signedAddOverflowsOrUnderflows(x.min, y.min, v.Type) {
  2212  				r := lt
  2213  				if !y.nonzero() {
  2214  					r |= eq
  2215  				}
  2216  				ft.update(b, v, v.Args[0], signed, r)
  2217  			}
  2218  		case OpSub64, OpSub32, OpSub16, OpSub8:
  2219  			x := ft.limits[v.Args[0].ID]
  2220  			y := ft.limits[v.Args[1].ID]
  2221  			if !unsignedSubUnderflows(x.umin, y.umax) {
  2222  				r := lt
  2223  				if !y.nonzero() {
  2224  					r |= eq
  2225  				}
  2226  				ft.update(b, v, v.Args[0], unsigned, r)
  2227  			}
  2228  			// FIXME: we could also do signed facts but the overflow checks are much trickier and I don't need it yet.
  2229  		case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  2230  			ft.update(b, v, v.Args[0], unsigned, lt|eq)
  2231  			ft.update(b, v, v.Args[1], unsigned, lt|eq)
  2232  			if ft.isNonNegative(v.Args[0]) {
  2233  				ft.update(b, v, v.Args[0], signed, lt|eq)
  2234  			}
  2235  			if ft.isNonNegative(v.Args[1]) {
  2236  				ft.update(b, v, v.Args[1], signed, lt|eq)
  2237  			}
  2238  		case OpOr64, OpOr32, OpOr16, OpOr8:
  2239  			// TODO: investigate how to always add facts without much slowdown, see issue #57959
  2240  			//ft.update(b, v, v.Args[0], unsigned, gt|eq)
  2241  			//ft.update(b, v, v.Args[1], unsigned, gt|eq)
  2242  		case OpDiv64u, OpDiv32u, OpDiv16u, OpDiv8u,
  2243  			OpRsh8Ux64, OpRsh8Ux32, OpRsh8Ux16, OpRsh8Ux8,
  2244  			OpRsh16Ux64, OpRsh16Ux32, OpRsh16Ux16, OpRsh16Ux8,
  2245  			OpRsh32Ux64, OpRsh32Ux32, OpRsh32Ux16, OpRsh32Ux8,
  2246  			OpRsh64Ux64, OpRsh64Ux32, OpRsh64Ux16, OpRsh64Ux8:
  2247  			ft.update(b, v, v.Args[0], unsigned, lt|eq)
  2248  			if ft.isNonNegative(v.Args[0]) {
  2249  				ft.update(b, v, v.Args[0], signed, lt|eq)
  2250  			}
  2251  		case OpMod64u, OpMod32u, OpMod16u, OpMod8u:
  2252  			ft.update(b, v, v.Args[0], unsigned, lt|eq)
  2253  			// Note: we have to be careful that this doesn't imply
  2254  			// that the modulus is >0, which isn't true until *after*
  2255  			// the mod instruction executes (and thus panics if the
  2256  			// modulus is 0). See issue 67625.
  2257  			ft.update(b, v, v.Args[1], unsigned, lt)
  2258  		case OpStringLen:
  2259  			if v.Args[0].Op == OpStringMake {
  2260  				ft.update(b, v, v.Args[0].Args[1], signed, eq)
  2261  			}
  2262  		case OpSliceLen:
  2263  			if v.Args[0].Op == OpSliceMake {
  2264  				ft.update(b, v, v.Args[0].Args[1], signed, eq)
  2265  			}
  2266  		case OpSliceCap:
  2267  			if v.Args[0].Op == OpSliceMake {
  2268  				ft.update(b, v, v.Args[0].Args[2], signed, eq)
  2269  			}
  2270  		case OpPhi:
  2271  			addLocalFactsPhi(ft, v)
  2272  		}
  2273  	}
  2274  }
  2275  
  2276  func addLocalFactsPhi(ft *factsTable, v *Value) {
  2277  	// Look for phis that implement min/max.
  2278  	//   z:
  2279  	//      c = Less64 x y (or other Less/Leq operation)
  2280  	//      If c -> bx by
  2281  	//   bx: <- z
  2282  	//       -> b ...
  2283  	//   by: <- z
  2284  	//      -> b ...
  2285  	//   b: <- bx by
  2286  	//      v = Phi x y
  2287  	// Then v is either min or max of x,y.
  2288  	// If it is the min, then we deduce v <= x && v <= y.
  2289  	// If it is the max, then we deduce v >= x && v >= y.
  2290  	// The min case is useful for the copy builtin, see issue 16833.
  2291  	if len(v.Args) != 2 {
  2292  		return
  2293  	}
  2294  	b := v.Block
  2295  	x := v.Args[0]
  2296  	y := v.Args[1]
  2297  	bx := b.Preds[0].b
  2298  	by := b.Preds[1].b
  2299  	var z *Block // branch point
  2300  	switch {
  2301  	case bx == by: // bx == by == z case
  2302  		z = bx
  2303  	case by.uniquePred() == bx: // bx == z case
  2304  		z = bx
  2305  	case bx.uniquePred() == by: // by == z case
  2306  		z = by
  2307  	case bx.uniquePred() == by.uniquePred():
  2308  		z = bx.uniquePred()
  2309  	}
  2310  	if z == nil || z.Kind != BlockIf {
  2311  		return
  2312  	}
  2313  	c := z.Controls[0]
  2314  	if len(c.Args) != 2 {
  2315  		return
  2316  	}
  2317  	var isMin bool // if c, a less-than comparison, is true, phi chooses x.
  2318  	if bx == z {
  2319  		isMin = b.Preds[0].i == 0
  2320  	} else {
  2321  		isMin = bx.Preds[0].i == 0
  2322  	}
  2323  	if c.Args[0] == x && c.Args[1] == y {
  2324  		// ok
  2325  	} else if c.Args[0] == y && c.Args[1] == x {
  2326  		// Comparison is reversed from how the values are listed in the Phi.
  2327  		isMin = !isMin
  2328  	} else {
  2329  		// Not comparing x and y.
  2330  		return
  2331  	}
  2332  	var dom domain
  2333  	switch c.Op {
  2334  	case OpLess64, OpLess32, OpLess16, OpLess8, OpLeq64, OpLeq32, OpLeq16, OpLeq8:
  2335  		dom = signed
  2336  	case OpLess64U, OpLess32U, OpLess16U, OpLess8U, OpLeq64U, OpLeq32U, OpLeq16U, OpLeq8U:
  2337  		dom = unsigned
  2338  	default:
  2339  		return
  2340  	}
  2341  	var rel relation
  2342  	if isMin {
  2343  		rel = lt | eq
  2344  	} else {
  2345  		rel = gt | eq
  2346  	}
  2347  	ft.update(b, v, x, dom, rel)
  2348  	ft.update(b, v, y, dom, rel)
  2349  }
  2350  
  2351  var ctzNonZeroOp = map[Op]Op{OpCtz8: OpCtz8NonZero, OpCtz16: OpCtz16NonZero, OpCtz32: OpCtz32NonZero, OpCtz64: OpCtz64NonZero}
  2352  var mostNegativeDividend = map[Op]int64{
  2353  	OpDiv16: -1 << 15,
  2354  	OpMod16: -1 << 15,
  2355  	OpDiv32: -1 << 31,
  2356  	OpMod32: -1 << 31,
  2357  	OpDiv64: -1 << 63,
  2358  	OpMod64: -1 << 63}
  2359  
  2360  // simplifyBlock simplifies some constant values in b and evaluates
  2361  // branches to non-uniquely dominated successors of b.
  2362  func simplifyBlock(sdom SparseTree, ft *factsTable, b *Block) {
  2363  	for _, v := range b.Values {
  2364  		switch v.Op {
  2365  		case OpSlicemask:
  2366  			// Replace OpSlicemask operations in b with constants where possible.
  2367  			x, delta := isConstDelta(v.Args[0])
  2368  			if x == nil {
  2369  				break
  2370  			}
  2371  			// slicemask(x + y)
  2372  			// if x is larger than -y (y is negative), then slicemask is -1.
  2373  			lim := ft.limits[x.ID]
  2374  			if lim.umin > uint64(-delta) {
  2375  				if v.Args[0].Op == OpAdd64 {
  2376  					v.reset(OpConst64)
  2377  				} else {
  2378  					v.reset(OpConst32)
  2379  				}
  2380  				if b.Func.pass.debug > 0 {
  2381  					b.Func.Warnl(v.Pos, "Proved slicemask not needed")
  2382  				}
  2383  				v.AuxInt = -1
  2384  			}
  2385  		case OpCtz8, OpCtz16, OpCtz32, OpCtz64:
  2386  			// On some architectures, notably amd64, we can generate much better
  2387  			// code for CtzNN if we know that the argument is non-zero.
  2388  			// Capture that information here for use in arch-specific optimizations.
  2389  			x := v.Args[0]
  2390  			lim := ft.limits[x.ID]
  2391  			if lim.umin > 0 || lim.min > 0 || lim.max < 0 {
  2392  				if b.Func.pass.debug > 0 {
  2393  					b.Func.Warnl(v.Pos, "Proved %v non-zero", v.Op)
  2394  				}
  2395  				v.Op = ctzNonZeroOp[v.Op]
  2396  			}
  2397  		case OpRsh8x8, OpRsh8x16, OpRsh8x32, OpRsh8x64,
  2398  			OpRsh16x8, OpRsh16x16, OpRsh16x32, OpRsh16x64,
  2399  			OpRsh32x8, OpRsh32x16, OpRsh32x32, OpRsh32x64,
  2400  			OpRsh64x8, OpRsh64x16, OpRsh64x32, OpRsh64x64:
  2401  			// Check whether, for a >> b, we know that a is non-negative
  2402  			// and b is all of a's bits except the MSB. If so, a is shifted to zero.
  2403  			bits := 8 * v.Args[0].Type.Size()
  2404  			if v.Args[1].isGenericIntConst() && v.Args[1].AuxInt >= bits-1 && ft.isNonNegative(v.Args[0]) {
  2405  				if b.Func.pass.debug > 0 {
  2406  					b.Func.Warnl(v.Pos, "Proved %v shifts to zero", v.Op)
  2407  				}
  2408  				switch bits {
  2409  				case 64:
  2410  					v.reset(OpConst64)
  2411  				case 32:
  2412  					v.reset(OpConst32)
  2413  				case 16:
  2414  					v.reset(OpConst16)
  2415  				case 8:
  2416  					v.reset(OpConst8)
  2417  				default:
  2418  					panic("unexpected integer size")
  2419  				}
  2420  				v.AuxInt = 0
  2421  				break // Be sure not to fallthrough - this is no longer OpRsh.
  2422  			}
  2423  			// If the Rsh hasn't been replaced with 0, still check if it is bounded.
  2424  			fallthrough
  2425  		case OpLsh8x8, OpLsh8x16, OpLsh8x32, OpLsh8x64,
  2426  			OpLsh16x8, OpLsh16x16, OpLsh16x32, OpLsh16x64,
  2427  			OpLsh32x8, OpLsh32x16, OpLsh32x32, OpLsh32x64,
  2428  			OpLsh64x8, OpLsh64x16, OpLsh64x32, OpLsh64x64,
  2429  			OpRsh8Ux8, OpRsh8Ux16, OpRsh8Ux32, OpRsh8Ux64,
  2430  			OpRsh16Ux8, OpRsh16Ux16, OpRsh16Ux32, OpRsh16Ux64,
  2431  			OpRsh32Ux8, OpRsh32Ux16, OpRsh32Ux32, OpRsh32Ux64,
  2432  			OpRsh64Ux8, OpRsh64Ux16, OpRsh64Ux32, OpRsh64Ux64:
  2433  			// Check whether, for a << b, we know that b
  2434  			// is strictly less than the number of bits in a.
  2435  			by := v.Args[1]
  2436  			lim := ft.limits[by.ID]
  2437  			bits := 8 * v.Args[0].Type.Size()
  2438  			if lim.umax < uint64(bits) || (lim.max < bits && ft.isNonNegative(by)) {
  2439  				v.AuxInt = 1 // see shiftIsBounded
  2440  				if b.Func.pass.debug > 0 && !by.isGenericIntConst() {
  2441  					b.Func.Warnl(v.Pos, "Proved %v bounded", v.Op)
  2442  				}
  2443  			}
  2444  		case OpDiv16, OpDiv32, OpDiv64, OpMod16, OpMod32, OpMod64:
  2445  			// On amd64 and 386 fix-up code can be avoided if we know
  2446  			//  the divisor is not -1 or the dividend > MinIntNN.
  2447  			// Don't modify AuxInt on other architectures,
  2448  			// as that can interfere with CSE.
  2449  			// TODO: add other architectures?
  2450  			if b.Func.Config.arch != "386" && b.Func.Config.arch != "amd64" {
  2451  				break
  2452  			}
  2453  			divr := v.Args[1]
  2454  			divrLim := ft.limits[divr.ID]
  2455  			divd := v.Args[0]
  2456  			divdLim := ft.limits[divd.ID]
  2457  			if divrLim.max < -1 || divrLim.min > -1 || divdLim.min > mostNegativeDividend[v.Op] {
  2458  				// See DivisionNeedsFixUp in rewrite.go.
  2459  				// v.AuxInt = 1 means we have proved both that the divisor is not -1
  2460  				// and that the dividend is not the most negative integer,
  2461  				// so we do not need to add fix-up code.
  2462  				v.AuxInt = 1
  2463  				if b.Func.pass.debug > 0 {
  2464  					b.Func.Warnl(v.Pos, "Proved %v does not need fix-up", v.Op)
  2465  				}
  2466  			}
  2467  		}
  2468  		// Fold provable constant results.
  2469  		// Helps in cases where we reuse a value after branching on its equality.
  2470  		for i, arg := range v.Args {
  2471  			lim := ft.limits[arg.ID]
  2472  			var constValue int64
  2473  			switch {
  2474  			case lim.min == lim.max:
  2475  				constValue = lim.min
  2476  			case lim.umin == lim.umax:
  2477  				constValue = int64(lim.umin)
  2478  			default:
  2479  				continue
  2480  			}
  2481  			switch arg.Op {
  2482  			case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConstNil:
  2483  				continue
  2484  			}
  2485  			typ := arg.Type
  2486  			f := b.Func
  2487  			var c *Value
  2488  			switch {
  2489  			case typ.IsBoolean():
  2490  				c = f.ConstBool(typ, constValue != 0)
  2491  			case typ.IsInteger() && typ.Size() == 1:
  2492  				c = f.ConstInt8(typ, int8(constValue))
  2493  			case typ.IsInteger() && typ.Size() == 2:
  2494  				c = f.ConstInt16(typ, int16(constValue))
  2495  			case typ.IsInteger() && typ.Size() == 4:
  2496  				c = f.ConstInt32(typ, int32(constValue))
  2497  			case typ.IsInteger() && typ.Size() == 8:
  2498  				c = f.ConstInt64(typ, constValue)
  2499  			case typ.IsPtrShaped():
  2500  				if constValue == 0 {
  2501  					c = f.ConstNil(typ)
  2502  				} else {
  2503  					// Not sure how this might happen, but if it
  2504  					// does, just skip it.
  2505  					continue
  2506  				}
  2507  			default:
  2508  				// Not sure how this might happen, but if it
  2509  				// does, just skip it.
  2510  				continue
  2511  			}
  2512  			v.SetArg(i, c)
  2513  			ft.initLimitForNewValue(c)
  2514  			if b.Func.pass.debug > 1 {
  2515  				b.Func.Warnl(v.Pos, "Proved %v's arg %d (%v) is constant %d", v, i, arg, constValue)
  2516  			}
  2517  		}
  2518  	}
  2519  
  2520  	if b.Kind != BlockIf {
  2521  		return
  2522  	}
  2523  
  2524  	// Consider outgoing edges from this block.
  2525  	parent := b
  2526  	for i, branch := range [...]branch{positive, negative} {
  2527  		child := parent.Succs[i].b
  2528  		if getBranch(sdom, parent, child) != unknown {
  2529  			// For edges to uniquely dominated blocks, we
  2530  			// already did this when we visited the child.
  2531  			continue
  2532  		}
  2533  		// For edges to other blocks, this can trim a branch
  2534  		// even if we couldn't get rid of the child itself.
  2535  		ft.checkpoint()
  2536  		addBranchRestrictions(ft, parent, branch)
  2537  		unsat := ft.unsat
  2538  		ft.restore()
  2539  		if unsat {
  2540  			// This branch is impossible, so remove it
  2541  			// from the block.
  2542  			removeBranch(parent, branch)
  2543  			// No point in considering the other branch.
  2544  			// (It *is* possible for both to be
  2545  			// unsatisfiable since the fact table is
  2546  			// incomplete. We could turn this into a
  2547  			// BlockExit, but it doesn't seem worth it.)
  2548  			break
  2549  		}
  2550  	}
  2551  }
  2552  
  2553  func removeBranch(b *Block, branch branch) {
  2554  	c := b.Controls[0]
  2555  	if b.Func.pass.debug > 0 {
  2556  		verb := "Proved"
  2557  		if branch == positive {
  2558  			verb = "Disproved"
  2559  		}
  2560  		if b.Func.pass.debug > 1 {
  2561  			b.Func.Warnl(b.Pos, "%s %s (%s)", verb, c.Op, c)
  2562  		} else {
  2563  			b.Func.Warnl(b.Pos, "%s %s", verb, c.Op)
  2564  		}
  2565  	}
  2566  	if c != nil && c.Pos.IsStmt() == src.PosIsStmt && c.Pos.SameFileAndLine(b.Pos) {
  2567  		// attempt to preserve statement marker.
  2568  		b.Pos = b.Pos.WithIsStmt()
  2569  	}
  2570  	if branch == positive || branch == negative {
  2571  		b.Kind = BlockFirst
  2572  		b.ResetControls()
  2573  		if branch == positive {
  2574  			b.swapSuccessors()
  2575  		}
  2576  	} else {
  2577  		// TODO: figure out how to remove an entry from a jump table
  2578  	}
  2579  }
  2580  
  2581  // isConstDelta returns non-nil if v is equivalent to w+delta (signed).
  2582  func isConstDelta(v *Value) (w *Value, delta int64) {
  2583  	cop := OpConst64
  2584  	switch v.Op {
  2585  	case OpAdd32, OpSub32:
  2586  		cop = OpConst32
  2587  	case OpAdd16, OpSub16:
  2588  		cop = OpConst16
  2589  	case OpAdd8, OpSub8:
  2590  		cop = OpConst8
  2591  	}
  2592  	switch v.Op {
  2593  	case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
  2594  		if v.Args[0].Op == cop {
  2595  			return v.Args[1], v.Args[0].AuxInt
  2596  		}
  2597  		if v.Args[1].Op == cop {
  2598  			return v.Args[0], v.Args[1].AuxInt
  2599  		}
  2600  	case OpSub64, OpSub32, OpSub16, OpSub8:
  2601  		if v.Args[1].Op == cop {
  2602  			aux := v.Args[1].AuxInt
  2603  			if aux != -aux { // Overflow; too bad
  2604  				return v.Args[0], -aux
  2605  			}
  2606  		}
  2607  	}
  2608  	return nil, 0
  2609  }
  2610  
  2611  // isCleanExt reports whether v is the result of a value-preserving
  2612  // sign or zero extension.
  2613  func isCleanExt(v *Value) bool {
  2614  	switch v.Op {
  2615  	case OpSignExt8to16, OpSignExt8to32, OpSignExt8to64,
  2616  		OpSignExt16to32, OpSignExt16to64, OpSignExt32to64:
  2617  		// signed -> signed is the only value-preserving sign extension
  2618  		return v.Args[0].Type.IsSigned() && v.Type.IsSigned()
  2619  
  2620  	case OpZeroExt8to16, OpZeroExt8to32, OpZeroExt8to64,
  2621  		OpZeroExt16to32, OpZeroExt16to64, OpZeroExt32to64:
  2622  		// unsigned -> signed/unsigned are value-preserving zero extensions
  2623  		return !v.Args[0].Type.IsSigned()
  2624  	}
  2625  	return false
  2626  }
  2627  

View as plain text