Source file src/cmd/compile/internal/ssa/regalloc.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Register allocation.
     6  //
     7  // We use a version of a linear scan register allocator. We treat the
     8  // whole function as a single long basic block and run through
     9  // it using a greedy register allocator. Then all merge edges
    10  // (those targeting a block with len(Preds)>1) are processed to
    11  // shuffle data into the place that the target of the edge expects.
    12  //
    13  // The greedy allocator moves values into registers just before they
    14  // are used, spills registers only when necessary, and spills the
    15  // value whose next use is farthest in the future.
    16  //
     17  // The register allocator requires that a block is not scheduled until
     18  // at least one of its predecessors has been scheduled. One previously
     19  // scheduled predecessor provides the starting register state for the block.
    20  //
    21  // It also requires that there are no critical edges (critical =
    22  // comes from a block with >1 successor and goes to a block with >1
    23  // predecessor).  This makes it easy to add fixup code on merge edges -
    24  // the source of a merge edge has only one successor, so we can add
    25  // fixup code to the end of that block.
    26  
    27  // Spilling
    28  //
    29  // During the normal course of the allocator, we might throw a still-live
    30  // value out of all registers. When that value is subsequently used, we must
    31  // load it from a slot on the stack. We must also issue an instruction to
    32  // initialize that stack location with a copy of v.
    33  //
    34  // pre-regalloc:
    35  //   (1) v = Op ...
    36  //   (2) x = Op ...
    37  //   (3) ... = Op v ...
    38  //
    39  // post-regalloc:
    40  //   (1) v = Op ...    : AX // computes v, store result in AX
    41  //       s = StoreReg v     // spill v to a stack slot
    42  //   (2) x = Op ...    : AX // some other op uses AX
    43  //       c = LoadReg s : CX // restore v from stack slot
    44  //   (3) ... = Op c ...     // use the restored value
    45  //
    46  // Allocation occurs normally until we reach (3) and we realize we have
    47  // a use of v and it isn't in any register. At that point, we allocate
    48  // a spill (a StoreReg) for v. We can't determine the correct place for
    49  // the spill at this point, so we allocate the spill as blockless initially.
    50  // The restore is then generated to load v back into a register so it can
    51  // be used. Subsequent uses of v will use the restored value c instead.
    52  //
    53  // What remains is the question of where to schedule the spill.
    54  // During allocation, we keep track of the dominator of all restores of v.
    55  // The spill of v must dominate that block. The spill must also be issued at
    56  // a point where v is still in a register.
    57  //
    58  // To find the right place, start at b, the block which dominates all restores.
    59  //  - If b is v.Block, then issue the spill right after v.
    60  //    It is known to be in a register at that point, and dominates any restores.
    61  //  - Otherwise, if v is in a register at the start of b,
    62  //    put the spill of v at the start of b.
    63  //  - Otherwise, set b = immediate dominator of b, and repeat.
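         //
         // A sketch of that walk (hypothetical helper names; the real code tracks
         // the restoreMin/restoreMax subtree bounds in valState instead of an
         // explicit list of restores):
         //
         //   b := dominatorOfAllRestores(v)
         //   for {
         //     if b == v.Block {
         //       placeSpillAfter(v) // v is in a register right after it is computed
         //       return
         //     }
         //     if inRegisterAtStartOf(v, b) {
         //       placeSpillAtStartOf(b)
         //       return
         //     }
         //     b = idom(b) // walk up the dominator tree and retry
         //   }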
    64  //
    65  // Phi values are special, as always. We define two kinds of phis, those
    66  // where the merge happens in a register (a "register" phi) and those where
    67  // the merge happens in a stack location (a "stack" phi).
    68  //
    69  // A register phi must have the phi and all of its inputs allocated to the
    70  // same register. Register phis are spilled similarly to regular ops.
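         // For example (illustrative), mirroring the stack phi case below:
         //     b1: y = ... : AX        b2: z = ... : AX
         //         goto b3                 goto b3
         //     b3: x = phi(y, z) : AX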
    71  //
    72  // A stack phi must have the phi and all of its inputs allocated to the same
    73  // stack location. Stack phis start out life already spilled - each phi
    74  // input must be a store (using StoreReg) at the end of the corresponding
    75  // predecessor block.
    76  //     b1: y = ... : AX        b2: z = ... : BX
    77  //         y2 = StoreReg y         z2 = StoreReg z
    78  //         goto b3                 goto b3
    79  //     b3: x = phi(y2, z2)
    80  // The stack allocator knows that StoreReg args of stack-allocated phis
    81  // must be allocated to the same stack slot as the phi that uses them.
    82  // x is now a spilled value and a restore must appear before its first use.
    83  
    84  // TODO
    85  
    86  // Use an affinity graph to mark two values which should use the
    87  // same register. This affinity graph will be used to prefer certain
    88  // registers for allocation. This affinity helps eliminate moves that
    89  // are required for phi implementations and helps generate allocations
    90  // for 2-register architectures.
    91  
    92  // Note: regalloc generates a not-quite-SSA output. If we have:
    93  //
    94  //             b1: x = ... : AX
    95  //                 x2 = StoreReg x
    96  //                 ... AX gets reused for something else ...
    97  //                 if ... goto b3 else b4
    98  //
    99  //   b3: x3 = LoadReg x2 : BX       b4: x4 = LoadReg x2 : CX
   100  //       ... use x3 ...                 ... use x4 ...
   101  //
   102  //             b2: ... use x3 ...
   103  //
   104  // If b3 is the primary predecessor of b2, then we use x3 in b2 and
    105  // add an x4:CX->BX copy at the end of b4.
   106  // But the definition of x3 doesn't dominate b2.  We should really
   107  // insert an extra phi at the start of b2 (x5=phi(x3,x4):BX) to keep
   108  // SSA form. For now, we ignore this problem as remaining in strict
   109  // SSA form isn't needed after regalloc. We'll just leave the use
   110  // of x3 not dominated by the definition of x3, and the CX->BX copy
   111  // will have no use (so don't run deadcode after regalloc!).
   112  // TODO: maybe we should introduce these extra phis?
   113  
   114  package ssa
   115  
   116  import (
   117  	"cmd/compile/internal/base"
   118  	"cmd/compile/internal/ir"
   119  	"cmd/compile/internal/types"
   120  	"cmd/internal/src"
   121  	"cmd/internal/sys"
   122  	"fmt"
   123  	"internal/buildcfg"
   124  	"math"
   125  	"math/bits"
   126  	"unsafe"
   127  )
   128  
   129  const (
   130  	moveSpills = iota
   131  	logSpills
   132  	regDebug
   133  	stackDebug
   134  )
   135  
   136  // distance is a measure of how far into the future values are used.
   137  // distance is measured in units of instructions.
   138  const (
   139  	likelyDistance   = 1
   140  	normalDistance   = 10
   141  	unlikelyDistance = 100
   142  )
   143  
   144  // regalloc performs register allocation on f. It sets f.RegAlloc
   145  // to the resulting allocation.
   146  func regalloc(f *Func) {
   147  	var s regAllocState
   148  	s.init(f)
   149  	s.regalloc(f)
   150  	s.close()
   151  }
   152  
   153  type register uint8
   154  
   155  const noRegister register = 255
   156  
   157  // For bulk initializing
   158  var noRegisters [32]register = [32]register{
   159  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   160  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   161  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   162  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   163  }
   164  
   165  // A regMask encodes a set of machine registers.
   166  // TODO: regMask -> regSet?
   167  type regMask uint64
   168  
   169  func (m regMask) String() string {
   170  	s := ""
   171  	for r := register(0); m != 0; r++ {
   172  		if m>>r&1 == 0 {
   173  			continue
   174  		}
   175  		m &^= regMask(1) << r
   176  		if s != "" {
   177  			s += " "
   178  		}
   179  		s += fmt.Sprintf("r%d", r)
   180  	}
   181  	return s
   182  }
   183  
   184  func (m regMask) contains(r register) bool {
   185  	return m>>r&1 != 0
   186  }
   187  
   188  func (s *regAllocState) RegMaskString(m regMask) string {
   189  	str := ""
   190  	for r := register(0); m != 0; r++ {
   191  		if m>>r&1 == 0 {
   192  			continue
   193  		}
   194  		m &^= regMask(1) << r
   195  		if str != "" {
   196  			str += " "
   197  		}
   198  		str += s.registers[r].String()
   199  	}
   200  	return str
   201  }
   202  
   203  // countRegs returns the number of set bits in the register mask.
   204  func countRegs(r regMask) int {
   205  	return bits.OnesCount64(uint64(r))
   206  }
   207  
   208  // pickReg picks an arbitrary register from the register mask.
   209  func pickReg(r regMask) register {
   210  	if r == 0 {
   211  		panic("can't pick a register from an empty set")
   212  	}
   213  	// pick the lowest one
   214  	return register(bits.TrailingZeros64(uint64(r)))
   215  }
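
         // pickAllRegs drains a mask in ascending register order. It is an
         // illustrative sketch (not used by the allocator) showing how pickReg
         // and countRegs compose with ordinary bit arithmetic on a regMask.
         func pickAllRegs(m regMask) []register {
         	regs := make([]register, 0, countRegs(m))
         	for m != 0 {
         		r := pickReg(m) // lowest set register first
         		regs = append(regs, r)
         		m &^= regMask(1) << r // remove r from the mask
         	}
         	return regs
         }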
   216  
   217  type use struct {
   218  	// distance from start of the block to a use of a value
   219  	//   dist == 0                 used by first instruction in block
   220  	//   dist == len(b.Values)-1   used by last instruction in block
   221  	//   dist == len(b.Values)     used by block's control value
   222  	//   dist  > len(b.Values)     used by a subsequent block
   223  	dist int32
   224  	pos  src.XPos // source position of the use
   225  	next *use     // linked list of uses of a value in nondecreasing dist order
   226  }
   227  
   228  // A valState records the register allocation state for a (pre-regalloc) value.
   229  type valState struct {
   230  	regs              regMask // the set of registers holding a Value (usually just one)
   231  	uses              *use    // list of uses in this block
   232  	spill             *Value  // spilled copy of the Value (if any)
   233  	restoreMin        int32   // minimum of all restores' blocks' sdom.entry
   234  	restoreMax        int32   // maximum of all restores' blocks' sdom.exit
    235  	needReg           bool    // cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple()
   236  	rematerializeable bool    // cached value of v.rematerializeable()
   237  }
   238  
   239  type regState struct {
   240  	v *Value // Original (preregalloc) Value stored in this register.
   241  	c *Value // A Value equal to v which is currently in a register.  Might be v or a copy of it.
   242  	// If a register is unused, v==c==nil
   243  }
   244  
   245  type regAllocState struct {
   246  	f *Func
   247  
   248  	sdom        SparseTree
   249  	registers   []Register
   250  	numRegs     register
   251  	SPReg       register
   252  	SBReg       register
   253  	GReg        register
   254  	ZeroIntReg  register
   255  	allocatable regMask
   256  
   257  	// live values at the end of each block.  live[b.ID] is a list of value IDs
   258  	// which are live at the end of b, together with a count of how many instructions
   259  	// forward to the next use.
   260  	live [][]liveInfo
   261  	// desired register assignments at the end of each block.
   262  	// Note that this is a static map computed before allocation occurs. Dynamic
   263  	// register desires (from partially completed allocations) will trump
   264  	// this information.
   265  	desired []desiredState
   266  
   267  	// current state of each (preregalloc) Value
   268  	values []valState
   269  
   270  	// ID of SP, SB values
   271  	sp, sb ID
   272  
   273  	// For each Value, map from its value ID back to the
   274  	// preregalloc Value it was derived from.
   275  	orig []*Value
   276  
   277  	// current state of each register.
   278  	// Includes only registers in allocatable.
   279  	regs []regState
   280  
   281  	// registers that contain values which can't be kicked out
   282  	nospill regMask
   283  
   284  	// mask of registers currently in use
   285  	used regMask
   286  
   287  	// mask of registers used since the start of the current block
   288  	usedSinceBlockStart regMask
   289  
   290  	// mask of registers used in the current instruction
   291  	tmpused regMask
   292  
   293  	// current block we're working on
   294  	curBlock *Block
   295  
   296  	// cache of use records
   297  	freeUseRecords *use
   298  
   299  	// endRegs[blockid] is the register state at the end of each block.
   300  	// encoded as a set of endReg records.
   301  	endRegs [][]endReg
   302  
   303  	// startRegs[blockid] is the register state at the start of merge blocks.
   304  	// saved state does not include the state of phi ops in the block.
   305  	startRegs [][]startReg
   306  
   307  	// startRegsMask is a mask of the registers in startRegs[curBlock.ID].
    308  	// Registers dropped from startRegsMask are later synchronized back to
   309  	// startRegs by dropping from there as well.
   310  	startRegsMask regMask
   311  
   312  	// spillLive[blockid] is the set of live spills at the end of each block
   313  	spillLive [][]ID
   314  
   315  	// a set of copies we generated to move things around, and
   316  	// whether it is used in shuffle. Unused copies will be deleted.
   317  	copies map[*Value]bool
   318  
   319  	loopnest *loopnest
   320  
   321  	// choose a good order in which to visit blocks for allocation purposes.
   322  	visitOrder []*Block
   323  
   324  	// blockOrder[b.ID] corresponds to the index of block b in visitOrder.
   325  	blockOrder []int32
   326  
   327  	// whether to insert instructions that clobber dead registers at call sites
   328  	doClobber bool
   329  
   330  	// For each instruction index in a basic block, the index of the next call
   331  	// at or after that instruction index.
    332  	// If there is no next call, the value is math.MaxInt32.
   333  	// nextCall for a call instruction points to itself.
   334  	// (Indexes and results are pre-regalloc.)
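         	// For example, with calls at instruction indexes 2 and 5 in a block of
         	// 7 values, nextCall would be [2 2 2 5 5 5 math.MaxInt32].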
   335  	nextCall []int32
   336  
   337  	// Index of the instruction we're currently working on.
   338  	// Index is expressed in terms of the pre-regalloc b.Values list.
   339  	curIdx int
   340  }
   341  
   342  type endReg struct {
   343  	r register
   344  	v *Value // pre-regalloc value held in this register (TODO: can we use ID here?)
   345  	c *Value // cached version of the value
   346  }
   347  
   348  type startReg struct {
   349  	r   register
   350  	v   *Value   // pre-regalloc value needed in this register
   351  	c   *Value   // cached version of the value
   352  	pos src.XPos // source position of use of this register
   353  }
   354  
   355  // freeReg frees up register r. Any current user of r is kicked out.
   356  func (s *regAllocState) freeReg(r register) {
   357  	if !s.allocatable.contains(r) && !s.isGReg(r) {
   358  		return
   359  	}
   360  	v := s.regs[r].v
   361  	if v == nil {
   362  		s.f.Fatalf("tried to free an already free register %d\n", r)
   363  	}
   364  
   365  	// Mark r as unused.
   366  	if s.f.pass.debug > regDebug {
   367  		fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c)
   368  	}
   369  	s.regs[r] = regState{}
   370  	s.values[v.ID].regs &^= regMask(1) << r
   371  	s.used &^= regMask(1) << r
   372  }
   373  
   374  // freeRegs frees up all registers listed in m.
   375  func (s *regAllocState) freeRegs(m regMask) {
   376  	for m&s.used != 0 {
   377  		s.freeReg(pickReg(m & s.used))
   378  	}
   379  }
   380  
   381  // clobberRegs inserts instructions that clobber registers listed in m.
   382  func (s *regAllocState) clobberRegs(m regMask) {
    383  	m &= s.allocatable & s.f.Config.gpRegMask // only integer registers can contain pointers, so only clobber those
   384  	for m != 0 {
   385  		r := pickReg(m)
   386  		m &^= 1 << r
   387  		x := s.curBlock.NewValue0(src.NoXPos, OpClobberReg, types.TypeVoid)
   388  		s.f.setHome(x, &s.registers[r])
   389  	}
   390  }
   391  
   392  // setOrig records that c's original value is the same as
   393  // v's original value.
   394  func (s *regAllocState) setOrig(c *Value, v *Value) {
   395  	if int(c.ID) >= cap(s.orig) {
   396  		x := s.f.Cache.allocValueSlice(int(c.ID) + 1)
   397  		copy(x, s.orig)
   398  		s.f.Cache.freeValueSlice(s.orig)
   399  		s.orig = x
   400  	}
   401  	for int(c.ID) >= len(s.orig) {
   402  		s.orig = append(s.orig, nil)
   403  	}
   404  	if s.orig[c.ID] != nil {
   405  		s.f.Fatalf("orig value set twice %s %s", c, v)
   406  	}
   407  	s.orig[c.ID] = s.orig[v.ID]
   408  }
   409  
   410  // assignReg assigns register r to hold c, a copy of v.
   411  // r must be unused.
   412  func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
   413  	if s.f.pass.debug > regDebug {
   414  		fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
   415  	}
   416  	// Allocate v to r.
   417  	s.values[v.ID].regs |= regMask(1) << r
   418  	s.f.setHome(c, &s.registers[r])
   419  
   420  	// Allocate r to v.
   421  	if !s.allocatable.contains(r) && !s.isGReg(r) {
   422  		return
   423  	}
   424  	if s.regs[r].v != nil {
   425  		s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
   426  	}
   427  	s.regs[r] = regState{v, c}
   428  	s.used |= regMask(1) << r
   429  }
   430  
   431  // allocReg chooses a register from the set of registers in mask.
   432  // If there is no unused register, a Value will be kicked out of
   433  // a register to make room.
   434  func (s *regAllocState) allocReg(mask regMask, v *Value) register {
   435  	if v.OnWasmStack {
   436  		return noRegister
   437  	}
   438  
   439  	mask &= s.allocatable
   440  	mask &^= s.nospill
   441  	if mask == 0 {
   442  		s.f.Fatalf("no register available for %s", v.LongString())
   443  	}
   444  
   445  	// Pick an unused register if one is available.
   446  	if mask&^s.used != 0 {
   447  		r := pickReg(mask &^ s.used)
   448  		s.usedSinceBlockStart |= regMask(1) << r
   449  		return r
   450  	}
   451  
   452  	// Pick a value to spill. Spill the value with the
   453  	// farthest-in-the-future use.
   454  	// TODO: Prefer registers with already spilled Values?
   455  	// TODO: Modify preference using affinity graph.
   456  	// TODO: if a single value is in multiple registers, spill one of them
   457  	// before spilling a value in just a single register.
   458  
   459  	// Find a register to spill. We spill the register containing the value
   460  	// whose next use is as far in the future as possible.
   461  	// https://en.wikipedia.org/wiki/Page_replacement_algorithm#The_theoretically_optimal_page_replacement_algorithm
   462  	var r register
   463  	maxuse := int32(-1)
   464  	for t := register(0); t < s.numRegs; t++ {
   465  		if mask>>t&1 == 0 {
   466  			continue
   467  		}
   468  		v := s.regs[t].v
   469  		if n := s.values[v.ID].uses.dist; n > maxuse {
   470  			// v's next use is farther in the future than any value
   471  			// we've seen so far. A new best spill candidate.
   472  			r = t
   473  			maxuse = n
   474  		}
   475  	}
   476  	if maxuse == -1 {
   477  		s.f.Fatalf("couldn't find register to spill")
   478  	}
   479  
   480  	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
   481  		// TODO(neelance): In theory this should never happen, because all wasm registers are equal.
   482  		// So if there is still a free register, the allocation should have picked that one in the first place instead of
   483  		// trying to kick some other value out. In practice, this case does happen and it breaks the stack optimization.
   484  		s.freeReg(r)
   485  		return r
   486  	}
   487  
   488  	// Try to move it around before kicking out, if there is a free register.
   489  	// We generate a Copy and record it. It will be deleted if never used.
   490  	v2 := s.regs[r].v
   491  	m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r)
   492  	if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
   493  		s.usedSinceBlockStart |= regMask(1) << r
   494  		r2 := pickReg(m)
   495  		c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
   496  		s.copies[c] = false
   497  		if s.f.pass.debug > regDebug {
   498  			fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2])
   499  		}
   500  		s.setOrig(c, v2)
   501  		s.assignReg(r2, v2, c)
   502  	}
   503  
   504  	// If the evicted register isn't used between the start of the block
   505  	// and now then there is no reason to even request it on entry. We can
   506  	// drop from startRegs in that case.
   507  	if s.usedSinceBlockStart&(regMask(1)<<r) == 0 {
    508  		if s.startRegsMask&(regMask(1)<<r) != 0 {
   509  			if s.f.pass.debug > regDebug {
   510  				fmt.Printf("dropped from startRegs: %s\n", &s.registers[r])
   511  			}
   512  			s.startRegsMask &^= regMask(1) << r
   513  		}
   514  	}
   515  
   516  	s.freeReg(r)
   517  	s.usedSinceBlockStart |= regMask(1) << r
   518  	return r
   519  }
   520  
   521  // makeSpill returns a Value which represents the spilled value of v.
   522  // b is the block in which the spill is used.
   523  func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
   524  	vi := &s.values[v.ID]
   525  	if vi.spill != nil {
   526  		// Final block not known - keep track of subtree where restores reside.
   527  		vi.restoreMin = min(vi.restoreMin, s.sdom[b.ID].entry)
   528  		vi.restoreMax = max(vi.restoreMax, s.sdom[b.ID].exit)
   529  		return vi.spill
   530  	}
   531  	// Make a spill for v. We don't know where we want
   532  	// to put it yet, so we leave it blockless for now.
   533  	spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)
   534  	// We also don't know what the spill's arg will be.
   535  	// Leave it argless for now.
   536  	s.setOrig(spill, v)
   537  	vi.spill = spill
   538  	vi.restoreMin = s.sdom[b.ID].entry
   539  	vi.restoreMax = s.sdom[b.ID].exit
   540  	return spill
   541  }
   542  
   543  // allocValToReg allocates v to a register selected from regMask and
   544  // returns the register copy of v. Any previous user is kicked out and spilled
   545  // (if necessary). Load code is added at the current pc. If nospill is set the
   546  // allocated register is marked nospill so the assignment cannot be
   547  // undone until the caller allows it by clearing nospill. Returns a
   548  // *Value which is either v or a copy of v allocated to the chosen register.
   549  func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
   550  	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm && v.rematerializeable() {
   551  		c := v.copyIntoWithXPos(s.curBlock, pos)
   552  		c.OnWasmStack = true
   553  		s.setOrig(c, v)
   554  		return c
   555  	}
   556  	if v.OnWasmStack {
   557  		return v
   558  	}
   559  
   560  	vi := &s.values[v.ID]
   561  	pos = pos.WithNotStmt()
   562  	// Check if v is already in a requested register.
   563  	if mask&vi.regs != 0 {
   564  		r := pickReg(mask & vi.regs)
   565  		if !s.allocatable.contains(r) {
   566  			return v // v is in a fixed register
   567  		}
   568  		if s.regs[r].v != v || s.regs[r].c == nil {
   569  			panic("bad register state")
   570  		}
   571  		if nospill {
   572  			s.nospill |= regMask(1) << r
   573  		}
   574  		s.usedSinceBlockStart |= regMask(1) << r
   575  		return s.regs[r].c
   576  	}
   577  
   578  	var r register
   579  	// If nospill is set, the value is used immediately, so it can live on the WebAssembly stack.
   580  	onWasmStack := nospill && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm
   581  	if !onWasmStack {
   582  		// Allocate a register.
   583  		r = s.allocReg(mask, v)
   584  	}
   585  
   586  	// Allocate v to the new register.
   587  	var c *Value
   588  	if vi.regs != 0 {
   589  		// Copy from a register that v is already in.
   590  		r2 := pickReg(vi.regs)
   591  		var current *Value
   592  		if !s.allocatable.contains(r2) {
   593  			current = v // v is in a fixed register
   594  		} else {
   595  			if s.regs[r2].v != v {
   596  				panic("bad register state")
   597  			}
   598  			current = s.regs[r2].c
   599  		}
   600  		s.usedSinceBlockStart |= regMask(1) << r2
   601  		c = s.curBlock.NewValue1(pos, OpCopy, v.Type, current)
   602  	} else if v.rematerializeable() {
   603  		// Rematerialize instead of loading from the spill location.
   604  		c = v.copyIntoWithXPos(s.curBlock, pos)
   605  	} else {
   606  		// Load v from its spill location.
   607  		spill := s.makeSpill(v, s.curBlock)
   608  		if s.f.pass.debug > logSpills {
   609  			s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
   610  		}
   611  		c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
   612  	}
   613  
   614  	s.setOrig(c, v)
   615  
   616  	if onWasmStack {
   617  		c.OnWasmStack = true
   618  		return c
   619  	}
   620  
   621  	s.assignReg(r, v, c)
   622  	if c.Op == OpLoadReg && s.isGReg(r) {
   623  		s.f.Fatalf("allocValToReg.OpLoadReg targeting g: " + c.LongString())
   624  	}
   625  	if nospill {
   626  		s.nospill |= regMask(1) << r
   627  	}
   628  	return c
   629  }
   630  
    631  // isLeaf reports whether f contains no calls other than tail calls.
   632  func isLeaf(f *Func) bool {
   633  	for _, b := range f.Blocks {
   634  		for _, v := range b.Values {
   635  			if v.Op.IsCall() && !v.Op.IsTailCall() {
   636  				// tail call is not counted as it does not save the return PC or need a frame
   637  				return false
   638  			}
   639  		}
   640  	}
   641  	return true
   642  }
   643  
   644  // needRegister reports whether v needs a register.
   645  func (v *Value) needRegister() bool {
   646  	return !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple()
   647  }
   648  
   649  func (s *regAllocState) init(f *Func) {
   650  	s.f = f
   651  	s.f.RegAlloc = s.f.Cache.locs[:0]
   652  	s.registers = f.Config.registers
   653  	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
   654  		s.f.Fatalf("bad number of registers: %d", nr)
   655  	} else {
   656  		s.numRegs = register(nr)
   657  	}
   658  	// Locate SP, SB, and g registers.
   659  	s.SPReg = noRegister
   660  	s.SBReg = noRegister
   661  	s.GReg = noRegister
   662  	s.ZeroIntReg = noRegister
   663  	for r := register(0); r < s.numRegs; r++ {
   664  		switch s.registers[r].String() {
   665  		case "SP":
   666  			s.SPReg = r
   667  		case "SB":
   668  			s.SBReg = r
   669  		case "g":
   670  			s.GReg = r
   671  		case "ZERO": // TODO: arch-specific?
   672  			s.ZeroIntReg = r
   673  		}
   674  	}
   675  	// Make sure we found all required registers.
   676  	switch noRegister {
   677  	case s.SPReg:
   678  		s.f.Fatalf("no SP register found")
   679  	case s.SBReg:
   680  		s.f.Fatalf("no SB register found")
   681  	case s.GReg:
   682  		if f.Config.hasGReg {
   683  			s.f.Fatalf("no g register found")
   684  		}
   685  	}
   686  
   687  	// Figure out which registers we're allowed to use.
   688  	s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask
   689  	s.allocatable &^= 1 << s.SPReg
   690  	s.allocatable &^= 1 << s.SBReg
   691  	if s.f.Config.hasGReg {
   692  		s.allocatable &^= 1 << s.GReg
   693  	}
   694  	if s.ZeroIntReg != noRegister {
   695  		s.allocatable &^= 1 << s.ZeroIntReg
   696  	}
   697  	if buildcfg.FramePointerEnabled && s.f.Config.FPReg >= 0 {
   698  		s.allocatable &^= 1 << uint(s.f.Config.FPReg)
   699  	}
   700  	if s.f.Config.LinkReg != -1 {
   701  		if isLeaf(f) {
   702  			// Leaf functions don't save/restore the link register.
   703  			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
   704  		}
   705  	}
   706  	if s.f.Config.ctxt.Flag_dynlink {
   707  		switch s.f.Config.arch {
   708  		case "386":
   709  			// nothing to do.
   710  			// Note that for Flag_shared (position independent code)
   711  			// we do need to be careful, but that carefulness is hidden
   712  			// in the rewrite rules so we always have a free register
   713  			// available for global load/stores. See _gen/386.rules (search for Flag_shared).
   714  		case "amd64":
   715  			s.allocatable &^= 1 << 15 // R15
   716  		case "arm":
   717  			s.allocatable &^= 1 << 9 // R9
   718  		case "arm64":
   719  			// nothing to do
   720  		case "loong64": // R2 (aka TP) already reserved.
   721  			// nothing to do
   722  		case "ppc64le": // R2 already reserved.
   723  			// nothing to do
   724  		case "riscv64": // X3 (aka GP) and X4 (aka TP) already reserved.
   725  			// nothing to do
   726  		case "s390x":
   727  			s.allocatable &^= 1 << 11 // R11
   728  		default:
   729  			s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
   730  		}
   731  	}
   732  
   733  	// Linear scan register allocation can be influenced by the order in which blocks appear.
   734  	// Decouple the register allocation order from the generated block order.
   735  	// This also creates an opportunity for experiments to find a better order.
   736  	s.visitOrder = layoutRegallocOrder(f)
   737  
   738  	// Compute block order. This array allows us to distinguish forward edges
   739  	// from backward edges and compute how far they go.
   740  	s.blockOrder = make([]int32, f.NumBlocks())
   741  	for i, b := range s.visitOrder {
   742  		s.blockOrder[b.ID] = int32(i)
   743  	}
   744  
   745  	s.regs = make([]regState, s.numRegs)
   746  	nv := f.NumValues()
   747  	if cap(s.f.Cache.regallocValues) >= nv {
   748  		s.f.Cache.regallocValues = s.f.Cache.regallocValues[:nv]
   749  	} else {
   750  		s.f.Cache.regallocValues = make([]valState, nv)
   751  	}
   752  	s.values = s.f.Cache.regallocValues
   753  	s.orig = s.f.Cache.allocValueSlice(nv)
   754  	s.copies = make(map[*Value]bool)
   755  	for _, b := range s.visitOrder {
   756  		for _, v := range b.Values {
   757  			if v.needRegister() {
   758  				s.values[v.ID].needReg = true
   759  				s.values[v.ID].rematerializeable = v.rematerializeable()
   760  				s.orig[v.ID] = v
   761  			}
   762  			// Note: needReg is false for values returning Tuple types.
   763  			// Instead, we mark the corresponding Selects as needReg.
   764  		}
   765  	}
   766  	s.computeLive()
   767  
   768  	s.endRegs = make([][]endReg, f.NumBlocks())
   769  	s.startRegs = make([][]startReg, f.NumBlocks())
   770  	s.spillLive = make([][]ID, f.NumBlocks())
   771  	s.sdom = f.Sdom()
   772  
   773  	// wasm: Mark instructions that can be optimized to have their values only on the WebAssembly stack.
   774  	if f.Config.ctxt.Arch.Arch == sys.ArchWasm {
   775  		canLiveOnStack := f.newSparseSet(f.NumValues())
   776  		defer f.retSparseSet(canLiveOnStack)
   777  		for _, b := range f.Blocks {
   778  			// New block. Clear candidate set.
   779  			canLiveOnStack.clear()
   780  			for _, c := range b.ControlValues() {
   781  				if c.Uses == 1 && !opcodeTable[c.Op].generic {
   782  					canLiveOnStack.add(c.ID)
   783  				}
   784  			}
   785  			// Walking backwards.
   786  			for i := len(b.Values) - 1; i >= 0; i-- {
   787  				v := b.Values[i]
   788  				if canLiveOnStack.contains(v.ID) {
   789  					v.OnWasmStack = true
   790  				} else {
   791  					// Value can not live on stack. Values are not allowed to be reordered, so clear candidate set.
   792  					canLiveOnStack.clear()
   793  				}
   794  				for _, arg := range v.Args {
   795  					// Value can live on the stack if:
   796  					// - it is only used once
   797  					// - it is used in the same basic block
   798  					// - it is not a "mem" value
   799  					// - it is a WebAssembly op
   800  					if arg.Uses == 1 && arg.Block == v.Block && !arg.Type.IsMemory() && !opcodeTable[arg.Op].generic {
   801  						canLiveOnStack.add(arg.ID)
   802  					}
   803  				}
   804  			}
   805  		}
   806  	}
   807  
   808  	// The clobberdeadreg experiment inserts code to clobber dead registers
   809  	// at call sites.
   810  	// Ignore huge functions to avoid doing too much work.
   811  	if base.Flag.ClobberDeadReg && len(s.f.Blocks) <= 10000 {
   812  		// TODO: honor GOCLOBBERDEADHASH, or maybe GOSSAHASH.
   813  		s.doClobber = true
   814  	}
   815  }
   816  
   817  func (s *regAllocState) close() {
   818  	s.f.Cache.freeValueSlice(s.orig)
   819  }
   820  
   821  // Adds a use record for id at distance dist from the start of the block.
   822  // All calls to addUse must happen with nonincreasing dist.
   823  func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
   824  	r := s.freeUseRecords
   825  	if r != nil {
   826  		s.freeUseRecords = r.next
   827  	} else {
   828  		r = &use{}
   829  	}
   830  	r.dist = dist
   831  	r.pos = pos
   832  	r.next = s.values[id].uses
   833  	s.values[id].uses = r
   834  	if r.next != nil && dist > r.next.dist {
   835  		s.f.Fatalf("uses added in wrong order")
   836  	}
   837  }
   838  
   839  // advanceUses advances the uses of v's args from the state before v to the state after v.
   840  // Any values which have no more uses are deallocated from registers.
   841  func (s *regAllocState) advanceUses(v *Value) {
   842  	for _, a := range v.Args {
   843  		if !s.values[a.ID].needReg {
   844  			continue
   845  		}
   846  		ai := &s.values[a.ID]
   847  		r := ai.uses
   848  		ai.uses = r.next
   849  		if r.next == nil || (!opcodeTable[a.Op].fixedReg && r.next.dist > s.nextCall[s.curIdx]) {
   850  			// Value is dead (or is not used again until after a call), free all registers that hold it.
   851  			s.freeRegs(ai.regs)
   852  		}
   853  		r.next = s.freeUseRecords
   854  		s.freeUseRecords = r
   855  	}
   856  	s.dropIfUnused(v)
   857  }
   858  
   859  // Drop v from registers if it isn't used again, or its only uses are after
   860  // a call instruction.
   861  func (s *regAllocState) dropIfUnused(v *Value) {
   862  	if !s.values[v.ID].needReg {
   863  		return
   864  	}
   865  	vi := &s.values[v.ID]
   866  	r := vi.uses
   867  	if r == nil || (!opcodeTable[v.Op].fixedReg && r.dist > s.nextCall[s.curIdx]) {
   868  		s.freeRegs(vi.regs)
   869  	}
   870  }
   871  
   872  // liveAfterCurrentInstruction reports whether v is live after
   873  // the current instruction is completed.  v must be used by the
   874  // current instruction.
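         // For example, if v's remaining uses are at distances (d, d, d+3), the two
         // uses at distance d are by the current instruction and the use at d+3 makes
         // v live afterwards; if all remaining uses are at distance d, v dies here.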
   875  func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
   876  	u := s.values[v.ID].uses
   877  	if u == nil {
   878  		panic(fmt.Errorf("u is nil, v = %s, s.values[v.ID] = %v", v.LongString(), s.values[v.ID]))
   879  	}
   880  	d := u.dist
   881  	for u != nil && u.dist == d {
   882  		u = u.next
   883  	}
   884  	return u != nil && u.dist > d
   885  }
   886  
   887  // Sets the state of the registers to that encoded in regs.
   888  func (s *regAllocState) setState(regs []endReg) {
   889  	s.freeRegs(s.used)
   890  	for _, x := range regs {
   891  		s.assignReg(x.r, x.v, x.c)
   892  	}
   893  }
   894  
   895  // compatRegs returns the set of registers which can store a type t.
   896  func (s *regAllocState) compatRegs(t *types.Type) regMask {
   897  	var m regMask
   898  	if t.IsTuple() || t.IsFlags() {
   899  		return 0
   900  	}
   901  	if t.IsFloat() || t == types.TypeInt128 {
   902  		if t.Kind() == types.TFLOAT32 && s.f.Config.fp32RegMask != 0 {
   903  			m = s.f.Config.fp32RegMask
   904  		} else if t.Kind() == types.TFLOAT64 && s.f.Config.fp64RegMask != 0 {
   905  			m = s.f.Config.fp64RegMask
   906  		} else {
   907  			m = s.f.Config.fpRegMask
   908  		}
   909  	} else {
   910  		m = s.f.Config.gpRegMask
   911  	}
   912  	return m & s.allocatable
   913  }
   914  
   915  // regspec returns the regInfo for operation op.
   916  func (s *regAllocState) regspec(v *Value) regInfo {
   917  	op := v.Op
   918  	if op == OpConvert {
   919  		// OpConvert is a generic op, so it doesn't have a
   920  		// register set in the static table. It can use any
   921  		// allocatable integer register.
   922  		m := s.allocatable & s.f.Config.gpRegMask
   923  		return regInfo{inputs: []inputInfo{{regs: m}}, outputs: []outputInfo{{regs: m}}}
   924  	}
   925  	if op == OpArgIntReg {
   926  		reg := v.Block.Func.Config.intParamRegs[v.AuxInt8()]
   927  		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
   928  	}
   929  	if op == OpArgFloatReg {
   930  		reg := v.Block.Func.Config.floatParamRegs[v.AuxInt8()]
   931  		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
   932  	}
   933  	if op.IsCall() {
   934  		if ac, ok := v.Aux.(*AuxCall); ok && ac.reg != nil {
   935  			return *ac.Reg(&opcodeTable[op].reg, s.f.Config)
   936  		}
   937  	}
   938  	if op == OpMakeResult && s.f.OwnAux.reg != nil {
   939  		return *s.f.OwnAux.ResultReg(s.f.Config)
   940  	}
   941  	return opcodeTable[op].reg
   942  }
   943  
   944  func (s *regAllocState) isGReg(r register) bool {
   945  	return s.f.Config.hasGReg && s.GReg == r
   946  }
   947  
   948  // Dummy value used to represent the value being held in a temporary register.
   949  var tmpVal Value
   950  
   951  func (s *regAllocState) regalloc(f *Func) {
   952  	regValLiveSet := f.newSparseSet(f.NumValues()) // set of values that may be live in register
   953  	defer f.retSparseSet(regValLiveSet)
   954  	var oldSched []*Value
   955  	var phis []*Value
   956  	var phiRegs []register
   957  	var args []*Value
   958  
   959  	// Data structure used for computing desired registers.
   960  	var desired desiredState
   961  	desiredSecondReg := map[ID][4]register{} // desired register allocation for 2nd part of a tuple
   962  
   963  	// Desired registers for inputs & outputs for each instruction in the block.
   964  	type dentry struct {
   965  		out [4]register    // desired output registers
   966  		in  [3][4]register // desired input registers (for inputs 0,1, and 2)
   967  	}
   968  	var dinfo []dentry
   969  
   970  	if f.Entry != f.Blocks[0] {
   971  		f.Fatalf("entry block must be first")
   972  	}
   973  
   974  	for _, b := range s.visitOrder {
   975  		if s.f.pass.debug > regDebug {
   976  			fmt.Printf("Begin processing block %v\n", b)
   977  		}
   978  		s.curBlock = b
   979  		s.startRegsMask = 0
   980  		s.usedSinceBlockStart = 0
   981  		clear(desiredSecondReg)
   982  
   983  		// Initialize regValLiveSet and uses fields for this block.
   984  		// Walk backwards through the block doing liveness analysis.
   985  		regValLiveSet.clear()
   986  		for _, e := range s.live[b.ID] {
   987  			s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block
   988  			regValLiveSet.add(e.ID)
   989  		}
   990  		for _, v := range b.ControlValues() {
   991  			if s.values[v.ID].needReg {
   992  				s.addUse(v.ID, int32(len(b.Values)), b.Pos) // pseudo-use by control values
   993  				regValLiveSet.add(v.ID)
   994  			}
   995  		}
   996  		if len(s.nextCall) < len(b.Values) {
   997  			s.nextCall = append(s.nextCall, make([]int32, len(b.Values)-len(s.nextCall))...)
   998  		}
   999  		var nextCall int32 = math.MaxInt32
  1000  		for i := len(b.Values) - 1; i >= 0; i-- {
  1001  			v := b.Values[i]
  1002  			regValLiveSet.remove(v.ID)
  1003  			if v.Op == OpPhi {
  1004  				// Remove v from the live set, but don't add
  1005  				// any inputs. This is the state the len(b.Preds)>1
  1006  				// case below desires; it wants to process phis specially.
  1007  				s.nextCall[i] = nextCall
  1008  				continue
  1009  			}
  1010  			if opcodeTable[v.Op].call {
  1011  				// Function call clobbers all the registers but SP and SB.
  1012  				regValLiveSet.clear()
  1013  				if s.sp != 0 && s.values[s.sp].uses != nil {
  1014  					regValLiveSet.add(s.sp)
  1015  				}
  1016  				if s.sb != 0 && s.values[s.sb].uses != nil {
  1017  					regValLiveSet.add(s.sb)
  1018  				}
  1019  				nextCall = int32(i)
  1020  			}
  1021  			for _, a := range v.Args {
  1022  				if !s.values[a.ID].needReg {
  1023  					continue
  1024  				}
  1025  				s.addUse(a.ID, int32(i), v.Pos)
  1026  				regValLiveSet.add(a.ID)
  1027  			}
  1028  			s.nextCall[i] = nextCall
  1029  		}
  1030  		if s.f.pass.debug > regDebug {
  1031  			fmt.Printf("use distances for %s\n", b)
  1032  			for i := range s.values {
  1033  				vi := &s.values[i]
  1034  				u := vi.uses
  1035  				if u == nil {
  1036  					continue
  1037  				}
  1038  				fmt.Printf("  v%d:", i)
  1039  				for u != nil {
  1040  					fmt.Printf(" %d", u.dist)
  1041  					u = u.next
  1042  				}
  1043  				fmt.Println()
  1044  			}
  1045  		}
  1046  
  1047  		// Make a copy of the block schedule so we can generate a new one in place.
  1048  		// We make a separate copy for phis and regular values.
  1049  		nphi := 0
  1050  		for _, v := range b.Values {
  1051  			if v.Op != OpPhi {
  1052  				break
  1053  			}
  1054  			nphi++
  1055  		}
  1056  		phis = append(phis[:0], b.Values[:nphi]...)
  1057  		oldSched = append(oldSched[:0], b.Values[nphi:]...)
  1058  		b.Values = b.Values[:0]
  1059  
  1060  		// Initialize start state of block.
  1061  		if b == f.Entry {
  1062  			// Regalloc state is empty to start.
  1063  			if nphi > 0 {
  1064  				f.Fatalf("phis in entry block")
  1065  			}
  1066  		} else if len(b.Preds) == 1 {
  1067  			// Start regalloc state with the end state of the previous block.
  1068  			s.setState(s.endRegs[b.Preds[0].b.ID])
  1069  			if nphi > 0 {
  1070  				f.Fatalf("phis in single-predecessor block")
  1071  			}
  1072  			// Drop any values which are no longer live.
  1073  			// This may happen because at the end of p, a value may be
  1074  			// live but only used by some other successor of p.
  1075  			for r := register(0); r < s.numRegs; r++ {
  1076  				v := s.regs[r].v
  1077  				if v != nil && !regValLiveSet.contains(v.ID) {
  1078  					s.freeReg(r)
  1079  				}
  1080  			}
  1081  		} else {
  1082  			// This is the complicated case. We have more than one predecessor,
  1083  			// which means we may have Phi ops.
  1084  
   1085  			// Start with the final register state of the predecessor with the fewest live spills.
   1086  			// This is based on the following points:
   1087  			// 1. Fewer live spills indicate lower register pressure on that path,
   1088  			//    so values in this block are more likely to be allocated to registers.
   1089  			// 2. It tends to avoid predecessors that contain function calls, because such
   1090  			//    predecessors usually generate many spills and lose the previous
   1091  			//    allocation state.
   1092  			// TODO: Improve this part. The size of the predecessor's endRegs also affects
   1093  			// code size and compile speed, but it is not easy to find a simple and
   1094  			// efficient heuristic that combines multiple factors.
  1095  			idx := -1
  1096  			for i, p := range b.Preds {
  1097  				// If the predecessor has not been visited yet, skip it because its end state
   1098  				// (endRegs and spillLive) has not been computed yet.
  1099  				pb := p.b
  1100  				if s.blockOrder[pb.ID] >= s.blockOrder[b.ID] {
  1101  					continue
  1102  				}
  1103  				if idx == -1 {
  1104  					idx = i
  1105  					continue
  1106  				}
  1107  				pSel := b.Preds[idx].b
  1108  				if len(s.spillLive[pb.ID]) < len(s.spillLive[pSel.ID]) {
  1109  					idx = i
  1110  				} else if len(s.spillLive[pb.ID]) == len(s.spillLive[pSel.ID]) {
   1111  					// Break the tie using branch-likelihood information. After the critical
   1112  					// pass (which removes critical edges), pb and pSel must be plain blocks,
   1113  					// so check whether pb is the likely successor of its own predecessor
   1114  					// (pb.likelyBranch()) instead of looking at the edge pb->b.
   1115  					// TODO: improve the prediction of the likely predecessor. The method
   1116  					// below only handles the simplest cases; for complex cases the
   1117  					// prediction may be inaccurate, but that does not affect correctness.
   1118  					// According to the layout algorithm, the predecessor with the smaller
   1119  					// blockOrder lies on the likely path, and testing shows that choosing
   1120  					// it is better than making no choice at all.
  1121  					if pb.likelyBranch() && !pSel.likelyBranch() || s.blockOrder[pb.ID] < s.blockOrder[pSel.ID] {
  1122  						idx = i
  1123  					}
  1124  				}
  1125  			}
  1126  			if idx < 0 {
  1127  				f.Fatalf("bad visitOrder, no predecessor of %s has been visited before it", b)
  1128  			}
  1129  			p := b.Preds[idx].b
  1130  			s.setState(s.endRegs[p.ID])
  1131  
  1132  			if s.f.pass.debug > regDebug {
  1133  				fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
  1134  				for _, x := range s.endRegs[p.ID] {
  1135  					fmt.Printf("  %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c)
  1136  				}
  1137  			}
  1138  
  1139  			// Decide on registers for phi ops. Use the registers determined
  1140  			// by the primary predecessor if we can.
  1141  			// TODO: pick best of (already processed) predecessors?
  1142  			// Majority vote? Deepest nesting level?
  1143  			phiRegs = phiRegs[:0]
  1144  			var phiUsed regMask
  1145  
  1146  			for _, v := range phis {
  1147  				if !s.values[v.ID].needReg {
  1148  					phiRegs = append(phiRegs, noRegister)
  1149  					continue
  1150  				}
  1151  				a := v.Args[idx]
  1152  				// Some instructions target not-allocatable registers.
  1153  				// They're not suitable for further (phi-function) allocation.
  1154  				m := s.values[a.ID].regs &^ phiUsed & s.allocatable
  1155  				if m != 0 {
  1156  					r := pickReg(m)
  1157  					phiUsed |= regMask(1) << r
  1158  					phiRegs = append(phiRegs, r)
  1159  				} else {
  1160  					phiRegs = append(phiRegs, noRegister)
  1161  				}
  1162  			}
  1163  
  1164  			// Second pass - deallocate all in-register phi inputs.
  1165  			for i, v := range phis {
  1166  				if !s.values[v.ID].needReg {
  1167  					continue
  1168  				}
  1169  				a := v.Args[idx]
  1170  				r := phiRegs[i]
  1171  				if r == noRegister {
  1172  					continue
  1173  				}
  1174  				if regValLiveSet.contains(a.ID) {
  1175  					// Input value is still live (it is used by something other than Phi).
  1176  					// Try to move it around before kicking out, if there is a free register.
  1177  					// We generate a Copy in the predecessor block and record it. It will be
  1178  					// deleted later if never used.
  1179  					//
  1180  					// Pick a free register. At this point some registers used in the predecessor
  1181  					// block may have been deallocated. Those are the ones used for Phis. Exclude
  1182  					// them (and they are not going to be helpful anyway).
  1183  					m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
  1184  					if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
  1185  						r2 := pickReg(m)
  1186  						c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
  1187  						s.copies[c] = false
  1188  						if s.f.pass.debug > regDebug {
  1189  							fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2])
  1190  						}
  1191  						s.setOrig(c, a)
  1192  						s.assignReg(r2, a, c)
  1193  						s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
  1194  					}
  1195  				}
  1196  				s.freeReg(r)
  1197  			}
  1198  
  1199  			// Copy phi ops into new schedule.
  1200  			b.Values = append(b.Values, phis...)
  1201  
  1202  			// Third pass - pick registers for phis whose input
  1203  			// was not in a register in the primary predecessor.
  1204  			for i, v := range phis {
  1205  				if !s.values[v.ID].needReg {
  1206  					continue
  1207  				}
  1208  				if phiRegs[i] != noRegister {
  1209  					continue
  1210  				}
  1211  				m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
  1212  				// If one of the other inputs of v is in a register, and the register is available,
  1213  				// select this register, which can save some unnecessary copies.
  1214  				for i, pe := range b.Preds {
  1215  					if i == idx {
  1216  						continue
  1217  					}
  1218  					ri := noRegister
  1219  					for _, er := range s.endRegs[pe.b.ID] {
  1220  						if er.v == s.orig[v.Args[i].ID] {
  1221  							ri = er.r
  1222  							break
  1223  						}
  1224  					}
  1225  					if ri != noRegister && m>>ri&1 != 0 {
  1226  						m = regMask(1) << ri
  1227  						break
  1228  					}
  1229  				}
  1230  				if m != 0 {
  1231  					r := pickReg(m)
  1232  					phiRegs[i] = r
  1233  					phiUsed |= regMask(1) << r
  1234  				}
  1235  			}
  1236  
  1237  			// Set registers for phis. Add phi spill code.
  1238  			for i, v := range phis {
  1239  				if !s.values[v.ID].needReg {
  1240  					continue
  1241  				}
  1242  				r := phiRegs[i]
  1243  				if r == noRegister {
  1244  					// stack-based phi
  1245  					// Spills will be inserted in all the predecessors below.
  1246  					s.values[v.ID].spill = v // v starts life spilled
  1247  					continue
  1248  				}
  1249  				// register-based phi
  1250  				s.assignReg(r, v, v)
  1251  			}
  1252  
  1253  			// Deallocate any values which are no longer live. Phis are excluded.
  1254  			for r := register(0); r < s.numRegs; r++ {
  1255  				if phiUsed>>r&1 != 0 {
  1256  					continue
  1257  				}
  1258  				v := s.regs[r].v
  1259  				if v != nil && !regValLiveSet.contains(v.ID) {
  1260  					s.freeReg(r)
  1261  				}
  1262  			}
  1263  
  1264  			// Save the starting state for use by merge edges.
  1265  			// We append to a stack allocated variable that we'll
  1266  			// later copy into s.startRegs in one fell swoop, to save
  1267  			// on allocations.
  1268  			regList := make([]startReg, 0, 32)
  1269  			for r := register(0); r < s.numRegs; r++ {
  1270  				v := s.regs[r].v
  1271  				if v == nil {
  1272  					continue
  1273  				}
  1274  				if phiUsed>>r&1 != 0 {
  1275  					// Skip registers that phis used, we'll handle those
  1276  					// specially during merge edge processing.
  1277  					continue
  1278  				}
  1279  				regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
  1280  				s.startRegsMask |= regMask(1) << r
  1281  			}
  1282  			s.startRegs[b.ID] = make([]startReg, len(regList))
  1283  			copy(s.startRegs[b.ID], regList)
  1284  
  1285  			if s.f.pass.debug > regDebug {
  1286  				fmt.Printf("after phis\n")
  1287  				for _, x := range s.startRegs[b.ID] {
  1288  					fmt.Printf("  %s: v%d\n", &s.registers[x.r], x.v.ID)
  1289  				}
  1290  			}
  1291  		}
  1292  
  1293  		// Drop phis from registers if they immediately go dead.
  1294  		for i, v := range phis {
  1295  			s.curIdx = i
  1296  			s.dropIfUnused(v)
  1297  		}
  1298  
  1299  		// Allocate space to record the desired registers for each value.
  1300  		if l := len(oldSched); cap(dinfo) < l {
  1301  			dinfo = make([]dentry, l)
  1302  		} else {
  1303  			dinfo = dinfo[:l]
  1304  			clear(dinfo)
  1305  		}
  1306  
  1307  		// Load static desired register info at the end of the block.
  1308  		desired.copy(&s.desired[b.ID])
  1309  
  1310  		// Check actual assigned registers at the start of the next block(s).
  1311  		// Dynamically assigned registers will trump the static
  1312  		// desired registers computed during liveness analysis.
  1313  		// Note that we do this phase after startRegs is set above, so that
  1314  		// we get the right behavior for a block which branches to itself.
  1315  		for _, e := range b.Succs {
  1316  			succ := e.b
  1317  			// TODO: prioritize likely successor?
  1318  			for _, x := range s.startRegs[succ.ID] {
  1319  				desired.add(x.v.ID, x.r)
  1320  			}
  1321  			// Process phi ops in succ.
  1322  			pidx := e.i
  1323  			for _, v := range succ.Values {
  1324  				if v.Op != OpPhi {
  1325  					break
  1326  				}
  1327  				if !s.values[v.ID].needReg {
  1328  					continue
  1329  				}
  1330  				rp, ok := s.f.getHome(v.ID).(*Register)
  1331  				if !ok {
  1332  					// If v is not assigned a register, pick a register assigned to one of v's inputs.
  1333  					// Hopefully v will get assigned that register later.
  1334  					// If the inputs have allocated register information, add it to desired,
  1335  					// which may reduce spill or copy operations when the register is available.
  1336  					for _, a := range v.Args {
  1337  						rp, ok = s.f.getHome(a.ID).(*Register)
  1338  						if ok {
  1339  							break
  1340  						}
  1341  					}
  1342  					if !ok {
  1343  						continue
  1344  					}
  1345  				}
  1346  				desired.add(v.Args[pidx].ID, register(rp.num))
  1347  			}
  1348  		}
  1349  		// Walk values backwards computing desired register info.
  1350  		// See computeLive for more comments.
  1351  		for i := len(oldSched) - 1; i >= 0; i-- {
  1352  			v := oldSched[i]
  1353  			prefs := desired.remove(v.ID)
  1354  			regspec := s.regspec(v)
  1355  			desired.clobber(regspec.clobbers)
  1356  			for _, j := range regspec.inputs {
  1357  				if countRegs(j.regs) != 1 {
  1358  					continue
  1359  				}
  1360  				desired.clobber(j.regs)
  1361  				desired.add(v.Args[j.idx].ID, pickReg(j.regs))
  1362  			}
  1363  			if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
  1364  				if opcodeTable[v.Op].commutative {
  1365  					desired.addList(v.Args[1].ID, prefs)
  1366  				}
  1367  				desired.addList(v.Args[0].ID, prefs)
  1368  			}
  1369  			// Save desired registers for this value.
  1370  			dinfo[i].out = prefs
  1371  			for j, a := range v.Args {
  1372  				if j >= len(dinfo[i].in) {
  1373  					break
  1374  				}
  1375  				dinfo[i].in[j] = desired.get(a.ID)
  1376  			}
  1377  			if v.Op == OpSelect1 && prefs[0] != noRegister {
  1378  				// Save desired registers of select1 for
  1379  				// use by the tuple generating instruction.
  1380  				desiredSecondReg[v.Args[0].ID] = prefs
  1381  			}
  1382  		}
  1383  
  1384  		// Process all the non-phi values.
  1385  		for idx, v := range oldSched {
  1386  			s.curIdx = nphi + idx
  1387  			tmpReg := noRegister
  1388  			if s.f.pass.debug > regDebug {
  1389  				fmt.Printf("  processing %s\n", v.LongString())
  1390  			}
  1391  			regspec := s.regspec(v)
  1392  			if v.Op == OpPhi {
  1393  				f.Fatalf("phi %s not at start of block", v)
  1394  			}
  1395  			if opcodeTable[v.Op].fixedReg {
  1396  				switch v.Op {
  1397  				case OpSP:
  1398  					s.assignReg(s.SPReg, v, v)
  1399  					s.sp = v.ID
  1400  				case OpSB:
  1401  					s.assignReg(s.SBReg, v, v)
  1402  					s.sb = v.ID
  1403  				case OpARM64ZERO:
  1404  					s.assignReg(s.ZeroIntReg, v, v)
  1405  				default:
  1406  					f.Fatalf("unknown fixed-register op %s", v)
  1407  				}
  1408  				b.Values = append(b.Values, v)
  1409  				s.advanceUses(v)
  1410  				continue
  1411  			}
  1412  			if v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN {
  1413  				if s.values[v.ID].needReg {
  1414  					if v.Op == OpSelectN {
  1415  						s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocResults)[int(v.AuxInt)].(*Register).num), v, v)
  1416  					} else {
  1417  						var i = 0
  1418  						if v.Op == OpSelect1 {
  1419  							i = 1
  1420  						}
  1421  						s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
  1422  					}
  1423  				}
  1424  				b.Values = append(b.Values, v)
  1425  				s.advanceUses(v)
  1426  				continue
  1427  			}
  1428  			if v.Op == OpGetG && s.f.Config.hasGReg {
  1429  				// use hardware g register
  1430  				if s.regs[s.GReg].v != nil {
  1431  					s.freeReg(s.GReg) // kick out the old value
  1432  				}
  1433  				s.assignReg(s.GReg, v, v)
  1434  				b.Values = append(b.Values, v)
  1435  				s.advanceUses(v)
  1436  				continue
  1437  			}
  1438  			if v.Op == OpArg {
   1439  				// Args are "pre-spilled" values. We don't allocate
   1440  				// any register here. We just set the spill pointer to
   1441  				// point at v itself; any later use will restore from it.
  1442  				s.values[v.ID].spill = v
  1443  				b.Values = append(b.Values, v)
  1444  				s.advanceUses(v)
  1445  				continue
  1446  			}
  1447  			if v.Op == OpKeepAlive {
  1448  				// Make sure the argument to v is still live here.
  1449  				s.advanceUses(v)
  1450  				a := v.Args[0]
  1451  				vi := &s.values[a.ID]
  1452  				if vi.regs == 0 && !vi.rematerializeable {
  1453  					// Use the spill location.
  1454  					// This forces later liveness analysis to make the
  1455  					// value live at this point.
  1456  					v.SetArg(0, s.makeSpill(a, b))
  1457  				} else if _, ok := a.Aux.(*ir.Name); ok && vi.rematerializeable {
  1458  					// Rematerializeable value with a *ir.Name. This is the address of
  1459  					// a stack object (e.g. an LEAQ). Keep the object live.
  1460  					// Change it to VarLive, which is what plive expects for locals.
  1461  					v.Op = OpVarLive
  1462  					v.SetArgs1(v.Args[1])
  1463  					v.Aux = a.Aux
  1464  				} else {
  1465  					// In-register and rematerializeable values are already live.
  1466  					// These are typically rematerializeable constants like nil,
  1467  					// or values of a variable that were modified since the last call.
  1468  					v.Op = OpCopy
  1469  					v.SetArgs1(v.Args[1])
  1470  				}
  1471  				b.Values = append(b.Values, v)
  1472  				continue
  1473  			}
  1474  			if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
  1475  				// No register allocation required (or none specified yet)
  1476  				if s.doClobber && v.Op.IsCall() {
  1477  					s.clobberRegs(regspec.clobbers)
  1478  				}
  1479  				s.freeRegs(regspec.clobbers)
  1480  				b.Values = append(b.Values, v)
  1481  				s.advanceUses(v)
  1482  				continue
  1483  			}
  1484  
  1485  			if s.values[v.ID].rematerializeable {
  1486  				// Value is rematerializeable, don't issue it here.
  1487  				// It will get issued just before each use (see
  1488  				// allocValueToReg).
  1489  				for _, a := range v.Args {
  1490  					a.Uses--
  1491  				}
  1492  				s.advanceUses(v)
  1493  				continue
  1494  			}
  1495  
  1496  			if s.f.pass.debug > regDebug {
  1497  				fmt.Printf("value %s\n", v.LongString())
  1498  				fmt.Printf("  out:")
  1499  				for _, r := range dinfo[idx].out {
  1500  					if r != noRegister {
  1501  						fmt.Printf(" %s", &s.registers[r])
  1502  					}
  1503  				}
  1504  				fmt.Println()
  1505  				for i := 0; i < len(v.Args) && i < 3; i++ {
  1506  					fmt.Printf("  in%d:", i)
  1507  					for _, r := range dinfo[idx].in[i] {
  1508  						if r != noRegister {
  1509  							fmt.Printf(" %s", &s.registers[r])
  1510  						}
  1511  					}
  1512  					fmt.Println()
  1513  				}
  1514  			}
  1515  
  1516  			// Move arguments to registers.
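        			// This is done in three passes: keep args that are already in their required
        			// register, then fill required registers that are currently free, then
        			// allocate whatever remains (most constrained first, preferring desired registers).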
  1517  			// First, if an arg must be in a specific register and it is already
  1518  			// in place, keep it.
  1519  			args = append(args[:0], make([]*Value, len(v.Args))...)
  1520  			for i, a := range v.Args {
  1521  				if !s.values[a.ID].needReg {
  1522  					args[i] = a
  1523  				}
  1524  			}
  1525  			for _, i := range regspec.inputs {
  1526  				mask := i.regs
  1527  				if countRegs(mask) == 1 && mask&s.values[v.Args[i.idx].ID].regs != 0 {
  1528  					args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
  1529  				}
  1530  			}
  1531  			// Then, if an arg must be in a specific register and that
  1532  			// register is free, allocate that one. Otherwise when processing
  1533  			// another input we may kick a value into the free register, which
  1534  			// then will be kicked out again.
  1535  			// This is a common case for arguments passed in registers
  1536  			// to function calls.
  1537  			for {
  1538  				freed := false
  1539  				for _, i := range regspec.inputs {
  1540  					if args[i.idx] != nil {
  1541  						continue // already allocated
  1542  					}
  1543  					mask := i.regs
  1544  					if countRegs(mask) == 1 && mask&^s.used != 0 {
  1545  						args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
  1546  						// If the input is in other registers that will be clobbered by v,
  1547  						// or the input is dead, free the registers. This may make room
  1548  						// for other inputs.
  1549  						oldregs := s.values[v.Args[i.idx].ID].regs
  1550  						if oldregs&^regspec.clobbers == 0 || !s.liveAfterCurrentInstruction(v.Args[i.idx]) {
  1551  							s.freeRegs(oldregs &^ mask &^ s.nospill)
  1552  							freed = true
  1553  						}
  1554  					}
  1555  				}
  1556  				if !freed {
  1557  					break
  1558  				}
  1559  			}
  1560  			// Last, allocate remaining ones, in an ordering defined
  1561  			// by the register specification (most constrained first).
  1562  			for _, i := range regspec.inputs {
  1563  				if args[i.idx] != nil {
  1564  					continue // already allocated
  1565  				}
  1566  				mask := i.regs
  1567  				if mask&s.values[v.Args[i.idx].ID].regs == 0 {
  1568  					// Need a new register for the input.
  1569  					mask &= s.allocatable
  1570  					mask &^= s.nospill
  1571  					// Use the desired register if available.
  1572  					if i.idx < 3 {
  1573  						for _, r := range dinfo[idx].in[i.idx] {
  1574  							if r != noRegister && (mask&^s.used)>>r&1 != 0 {
  1575  								// Desired register is allowed and unused.
  1576  								mask = regMask(1) << r
  1577  								break
  1578  							}
  1579  						}
  1580  					}
  1581  					// Avoid registers we're saving for other values.
  1582  					if mask&^desired.avoid != 0 {
  1583  						mask &^= desired.avoid
  1584  					}
  1585  				}
  1586  				if mask&s.values[v.Args[i.idx].ID].regs&(1<<s.SPReg) != 0 {
  1587  					// Prefer SP register. This ensures that local variables
  1588  					// use SP as their base register (instead of a copy of the
  1589  					// stack pointer living in another register). See issue 74836.
  1590  					mask = 1 << s.SPReg
  1591  				}
  1592  				args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
  1593  			}
  1594  
  1595  			// If the output clobbers the input register, make sure we have
  1596  			// at least two copies of the input register so we don't
  1597  			// have to reload the value from the spill location.
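        			// (resultInArg0 covers two-operand instructions such as AMD64 ADDQ, where
        			// the result is written over input 0.)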
  1598  			if opcodeTable[v.Op].resultInArg0 {
  1599  				var m regMask
  1600  				if !s.liveAfterCurrentInstruction(v.Args[0]) {
  1601  					// arg0 is dead.  We can clobber its register.
  1602  					goto ok
  1603  				}
  1604  				if opcodeTable[v.Op].commutative && !s.liveAfterCurrentInstruction(v.Args[1]) {
  1605  					args[0], args[1] = args[1], args[0]
  1606  					goto ok
  1607  				}
  1608  				if s.values[v.Args[0].ID].rematerializeable {
  1609  					// We can rematerialize the input, don't worry about clobbering it.
  1610  					goto ok
  1611  				}
  1612  				if opcodeTable[v.Op].commutative && s.values[v.Args[1].ID].rematerializeable {
  1613  					args[0], args[1] = args[1], args[0]
  1614  					goto ok
  1615  				}
  1616  				if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
  1617  					// we have at least 2 copies of arg0.  We can afford to clobber one.
  1618  					goto ok
  1619  				}
  1620  				if opcodeTable[v.Op].commutative && countRegs(s.values[v.Args[1].ID].regs) >= 2 {
  1621  					args[0], args[1] = args[1], args[0]
  1622  					goto ok
  1623  				}
  1624  
  1625  				// We can't overwrite arg0 (or arg1, if commutative).  So we
  1626  				// need to make a copy of an input so we have a register we can modify.
  1627  
  1628  				// Possible new registers to copy into.
  1629  				m = s.compatRegs(v.Args[0].Type) &^ s.used
  1630  				if m == 0 {
  1631  					// No free registers.  In this case we'll just clobber
  1632  					// an input and future uses of that input must use a restore.
  1633  					// TODO(khr): We should really do this like allocReg does it,
  1634  					// spilling the value with the most distant next use.
  1635  					goto ok
  1636  				}
  1637  
  1638  				// Try to move an input to the desired output, if allowed.
  1639  				for _, r := range dinfo[idx].out {
  1640  					if r != noRegister && (m&regspec.outputs[0].regs)>>r&1 != 0 {
  1641  						m = regMask(1) << r
  1642  						args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
  1643  						// Note: we update args[0] so the instruction will
  1644  						// use the register copy we just made.
  1645  						goto ok
  1646  					}
  1647  				}
  1648  				// Try to copy input to its desired location & use its old
  1649  				// location as the result register.
  1650  				for _, r := range dinfo[idx].in[0] {
  1651  					if r != noRegister && m>>r&1 != 0 {
  1652  						m = regMask(1) << r
  1653  						c := s.allocValToReg(v.Args[0], m, true, v.Pos)
  1654  						s.copies[c] = false
  1655  						// Note: no update to args[0] so the instruction will
  1656  						// use the original copy.
  1657  						goto ok
  1658  					}
  1659  				}
  1660  				if opcodeTable[v.Op].commutative {
  1661  					for _, r := range dinfo[idx].in[1] {
  1662  						if r != noRegister && m>>r&1 != 0 {
  1663  							m = regMask(1) << r
  1664  							c := s.allocValToReg(v.Args[1], m, true, v.Pos)
  1665  							s.copies[c] = false
  1666  							args[0], args[1] = args[1], args[0]
  1667  							goto ok
  1668  						}
  1669  					}
  1670  				}
  1671  
  1672  				// Avoid future fixed uses if we can.
  1673  				if m&^desired.avoid != 0 {
  1674  					m &^= desired.avoid
  1675  				}
  1676  				// Save input 0 to a new register so we can clobber it.
  1677  				c := s.allocValToReg(v.Args[0], m, true, v.Pos)
  1678  				s.copies[c] = false
  1679  
  1680  				// Normally we use the register of the old copy of input 0 as the target.
  1681  				// However, if input 0 is already in its desired register then we use
  1682  				// the register of the new copy instead.
  1683  				if regspec.outputs[0].regs>>s.f.getHome(c.ID).(*Register).num&1 != 0 {
  1684  					if rp, ok := s.f.getHome(args[0].ID).(*Register); ok {
  1685  						r := register(rp.num)
  1686  						for _, r2 := range dinfo[idx].in[0] {
  1687  							if r == r2 {
  1688  								args[0] = c
  1689  								break
  1690  							}
  1691  						}
  1692  					}
  1693  				}
  1694  			}
  1695  		ok:
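        			// Some opcodes clobber an input register without producing the result in it
        			// (clobbersArg0/clobbersArg1). As above, make sure a surviving copy of such
        			// an input exists before it is destroyed.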
  1696  			for i := 0; i < 2; i++ {
  1697  				if !(i == 0 && regspec.clobbersArg0 || i == 1 && regspec.clobbersArg1) {
  1698  					continue
  1699  				}
  1700  				if !s.liveAfterCurrentInstruction(v.Args[i]) {
  1701  					// arg is dead.  We can clobber its register.
  1702  					continue
  1703  				}
  1704  				if s.values[v.Args[i].ID].rematerializeable {
  1705  					// We can rematerialize the input, don't worry about clobbering it.
  1706  					continue
  1707  				}
  1708  				if countRegs(s.values[v.Args[i].ID].regs) >= 2 {
  1709  					// We have at least 2 copies of arg.  We can afford to clobber one.
  1710  					continue
  1711  				}
  1712  				// Possible new registers to copy into.
  1713  				m := s.compatRegs(v.Args[i].Type) &^ s.used
  1714  				if m == 0 {
  1715  					// No free registers.  In this case we'll just clobber the
  1716  					// input and future uses of that input must use a restore.
  1717  					// TODO(khr): We should really do this like allocReg does it,
  1718  					// spilling the value with the most distant next use.
  1719  					continue
  1720  				}
  1721  				// Copy input to a new clobberable register.
  1722  				c := s.allocValToReg(v.Args[i], m, true, v.Pos)
  1723  				s.copies[c] = false
  1724  				args[i] = c
  1725  			}
  1726  
  1727  			// Pick a temporary register if needed.
  1728  			// It should be distinct from all the input registers, so we
  1729  			// allocate it after all the input registers, but before
  1730  			// the input registers are freed via advanceUses below.
  1731  			// (Not all instructions need this distinctness, but it is conservative.)
  1732  			// We also ensure it is not any of the single-choice output registers.
  1733  			if opcodeTable[v.Op].needIntTemp {
  1734  				m := s.allocatable & s.f.Config.gpRegMask
  1735  				for _, out := range regspec.outputs {
  1736  					if countRegs(out.regs) == 1 {
  1737  						m &^= out.regs
  1738  					}
  1739  				}
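        				// Prefer a register that other values don't want, if one is available.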
  1740  				if m&^desired.avoid&^s.nospill != 0 {
  1741  					m &^= desired.avoid
  1742  				}
  1743  				tmpReg = s.allocReg(m, &tmpVal)
  1744  				s.nospill |= regMask(1) << tmpReg
  1745  				s.tmpused |= regMask(1) << tmpReg
  1746  			}
  1747  
  1748  			if regspec.clobbersArg0 {
  1749  				s.freeReg(register(s.f.getHome(args[0].ID).(*Register).num))
  1750  			}
  1751  			if regspec.clobbersArg1 {
  1752  				s.freeReg(register(s.f.getHome(args[1].ID).(*Register).num))
  1753  			}
  1754  
  1755  			// Now that all args are in regs, we're ready to issue the value itself.
  1756  			// Before we pick a register for the output value, allow input registers
  1757  			// to be deallocated. We do this here so that the output can use the
  1758  			// same register as a dying input.
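        			// (For resultNotInArgs ops the output must not share a register with any
        			// input, so this deallocation is postponed until after outputs are picked; see below.)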
  1759  			if !opcodeTable[v.Op].resultNotInArgs {
  1760  				s.tmpused = s.nospill
  1761  				s.nospill = 0
  1762  				s.advanceUses(v) // frees any registers holding args that are no longer live
  1763  			}
  1764  
  1765  			// Dump any registers which will be clobbered
  1766  			if s.doClobber && v.Op.IsCall() {
  1767  				// clobber registers that are marked as clobber in regmask, but
  1768  				// don't clobber inputs.
  1769  				s.clobberRegs(regspec.clobbers &^ s.tmpused &^ s.nospill)
  1770  			}
  1771  			s.freeRegs(regspec.clobbers)
  1772  			s.tmpused |= regspec.clobbers
  1773  
  1774  			// Pick registers for outputs.
  1775  			{
  1776  				outRegs := noRegisters // TODO if this is costly, hoist and clear incrementally below.
  1777  				maxOutIdx := -1
  1778  				var used regMask
  1779  				if tmpReg != noRegister {
  1780  					// Ensure output registers are distinct from the temporary register.
  1781  					// (Not all instructions need this distinctness, but it is conservative.)
  1782  					used |= regMask(1) << tmpReg
  1783  				}
  1784  				for _, out := range regspec.outputs {
  1785  					if out.regs == 0 {
  1786  						continue
  1787  					}
  1788  					mask := out.regs & s.allocatable &^ used
  1789  					if mask == 0 {
  1790  						s.f.Fatalf("can't find any output register %s", v.LongString())
  1791  					}
  1792  					if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
  1793  						if !opcodeTable[v.Op].commutative {
  1794  							// Output must use the same register as input 0.
  1795  							r := register(s.f.getHome(args[0].ID).(*Register).num)
  1796  							if mask>>r&1 == 0 {
  1797  								s.f.Fatalf("resultInArg0 value's input %v cannot be an output of %s", s.f.getHome(args[0].ID).(*Register), v.LongString())
  1798  							}
  1799  							mask = regMask(1) << r
  1800  						} else {
  1801  							// Output must use the same register as input 0 or 1.
  1802  							r0 := register(s.f.getHome(args[0].ID).(*Register).num)
  1803  							r1 := register(s.f.getHome(args[1].ID).(*Register).num)
  1804  							// Check r0 and r1 for desired output register.
  1805  							found := false
  1806  							for _, r := range dinfo[idx].out {
  1807  								if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
  1808  									mask = regMask(1) << r
  1809  									found = true
  1810  									if r == r1 {
  1811  										args[0], args[1] = args[1], args[0]
  1812  									}
  1813  									break
  1814  								}
  1815  							}
  1816  							if !found {
  1817  								// Neither are desired, pick r0.
  1818  								mask = regMask(1) << r0
  1819  							}
  1820  						}
  1821  					}
  1822  					if out.idx == 0 { // desired registers only apply to the first element of a tuple result
  1823  						for _, r := range dinfo[idx].out {
  1824  							if r != noRegister && (mask&^s.used)>>r&1 != 0 {
  1825  								// Desired register is allowed and unused.
  1826  								mask = regMask(1) << r
  1827  								break
  1828  							}
  1829  						}
  1830  					}
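        					// The second element of a tuple gets the register desired by a later
        					// Select1, if one was recorded above in desiredSecondReg.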
  1831  					if out.idx == 1 {
  1832  						if prefs, ok := desiredSecondReg[v.ID]; ok {
  1833  							for _, r := range prefs {
  1834  								if r != noRegister && (mask&^s.used)>>r&1 != 0 {
  1835  									// Desired register is allowed and unused.
  1836  									mask = regMask(1) << r
  1837  									break
  1838  								}
  1839  							}
  1840  						}
  1841  					}
  1842  					// Avoid registers we're saving for other values.
  1843  					if mask&^desired.avoid&^s.nospill&^s.used != 0 {
  1844  						mask &^= desired.avoid
  1845  					}
  1846  					r := s.allocReg(mask, v)
  1847  					if out.idx > maxOutIdx {
  1848  						maxOutIdx = out.idx
  1849  					}
  1850  					outRegs[out.idx] = r
  1851  					used |= regMask(1) << r
  1852  					s.tmpused |= regMask(1) << r
  1853  				}
  1854  				// Record register choices
  1855  				if v.Type.IsTuple() {
  1856  					var outLocs LocPair
  1857  					if r := outRegs[0]; r != noRegister {
  1858  						outLocs[0] = &s.registers[r]
  1859  					}
  1860  					if r := outRegs[1]; r != noRegister {
  1861  						outLocs[1] = &s.registers[r]
  1862  					}
  1863  					s.f.setHome(v, outLocs)
  1864  					// Note that subsequent SelectX instructions will do the assignReg calls.
  1865  				} else if v.Type.IsResults() {
  1866  					// preallocate outLocs to the right size, which is maxOutIdx+1
  1867  					outLocs := make(LocResults, maxOutIdx+1, maxOutIdx+1)
  1868  					for i := 0; i <= maxOutIdx; i++ {
  1869  						if r := outRegs[i]; r != noRegister {
  1870  							outLocs[i] = &s.registers[r]
  1871  						}
  1872  					}
  1873  					s.f.setHome(v, outLocs)
  1874  				} else {
  1875  					if r := outRegs[0]; r != noRegister {
  1876  						s.assignReg(r, v, v)
  1877  					}
  1878  				}
  1879  				if tmpReg != noRegister {
  1880  					// Remember the temp register allocation, if any.
  1881  					if s.f.tempRegs == nil {
  1882  						s.f.tempRegs = map[ID]*Register{}
  1883  					}
  1884  					s.f.tempRegs[v.ID] = &s.registers[tmpReg]
  1885  				}
  1886  			}
  1887  
  1888  			// deallocate dead args, if we have not done so
  1889  			if opcodeTable[v.Op].resultNotInArgs {
  1890  				s.nospill = 0
  1891  				s.advanceUses(v) // frees any registers holding args that are no longer live
  1892  			}
  1893  			s.tmpused = 0
  1894  
  1895  			// Issue the Value itself.
  1896  			for i, a := range args {
  1897  				v.SetArg(i, a) // use register version of arguments
  1898  			}
  1899  			b.Values = append(b.Values, v)
  1900  			s.dropIfUnused(v)
  1901  		}
  1902  
  1903  		// Copy the control values - we need this so we can reduce the
  1904  		// uses property of these values later.
  1905  		controls := append(make([]*Value, 0, 2), b.ControlValues()...)
  1906  
  1907  		// Load control values into registers.
  1908  		for i, v := range b.ControlValues() {
  1909  			if !s.values[v.ID].needReg {
  1910  				continue
  1911  			}
  1912  			if s.f.pass.debug > regDebug {
  1913  				fmt.Printf("  processing control %s\n", v.LongString())
  1914  			}
  1915  			// We assume that a control input can be passed in any
  1916  			// type-compatible register. If this turns out not to be true,
  1917  			// we'll need to introduce a regspec for a block's control value.
  1918  			b.ReplaceControl(i, s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos))
  1919  		}
  1920  
  1921  		// Reduce the uses of the control values once registers have been loaded.
  1922  		// This loop is equivalent to the advanceUses method.
  1923  		for _, v := range controls {
  1924  			vi := &s.values[v.ID]
  1925  			if !vi.needReg {
  1926  				continue
  1927  			}
  1928  			// Remove this use from the uses list.
  1929  			u := vi.uses
  1930  			vi.uses = u.next
  1931  			if u.next == nil {
  1932  				s.freeRegs(vi.regs) // value is dead
  1933  			}
  1934  			u.next = s.freeUseRecords
  1935  			s.freeUseRecords = u
  1936  		}
  1937  
  1938  		// If we are approaching a merge point and we are the primary
  1939  		// predecessor of it, find live values that we use soon after
  1940  		// the merge point and promote them to registers now.
  1941  		if len(b.Succs) == 1 {
  1942  			if s.f.Config.hasGReg && s.regs[s.GReg].v != nil {
  1943  				s.freeReg(s.GReg) // Spill value in G register before any merge.
  1944  			}
  1945  			if s.blockOrder[b.ID] > s.blockOrder[b.Succs[0].b.ID] {
  1946  				// No point if we've already regalloc'd the destination.
  1947  				goto badloop
  1948  			}
  1949  			// For this to be worthwhile, the loop must have no calls in it.
  1950  			top := b.Succs[0].b
  1951  			loop := s.loopnest.b2l[top.ID]
  1952  			if loop == nil || loop.header != top || loop.containsUnavoidableCall {
  1953  				goto badloop
  1954  			}
  1955  
  1956  			// TODO: sort by distance, pick the closest ones?
  1957  			for _, live := range s.live[b.ID] {
  1958  				if live.dist >= unlikelyDistance {
  1959  					// Don't preload anything live after the loop.
  1960  					continue
  1961  				}
  1962  				vid := live.ID
  1963  				vi := &s.values[vid]
  1964  				if vi.regs != 0 {
  1965  					continue
  1966  				}
  1967  				if vi.rematerializeable {
  1968  					continue
  1969  				}
  1970  				v := s.orig[vid]
  1971  				m := s.compatRegs(v.Type) &^ s.used
  1972  			// Use the desired register if available.
  1973  			outerloop:
  1974  				for _, e := range desired.entries {
  1975  					if e.ID != v.ID {
  1976  						continue
  1977  					}
  1978  					for _, r := range e.regs {
  1979  						if r != noRegister && m>>r&1 != 0 {
  1980  							m = regMask(1) << r
  1981  							break outerloop
  1982  						}
  1983  					}
  1984  				}
  1985  				if m&^desired.avoid != 0 {
  1986  					m &^= desired.avoid
  1987  				}
  1988  				if m != 0 {
  1989  					s.allocValToReg(v, m, false, b.Pos)
  1990  				}
  1991  			}
  1992  		}
  1993  	badloop:
  1994  		;
  1995  
  1996  		// Save end-of-block register state.
  1997  		// First count how many, this cuts allocations in half.
  1998  		k := 0
  1999  		for r := register(0); r < s.numRegs; r++ {
  2000  			v := s.regs[r].v
  2001  			if v == nil {
  2002  				continue
  2003  			}
  2004  			k++
  2005  		}
  2006  		regList := make([]endReg, 0, k)
  2007  		for r := register(0); r < s.numRegs; r++ {
  2008  			v := s.regs[r].v
  2009  			if v == nil {
  2010  				continue
  2011  			}
  2012  			regList = append(regList, endReg{r, v, s.regs[r].c})
  2013  		}
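        		// endRegs is read later by shuffle (via setup) when generating fixup code on merge edges.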
  2014  		s.endRegs[b.ID] = regList
  2015  
  2016  		if checkEnabled {
  2017  			regValLiveSet.clear()
  2018  			for _, x := range s.live[b.ID] {
  2019  				regValLiveSet.add(x.ID)
  2020  			}
  2021  			for r := register(0); r < s.numRegs; r++ {
  2022  				v := s.regs[r].v
  2023  				if v == nil {
  2024  					continue
  2025  				}
  2026  				if !regValLiveSet.contains(v.ID) {
  2027  					s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
  2028  				}
  2029  			}
  2030  		}
  2031  
  2032  		// If a value is live at the end of the block and
  2033  		// isn't in a register, generate a use for the spill location.
  2034  		// We need to remember this information so that
  2035  		// the liveness analysis in stackalloc is correct.
  2036  		for _, e := range s.live[b.ID] {
  2037  			vi := &s.values[e.ID]
  2038  			if vi.regs != 0 {
  2039  				// in a register, we'll use that source for the merge.
  2040  				continue
  2041  			}
  2042  			if vi.rematerializeable {
  2043  				// we'll rematerialize during the merge.
  2044  				continue
  2045  			}
  2046  			if s.f.pass.debug > regDebug {
  2047  				fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
  2048  			}
  2049  			spill := s.makeSpill(s.orig[e.ID], b)
  2050  			s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
  2051  		}
  2052  
  2053  		// Clear any final uses.
  2054  		// All that is left should be the pseudo-uses added for values which
  2055  		// are live at the end of b.
  2056  		for _, e := range s.live[b.ID] {
  2057  			u := s.values[e.ID].uses
  2058  			if u == nil {
  2059  				f.Fatalf("live at end, no uses v%d", e.ID)
  2060  			}
  2061  			if u.next != nil {
  2062  				f.Fatalf("live at end, too many uses v%d", e.ID)
  2063  			}
  2064  			s.values[e.ID].uses = nil
  2065  			u.next = s.freeUseRecords
  2066  			s.freeUseRecords = u
  2067  		}
  2068  
  2069  		// allocReg may have dropped registers from startRegsMask that
  2070  		// aren't actually needed in startRegs. Synchronize back to
  2071  		// startRegs.
  2072  		//
  2073  		// This must be done before placing spills, which will look at
  2074  		// startRegs to decide if a block is a valid block for a spill.
  2075  		if c := countRegs(s.startRegsMask); c != len(s.startRegs[b.ID]) {
  2076  			regs := make([]startReg, 0, c)
  2077  			for _, sr := range s.startRegs[b.ID] {
  2078  				if s.startRegsMask&(regMask(1)<<sr.r) == 0 {
  2079  					continue
  2080  				}
  2081  				regs = append(regs, sr)
  2082  			}
  2083  			s.startRegs[b.ID] = regs
  2084  		}
  2085  	}
  2086  
  2087  	// Decide where the spills we generated will go.
  2088  	s.placeSpills()
  2089  
  2090  	// Anything that didn't get a register gets a stack location here.
  2091  	// (StoreReg, stack-based phis, inputs, ...)
  2092  	stacklive := stackalloc(s.f, s.spillLive)
  2093  
  2094  	// Fix up all merge edges.
  2095  	s.shuffle(stacklive)
  2096  
  2097  	// Erase any copies we never used.
  2098  	// Also, an unused copy might be the only use of another copy,
  2099  	// so continue erasing until we reach a fixed point.
  2100  	for {
  2101  		progress := false
  2102  		for c, used := range s.copies {
  2103  			if !used && c.Uses == 0 {
  2104  				if s.f.pass.debug > regDebug {
  2105  					fmt.Printf("delete copied value %s\n", c.LongString())
  2106  				}
  2107  				c.resetArgs()
  2108  				f.freeValue(c)
  2109  				delete(s.copies, c)
  2110  				progress = true
  2111  			}
  2112  		}
  2113  		if !progress {
  2114  			break
  2115  		}
  2116  	}
  2117  
  2118  	for _, b := range s.visitOrder {
  2119  		i := 0
  2120  		for _, v := range b.Values {
  2121  			if v.Op == OpInvalid {
  2122  				continue
  2123  			}
  2124  			b.Values[i] = v
  2125  			i++
  2126  		}
  2127  		b.Values = b.Values[:i]
  2128  	}
  2129  }
  2130  
  2131  func (s *regAllocState) placeSpills() {
  2132  	mustBeFirst := func(op Op) bool {
  2133  		return op.isLoweredGetClosurePtr() || op == OpPhi || op == OpArgIntReg || op == OpArgFloatReg
  2134  	}
  2135  
  2136  	// Start maps block IDs to the list of spills
  2137  	// that go at the start of the block (but after any phis).
  2138  	start := map[ID][]*Value{}
  2139  	// After maps value IDs to the list of spills
  2140  	// that go immediately after that value ID.
  2141  	after := map[ID][]*Value{}
  2142  
  2143  	for i := range s.values {
  2144  		vi := s.values[i]
  2145  		spill := vi.spill
  2146  		if spill == nil {
  2147  			continue
  2148  		}
  2149  		if spill.Block != nil {
  2150  			// Some spills are already fully set up,
  2151  			// like OpArgs and stack-based phis.
  2152  			continue
  2153  		}
  2154  		v := s.orig[i]
  2155  
  2156  		// Walk down the dominator tree looking for a good place to
  2157  		// put the spill of v.  At the start "best" is the best place
  2158  		// we have found so far.
  2159  		// TODO: find a way to make this O(1) without arbitrary cutoffs.
  2160  		if v == nil {
  2161  			panic(fmt.Errorf("nil v, s.orig[%d], vi = %v, spill = %s", i, vi, spill.LongString()))
  2162  		}
  2163  		best := v.Block
  2164  		bestArg := v
  2165  		var bestDepth int16
  2166  		if l := s.loopnest.b2l[best.ID]; l != nil {
  2167  			bestDepth = l.depth
  2168  		}
  2169  		b := best
  2170  		const maxSpillSearch = 100
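        		// maxSpillSearch is the arbitrary cutoff mentioned in the TODO above; it bounds
        		// both the walk down the dominator tree and the sibling scan at each level.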
  2171  		for i := 0; i < maxSpillSearch; i++ {
  2172  			// Find the child of b in the dominator tree which
  2173  			// dominates all restores.
  2174  			p := b
  2175  			b = nil
  2176  			for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
  2177  				if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
  2178  					// c also dominates all restores.  Walk down into c.
  2179  					b = c
  2180  					break
  2181  				}
  2182  			}
  2183  			if b == nil {
  2184  				// Ran out of blocks which dominate all restores.
  2185  				break
  2186  			}
  2187  
  2188  			var depth int16
  2189  			if l := s.loopnest.b2l[b.ID]; l != nil {
  2190  				depth = l.depth
  2191  			}
  2192  			if depth > bestDepth {
  2193  				// Don't push the spill into a deeper loop.
  2194  				continue
  2195  			}
  2196  
  2197  			// If v is in a register at the start of b, we can
  2198  			// place the spill here (after the phis).
  2199  			if len(b.Preds) == 1 {
  2200  				for _, e := range s.endRegs[b.Preds[0].b.ID] {
  2201  					if e.v == v {
  2202  						// Found a better spot for the spill.
  2203  						best = b
  2204  						bestArg = e.c
  2205  						bestDepth = depth
  2206  						break
  2207  					}
  2208  				}
  2209  			} else {
  2210  				for _, e := range s.startRegs[b.ID] {
  2211  					if e.v == v {
  2212  						// Found a better spot for the spill.
  2213  						best = b
  2214  						bestArg = e.c
  2215  						bestDepth = depth
  2216  						break
  2217  					}
  2218  				}
  2219  			}
  2220  		}
  2221  
  2222  		// Put the spill in the best block we found.
  2223  		spill.Block = best
  2224  		spill.AddArg(bestArg)
  2225  		if best == v.Block && !mustBeFirst(v.Op) {
  2226  			// Place immediately after v.
  2227  			after[v.ID] = append(after[v.ID], spill)
  2228  		} else {
  2229  			// Place at the start of best block.
  2230  			start[best.ID] = append(start[best.ID], spill)
  2231  		}
  2232  	}
  2233  
  2234  	// Insert spill instructions into the block schedules.
  2235  	var oldSched []*Value
  2236  	for _, b := range s.visitOrder {
  2237  		nfirst := 0
  2238  		for _, v := range b.Values {
  2239  			if !mustBeFirst(v.Op) {
  2240  				break
  2241  			}
  2242  			nfirst++
  2243  		}
  2244  		oldSched = append(oldSched[:0], b.Values[nfirst:]...)
  2245  		b.Values = b.Values[:nfirst]
  2246  		b.Values = append(b.Values, start[b.ID]...)
  2247  		for _, v := range oldSched {
  2248  			b.Values = append(b.Values, v)
  2249  			b.Values = append(b.Values, after[v.ID]...)
  2250  		}
  2251  	}
  2252  }
  2253  
  2254  // shuffle fixes up all the merge edges (those going into blocks of indegree > 1).
  2255  func (s *regAllocState) shuffle(stacklive [][]ID) {
  2256  	var e edgeState
  2257  	e.s = s
  2258  	e.cache = map[ID][]*Value{}
  2259  	e.contents = map[Location]contentRecord{}
  2260  	if s.f.pass.debug > regDebug {
  2261  		fmt.Printf("shuffle %s\n", s.f.Name)
  2262  		fmt.Println(s.f.String())
  2263  	}
  2264  
  2265  	for _, b := range s.visitOrder {
  2266  		if len(b.Preds) <= 1 {
  2267  			continue
  2268  		}
  2269  		e.b = b
  2270  		for i, edge := range b.Preds {
  2271  			p := edge.b
  2272  			e.p = p
  2273  			e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
  2274  			e.process()
  2275  		}
  2276  	}
  2277  
  2278  	if s.f.pass.debug > regDebug {
  2279  		fmt.Printf("post shuffle %s\n", s.f.Name)
  2280  		fmt.Println(s.f.String())
  2281  	}
  2282  }
  2283  
  2284  type edgeState struct {
  2285  	s    *regAllocState
  2286  	p, b *Block // edge goes from p->b.
  2287  
  2288  	// for each pre-regalloc value, a list of equivalent cached values
  2289  	cache      map[ID][]*Value
  2290  	cachedVals []ID // (superset of) keys of the above map, for deterministic iteration
  2291  
  2292  	// map from location to the value it contains
  2293  	contents map[Location]contentRecord
  2294  
  2295  	// desired destination locations
  2296  	destinations []dstRecord
  2297  	extra        []dstRecord
  2298  
  2299  	usedRegs              regMask // registers currently holding something
  2300  	uniqueRegs            regMask // registers holding the only copy of a value
  2301  	finalRegs             regMask // registers holding final target
  2302  	rematerializeableRegs regMask // registers that hold rematerializeable values
  2303  }
  2304  
  2305  type contentRecord struct {
  2306  	vid   ID       // pre-regalloc value
  2307  	c     *Value   // cached value
  2308  	final bool     // this is a satisfied destination
  2309  	pos   src.XPos // source position of use of the value
  2310  }
  2311  
  2312  type dstRecord struct {
  2313  	loc    Location // register or stack slot
  2314  	vid    ID       // pre-regalloc value it should contain
  2315  	splice **Value  // place to store reference to the generating instruction
  2316  	pos    src.XPos // source position of use of this location
  2317  }
  2318  
  2319  // setup initializes the edge state for shuffling.
  2320  func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
  2321  	if e.s.f.pass.debug > regDebug {
  2322  		fmt.Printf("edge %s->%s\n", e.p, e.b)
  2323  	}
  2324  
  2325  	// Clear state.
  2326  	clear(e.cache)
  2327  	e.cachedVals = e.cachedVals[:0]
  2328  	clear(e.contents)
  2329  	e.usedRegs = 0
  2330  	e.uniqueRegs = 0
  2331  	e.finalRegs = 0
  2332  	e.rematerializeableRegs = 0
  2333  
  2334  	// Live registers can be sources.
  2335  	for _, x := range srcReg {
  2336  		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos) // we don't care about the position of the source
  2337  	}
  2338  	// So can all of the spill locations.
  2339  	for _, spillID := range stacklive {
  2340  		v := e.s.orig[spillID]
  2341  		spill := e.s.values[v.ID].spill
  2342  		if !e.s.sdom.IsAncestorEq(spill.Block, e.p) {
  2343  			// Spills were placed that only dominate the uses found
  2344  			// during the first regalloc pass. The edge fixup code
  2345  			// can't use a spill location if the spill doesn't dominate
  2346  			// the edge.
  2347  			// We are guaranteed that if the spill doesn't dominate this edge,
  2348  			// then the value is available in a register (because we called
  2349  			// makeSpill for every value not in a register at the start
  2350  			// of an edge).
  2351  			continue
  2352  		}
  2353  		e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos) // we don't care about the position of the source
  2354  	}
  2355  
  2356  	// Figure out all the destinations we need.
  2357  	dsts := e.destinations[:0]
  2358  	for _, x := range dstReg {
  2359  		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
  2360  	}
  2361  	// Phis need their args to end up in a specific location.
  2362  	for _, v := range e.b.Values {
  2363  		if v.Op != OpPhi {
  2364  			break
  2365  		}
  2366  		loc := e.s.f.getHome(v.ID)
  2367  		if loc == nil {
  2368  			continue
  2369  		}
  2370  		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
  2371  	}
  2372  	e.destinations = dsts
  2373  
  2374  	if e.s.f.pass.debug > regDebug {
  2375  		for _, vid := range e.cachedVals {
  2376  			a := e.cache[vid]
  2377  			for _, c := range a {
  2378  				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
  2379  			}
  2380  		}
  2381  		for _, d := range e.destinations {
  2382  			fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
  2383  		}
  2384  	}
  2385  }
  2386  
  2387  // process generates code to move all the values to the right destination locations.
  2388  func (e *edgeState) process() {
  2389  	dsts := e.destinations
  2390  
  2391  	// Process the destinations until they are all satisfied.
  2392  	for len(dsts) > 0 {
  2393  		i := 0
  2394  		for _, d := range dsts {
  2395  			if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
  2396  				// Failed - save for next iteration.
  2397  				dsts[i] = d
  2398  				i++
  2399  			}
  2400  		}
  2401  		if i < len(dsts) {
  2402  			// Made some progress. Go around again.
  2403  			dsts = dsts[:i]
  2404  
  2405  			// Append any extra destinations we generated.
  2406  			dsts = append(dsts, e.extra...)
  2407  			e.extra = e.extra[:0]
  2408  			continue
  2409  		}
  2410  
  2411  		// We made no progress. That means that any
  2412  		// remaining unsatisfied moves are in simple cycles.
  2413  		// For example, A -> B -> C -> D -> A.
  2414  		//   A ----> B
  2415  		//   ^       |
  2416  		//   |       |
  2417  		//   |       v
  2418  		//   D <---- C
  2419  
  2420  		// To break the cycle, we pick an unused register, say R,
  2421  		// and put a copy of B there.
  2422  		//   A ----> B
  2423  		//   ^       |
  2424  		//   |       |
  2425  		//   |       v
  2426  		//   D <---- C <---- R=copyofB
  2427  		// When we resume the outer loop, the A->B move can now proceed,
  2428  		// and eventually the whole cycle completes.
  2429  
  2430  		// Copy any cycle location to a temp register. This duplicates
  2431  		// one of the cycle entries, allowing the just duplicated value
  2432  		// to be overwritten and the cycle to proceed.
  2433  		d := dsts[0]
  2434  		loc := d.loc
  2435  		vid := e.contents[loc].vid
  2436  		c := e.contents[loc].c
  2437  		r := e.findRegFor(c.Type)
  2438  		if e.s.f.pass.debug > regDebug {
  2439  			fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c)
  2440  		}
  2441  		e.erase(r)
  2442  		pos := d.pos.WithNotStmt()
  2443  		if _, isReg := loc.(*Register); isReg {
  2444  			c = e.p.NewValue1(pos, OpCopy, c.Type, c)
  2445  		} else {
  2446  			c = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
  2447  		}
  2448  		e.set(r, vid, c, false, pos)
  2449  		if c.Op == OpLoadReg && e.s.isGReg(register(r.(*Register).num)) {
  2450  			e.s.f.Fatalf("process.OpLoadReg targeting g: " + c.LongString())
  2451  		}
  2452  	}
  2453  }
  2454  
  2455  // processDest generates code to put value vid into location loc. Returns true
  2456  // if progress was made.
  2457  func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool {
  2458  	pos = pos.WithNotStmt()
  2459  	occupant := e.contents[loc]
  2460  	if occupant.vid == vid {
  2461  		// Value is already in the correct place.
  2462  		e.contents[loc] = contentRecord{vid, occupant.c, true, pos}
  2463  		if splice != nil {
  2464  			(*splice).Uses--
  2465  			*splice = occupant.c
  2466  			occupant.c.Uses++
  2467  		}
  2468  		// Note: if splice==nil then c will appear dead. This is
  2469  		// non-SSA formed code, so be careful after this pass not to run
  2470  		// deadcode elimination.
  2471  		if _, ok := e.s.copies[occupant.c]; ok {
  2472  			// The copy at occupant.c was used to avoid spill.
  2473  			e.s.copies[occupant.c] = true
  2474  		}
  2475  		return true
  2476  	}
  2477  
  2478  	// Check if we're allowed to clobber the destination location.
  2479  	if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable {
  2480  		// We can't overwrite the last copy
  2481  		// of a value that needs to survive.
  2482  		return false
  2483  	}
  2484  
  2485  	// Copy from a source of v, register preferred.
  2486  	v := e.s.orig[vid]
  2487  	var c *Value
  2488  	var src Location
  2489  	if e.s.f.pass.debug > regDebug {
  2490  		fmt.Printf("moving v%d to %s\n", vid, loc)
  2491  		fmt.Printf("sources of v%d:", vid)
  2492  	}
  2493  	if opcodeTable[v.Op].fixedReg {
  2494  		c = v
  2495  		src = e.s.f.getHome(v.ID)
  2496  	} else {
  2497  		for _, w := range e.cache[vid] {
  2498  			h := e.s.f.getHome(w.ID)
  2499  			if e.s.f.pass.debug > regDebug {
  2500  				fmt.Printf(" %s:%s", h, w)
  2501  			}
  2502  			_, isreg := h.(*Register)
  2503  			if src == nil || isreg {
  2504  				c = w
  2505  				src = h
  2506  			}
  2507  		}
  2508  	}
  2509  	if e.s.f.pass.debug > regDebug {
  2510  		if src != nil {
  2511  			fmt.Printf(" [use %s]\n", src)
  2512  		} else {
  2513  			fmt.Printf(" [no source]\n")
  2514  		}
  2515  	}
  2516  	_, dstReg := loc.(*Register)
  2517  
  2518  	// Pre-clobber destination. This avoids the
  2519  	// following situation:
  2520  	//   - v is currently held in R0 and stacktmp0.
  2521  	//   - We want to copy stacktmp1 to stacktmp0.
  2522  	//   - We choose R0 as the temporary register.
  2523  	// During the copy, both R0 and stacktmp0 are
  2524  	// clobbered, losing both copies of v. Oops!
  2525  	// Erasing the destination early means R0 will not
  2526  	// be chosen as the temp register, as it will then
  2527  	// be the last copy of v.
  2528  	e.erase(loc)
  2529  	var x *Value
  2530  	if c == nil || e.s.values[vid].rematerializeable {
  2531  		if !e.s.values[vid].rematerializeable {
  2532  			e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
  2533  		}
  2534  		if dstReg {
  2535  			x = v.copyInto(e.p)
  2536  		} else {
  2537  			// Rematerialize into stack slot. Need a free
  2538  			// register to accomplish this.
  2539  			r := e.findRegFor(v.Type)
  2540  			e.erase(r)
  2541  			x = v.copyIntoWithXPos(e.p, pos)
  2542  			e.set(r, vid, x, false, pos)
  2543  			// Make sure we spill with the size of the slot, not the
  2544  			// size of x (which might be wider due to our dropping
  2545  			// of narrowing conversions).
  2546  			x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x)
  2547  		}
  2548  	} else {
  2549  		// Emit move from src to dst.
  2550  		_, srcReg := src.(*Register)
  2551  		if srcReg {
  2552  			if dstReg {
  2553  				x = e.p.NewValue1(pos, OpCopy, c.Type, c)
  2554  			} else {
  2555  				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c)
  2556  			}
  2557  		} else {
  2558  			if dstReg {
  2559  				x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
  2560  			} else {
  2561  				// mem->mem. Use temp register.
  2562  				r := e.findRegFor(c.Type)
  2563  				e.erase(r)
  2564  				t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
  2565  				e.set(r, vid, t, false, pos)
  2566  				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
  2567  			}
  2568  		}
  2569  	}
  2570  	e.set(loc, vid, x, true, pos)
  2571  	if x.Op == OpLoadReg && e.s.isGReg(register(loc.(*Register).num)) {
  2572  		e.s.f.Fatalf("processDest.OpLoadReg targeting g: " + x.LongString())
  2573  	}
  2574  	if splice != nil {
  2575  		(*splice).Uses--
  2576  		*splice = x
  2577  		x.Uses++
  2578  	}
  2579  	return true
  2580  }
  2581  
  2582  // set changes the contents of location loc to hold the given value and its cached representative.
  2583  func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
  2584  	e.s.f.setHome(c, loc)
  2585  	e.contents[loc] = contentRecord{vid, c, final, pos}
  2586  	a := e.cache[vid]
  2587  	if len(a) == 0 {
  2588  		e.cachedVals = append(e.cachedVals, vid)
  2589  	}
  2590  	a = append(a, c)
  2591  	e.cache[vid] = a
  2592  	if r, ok := loc.(*Register); ok {
  2593  		if e.usedRegs&(regMask(1)<<uint(r.num)) != 0 {
  2594  			e.s.f.Fatalf("%v is already set (v%d/%v)", r, vid, c)
  2595  		}
  2596  		e.usedRegs |= regMask(1) << uint(r.num)
  2597  		if final {
  2598  			e.finalRegs |= regMask(1) << uint(r.num)
  2599  		}
  2600  		if len(a) == 1 {
  2601  			e.uniqueRegs |= regMask(1) << uint(r.num)
  2602  		}
  2603  		if len(a) == 2 {
  2604  			if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
  2605  				e.uniqueRegs &^= regMask(1) << uint(t.num)
  2606  			}
  2607  		}
  2608  		if e.s.values[vid].rematerializeable {
  2609  			e.rematerializeableRegs |= regMask(1) << uint(r.num)
  2610  		}
  2611  	}
  2612  	if e.s.f.pass.debug > regDebug {
  2613  		fmt.Printf("%s\n", c.LongString())
  2614  		fmt.Printf("v%d now available in %s:%s\n", vid, loc, c)
  2615  	}
  2616  }
  2617  
  2618  // erase removes any user of loc.
  2619  func (e *edgeState) erase(loc Location) {
  2620  	cr := e.contents[loc]
  2621  	if cr.c == nil {
  2622  		return
  2623  	}
  2624  	vid := cr.vid
  2625  
  2626  	if cr.final {
  2627  		// Add a destination to move this value back into place.
  2628  		// Make sure it gets added to the tail of the destination queue
  2629  		// so we make progress on other moves first.
  2630  		e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos})
  2631  	}
  2632  
  2633  	// Remove c from the list of cached values.
  2634  	a := e.cache[vid]
  2635  	for i, c := range a {
  2636  		if e.s.f.getHome(c.ID) == loc {
  2637  			if e.s.f.pass.debug > regDebug {
  2638  				fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c)
  2639  			}
  2640  			a[i], a = a[len(a)-1], a[:len(a)-1]
  2641  			break
  2642  		}
  2643  	}
  2644  	e.cache[vid] = a
  2645  
  2646  	// Update register masks.
  2647  	if r, ok := loc.(*Register); ok {
  2648  		e.usedRegs &^= regMask(1) << uint(r.num)
  2649  		if cr.final {
  2650  			e.finalRegs &^= regMask(1) << uint(r.num)
  2651  		}
  2652  		e.rematerializeableRegs &^= regMask(1) << uint(r.num)
  2653  	}
  2654  	if len(a) == 1 {
  2655  		if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
  2656  			e.uniqueRegs |= regMask(1) << uint(r.num)
  2657  		}
  2658  	}
  2659  }
  2660  
  2661  // findRegFor finds a register we can use to make a temp copy of type typ.
  2662  func (e *edgeState) findRegFor(typ *types.Type) Location {
  2663  	// Which registers are possibilities.
  2664  	types := &e.s.f.Config.Types
  2665  	m := e.s.compatRegs(typ)
  2666  
  2667  	// Pick a register. In priority order:
  2668  	// 1) an unused register
  2669  	// 2) a non-unique register not holding a final value
  2670  	// 3) a non-unique register
  2671  	// 4) a register holding a rematerializeable value
  2672  	x := m &^ e.usedRegs
  2673  	if x != 0 {
  2674  		return &e.s.registers[pickReg(x)]
  2675  	}
  2676  	x = m &^ e.uniqueRegs &^ e.finalRegs
  2677  	if x != 0 {
  2678  		return &e.s.registers[pickReg(x)]
  2679  	}
  2680  	x = m &^ e.uniqueRegs
  2681  	if x != 0 {
  2682  		return &e.s.registers[pickReg(x)]
  2683  	}
  2684  	x = m & e.rematerializeableRegs
  2685  	if x != 0 {
  2686  		return &e.s.registers[pickReg(x)]
  2687  	}
  2688  
  2689  	// No register is available.
  2690  	// Pick a register to spill.
  2691  	for _, vid := range e.cachedVals {
  2692  		a := e.cache[vid]
  2693  		for _, c := range a {
  2694  			if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 {
  2695  				if !c.rematerializeable() {
  2696  					x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c)
  2697  					// Allocate a temp location to spill a register to.
  2698  					// The type of the slot is immaterial - it will not be live across
  2699  					// any safepoint. Just use a type big enough to hold any register.
  2700  					t := LocalSlot{N: e.s.f.NewLocal(c.Pos, types.Int64), Type: types.Int64}
  2701  					// TODO: reuse these slots. They'll need to be erased first.
  2702  					e.set(t, vid, x, false, c.Pos)
  2703  					if e.s.f.pass.debug > regDebug {
  2704  						fmt.Printf("  SPILL %s->%s %s\n", r, t, x.LongString())
  2705  					}
  2706  				}
  2707  				// r will now be overwritten by the caller. At some point
  2708  				// later, the newly saved value will be moved back to its
  2709  				// final destination in processDest.
  2710  				return r
  2711  			}
  2712  		}
  2713  	}
  2714  
  2715  	fmt.Printf("m:%d unique:%d final:%d rematerializable:%d\n", m, e.uniqueRegs, e.finalRegs, e.rematerializeableRegs)
  2716  	for _, vid := range e.cachedVals {
  2717  		a := e.cache[vid]
  2718  		for _, c := range a {
  2719  			fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID))
  2720  		}
  2721  	}
  2722  	e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
  2723  	return nil
  2724  }
  2725  
  2726  // rematerializeable reports whether the register allocator should recompute
  2727  // a value instead of spilling/restoring it.
  2728  func (v *Value) rematerializeable() bool {
  2729  	if !opcodeTable[v.Op].rematerializeable {
  2730  		return false
  2731  	}
  2732  	for _, a := range v.Args {
  2733  		// Fixed-register allocations (SP, SB, etc.) are always available.
  2734  		// Any other argument of an opcode makes it not rematerializeable.
  2735  		if !opcodeTable[a.Op].fixedReg {
  2736  			return false
  2737  		}
  2738  	}
  2739  	return true
  2740  }
  2741  
  2742  type liveInfo struct {
  2743  	ID   ID       // ID of value
  2744  	dist int32    // # of instructions before next use
  2745  	pos  src.XPos // source position of next use
  2746  }
  2747  
  2748  // computeLive computes a map from block ID to a list of value IDs live at the end
  2749  // of that block. Together with the value ID is a count of how many instructions
  2750  // to the next use of that value. The resulting map is stored in s.live.
  2751  // computeLive also computes the desired register information at the end of each block.
  2752  // This desired register information is stored in s.desired.
  2753  // TODO: this could be quadratic if lots of variables are live across lots of
  2754  // basic blocks. Figure out a way to make this function (or, more precisely, the user
  2755  // of this function) require only linear size & time.
  2756  func (s *regAllocState) computeLive() {
  2757  	f := s.f
  2758  	s.live = make([][]liveInfo, f.NumBlocks())
  2759  	s.desired = make([]desiredState, f.NumBlocks())
  2760  	var phis []*Value
  2761  
  2762  	live := f.newSparseMapPos(f.NumValues())
  2763  	defer f.retSparseMapPos(live)
  2764  	t := f.newSparseMapPos(f.NumValues())
  2765  	defer f.retSparseMapPos(t)
  2766  
  2767  	// Keep track of which value we want in each register.
  2768  	var desired desiredState
  2769  
  2770  	// Instead of iterating over f.Blocks, iterate over their postordering.
  2771  	// Liveness information flows backward, so starting at the end
  2772  	// increases the probability that we will stabilize quickly.
  2773  	// TODO: Do a better job yet. Here's one possibility:
  2774  	// Calculate the dominator tree and locate all strongly connected components.
  2775  	// If a value is live in one block of an SCC, it is live in all.
  2776  	// Walk the dominator tree from end to beginning, just once, treating SCC
  2777  	// components as single blocks, duplicating calculated liveness information
  2778  	// out to all of them.
  2779  	po := f.postorder()
  2780  	s.loopnest = f.loopnest()
  2781  	s.loopnest.computeUnavoidableCalls()
  2782  	for {
  2783  		changed := false
  2784  
  2785  		for _, b := range po {
  2786  			// Start with known live values at the end of the block.
  2787  			// Add len(b.Values) to adjust from end-of-block distance
  2788  			// to beginning-of-block distance.
  2789  			live.clear()
  2790  			for _, e := range s.live[b.ID] {
  2791  				live.set(e.ID, e.dist+int32(len(b.Values)), e.pos)
  2792  			}
  2793  
  2794  			// Mark control values as live
  2795  			for _, c := range b.ControlValues() {
  2796  				if s.values[c.ID].needReg {
  2797  					live.set(c.ID, int32(len(b.Values)), b.Pos)
  2798  				}
  2799  			}
  2800  
  2801  			// Propagate backwards to the start of the block
  2802  			// Assumes Values have been scheduled.
  2803  			phis = phis[:0]
  2804  			for i := len(b.Values) - 1; i >= 0; i-- {
  2805  				v := b.Values[i]
  2806  				live.remove(v.ID)
  2807  				if v.Op == OpPhi {
  2808  					// save phi ops for later
  2809  					phis = append(phis, v)
  2810  					continue
  2811  				}
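        				// Calls clobber registers, so values live across a call will generally be
        				// spilled anyway; push their next-use distance out so they rank as distant uses.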
  2812  				if opcodeTable[v.Op].call {
  2813  					c := live.contents()
  2814  					for i := range c {
  2815  						c[i].val += unlikelyDistance
  2816  					}
  2817  				}
  2818  				for _, a := range v.Args {
  2819  					if s.values[a.ID].needReg {
  2820  						live.set(a.ID, int32(i), v.Pos)
  2821  					}
  2822  				}
  2823  			}
  2824  			// Propagate desired registers backwards.
  2825  			desired.copy(&s.desired[b.ID])
  2826  			for i := len(b.Values) - 1; i >= 0; i-- {
  2827  				v := b.Values[i]
  2828  				prefs := desired.remove(v.ID)
  2829  				if v.Op == OpPhi {
  2830  					// TODO: if v is a phi, save desired register for phi inputs.
  2831  					// For now, we just drop it and don't propagate
  2832  					// desired registers back though phi nodes.
  2833  					continue
  2834  				}
  2835  				regspec := s.regspec(v)
  2836  				// Cancel desired registers if they get clobbered.
  2837  				desired.clobber(regspec.clobbers)
  2838  				// Update desired registers if there are any fixed register inputs.
  2839  				for _, j := range regspec.inputs {
  2840  					if countRegs(j.regs) != 1 {
  2841  						continue
  2842  					}
  2843  					desired.clobber(j.regs)
  2844  					desired.add(v.Args[j.idx].ID, pickReg(j.regs))
  2845  				}
  2846  				// Set desired register of input 0 if this is a 2-operand instruction.
  2847  				if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
  2848  					// ADDQconst is added here because we want to treat it as resultInArg0 for
  2849  					// the purposes of desired registers, even though it is not an absolute requirement.
  2850  					// This is because we'd rather implement it as ADDQ instead of LEAQ.
  2851  					// Same for ADDLconst
  2852  					// Select0 is added here to propagate the desired register to the tuple-generating instruction.
  2853  					if opcodeTable[v.Op].commutative {
  2854  						desired.addList(v.Args[1].ID, prefs)
  2855  					}
  2856  					desired.addList(v.Args[0].ID, prefs)
  2857  				}
  2858  			}
  2859  
  2860  			// For each predecessor of b, expand its list of live-at-end values.
  2861  			// invariant: live contains the values live at the start of b (excluding phi inputs)
  2862  			for i, e := range b.Preds {
  2863  				p := e.b
  2864  				// Compute additional distance for the edge.
  2865  				// Note: delta must be at least 1 to distinguish the control
  2866  				// value use from the first user in a successor block.
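        				// Branch likelihood biases the distance: uses reached along a likely edge
        				// count as nearer, and along an unlikely edge as farther.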
  2867  				delta := int32(normalDistance)
  2868  				if len(p.Succs) == 2 {
  2869  					if p.Succs[0].b == b && p.Likely == BranchLikely ||
  2870  						p.Succs[1].b == b && p.Likely == BranchUnlikely {
  2871  						delta = likelyDistance
  2872  					}
  2873  					if p.Succs[0].b == b && p.Likely == BranchUnlikely ||
  2874  						p.Succs[1].b == b && p.Likely == BranchLikely {
  2875  						delta = unlikelyDistance
  2876  					}
  2877  				}
  2878  
  2879  				// Update any desired registers at the end of p.
  2880  				s.desired[p.ID].merge(&desired)
  2881  
  2882  				// Start t off with the previously known live values at the end of p.
  2883  				t.clear()
  2884  				for _, e := range s.live[p.ID] {
  2885  					t.set(e.ID, e.dist, e.pos)
  2886  				}
  2887  				update := false
  2888  
  2889  				// Add new live values from scanning this block.
  2890  				for _, e := range live.contents() {
  2891  					d := e.val + delta
  2892  					if !t.contains(e.key) || d < t.get(e.key) {
  2893  						update = true
  2894  						t.set(e.key, d, e.pos)
  2895  					}
  2896  				}
  2897  				// Also add the correct arg from the saved phi values.
  2898  				// All phis are at distance delta (we consider them
  2899  				// simultaneously happening at the start of the block).
  2900  				for _, v := range phis {
  2901  					id := v.Args[i].ID
  2902  					if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) {
  2903  						update = true
  2904  						t.set(id, delta, v.Pos)
  2905  					}
  2906  				}
  2907  
  2908  				if !update {
  2909  					continue
  2910  				}
  2911  				// The live set has changed, update it.
  2912  				l := s.live[p.ID][:0]
  2913  				if cap(l) < t.size() {
  2914  					l = make([]liveInfo, 0, t.size())
  2915  				}
  2916  				for _, e := range t.contents() {
  2917  					l = append(l, liveInfo{e.key, e.val, e.pos})
  2918  				}
  2919  				s.live[p.ID] = l
  2920  				changed = true
  2921  			}
  2922  		}
  2923  
  2924  		if !changed {
  2925  			break
  2926  		}
  2927  	}
  2928  	if f.pass.debug > regDebug {
  2929  		fmt.Println("live values at end of each block")
  2930  		for _, b := range f.Blocks {
  2931  			fmt.Printf("  %s:", b)
  2932  			for _, x := range s.live[b.ID] {
  2933  				fmt.Printf(" v%d(%d)", x.ID, x.dist)
  2934  				for _, e := range s.desired[b.ID].entries {
  2935  					if e.ID != x.ID {
  2936  						continue
  2937  					}
  2938  					fmt.Printf("[")
  2939  					first := true
  2940  					for _, r := range e.regs {
  2941  						if r == noRegister {
  2942  							continue
  2943  						}
  2944  						if !first {
  2945  							fmt.Printf(",")
  2946  						}
  2947  						fmt.Print(&s.registers[r])
  2948  						first = false
  2949  					}
  2950  					fmt.Printf("]")
  2951  				}
  2952  			}
  2953  			if avoid := s.desired[b.ID].avoid; avoid != 0 {
  2954  				fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
  2955  			}
  2956  			fmt.Println()
  2957  		}
  2958  	}
  2959  }
  2960  
  2961  // A desiredState represents desired register assignments.
  2962  type desiredState struct {
  2963  	// Desired assignments will be small, so we just use a list
  2964  	// of valueID+registers entries.
  2965  	entries []desiredStateEntry
  2966  	// Registers that other values want to be in.  This mask contains
  2967  	// at least the union of the regs fields of entries, but it may
  2968  	// also have bits set for registers desired by values that were
  2969  	// once in this data structure but are no longer.
  2970  	avoid regMask
  2971  }
  2972  type desiredStateEntry struct {
  2973  	// (pre-regalloc) value
  2974  	ID ID
  2975  	// Registers it would like to be in, in priority order.
  2976  	// Unused slots are filled with noRegister.
  2977  	// For opcodes that return tuples, we track desired registers only
  2978  	// for the first element of the tuple (see desiredSecondReg for
  2979  	// tracking the desired register for the second part of a tuple).
  2980  	regs [4]register
  2981  }
  2982  
  2983  // get returns a list of desired registers for value vid.
  2984  func (d *desiredState) get(vid ID) [4]register {
  2985  	for _, e := range d.entries {
  2986  		if e.ID == vid {
  2987  			return e.regs
  2988  		}
  2989  	}
  2990  	return [4]register{noRegister, noRegister, noRegister, noRegister}
  2991  }
  2992  
  2993  // add records that we'd like value vid to be in register r.
  2994  func (d *desiredState) add(vid ID, r register) {
  2995  	d.avoid |= regMask(1) << r
  2996  	for i := range d.entries {
  2997  		e := &d.entries[i]
  2998  		if e.ID != vid {
  2999  			continue
  3000  		}
  3001  		if e.regs[0] == r {
  3002  			// Already known and highest priority
  3003  			return
  3004  		}
  3005  		for j := 1; j < len(e.regs); j++ {
  3006  			if e.regs[j] == r {
  3007  				// Move from lower priority to top priority
  3008  				copy(e.regs[1:], e.regs[:j])
  3009  				e.regs[0] = r
  3010  				return
  3011  			}
  3012  		}
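        		// r is not in the list yet; insert it at top priority and
        		// drop the lowest-priority register.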
  3013  		copy(e.regs[1:], e.regs[:])
  3014  		e.regs[0] = r
  3015  		return
  3016  	}
  3017  	d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
  3018  }
  3019  
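        // addList records that we'd like value vid to be in each of the given
        // registers, in priority order.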
  3020  func (d *desiredState) addList(vid ID, regs [4]register) {
  3021  	// regs is in priority order, so iterate in reverse order.
  3022  	for i := len(regs) - 1; i >= 0; i-- {
  3023  		r := regs[i]
  3024  		if r != noRegister {
  3025  			d.add(vid, r)
  3026  		}
  3027  	}
  3028  }
  3029  
  3030  // clobber erases any desired registers in the set m.
  3031  func (d *desiredState) clobber(m regMask) {
  3032  	for i := 0; i < len(d.entries); {
  3033  		e := &d.entries[i]
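        		// Compact the registers that are not in m to the front of e.regs.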
  3034  		j := 0
  3035  		for _, r := range e.regs {
  3036  			if r != noRegister && m>>r&1 == 0 {
  3037  				e.regs[j] = r
  3038  				j++
  3039  			}
  3040  		}
  3041  		if j == 0 {
  3042  			// No more desired registers for this value.
  3043  			d.entries[i] = d.entries[len(d.entries)-1]
  3044  			d.entries = d.entries[:len(d.entries)-1]
  3045  			continue
  3046  		}
  3047  		for ; j < len(e.regs); j++ {
  3048  			e.regs[j] = noRegister
  3049  		}
  3050  		i++
  3051  	}
  3052  	d.avoid &^= m
  3053  }
  3054  
  3055  // copy copies a desired state from another desiredState x.
  3056  func (d *desiredState) copy(x *desiredState) {
  3057  	d.entries = append(d.entries[:0], x.entries...)
  3058  	d.avoid = x.avoid
  3059  }
  3060  
  3061  // remove removes the desired registers for vid and returns them.
  3062  func (d *desiredState) remove(vid ID) [4]register {
  3063  	for i := range d.entries {
  3064  		if d.entries[i].ID == vid {
  3065  			regs := d.entries[i].regs
  3066  			d.entries[i] = d.entries[len(d.entries)-1]
  3067  			d.entries = d.entries[:len(d.entries)-1]
  3068  			return regs
  3069  		}
  3070  	}
  3071  	return [4]register{noRegister, noRegister, noRegister, noRegister}
  3072  }
  3073  
  3074  // merge merges another desired state x into d.
  3075  func (d *desiredState) merge(x *desiredState) {
  3076  	d.avoid |= x.avoid
  3077  	// There should only be a few desired registers, so
  3078  	// linear insert is ok.
  3079  	for _, e := range x.entries {
  3080  		d.addList(e.ID, e.regs)
  3081  	}
  3082  }
  3083  
  3084  // computeUnavoidableCalls computes the containsUnavoidableCall fields in the loop nest.
  3085  func (loopnest *loopnest) computeUnavoidableCalls() {
  3086  	f := loopnest.f
  3087  
  3088  	hasCall := f.Cache.allocBoolSlice(f.NumBlocks())
  3089  	defer f.Cache.freeBoolSlice(hasCall)
  3090  	for _, b := range f.Blocks {
  3091  		if b.containsCall() {
  3092  			hasCall[b.ID] = true
  3093  		}
  3094  	}
  3095  	found := f.Cache.allocSparseSet(f.NumBlocks())
  3096  	defer f.Cache.freeSparseSet(found)
  3097  	// Run a DFS to find a path through the loop that avoids all calls.
  3098  	// Such a path either escapes the loop or returns to the header.
  3099  	// It isn't enough that some exit is not dominated by any call, for example:
  3100  	// ... some loop
  3101  	// call1    call2
  3102  	//   \       /
  3103  	//     block
  3104  	// ...
  3105  	// block is not dominated by any single call, but there is no call-free path to it.
  3106  loopLoop:
  3107  	for _, l := range loopnest.loops {
  3108  		found.clear()
  3109  		tovisit := make([]*Block, 0, 8)
  3110  		tovisit = append(tovisit, l.header)
  3111  		for len(tovisit) > 0 {
  3112  			cur := tovisit[len(tovisit)-1]
  3113  			tovisit = tovisit[:len(tovisit)-1]
  3114  			if hasCall[cur.ID] {
  3115  				continue
  3116  			}
  3117  			for _, s := range cur.Succs {
  3118  				nb := s.Block()
  3119  				if nb == l.header {
  3120  					// Found a call-free path around the loop.
  3121  					continue loopLoop
  3122  				}
  3123  				if found.contains(nb.ID) {
  3124  					// Already found via another path.
  3125  					continue
  3126  				}
  3127  				nl := loopnest.b2l[nb.ID]
  3128  				if nl == nil || (nl.depth <= l.depth && nl != l) {
  3129  					// Left the loop.
  3130  					continue
  3131  				}
  3132  				tovisit = append(tovisit, nb)
  3133  				found.add(nb.ID)
  3134  			}
  3135  		}
  3136  		// No call-free path was found.
  3137  		l.containsUnavoidableCall = true
  3138  	}
  3139  }
  3140  
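        // containsCall reports whether b contains a call
        // (a BlockDefer block or a value whose op is a call).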
  3141  func (b *Block) containsCall() bool {
  3142  	if b.Kind == BlockDefer {
  3143  		return true
  3144  	}
  3145  	for _, v := range b.Values {
  3146  		if opcodeTable[v.Op].call {
  3147  			return true
  3148  		}
  3149  	}
  3150  	return false
  3151  }
  3152  