Source file src/cmd/compile/internal/ppc64/ssa.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ppc64
     6  
     7  import (
     8  	"cmd/compile/internal/base"
     9  	"cmd/compile/internal/ir"
    10  	"cmd/compile/internal/logopt"
    11  	"cmd/compile/internal/objw"
    12  	"cmd/compile/internal/ssa"
    13  	"cmd/compile/internal/ssagen"
    14  	"cmd/compile/internal/types"
    15  	"cmd/internal/obj"
    16  	"cmd/internal/obj/ppc64"
    17  	"internal/abi"
    18  	"internal/buildcfg"
    19  	"math"
    20  	"strings"
    21  )
    22  
// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
	// This pass is intentionally a no-op on ppc64. The commented-out body
	// below shows how flag liveness would be tracked if MOVDconst
	// materialization ever needed to be marked to avoid clobbering flags
	// (compare the live implementation in the amd64 backend).
	//	flive := b.FlagsLiveAtEnd
	//	if b.Control != nil && b.Control.Type.IsFlags() {
	//		flive = true
	//	}
	//	for i := len(b.Values) - 1; i >= 0; i-- {
	//		v := b.Values[i]
	//		if flive && (v.Op == ssa.OpPPC64MOVDconst) {
	//			// The "mark" is any non-nil Aux value.
	//			v.Aux = v
	//		}
	//		if v.Type.IsFlags() {
	//			flive = false
	//		}
	//		for _, a := range v.Args {
	//			if a.Type.IsFlags() {
	//				flive = true
	//			}
	//		}
	//	}
}
    45  
    46  // loadByType returns the load instruction of the given type.
    47  func loadByType(t *types.Type) obj.As {
    48  	if t.IsFloat() {
    49  		switch t.Size() {
    50  		case 4:
    51  			return ppc64.AFMOVS
    52  		case 8:
    53  			return ppc64.AFMOVD
    54  		}
    55  	} else {
    56  		switch t.Size() {
    57  		case 1:
    58  			if t.IsSigned() {
    59  				return ppc64.AMOVB
    60  			} else {
    61  				return ppc64.AMOVBZ
    62  			}
    63  		case 2:
    64  			if t.IsSigned() {
    65  				return ppc64.AMOVH
    66  			} else {
    67  				return ppc64.AMOVHZ
    68  			}
    69  		case 4:
    70  			if t.IsSigned() {
    71  				return ppc64.AMOVW
    72  			} else {
    73  				return ppc64.AMOVWZ
    74  			}
    75  		case 8:
    76  			return ppc64.AMOVD
    77  		}
    78  	}
    79  	panic("bad load type")
    80  }
    81  
    82  // storeByType returns the store instruction of the given type.
    83  func storeByType(t *types.Type) obj.As {
    84  	if t.IsFloat() {
    85  		switch t.Size() {
    86  		case 4:
    87  			return ppc64.AFMOVS
    88  		case 8:
    89  			return ppc64.AFMOVD
    90  		}
    91  	} else {
    92  		switch t.Size() {
    93  		case 1:
    94  			return ppc64.AMOVB
    95  		case 2:
    96  			return ppc64.AMOVH
    97  		case 4:
    98  			return ppc64.AMOVW
    99  		case 8:
   100  			return ppc64.AMOVD
   101  		}
   102  	}
   103  	panic("bad store type")
   104  }
   105  
   106  func ssaGenValue(s *ssagen.State, v *ssa.Value) {
   107  	switch v.Op {
   108  	case ssa.OpCopy:
   109  		t := v.Type
   110  		if t.IsMemory() {
   111  			return
   112  		}
   113  		x := v.Args[0].Reg()
   114  		y := v.Reg()
   115  		if x != y {
   116  			rt := obj.TYPE_REG
   117  			op := ppc64.AMOVD
   118  
   119  			if t.IsFloat() {
   120  				op = ppc64.AFMOVD
   121  			}
   122  			p := s.Prog(op)
   123  			p.From.Type = rt
   124  			p.From.Reg = x
   125  			p.To.Type = rt
   126  			p.To.Reg = y
   127  		}
   128  
   129  	case ssa.OpPPC64LoweredAtomicAnd8,
   130  		ssa.OpPPC64LoweredAtomicAnd32,
   131  		ssa.OpPPC64LoweredAtomicOr8,
   132  		ssa.OpPPC64LoweredAtomicOr32:
   133  		// LWSYNC
   134  		// LBAR/LWAR	(Rarg0), Rtmp
   135  		// AND/OR	Rarg1, Rtmp
   136  		// STBCCC/STWCCC Rtmp, (Rarg0)
   137  		// BNE		-3(PC)
   138  		ld := ppc64.ALBAR
   139  		st := ppc64.ASTBCCC
   140  		if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
   141  			ld = ppc64.ALWAR
   142  			st = ppc64.ASTWCCC
   143  		}
   144  		r0 := v.Args[0].Reg()
   145  		r1 := v.Args[1].Reg()
   146  		// LWSYNC - Assuming shared data not write-through-required nor
   147  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   148  		plwsync := s.Prog(ppc64.ALWSYNC)
   149  		plwsync.To.Type = obj.TYPE_NONE
   150  		// LBAR or LWAR
   151  		p := s.Prog(ld)
   152  		p.From.Type = obj.TYPE_MEM
   153  		p.From.Reg = r0
   154  		p.To.Type = obj.TYPE_REG
   155  		p.To.Reg = ppc64.REGTMP
   156  		// AND/OR reg1,out
   157  		p1 := s.Prog(v.Op.Asm())
   158  		p1.From.Type = obj.TYPE_REG
   159  		p1.From.Reg = r1
   160  		p1.To.Type = obj.TYPE_REG
   161  		p1.To.Reg = ppc64.REGTMP
   162  		// STBCCC or STWCCC
   163  		p2 := s.Prog(st)
   164  		p2.From.Type = obj.TYPE_REG
   165  		p2.From.Reg = ppc64.REGTMP
   166  		p2.To.Type = obj.TYPE_MEM
   167  		p2.To.Reg = r0
   168  		p2.RegTo2 = ppc64.REGTMP
   169  		// BNE retry
   170  		p3 := s.Prog(ppc64.ABNE)
   171  		p3.To.Type = obj.TYPE_BRANCH
   172  		p3.To.SetTarget(p)
   173  
   174  	case ssa.OpPPC64LoweredAtomicAdd32,
   175  		ssa.OpPPC64LoweredAtomicAdd64:
   176  		// LWSYNC
   177  		// LDAR/LWAR    (Rarg0), Rout
   178  		// ADD		Rarg1, Rout
   179  		// STDCCC/STWCCC Rout, (Rarg0)
   180  		// BNE         -3(PC)
   181  		// MOVW		Rout,Rout (if Add32)
   182  		ld := ppc64.ALDAR
   183  		st := ppc64.ASTDCCC
   184  		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
   185  			ld = ppc64.ALWAR
   186  			st = ppc64.ASTWCCC
   187  		}
   188  		r0 := v.Args[0].Reg()
   189  		r1 := v.Args[1].Reg()
   190  		out := v.Reg0()
   191  		// LWSYNC - Assuming shared data not write-through-required nor
   192  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   193  		plwsync := s.Prog(ppc64.ALWSYNC)
   194  		plwsync.To.Type = obj.TYPE_NONE
   195  		// LDAR or LWAR
   196  		p := s.Prog(ld)
   197  		p.From.Type = obj.TYPE_MEM
   198  		p.From.Reg = r0
   199  		p.To.Type = obj.TYPE_REG
   200  		p.To.Reg = out
   201  		// ADD reg1,out
   202  		p1 := s.Prog(ppc64.AADD)
   203  		p1.From.Type = obj.TYPE_REG
   204  		p1.From.Reg = r1
   205  		p1.To.Reg = out
   206  		p1.To.Type = obj.TYPE_REG
   207  		// STDCCC or STWCCC
   208  		p3 := s.Prog(st)
   209  		p3.From.Type = obj.TYPE_REG
   210  		p3.From.Reg = out
   211  		p3.To.Type = obj.TYPE_MEM
   212  		p3.To.Reg = r0
   213  		// BNE retry
   214  		p4 := s.Prog(ppc64.ABNE)
   215  		p4.To.Type = obj.TYPE_BRANCH
   216  		p4.To.SetTarget(p)
   217  
   218  		// Ensure a 32 bit result
   219  		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
   220  			p5 := s.Prog(ppc64.AMOVWZ)
   221  			p5.To.Type = obj.TYPE_REG
   222  			p5.To.Reg = out
   223  			p5.From.Type = obj.TYPE_REG
   224  			p5.From.Reg = out
   225  		}
   226  
   227  	case ssa.OpPPC64LoweredAtomicExchange8,
   228  		ssa.OpPPC64LoweredAtomicExchange32,
   229  		ssa.OpPPC64LoweredAtomicExchange64:
   230  		// LWSYNC
   231  		// LDAR/LWAR/LBAR        (Rarg0), Rout
   232  		// STDCCC/STWCCC/STBWCCC Rout, (Rarg0)
   233  		// BNE         -2(PC)
   234  		// ISYNC
   235  		ld := ppc64.ALDAR
   236  		st := ppc64.ASTDCCC
   237  		switch v.Op {
   238  		case ssa.OpPPC64LoweredAtomicExchange8:
   239  			ld = ppc64.ALBAR
   240  			st = ppc64.ASTBCCC
   241  		case ssa.OpPPC64LoweredAtomicExchange32:
   242  			ld = ppc64.ALWAR
   243  			st = ppc64.ASTWCCC
   244  		}
   245  		r0 := v.Args[0].Reg()
   246  		r1 := v.Args[1].Reg()
   247  		out := v.Reg0()
   248  		// LWSYNC - Assuming shared data not write-through-required nor
   249  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   250  		plwsync := s.Prog(ppc64.ALWSYNC)
   251  		plwsync.To.Type = obj.TYPE_NONE
   252  		// L[B|W|D]AR
   253  		p := s.Prog(ld)
   254  		p.From.Type = obj.TYPE_MEM
   255  		p.From.Reg = r0
   256  		p.To.Type = obj.TYPE_REG
   257  		p.To.Reg = out
   258  		// ST[B|W|D]CCC
   259  		p1 := s.Prog(st)
   260  		p1.From.Type = obj.TYPE_REG
   261  		p1.From.Reg = r1
   262  		p1.To.Type = obj.TYPE_MEM
   263  		p1.To.Reg = r0
   264  		// BNE retry
   265  		p2 := s.Prog(ppc64.ABNE)
   266  		p2.To.Type = obj.TYPE_BRANCH
   267  		p2.To.SetTarget(p)
   268  		// ISYNC
   269  		pisync := s.Prog(ppc64.AISYNC)
   270  		pisync.To.Type = obj.TYPE_NONE
   271  
   272  	case ssa.OpPPC64LoweredAtomicLoad8,
   273  		ssa.OpPPC64LoweredAtomicLoad32,
   274  		ssa.OpPPC64LoweredAtomicLoad64,
   275  		ssa.OpPPC64LoweredAtomicLoadPtr:
   276  		// SYNC
   277  		// MOVB/MOVD/MOVW (Rarg0), Rout
   278  		// CMP Rout,Rout
   279  		// BNE 1(PC)
   280  		// ISYNC
   281  		ld := ppc64.AMOVD
   282  		cmp := ppc64.ACMP
   283  		switch v.Op {
   284  		case ssa.OpPPC64LoweredAtomicLoad8:
   285  			ld = ppc64.AMOVBZ
   286  		case ssa.OpPPC64LoweredAtomicLoad32:
   287  			ld = ppc64.AMOVWZ
   288  			cmp = ppc64.ACMPW
   289  		}
   290  		arg0 := v.Args[0].Reg()
   291  		out := v.Reg0()
   292  		// SYNC when AuxInt == 1; otherwise, load-acquire
   293  		if v.AuxInt == 1 {
   294  			psync := s.Prog(ppc64.ASYNC)
   295  			psync.To.Type = obj.TYPE_NONE
   296  		}
   297  		// Load
   298  		p := s.Prog(ld)
   299  		p.From.Type = obj.TYPE_MEM
   300  		p.From.Reg = arg0
   301  		p.To.Type = obj.TYPE_REG
   302  		p.To.Reg = out
   303  		// CMP
   304  		p1 := s.Prog(cmp)
   305  		p1.From.Type = obj.TYPE_REG
   306  		p1.From.Reg = out
   307  		p1.To.Type = obj.TYPE_REG
   308  		p1.To.Reg = out
   309  		// BNE
   310  		p2 := s.Prog(ppc64.ABNE)
   311  		p2.To.Type = obj.TYPE_BRANCH
   312  		// ISYNC
   313  		pisync := s.Prog(ppc64.AISYNC)
   314  		pisync.To.Type = obj.TYPE_NONE
   315  		p2.To.SetTarget(pisync)
   316  
   317  	case ssa.OpPPC64LoweredAtomicStore8,
   318  		ssa.OpPPC64LoweredAtomicStore32,
   319  		ssa.OpPPC64LoweredAtomicStore64:
   320  		// SYNC or LWSYNC
   321  		// MOVB/MOVW/MOVD arg1,(arg0)
   322  		st := ppc64.AMOVD
   323  		switch v.Op {
   324  		case ssa.OpPPC64LoweredAtomicStore8:
   325  			st = ppc64.AMOVB
   326  		case ssa.OpPPC64LoweredAtomicStore32:
   327  			st = ppc64.AMOVW
   328  		}
   329  		arg0 := v.Args[0].Reg()
   330  		arg1 := v.Args[1].Reg()
   331  		// If AuxInt == 0, LWSYNC (Store-Release), else SYNC
   332  		// SYNC
   333  		syncOp := ppc64.ASYNC
   334  		if v.AuxInt == 0 {
   335  			syncOp = ppc64.ALWSYNC
   336  		}
   337  		psync := s.Prog(syncOp)
   338  		psync.To.Type = obj.TYPE_NONE
   339  		// Store
   340  		p := s.Prog(st)
   341  		p.To.Type = obj.TYPE_MEM
   342  		p.To.Reg = arg0
   343  		p.From.Type = obj.TYPE_REG
   344  		p.From.Reg = arg1
   345  
   346  	case ssa.OpPPC64LoweredAtomicCas64,
   347  		ssa.OpPPC64LoweredAtomicCas32:
   348  		// MOVD        $0, Rout
   349  		// LWSYNC
   350  		// loop:
   351  		// LDAR        (Rarg0), MutexHint, Rtmp
   352  		// CMP         Rarg1, Rtmp
   353  		// BNE         end
   354  		// STDCCC      Rarg2, (Rarg0)
   355  		// BNE         loop
   356  		// MOVD        $1, Rout
   357  		// end:
   358  		// LWSYNC      // Only for sequential consistency; not required in CasRel.
   359  		ld := ppc64.ALDAR
   360  		st := ppc64.ASTDCCC
   361  		cmp := ppc64.ACMP
   362  		if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
   363  			ld = ppc64.ALWAR
   364  			st = ppc64.ASTWCCC
   365  			cmp = ppc64.ACMPW
   366  		}
   367  		r0 := v.Args[0].Reg()
   368  		r1 := v.Args[1].Reg()
   369  		r2 := v.Args[2].Reg()
   370  		out := v.Reg0()
   371  		// Initialize return value to false
   372  		p := s.Prog(ppc64.AMOVD)
   373  		p.From.Type = obj.TYPE_CONST
   374  		p.From.Offset = 0
   375  		p.To.Type = obj.TYPE_REG
   376  		p.To.Reg = out
   377  		// LWSYNC - Assuming shared data not write-through-required nor
   378  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   379  		plwsync1 := s.Prog(ppc64.ALWSYNC)
   380  		plwsync1.To.Type = obj.TYPE_NONE
   381  		// LDAR or LWAR
   382  		p0 := s.Prog(ld)
   383  		p0.From.Type = obj.TYPE_MEM
   384  		p0.From.Reg = r0
   385  		p0.To.Type = obj.TYPE_REG
   386  		p0.To.Reg = ppc64.REGTMP
   387  		// If it is a Compare-and-Swap-Release operation, set the EH field with
   388  		// the release hint.
   389  		if v.AuxInt == 0 {
   390  			p0.AddRestSourceConst(0)
   391  		}
   392  		// CMP reg1,reg2
   393  		p1 := s.Prog(cmp)
   394  		p1.From.Type = obj.TYPE_REG
   395  		p1.From.Reg = r1
   396  		p1.To.Reg = ppc64.REGTMP
   397  		p1.To.Type = obj.TYPE_REG
   398  		// BNE done with return value = false
   399  		p2 := s.Prog(ppc64.ABNE)
   400  		p2.To.Type = obj.TYPE_BRANCH
   401  		// STDCCC or STWCCC
   402  		p3 := s.Prog(st)
   403  		p3.From.Type = obj.TYPE_REG
   404  		p3.From.Reg = r2
   405  		p3.To.Type = obj.TYPE_MEM
   406  		p3.To.Reg = r0
   407  		// BNE retry
   408  		p4 := s.Prog(ppc64.ABNE)
   409  		p4.To.Type = obj.TYPE_BRANCH
   410  		p4.To.SetTarget(p0)
   411  		// return value true
   412  		p5 := s.Prog(ppc64.AMOVD)
   413  		p5.From.Type = obj.TYPE_CONST
   414  		p5.From.Offset = 1
   415  		p5.To.Type = obj.TYPE_REG
   416  		p5.To.Reg = out
   417  		// LWSYNC - Assuming shared data not write-through-required nor
   418  		// caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
   419  		// If the operation is a CAS-Release, then synchronization is not necessary.
   420  		if v.AuxInt != 0 {
   421  			plwsync2 := s.Prog(ppc64.ALWSYNC)
   422  			plwsync2.To.Type = obj.TYPE_NONE
   423  			p2.To.SetTarget(plwsync2)
   424  		} else {
   425  			// done (label)
   426  			p6 := s.Prog(obj.ANOP)
   427  			p2.To.SetTarget(p6)
   428  		}
   429  
   430  	case ssa.OpPPC64LoweredPubBarrier:
   431  		// LWSYNC
   432  		s.Prog(v.Op.Asm())
   433  
   434  	case ssa.OpPPC64LoweredGetClosurePtr:
   435  		// Closure pointer is R11 (already)
   436  		ssagen.CheckLoweredGetClosurePtr(v)
   437  
   438  	case ssa.OpPPC64LoweredGetCallerSP:
   439  		// caller's SP is FixedFrameSize below the address of the first arg
   440  		p := s.Prog(ppc64.AMOVD)
   441  		p.From.Type = obj.TYPE_ADDR
   442  		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
   443  		p.From.Name = obj.NAME_PARAM
   444  		p.To.Type = obj.TYPE_REG
   445  		p.To.Reg = v.Reg()
   446  
   447  	case ssa.OpPPC64LoweredGetCallerPC:
   448  		p := s.Prog(obj.AGETCALLERPC)
   449  		p.To.Type = obj.TYPE_REG
   450  		p.To.Reg = v.Reg()
   451  
   452  	case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
   453  		// input is already rounded
   454  
   455  	case ssa.OpLoadReg:
   456  		loadOp := loadByType(v.Type)
   457  		p := s.Prog(loadOp)
   458  		ssagen.AddrAuto(&p.From, v.Args[0])
   459  		p.To.Type = obj.TYPE_REG
   460  		p.To.Reg = v.Reg()
   461  
   462  	case ssa.OpStoreReg:
   463  		storeOp := storeByType(v.Type)
   464  		p := s.Prog(storeOp)
   465  		p.From.Type = obj.TYPE_REG
   466  		p.From.Reg = v.Args[0].Reg()
   467  		ssagen.AddrAuto(&p.To, v)
   468  
   469  	case ssa.OpArgIntReg, ssa.OpArgFloatReg:
   470  		// The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
   471  		// The loop only runs once.
   472  		for _, a := range v.Block.Func.RegArgs {
   473  			// Pass the spill/unspill information along to the assembler, offset by size of
   474  			// the saved LR slot.
   475  			addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.Arch.FixedFrameSize)
   476  			s.FuncInfo().AddSpill(
   477  				obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
   478  		}
   479  		v.Block.Func.RegArgs = nil
   480  
   481  		ssagen.CheckArgReg(v)
   482  
   483  	case ssa.OpPPC64DIVD:
   484  		// For now,
   485  		//
   486  		// cmp arg1, -1
   487  		// be  ahead
   488  		// v = arg0 / arg1
   489  		// b over
   490  		// ahead: v = - arg0
   491  		// over: nop
   492  		r := v.Reg()
   493  		r0 := v.Args[0].Reg()
   494  		r1 := v.Args[1].Reg()
   495  
   496  		p := s.Prog(ppc64.ACMP)
   497  		p.From.Type = obj.TYPE_REG
   498  		p.From.Reg = r1
   499  		p.To.Type = obj.TYPE_CONST
   500  		p.To.Offset = -1
   501  
   502  		pbahead := s.Prog(ppc64.ABEQ)
   503  		pbahead.To.Type = obj.TYPE_BRANCH
   504  
   505  		p = s.Prog(v.Op.Asm())
   506  		p.From.Type = obj.TYPE_REG
   507  		p.From.Reg = r1
   508  		p.Reg = r0
   509  		p.To.Type = obj.TYPE_REG
   510  		p.To.Reg = r
   511  
   512  		pbover := s.Prog(obj.AJMP)
   513  		pbover.To.Type = obj.TYPE_BRANCH
   514  
   515  		p = s.Prog(ppc64.ANEG)
   516  		p.To.Type = obj.TYPE_REG
   517  		p.To.Reg = r
   518  		p.From.Type = obj.TYPE_REG
   519  		p.From.Reg = r0
   520  		pbahead.To.SetTarget(p)
   521  
   522  		p = s.Prog(obj.ANOP)
   523  		pbover.To.SetTarget(p)
   524  
   525  	case ssa.OpPPC64DIVW:
   526  		// word-width version of above
   527  		r := v.Reg()
   528  		r0 := v.Args[0].Reg()
   529  		r1 := v.Args[1].Reg()
   530  
   531  		p := s.Prog(ppc64.ACMPW)
   532  		p.From.Type = obj.TYPE_REG
   533  		p.From.Reg = r1
   534  		p.To.Type = obj.TYPE_CONST
   535  		p.To.Offset = -1
   536  
   537  		pbahead := s.Prog(ppc64.ABEQ)
   538  		pbahead.To.Type = obj.TYPE_BRANCH
   539  
   540  		p = s.Prog(v.Op.Asm())
   541  		p.From.Type = obj.TYPE_REG
   542  		p.From.Reg = r1
   543  		p.Reg = r0
   544  		p.To.Type = obj.TYPE_REG
   545  		p.To.Reg = r
   546  
   547  		pbover := s.Prog(obj.AJMP)
   548  		pbover.To.Type = obj.TYPE_BRANCH
   549  
   550  		p = s.Prog(ppc64.ANEG)
   551  		p.To.Type = obj.TYPE_REG
   552  		p.To.Reg = r
   553  		p.From.Type = obj.TYPE_REG
   554  		p.From.Reg = r0
   555  		pbahead.To.SetTarget(p)
   556  
   557  		p = s.Prog(obj.ANOP)
   558  		pbover.To.SetTarget(p)
   559  
   560  	case ssa.OpPPC64CLRLSLWI:
   561  		r := v.Reg()
   562  		r1 := v.Args[0].Reg()
   563  		shifts := v.AuxInt
   564  		p := s.Prog(v.Op.Asm())
   565  		// clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
   566  		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
   567  		p.AddRestSourceConst(ssa.GetPPC64Shiftsh(shifts))
   568  		p.Reg = r1
   569  		p.To.Type = obj.TYPE_REG
   570  		p.To.Reg = r
   571  
   572  	case ssa.OpPPC64CLRLSLDI:
   573  		r := v.Reg()
   574  		r1 := v.Args[0].Reg()
   575  		shifts := v.AuxInt
   576  		p := s.Prog(v.Op.Asm())
   577  		// clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
   578  		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
   579  		p.AddRestSourceConst(ssa.GetPPC64Shiftsh(shifts))
   580  		p.Reg = r1
   581  		p.To.Type = obj.TYPE_REG
   582  		p.To.Reg = r
   583  
   584  	case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
   585  		ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
   586  		ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
   587  		ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
   588  		ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
   589  		ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
   590  		ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
   591  		ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW, ssa.OpPPC64XSMINJDP, ssa.OpPPC64XSMAXJDP:
   592  		r := v.Reg()
   593  		r1 := v.Args[0].Reg()
   594  		r2 := v.Args[1].Reg()
   595  		p := s.Prog(v.Op.Asm())
   596  		p.From.Type = obj.TYPE_REG
   597  		p.From.Reg = r2
   598  		p.Reg = r1
   599  		p.To.Type = obj.TYPE_REG
   600  		p.To.Reg = r
   601  
   602  	case ssa.OpPPC64ADDCC, ssa.OpPPC64ANDCC, ssa.OpPPC64SUBCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC, ssa.OpPPC64NORCC,
   603  		ssa.OpPPC64ANDNCC, ssa.OpPPC64MULHDUCC:
   604  		r1 := v.Args[0].Reg()
   605  		r2 := v.Args[1].Reg()
   606  		p := s.Prog(v.Op.Asm())
   607  		p.From.Type = obj.TYPE_REG
   608  		p.From.Reg = r2
   609  		p.Reg = r1
   610  		p.To.Type = obj.TYPE_REG
   611  		p.To.Reg = v.Reg0()
   612  
   613  	case ssa.OpPPC64NEGCC, ssa.OpPPC64CNTLZDCC:
   614  		p := s.Prog(v.Op.Asm())
   615  		p.To.Type = obj.TYPE_REG
   616  		p.To.Reg = v.Reg0()
   617  		p.From.Type = obj.TYPE_REG
   618  		p.From.Reg = v.Args[0].Reg()
   619  
   620  	case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
   621  		p := s.Prog(v.Op.Asm())
   622  		p.From.Type = obj.TYPE_CONST
   623  		p.From.Offset = v.AuxInt
   624  		p.Reg = v.Args[0].Reg()
   625  		p.To.Type = obj.TYPE_REG
   626  		p.To.Reg = v.Reg()
   627  
   628  		// Auxint holds encoded rotate + mask
   629  	case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
   630  		sh, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
   631  		p := s.Prog(v.Op.Asm())
   632  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
   633  		p.Reg = v.Args[0].Reg()
   634  		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(sh)}
   635  		p.AddRestSourceArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
   636  		// Auxint holds mask
   637  
   638  	case ssa.OpPPC64RLDICL, ssa.OpPPC64RLDICLCC, ssa.OpPPC64RLDICR:
   639  		sh, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
   640  		p := s.Prog(v.Op.Asm())
   641  		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: sh}
   642  		switch v.Op {
   643  		case ssa.OpPPC64RLDICL, ssa.OpPPC64RLDICLCC:
   644  			p.AddRestSourceConst(mb)
   645  		case ssa.OpPPC64RLDICR:
   646  			p.AddRestSourceConst(me)
   647  		}
   648  		p.Reg = v.Args[0].Reg()
   649  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.ResultReg()}
   650  
   651  	case ssa.OpPPC64RLWNM:
   652  		_, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
   653  		p := s.Prog(v.Op.Asm())
   654  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
   655  		p.Reg = v.Args[0].Reg()
   656  		p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
   657  		p.AddRestSourceArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
   658  
   659  	case ssa.OpPPC64MADDLD:
   660  		r := v.Reg()
   661  		r1 := v.Args[0].Reg()
   662  		r2 := v.Args[1].Reg()
   663  		r3 := v.Args[2].Reg()
   664  		// r = r1*r2 ± r3
   665  		p := s.Prog(v.Op.Asm())
   666  		p.From.Type = obj.TYPE_REG
   667  		p.From.Reg = r1
   668  		p.Reg = r2
   669  		p.AddRestSourceReg(r3)
   670  		p.To.Type = obj.TYPE_REG
   671  		p.To.Reg = r
   672  
   673  	case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
   674  		r := v.Reg()
   675  		r1 := v.Args[0].Reg()
   676  		r2 := v.Args[1].Reg()
   677  		r3 := v.Args[2].Reg()
   678  		// r = r1*r2 ± r3
   679  		p := s.Prog(v.Op.Asm())
   680  		p.From.Type = obj.TYPE_REG
   681  		p.From.Reg = r1
   682  		p.Reg = r3
   683  		p.AddRestSourceReg(r2)
   684  		p.To.Type = obj.TYPE_REG
   685  		p.To.Reg = r
   686  
   687  	case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
   688  		ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
   689  		ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
   690  		ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD, ssa.OpPPC64BRH, ssa.OpPPC64BRW, ssa.OpPPC64BRD:
   691  		r := v.Reg()
   692  		p := s.Prog(v.Op.Asm())
   693  		p.To.Type = obj.TYPE_REG
   694  		p.To.Reg = r
   695  		p.From.Type = obj.TYPE_REG
   696  		p.From.Reg = v.Args[0].Reg()
   697  
   698  	case ssa.OpPPC64ADDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
   699  		ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
   700  		ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst,
   701  		ssa.OpPPC64ANDconst:
   702  		p := s.Prog(v.Op.Asm())
   703  		p.Reg = v.Args[0].Reg()
   704  		p.From.Type = obj.TYPE_CONST
   705  		p.From.Offset = v.AuxInt
   706  		p.To.Type = obj.TYPE_REG
   707  		p.To.Reg = v.Reg()
   708  
   709  	case ssa.OpPPC64ADDC, ssa.OpPPC64ADDE, ssa.OpPPC64SUBC, ssa.OpPPC64SUBE:
   710  		r := v.Reg0() // CA is the first, implied argument.
   711  		r1 := v.Args[0].Reg()
   712  		r2 := v.Args[1].Reg()
   713  		p := s.Prog(v.Op.Asm())
   714  		p.From.Type = obj.TYPE_REG
   715  		p.From.Reg = r2
   716  		p.Reg = r1
   717  		p.To.Type = obj.TYPE_REG
   718  		p.To.Reg = r
   719  
   720  	case ssa.OpPPC64ADDZE:
   721  		p := s.Prog(v.Op.Asm())
   722  		p.From.Type = obj.TYPE_REG
   723  		p.From.Reg = v.Args[0].Reg()
   724  		p.To.Type = obj.TYPE_REG
   725  		p.To.Reg = v.Reg0()
   726  
   727  	case ssa.OpPPC64ADDZEzero, ssa.OpPPC64SUBZEzero:
   728  		p := s.Prog(v.Op.Asm())
   729  		p.From.Type = obj.TYPE_REG
   730  		p.From.Reg = ppc64.REG_R0
   731  		p.To.Type = obj.TYPE_REG
   732  		p.To.Reg = v.Reg()
   733  
   734  	case ssa.OpPPC64ADDCconst:
   735  		p := s.Prog(v.Op.Asm())
   736  		p.Reg = v.Args[0].Reg()
   737  		p.From.Type = obj.TYPE_CONST
   738  		p.From.Offset = v.AuxInt
   739  		p.To.Type = obj.TYPE_REG
   740  		// Output is a pair, the second is the CA, which is implied.
   741  		p.To.Reg = v.Reg0()
   742  
   743  	case ssa.OpPPC64SUBCconst:
   744  		p := s.Prog(v.Op.Asm())
   745  		p.AddRestSourceConst(v.AuxInt)
   746  		p.From.Type = obj.TYPE_REG
   747  		p.From.Reg = v.Args[0].Reg()
   748  		p.To.Type = obj.TYPE_REG
   749  		p.To.Reg = v.Reg0()
   750  
   751  	case ssa.OpPPC64SUBFCconst:
   752  		p := s.Prog(v.Op.Asm())
   753  		p.AddRestSourceConst(v.AuxInt)
   754  		p.From.Type = obj.TYPE_REG
   755  		p.From.Reg = v.Args[0].Reg()
   756  		p.To.Type = obj.TYPE_REG
   757  		p.To.Reg = v.Reg()
   758  
   759  	case ssa.OpPPC64ADDCCconst, ssa.OpPPC64ANDCCconst:
   760  		p := s.Prog(v.Op.Asm())
   761  		p.Reg = v.Args[0].Reg()
   762  		p.From.Type = obj.TYPE_CONST
   763  		p.From.Offset = v.AuxInt
   764  		p.To.Type = obj.TYPE_REG
   765  		p.To.Reg = v.Reg0()
   766  
   767  	case ssa.OpPPC64MOVDaddr:
   768  		switch v.Aux.(type) {
   769  		default:
   770  			v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
   771  		case nil:
   772  			// If aux offset and aux int are both 0, and the same
   773  			// input and output regs are used, no instruction
   774  			// needs to be generated, since it would just be
   775  			// addi rx, rx, 0.
   776  			if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
   777  				p := s.Prog(ppc64.AMOVD)
   778  				p.From.Type = obj.TYPE_ADDR
   779  				p.From.Reg = v.Args[0].Reg()
   780  				p.From.Offset = v.AuxInt
   781  				p.To.Type = obj.TYPE_REG
   782  				p.To.Reg = v.Reg()
   783  			}
   784  
   785  		case *obj.LSym, ir.Node:
   786  			p := s.Prog(ppc64.AMOVD)
   787  			p.From.Type = obj.TYPE_ADDR
   788  			p.From.Reg = v.Args[0].Reg()
   789  			p.To.Type = obj.TYPE_REG
   790  			p.To.Reg = v.Reg()
   791  			ssagen.AddAux(&p.From, v)
   792  
   793  		}
   794  
   795  	case ssa.OpPPC64MOVDconst:
   796  		p := s.Prog(v.Op.Asm())
   797  		p.From.Type = obj.TYPE_CONST
   798  		p.From.Offset = v.AuxInt
   799  		p.To.Type = obj.TYPE_REG
   800  		p.To.Reg = v.Reg()
   801  
   802  	case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
   803  		p := s.Prog(v.Op.Asm())
   804  		p.From.Type = obj.TYPE_FCONST
   805  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   806  		p.To.Type = obj.TYPE_REG
   807  		p.To.Reg = v.Reg()
   808  
   809  	case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
   810  		p := s.Prog(v.Op.Asm())
   811  		p.From.Type = obj.TYPE_REG
   812  		p.From.Reg = v.Args[0].Reg()
   813  		p.To.Type = obj.TYPE_REG
   814  		p.To.Reg = v.Args[1].Reg()
   815  
   816  	case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
   817  		p := s.Prog(v.Op.Asm())
   818  		p.From.Type = obj.TYPE_REG
   819  		p.From.Reg = v.Args[0].Reg()
   820  		p.To.Type = obj.TYPE_CONST
   821  		p.To.Offset = v.AuxInt
   822  
   823  	case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
   824  		// Shift in register to required size
   825  		p := s.Prog(v.Op.Asm())
   826  		p.From.Type = obj.TYPE_REG
   827  		p.From.Reg = v.Args[0].Reg()
   828  		p.To.Reg = v.Reg()
   829  		p.To.Type = obj.TYPE_REG
   830  
   831  	case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:
   832  
   833  		// MOVDload and MOVWload are DS form instructions that are restricted to
   834  		// offsets that are a multiple of 4. If the offset is not a multiple of 4,
   835  		// then the address of the symbol to be loaded is computed (base + offset)
   836  		// and used as the new base register and the offset field in the instruction
   837  		// can be set to zero.
   838  
   839  		// This same problem can happen with gostrings since the final offset is not
   840  		// known yet, but could be unaligned after the relocation is resolved.
   841  		// So gostrings are handled the same way.
   842  
   843  		// This allows the MOVDload and MOVWload to be generated in more cases and
   844  		// eliminates some offset and alignment checking in the rules file.
   845  
   846  		fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
   847  		ssagen.AddAux(&fromAddr, v)
   848  
   849  		genAddr := false
   850  
   851  		switch fromAddr.Name {
   852  		case obj.NAME_EXTERN, obj.NAME_STATIC:
   853  			// Special case for a rule combines the bytes of gostring.
   854  			// The v alignment might seem OK, but we don't want to load it
   855  			// using an offset because relocation comes later.
   856  			genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go:string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
   857  		default:
   858  			genAddr = fromAddr.Offset%4 != 0
   859  		}
   860  		if genAddr {
   861  			// Load full address into the temp register.
   862  			p := s.Prog(ppc64.AMOVD)
   863  			p.From.Type = obj.TYPE_ADDR
   864  			p.From.Reg = v.Args[0].Reg()
   865  			ssagen.AddAux(&p.From, v)
   866  			// Load target using temp as base register
   867  			// and offset zero. Setting NAME_NONE
   868  			// prevents any extra offsets from being
   869  			// added.
   870  			p.To.Type = obj.TYPE_REG
   871  			p.To.Reg = ppc64.REGTMP
   872  			fromAddr.Reg = ppc64.REGTMP
   873  			// Clear the offset field and other
   874  			// information that might be used
   875  			// by the assembler to add to the
   876  			// final offset value.
   877  			fromAddr.Offset = 0
   878  			fromAddr.Name = obj.NAME_NONE
   879  			fromAddr.Sym = nil
   880  		}
   881  		p := s.Prog(v.Op.Asm())
   882  		p.From = fromAddr
   883  		p.To.Type = obj.TYPE_REG
   884  		p.To.Reg = v.Reg()
   885  
   886  	case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
   887  		p := s.Prog(v.Op.Asm())
   888  		p.From.Type = obj.TYPE_MEM
   889  		p.From.Reg = v.Args[0].Reg()
   890  		ssagen.AddAux(&p.From, v)
   891  		p.To.Type = obj.TYPE_REG
   892  		p.To.Reg = v.Reg()
   893  
   894  	case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
   895  		p := s.Prog(v.Op.Asm())
   896  		p.From.Type = obj.TYPE_MEM
   897  		p.From.Reg = v.Args[0].Reg()
   898  		p.To.Type = obj.TYPE_REG
   899  		p.To.Reg = v.Reg()
   900  
	case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
		// Byte-reversed stores: store arg1 to the address in arg0
		// (no symbol or offset, as with the byte-reversed loads above).
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
		ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
		ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
		// Indexed loads: load from base register arg0 plus index register arg1
		// into the result register.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64DCBT:
		// Data cache prefetch hint for the address in arg0. AuxInt is emitted
		// as a constant operand (presumably the DCBT TH hint field — confirm
		// against the assembler's DCBT handling).
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt

	case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
		// Store zero by using REGZERO (R0) as the source register.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REGZERO
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)

	case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:

		// MOVDstore and MOVDstorezero become DS form instructions that are restricted
		// to offset values that are a multiple of 4. If the offset field is not a
		// multiple of 4, then the full address of the store target is computed (base +
		// offset) and used as the new base register and the offset in the instruction
		// is set to 0.

		// This allows the MOVDstore and MOVDstorezero to be generated in more cases,
		// and prevents checking of the offset value and alignment in the rules.

		toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
		ssagen.AddAux(&toAddr, v)

		if toAddr.Offset%4 != 0 {
			// Materialize the full address (base + aux offset) into REGTMP
			// and store through REGTMP with offset 0 instead.
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_ADDR
			p.From.Reg = v.Args[0].Reg()
			ssagen.AddAux(&p.From, v)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP
			toAddr.Reg = ppc64.REGTMP
			// Clear the offset field and other
			// information that might be used
			// by the assembler to add to the
			// final offset value.
			toAddr.Offset = 0
			toAddr.Name = obj.NAME_NONE
			toAddr.Sym = nil
		}
		p := s.Prog(v.Op.Asm())
		p.To = toAddr
		p.From.Type = obj.TYPE_REG
		// MOVDstorezero stores REGZERO (R0); MOVDstore stores arg1.
		if v.Op == ssa.OpPPC64MOVDstorezero {
			p.From.Reg = ppc64.REGZERO
		} else {
			p.From.Reg = v.Args[1].Reg()
		}

	case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
		// Scalar stores: store arg1 to arg0 plus the aux symbol/offset.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)

	case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
		ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
		ssa.OpPPC64MOVHBRstoreidx:
		// Indexed stores: store arg2 to base register arg0 plus index
		// register arg1.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()

	case ssa.OpPPC64ISEL, ssa.OpPPC64ISELZ:
		// ISEL  AuxInt ? arg0 : arg1
		// ISELZ is a special case of ISEL where arg1 is implicitly $0.
		//
		// AuxInt value indicates conditions 0=LT 1=GT 2=EQ 3=SO 4=GE 5=LE 6=NE 7=NSO.
		// ISEL accepts a CR bit argument, not a condition as expressed by AuxInt.
		// Convert the condition to a CR bit argument by the following conversion:
		//
		// AuxInt&3 ? arg0 : arg1 for conditions LT, GT, EQ, SO
		// AuxInt&3 ? arg1 : arg0 for conditions GE, LE, NE, NSO
		p := s.Prog(v.Op.Asm())
		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
		p.Reg = v.Args[0].Reg()
		// For ISELZ the implicit $0 operand is expressed as R0.
		if v.Op == ssa.OpPPC64ISEL {
			p.AddRestSourceReg(v.Args[1].Reg())
		} else {
			p.AddRestSourceReg(ppc64.REG_R0)
		}
		// AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
		if v.AuxInt > 3 {
			p.Reg, p.GetFrom3().Reg = p.GetFrom3().Reg, p.Reg
		}
		p.From.SetConst(v.AuxInt & 3)

	case ssa.OpPPC64SETBC, ssa.OpPPC64SETBCR:
		// Set the result register from a CR bit. AuxInt selects the bit
		// relative to CR0's LT bit (same 0=LT 1=GT 2=EQ 3=SO numbering as
		// the ISEL case above).
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		p.From.Type = obj.TYPE_REG
		p.From.Reg = int16(ppc64.REG_CR0LT + v.AuxInt)

	case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
		// The LoweredQuad code generation
		// generates STXV instructions on
		// power9. The Short variation is used
		// if no loop is generated.

		// sizes >= 64 generate a loop as follows:

		// Set up loop counter in CTR, used by BC
		// XXLXOR clears VS32
		//       XXLXOR VS32,VS32,VS32
		//       MOVD len/64,REG_TMP
		//       MOVD REG_TMP,CTR
		//       loop:
		//       STXV VS32,0(R20)
		//       STXV VS32,16(R20)
		//       STXV VS32,32(R20)
		//       STXV VS32,48(R20)
		//       ADD  $64,R20
		//       BC   16, 0, loop

		// Loop iterations (64 bytes zeroed per iteration)
		ctr := v.AuxInt / 64

		// Remainder bytes
		rem := v.AuxInt % 64

		// Only generate a loop if there is more
		// than 1 iteration.
		if ctr > 1 {
			// Set up VS32 (V0) to hold 0s
			p := s.Prog(ppc64.AXXLXOR)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS32
			p.Reg = ppc64.REG_VS32

			// Set up CTR loop counter
			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ctr
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP

			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REGTMP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_CTR

			// Don't generate padding for
			// loops with few iterations.
			if ctr > 3 {
				p = s.Prog(obj.APCALIGN)
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = 16
			}

			// generate 4 STXVs to zero 64 bytes
			var top *obj.Prog

			p = s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()

			// Save the top of loop.
			// (top was just declared, so this always captures the first STXV.)
			if top == nil {
				top = p
			}
			p = s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Offset = 16

			p = s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Offset = 32

			p = s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Offset = 48

			// Increment address for the
			// 64 bytes just zeroed.
			p = s.Prog(ppc64.AADD)
			p.Reg = v.Args[0].Reg()
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 64
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Args[0].Reg()

			// Branch back to top of loop
			// based on CTR
			// BC with BO_BCTR generates bdnz
			p = s.Prog(ppc64.ABC)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ppc64.BO_BCTR
			p.Reg = ppc64.REG_CR0LT
			p.To.Type = obj.TYPE_BRANCH
			p.To.SetTarget(top)
		}
		// When ctr == 1 the loop was not generated but
		// there are at least 64 bytes to clear, so add
		// that to the remainder to generate the code
		// to clear those doublewords
		if ctr == 1 {
			rem += 64
		}

		// Clear the remainder starting at offset zero
		offset := int64(0)

		if rem >= 16 && ctr <= 1 {
			// If the XXLXOR hasn't already been
			// generated, do it here to initialize
			// VS32 (V0) to 0.
			p := s.Prog(ppc64.AXXLXOR)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS32
			p.Reg = ppc64.REG_VS32
		}
		// Generate STXV for 32 or 64
		// bytes.
		for rem >= 32 {
			p := s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Offset = offset

			p = s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Offset = offset + 16
			offset += 32
			rem -= 32
		}
		// Generate 16 bytes
		if rem >= 16 {
			p := s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Offset = offset
			offset += 16
			rem -= 16
		}

		// first clear as many doublewords as possible
		// then clear remaining sizes as available
		for rem > 0 {
			op, size := ppc64.AMOVB, int64(1)
			switch {
			case rem >= 8:
				op, size = ppc64.AMOVD, 8
			case rem >= 4:
				op, size = ppc64.AMOVW, 4
			case rem >= 2:
				op, size = ppc64.AMOVH, 2
			}
			// Stores of REGZERO (R0) write zeros.
			p := s.Prog(op)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_R0
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Offset = offset
			rem -= size
			offset += size
		}

	case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:

		// Unaligned data doesn't hurt performance
		// for these instructions on power8.

		// For sizes >= 64 generate a loop as follows:

		// Set up loop counter in CTR, used by BC
		//       XXLXOR VS32,VS32,VS32
		//	 MOVD len/32,REG_TMP
		//	 MOVD REG_TMP,CTR
		//       MOVD $16,REG_TMP
		//	 loop:
		//	 STXVD2X VS32,(R0)(R20)
		//	 STXVD2X VS32,(R31)(R20)
		//	 ADD  $32,R20
		//	 BC   16, 0, loop
		//
		// any remainder is done as described below

		// for sizes < 64 bytes, first clear as many doublewords as possible,
		// then handle the remainder
		//	MOVD R0,(R20)
		//	MOVD R0,8(R20)
		// .... etc.
		//
		// the remainder bytes are cleared using one or more
		// of the following instructions with the appropriate
		// offsets depending which instructions are needed
		//
		//	MOVW R0,n1(R20)	4 bytes
		//	MOVH R0,n2(R20)	2 bytes
		//	MOVB R0,n3(R20)	1 byte
		//
		// 7 bytes: MOVW, MOVH, MOVB
		// 6 bytes: MOVW, MOVH
		// 5 bytes: MOVW, MOVB
		// 3 bytes: MOVH, MOVB

		// each loop iteration does 32 bytes
		ctr := v.AuxInt / 32

		// remainder bytes
		rem := v.AuxInt % 32

		// only generate a loop if there is more
		// than 1 iteration.
		if ctr > 1 {
			// Set up VS32 (V0) to hold 0s
			p := s.Prog(ppc64.AXXLXOR)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS32
			p.Reg = ppc64.REG_VS32

			// Set up CTR loop counter
			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ctr
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP

			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REGTMP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_CTR

			// Set up REGTMP (R31) to hold index value 16
			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 16
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP

			// Don't add padding for alignment
			// with few loop iterations.
			if ctr > 3 {
				p = s.Prog(obj.APCALIGN)
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = 16
			}

			// generate 2 STXVD2Xs to store 16 bytes
			// when this is a loop then the top must be saved
			var top *obj.Prog
			// This is the top of loop

			p = s.Prog(ppc64.ASTXVD2X)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Index = ppc64.REGZERO
			// Save the top of loop.
			// (top was just declared, so this always captures the first STXVD2X.)
			if top == nil {
				top = p
			}
			p = s.Prog(ppc64.ASTXVD2X)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Index = ppc64.REGTMP

			// Increment address for the
			// 4 doublewords just zeroed.
			p = s.Prog(ppc64.AADD)
			p.Reg = v.Args[0].Reg()
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Args[0].Reg()

			// Branch back to top of loop
			// based on CTR
			// BC with BO_BCTR generates bdnz
			p = s.Prog(ppc64.ABC)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ppc64.BO_BCTR
			p.Reg = ppc64.REG_CR0LT
			p.To.Type = obj.TYPE_BRANCH
			p.To.SetTarget(top)
		}

		// when ctr == 1 the loop was not generated but
		// there are at least 32 bytes to clear, so add
		// that to the remainder to generate the code
		// to clear those doublewords
		if ctr == 1 {
			rem += 32
		}

		// clear the remainder starting at offset zero
		offset := int64(0)

		// first clear as many doublewords as possible
		// then clear remaining sizes as available
		for rem > 0 {
			op, size := ppc64.AMOVB, int64(1)
			switch {
			case rem >= 8:
				op, size = ppc64.AMOVD, 8
			case rem >= 4:
				op, size = ppc64.AMOVW, 4
			case rem >= 2:
				op, size = ppc64.AMOVH, 2
			}
			// Stores of REGZERO (R0) write zeros.
			p := s.Prog(op)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_R0
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Offset = offset
			rem -= size
			offset += size
		}

	case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:

		bytesPerLoop := int64(32)
		// This will be used when moving more
		// than 8 bytes.  Moves start with
		// as many 8 byte moves as possible, then
		// 4, 2, or 1 byte(s) as remaining.  This will
		// work and be efficient for power8 or later.
		// If there are 64 or more bytes, then a
		// loop is generated to move 32 bytes and
		// update the src and dst addresses on each
		// iteration. When < 64 bytes, the appropriate
		// number of moves are generated based on the
		// size.
		// When moving >= 64 bytes a loop is used
		//	MOVD len/32,REG_TMP
		//	MOVD REG_TMP,CTR
		//	MOVD $16,REG_TMP
		// top:
		//	LXVD2X (R0)(R21),VS32
		//	LXVD2X (R31)(R21),VS33
		//	ADD $32,R21
		//	STXVD2X VS32,(R0)(R20)
		//	STXVD2X VS33,(R31)(R20)
		//	ADD $32,R20
		//	BC 16,0,top
		// Bytes not moved by this loop are moved
		// with a combination of the following instructions,
		// starting with the largest sizes and generating as
		// many as needed, using the appropriate offset value.
		//	MOVD  n(R21),R31
		//	MOVD  R31,n(R20)
		//	MOVW  n1(R21),R31
		//	MOVW  R31,n1(R20)
		//	MOVH  n2(R21),R31
		//	MOVH  R31,n2(R20)
		//	MOVB  n3(R21),R31
		//	MOVB  R31,n3(R20)

		// Each loop iteration moves 32 bytes
		ctr := v.AuxInt / bytesPerLoop

		// Remainder after the loop
		rem := v.AuxInt % bytesPerLoop

		dstReg := v.Args[0].Reg()
		srcReg := v.Args[1].Reg()

		// The set of registers used here, must match the clobbered reg list
		// in PPC64Ops.go.
		offset := int64(0)

		// top of the loop
		var top *obj.Prog
		// Only generate looping code when loop counter is > 1 for >= 64 bytes
		if ctr > 1 {
			// Set up the CTR
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ctr
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP

			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REGTMP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_CTR

			// Use REGTMP as index reg
			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 16
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP

			// Don't add padding for
			// alignment with small iteration
			// counts.
			if ctr > 3 {
				p = s.Prog(obj.APCALIGN)
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = 16
			}

			// Generate 16 byte loads and stores.
			// Use temp register for index (16)
			// on the second one.

			p = s.Prog(ppc64.ALXVD2X)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = srcReg
			p.From.Index = ppc64.REGZERO
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS32
			// top was just declared, so the first LXVD2X is always the loop top.
			if top == nil {
				top = p
			}
			p = s.Prog(ppc64.ALXVD2X)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = srcReg
			p.From.Index = ppc64.REGTMP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS33

			// increment the src reg for next iteration
			p = s.Prog(ppc64.AADD)
			p.Reg = srcReg
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = bytesPerLoop
			p.To.Type = obj.TYPE_REG
			p.To.Reg = srcReg

			// generate 16 byte stores
			p = s.Prog(ppc64.ASTXVD2X)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dstReg
			p.To.Index = ppc64.REGZERO

			p = s.Prog(ppc64.ASTXVD2X)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS33
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dstReg
			p.To.Index = ppc64.REGTMP

			// increment the dst reg for next iteration
			p = s.Prog(ppc64.AADD)
			p.Reg = dstReg
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = bytesPerLoop
			p.To.Type = obj.TYPE_REG
			p.To.Reg = dstReg

			// BC with BO_BCTR generates bdnz to branch on nonzero CTR
			// to loop top.
			p = s.Prog(ppc64.ABC)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ppc64.BO_BCTR
			p.Reg = ppc64.REG_CR0LT
			p.To.Type = obj.TYPE_BRANCH
			p.To.SetTarget(top)

			// srcReg and dstReg were incremented in the loop, so
			// later instructions start with offset 0.
			offset = int64(0)
		}

		// No loop was generated for one iteration, so
		// add 32 bytes to the remainder to move those bytes.
		if ctr == 1 {
			rem += bytesPerLoop
		}

		if rem >= 16 {
			// Generate 16 byte loads and stores.
			// Use temp register for index (value 16)
			// on the second one.
			p := s.Prog(ppc64.ALXVD2X)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = srcReg
			p.From.Index = ppc64.REGZERO
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS32

			p = s.Prog(ppc64.ASTXVD2X)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dstReg
			p.To.Index = ppc64.REGZERO

			offset = 16
			rem -= 16

			if rem >= 16 {
				// Use REGTMP as index reg
				p := s.Prog(ppc64.AMOVD)
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = 16
				p.To.Type = obj.TYPE_REG
				p.To.Reg = ppc64.REGTMP

				p = s.Prog(ppc64.ALXVD2X)
				p.From.Type = obj.TYPE_MEM
				p.From.Reg = srcReg
				p.From.Index = ppc64.REGTMP
				p.To.Type = obj.TYPE_REG
				p.To.Reg = ppc64.REG_VS32

				p = s.Prog(ppc64.ASTXVD2X)
				p.From.Type = obj.TYPE_REG
				p.From.Reg = ppc64.REG_VS32
				p.To.Type = obj.TYPE_MEM
				p.To.Reg = dstReg
				p.To.Index = ppc64.REGTMP

				offset = 32
				rem -= 16
			}
		}

		// Generate all the remaining load and store pairs, starting with
		// as many 8 byte moves as possible, then 4, 2, 1.
		for rem > 0 {
			op, size := ppc64.AMOVB, int64(1)
			switch {
			case rem >= 8:
				op, size = ppc64.AMOVD, 8
			case rem >= 4:
				op, size = ppc64.AMOVWZ, 4
			case rem >= 2:
				op, size = ppc64.AMOVH, 2
			}
			// Load
			p := s.Prog(op)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = srcReg
			p.From.Offset = offset

			// Store
			p = s.Prog(op)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REGTMP
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dstReg
			p.To.Offset = offset
			rem -= size
			offset += size
		}

	case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
		bytesPerLoop := int64(64)
		// This is used when moving more
		// than 8 bytes on power9.  Moves start with
		// as many 8 byte moves as possible, then
		// 4, 2, or 1 byte(s) as remaining.  This will
		// work and be efficient for power8 or later.
		// If there are 128 or more bytes, then a
		// loop is generated to move 64 bytes and
		// update the src and dst addresses on each
		// iteration. Otherwise, the appropriate
		// number of moves are generated based on the
		// size.
		// When a loop is generated it has the form
		//      MOVD len/64,REG_TMP
		//      MOVD REG_TMP,CTR
		// top:
		//      LXV 0(R21),VS32
		//      LXV 16(R21),VS33
		//      ADD $32,R21
		//      STXV VS32,0(R20)
		//      STXV VS33,16(R20)
		//      ADD $32,R20
		//      BC 16,0,top
		// Bytes not moved by this loop are moved
		// with a combination of the following instructions,
		// starting with the largest sizes and generating as
		// many as needed, using the appropriate offset value.
		//      MOVD  n(R21),R31
		//      MOVD  R31,n(R20)
		//      MOVW  n1(R21),R31
		//      MOVW  R31,n1(R20)
		//      MOVH  n2(R21),R31
		//      MOVH  R31,n2(R20)
		//      MOVB  n3(R21),R31
		//      MOVB  R31,n3(R20)

		// Each loop iteration moves 64 bytes
		ctr := v.AuxInt / bytesPerLoop

		// Remainder after the loop
		rem := v.AuxInt % bytesPerLoop

		dstReg := v.Args[0].Reg()
		srcReg := v.Args[1].Reg()

		offset := int64(0)

		// top of the loop
		var top *obj.Prog

		// Only generate looping code when loop counter is > 1 for >= 64 bytes
		if ctr > 1 {
			// Set up the CTR
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ctr
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP

			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REGTMP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_CTR

			p = s.Prog(obj.APCALIGN)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 16

			// Generate 16 byte loads and stores.
			p = s.Prog(ppc64.ALXV)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = srcReg
			p.From.Offset = offset
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS32
			// top was just declared, so the first LXV is always the loop top.
			if top == nil {
				top = p
			}
			p = s.Prog(ppc64.ALXV)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = srcReg
			p.From.Offset = offset + 16
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS33

			// generate 16 byte stores
			p = s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dstReg
			p.To.Offset = offset

			p = s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS33
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dstReg
			p.To.Offset = offset + 16

			// Generate 16 byte loads and stores.
			p = s.Prog(ppc64.ALXV)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = srcReg
			p.From.Offset = offset + 32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS32

			p = s.Prog(ppc64.ALXV)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = srcReg
			p.From.Offset = offset + 48
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS33

			// generate 16 byte stores
			p = s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dstReg
			p.To.Offset = offset + 32

			p = s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS33
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dstReg
			p.To.Offset = offset + 48

			// increment the src reg for next iteration
			p = s.Prog(ppc64.AADD)
			p.Reg = srcReg
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = bytesPerLoop
			p.To.Type = obj.TYPE_REG
			p.To.Reg = srcReg

			// increment the dst reg for next iteration
			p = s.Prog(ppc64.AADD)
			p.Reg = dstReg
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = bytesPerLoop
			p.To.Type = obj.TYPE_REG
			p.To.Reg = dstReg

			// BC with BO_BCTR generates bdnz to branch on nonzero CTR
			// to loop top.
			p = s.Prog(ppc64.ABC)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ppc64.BO_BCTR
			p.Reg = ppc64.REG_CR0LT
			p.To.Type = obj.TYPE_BRANCH
			p.To.SetTarget(top)

			// srcReg and dstReg were incremented in the loop, so
			// later instructions start with offset 0.
			offset = int64(0)
		}

		// No loop was generated for one iteration, so
		// add bytesPerLoop (64) bytes to the remainder to move those bytes.
		if ctr == 1 {
			rem += bytesPerLoop
		}
		if rem >= 32 {
			p := s.Prog(ppc64.ALXV)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = srcReg
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS32

			p = s.Prog(ppc64.ALXV)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = srcReg
			p.From.Offset = 16
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS33

			p = s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dstReg

			p = s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS33
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dstReg
			p.To.Offset = 16

			offset = 32
			rem -= 32
		}

		if rem >= 16 {
			// Generate 16 byte loads and stores.
			p := s.Prog(ppc64.ALXV)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = srcReg
			p.From.Offset = offset
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS32

			p = s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dstReg
			p.To.Offset = offset

			offset += 16
			rem -= 16

			if rem >= 16 {
				p := s.Prog(ppc64.ALXV)
				p.From.Type = obj.TYPE_MEM
				p.From.Reg = srcReg
				p.From.Offset = offset
				p.To.Type = obj.TYPE_REG
				p.To.Reg = ppc64.REG_VS32

				p = s.Prog(ppc64.ASTXV)
				p.From.Type = obj.TYPE_REG
				p.From.Reg = ppc64.REG_VS32
				p.To.Type = obj.TYPE_MEM
				p.To.Reg = dstReg
				p.To.Offset = offset

				offset += 16
				rem -= 16
			}
		}
		// Generate all the remaining load and store pairs, starting with
		// as many 8 byte moves as possible, then 4, 2, 1.
		for rem > 0 {
			op, size := ppc64.AMOVB, int64(1)
			switch {
			case rem >= 8:
				op, size = ppc64.AMOVD, 8
			case rem >= 4:
				op, size = ppc64.AMOVWZ, 4
			case rem >= 2:
				op, size = ppc64.AMOVH, 2
			}
			// Load
			p := s.Prog(op)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = srcReg
			p.From.Offset = offset

			// Store
			p = s.Prog(op)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REGTMP
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dstReg
			p.To.Offset = offset
			rem -= size
			offset += size
		}

	case ssa.OpPPC64CALLstatic:
		// Direct call to a symbol known at compile time.
		s.Call(v)

	case ssa.OpPPC64CALLtail:
		// Tail call: control transfers to the callee in this frame.
		s.TailCall(v)

	case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
		// Indirect call: the target address arrives in a register.
		// Copy it into LR so the call can branch through the link register.
		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_LR

		// The register allocator must have placed the target address in R12;
		// anything else indicates a compiler bug.
		if v.Args[0].Reg() != ppc64.REG_R12 {
			v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
		}

		pp := s.Call(v)

		// Convert the call into a blrl with hint this is not a subroutine return.
		// The full bclrl opcode must be specified when passing a hint.
		pp.As = ppc64.ABCL
		pp.From.Type = obj.TYPE_CONST
		pp.From.Offset = ppc64.BO_ALWAYS
		pp.Reg = ppc64.REG_CR0LT // The preferred value if BI is ignored.
		pp.To.Reg = ppc64.REG_LR
		pp.AddRestSourceConst(1) // BH hint operand (1 = not a subroutine return).

		if ppc64.NeedTOCpointer(base.Ctxt) {
			// When compiling Go into PIC, the function we just
			// called via pointer might have been implemented in
			// a separate module and so overwritten the TOC
			// pointer in R2; reload it.
			q := s.Prog(ppc64.AMOVD)
			q.From.Type = obj.TYPE_MEM
			q.From.Offset = 24 // TOC save slot in the frame -- confirm offset against the ABI in use.
			q.From.Reg = ppc64.REGSP
			q.To.Type = obj.TYPE_REG
			q.To.Reg = ppc64.REG_R2
		}
  1909  
	case ssa.OpPPC64LoweredWB:
		// Call the runtime GC write-barrier routine.
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		// AuxInt encodes how many buffer entries we need; it selects
		// which gcWriteBarrier variant to call.
		p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
  1916  
  1917  	case ssa.OpPPC64LoweredPanicBoundsRR, ssa.OpPPC64LoweredPanicBoundsRC, ssa.OpPPC64LoweredPanicBoundsCR, ssa.OpPPC64LoweredPanicBoundsCC:
  1918  		// Compute the constant we put in the PCData entry for this call.
  1919  		code, signed := ssa.BoundsKind(v.AuxInt).Code()
  1920  		xIsReg := false
  1921  		yIsReg := false
  1922  		xVal := 0
  1923  		yVal := 0
  1924  		switch v.Op {
  1925  		case ssa.OpPPC64LoweredPanicBoundsRR:
  1926  			xIsReg = true
  1927  			xVal = int(v.Args[0].Reg() - ppc64.REG_R3)
  1928  			yIsReg = true
  1929  			yVal = int(v.Args[1].Reg() - ppc64.REG_R3)
  1930  		case ssa.OpPPC64LoweredPanicBoundsRC:
  1931  			xIsReg = true
  1932  			xVal = int(v.Args[0].Reg() - ppc64.REG_R3)
  1933  			c := v.Aux.(ssa.PanicBoundsC).C
  1934  			if c >= 0 && c <= abi.BoundsMaxConst {
  1935  				yVal = int(c)
  1936  			} else {
  1937  				// Move constant to a register
  1938  				yIsReg = true
  1939  				if yVal == xVal {
  1940  					yVal = 1
  1941  				}
  1942  				p := s.Prog(ppc64.AMOVD)
  1943  				p.From.Type = obj.TYPE_CONST
  1944  				p.From.Offset = c
  1945  				p.To.Type = obj.TYPE_REG
  1946  				p.To.Reg = ppc64.REG_R3 + int16(yVal)
  1947  			}
  1948  		case ssa.OpPPC64LoweredPanicBoundsCR:
  1949  			yIsReg = true
  1950  			yVal := int(v.Args[0].Reg() - ppc64.REG_R3)
  1951  			c := v.Aux.(ssa.PanicBoundsC).C
  1952  			if c >= 0 && c <= abi.BoundsMaxConst {
  1953  				xVal = int(c)
  1954  			} else {
  1955  				// Move constant to a register
  1956  				if xVal == yVal {
  1957  					xVal = 1
  1958  				}
  1959  				p := s.Prog(ppc64.AMOVD)
  1960  				p.From.Type = obj.TYPE_CONST
  1961  				p.From.Offset = c
  1962  				p.To.Type = obj.TYPE_REG
  1963  				p.To.Reg = ppc64.REG_R3 + int16(xVal)
  1964  			}
  1965  		case ssa.OpPPC64LoweredPanicBoundsCC:
  1966  			c := v.Aux.(ssa.PanicBoundsCC).Cx
  1967  			if c >= 0 && c <= abi.BoundsMaxConst {
  1968  				xVal = int(c)
  1969  			} else {
  1970  				// Move constant to a register
  1971  				xIsReg = true
  1972  				p := s.Prog(ppc64.AMOVD)
  1973  				p.From.Type = obj.TYPE_CONST
  1974  				p.From.Offset = c
  1975  				p.To.Type = obj.TYPE_REG
  1976  				p.To.Reg = ppc64.REG_R3 + int16(xVal)
  1977  			}
  1978  			c = v.Aux.(ssa.PanicBoundsCC).Cy
  1979  			if c >= 0 && c <= abi.BoundsMaxConst {
  1980  				yVal = int(c)
  1981  			} else {
  1982  				// Move constant to a register
  1983  				yIsReg = true
  1984  				yVal = 1
  1985  				p := s.Prog(ppc64.AMOVD)
  1986  				p.From.Type = obj.TYPE_CONST
  1987  				p.From.Offset = c
  1988  				p.To.Type = obj.TYPE_REG
  1989  				p.To.Reg = ppc64.REG_R3 + int16(yVal)
  1990  			}
  1991  		}
  1992  		c := abi.BoundsEncode(code, signed, xIsReg, yIsReg, xVal, yVal)
  1993  
  1994  		p := s.Prog(obj.APCDATA)
  1995  		p.From.SetConst(abi.PCDATA_PanicBounds)
  1996  		p.To.SetConst(int64(c))
  1997  		p = s.Prog(obj.ACALL)
  1998  		p.To.Type = obj.TYPE_MEM
  1999  		p.To.Name = obj.NAME_EXTERN
  2000  		p.To.Sym = ir.Syms.PanicBounds
  2001  
	case ssa.OpPPC64LoweredNilCheck:
		// Arrange for a fault if the pointer in Args[0] is nil.
		if buildcfg.GOOS == "aix" {
			// On AIX a faulting load is not used; instead compare the
			// pointer against zero explicitly and, when it is zero,
			// perform a forbidden store to address 0.
			// CMP Rarg0, $0
			// BNE 2(PC)
			// STW R0, 0(R0)
			// NOP (so the BNE has somewhere to land)

			// CMP Rarg0, $0
			p := s.Prog(ppc64.ACMP)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = v.Args[0].Reg()
			p.To.Type = obj.TYPE_CONST
			p.To.Offset = 0

			// BNE 2(PC)
			p2 := s.Prog(ppc64.ABNE)
			p2.To.Type = obj.TYPE_BRANCH

			// STW R0, 0(R0)
			// Write at 0 is forbidden and will trigger a SIGSEGV
			p = s.Prog(ppc64.AMOVW)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_R0
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = ppc64.REG_R0

			// NOP (so the BNE has somewhere to land)
			nop := s.Prog(obj.ANOP)
			p2.To.SetTarget(nop)

		} else {
			// Issue a load which will fault if arg is nil.
			// The loaded byte is discarded into REGTMP.
			p := s.Prog(ppc64.AMOVBZ)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = v.Args[0].Reg()
			ssagen.AddAux(&p.From, v)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP
		}
		// Record the nil check for -json=0,... optimization logging and
		// for the -d=nil debug warning.
		if logopt.Enabled() {
			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
		}
		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			base.WarnfAt(v.Pos, "generated nil check")
		}
  2047  
	// These should be resolved by rules and not make it here.
	case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
		ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
		ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
		v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
	case ssa.OpPPC64InvertFlags:
		// InvertFlags should likewise have been eliminated by rewrite rules.
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
		// Constant-flag pseudo-values should have been eliminated by rules.
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpClobber, ssa.OpClobberReg:
		// TODO: implement for clobberdead experiment. Nop is ok for now.
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
  2061  	}
  2062  }
  2063  
  2064  var blockJump = [...]struct {
  2065  	asm, invasm     obj.As
  2066  	asmeq, invasmun bool
  2067  }{
  2068  	ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
  2069  	ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
  2070  
  2071  	ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
  2072  	ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
  2073  	ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
  2074  	ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
  2075  
  2076  	// TODO: need to work FP comparisons into block jumps
  2077  	ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
  2078  	ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
  2079  	ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
  2080  	ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
  2081  }
  2082  
  2083  func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
  2084  	switch b.Kind {
  2085  	case ssa.BlockPlain, ssa.BlockDefer:
  2086  		if b.Succs[0].Block() != next {
  2087  			p := s.Prog(obj.AJMP)
  2088  			p.To.Type = obj.TYPE_BRANCH
  2089  			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
  2090  		}
  2091  	case ssa.BlockExit, ssa.BlockRetJmp:
  2092  	case ssa.BlockRet:
  2093  		s.Prog(obj.ARET)
  2094  
  2095  	case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
  2096  		ssa.BlockPPC64LT, ssa.BlockPPC64GE,
  2097  		ssa.BlockPPC64LE, ssa.BlockPPC64GT,
  2098  		ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
  2099  		ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
  2100  		jmp := blockJump[b.Kind]
  2101  		switch next {
  2102  		case b.Succs[0].Block():
  2103  			s.Br(jmp.invasm, b.Succs[1].Block())
  2104  			if jmp.invasmun {
  2105  				// TODO: The second branch is probably predict-not-taken since it is for FP unordered
  2106  				s.Br(ppc64.ABVS, b.Succs[1].Block())
  2107  			}
  2108  		case b.Succs[1].Block():
  2109  			s.Br(jmp.asm, b.Succs[0].Block())
  2110  			if jmp.asmeq {
  2111  				s.Br(ppc64.ABEQ, b.Succs[0].Block())
  2112  			}
  2113  		default:
  2114  			if b.Likely != ssa.BranchUnlikely {
  2115  				s.Br(jmp.asm, b.Succs[0].Block())
  2116  				if jmp.asmeq {
  2117  					s.Br(ppc64.ABEQ, b.Succs[0].Block())
  2118  				}
  2119  				s.Br(obj.AJMP, b.Succs[1].Block())
  2120  			} else {
  2121  				s.Br(jmp.invasm, b.Succs[1].Block())
  2122  				if jmp.invasmun {
  2123  					// TODO: The second branch is probably predict-not-taken since it is for FP unordered
  2124  					s.Br(ppc64.ABVS, b.Succs[1].Block())
  2125  				}
  2126  				s.Br(obj.AJMP, b.Succs[0].Block())
  2127  			}
  2128  		}
  2129  	default:
  2130  		b.Fatalf("branch not implemented: %s", b.LongString())
  2131  	}
  2132  }
  2133  
  2134  func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  2135  	p := s.Prog(loadByType(t))
  2136  	p.From.Type = obj.TYPE_MEM
  2137  	p.From.Name = obj.NAME_AUTO
  2138  	p.From.Sym = n.Linksym()
  2139  	p.From.Offset = n.FrameOffset() + off
  2140  	p.To.Type = obj.TYPE_REG
  2141  	p.To.Reg = reg
  2142  	return p
  2143  }
  2144  
  2145  func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  2146  	p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
  2147  	p.To.Name = obj.NAME_PARAM
  2148  	p.To.Sym = n.Linksym()
  2149  	p.Pos = p.Pos.WithNotStmt()
  2150  	return p
  2151  }
  2152  

View as plain text