1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114 package ssa
115
import (
	"cmd/compile/internal/base"
	"cmd/compile/internal/ir"
	"cmd/compile/internal/types"
	"cmd/internal/src"
	"cmd/internal/sys"
	"fmt"
	"internal/buildcfg"
	"math"
	"math/bits"
	"strings"
	"unsafe"
)
128
// Debug-output thresholds, compared against s.f.pass.debug (larger
// debug values enable more verbose reporting).
const (
	moveSpills = iota
	logSpills  // warn when a spilled value is reloaded (see allocValToReg)
	regDebug   // trace individual register-allocation decisions
	stackDebug // not referenced in this chunk; presumably used by stack allocation — confirm
)
135
136
137
// Distance weights used when estimating how far away the next use of a
// value is: a use reached through a likely branch counts as closer than
// one reached through an unlikely branch. (Consumed by computeLive,
// which is defined outside this chunk — confirm.)
const (
	likelyDistance   = 1
	normalDistance   = 10
	unlikelyDistance = 100
)
143
144
145
// regalloc is the entry point of the register-allocation pass: it sets
// up the allocator state for f, runs the main allocation loop, and
// releases cached resources.
func regalloc(f *Func) {
	var s regAllocState
	s.init(f)
	s.regalloc(f)
	s.close()
}
152
// A register is an index into the regAllocState.registers slice.
type register uint8

// noRegister is a sentinel register index meaning "no register assigned".
const noRegister register = 255

// noRegisters is a convenient all-noRegister array.
var noRegisters [32]register = [32]register{
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
}

// A regMask encodes a set of machine registers, one bit per register index.
type regMask uint64

// String returns the set as a space-separated list of generic register
// names, e.g. "r0 r2 r5". An empty mask yields "".
func (m regMask) String() string {
	// Use a strings.Builder rather than repeated string concatenation,
	// which is quadratic in the number of set bits.
	var b strings.Builder
	for r := register(0); m != 0; r++ {
		if m>>r&1 == 0 {
			continue
		}
		m &^= regMask(1) << r
		if b.Len() > 0 {
			b.WriteByte(' ')
		}
		fmt.Fprintf(&b, "r%d", r)
	}
	return b.String()
}

// contains reports whether register r is in the set m.
func (m regMask) contains(r register) bool {
	return m>>r&1 != 0
}
187
188 func (s *regAllocState) RegMaskString(m regMask) string {
189 str := ""
190 for r := register(0); m != 0; r++ {
191 if m>>r&1 == 0 {
192 continue
193 }
194 m &^= regMask(1) << r
195 if str != "" {
196 str += " "
197 }
198 str += s.registers[r].String()
199 }
200 return str
201 }
202
203
// countRegs returns the number of registers in the register set r.
func countRegs(r regMask) int {
	return bits.OnesCount64(uint64(r))
}
207
208
209 func pickReg(r regMask) register {
210 if r == 0 {
211 panic("can't pick a register from an empty set")
212 }
213
214 return register(bits.TrailingZeros64(uint64(r)))
215 }
216
// A use records a single upcoming use of a value. Uses form a linked
// list per value, kept in nondecreasing dist order (see addUse).
type use struct {
	dist int32    // distance (in instructions) from the start of the block to the use
	pos  src.XPos // source position of the use
	next *use     // next use of the same value, farther in the future
}
227
228
// A valState records the allocator's state for a single value.
type valState struct {
	regs              regMask // set of registers currently holding this value
	uses              *use    // list of remaining uses, nearest first
	spill             *Value  // spilled copy of the value, if one has been made (see makeSpill)
	restoreMin        int32   // sdom entry/exit interval bounding where restores occur
	restoreMax        int32
	needReg           bool // cached v.needRegister()
	rematerializeable bool // cached v.rematerializeable()
}
238
// A regState records what a single register currently holds.
type regState struct {
	v *Value // pre-regalloc (original) value held in this register
	c *Value // cached copy of v living in this register (may equal v)
	// Invariant (see assignReg/freeReg): v != nil iff the register is in use.
}
244
// A regAllocState holds the working state of the register allocator for
// a single function.
type regAllocState struct {
	f *Func

	sdom        SparseTree // dominator tree (f.Sdom()); used to bound spill placement
	registers   []Register // machine register descriptions (f.Config.registers)
	numRegs     register   // len(registers)
	SPReg       register   // index of the stack-pointer register (name "SP")
	SBReg       register   // index of the static-base register (name "SB")
	GReg        register   // index of the g register (name "g"), if the arch has one
	ZeroIntReg  register   // index of a hardwired-zero register (name "ZERO"), or noRegister
	allocatable regMask    // registers the allocator is allowed to hand out

	// live values at the end of each block, indexed by block ID.
	// Populated by computeLive (defined outside this chunk).
	live [][]liveInfo

	// desired register assignments at the end of each block, indexed by
	// block ID (consumed via s.desired[b.ID] in the main loop).
	desired []desiredState

	// current state of each (pre-regalloc) Value, indexed by Value ID
	values []valState

	// IDs of the SP and SB values, recorded when their ops are processed
	sp, sb ID

	// orig[id] is the pre-regalloc Value that the Value with the given ID
	// was derived from (copies/loads map back to the original; see setOrig)
	orig []*Value

	// current contents of each register, indexed by register number
	regs []regState

	// registers holding values that may not be kicked out right now
	nospill regMask

	// mask of registers currently in use
	used regMask

	// mask of registers used since the start of the current block
	usedSinceBlockStart regMask

	// mask of registers holding temporaries for the current instruction
	tmpused regMask

	// block currently being processed
	curBlock *Block

	// free list of use records, recycled by addUse/advanceUses
	freeUseRecords *use

	// endRegs[blockid] is the register state at the end of each block,
	// encoded as a set of endReg records.
	endRegs [][]endReg

	// startRegs[blockid] is the register state required at the start of
	// merge blocks, encoded as a set of startReg records.
	startRegs [][]startReg

	// mask of the registers in startRegs for the current block; registers
	// may be dropped from it when they turn out to be unused before being
	// clobbered (see allocReg). Presumably synced back to startRegs later
	// in the pass — the sync is outside this chunk.
	startRegsMask regMask

	// spillLive[blockid] — spill-related value IDs live at the end of each
	// block; only its length is inspected in this chunk, so exact
	// semantics are TODO confirm.
	spillLive [][]ID

	// copies generated to move values between registers; unused copies
	// are presumably deleted later (the bool's meaning is set but not
	// read in this chunk — confirm).
	copies map[*Value]bool

	loopnest *loopnest // loop structure of f; not referenced in this chunk

	// order in which to visit blocks for allocation
	visitOrder []*Block

	// blockOrder[b.ID] = position of b in visitOrder
	blockOrder []int32

	// whether to insert instructions that clobber dead registers at calls
	// (enabled by -clobberdeadreg; see init)
	doClobber bool

	// nextCall[i] is the index of the next call at or after instruction
	// index i in the current block (math.MaxInt32 if none); a call points
	// to itself. Computed in the backward pre-scan of each block.
	nextCall []int32

	// index (within the current block) of the instruction being processed
	curIdx int
}
341
// An endReg records that register r holds value v (cached as c) at the
// end of a block.
type endReg struct {
	r register
	v *Value // pre-regalloc value held in this register
	c *Value // cached copy of the value in this register
}
347
// A startReg records that register r must hold value v (cached as c) at
// the start of a block.
type startReg struct {
	r   register
	v   *Value   // pre-regalloc value needed in this register
	c   *Value   // cached copy of the value
	pos src.XPos // source position of the use of this register
}
354
355
// freeReg frees up register r, kicking out whatever value it holds.
// It is a fatal error to free an already-free register.
func (s *regAllocState) freeReg(r register) {
	// Registers that are neither allocatable nor the g register carry no
	// tracked regState, so there is nothing to free.
	if !s.allocatable.contains(r) && !s.isGReg(r) {
		return
	}
	v := s.regs[r].v
	if v == nil {
		s.f.Fatalf("tried to free an already free register %d\n", r)
	}

	// Mark r as unused: clear the register slot and drop r from both the
	// value's register set and the global in-use mask.
	if s.f.pass.debug > regDebug {
		fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c)
	}
	s.regs[r] = regState{}
	s.values[v.ID].regs &^= regMask(1) << r
	s.used &^= regMask(1) << r
}
373
374
375 func (s *regAllocState) freeRegs(m regMask) {
376 for m&s.used != 0 {
377 s.freeReg(pickReg(m & s.used))
378 }
379 }
380
381
// clobberRegs inserts instructions that clobber (overwrite with junk)
// the allocatable general-purpose registers in m, to help catch reads
// of dead registers (enabled via -clobberdeadreg; see init).
func (s *regAllocState) clobberRegs(m regMask) {
	m &= s.allocatable & s.f.Config.gpRegMask // only integer registers are clobbered
	for m != 0 {
		r := pickReg(m)
		m &^= 1 << r
		x := s.curBlock.NewValue0(src.NoXPos, OpClobberReg, types.TypeVoid)
		s.f.setHome(x, &s.registers[r])
	}
}
391
392
393
// setOrig records that c's original (pre-regalloc) value is the same as
// v's original value. Copies and reloads generated during allocation
// all map back to the value the program originally computed.
func (s *regAllocState) setOrig(c *Value, v *Value) {
	// Grow s.orig to cover c.ID, reusing the cache's slice allocator.
	if int(c.ID) >= cap(s.orig) {
		x := s.f.Cache.allocValueSlice(int(c.ID) + 1)
		copy(x, s.orig)
		s.f.Cache.freeValueSlice(s.orig)
		s.orig = x
	}
	for int(c.ID) >= len(s.orig) {
		s.orig = append(s.orig, nil)
	}
	if s.orig[c.ID] != nil {
		s.f.Fatalf("orig value set twice %s %s", c, v)
	}
	// Note: we propagate v's own original, not v itself, so chains of
	// copies all resolve to the same pre-regalloc value.
	s.orig[c.ID] = s.orig[v.ID]
}
409
410
411
// assignReg assigns register r to hold c, a cached copy of v.
// For tracked registers, r must currently be unused.
func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
	if s.f.pass.debug > regDebug {
		fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
	}
	// Record that v lives in r and set c's home location.
	s.values[v.ID].regs |= regMask(1) << r
	s.f.setHome(c, &s.registers[r])

	// Registers that are neither allocatable nor the g register carry no
	// tracked regState; only the home assignment above applies.
	if !s.allocatable.contains(r) && !s.isGReg(r) {
		return
	}
	if s.regs[r].v != nil {
		s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
	}
	s.regs[r] = regState{v, c}
	s.used |= regMask(1) << r
}
430
431
432
433
434 func (s *regAllocState) allocReg(mask regMask, v *Value) register {
435 if v.OnWasmStack {
436 return noRegister
437 }
438
439 mask &= s.allocatable
440 mask &^= s.nospill
441 if mask == 0 {
442 s.f.Fatalf("no register available for %s", v.LongString())
443 }
444
445
446 if mask&^s.used != 0 {
447 r := pickReg(mask &^ s.used)
448 s.usedSinceBlockStart |= regMask(1) << r
449 return r
450 }
451
452
453
454
455
456
457
458
459
460
461
462 var r register
463 maxuse := int32(-1)
464 for t := register(0); t < s.numRegs; t++ {
465 if mask>>t&1 == 0 {
466 continue
467 }
468 v := s.regs[t].v
469 if n := s.values[v.ID].uses.dist; n > maxuse {
470
471
472 r = t
473 maxuse = n
474 }
475 }
476 if maxuse == -1 {
477 s.f.Fatalf("couldn't find register to spill")
478 }
479
480 if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
481
482
483
484 s.freeReg(r)
485 return r
486 }
487
488
489
490 v2 := s.regs[r].v
491 m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r)
492 if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
493 s.usedSinceBlockStart |= regMask(1) << r
494 r2 := pickReg(m)
495 c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
496 s.copies[c] = false
497 if s.f.pass.debug > regDebug {
498 fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2])
499 }
500 s.setOrig(c, v2)
501 s.assignReg(r2, v2, c)
502 }
503
504
505
506
507 if s.usedSinceBlockStart&(regMask(1)<<r) == 0 {
508 if s.startRegsMask&(regMask(1)<<r) == 1 {
509 if s.f.pass.debug > regDebug {
510 fmt.Printf("dropped from startRegs: %s\n", &s.registers[r])
511 }
512 s.startRegsMask &^= regMask(1) << r
513 }
514 }
515
516 s.freeReg(r)
517 s.usedSinceBlockStart |= regMask(1) << r
518 return r
519 }
520
521
522
// makeSpill returns a Value representing the spilled copy of v.
// b is a block in which the spill is used; the sdom entry/exit interval
// of the spill's possible restore sites is widened to include b.
func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
	vi := &s.values[v.ID]
	if vi.spill != nil {
		// A spill already exists; just widen the dominator-tree interval
		// over which restores of it occur.
		vi.restoreMin = min(vi.restoreMin, s.sdom[b.ID].entry)
		vi.restoreMax = max(vi.restoreMax, s.sdom[b.ID].exit)
		return vi.spill
	}
	// Make a new spill. Its final placement isn't known yet, so it is
	// left blockless for now.
	spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)
	// Record the original value the spill derives from.
	s.setOrig(spill, v)
	vi.spill = spill
	vi.restoreMin = s.sdom[b.ID].entry
	vi.restoreMax = s.sdom[b.ID].exit
	return spill
}
542
543
544
545
546
547
548
// allocValToReg allocates v to a register selected from mask and
// returns the register copy of v. Any previous occupant of the chosen
// register is kicked out (and spilled if necessary). Copy/load code is
// inserted in the current block at position pos. If nospill is set, the
// chosen register is marked so it cannot be evicted until the caller
// clears s.nospill.
func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
	// Wasm: rematerializeable values live directly on the wasm stack.
	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm && v.rematerializeable() {
		c := v.copyIntoWithXPos(s.curBlock, pos)
		c.OnWasmStack = true
		s.setOrig(c, v)
		return c
	}
	if v.OnWasmStack {
		return v
	}

	vi := &s.values[v.ID]
	pos = pos.WithNotStmt()
	// Check if v is already in a register from the requested set.
	if mask&vi.regs != 0 {
		r := pickReg(mask & vi.regs)
		if !s.allocatable.contains(r) {
			return v // v is in a fixed (untracked) register
		}
		if s.regs[r].v != v || s.regs[r].c == nil {
			panic("bad register state")
		}
		if nospill {
			s.nospill |= regMask(1) << r
		}
		s.usedSinceBlockStart |= regMask(1) << r
		return s.regs[r].c
	}

	var r register
	// If nospill is set, the value is used immediately, so it can live on
	// the WebAssembly stack instead of in a register.
	onWasmStack := nospill && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm
	if !onWasmStack {
		// Allocate a register (possibly evicting another value).
		r = s.allocReg(mask, v)
	}

	// Produce a copy of v for the new register.
	var c *Value
	if vi.regs != 0 {
		// v is already in some (other) register: copy from there.
		r2 := pickReg(vi.regs)
		var current *Value
		if !s.allocatable.contains(r2) {
			current = v // fixed register; copy from v itself
		} else {
			if s.regs[r2].v != v {
				panic("bad register state")
			}
			current = s.regs[r2].c
		}
		s.usedSinceBlockStart |= regMask(1) << r2
		c = s.curBlock.NewValue1(pos, OpCopy, v.Type, current)
	} else if v.rematerializeable() {
		// Recompute v instead of loading it from the spill location.
		c = v.copyIntoWithXPos(s.curBlock, pos)
	} else {
		// Load v from its spill location.
		spill := s.makeSpill(v, s.curBlock)
		if s.f.pass.debug > logSpills {
			s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
		}
		c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
	}

	s.setOrig(c, v)

	if onWasmStack {
		c.OnWasmStack = true
		return c
	}

	s.assignReg(r, v, c)
	// Loading into the g register would corrupt the goroutine pointer.
	if c.Op == OpLoadReg && s.isGReg(r) {
		s.f.Fatalf("allocValToReg.OpLoadReg targeting g: " + c.LongString())
	}
	if nospill {
		s.nospill |= regMask(1) << r
	}
	return c
}
630
631
632 func isLeaf(f *Func) bool {
633 for _, b := range f.Blocks {
634 for _, v := range b.Values {
635 if v.Op.IsCall() && !v.Op.IsTailCall() {
636
637 return false
638 }
639 }
640 }
641 return true
642 }
643
644
645 func (v *Value) needRegister() bool {
646 return !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple()
647 }
648
// init sets up the register-allocator state for function f: it
// identifies special registers, computes the allocatable register set,
// chooses a block visit order, and precomputes per-value state.
func (s *regAllocState) init(f *Func) {
	s.f = f
	s.f.RegAlloc = s.f.Cache.locs[:0]
	s.registers = f.Config.registers
	// The register count must fit in a register index and in a regMask.
	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
		s.f.Fatalf("bad number of registers: %d", nr)
	} else {
		s.numRegs = register(nr)
	}
	// Locate the SP, SB, g, and zero registers by name.
	s.SPReg = noRegister
	s.SBReg = noRegister
	s.GReg = noRegister
	s.ZeroIntReg = noRegister
	for r := register(0); r < s.numRegs; r++ {
		switch s.registers[r].String() {
		case "SP":
			s.SPReg = r
		case "SB":
			s.SBReg = r
		case "g":
			s.GReg = r
		case "ZERO":
			s.ZeroIntReg = r
		}
	}
	// Make sure the required special registers were found.
	// (Comparing noRegister against each field: a match means "not found".)
	switch noRegister {
	case s.SPReg:
		s.f.Fatalf("no SP register found")
	case s.SBReg:
		s.f.Fatalf("no SB register found")
	case s.GReg:
		if f.Config.hasGReg {
			s.f.Fatalf("no g register found")
		}
	}

	// Figure out which registers we're allowed to use.
	s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask
	s.allocatable &^= 1 << s.SPReg
	s.allocatable &^= 1 << s.SBReg
	if s.f.Config.hasGReg {
		s.allocatable &^= 1 << s.GReg
	}
	if s.ZeroIntReg != noRegister {
		s.allocatable &^= 1 << s.ZeroIntReg
	}
	if buildcfg.FramePointerEnabled && s.f.Config.FPReg >= 0 {
		s.allocatable &^= 1 << uint(s.f.Config.FPReg)
	}
	if s.f.Config.LinkReg != -1 {
		if isLeaf(f) {
			// Leaf functions don't save/restore the link register,
			// so it is unavailable for general use.
			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
		}
	}
	if s.f.Config.ctxt.Flag_dynlink {
		// Reserve registers used by each platform's dynamic-linking ABI.
		switch s.f.Config.arch {
		case "386":
			// nothing to reserve
		case "amd64":
			s.allocatable &^= 1 << 15 // R15
		case "arm":
			s.allocatable &^= 1 << 9 // R9
		case "arm64":
			// nothing to reserve
		case "loong64":
			// nothing to reserve
		case "ppc64le":
			// nothing to reserve
		case "riscv64":
			// nothing to reserve
		case "s390x":
			s.allocatable &^= 1 << 11 // R11
		default:
			s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
		}
	}

	// Linear-scan allocation is influenced by the order in which blocks
	// appear; decouple the allocation order from the generated block
	// order by computing a dedicated visit order.
	s.visitOrder = layoutRegallocOrder(f)

	// blockOrder lets us distinguish forward edges from backward edges.
	s.blockOrder = make([]int32, f.NumBlocks())
	for i, b := range s.visitOrder {
		s.blockOrder[b.ID] = int32(i)
	}

	s.regs = make([]regState, s.numRegs)
	nv := f.NumValues()
	// Reuse the cached valState slice when it is big enough.
	if cap(s.f.Cache.regallocValues) >= nv {
		s.f.Cache.regallocValues = s.f.Cache.regallocValues[:nv]
	} else {
		s.f.Cache.regallocValues = make([]valState, nv)
	}
	s.values = s.f.Cache.regallocValues
	s.orig = s.f.Cache.allocValueSlice(nv)
	s.copies = make(map[*Value]bool)
	for _, b := range s.visitOrder {
		for _, v := range b.Values {
			if v.needRegister() {
				s.values[v.ID].needReg = true
				s.values[v.ID].rematerializeable = v.rematerializeable()
				s.orig[v.ID] = v
			}
			// Note: values that don't need a register (memory, void,
			// flags, tuple) get no orig entry.
		}
	}
	s.computeLive()

	s.endRegs = make([][]endReg, f.NumBlocks())
	s.startRegs = make([][]startReg, f.NumBlocks())
	s.spillLive = make([][]ID, f.NumBlocks())
	s.sdom = f.Sdom()

	// Wasm: mark values that can live purely on the WebAssembly operand
	// stack (single use, same block, non-generic op) so they skip
	// register allocation.
	if f.Config.ctxt.Arch.Arch == sys.ArchWasm {
		canLiveOnStack := f.newSparseSet(f.NumValues())
		defer f.retSparseSet(canLiveOnStack)
		for _, b := range f.Blocks {
			// New block: reset the candidate set.
			canLiveOnStack.clear()
			for _, c := range b.ControlValues() {
				if c.Uses == 1 && !opcodeTable[c.Op].generic {
					canLiveOnStack.add(c.ID)
				}
			}
			// Walk backwards so a value is seen after its single use.
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				if canLiveOnStack.contains(v.ID) {
					v.OnWasmStack = true
				} else {
					// v cannot live on the stack; values may not be
					// reordered past it, so clear the candidate set.
					canLiveOnStack.clear()
				}
				for _, arg := range v.Args {
					// An argument is a candidate if it has exactly one
					// use, lives in the same block, is not memory, and
					// is not a generic op.
					if arg.Uses == 1 && arg.Block == v.Block && !arg.Type.IsMemory() && !opcodeTable[arg.Op].generic {
						canLiveOnStack.add(arg.ID)
					}
				}
			}
		}
	}

	// The clobberdeadreg mode inserts code to clobber dead registers at
	// call sites; skip very large functions to bound compile time.
	if base.Flag.ClobberDeadReg && len(s.f.Blocks) <= 10000 {
		s.doClobber = true
	}
}
816
// close returns the allocator's temporary storage to the function cache.
func (s *regAllocState) close() {
	s.f.Cache.freeValueSlice(s.orig)
}
820
821
822
823 func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
824 r := s.freeUseRecords
825 if r != nil {
826 s.freeUseRecords = r.next
827 } else {
828 r = &use{}
829 }
830 r.dist = dist
831 r.pos = pos
832 r.next = s.values[id].uses
833 s.values[id].uses = r
834 if r.next != nil && dist > r.next.dist {
835 s.f.Fatalf("uses added in wrong order")
836 }
837 }
838
839
840
// advanceUses advances the use lists of v's args past v (v has just
// been processed). Arguments with no further use before the next call
// (and dead arguments) are freed from registers; fixed-register values
// are kept since they cost nothing.
func (s *regAllocState) advanceUses(v *Value) {
	for _, a := range v.Args {
		if !s.values[a.ID].needReg {
			continue
		}
		ai := &s.values[a.ID]
		r := ai.uses
		ai.uses = r.next
		if r.next == nil || (!opcodeTable[a.Op].fixedReg && r.next.dist > s.nextCall[s.curIdx]) {
			// Value is dead (or its next use is after a call that would
			// clobber it anyway): free all registers holding it.
			s.freeRegs(ai.regs)
		}
		// Return the consumed use record to the free list.
		r.next = s.freeUseRecords
		s.freeUseRecords = r
	}
	s.dropIfUnused(v)
}
858
859
860
// dropIfUnused frees the registers holding v if v has no remaining use
// in this block before the next call (fixed-register values excepted).
func (s *regAllocState) dropIfUnused(v *Value) {
	if !s.values[v.ID].needReg {
		return
	}
	vi := &s.values[v.ID]
	r := vi.uses
	if r == nil || (!opcodeTable[v.Op].fixedReg && r.dist > s.nextCall[s.curIdx]) {
		s.freeRegs(vi.regs)
	}
}
871
872
873
874
875 func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
876 u := s.values[v.ID].uses
877 if u == nil {
878 panic(fmt.Errorf("u is nil, v = %s, s.values[v.ID] = %v", v.LongString(), s.values[v.ID]))
879 }
880 d := u.dist
881 for u != nil && u.dist == d {
882 u = u.next
883 }
884 return u != nil && u.dist > d
885 }
886
887
888 func (s *regAllocState) setState(regs []endReg) {
889 s.freeRegs(s.used)
890 for _, x := range regs {
891 s.assignReg(x.r, x.v, x.c)
892 }
893 }
894
895
896 func (s *regAllocState) compatRegs(t *types.Type) regMask {
897 var m regMask
898 if t.IsTuple() || t.IsFlags() {
899 return 0
900 }
901 if t.IsFloat() || t == types.TypeInt128 {
902 if t.Kind() == types.TFLOAT32 && s.f.Config.fp32RegMask != 0 {
903 m = s.f.Config.fp32RegMask
904 } else if t.Kind() == types.TFLOAT64 && s.f.Config.fp64RegMask != 0 {
905 m = s.f.Config.fp64RegMask
906 } else {
907 m = s.f.Config.fpRegMask
908 }
909 } else {
910 m = s.f.Config.gpRegMask
911 }
912 return m & s.allocatable
913 }
914
915
// regspec returns the register constraints (regInfo) for v, handling
// the ops whose constraints are not fully described by the static
// opcode table.
func (s *regAllocState) regspec(v *Value) regInfo {
	op := v.Op
	if op == OpConvert {
		// OpConvert is a generic op without a static register set; it
		// can use any allocatable integer register for input and output.
		m := s.allocatable & s.f.Config.gpRegMask
		return regInfo{inputs: []inputInfo{{regs: m}}, outputs: []outputInfo{{regs: m}}}
	}
	if op == OpArgIntReg {
		// Result must be in the specific integer parameter register
		// selected by the op's AuxInt.
		reg := v.Block.Func.Config.intParamRegs[v.AuxInt8()]
		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
	}
	if op == OpArgFloatReg {
		// Same, for float parameter registers.
		reg := v.Block.Func.Config.floatParamRegs[v.AuxInt8()]
		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
	}
	if op.IsCall() {
		// Calls carrying register-ABI info supply their own regInfo.
		if ac, ok := v.Aux.(*AuxCall); ok && ac.reg != nil {
			return *ac.Reg(&opcodeTable[op].reg, s.f.Config)
		}
	}
	if op == OpMakeResult && s.f.OwnAux.reg != nil {
		// Results of this function use its ABI-assigned result registers.
		return *s.f.OwnAux.ResultReg(s.f.Config)
	}
	return opcodeTable[op].reg
}
943
944 func (s *regAllocState) isGReg(r register) bool {
945 return s.f.Config.hasGReg && s.GReg == r
946 }
947
948
// tmpVal is a dummy Value passed to allocReg when grabbing a scratch
// register for ops that need an integer temporary (needIntTemp).
var tmpVal Value
950
951 func (s *regAllocState) regalloc(f *Func) {
952 regValLiveSet := f.newSparseSet(f.NumValues())
953 defer f.retSparseSet(regValLiveSet)
954 var oldSched []*Value
955 var phis []*Value
956 var phiRegs []register
957 var args []*Value
958
959
960 var desired desiredState
961 desiredSecondReg := map[ID][4]register{}
962
963
964 type dentry struct {
965 out [4]register
966 in [3][4]register
967 }
968 var dinfo []dentry
969
970 if f.Entry != f.Blocks[0] {
971 f.Fatalf("entry block must be first")
972 }
973
974 for _, b := range s.visitOrder {
975 if s.f.pass.debug > regDebug {
976 fmt.Printf("Begin processing block %v\n", b)
977 }
978 s.curBlock = b
979 s.startRegsMask = 0
980 s.usedSinceBlockStart = 0
981 clear(desiredSecondReg)
982
983
984
985 regValLiveSet.clear()
986 for _, e := range s.live[b.ID] {
987 s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos)
988 regValLiveSet.add(e.ID)
989 }
990 for _, v := range b.ControlValues() {
991 if s.values[v.ID].needReg {
992 s.addUse(v.ID, int32(len(b.Values)), b.Pos)
993 regValLiveSet.add(v.ID)
994 }
995 }
996 if len(s.nextCall) < len(b.Values) {
997 s.nextCall = append(s.nextCall, make([]int32, len(b.Values)-len(s.nextCall))...)
998 }
999 var nextCall int32 = math.MaxInt32
1000 for i := len(b.Values) - 1; i >= 0; i-- {
1001 v := b.Values[i]
1002 regValLiveSet.remove(v.ID)
1003 if v.Op == OpPhi {
1004
1005
1006
1007 s.nextCall[i] = nextCall
1008 continue
1009 }
1010 if opcodeTable[v.Op].call {
1011
1012 regValLiveSet.clear()
1013 if s.sp != 0 && s.values[s.sp].uses != nil {
1014 regValLiveSet.add(s.sp)
1015 }
1016 if s.sb != 0 && s.values[s.sb].uses != nil {
1017 regValLiveSet.add(s.sb)
1018 }
1019 nextCall = int32(i)
1020 }
1021 for _, a := range v.Args {
1022 if !s.values[a.ID].needReg {
1023 continue
1024 }
1025 s.addUse(a.ID, int32(i), v.Pos)
1026 regValLiveSet.add(a.ID)
1027 }
1028 s.nextCall[i] = nextCall
1029 }
1030 if s.f.pass.debug > regDebug {
1031 fmt.Printf("use distances for %s\n", b)
1032 for i := range s.values {
1033 vi := &s.values[i]
1034 u := vi.uses
1035 if u == nil {
1036 continue
1037 }
1038 fmt.Printf(" v%d:", i)
1039 for u != nil {
1040 fmt.Printf(" %d", u.dist)
1041 u = u.next
1042 }
1043 fmt.Println()
1044 }
1045 }
1046
1047
1048
1049 nphi := 0
1050 for _, v := range b.Values {
1051 if v.Op != OpPhi {
1052 break
1053 }
1054 nphi++
1055 }
1056 phis = append(phis[:0], b.Values[:nphi]...)
1057 oldSched = append(oldSched[:0], b.Values[nphi:]...)
1058 b.Values = b.Values[:0]
1059
1060
1061 if b == f.Entry {
1062
1063 if nphi > 0 {
1064 f.Fatalf("phis in entry block")
1065 }
1066 } else if len(b.Preds) == 1 {
1067
1068 s.setState(s.endRegs[b.Preds[0].b.ID])
1069 if nphi > 0 {
1070 f.Fatalf("phis in single-predecessor block")
1071 }
1072
1073
1074
1075 for r := register(0); r < s.numRegs; r++ {
1076 v := s.regs[r].v
1077 if v != nil && !regValLiveSet.contains(v.ID) {
1078 s.freeReg(r)
1079 }
1080 }
1081 } else {
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095 idx := -1
1096 for i, p := range b.Preds {
1097
1098
1099 pb := p.b
1100 if s.blockOrder[pb.ID] >= s.blockOrder[b.ID] {
1101 continue
1102 }
1103 if idx == -1 {
1104 idx = i
1105 continue
1106 }
1107 pSel := b.Preds[idx].b
1108 if len(s.spillLive[pb.ID]) < len(s.spillLive[pSel.ID]) {
1109 idx = i
1110 } else if len(s.spillLive[pb.ID]) == len(s.spillLive[pSel.ID]) {
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121 if pb.likelyBranch() && !pSel.likelyBranch() || s.blockOrder[pb.ID] < s.blockOrder[pSel.ID] {
1122 idx = i
1123 }
1124 }
1125 }
1126 if idx < 0 {
1127 f.Fatalf("bad visitOrder, no predecessor of %s has been visited before it", b)
1128 }
1129 p := b.Preds[idx].b
1130 s.setState(s.endRegs[p.ID])
1131
1132 if s.f.pass.debug > regDebug {
1133 fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
1134 for _, x := range s.endRegs[p.ID] {
1135 fmt.Printf(" %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c)
1136 }
1137 }
1138
1139
1140
1141
1142
1143 phiRegs = phiRegs[:0]
1144 var phiUsed regMask
1145
1146 for _, v := range phis {
1147 if !s.values[v.ID].needReg {
1148 phiRegs = append(phiRegs, noRegister)
1149 continue
1150 }
1151 a := v.Args[idx]
1152
1153
1154 m := s.values[a.ID].regs &^ phiUsed & s.allocatable
1155 if m != 0 {
1156 r := pickReg(m)
1157 phiUsed |= regMask(1) << r
1158 phiRegs = append(phiRegs, r)
1159 } else {
1160 phiRegs = append(phiRegs, noRegister)
1161 }
1162 }
1163
1164
1165 for i, v := range phis {
1166 if !s.values[v.ID].needReg {
1167 continue
1168 }
1169 a := v.Args[idx]
1170 r := phiRegs[i]
1171 if r == noRegister {
1172 continue
1173 }
1174 if regValLiveSet.contains(a.ID) {
1175
1176
1177
1178
1179
1180
1181
1182
1183 m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
1184 if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
1185 r2 := pickReg(m)
1186 c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
1187 s.copies[c] = false
1188 if s.f.pass.debug > regDebug {
1189 fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2])
1190 }
1191 s.setOrig(c, a)
1192 s.assignReg(r2, a, c)
1193 s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
1194 }
1195 }
1196 s.freeReg(r)
1197 }
1198
1199
1200 b.Values = append(b.Values, phis...)
1201
1202
1203
1204 for i, v := range phis {
1205 if !s.values[v.ID].needReg {
1206 continue
1207 }
1208 if phiRegs[i] != noRegister {
1209 continue
1210 }
1211 m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
1212
1213
1214 for i, pe := range b.Preds {
1215 if i == idx {
1216 continue
1217 }
1218 ri := noRegister
1219 for _, er := range s.endRegs[pe.b.ID] {
1220 if er.v == s.orig[v.Args[i].ID] {
1221 ri = er.r
1222 break
1223 }
1224 }
1225 if ri != noRegister && m>>ri&1 != 0 {
1226 m = regMask(1) << ri
1227 break
1228 }
1229 }
1230 if m != 0 {
1231 r := pickReg(m)
1232 phiRegs[i] = r
1233 phiUsed |= regMask(1) << r
1234 }
1235 }
1236
1237
1238 for i, v := range phis {
1239 if !s.values[v.ID].needReg {
1240 continue
1241 }
1242 r := phiRegs[i]
1243 if r == noRegister {
1244
1245
1246 s.values[v.ID].spill = v
1247 continue
1248 }
1249
1250 s.assignReg(r, v, v)
1251 }
1252
1253
1254 for r := register(0); r < s.numRegs; r++ {
1255 if phiUsed>>r&1 != 0 {
1256 continue
1257 }
1258 v := s.regs[r].v
1259 if v != nil && !regValLiveSet.contains(v.ID) {
1260 s.freeReg(r)
1261 }
1262 }
1263
1264
1265
1266
1267
1268 regList := make([]startReg, 0, 32)
1269 for r := register(0); r < s.numRegs; r++ {
1270 v := s.regs[r].v
1271 if v == nil {
1272 continue
1273 }
1274 if phiUsed>>r&1 != 0 {
1275
1276
1277 continue
1278 }
1279 regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
1280 s.startRegsMask |= regMask(1) << r
1281 }
1282 s.startRegs[b.ID] = make([]startReg, len(regList))
1283 copy(s.startRegs[b.ID], regList)
1284
1285 if s.f.pass.debug > regDebug {
1286 fmt.Printf("after phis\n")
1287 for _, x := range s.startRegs[b.ID] {
1288 fmt.Printf(" %s: v%d\n", &s.registers[x.r], x.v.ID)
1289 }
1290 }
1291 }
1292
1293
1294 for i, v := range phis {
1295 s.curIdx = i
1296 s.dropIfUnused(v)
1297 }
1298
1299
1300 if l := len(oldSched); cap(dinfo) < l {
1301 dinfo = make([]dentry, l)
1302 } else {
1303 dinfo = dinfo[:l]
1304 clear(dinfo)
1305 }
1306
1307
1308 desired.copy(&s.desired[b.ID])
1309
1310
1311
1312
1313
1314
1315 for _, e := range b.Succs {
1316 succ := e.b
1317
1318 for _, x := range s.startRegs[succ.ID] {
1319 desired.add(x.v.ID, x.r)
1320 }
1321
1322 pidx := e.i
1323 for _, v := range succ.Values {
1324 if v.Op != OpPhi {
1325 break
1326 }
1327 if !s.values[v.ID].needReg {
1328 continue
1329 }
1330 rp, ok := s.f.getHome(v.ID).(*Register)
1331 if !ok {
1332
1333
1334
1335
1336 for _, a := range v.Args {
1337 rp, ok = s.f.getHome(a.ID).(*Register)
1338 if ok {
1339 break
1340 }
1341 }
1342 if !ok {
1343 continue
1344 }
1345 }
1346 desired.add(v.Args[pidx].ID, register(rp.num))
1347 }
1348 }
1349
1350
1351 for i := len(oldSched) - 1; i >= 0; i-- {
1352 v := oldSched[i]
1353 prefs := desired.remove(v.ID)
1354 regspec := s.regspec(v)
1355 desired.clobber(regspec.clobbers)
1356 for _, j := range regspec.inputs {
1357 if countRegs(j.regs) != 1 {
1358 continue
1359 }
1360 desired.clobber(j.regs)
1361 desired.add(v.Args[j.idx].ID, pickReg(j.regs))
1362 }
1363 if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
1364 if opcodeTable[v.Op].commutative {
1365 desired.addList(v.Args[1].ID, prefs)
1366 }
1367 desired.addList(v.Args[0].ID, prefs)
1368 }
1369
1370 dinfo[i].out = prefs
1371 for j, a := range v.Args {
1372 if j >= len(dinfo[i].in) {
1373 break
1374 }
1375 dinfo[i].in[j] = desired.get(a.ID)
1376 }
1377 if v.Op == OpSelect1 && prefs[0] != noRegister {
1378
1379
1380 desiredSecondReg[v.Args[0].ID] = prefs
1381 }
1382 }
1383
1384
1385 for idx, v := range oldSched {
1386 s.curIdx = nphi + idx
1387 tmpReg := noRegister
1388 if s.f.pass.debug > regDebug {
1389 fmt.Printf(" processing %s\n", v.LongString())
1390 }
1391 regspec := s.regspec(v)
1392 if v.Op == OpPhi {
1393 f.Fatalf("phi %s not at start of block", v)
1394 }
1395 if opcodeTable[v.Op].fixedReg {
1396 switch v.Op {
1397 case OpSP:
1398 s.assignReg(s.SPReg, v, v)
1399 s.sp = v.ID
1400 case OpSB:
1401 s.assignReg(s.SBReg, v, v)
1402 s.sb = v.ID
1403 case OpARM64ZERO:
1404 s.assignReg(s.ZeroIntReg, v, v)
1405 default:
1406 f.Fatalf("unknown fixed-register op %s", v)
1407 }
1408 b.Values = append(b.Values, v)
1409 s.advanceUses(v)
1410 continue
1411 }
1412 if v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN {
1413 if s.values[v.ID].needReg {
1414 if v.Op == OpSelectN {
1415 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocResults)[int(v.AuxInt)].(*Register).num), v, v)
1416 } else {
1417 var i = 0
1418 if v.Op == OpSelect1 {
1419 i = 1
1420 }
1421 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
1422 }
1423 }
1424 b.Values = append(b.Values, v)
1425 s.advanceUses(v)
1426 continue
1427 }
1428 if v.Op == OpGetG && s.f.Config.hasGReg {
1429
1430 if s.regs[s.GReg].v != nil {
1431 s.freeReg(s.GReg)
1432 }
1433 s.assignReg(s.GReg, v, v)
1434 b.Values = append(b.Values, v)
1435 s.advanceUses(v)
1436 continue
1437 }
1438 if v.Op == OpArg {
1439
1440
1441
1442 s.values[v.ID].spill = v
1443 b.Values = append(b.Values, v)
1444 s.advanceUses(v)
1445 continue
1446 }
1447 if v.Op == OpKeepAlive {
1448
1449 s.advanceUses(v)
1450 a := v.Args[0]
1451 vi := &s.values[a.ID]
1452 if vi.regs == 0 && !vi.rematerializeable {
1453
1454
1455
1456 v.SetArg(0, s.makeSpill(a, b))
1457 } else if _, ok := a.Aux.(*ir.Name); ok && vi.rematerializeable {
1458
1459
1460
1461 v.Op = OpVarLive
1462 v.SetArgs1(v.Args[1])
1463 v.Aux = a.Aux
1464 } else {
1465
1466
1467
1468 v.Op = OpCopy
1469 v.SetArgs1(v.Args[1])
1470 }
1471 b.Values = append(b.Values, v)
1472 continue
1473 }
1474 if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
1475
1476 if s.doClobber && v.Op.IsCall() {
1477 s.clobberRegs(regspec.clobbers)
1478 }
1479 s.freeRegs(regspec.clobbers)
1480 b.Values = append(b.Values, v)
1481 s.advanceUses(v)
1482 continue
1483 }
1484
1485 if s.values[v.ID].rematerializeable {
1486
1487
1488
1489 for _, a := range v.Args {
1490 a.Uses--
1491 }
1492 s.advanceUses(v)
1493 continue
1494 }
1495
1496 if s.f.pass.debug > regDebug {
1497 fmt.Printf("value %s\n", v.LongString())
1498 fmt.Printf(" out:")
1499 for _, r := range dinfo[idx].out {
1500 if r != noRegister {
1501 fmt.Printf(" %s", &s.registers[r])
1502 }
1503 }
1504 fmt.Println()
1505 for i := 0; i < len(v.Args) && i < 3; i++ {
1506 fmt.Printf(" in%d:", i)
1507 for _, r := range dinfo[idx].in[i] {
1508 if r != noRegister {
1509 fmt.Printf(" %s", &s.registers[r])
1510 }
1511 }
1512 fmt.Println()
1513 }
1514 }
1515
1516
1517
1518
1519 args = append(args[:0], make([]*Value, len(v.Args))...)
1520 for i, a := range v.Args {
1521 if !s.values[a.ID].needReg {
1522 args[i] = a
1523 }
1524 }
1525 for _, i := range regspec.inputs {
1526 mask := i.regs
1527 if countRegs(mask) == 1 && mask&s.values[v.Args[i.idx].ID].regs != 0 {
1528 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1529 }
1530 }
1531
1532
1533
1534
1535
1536
1537 for {
1538 freed := false
1539 for _, i := range regspec.inputs {
1540 if args[i.idx] != nil {
1541 continue
1542 }
1543 mask := i.regs
1544 if countRegs(mask) == 1 && mask&^s.used != 0 {
1545 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1546
1547
1548
1549 oldregs := s.values[v.Args[i.idx].ID].regs
1550 if oldregs&^regspec.clobbers == 0 || !s.liveAfterCurrentInstruction(v.Args[i.idx]) {
1551 s.freeRegs(oldregs &^ mask &^ s.nospill)
1552 freed = true
1553 }
1554 }
1555 }
1556 if !freed {
1557 break
1558 }
1559 }
1560
1561
1562 for _, i := range regspec.inputs {
1563 if args[i.idx] != nil {
1564 continue
1565 }
1566 mask := i.regs
1567 if mask&s.values[v.Args[i.idx].ID].regs == 0 {
1568
1569 mask &= s.allocatable
1570 mask &^= s.nospill
1571
1572 if i.idx < 3 {
1573 for _, r := range dinfo[idx].in[i.idx] {
1574 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1575
1576 mask = regMask(1) << r
1577 break
1578 }
1579 }
1580 }
1581
1582 if mask&^desired.avoid != 0 {
1583 mask &^= desired.avoid
1584 }
1585 }
1586 if mask&s.values[v.Args[i.idx].ID].regs&(1<<s.SPReg) != 0 {
1587
1588
1589
1590 mask = 1 << s.SPReg
1591 }
1592 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1593 }
1594
1595
1596
1597
1598 if opcodeTable[v.Op].resultInArg0 {
1599 var m regMask
1600 if !s.liveAfterCurrentInstruction(v.Args[0]) {
1601
1602 goto ok
1603 }
1604 if opcodeTable[v.Op].commutative && !s.liveAfterCurrentInstruction(v.Args[1]) {
1605 args[0], args[1] = args[1], args[0]
1606 goto ok
1607 }
1608 if s.values[v.Args[0].ID].rematerializeable {
1609
1610 goto ok
1611 }
1612 if opcodeTable[v.Op].commutative && s.values[v.Args[1].ID].rematerializeable {
1613 args[0], args[1] = args[1], args[0]
1614 goto ok
1615 }
1616 if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
1617
1618 goto ok
1619 }
1620 if opcodeTable[v.Op].commutative && countRegs(s.values[v.Args[1].ID].regs) >= 2 {
1621 args[0], args[1] = args[1], args[0]
1622 goto ok
1623 }
1624
1625
1626
1627
1628
1629 m = s.compatRegs(v.Args[0].Type) &^ s.used
1630 if m == 0 {
1631
1632
1633
1634
1635 goto ok
1636 }
1637
1638
1639 for _, r := range dinfo[idx].out {
1640 if r != noRegister && (m®spec.outputs[0].regs)>>r&1 != 0 {
1641 m = regMask(1) << r
1642 args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
1643
1644
1645 goto ok
1646 }
1647 }
1648
1649
1650 for _, r := range dinfo[idx].in[0] {
1651 if r != noRegister && m>>r&1 != 0 {
1652 m = regMask(1) << r
1653 c := s.allocValToReg(v.Args[0], m, true, v.Pos)
1654 s.copies[c] = false
1655
1656
1657 goto ok
1658 }
1659 }
1660 if opcodeTable[v.Op].commutative {
1661 for _, r := range dinfo[idx].in[1] {
1662 if r != noRegister && m>>r&1 != 0 {
1663 m = regMask(1) << r
1664 c := s.allocValToReg(v.Args[1], m, true, v.Pos)
1665 s.copies[c] = false
1666 args[0], args[1] = args[1], args[0]
1667 goto ok
1668 }
1669 }
1670 }
1671
1672
1673 if m&^desired.avoid != 0 {
1674 m &^= desired.avoid
1675 }
1676
1677 c := s.allocValToReg(v.Args[0], m, true, v.Pos)
1678 s.copies[c] = false
1679
1680
1681
1682
1683 if regspec.outputs[0].regs>>s.f.getHome(c.ID).(*Register).num&1 != 0 {
1684 if rp, ok := s.f.getHome(args[0].ID).(*Register); ok {
1685 r := register(rp.num)
1686 for _, r2 := range dinfo[idx].in[0] {
1687 if r == r2 {
1688 args[0] = c
1689 break
1690 }
1691 }
1692 }
1693 }
1694 }
1695 ok:
1696 for i := 0; i < 2; i++ {
1697 if !(i == 0 && regspec.clobbersArg0 || i == 1 && regspec.clobbersArg1) {
1698 continue
1699 }
1700 if !s.liveAfterCurrentInstruction(v.Args[i]) {
1701
1702 continue
1703 }
1704 if s.values[v.Args[i].ID].rematerializeable {
1705
1706 continue
1707 }
1708 if countRegs(s.values[v.Args[i].ID].regs) >= 2 {
1709
1710 continue
1711 }
1712
1713 m := s.compatRegs(v.Args[i].Type) &^ s.used
1714 if m == 0 {
1715
1716
1717
1718
1719 continue
1720 }
1721
1722 c := s.allocValToReg(v.Args[i], m, true, v.Pos)
1723 s.copies[c] = false
1724 args[i] = c
1725 }
1726
1727
1728
1729
1730
1731
1732
1733 if opcodeTable[v.Op].needIntTemp {
1734 m := s.allocatable & s.f.Config.gpRegMask
1735 for _, out := range regspec.outputs {
1736 if countRegs(out.regs) == 1 {
1737 m &^= out.regs
1738 }
1739 }
1740 if m&^desired.avoid&^s.nospill != 0 {
1741 m &^= desired.avoid
1742 }
1743 tmpReg = s.allocReg(m, &tmpVal)
1744 s.nospill |= regMask(1) << tmpReg
1745 s.tmpused |= regMask(1) << tmpReg
1746 }
1747
1748 if regspec.clobbersArg0 {
1749 s.freeReg(register(s.f.getHome(args[0].ID).(*Register).num))
1750 }
1751 if regspec.clobbersArg1 {
1752 s.freeReg(register(s.f.getHome(args[1].ID).(*Register).num))
1753 }
1754
1755
1756
1757
1758
1759 if !opcodeTable[v.Op].resultNotInArgs {
1760 s.tmpused = s.nospill
1761 s.nospill = 0
1762 s.advanceUses(v)
1763 }
1764
1765
1766 if s.doClobber && v.Op.IsCall() {
1767
1768
1769 s.clobberRegs(regspec.clobbers &^ s.tmpused &^ s.nospill)
1770 }
1771 s.freeRegs(regspec.clobbers)
1772 s.tmpused |= regspec.clobbers
1773
1774
1775 {
1776 outRegs := noRegisters
1777 maxOutIdx := -1
1778 var used regMask
1779 if tmpReg != noRegister {
1780
1781
1782 used |= regMask(1) << tmpReg
1783 }
1784 for _, out := range regspec.outputs {
1785 if out.regs == 0 {
1786 continue
1787 }
1788 mask := out.regs & s.allocatable &^ used
1789 if mask == 0 {
1790 s.f.Fatalf("can't find any output register %s", v.LongString())
1791 }
1792 if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
1793 if !opcodeTable[v.Op].commutative {
1794
1795 r := register(s.f.getHome(args[0].ID).(*Register).num)
1796 if mask>>r&1 == 0 {
1797 s.f.Fatalf("resultInArg0 value's input %v cannot be an output of %s", s.f.getHome(args[0].ID).(*Register), v.LongString())
1798 }
1799 mask = regMask(1) << r
1800 } else {
1801
1802 r0 := register(s.f.getHome(args[0].ID).(*Register).num)
1803 r1 := register(s.f.getHome(args[1].ID).(*Register).num)
1804
1805 found := false
1806 for _, r := range dinfo[idx].out {
1807 if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
1808 mask = regMask(1) << r
1809 found = true
1810 if r == r1 {
1811 args[0], args[1] = args[1], args[0]
1812 }
1813 break
1814 }
1815 }
1816 if !found {
1817
1818 mask = regMask(1) << r0
1819 }
1820 }
1821 }
1822 if out.idx == 0 {
1823 for _, r := range dinfo[idx].out {
1824 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1825
1826 mask = regMask(1) << r
1827 break
1828 }
1829 }
1830 }
1831 if out.idx == 1 {
1832 if prefs, ok := desiredSecondReg[v.ID]; ok {
1833 for _, r := range prefs {
1834 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1835
1836 mask = regMask(1) << r
1837 break
1838 }
1839 }
1840 }
1841 }
1842
1843 if mask&^desired.avoid&^s.nospill&^s.used != 0 {
1844 mask &^= desired.avoid
1845 }
1846 r := s.allocReg(mask, v)
1847 if out.idx > maxOutIdx {
1848 maxOutIdx = out.idx
1849 }
1850 outRegs[out.idx] = r
1851 used |= regMask(1) << r
1852 s.tmpused |= regMask(1) << r
1853 }
1854
1855 if v.Type.IsTuple() {
1856 var outLocs LocPair
1857 if r := outRegs[0]; r != noRegister {
1858 outLocs[0] = &s.registers[r]
1859 }
1860 if r := outRegs[1]; r != noRegister {
1861 outLocs[1] = &s.registers[r]
1862 }
1863 s.f.setHome(v, outLocs)
1864
1865 } else if v.Type.IsResults() {
1866
1867 outLocs := make(LocResults, maxOutIdx+1, maxOutIdx+1)
1868 for i := 0; i <= maxOutIdx; i++ {
1869 if r := outRegs[i]; r != noRegister {
1870 outLocs[i] = &s.registers[r]
1871 }
1872 }
1873 s.f.setHome(v, outLocs)
1874 } else {
1875 if r := outRegs[0]; r != noRegister {
1876 s.assignReg(r, v, v)
1877 }
1878 }
1879 if tmpReg != noRegister {
1880
1881 if s.f.tempRegs == nil {
1882 s.f.tempRegs = map[ID]*Register{}
1883 }
1884 s.f.tempRegs[v.ID] = &s.registers[tmpReg]
1885 }
1886 }
1887
1888
1889 if opcodeTable[v.Op].resultNotInArgs {
1890 s.nospill = 0
1891 s.advanceUses(v)
1892 }
1893 s.tmpused = 0
1894
1895
1896 for i, a := range args {
1897 v.SetArg(i, a)
1898 }
1899 b.Values = append(b.Values, v)
1900 s.dropIfUnused(v)
1901 }
1902
1903
1904
1905 controls := append(make([]*Value, 0, 2), b.ControlValues()...)
1906
1907
1908 for i, v := range b.ControlValues() {
1909 if !s.values[v.ID].needReg {
1910 continue
1911 }
1912 if s.f.pass.debug > regDebug {
1913 fmt.Printf(" processing control %s\n", v.LongString())
1914 }
1915
1916
1917
1918 b.ReplaceControl(i, s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos))
1919 }
1920
1921
1922
1923 for _, v := range controls {
1924 vi := &s.values[v.ID]
1925 if !vi.needReg {
1926 continue
1927 }
1928
1929 u := vi.uses
1930 vi.uses = u.next
1931 if u.next == nil {
1932 s.freeRegs(vi.regs)
1933 }
1934 u.next = s.freeUseRecords
1935 s.freeUseRecords = u
1936 }
1937
1938
1939
1940
1941 if len(b.Succs) == 1 {
1942 if s.f.Config.hasGReg && s.regs[s.GReg].v != nil {
1943 s.freeReg(s.GReg)
1944 }
1945 if s.blockOrder[b.ID] > s.blockOrder[b.Succs[0].b.ID] {
1946
1947 goto badloop
1948 }
1949
1950 top := b.Succs[0].b
1951 loop := s.loopnest.b2l[top.ID]
1952 if loop == nil || loop.header != top || loop.containsUnavoidableCall {
1953 goto badloop
1954 }
1955
1956
1957 for _, live := range s.live[b.ID] {
1958 if live.dist >= unlikelyDistance {
1959
1960 continue
1961 }
1962 vid := live.ID
1963 vi := &s.values[vid]
1964 if vi.regs != 0 {
1965 continue
1966 }
1967 if vi.rematerializeable {
1968 continue
1969 }
1970 v := s.orig[vid]
1971 m := s.compatRegs(v.Type) &^ s.used
1972
1973 outerloop:
1974 for _, e := range desired.entries {
1975 if e.ID != v.ID {
1976 continue
1977 }
1978 for _, r := range e.regs {
1979 if r != noRegister && m>>r&1 != 0 {
1980 m = regMask(1) << r
1981 break outerloop
1982 }
1983 }
1984 }
1985 if m&^desired.avoid != 0 {
1986 m &^= desired.avoid
1987 }
1988 if m != 0 {
1989 s.allocValToReg(v, m, false, b.Pos)
1990 }
1991 }
1992 }
1993 badloop:
1994 ;
1995
1996
1997
1998 k := 0
1999 for r := register(0); r < s.numRegs; r++ {
2000 v := s.regs[r].v
2001 if v == nil {
2002 continue
2003 }
2004 k++
2005 }
2006 regList := make([]endReg, 0, k)
2007 for r := register(0); r < s.numRegs; r++ {
2008 v := s.regs[r].v
2009 if v == nil {
2010 continue
2011 }
2012 regList = append(regList, endReg{r, v, s.regs[r].c})
2013 }
2014 s.endRegs[b.ID] = regList
2015
2016 if checkEnabled {
2017 regValLiveSet.clear()
2018 for _, x := range s.live[b.ID] {
2019 regValLiveSet.add(x.ID)
2020 }
2021 for r := register(0); r < s.numRegs; r++ {
2022 v := s.regs[r].v
2023 if v == nil {
2024 continue
2025 }
2026 if !regValLiveSet.contains(v.ID) {
2027 s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
2028 }
2029 }
2030 }
2031
2032
2033
2034
2035
2036 for _, e := range s.live[b.ID] {
2037 vi := &s.values[e.ID]
2038 if vi.regs != 0 {
2039
2040 continue
2041 }
2042 if vi.rematerializeable {
2043
2044 continue
2045 }
2046 if s.f.pass.debug > regDebug {
2047 fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
2048 }
2049 spill := s.makeSpill(s.orig[e.ID], b)
2050 s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
2051 }
2052
2053
2054
2055
2056 for _, e := range s.live[b.ID] {
2057 u := s.values[e.ID].uses
2058 if u == nil {
2059 f.Fatalf("live at end, no uses v%d", e.ID)
2060 }
2061 if u.next != nil {
2062 f.Fatalf("live at end, too many uses v%d", e.ID)
2063 }
2064 s.values[e.ID].uses = nil
2065 u.next = s.freeUseRecords
2066 s.freeUseRecords = u
2067 }
2068
2069
2070
2071
2072
2073
2074
2075 if c := countRegs(s.startRegsMask); c != len(s.startRegs[b.ID]) {
2076 regs := make([]startReg, 0, c)
2077 for _, sr := range s.startRegs[b.ID] {
2078 if s.startRegsMask&(regMask(1)<<sr.r) == 0 {
2079 continue
2080 }
2081 regs = append(regs, sr)
2082 }
2083 s.startRegs[b.ID] = regs
2084 }
2085 }
2086
2087
2088 s.placeSpills()
2089
2090
2091
2092 stacklive := stackalloc(s.f, s.spillLive)
2093
2094
2095 s.shuffle(stacklive)
2096
2097
2098
2099
2100 for {
2101 progress := false
2102 for c, used := range s.copies {
2103 if !used && c.Uses == 0 {
2104 if s.f.pass.debug > regDebug {
2105 fmt.Printf("delete copied value %s\n", c.LongString())
2106 }
2107 c.resetArgs()
2108 f.freeValue(c)
2109 delete(s.copies, c)
2110 progress = true
2111 }
2112 }
2113 if !progress {
2114 break
2115 }
2116 }
2117
2118 for _, b := range s.visitOrder {
2119 i := 0
2120 for _, v := range b.Values {
2121 if v.Op == OpInvalid {
2122 continue
2123 }
2124 b.Values[i] = v
2125 i++
2126 }
2127 b.Values = b.Values[:i]
2128 }
2129 }
2130
// placeSpills picks the final block for each pending spill instruction and
// inserts the spills into those blocks' value lists. Spills are pushed as far
// down the dominator tree as possible (to execute them less often), but never
// into a deeper loop than where they started.
func (s *regAllocState) placeSpills() {
	// mustBeFirst reports whether op must be scheduled at the start of its
	// block (so spills cannot be placed immediately after such values).
	mustBeFirst := func(op Op) bool {
		return op.isLoweredGetClosurePtr() || op == OpPhi || op == OpArgIntReg || op == OpArgFloatReg
	}

	// start maps block IDs to the list of spills
	// that go at the start of the block (but after any phis).
	start := map[ID][]*Value{}
	// after maps value IDs to the list of spills to place
	// immediately after that value.
	after := map[ID][]*Value{}

	for i := range s.values {
		vi := s.values[i]
		spill := vi.spill
		if spill == nil {
			continue
		}
		if spill.Block != nil {
			// Some spills are already fully set up
			// (they were given a block earlier); skip them.
			continue
		}
		v := s.orig[i]

		// Walk down the dominator tree looking for a good place to
		// put the spill of v. "best" is the best place found so far;
		// it must dominate all of v's restores (restoreMin/restoreMax).
		if v == nil {
			panic(fmt.Errorf("nil v, s.orig[%d], vi = %v, spill = %s", i, vi, spill.LongString()))
		}
		best := v.Block
		bestArg := v
		var bestDepth int16
		if l := s.loopnest.b2l[best.ID]; l != nil {
			bestDepth = l.depth
		}
		b := best
		// Cap the total work per spill so this stays roughly O(1).
		const maxSpillSearch = 100
		for i := 0; i < maxSpillSearch; i++ {
			// Find the child of b in the dominator tree that
			// still dominates all restores of v.
			p := b
			b = nil
			for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
				if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
					// c also dominates all restores. Walk down into c.
					b = c
					break
				}
			}
			if b == nil {
				// Ran out of blocks which dominate all restores.
				break
			}

			var depth int16
			if l := s.loopnest.b2l[b.ID]; l != nil {
				depth = l.depth
			}
			if depth > bestDepth {
				// Don't push the spill into a deeper loop.
				continue
			}

			// If v is in a register at the start of b, we can
			// spill from that register here, which is better
			// than spilling at the definition.
			if len(b.Preds) == 1 {
				for _, e := range s.endRegs[b.Preds[0].b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			} else {
				for _, e := range s.startRegs[b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			}
		}

		// Put the spill in the best block we found.
		spill.Block = best
		spill.AddArg(bestArg)
		if best == v.Block && !mustBeFirst(v.Op) {
			// Place immediately after v.
			after[v.ID] = append(after[v.ID], spill)
		} else {
			// Place at the start of best block.
			start[best.ID] = append(start[best.ID], spill)
		}
	}

	// Insert spill instructions into the block schedules.
	var oldSched []*Value
	for _, b := range s.visitOrder {
		// Keep all the must-be-first values at the front.
		nfirst := 0
		for _, v := range b.Values {
			if !mustBeFirst(v.Op) {
				break
			}
			nfirst++
		}
		oldSched = append(oldSched[:0], b.Values[nfirst:]...)
		b.Values = b.Values[:nfirst]
		b.Values = append(b.Values, start[b.ID]...)
		for _, v := range oldSched {
			b.Values = append(b.Values, v)
			b.Values = append(b.Values, after[v.ID]...)
		}
	}
}
2253
2254
2255 func (s *regAllocState) shuffle(stacklive [][]ID) {
2256 var e edgeState
2257 e.s = s
2258 e.cache = map[ID][]*Value{}
2259 e.contents = map[Location]contentRecord{}
2260 if s.f.pass.debug > regDebug {
2261 fmt.Printf("shuffle %s\n", s.f.Name)
2262 fmt.Println(s.f.String())
2263 }
2264
2265 for _, b := range s.visitOrder {
2266 if len(b.Preds) <= 1 {
2267 continue
2268 }
2269 e.b = b
2270 for i, edge := range b.Preds {
2271 p := edge.b
2272 e.p = p
2273 e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
2274 e.process()
2275 }
2276 }
2277
2278 if s.f.pass.debug > regDebug {
2279 fmt.Printf("post shuffle %s\n", s.f.Name)
2280 fmt.Println(s.f.String())
2281 }
2282 }
2283
// edgeState holds the working state for fixing up a single CFG edge p->b
// during shuffle: where each value currently lives and where it still needs to go.
type edgeState struct {
	s    *regAllocState
	p, b *Block // edge goes from p to b

	// cache maps each pre-regalloc value ID to the post-regalloc copies
	// (registers and/or stack slots) where that value is currently available.
	cache      map[ID][]*Value
	cachedVals []ID // superset of the keys of cache, for deterministic iteration

	// contents maps each location to a record of the value it currently holds.
	contents map[Location]contentRecord

	// destinations lists the locations that still need to receive a value;
	// extra collects destinations generated while erasing locations (see erase).
	destinations []dstRecord
	extra        []dstRecord

	usedRegs              regMask // registers currently holding some value
	uniqueRegs            regMask // registers holding the only copy of a value
	finalRegs             regMask // registers holding a satisfied (final) destination value
	rematerializeableRegs regMask // registers holding rematerializeable values
}
2304
// contentRecord describes the value currently held in a location.
type contentRecord struct {
	vid   ID       // pre-regalloc ID of the value stored here
	c     *Value   // the post-regalloc copy of the value stored here
	final bool     // this is a satisfied destination; if erased it must be refilled (see erase)
	pos   src.XPos // source position associated with the use of this location
}
2311
// dstRecord describes a location that must end up holding a particular value
// by the time edge processing finishes.
type dstRecord struct {
	loc    Location // register or stack slot that needs to hold vid
	vid    ID       // pre-regalloc ID of the value that needs to be in loc
	splice **Value  // if non-nil, a pointer to the arg slot to overwrite with the chosen copy
	pos    src.XPos // source position associated with the use of this location
}
2318
2319
// setup initializes the edge state for processing edge e.p -> e.b, where
// e.b is the idx'th successor-edge target. srcReg lists register contents at
// the end of e.p, dstReg lists registers required at the start of e.b, and
// stacklive lists spill-slot value IDs live at the end of e.p.
func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("edge %s->%s\n", e.p, e.b)
	}

	// Clear all per-edge state from the previous edge.
	clear(e.cache)
	e.cachedVals = e.cachedVals[:0]
	clear(e.contents)
	e.usedRegs = 0
	e.uniqueRegs = 0
	e.finalRegs = 0
	e.rematerializeableRegs = 0

	// Live registers at the end of e.p can all serve as sources.
	for _, x := range srcReg {
		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos)
	}
	// So can the live spill slots — but only those whose spill
	// actually dominates this edge.
	for _, spillID := range stacklive {
		v := e.s.orig[spillID]
		spill := e.s.values[v.ID].spill
		if !e.s.sdom.IsAncestorEq(spill.Block, e.p) {
			// The spill doesn't dominate this edge, so its slot
			// can't be used as a source here. (In that case the
			// value is presumably available in a register via
			// srcReg instead — NOTE(review): relies on an invariant
			// maintained by the main regalloc loop; confirm there.)
			continue
		}
		e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos)
	}

	// Figure out all the destinations we need to fill:
	// first the registers required at the start of e.b...
	dsts := e.destinations[:0]
	for _, x := range dstReg {
		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
	}
	// ...then the homes of e.b's phis, which need the idx'th arg.
	for _, v := range e.b.Values {
		if v.Op != OpPhi {
			break
		}
		loc := e.s.f.getHome(v.ID)
		if loc == nil {
			continue
		}
		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
	}
	e.destinations = dsts

	if e.s.f.pass.debug > regDebug {
		for _, vid := range e.cachedVals {
			a := e.cache[vid]
			for _, c := range a {
				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
			}
		}
		for _, d := range e.destinations {
			fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
		}
	}
}
2386
2387
// process generates the moves needed to satisfy every destination recorded
// by setup, breaking move cycles with a temporary register when necessary.
func (e *edgeState) process() {
	dsts := e.destinations

	// Process the destinations until they are all satisfied.
	for len(dsts) > 0 {
		i := 0
		for _, d := range dsts {
			if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
				// Failed — keep it for the next iteration.
				dsts[i] = d
				i++
			}
		}
		if i < len(dsts) {
			// Made some progress. Go around again.
			dsts = dsts[:i]

			// Append any extra destinations generated while
			// erasing locations (see erase).
			dsts = append(dsts, e.extra...)
			e.extra = e.extra[:0]
			continue
		}

		// No progress was made, so every remaining unsatisfied move
		// is part of a cycle, e.g. A -> B -> C -> D -> A:
		//   A ----> B
		//   ^       |
		//   |       v
		//   D <---- C
		// Break the cycle by copying the contents of one cycle
		// location into a free temporary register. That duplicates
		// one cycle entry, making its location safe to overwrite so
		// the cycle can then complete normally:
		//   A ----> B
		//   ^       |
		//   |       v
		//   D <---- C <---- R=copy of old dsts[0].loc contents
		d := dsts[0]
		loc := d.loc
		vid := e.contents[loc].vid
		c := e.contents[loc].c
		r := e.findRegFor(c.Type)
		if e.s.f.pass.debug > regDebug {
			fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c)
		}
		e.erase(r)
		pos := d.pos.WithNotStmt()
		if _, isReg := loc.(*Register); isReg {
			// Register source: plain register-to-register copy.
			c = e.p.NewValue1(pos, OpCopy, c.Type, c)
		} else {
			// Stack source: load into the temp register.
			c = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
		}
		e.set(r, vid, c, false, pos)
		if c.Op == OpLoadReg && e.s.isGReg(register(r.(*Register).num)) {
			e.s.f.Fatalf("process.OpLoadReg targeting g: " + c.LongString())
		}
	}
}
2454
2455
2456
// processDest generates code to put value vid into location loc. It returns
// true if it made progress (loc now holds vid), false if it must wait for
// the current occupant of loc to be moved first.
func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool {
	pos = pos.WithNotStmt()
	occupant := e.contents[loc]
	if occupant.vid == vid {
		// Value is already in the correct place; just mark it final
		// and wire up the phi arg if requested.
		e.contents[loc] = contentRecord{vid, occupant.c, true, pos}
		if splice != nil {
			(*splice).Uses--
			*splice = occupant.c
			occupant.c.Uses++
		}
		// If this copy was recorded in s.copies, note that it ended
		// up actually being used.
		if _, ok := e.s.copies[occupant.c]; ok {
			e.s.copies[occupant.c] = true
		}
		return true
	}

	// Check if we're allowed to clobber the destination location:
	// we may not destroy the last copy of a value that still needs
	// to survive (unless it can be rematerialized).
	if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable {
		return false
	}

	// Pick a source copy of v, preferring one in a register.
	v := e.s.orig[vid]
	var c *Value
	var src Location // Note: shadows the cmd/internal/src package inside this function.
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("moving v%d to %s\n", vid, loc)
		fmt.Printf("sources of v%d:", vid)
	}
	if opcodeTable[v.Op].fixedReg {
		// Fixed-register values are read directly from their home.
		c = v
		src = e.s.f.getHome(v.ID)
	} else {
		for _, w := range e.cache[vid] {
			h := e.s.f.getHome(w.ID)
			if e.s.f.pass.debug > regDebug {
				fmt.Printf(" %s:%s", h, w)
			}
			_, isreg := h.(*Register)
			if src == nil || isreg {
				c = w
				src = h
			}
		}
	}
	if e.s.f.pass.debug > regDebug {
		if src != nil {
			fmt.Printf(" [use %s]\n", src)
		} else {
			fmt.Printf(" [no source]\n")
		}
	}
	_, dstReg := loc.(*Register)

	// Pre-clobber the destination. Erasing loc before choosing any
	// temp register prevents picking a register that would become the
	// last remaining copy of the value being overwritten.
	e.erase(loc)
	var x *Value
	if c == nil || e.s.values[vid].rematerializeable {
		// No stored copy available (or rematerialization preferred):
		// recompute the value.
		if !e.s.values[vid].rematerializeable {
			e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
		}
		if dstReg {
			x = v.copyInto(e.p)
		} else {
			// Rematerialize into a free register, then store to the
			// destination stack slot.
			r := e.findRegFor(v.Type)
			e.erase(r)
			x = v.copyIntoWithXPos(e.p, pos)
			e.set(r, vid, x, false, pos)
			// Store with the slot's type (which may differ from x's).
			x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x)
		}
	} else {
		// Emit a move from src to loc: copy, store, load, or
		// (for mem->mem) a load through a temp register plus a store.
		_, srcReg := src.(*Register)
		if srcReg {
			if dstReg {
				x = e.p.NewValue1(pos, OpCopy, c.Type, c)
			} else {
				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c)
			}
		} else {
			if dstReg {
				x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
			} else {
				// mem->mem: go through a temporary register.
				r := e.findRegFor(c.Type)
				e.erase(r)
				t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
				e.set(r, vid, t, false, pos)
				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
			}
		}
	}
	e.set(loc, vid, x, true, pos)
	if x.Op == OpLoadReg && e.s.isGReg(register(loc.(*Register).num)) {
		e.s.f.Fatalf("processDest.OpLoadReg targeting g: " + x.LongString())
	}
	if splice != nil {
		(*splice).Uses--
		*splice = x
		x.Uses++
	}
	return true
}
2581
2582
// set records that location loc now holds copy c of value vid, updating the
// cache, contents map, and all the register bookkeeping masks.
func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
	e.s.f.setHome(c, loc)
	e.contents[loc] = contentRecord{vid, c, final, pos}
	a := e.cache[vid]
	if len(a) == 0 {
		// First copy of this value: remember vid for iteration.
		e.cachedVals = append(e.cachedVals, vid)
	}
	a = append(a, c)
	e.cache[vid] = a
	if r, ok := loc.(*Register); ok {
		if e.usedRegs&(regMask(1)<<uint(r.num)) != 0 {
			e.s.f.Fatalf("%v is already set (v%d/%v)", r, vid, c)
		}
		e.usedRegs |= regMask(1) << uint(r.num)
		if final {
			e.finalRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 1 {
			// This register holds the only copy of vid.
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 2 {
			// vid now has two copies; the register holding the other
			// copy is no longer its unique home.
			if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
				e.uniqueRegs &^= regMask(1) << uint(t.num)
			}
		}
		if e.s.values[vid].rematerializeable {
			e.rematerializeableRegs |= regMask(1) << uint(r.num)
		}
	}
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("%s\n", c.LongString())
		fmt.Printf("v%d now available in %s:%s\n", vid, loc, c)
	}
}
2617
2618
// erase removes whatever value is currently held in location loc, updating
// the cache and register masks. If the erased value was a satisfied (final)
// destination, a new destination record is queued so it gets refilled.
func (e *edgeState) erase(loc Location) {
	cr := e.contents[loc]
	if cr.c == nil {
		// Location is already empty.
		return
	}
	vid := cr.vid

	if cr.final {
		// Add a destination to move this value back into place.
		// It goes on e.extra, which process appends to the tail of the
		// queue, so other moves make progress first.
		e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos})
	}

	// Remove the copy homed at loc from the cached-copies list
	// (swap-delete; order of copies doesn't matter).
	a := e.cache[vid]
	for i, c := range a {
		if e.s.f.getHome(c.ID) == loc {
			if e.s.f.pass.debug > regDebug {
				fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c)
			}
			a[i], a = a[len(a)-1], a[:len(a)-1]
			break
		}
	}
	e.cache[vid] = a

	// Update the register masks if loc is a register.
	if r, ok := loc.(*Register); ok {
		e.usedRegs &^= regMask(1) << uint(r.num)
		if cr.final {
			e.finalRegs &^= regMask(1) << uint(r.num)
		}
		e.rematerializeableRegs &^= regMask(1) << uint(r.num)
	}
	if len(a) == 1 {
		// Exactly one copy remains; if it lives in a register, that
		// register is now the unique home of the value.
		if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
	}
}
2660
2661
// findRegFor finds a register that can be used as a temporary for a value of
// type typ, spilling an existing occupant to a fresh stack slot if no register
// can be taken for free. It aborts the compile if no register is compatible.
func (e *edgeState) findRegFor(typ *types.Type) Location {
	// Which registers are possibilities.
	types := &e.s.f.Config.Types // Note: shadows the types package inside this function.
	m := e.s.compatRegs(typ)

	// Pick a register. In priority order:
	// 1) an unused register
	// 2) a non-unique register not holding a final value
	// 3) a non-unique register
	// 4) a register holding a rematerializeable value
	x := m &^ e.usedRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m &^ e.uniqueRegs &^ e.finalRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m &^ e.uniqueRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m & e.rematerializeableRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}

	// No register is free; evict one. Save its current contents to a
	// new temp stack slot first, unless the value can be recomputed.
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 {
				if !c.rematerializeable() {
					x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c)
					// The slot type just needs to be big enough to
					// hold any register's contents.
					t := LocalSlot{N: e.s.f.NewLocal(c.Pos, types.Int64), Type: types.Int64}
					e.set(t, vid, x, false, c.Pos)
					if e.s.f.pass.debug > regDebug {
						fmt.Printf(" SPILL %s->%s %s\n", r, t, x.LongString())
					}
				}
				// The caller will overwrite r; the saved value will be
				// moved back to its final home later by processDest.
				return r
			}
		}
	}

	// Nothing compatible at all: dump state and abort.
	fmt.Printf("m:%d unique:%d final:%d rematerializable:%d\n", m, e.uniqueRegs, e.finalRegs, e.rematerializeableRegs)
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID))
		}
	}
	e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
	return nil
}
2725
2726
2727
2728 func (v *Value) rematerializeable() bool {
2729 if !opcodeTable[v.Op].rematerializeable {
2730 return false
2731 }
2732 for _, a := range v.Args {
2733
2734
2735 if !opcodeTable[a.Op].fixedReg {
2736 return false
2737 }
2738 }
2739 return true
2740 }
2741
// liveInfo records that a value is live at the end of some block, along with
// an estimate of how far away its next use is.
type liveInfo struct {
	ID   ID       // ID of the live value
	dist int32    // # of instructions before the next use of this value
	pos  src.XPos // source position of the next use
}
2747
2748
2749
2750
2751
2752
2753
2754
2755
// computeLive computes, for each block, the list of values live at the end of
// that block together with the distance (in instructions) to their next use;
// the result is stored in s.live. It simultaneously computes the desired
// register for each value at the end of each block, stored in s.desired.
// Both are backward dataflow problems solved by iterating to a fixed point
// over the blocks in postorder.
func (s *regAllocState) computeLive() {
	f := s.f
	s.live = make([][]liveInfo, f.NumBlocks())
	s.desired = make([]desiredState, f.NumBlocks())
	var phis []*Value

	live := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(live)
	t := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(t)

	// Scratch desired-register state for the block being scanned.
	var desired desiredState

	// Iterate over the blocks in postorder: liveness flows backward,
	// so starting from the exits converges faster.
	po := f.postorder()
	s.loopnest = f.loopnest()
	s.loopnest.computeUnavoidableCalls()
	for {
		changed := false

		for _, b := range po {
			// Start with the known live values at the end of b,
			// shifting distances by len(b.Values) so they are
			// relative to the start of the block.
			live.clear()
			for _, e := range s.live[b.ID] {
				live.set(e.ID, e.dist+int32(len(b.Values)), e.pos)
			}

			// Control values are used at the end of the block.
			for _, c := range b.ControlValues() {
				if s.values[c.ID].needReg {
					live.set(c.ID, int32(len(b.Values)), b.Pos)
				}
			}

			// Walk the scheduled values backward, removing each
			// definition and adding each use. Phis are collected for
			// per-edge handling below.
			phis = phis[:0]
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				live.remove(v.ID)
				if v.Op == OpPhi {
					// Save phi ops for later.
					phis = append(phis, v)
					continue
				}
				if opcodeTable[v.Op].call {
					// Penalize values live across a call, making
					// them more likely to be spilled.
					c := live.contents()
					for i := range c {
						c[i].val += unlikelyDistance
					}
				}
				for _, a := range v.Args {
					if s.values[a.ID].needReg {
						live.set(a.ID, int32(i), v.Pos)
					}
				}
			}
			// Propagate desired registers backward through the block.
			desired.copy(&s.desired[b.ID])
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				prefs := desired.remove(v.ID)
				if v.Op == OpPhi {
					// Desired registers are not propagated back
					// through phi nodes.
					continue
				}
				regspec := s.regspec(v)
				// Cancel desired registers that get clobbered.
				desired.clobber(regspec.clobbers)
				// Inputs pinned to a single register become desired
				// registers for the corresponding args.
				for _, j := range regspec.inputs {
					if countRegs(j.regs) != 1 {
						continue
					}
					desired.clobber(j.regs)
					desired.add(v.Args[j.idx].ID, pickReg(j.regs))
				}
				// For two-operand instructions (and ops treated like
				// them: ADDQconst/ADDLconst so they can lower to ADD
				// rather than LEA, and Select0 to pass the preference
				// through to the tuple-generating op), the output's
				// desired registers become desired for input 0
				// (and input 1 if commutative).
				if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
					if opcodeTable[v.Op].commutative {
						desired.addList(v.Args[1].ID, prefs)
					}
					desired.addList(v.Args[0].ID, prefs)
				}
			}

			// For each predecessor of b, expand its live-at-end set.
			// Invariant: live contains the values live at the start of
			// b (excluding phi inputs).
			for i, e := range b.Preds {
				p := e.b
				// Extra distance contributed by the edge, weighted by
				// branch likelihood. Must be at least 1 to distinguish
				// a use in p's control from the first use in b.
				delta := int32(normalDistance)
				if len(p.Succs) == 2 {
					if p.Succs[0].b == b && p.Likely == BranchLikely ||
						p.Succs[1].b == b && p.Likely == BranchUnlikely {
						delta = likelyDistance
					}
					if p.Succs[0].b == b && p.Likely == BranchUnlikely ||
						p.Succs[1].b == b && p.Likely == BranchLikely {
						delta = unlikelyDistance
					}
				}

				// Merge this block's desired registers into p's.
				s.desired[p.ID].merge(&desired)

				// Start t from p's previously known live-at-end set.
				t.clear()
				for _, e := range s.live[p.ID] {
					t.set(e.ID, e.dist, e.pos)
				}
				update := false

				// Add the values live at the start of b, keeping the
				// smaller distance when a value is already present.
				for _, e := range live.contents() {
					d := e.val + delta
					if !t.contains(e.key) || d < t.get(e.key) {
						update = true
						t.set(e.key, d, e.pos)
					}
				}
				// Add the i'th arg of each phi; all phi inputs are
				// treated as used at distance delta.
				for _, v := range phis {
					id := v.Args[i].ID
					if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) {
						update = true
						t.set(id, delta, v.Pos)
					}
				}

				if !update {
					continue
				}
				// The live set changed; rewrite it (reusing storage).
				l := s.live[p.ID][:0]
				if cap(l) < t.size() {
					l = make([]liveInfo, 0, t.size())
				}
				for _, e := range t.contents() {
					l = append(l, liveInfo{e.key, e.val, e.pos})
				}
				s.live[p.ID] = l
				changed = true
			}
		}

		if !changed {
			// Fixed point reached.
			break
		}
	}
	if f.pass.debug > regDebug {
		fmt.Println("live values at end of each block")
		for _, b := range f.Blocks {
			fmt.Printf(" %s:", b)
			for _, x := range s.live[b.ID] {
				fmt.Printf(" v%d(%d)", x.ID, x.dist)
				for _, e := range s.desired[b.ID].entries {
					if e.ID != x.ID {
						continue
					}
					fmt.Printf("[")
					first := true
					for _, r := range e.regs {
						if r == noRegister {
							continue
						}
						if !first {
							fmt.Printf(",")
						}
						fmt.Print(&s.registers[r])
						first = false
					}
					fmt.Printf("]")
				}
			}
			if avoid := s.desired[b.ID].avoid; avoid != 0 {
				fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
			}
			fmt.Println()
		}
	}
}
2960
2961
// desiredState represents the desired register assignments for a set of
// values at some program point.
type desiredState struct {
	// Desired assignments: one entry per value that has at least one
	// desired register.
	entries []desiredStateEntry

	// Registers that some value would like to be in. This covers the
	// union of the regs fields of entries (remove does not clear bits,
	// so it may be a superset). Other values should avoid being
	// allocated to these registers.
	avoid regMask
}
// desiredStateEntry records the registers a single value would like to be in.
type desiredStateEntry struct {
	// ID of the (pre-regalloc) value.
	ID ID
	// Registers this value would like to be in, in priority order
	// (most recently desired first, see add). Unused slots hold
	// noRegister.
	regs [4]register
}
2982
2983
2984 func (d *desiredState) get(vid ID) [4]register {
2985 for _, e := range d.entries {
2986 if e.ID == vid {
2987 return e.regs
2988 }
2989 }
2990 return [4]register{noRegister, noRegister, noRegister, noRegister}
2991 }
2992
2993
// add records that value vid would like to be in register r, making r the
// highest-priority desired register for vid. Each entry keeps its registers
// in most-recently-desired-first order, holding at most len(regs) of them.
func (d *desiredState) add(vid ID, r register) {
	d.avoid |= regMask(1) << r
	for i := range d.entries {
		e := &d.entries[i]
		if e.ID != vid {
			continue
		}
		if e.regs[0] == r {
			// Already known and already the most desired register.
			return
		}
		for j := 1; j < len(e.regs); j++ {
			if e.regs[j] == r {
				// r is already in the list at lower priority: shift
				// the entries before it down one slot and put r first.
				copy(e.regs[1:], e.regs[:j])
				e.regs[0] = r
				return
			}
		}
		// r is new for vid: shift everything down one slot (dropping
		// the least recently desired register) and put r first.
		copy(e.regs[1:], e.regs[:])
		e.regs[0] = r
		return
	}
	// First desired register for vid: create a fresh entry.
	d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
}
3019
3020 func (d *desiredState) addList(vid ID, regs [4]register) {
3021
3022 for i := len(regs) - 1; i >= 0; i-- {
3023 r := regs[i]
3024 if r != noRegister {
3025 d.add(vid, r)
3026 }
3027 }
3028 }
3029
3030
// clobber erases all desired registers in the set m, dropping any entry that
// is left with no desired registers.
func (d *desiredState) clobber(m regMask) {
	for i := 0; i < len(d.entries); {
		e := &d.entries[i]
		// Compact the register list in place, keeping only registers
		// not in m.
		j := 0
		for _, r := range e.regs {
			if r != noRegister && m>>r&1 == 0 {
				e.regs[j] = r
				j++
			}
		}
		if j == 0 {
			// No more desired registers for this value: swap-delete
			// the entry. Don't advance i — the swapped-in entry still
			// needs to be processed.
			d.entries[i] = d.entries[len(d.entries)-1]
			d.entries = d.entries[:len(d.entries)-1]
			continue
		}
		// Pad the tail of the list with noRegister.
		for ; j < len(e.regs); j++ {
			e.regs[j] = noRegister
		}
		i++
	}
	d.avoid &^= m
}
3054
3055
3056 func (d *desiredState) copy(x *desiredState) {
3057 d.entries = append(d.entries[:0], x.entries...)
3058 d.avoid = x.avoid
3059 }
3060
3061
3062 func (d *desiredState) remove(vid ID) [4]register {
3063 for i := range d.entries {
3064 if d.entries[i].ID == vid {
3065 regs := d.entries[i].regs
3066 d.entries[i] = d.entries[len(d.entries)-1]
3067 d.entries = d.entries[:len(d.entries)-1]
3068 return regs
3069 }
3070 }
3071 return [4]register{noRegister, noRegister, noRegister, noRegister}
3072 }
3073
3074
3075 func (d *desiredState) merge(x *desiredState) {
3076 d.avoid |= x.avoid
3077
3078
3079 for _, e := range x.entries {
3080 d.addList(e.ID, e.regs)
3081 }
3082 }
3083
3084
// computeUnavoidableCalls sets containsUnavoidableCall on every loop for
// which there is no call-free path from the loop header back to itself —
// i.e. every complete iteration of the loop must execute a call.
func (loopnest *loopnest) computeUnavoidableCalls() {
	f := loopnest.f

	// Note which blocks contain calls.
	hasCall := f.Cache.allocBoolSlice(f.NumBlocks())
	defer f.Cache.freeBoolSlice(hasCall)
	for _, b := range f.Blocks {
		if b.containsCall() {
			hasCall[b.ID] = true
		}
	}
	found := f.Cache.allocSparseSet(f.NumBlocks())
	defer f.Cache.freeSparseSet(found)

	// For each loop, do a depth-first search from the header through
	// call-free blocks that stay within the loop (or a nested loop).
	// If the search reaches the header again, a call-free iteration
	// exists; otherwise every iteration must pass through a call.
loopLoop:
	for _, l := range loopnest.loops {
		found.clear()
		tovisit := make([]*Block, 0, 8)
		tovisit = append(tovisit, l.header)
		for len(tovisit) > 0 {
			cur := tovisit[len(tovisit)-1]
			tovisit = tovisit[:len(tovisit)-1]
			if hasCall[cur.ID] {
				// Paths through this block hit a call; don't extend them.
				continue
			}
			for _, s := range cur.Succs {
				nb := s.Block()
				if nb == l.header {
					// Call-free path around the loop found;
					// this loop's call is avoidable.
					continue loopLoop
				}
				if found.contains(nb.ID) {
					// Already queued via another path.
					continue
				}
				nl := loopnest.b2l[nb.ID]
				if nl == nil || (nl.depth <= l.depth && nl != l) {
					// Left the loop; don't follow this path.
					continue
				}
				tovisit = append(tovisit, nb)
				found.add(nb.ID)
			}
		}
		// Exhausted all call-free paths without returning to the header.
		l.containsUnavoidableCall = true
	}
}
3140
3141 func (b *Block) containsCall() bool {
3142 if b.Kind == BlockDefer {
3143 return true
3144 }
3145 for _, v := range b.Values {
3146 if opcodeTable[v.Op].call {
3147 return true
3148 }
3149 }
3150 return false
3151 }
3152