Source file src/runtime/metrics_test.go

     1  // Copyright 2020 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package runtime_test
     6  
     7  import (
     8  	"bytes"
     9  	"internal/abi"
    10  	"internal/goexperiment"
    11  	"internal/profile"
    12  	"internal/testenv"
    13  	"os"
    14  	"reflect"
    15  	"runtime"
    16  	"runtime/debug"
    17  	"runtime/metrics"
    18  	"runtime/pprof"
    19  	"runtime/trace"
    20  	"slices"
    21  	"sort"
    22  	"strings"
    23  	"sync"
    24  	"sync/atomic"
    25  	"testing"
    26  	"time"
    27  	"unsafe"
    28  )
    29  
    30  func prepareAllMetricsSamples() (map[string]metrics.Description, []metrics.Sample) {
    31  	all := metrics.All()
    32  	samples := make([]metrics.Sample, len(all))
    33  	descs := make(map[string]metrics.Description)
    34  	for i := range all {
    35  		samples[i].Name = all[i].Name
    36  		descs[all[i].Name] = all[i]
    37  	}
    38  	return descs, samples
    39  }
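        // For orientation: the pattern set up by prepareAllMetricsSamples mirrors
        // ordinary (non-test) use of the runtime/metrics package. A minimal sketch
        // of that usage, assuming an additional "fmt" import, looks like:
        //
        //	descs := metrics.All()
        //	samples := make([]metrics.Sample, len(descs))
        //	for i := range descs {
        //		samples[i].Name = descs[i].Name
        //	}
        //	metrics.Read(samples)
        //	for _, s := range samples {
        //		switch s.Value.Kind() {
        //		case metrics.KindUint64:
        //			fmt.Printf("%s: %d\n", s.Name, s.Value.Uint64())
        //		case metrics.KindFloat64:
        //			fmt.Printf("%s: %f\n", s.Name, s.Value.Float64())
        //		case metrics.KindFloat64Histogram:
        //			fmt.Printf("%s: %d buckets\n", s.Name, len(s.Value.Float64Histogram().Buckets))
        //		}
        //	}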
    40  
    41  func TestReadMetrics(t *testing.T) {
    42  	// Run a GC cycle to get some of the stats to be non-zero.
    43  	runtime.GC()
    44  
    45  	// Set an arbitrary memory limit to check the metric for it
    46  	limit := int64(512 * 1024 * 1024)
    47  	oldLimit := debug.SetMemoryLimit(limit)
    48  	defer debug.SetMemoryLimit(oldLimit)
    49  
    50  	// Set a GC percent to check the metric for it
    51  	gcPercent := 99
    52  	oldGCPercent := debug.SetGCPercent(gcPercent)
    53  	defer debug.SetGCPercent(oldGCPercent)
    54  
    55  	// Tests whether readMetrics produces values aligning
    56  	// with ReadMemStats while the world is stopped.
    57  	var mstats runtime.MemStats
    58  	_, samples := prepareAllMetricsSamples()
    59  	runtime.ReadMetricsSlow(&mstats, unsafe.Pointer(&samples[0]), len(samples), cap(samples))
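        	// (ReadMetricsSlow is a test-only hook exported through the runtime's
        	// export_test.go. It takes the sample slice as an unsafe.Pointer plus its
        	// len and cap, and fills in mstats and the samples together under a single
        	// stop-the-world so that the two views are mutually consistent.)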
    60  
    61  	checkUint64 := func(t *testing.T, m string, got, want uint64) {
    62  		t.Helper()
    63  		if got != want {
    64  			t.Errorf("metric %q: got %d, want %d", m, got, want)
    65  		}
    66  	}
    67  
    68  	// Check to make sure the values we read line up with other values we read.
    69  	var allocsBySize, gcPauses, schedPausesTotalGC *metrics.Float64Histogram
    70  	var tinyAllocs uint64
    71  	var mallocs, frees uint64
    72  	for i := range samples {
    73  		switch name := samples[i].Name; name {
    74  		case "/cgo/go-to-c-calls:calls":
    75  			checkUint64(t, name, samples[i].Value.Uint64(), uint64(runtime.NumCgoCall()))
    76  		case "/memory/classes/heap/free:bytes":
    77  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapIdle-mstats.HeapReleased)
    78  		case "/memory/classes/heap/released:bytes":
    79  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapReleased)
    80  		case "/memory/classes/heap/objects:bytes":
    81  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapAlloc)
    82  		case "/memory/classes/heap/unused:bytes":
    83  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapInuse-mstats.HeapAlloc)
    84  		case "/memory/classes/heap/stacks:bytes":
    85  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.StackInuse)
    86  		case "/memory/classes/metadata/mcache/free:bytes":
    87  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MCacheSys-mstats.MCacheInuse)
    88  		case "/memory/classes/metadata/mcache/inuse:bytes":
    89  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MCacheInuse)
    90  		case "/memory/classes/metadata/mspan/free:bytes":
    91  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MSpanSys-mstats.MSpanInuse)
    92  		case "/memory/classes/metadata/mspan/inuse:bytes":
    93  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MSpanInuse)
    94  		case "/memory/classes/metadata/other:bytes":
    95  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.GCSys)
    96  		case "/memory/classes/os-stacks:bytes":
    97  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.StackSys-mstats.StackInuse)
    98  		case "/memory/classes/other:bytes":
    99  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.OtherSys)
   100  		case "/memory/classes/profiling/buckets:bytes":
   101  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.BuckHashSys)
   102  		case "/memory/classes/total:bytes":
   103  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.Sys)
   104  		case "/gc/heap/allocs-by-size:bytes":
   105  			hist := samples[i].Value.Float64Histogram()
   106  			// Skip size class 0 in BySize, because it's always empty and not represented
   107  			// in the histogram.
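        			// (The sc.Size+1 below reflects that BySize.Size is an inclusive upper
        			// bound on object size, while histogram bucket boundaries are exclusive
        			// upper bounds.)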
   108  			for i, sc := range mstats.BySize[1:] {
   109  				if b, s := hist.Buckets[i+1], float64(sc.Size+1); b != s {
   110  					t.Errorf("bucket does not match size class: got %f, want %f", b, s)
   111  					// The rest of the checks aren't expected to work anyway.
   112  					continue
   113  				}
   114  				if c, m := hist.Counts[i], sc.Mallocs; c != m {
   115  					t.Errorf("histogram counts do not match BySize for class %d: got %d, want %d", i, c, m)
   116  				}
   117  			}
   118  			allocsBySize = hist
   119  		case "/gc/heap/allocs:bytes":
   120  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.TotalAlloc)
   121  		case "/gc/heap/frees-by-size:bytes":
   122  			hist := samples[i].Value.Float64Histogram()
   123  			// Skip size class 0 in BySize, because it's always empty and not represented
   124  			// in the histogram.
   125  			for i, sc := range mstats.BySize[1:] {
   126  				if b, s := hist.Buckets[i+1], float64(sc.Size+1); b != s {
   127  					t.Errorf("bucket does not match size class: got %f, want %f", b, s)
   128  					// The rest of the checks aren't expected to work anyway.
   129  					continue
   130  				}
   131  				if c, f := hist.Counts[i], sc.Frees; c != f {
   132  					t.Errorf("histogram counts do not match BySize for class %d: got %d, want %d", i, c, f)
   133  				}
   134  			}
   135  		case "/gc/heap/frees:bytes":
   136  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.TotalAlloc-mstats.HeapAlloc)
   137  		case "/gc/heap/tiny/allocs:objects":
   138  			// Currently, MemStats adds tiny alloc count to both Mallocs AND Frees.
   139  			// The reason for this is because MemStats couldn't be extended at the time
   140  			// but there was a desire to have Mallocs at least be a little more representative,
   141  			// while having Mallocs - Frees still represent a live object count.
   142  			// Unfortunately, MemStats doesn't actually export a large allocation count,
   143  			// so it's impossible to pull this number out directly.
   144  			//
   145  			// Check tiny allocation count outside of this loop, by using the allocs-by-size
   146  			// histogram in order to figure out how many large objects there are.
   147  			tinyAllocs = samples[i].Value.Uint64()
   148  			// Because the next two metrics tests are checking against Mallocs and Frees,
   149  			// we can't check them directly for the same reason: we need to account for tiny
   150  			// allocations included in Mallocs and Frees.
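        			// Concretely, the identities verified after this loop are:
        			//
        			//	tinyAllocs == mstats.Mallocs - sum(allocsBySize.Counts)
        			//	mallocs    == mstats.Mallocs - tinyAllocs
        			//	frees      == mstats.Frees - tinyAllocs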
   151  		case "/gc/heap/allocs:objects":
   152  			mallocs = samples[i].Value.Uint64()
   153  		case "/gc/heap/frees:objects":
   154  			frees = samples[i].Value.Uint64()
   155  		case "/gc/heap/live:bytes":
   156  			// Check for "obviously wrong" values. We can't check a stronger invariant,
   157  			// such as live <= HeapAlloc, because live is not 100% accurate. It's computed
   158  			// under racy conditions, and some objects may be double-counted (this is
   159  			// intentional and necessary for GC performance).
   160  			//
   161  			// Instead, check against a much more reasonable upper-bound: the amount of
   162  			// mapped heap memory. We can't possibly overcount to the point of exceeding
   163  			// total mapped heap memory, except if there's an accounting bug.
   164  			if live := samples[i].Value.Uint64(); live > mstats.HeapSys {
   165  				t.Errorf("live bytes: %d > heap sys: %d", live, mstats.HeapSys)
   166  			} else if live == 0 {
   167  				// Might happen if we don't call runtime.GC() above.
   168  				t.Error("live bytes is 0")
   169  			}
   170  		case "/gc/gomemlimit:bytes":
   171  			checkUint64(t, name, samples[i].Value.Uint64(), uint64(limit))
   172  		case "/gc/heap/objects:objects":
   173  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapObjects)
   174  		case "/gc/heap/goal:bytes":
   175  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.NextGC)
   176  		case "/gc/gogc:percent":
   177  			checkUint64(t, name, samples[i].Value.Uint64(), uint64(gcPercent))
   178  		case "/gc/cycles/automatic:gc-cycles":
   179  			checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumGC-mstats.NumForcedGC))
   180  		case "/gc/cycles/forced:gc-cycles":
   181  			checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumForcedGC))
   182  		case "/gc/cycles/total:gc-cycles":
   183  			checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumGC))
   184  		case "/gc/pauses:seconds":
   185  			gcPauses = samples[i].Value.Float64Histogram()
   186  		case "/sched/pauses/total/gc:seconds":
   187  			schedPausesTotalGC = samples[i].Value.Float64Histogram()
   188  		}
   189  	}
   190  
   191  	// Check tinyAllocs.
   192  	nonTinyAllocs := uint64(0)
   193  	for _, c := range allocsBySize.Counts {
   194  		nonTinyAllocs += c
   195  	}
   196  	checkUint64(t, "/gc/heap/tiny/allocs:objects", tinyAllocs, mstats.Mallocs-nonTinyAllocs)
   197  
   198  	// Check allocation and free counts.
   199  	checkUint64(t, "/gc/heap/allocs:objects", mallocs, mstats.Mallocs-tinyAllocs)
   200  	checkUint64(t, "/gc/heap/frees:objects", frees, mstats.Frees-tinyAllocs)
   201  
   202  	// Verify that /gc/pauses:seconds is a copy of /sched/pauses/total/gc:seconds.
   203  	if !slices.Equal(gcPauses.Buckets, schedPausesTotalGC.Buckets) {
   204  		t.Errorf("/gc/pauses:seconds buckets %v do not match /sched/pauses/total/gc:seconds buckets %v", gcPauses.Buckets, schedPausesTotalGC.Buckets)
   205  	}
   206  	if !slices.Equal(gcPauses.Counts, schedPausesTotalGC.Counts) {
   207  		t.Errorf("/gc/pauses:seconds counts %v do not match /sched/pauses/total/gc:seconds counts %v", gcPauses.Counts, schedPausesTotalGC.Counts)
   208  	}
   209  }
   210  
   211  func TestReadMetricsConsistency(t *testing.T) {
   212  	// Tests whether readMetrics produces consistent, sensible values.
   213  	// The values are read concurrently with the runtime doing other
   214  	// things (e.g. allocating) so what we read can't reasonably be compared
   215  	// to other runtime values (e.g. MemStats).
   216  
   217  	// Run a few GC cycles to get some of the stats to be non-zero.
   218  	runtime.GC()
   219  	runtime.GC()
   220  	runtime.GC()
   221  
   222  	// Set GOMAXPROCS high then sleep briefly to ensure we generate
   223  	// some idle time.
   224  	oldmaxprocs := runtime.GOMAXPROCS(10)
   225  	time.Sleep(time.Millisecond)
   226  	runtime.GOMAXPROCS(oldmaxprocs)
   227  
   228  	// Read all the supported metrics through the metrics package.
   229  	descs, samples := prepareAllMetricsSamples()
   230  	metrics.Read(samples)
   231  
   232  	// Check to make sure the values we read make sense.
   233  	var totalVirtual struct {
   234  		got, want uint64
   235  	}
   236  	var objects struct {
   237  		alloc, free             *metrics.Float64Histogram
   238  		allocs, frees           uint64
   239  		allocdBytes, freedBytes uint64
   240  		total, totalBytes       uint64
   241  	}
   242  	var gc struct {
   243  		numGC  uint64
   244  		pauses uint64
   245  	}
   246  	var totalScan struct {
   247  		got, want uint64
   248  	}
   249  	var cpu struct {
   250  		gcAssist    float64
   251  		gcDedicated float64
   252  		gcIdle      float64
   253  		gcPause     float64
   254  		gcTotal     float64
   255  
   256  		idle float64
   257  		user float64
   258  
   259  		scavengeAssist float64
   260  		scavengeBg     float64
   261  		scavengeTotal  float64
   262  
   263  		total float64
   264  	}
   265  	for i := range samples {
   266  		kind := samples[i].Value.Kind()
   267  		if want := descs[samples[i].Name].Kind; kind != want {
   268  			t.Errorf("supported metric %q has unexpected kind: got %d, want %d", samples[i].Name, kind, want)
   269  			continue
   270  		}
   271  		if samples[i].Name != "/memory/classes/total:bytes" && strings.HasPrefix(samples[i].Name, "/memory/classes") {
   272  			v := samples[i].Value.Uint64()
   273  			totalVirtual.want += v
   274  
   275  			// None of these stats should ever get this big.
   276  			// If they do, there's probably overflow involved,
   277  			// usually due to bad accounting.
   278  			if int64(v) < 0 {
   279  				t.Errorf("%q has high/negative value: %d", samples[i].Name, v)
   280  			}
   281  		}
   282  		switch samples[i].Name {
   283  		case "/cpu/classes/gc/mark/assist:cpu-seconds":
   284  			cpu.gcAssist = samples[i].Value.Float64()
   285  		case "/cpu/classes/gc/mark/dedicated:cpu-seconds":
   286  			cpu.gcDedicated = samples[i].Value.Float64()
   287  		case "/cpu/classes/gc/mark/idle:cpu-seconds":
   288  			cpu.gcIdle = samples[i].Value.Float64()
   289  		case "/cpu/classes/gc/pause:cpu-seconds":
   290  			cpu.gcPause = samples[i].Value.Float64()
   291  		case "/cpu/classes/gc/total:cpu-seconds":
   292  			cpu.gcTotal = samples[i].Value.Float64()
   293  		case "/cpu/classes/idle:cpu-seconds":
   294  			cpu.idle = samples[i].Value.Float64()
   295  		case "/cpu/classes/scavenge/assist:cpu-seconds":
   296  			cpu.scavengeAssist = samples[i].Value.Float64()
   297  		case "/cpu/classes/scavenge/background:cpu-seconds":
   298  			cpu.scavengeBg = samples[i].Value.Float64()
   299  		case "/cpu/classes/scavenge/total:cpu-seconds":
   300  			cpu.scavengeTotal = samples[i].Value.Float64()
   301  		case "/cpu/classes/total:cpu-seconds":
   302  			cpu.total = samples[i].Value.Float64()
   303  		case "/cpu/classes/user:cpu-seconds":
   304  			cpu.user = samples[i].Value.Float64()
   305  		case "/memory/classes/total:bytes":
   306  			totalVirtual.got = samples[i].Value.Uint64()
   307  		case "/memory/classes/heap/objects:bytes":
   308  			objects.totalBytes = samples[i].Value.Uint64()
   309  		case "/gc/heap/objects:objects":
   310  			objects.total = samples[i].Value.Uint64()
   311  		case "/gc/heap/allocs:bytes":
   312  			objects.allocdBytes = samples[i].Value.Uint64()
   313  		case "/gc/heap/allocs:objects":
   314  			objects.allocs = samples[i].Value.Uint64()
   315  		case "/gc/heap/allocs-by-size:bytes":
   316  			objects.alloc = samples[i].Value.Float64Histogram()
   317  		case "/gc/heap/frees:bytes":
   318  			objects.freedBytes = samples[i].Value.Uint64()
   319  		case "/gc/heap/frees:objects":
   320  			objects.frees = samples[i].Value.Uint64()
   321  		case "/gc/heap/frees-by-size:bytes":
   322  			objects.free = samples[i].Value.Float64Histogram()
   323  		case "/gc/cycles:gc-cycles":
   324  			gc.numGC = samples[i].Value.Uint64()
   325  		case "/gc/pauses:seconds":
   326  			h := samples[i].Value.Float64Histogram()
   327  			gc.pauses = 0
   328  			for i := range h.Counts {
   329  				gc.pauses += h.Counts[i]
   330  			}
   331  		case "/gc/scan/heap:bytes":
   332  			totalScan.want += samples[i].Value.Uint64()
   333  		case "/gc/scan/globals:bytes":
   334  			totalScan.want += samples[i].Value.Uint64()
   335  		case "/gc/scan/stack:bytes":
   336  			totalScan.want += samples[i].Value.Uint64()
   337  		case "/gc/scan/total:bytes":
   338  			totalScan.got = samples[i].Value.Uint64()
   339  		case "/sched/gomaxprocs:threads":
   340  			if got, want := samples[i].Value.Uint64(), uint64(runtime.GOMAXPROCS(-1)); got != want {
   341  				t.Errorf("gomaxprocs doesn't match runtime.GOMAXPROCS: got %d, want %d", got, want)
   342  			}
   343  		case "/sched/goroutines:goroutines":
   344  			if samples[i].Value.Uint64() < 1 {
   345  				t.Error("number of goroutines is less than one")
   346  			}
   347  		}
   348  	}
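        	// The /cpu/classes metrics form a tree in which each parent should equal
        	// the sum of its children, up to floating-point error (hence the epsilon
        	// comparisons below):
        	//
        	//	total          = gc/total + scavenge/total + user + idle
        	//	gc/total       = gc/mark/assist + gc/mark/dedicated + gc/mark/idle + gc/pause
        	//	scavenge/total = scavenge/assist + scavenge/background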
   349  	// Only check this on Linux where we can be reasonably sure we have a high-resolution timer.
   350  	if runtime.GOOS == "linux" {
   351  		if cpu.gcDedicated <= 0 && cpu.gcAssist <= 0 && cpu.gcIdle <= 0 {
   352  			t.Errorf("found no time spent on GC work: %#v", cpu)
   353  		}
   354  		if cpu.gcPause <= 0 {
   355  			t.Errorf("found no GC pauses: %f", cpu.gcPause)
   356  		}
   357  		if cpu.idle <= 0 {
   358  			t.Errorf("found no idle time: %f", cpu.idle)
   359  		}
   360  		if total := cpu.gcDedicated + cpu.gcAssist + cpu.gcIdle + cpu.gcPause; !withinEpsilon(cpu.gcTotal, total, 0.001) {
   361  			t.Errorf("calculated total GC CPU time not within 0.1%% of total: %f vs. %f", total, cpu.gcTotal)
   362  		}
   363  		if total := cpu.scavengeAssist + cpu.scavengeBg; !withinEpsilon(cpu.scavengeTotal, total, 0.001) {
   364  			t.Errorf("calculated total scavenge CPU not within 0.1%% of total: %f vs. %f", total, cpu.scavengeTotal)
   365  		}
   366  		if cpu.total <= 0 {
   367  			t.Errorf("found no total CPU time passed")
   368  		}
   369  		if cpu.user <= 0 {
   370  			t.Errorf("found no user time passed")
   371  		}
   372  		if total := cpu.gcTotal + cpu.scavengeTotal + cpu.user + cpu.idle; !withinEpsilon(cpu.total, total, 0.001) {
   373  			t.Errorf("calculated total CPU not within 0.1%% of total: %f vs. %f", total, cpu.total)
   374  		}
   375  	}
   376  	if totalVirtual.got != totalVirtual.want {
   377  		t.Errorf(`"/memory/classes/total:bytes" does not match sum of /memory/classes/**: got %d, want %d`, totalVirtual.got, totalVirtual.want)
   378  	}
   379  	if got, want := objects.allocs-objects.frees, objects.total; got != want {
   380  		t.Errorf("mismatch between object alloc/free tallies and total: got %d, want %d", got, want)
   381  	}
   382  	if got, want := objects.allocdBytes-objects.freedBytes, objects.totalBytes; got != want {
   383  		t.Errorf("mismatch between object alloc/free tallies and total: got %d, want %d", got, want)
   384  	}
   385  	if b, c := len(objects.alloc.Buckets), len(objects.alloc.Counts); b != c+1 {
   386  		t.Errorf("allocs-by-size has wrong bucket or counts length: %d buckets, %d counts", b, c)
   387  	}
   388  	if b, c := len(objects.free.Buckets), len(objects.free.Counts); b != c+1 {
   389  		t.Errorf("frees-by-size has wrong bucket or counts length: %d buckets, %d counts", b, c)
   390  	}
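        	// (In a metrics.Float64Histogram, Counts[i] counts values falling in the
        	// bucket [Buckets[i], Buckets[i+1]), which is why a valid histogram has
        	// exactly one more bucket boundary than it has counts.)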
   391  	if len(objects.alloc.Buckets) != len(objects.free.Buckets) {
   392  		t.Error("allocs-by-size and frees-by-size buckets don't match in length")
   393  	} else if len(objects.alloc.Counts) != len(objects.free.Counts) {
   394  		t.Error("allocs-by-size and frees-by-size counts don't match in length")
   395  	} else {
   396  		for i := range objects.alloc.Buckets {
   397  			ba := objects.alloc.Buckets[i]
   398  			bf := objects.free.Buckets[i]
   399  			if ba != bf {
   400  				t.Errorf("bucket %d is different for alloc and free hists: %f != %f", i, ba, bf)
   401  			}
   402  		}
   403  		if !t.Failed() {
   404  			var gotAlloc, gotFree uint64
   405  			want := objects.total
   406  			for i := range objects.alloc.Counts {
   407  				if objects.alloc.Counts[i] < objects.free.Counts[i] {
   408  					t.Errorf("found more allocs than frees in object dist bucket %d", i)
   409  					continue
   410  				}
   411  				gotAlloc += objects.alloc.Counts[i]
   412  				gotFree += objects.free.Counts[i]
   413  			}
   414  			if got := gotAlloc - gotFree; got != want {
   415  				t.Errorf("object distribution counts don't match count of live objects: got %d, want %d", got, want)
   416  			}
   417  			if gotAlloc != objects.allocs {
   418  				t.Errorf("object distribution counts don't match total allocs: got %d, want %d", gotAlloc, objects.allocs)
   419  			}
   420  			if gotFree != objects.frees {
   421  				t.Errorf("object distribution counts don't match total frees: got %d, want %d", gotFree, objects.frees)
   422  			}
   423  		}
   424  	}
   425  	// The current GC has at least 2 pauses per GC.
   426  	// Check to see if that value makes sense.
   427  	if gc.pauses < gc.numGC*2 {
   428  		t.Errorf("fewer pauses than expected: got %d, want at least %d", gc.pauses, gc.numGC*2)
   429  	}
   430  	if totalScan.got <= 0 {
   431  		t.Errorf("scannable GC space is empty: %d", totalScan.got)
   432  	}
   433  	if totalScan.got != totalScan.want {
   434  		t.Errorf("/gc/scan/total:bytes doesn't line up with sum of /gc/scan*: total %d vs. sum %d", totalScan.got, totalScan.want)
   435  	}
   436  }
   437  
   438  func BenchmarkReadMetricsLatency(b *testing.B) {
   439  	stop := applyGCLoad(b)
   440  
   441  	// Spend this much time measuring latencies.
   442  	latencies := make([]time.Duration, 0, 1024)
   443  	_, samples := prepareAllMetricsSamples()
   444  
   445  	// Hit metrics.Read continuously and measure.
   446  	b.ResetTimer()
   447  	for i := 0; i < b.N; i++ {
   448  		start := time.Now()
   449  		metrics.Read(samples)
   450  		latencies = append(latencies, time.Since(start))
   451  	}
   452  	// Make sure to stop the timer before we wait! The load created above
   453  	// is very heavy-weight and not easy to stop, so we could end up
   454  	// confusing the benchmarking framework for small b.N.
   455  	b.StopTimer()
   456  	stop()
   457  
   458  	// Disable the default */op metrics.
   459  	// ns/op doesn't mean anything here because it's an average; the latency
   460  	// percentiles reported below are the meaningful measurements.
   461  	b.ReportMetric(0, "ns/op")
   462  	b.ReportMetric(0, "B/op")
   463  	b.ReportMetric(0, "allocs/op")
   464  
   465  	// Sort latencies then report percentiles.
   466  	sort.Slice(latencies, func(i, j int) bool {
   467  		return latencies[i] < latencies[j]
   468  	})
   469  	b.ReportMetric(float64(latencies[len(latencies)*50/100]), "p50-ns")
   470  	b.ReportMetric(float64(latencies[len(latencies)*90/100]), "p90-ns")
   471  	b.ReportMetric(float64(latencies[len(latencies)*99/100]), "p99-ns")
   472  }
   473  
   474  var readMetricsSink [1024]interface{}
   475  
   476  func TestReadMetricsCumulative(t *testing.T) {
   477  	// Set up the set of metrics marked cumulative.
   478  	descs := metrics.All()
   479  	var samples [2][]metrics.Sample
   480  	samples[0] = make([]metrics.Sample, len(descs))
   481  	samples[1] = make([]metrics.Sample, len(descs))
   482  	total := 0
   483  	for i := range samples[0] {
   484  		if !descs[i].Cumulative {
   485  			continue
   486  		}
   487  		samples[0][total].Name = descs[i].Name
   488  		total++
   489  	}
   490  	samples[0] = samples[0][:total]
   491  	samples[1] = samples[1][:total]
   492  	copy(samples[1], samples[0])
   493  
   494  	// Start some noise in the background.
   495  	var wg sync.WaitGroup
   496  	wg.Add(1)
   497  	done := make(chan struct{})
   498  	go func() {
   499  		defer wg.Done()
   500  		for {
   501  			// Add more things here that could influence metrics.
   502  			for i := 0; i < 10; i++ {
   503  				runtime.AddCleanup(new(*int), func(_ struct{}) {}, struct{}{})
   504  				runtime.SetFinalizer(new(*int), func(_ **int) {})
   505  			}
   506  			for i := 0; i < len(readMetricsSink); i++ {
   507  				readMetricsSink[i] = make([]byte, 1024)
   508  				select {
   509  				case <-done:
   510  					return
   511  				default:
   512  				}
   513  			}
   514  			runtime.GC()
   515  		}
   516  	}()
   517  
   518  	sum := func(us []uint64) uint64 {
   519  		total := uint64(0)
   520  		for _, u := range us {
   521  			total += u
   522  		}
   523  		return total
   524  	}
   525  
   526  	// Populate the first generation.
   527  	metrics.Read(samples[0])
   528  
   529  	// Check to make sure that these metrics only grow monotonically.
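        	// samples[0] and samples[1] act as a double buffer: each generation reads
        	// into samples[gen%2] and compares against samples[1-(gen%2)], which still
        	// holds the previous generation's values.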
   530  	for gen := 1; gen < 10; gen++ {
   531  		metrics.Read(samples[gen%2])
   532  		for i := range samples[gen%2] {
   533  			name := samples[gen%2][i].Name
   534  			vNew, vOld := samples[gen%2][i].Value, samples[1-(gen%2)][i].Value
   535  
   536  			switch vNew.Kind() {
   537  			case metrics.KindUint64:
   538  				new := vNew.Uint64()
   539  				old := vOld.Uint64()
   540  				if new < old {
   541  					t.Errorf("%s decreased: %d < %d", name, new, old)
   542  				}
   543  			case metrics.KindFloat64:
   544  				new := vNew.Float64()
   545  				old := vOld.Float64()
   546  				if new < old {
   547  					t.Errorf("%s decreased: %f < %f", name, new, old)
   548  				}
   549  			case metrics.KindFloat64Histogram:
   550  				new := sum(vNew.Float64Histogram().Counts)
   551  				old := sum(vOld.Float64Histogram().Counts)
   552  				if new < old {
   553  					t.Errorf("%s counts decreased: %d < %d", name, new, old)
   554  				}
   555  			}
   556  		}
   557  	}
   558  	close(done)
   559  
   560  	wg.Wait()
   561  }
   562  
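        // withinEpsilon reports whether v1 is within relative error e of v2; for
        // non-negative v2 this is equivalent to |v1 - v2| <= e*v2.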
   563  func withinEpsilon(v1, v2, e float64) bool {
   564  	return v2-v2*e <= v1 && v1 <= v2+v2*e
   565  }
   566  
   567  func TestMutexWaitTimeMetric(t *testing.T) {
   568  	var sample [1]metrics.Sample
   569  	sample[0].Name = "/sync/mutex/wait/total:seconds"
   570  
   571  	locks := []locker2{
   572  		new(mutex),
   573  		new(rwmutexWrite),
   574  		new(rwmutexReadWrite),
   575  		new(rwmutexWriteRead),
   576  	}
   577  	for _, lock := range locks {
   578  		t.Run(reflect.TypeOf(lock).Elem().Name(), func(t *testing.T) {
   579  			metrics.Read(sample[:])
   580  			before := time.Duration(sample[0].Value.Float64() * 1e9)
   581  
   582  			minMutexWaitTime := generateMutexWaitTime(lock)
   583  
   584  			metrics.Read(sample[:])
   585  			after := time.Duration(sample[0].Value.Float64() * 1e9)
   586  
   587  			if wt := after - before; wt < minMutexWaitTime {
   588  				t.Errorf("too little mutex wait time: got %s, want %s", wt, minMutexWaitTime)
   589  			}
   590  		})
   591  	}
   592  }
   593  
   594  // locker2 represents an API surface of two concurrent goroutines
   595  // locking the same resource, but through different APIs. It's intended
   596  // to abstract over the relationship of two Lock calls or an RLock
   597  // and a Lock call.
   598  type locker2 interface {
   599  	Lock1()
   600  	Unlock1()
   601  	Lock2()
   602  	Unlock2()
   603  }
   604  
   605  type mutex struct {
   606  	mu sync.Mutex
   607  }
   608  
   609  func (m *mutex) Lock1()   { m.mu.Lock() }
   610  func (m *mutex) Unlock1() { m.mu.Unlock() }
   611  func (m *mutex) Lock2()   { m.mu.Lock() }
   612  func (m *mutex) Unlock2() { m.mu.Unlock() }
   613  
   614  type rwmutexWrite struct {
   615  	mu sync.RWMutex
   616  }
   617  
   618  func (m *rwmutexWrite) Lock1()   { m.mu.Lock() }
   619  func (m *rwmutexWrite) Unlock1() { m.mu.Unlock() }
   620  func (m *rwmutexWrite) Lock2()   { m.mu.Lock() }
   621  func (m *rwmutexWrite) Unlock2() { m.mu.Unlock() }
   622  
   623  type rwmutexReadWrite struct {
   624  	mu sync.RWMutex
   625  }
   626  
   627  func (m *rwmutexReadWrite) Lock1()   { m.mu.RLock() }
   628  func (m *rwmutexReadWrite) Unlock1() { m.mu.RUnlock() }
   629  func (m *rwmutexReadWrite) Lock2()   { m.mu.Lock() }
   630  func (m *rwmutexReadWrite) Unlock2() { m.mu.Unlock() }
   631  
   632  type rwmutexWriteRead struct {
   633  	mu sync.RWMutex
   634  }
   635  
   636  func (m *rwmutexWriteRead) Lock1()   { m.mu.Lock() }
   637  func (m *rwmutexWriteRead) Unlock1() { m.mu.Unlock() }
   638  func (m *rwmutexWriteRead) Lock2()   { m.mu.RLock() }
   639  func (m *rwmutexWriteRead) Unlock2() { m.mu.RUnlock() }
   640  
   641  // generateMutexWaitTime causes a couple of goroutines
   642  // to block a whole bunch of times on a sync.Mutex, returning
   643  // the minimum amount of time that should be visible in the
   644  // /sync/mutex/wait/total:seconds metric.
   645  func generateMutexWaitTime(mu locker2) time.Duration {
   646  	// Set up the runtime to always track casgstatus transitions for metrics.
   647  	*runtime.CasGStatusAlwaysTrack = true
   648  
   649  	mu.Lock1()
   650  
   651  	// Start up a goroutine to wait on the lock.
   652  	gc := make(chan *runtime.G)
   653  	done := make(chan bool)
   654  	go func() {
   655  		gc <- runtime.Getg()
   656  
   657  		for {
   658  			mu.Lock2()
   659  			mu.Unlock2()
   660  			if <-done {
   661  				return
   662  			}
   663  		}
   664  	}()
   665  	gp := <-gc
   666  
   667  	// Set the block time high enough so that it will always show up, even
   668  	// on systems with coarse timer granularity.
   669  	const blockTime = 100 * time.Millisecond
   670  
   671  	// Make sure the goroutine spawned above actually blocks on the lock.
   672  	for {
   673  		if runtime.GIsWaitingOnMutex(gp) {
   674  			break
   675  		}
   676  		runtime.Gosched()
   677  	}
   678  
   679  	// Let some amount of time pass.
   680  	time.Sleep(blockTime)
   681  
   682  	// Let the other goroutine acquire the lock.
   683  	mu.Unlock1()
   684  	done <- true
   685  
   686  	// Reset flag.
   687  	*runtime.CasGStatusAlwaysTrack = false
   688  	return blockTime
   689  }
   690  
   691  // See issue #60276.
   692  func TestCPUMetricsSleep(t *testing.T) {
   693  	if runtime.GOOS == "wasip1" {
   694  		// Since wasip1 busy-waits in the scheduler, there's no meaningful idle
   695  		// time. This is accurately reflected in the metrics, but it means this
   696  		// test is basically meaningless on this platform.
   697  		t.Skip("wasip1 currently busy-waits in idle time; test not applicable")
   698  	}
   699  
   700  	names := []string{
   701  		"/cpu/classes/idle:cpu-seconds",
   702  
   703  		"/cpu/classes/gc/mark/assist:cpu-seconds",
   704  		"/cpu/classes/gc/mark/dedicated:cpu-seconds",
   705  		"/cpu/classes/gc/mark/idle:cpu-seconds",
   706  		"/cpu/classes/gc/pause:cpu-seconds",
   707  		"/cpu/classes/gc/total:cpu-seconds",
   708  		"/cpu/classes/scavenge/assist:cpu-seconds",
   709  		"/cpu/classes/scavenge/background:cpu-seconds",
   710  		"/cpu/classes/scavenge/total:cpu-seconds",
   711  		"/cpu/classes/total:cpu-seconds",
   712  		"/cpu/classes/user:cpu-seconds",
   713  	}
   714  	prep := func() []metrics.Sample {
   715  		mm := make([]metrics.Sample, len(names))
   716  		for i := range names {
   717  			mm[i].Name = names[i]
   718  		}
   719  		return mm
   720  	}
   721  	m1, m2 := prep(), prep()
   722  
   723  	const (
   724  		// Expected time spent idle.
   725  		dur = 100 * time.Millisecond
   726  
   727  		// maxFailures is the number of consecutive failures required to cause the test to fail.
   728  		maxFailures = 10
   729  	)
   730  
   731  	failureIdleTimes := make([]float64, 0, maxFailures)
   732  
   733  	// If the bug we expect is happening, then the Sleep CPU time will be accounted for
   734  	// as user time rather than idle time. In an ideal world we'd expect the whole application
   735  	// to go instantly idle the moment this goroutine goes to sleep, and stay asleep for that
   736  	// duration. However, the Go runtime can easily eat into idle time while this goroutine is
   737  	// blocked in a sleep. For example, slow platforms might spend more time than expected in the
   738  	// scheduler. Another example is that a Go runtime background goroutine could run while
   739  	// everything else is idle. Lastly, if a running goroutine is descheduled by the OS, enough
   740  	// time may pass such that the goroutine is ready to wake, even though the runtime couldn't
   741  	// observe itself as idle with nanotime.
   742  	//
   743  	// To deal with all this, we give a half-proc's worth of leniency.
   744  	//
   745  	// We also retry multiple times to deal with the fact that the OS might deschedule us before
   746  	// we yield and go idle. That has a rare enough chance that retries should resolve it.
   747  	// If the issue we expect is happening, it should be persistent.
   748  	minIdleCPUSeconds := dur.Seconds() * (float64(runtime.GOMAXPROCS(-1)) - 0.5)
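        	// For example, at GOMAXPROCS=4 this requires observing at least
        	// 0.1s * (4 - 0.5) = 0.35s of idle CPU time across the sleep.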
   749  
   750  	// Let's make sure there's no background scavenge work to do.
   751  	//
   752  	// The runtime.GC calls below ensure the background sweeper
   753  	// will not run during the idle period.
   754  	debug.FreeOSMemory()
   755  
   756  	for retries := 0; retries < maxFailures; retries++ {
   757  		// Read 1.
   758  		runtime.GC() // Update /cpu/classes metrics.
   759  		metrics.Read(m1)
   760  
   761  		// Sleep.
   762  		time.Sleep(dur)
   763  
   764  		// Read 2.
   765  		runtime.GC() // Update /cpu/classes metrics.
   766  		metrics.Read(m2)
   767  
   768  		dt := m2[0].Value.Float64() - m1[0].Value.Float64()
   769  		if dt >= minIdleCPUSeconds {
   770  			// All is well. Test passed.
   771  			return
   772  		}
   773  		failureIdleTimes = append(failureIdleTimes, dt)
   774  		// Try again.
   775  	}
   776  
   777  	// We couldn't observe the expected idle time even once.
   778  	for i, dt := range failureIdleTimes {
   779  		t.Logf("try %2d: idle time = %.5fs\n", i+1, dt)
   780  	}
   781  	t.Logf("try %d breakdown:\n", len(failureIdleTimes))
   782  	for i := range names {
   783  		if m1[i].Value.Kind() == metrics.KindBad {
   784  			continue
   785  		}
   786  		t.Logf("\t%s %0.3f\n", names[i], m2[i].Value.Float64()-m1[i].Value.Float64())
   787  	}
   788  	t.Errorf(`time.Sleep did not contribute enough to "idle" class: minimum idle time = %.5fs`, minIdleCPUSeconds)
   789  }
   790  
   791  // testSchedPauseMetrics calls fn and verifies that the correct STW metrics increment.
   792  // If isGC is true, fn triggers a GC STW. Otherwise, fn triggers a non-GC ("other") STW.
   793  func testSchedPauseMetrics(t *testing.T, fn func(t *testing.T), isGC bool) {
   794  	m := []metrics.Sample{
   795  		{Name: "/sched/pauses/stopping/gc:seconds"},
   796  		{Name: "/sched/pauses/stopping/other:seconds"},
   797  		{Name: "/sched/pauses/total/gc:seconds"},
   798  		{Name: "/sched/pauses/total/other:seconds"},
   799  	}
   800  
   801  	stoppingGC := &m[0]
   802  	stoppingOther := &m[1]
   803  	totalGC := &m[2]
   804  	totalOther := &m[3]
   805  
   806  	sampleCount := func(s *metrics.Sample) uint64 {
   807  		h := s.Value.Float64Histogram()
   808  
   809  		var n uint64
   810  		for _, c := range h.Counts {
   811  			n += c
   812  		}
   813  		return n
   814  	}
   815  
   816  	// Read baseline.
   817  	metrics.Read(m)
   818  
   819  	baselineStartGC := sampleCount(stoppingGC)
   820  	baselineStartOther := sampleCount(stoppingOther)
   821  	baselineTotalGC := sampleCount(totalGC)
   822  	baselineTotalOther := sampleCount(totalOther)
   823  
   824  	fn(t)
   825  
   826  	metrics.Read(m)
   827  
   828  	if isGC {
   829  		if got := sampleCount(stoppingGC); got <= baselineStartGC {
   830  			t.Errorf("/sched/pauses/stopping/gc:seconds sample count %d did not increase from baseline of %d", got, baselineStartGC)
   831  		}
   832  		if got := sampleCount(totalGC); got <= baselineTotalGC {
   833  			t.Errorf("/sched/pauses/total/gc:seconds sample count %d did not increase from baseline of %d", got, baselineTotalGC)
   834  		}
   835  
   836  		if got := sampleCount(stoppingOther); got != baselineStartOther {
   837  			t.Errorf("/sched/pauses/stopping/other:seconds sample count %d changed from baseline of %d", got, baselineStartOther)
   838  		}
   839  		if got := sampleCount(totalOther); got != baselineTotalOther {
   840  			t.Errorf("/sched/pauses/total/other:seconds sample count %d changed from baseline of %d", got, baselineTotalOther)
   841  		}
   842  	} else {
   843  		if got := sampleCount(stoppingGC); got != baselineStartGC {
   844  			t.Errorf("/sched/pauses/stopping/gc:seconds sample count %d changed from baseline of %d", got, baselineStartGC)
   845  		}
   846  		if got := sampleCount(totalGC); got != baselineTotalGC {
   847  			t.Errorf("/sched/pauses/total/gc:seconds sample count %d changed from baseline of %d", got, baselineTotalGC)
   848  		}
   849  
   850  		if got := sampleCount(stoppingOther); got <= baselineStartOther {
   851  			t.Errorf("/sched/pauses/stopping/other:seconds sample count %d did not increase from baseline of %d", got, baselineStartOther)
   852  		}
   853  		if got := sampleCount(totalOther); got <= baselineTotalOther {
   854  			t.Errorf("/sched/pauses/total/other:seconds sample count %d did not increase from baseline of %d", got, baselineTotalOther)
   855  		}
   856  	}
   857  }
   858  
   859  func TestSchedPauseMetrics(t *testing.T) {
   860  	tests := []struct {
   861  		name string
   862  		isGC bool
   863  		fn   func(t *testing.T)
   864  	}{
   865  		{
   866  			name: "runtime.GC",
   867  			isGC: true,
   868  			fn: func(t *testing.T) {
   869  				runtime.GC()
   870  			},
   871  		},
   872  		{
   873  			name: "runtime.GOMAXPROCS",
   874  			fn: func(t *testing.T) {
   875  				if runtime.GOARCH == "wasm" {
   876  					t.Skip("GOMAXPROCS >1 not supported on wasm")
   877  				}
   878  
   879  				n := runtime.GOMAXPROCS(0)
   880  				defer runtime.GOMAXPROCS(n)
   881  
   882  				runtime.GOMAXPROCS(n + 1)
   883  			},
   884  		},
   885  		{
   886  			name: "runtime.GoroutineProfile",
   887  			fn: func(t *testing.T) {
   888  				var s [1]runtime.StackRecord
   889  				runtime.GoroutineProfile(s[:])
   890  			},
   891  		},
   892  		{
   893  			name: "runtime.ReadMemStats",
   894  			fn: func(t *testing.T) {
   895  				var mstats runtime.MemStats
   896  				runtime.ReadMemStats(&mstats)
   897  			},
   898  		},
   899  		{
   900  			name: "runtime.Stack",
   901  			fn: func(t *testing.T) {
   902  				var b [64]byte
   903  				runtime.Stack(b[:], true)
   904  			},
   905  		},
   906  		{
   907  			name: "runtime/debug.WriteHeapDump",
   908  			fn: func(t *testing.T) {
   909  				if runtime.GOOS == "js" {
   910  					t.Skip("WriteHeapDump not supported on js")
   911  				}
   912  
   913  				f, err := os.CreateTemp(t.TempDir(), "heapdumptest")
   914  				if err != nil {
   915  					t.Fatalf("os.CreateTemp failed: %v", err)
   916  				}
   917  				defer os.Remove(f.Name())
   918  				defer f.Close()
   919  				debug.WriteHeapDump(f.Fd())
   920  			},
   921  		},
   922  		{
   923  			name: "runtime/trace.Start",
   924  			fn: func(t *testing.T) {
   925  				if trace.IsEnabled() {
   926  					t.Skip("tracing already enabled")
   927  				}
   928  
   929  				var buf bytes.Buffer
   930  				if err := trace.Start(&buf); err != nil {
   931  					t.Errorf("trace.Start err got %v want nil", err)
   932  				}
   933  				trace.Stop()
   934  			},
   935  		},
   936  	}
   937  
   938  	// These tests count STW pauses, classified based on whether they're related
   939  	// to the GC or not. Disable automatic GC cycles during the test so we don't
   940  	// have an incidental GC pause when we're trying to observe only
   941  	// non-GC-related pauses. This is especially important for the
   942  	// runtime/trace.Start test, since (as of this writing) that will block
   943  	// until any active GC mark phase completes.
   944  	defer debug.SetGCPercent(debug.SetGCPercent(-1))
   945  	runtime.GC()
   946  
   947  	for _, tc := range tests {
   948  		t.Run(tc.name, func(t *testing.T) {
   949  			testSchedPauseMetrics(t, tc.fn, tc.isGC)
   950  		})
   951  	}
   952  }
   953  
   954  func TestRuntimeLockMetricsAndProfile(t *testing.T) {
   955  	old := runtime.SetMutexProfileFraction(0) // enabled during sub-tests
   956  	defer runtime.SetMutexProfileFraction(old)
   957  	if old != 0 {
   958  		t.Fatalf("need MutexProfileRate 0, got %d", old)
   959  	}
   960  
   961  	t.Logf("NumCPU %d", runtime.NumCPU())
   962  	t.Logf("GOMAXPROCS %d", runtime.GOMAXPROCS(0))
   963  	if minCPU := 2; runtime.NumCPU() < minCPU {
   964  		t.Skipf("creating and observing contention on runtime-internal locks requires NumCPU >= %d", minCPU)
   965  	}
   966  
   967  	loadProfile := func(t *testing.T) *profile.Profile {
   968  		var w bytes.Buffer
   969  		pprof.Lookup("mutex").WriteTo(&w, 0)
   970  		p, err := profile.Parse(&w)
   971  		if err != nil {
   972  			t.Fatalf("failed to parse profile: %v", err)
   973  		}
   974  		if err := p.CheckValid(); err != nil {
   975  			t.Fatalf("invalid profile: %v", err)
   976  		}
   977  		return p
   978  	}
   979  
   980  	measureDelta := func(t *testing.T, fn func()) (metricGrowth, profileGrowth float64, p *profile.Profile) {
   981  		beforeProfile := loadProfile(t)
   982  		beforeMetrics := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}}
   983  		metrics.Read(beforeMetrics)
   984  
   985  		fn()
   986  
   987  		afterProfile := loadProfile(t)
   988  		afterMetrics := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}}
   989  		metrics.Read(afterMetrics)
   990  
   991  		sumSamples := func(p *profile.Profile, i int) int64 {
   992  			var sum int64
   993  			for _, s := range p.Sample {
   994  				sum += s.Value[i]
   995  			}
   996  			return sum
   997  		}
   998  
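        		// A mutex profile sample carries two values: Value[0] is the number of
        		// contention events and Value[1] is the total delay in nanoseconds,
        		// hence the conversion through time.Nanosecond.Seconds() below.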
   999  		metricGrowth = afterMetrics[0].Value.Float64() - beforeMetrics[0].Value.Float64()
  1000  		profileGrowth = float64(sumSamples(afterProfile, 1)-sumSamples(beforeProfile, 1)) * time.Nanosecond.Seconds()
  1001  
  1002  		// The internal/profile package does not support compaction; this delta
  1003  		// profile will include separate positive and negative entries.
  1004  		p = afterProfile.Copy()
  1005  		if len(beforeProfile.Sample) > 0 {
  1006  			err := p.Merge(beforeProfile, -1)
  1007  			if err != nil {
  1008  				t.Fatalf("Merge profiles: %v", err)
  1009  			}
  1010  		}
  1011  
  1012  		return metricGrowth, profileGrowth, p
  1013  	}
  1014  
  1015  	testcase := func(strictTiming bool, acceptStacks [][]string, workers int, fn func() bool) func(t *testing.T) (metricGrowth float64, profileGrowth []int64, n, value int64, explain func()) {
  1016  		return func(t *testing.T) (metricGrowth float64, profileGrowth []int64, n, value int64, explain func()) {
  1017  			metricGrowth, totalProfileGrowth, p := measureDelta(t, func() {
  1018  				var started, stopped sync.WaitGroup
  1019  				started.Add(workers)
  1020  				stopped.Add(workers)
  1021  				for i := 0; i < workers; i++ {
  1022  					w := &contentionWorker{
  1023  						before: func() {
  1024  							started.Done()
  1025  							started.Wait()
  1026  						},
  1027  						after: func() {
  1028  							stopped.Done()
  1029  						},
  1030  						fn: fn,
  1031  					}
  1032  					go w.run()
  1033  				}
  1034  				stopped.Wait()
  1035  			})
  1036  
  1037  			if totalProfileGrowth == 0 {
  1038  				t.Errorf("no increase in mutex profile")
  1039  			}
  1040  			if metricGrowth == 0 && strictTiming {
  1041  				// If the critical section is very short, systems with low timer
  1042  				// resolution may be unable to measure it via nanotime.
  1043  				//
  1044  				// This is sampled at 1 per gTrackingPeriod, but the explicit
  1045  				// runtime.mutex tests create 200 contention events. Observing
  1046  				// zero of those has a probability of (7/8)^200 = 2.5e-12 which
  1047  				// is acceptably low (though the calculation has a tenuous
  1048  				// dependency on cheaprandn being a good-enough source of
  1049  				// entropy).
  1050  				t.Errorf("no increase in /sync/mutex/wait/total:seconds metric")
  1051  			}
  1052  			// This comparison is possible because the time measurements in support of
  1053  			// runtime/pprof and runtime/metrics for runtime-internal locks are so close
  1054  			// together. It doesn't work as well for user-space contention, where the
  1055  			// involved goroutines are not _Grunnable the whole time and so need to pass
  1056  			// through the scheduler.
  1057  			t.Logf("lock contention growth in runtime/pprof's view  (%fs)", totalProfileGrowth)
  1058  			t.Logf("lock contention growth in runtime/metrics' view (%fs)", metricGrowth)
  1059  
  1060  			acceptStacks = append([][]string(nil), acceptStacks...)
  1061  			for i, stk := range acceptStacks {
  1062  				if goexperiment.StaticLockRanking {
  1063  					if !slices.ContainsFunc(stk, func(s string) bool {
  1064  						return s == "runtime.systemstack" || s == "runtime.mcall" || s == "runtime.mstart"
  1065  					}) {
  1066  						// stk is a call stack that is still on the user stack when
  1067  						// it calls runtime.unlock. Add the extra function that
  1068  						// we'll see, when the static lock ranking implementation of
  1069  						// runtime.unlockWithRank switches to the system stack.
  1070  						stk = append([]string{"runtime.unlockWithRank"}, stk...)
  1071  					}
  1072  				}
  1073  				acceptStacks[i] = stk
  1074  			}
  1075  
  1076  			var stks [][]string
  1077  			values := make([][2]int64, len(acceptStacks)+1)
  1078  			for _, s := range p.Sample {
  1079  				var have []string
  1080  				for _, loc := range s.Location {
  1081  					for _, line := range loc.Line {
  1082  						have = append(have, line.Function.Name)
  1083  					}
  1084  				}
  1085  				stks = append(stks, have)
  1086  				found := false
  1087  				for i, stk := range acceptStacks {
  1088  					if slices.Equal(have, stk) {
  1089  						values[i][0] += s.Value[0]
  1090  						values[i][1] += s.Value[1]
  1091  						found = true
  1092  						break
  1093  					}
  1094  				}
  1095  				if !found {
  1096  					values[len(values)-1][0] += s.Value[0]
  1097  					values[len(values)-1][1] += s.Value[1]
  1098  				}
  1099  			}
  1100  			profileGrowth = make([]int64, len(acceptStacks)+1)
  1101  			profileGrowth[len(profileGrowth)-1] = values[len(values)-1][1]
  1102  			for i, stk := range acceptStacks {
  1103  				n += values[i][0]
  1104  				value += values[i][1]
  1105  				profileGrowth[i] = values[i][1]
  1106  				t.Logf("stack %v has samples totaling n=%d value=%d", stk, values[i][0], values[i][1])
  1107  			}
  1108  			if n == 0 && value == 0 {
  1109  				t.Logf("profile:\n%s", p)
  1110  				for _, have := range stks {
  1111  					t.Logf("have stack %v", have)
  1112  				}
  1113  				for _, stk := range acceptStacks {
  1114  					t.Errorf("want stack %v", stk)
  1115  				}
  1116  			}
  1117  
  1118  			return metricGrowth, profileGrowth, n, value, func() {
  1119  				t.Logf("profile:\n%s", p)
  1120  			}
  1121  		}
  1122  	}
  1123  
  1124  	name := t.Name()
  1125  
  1126  	t.Run("runtime.lock", func(t *testing.T) {
  1127  		// The goroutine that acquires the lock will only proceed when it
  1128  		// detects that its partner is contending for the lock. That will lead to
  1129  		// live-lock if anything (such as a STW) prevents the partner goroutine
  1130  		// from running. Allowing the contention workers to pause and restart
  1131  		// (to allow a STW to proceed) makes it harder to confirm that we're
  1132  		// counting the correct number of contention events, since some locks
  1133  		// will end up contended twice. Instead, disable the GC.
  1134  		defer debug.SetGCPercent(debug.SetGCPercent(-1))
  1135  
  1136  		mus := make([]runtime.Mutex, 200)
  1137  		var needContention atomic.Int64
  1138  
  1139  		baseDelay := 100 * time.Microsecond // large relative to system noise, for comparison between clocks
  1140  		fastDelayMicros := baseDelay.Microseconds()
  1141  		slowDelayMicros := baseDelay.Microseconds() * 4
  1142  
  1143  		const (
  1144  			fastRole = iota
  1145  			slowRole
  1146  			workerCount
  1147  		)
  1148  		if runtime.GOMAXPROCS(0) < workerCount {
  1149  			t.Skipf("contention on runtime-internal locks requires GOMAXPROCS >= %d", workerCount)
  1150  		}
  1151  
  1152  		minTicks := make([][]int64, workerCount) // lower bound, known-contended time, measured by cputicks
  1153  		maxTicks := make([][]int64, workerCount) // upper bound, total lock() duration, measured by cputicks
  1154  		for i := range minTicks {
  1155  			minTicks[i] = make([]int64, len(mus))
  1156  			maxTicks[i] = make([]int64, len(mus))
  1157  		}
  1158  		var id atomic.Int32
  1159  		fn := func() bool {
  1160  			n := int(needContention.Load())
  1161  			if n < 0 {
  1162  				return false
  1163  			}
  1164  			mu := &mus[n]
  1165  
  1166  			// Each worker has a role: to have a fast or slow critical section.
  1167  			// Rotate the role assignments as we step through the mutex slice so
  1168  			// we don't end up with one M always claiming the same kind of work.
  1169  			id := int(id.Add(1))
  1170  			role := (id + n) % workerCount
  1171  
  1172  			marker, delayMicros := fastMarkerFrame, fastDelayMicros
  1173  			if role == slowRole {
  1174  				marker, delayMicros = slowMarkerFrame, slowDelayMicros
  1175  			}
  1176  
  1177  			// Each lock is used by two different critical sections, one fast
  1178  			// and one slow, identified in profiles by their different "marker"
  1179  			// functions. We expect the profile to blame each for the amount of
  1180  			// delay it inflicts on other users of the lock. We run one worker
  1181  			// of each kind, so any contention in one would be due to the other.
  1182  			//
  1183  			// We measure how long our runtime.lock call takes, which sets an
  1184  			// upper bound on how much blame to expect for the other worker type
  1185  			// in the profile. And if we acquire the lock first, we wait for the
  1186  			// other worker to announce its contention. We measure the
  1187  			// known-contended time, to use as a lower bound on how much blame
  1188  			// we expect of ourselves in the profile. Then we stall for a little
  1189  			// while (different amounts for "fast" versus "slow") before
  1190  			// unlocking the mutex.
  1191  
  1192  			marker(func() {
  1193  				t0 := runtime.Cputicks()
  1194  				runtime.Lock(mu)
  1195  				maxTicks[role][n] = runtime.Cputicks() - t0
  1196  				minTicks[role][n] = 0
  1197  				for int(needContention.Load()) == n {
  1198  					if runtime.MutexContended(mu) {
  1199  						t1 := runtime.Cputicks()
  1200  						// make them wait a little while
  1201  						for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; {
  1202  							runtime.Usleep(uint32(1 + delayMicros/8))
  1203  						}
  1204  						minTicks[role][n] = runtime.Cputicks() - t1
  1205  						break
  1206  					}
  1207  				}
  1208  				runtime.Unlock(mu)
  1209  				needContention.Store(int64(n - 1))
  1210  			})
  1211  
  1212  			return true
  1213  		}
  1214  
  1215  		stks := make([][]string, 2)
  1216  		for i := range stks {
  1217  			marker := "runtime_test.fastMarkerFrame"
  1218  			if i == slowRole {
  1219  				marker = "runtime_test.slowMarkerFrame"
  1220  			}
  1221  
  1222  			stks[i] = []string{
  1223  				"runtime.unlock",
  1224  				"runtime_test." + name + ".func4.1.1",
  1225  				marker,
  1226  				"runtime_test." + name + ".func4.1",
  1227  				"runtime_test.(*contentionWorker).run",
  1228  			}
  1229  		}
  1230  
  1231  		t.Run("sample-1", func(t *testing.T) {
  1232  			old := runtime.SetMutexProfileFraction(1)
  1233  			defer runtime.SetMutexProfileFraction(old)
  1234  
  1235  			needContention.Store(int64(len(mus) - 1))
  1236  			metricGrowth, profileGrowth, n, _, explain := testcase(true, stks, workerCount, fn)(t)
  1237  			defer func() {
  1238  				if t.Failed() {
  1239  					explain()
  1240  				}
  1241  			}()
  1242  
  1243  			t.Run("metric", func(t *testing.T) {
  1244  				// The runtime/metrics view may be sampled at 1 per
  1245  				// gTrackingPeriod, so we don't have a hard lower bound here.
  1246  				testenv.SkipFlaky(t, 64253)
  1247  
  1248  				if have, want := metricGrowth, baseDelay.Seconds()*float64(len(mus)); have < want {
  1249  					// The test imposes a delay with usleep, verified with calls to
  1250  					// nanotime. Compare against the runtime/metrics package's view
  1251  					// (based on nanotime) rather than runtime/pprof's view (based
  1252  					// on cputicks).
  1253  					t.Errorf("runtime/metrics reported less than the known minimum contention duration (%fs < %fs)", have, want)
  1254  				}
  1255  			})
  1256  			if have, want := n, int64(len(mus)); have != want {
  1257  				t.Errorf("mutex profile reported contention count different from the known true count (%d != %d)", have, want)
  1258  			}
  1259  
  1260  			var slowMinTicks, fastMinTicks int64
  1261  			for role, ticks := range minTicks {
  1262  				for _, delta := range ticks {
  1263  					if role == slowRole {
  1264  						slowMinTicks += delta
  1265  					} else {
  1266  						fastMinTicks += delta
  1267  					}
  1268  				}
  1269  			}
  1270  			var slowMaxTicks, fastMaxTicks int64
  1271  			for role, ticks := range maxTicks {
  1272  				for _, delta := range ticks {
  1273  					if role == slowRole {
  1274  						slowMaxTicks += delta
  1275  					} else {
  1276  						fastMaxTicks += delta
  1277  					}
  1278  				}
  1279  			}
  1280  
  1281  			cpuGHz := float64(runtime.CyclesPerSecond()) / 1e9
  1282  			for _, set := range []struct {
  1283  				name     string
  1284  				profTime int64
  1285  				minTime  int64
  1286  				maxTime  int64
  1287  			}{
  1288  				{
  1289  					name:     "slow",
  1290  					profTime: profileGrowth[slowRole],
  1291  					minTime:  int64(float64(slowMinTicks) / cpuGHz),
  1292  					maxTime:  int64(float64(fastMaxTicks) / cpuGHz),
  1293  				},
  1294  				{
  1295  					name:     "fast",
  1296  					profTime: profileGrowth[fastRole],
  1297  					minTime:  int64(float64(fastMinTicks) / cpuGHz),
  1298  					maxTime:  int64(float64(slowMaxTicks) / cpuGHz),
  1299  				},
  1300  			} {
  1301  				t.Logf("profile's view of delays due to %q critical section:                 %dns", set.name, set.profTime)
  1302  				t.Logf("test's view of known-contended time within %q critical section:      %dns", set.name, set.minTime)
  1303  				t.Logf("test's view of lock duration before critical sections other than %q: %dns", set.name, set.maxTime)
  1304  
  1305  				if set.profTime < set.minTime {
  1306  					t.Errorf("profile undercounted %q critical section", set.name)
  1307  				}
  1308  				if set.profTime > set.maxTime {
  1309  					t.Errorf("profile overcounted %q critical section", set.name)
  1310  				}
  1311  			}
  1312  
  1313  			var totalProfileGrowth float64
  1314  			for _, growth := range profileGrowth {
  1315  				totalProfileGrowth += float64(growth) * time.Nanosecond.Seconds()
  1316  			}
  1317  
  1318  			const slop = 1.5 // account for nanotime vs cputicks
  1319  			t.Run("compare timers", func(t *testing.T) {
  1320  				testenv.SkipFlaky(t, 64253)
  1321  				if totalProfileGrowth > slop*metricGrowth || metricGrowth > slop*totalProfileGrowth {
  1322  					t.Errorf("views differ by more than %fx", slop)
  1323  				}
  1324  			})
  1325  		})
  1326  
  1327  		t.Run("sample-2", func(t *testing.T) {
  1328  			testenv.SkipFlaky(t, 64253)
  1329  
  1330  			old := runtime.SetMutexProfileFraction(2)
  1331  			defer runtime.SetMutexProfileFraction(old)
  1332  
  1333  			needContention.Store(int64(len(mus) - 1))
  1334  			metricGrowth, profileGrowth, n, _, explain := testcase(true, stks, workerCount, fn)(t)
  1335  			defer func() {
  1336  				if t.Failed() {
  1337  					explain()
  1338  				}
  1339  			}()
  1340  
  1341  			// With 100 trials and profile fraction of 2, we expect to capture
  1342  			// 50 samples. Allow the test to pass if we get at least 20 samples;
  1343  			// the CDF of the binomial distribution says there's less than a
  1344  			// 1e-9 chance of that, which is an acceptably low flakiness rate.
  1345  			const samplingSlop = 2.5
  1346  
  1347  			if have, want := metricGrowth, baseDelay.Seconds()*float64(len(mus)); samplingSlop*have < want {
  1348  				// The test imposes a delay with usleep, verified with calls to
  1349  				// nanotime. Compare against the runtime/metrics package's view
  1350  				// (based on nanotime) rather than runtime/pprof's view (based
  1351  				// on cputicks).
  1352  				t.Errorf("runtime/metrics reported less than the known minimum contention duration (%f * %fs < %fs)", samplingSlop, have, want)
  1353  			}
  1354  			if have, want := n, int64(len(mus)); float64(have) > float64(want)*samplingSlop || float64(want) > float64(have)*samplingSlop {
  1355  				t.Errorf("mutex profile reported contention count too different from the expected count (%d far from %d)", have, want)
  1356  			}
  1357  
  1358  			var totalProfileGrowth float64
  1359  			for _, growth := range profileGrowth {
  1360  				totalProfileGrowth += float64(growth) * time.Nanosecond.Seconds()
  1361  			}
  1362  
  1363  			const timerSlop = 1.5 * samplingSlop // account for nanotime vs cputicks, plus the two views' independent sampling
  1364  			if totalProfileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*totalProfileGrowth {
  1365  				t.Errorf("views differ by more than %fx", timerSlop)
  1366  			}
  1367  		})
  1368  	})
  1369  
  1370  	t.Run("runtime.semrelease", func(t *testing.T) {
  1371  		testenv.SkipFlaky(t, 64253)
  1372  
  1373  		old := runtime.SetMutexProfileFraction(1)
  1374  		defer runtime.SetMutexProfileFraction(old)
  1375  
  1376  		const workers = 3
  1377  		if runtime.GOMAXPROCS(0) < workers {
  1378  			t.Skipf("creating and observing contention on runtime-internal semaphores requires GOMAXPROCS >= %d", workers)
  1379  		}
  1380  
  1381  		var sem uint32 = 1
  1382  		var tries atomic.Int32
  1383  		tries.Store(10_000_000) // prefer controlled failure to timeout
  1384  		var sawContention atomic.Int32
  1385  		var need int32 = 1
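        		// Each worker repeatedly acquires and releases a runtime-internal
        		// semaphore until some worker observes contention on the
        		// semaphore's root lock, or until the tries budget is exhausted
        		// (so a misbehaving run fails in a controlled way rather than
        		// timing out).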
  1386  		fn := func() bool {
  1387  			if sawContention.Load() >= need {
  1388  				return false
  1389  			}
  1390  			if tries.Add(-1) < 0 {
  1391  				return false
  1392  			}
  1393  
  1394  			runtime.Semacquire(&sem)
  1395  			runtime.Semrelease1(&sem, false, 0)
  1396  			if runtime.MutexContended(runtime.SemRootLock(&sem)) {
  1397  				sawContention.Add(1)
  1398  			}
  1399  			return true
  1400  		}
  1401  
  1402  		stks := [][]string{
  1403  			{
  1404  				"runtime.unlock",
  1405  				"runtime.semrelease1",
  1406  				"runtime_test.TestRuntimeLockMetricsAndProfile.func5.1",
  1407  				"runtime_test.(*contentionWorker).run",
  1408  			},
  1409  			{
  1410  				"runtime.unlock",
  1411  				"runtime.semacquire1",
  1412  				"runtime.semacquire",
  1413  				"runtime_test.TestRuntimeLockMetricsAndProfile.func5.1",
  1414  				"runtime_test.(*contentionWorker).run",
  1415  			},
  1416  		}
  1417  
  1418  		// Verify that we get the call stacks we expect, with anything more than
  1419  		// zero cycles / zero samples. The duration of each contention event is
  1420  		// too small relative to the expected overhead for us to verify its value
  1421  		// more directly. Leave that to the explicit lock/unlock test.
  1422  
  1423  		testcase(false, stks, workers, fn)(t)
  1424  
  1425  		if remaining := tries.Load(); remaining >= 0 {
  1426  			t.Logf("finished test early (%d tries remaining)", remaining)
  1427  		}
  1428  	})
  1429  }
  1430  
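        // slowMarkerFrame and fastMarkerFrame are identity wrappers whose only
        // purpose is to appear as distinct frames in call stacks, letting the
        // lock contention tests tell the "slow" and "fast" critical sections
        // apart in the mutex profile.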
  1431  func slowMarkerFrame(fn func()) { fn() }
  1432  func fastMarkerFrame(fn func()) { fn() }
  1433  
  1434  // contentionWorker provides cleaner call stacks for lock contention profile tests.
  1435  type contentionWorker struct {
  1436  	before func()
  1437  	fn     func() bool
  1438  	after  func()
  1439  }
  1440  
  1441  func (w *contentionWorker) run() {
  1442  	defer w.after()
  1443  	w.before()
  1444  
  1445  	for w.fn() {
  1446  	}
  1447  }
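        // A contentionWorker might be driven roughly like this (hypothetical
        // sketch; the real tests coordinate startup and shutdown through the
        // before/after hooks):
        //
        //	var wg sync.WaitGroup
        //	wg.Add(1)
        //	w := &contentionWorker{
        //		before: func() {},                    // e.g. wait until all workers exist
        //		fn:     func() bool { return false }, // loop body; return false to stop
        //		after:  wg.Done,                      // signal completion
        //	}
        //	go w.run()
        //	wg.Wait()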
  1448  
  1449  func TestCPUStats(t *testing.T) {
  1450  	// Run a few GC cycles to get some of the stats to be non-zero.
  1451  	runtime.GC()
  1452  	runtime.GC()
  1453  	runtime.GC()
  1454  
  1455  	// Set GOMAXPROCS high then sleep briefly to ensure we generate
  1456  	// some idle time.
  1457  	oldmaxprocs := runtime.GOMAXPROCS(10)
  1458  	time.Sleep(time.Millisecond)
  1459  	runtime.GOMAXPROCS(oldmaxprocs)
  1460  
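        	// The stats read below are expected to satisfy these accounting
        	// identities:
        	//
        	//	GCTotalTime       = GCAssistTime + GCDedicatedTime + GCIdleTime + GCPauseTime
        	//	ScavengeTotalTime = ScavengeAssistTime + ScavengeBgTime
        	//	TotalTime         = GCTotalTime + ScavengeTotalTime + IdleTime + UserTime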
  1461  	stats := runtime.ReadCPUStats()
  1462  	gcTotal := stats.GCAssistTime + stats.GCDedicatedTime + stats.GCIdleTime + stats.GCPauseTime
  1463  	if gcTotal != stats.GCTotalTime {
  1464  		t.Errorf("manually computed total does not match GCTotalTime: %d cpu-ns vs. %d cpu-ns", gcTotal, stats.GCTotalTime)
  1465  	}
  1466  	scavTotal := stats.ScavengeAssistTime + stats.ScavengeBgTime
  1467  	if scavTotal != stats.ScavengeTotalTime {
  1468  		t.Errorf("manually computed total does not match ScavengeTotalTime: %d cpu-ns vs. %d cpu-ns", scavTotal, stats.ScavengeTotalTime)
  1469  	}
  1470  	total := gcTotal + scavTotal + stats.IdleTime + stats.UserTime
  1471  	if total != stats.TotalTime {
  1472  		t.Errorf("manually computed overall total does not match TotalTime: %d cpu-ns vs. %d cpu-ns", total, stats.TotalTime)
  1473  	}
  1474  	if total == 0 {
  1475  		t.Error("total time is zero")
  1476  	}
  1477  	if gcTotal == 0 {
  1478  		t.Error("GC total time is zero")
  1479  	}
  1480  	if stats.IdleTime == 0 {
  1481  		t.Error("idle time is zero")
  1482  	}
  1483  }
  1484  
  1485  func TestMetricHeapUnusedLargeObjectOverflow(t *testing.T) {
  1486  	// This test makes sure /memory/classes/heap/unused:bytes
  1487  	// doesn't overflow when allocating and deallocating large
  1488  	// objects. It is a regression test for #67019.
  1489  	done := make(chan struct{})
  1490  	var wg sync.WaitGroup
  1491  	wg.Add(1)
  1492  	go func() {
  1493  		defer wg.Done()
  1494  		for {
  1495  			for range 10 {
  1496  				abi.Escape(make([]byte, 1<<20))
  1497  			}
  1498  			runtime.GC()
  1499  			select {
  1500  			case <-done:
  1501  				return
  1502  			default:
  1503  			}
  1504  		}
  1505  	}()
  1506  	s := []metrics.Sample{
  1507  		{Name: "/memory/classes/heap/unused:bytes"},
  1508  	}
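        	// The metric is computed as a difference of heap counters (roughly
        	// HeapInuse - HeapAlloc), so a bookkeeping bug can make it underflow
        	// and wrap to a value near 2^64. No plausible unused-heap figure in
        	// this test approaches 1 TiB (1<<40 bytes), so anything above that
        	// threshold is treated as overflow.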
  1509  	for range 1000 {
  1510  		metrics.Read(s)
  1511  		if s[0].Value.Uint64() > 1<<40 {
  1512  			t.Errorf("overflow")
  1513  			break
  1514  		}
  1515  	}
  1516  	done <- struct{}{}
  1517  	wg.Wait()
  1518  }
  1519  
  1520  func TestReadMetricsCleanups(t *testing.T) {
  1521  	runtime.GC()                                                // End any in-progress GC.
  1522  	runtime.BlockUntilEmptyCleanupQueue(int64(1 * time.Second)) // Flush any queued cleanups.
  1523  
  1524  	var before [2]metrics.Sample
  1525  	before[0].Name = "/gc/cleanups/queued:cleanups"
  1526  	before[1].Name = "/gc/cleanups/executed:cleanups"
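        	// Array assignment copies the samples, so "after" carries the same
        	// metric names as "before" while its values get filled in
        	// independently by the second Read below.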
  1527  	after := before
  1528  
  1529  	metrics.Read(before[:])
  1530  
  1531  	const N = 10
  1532  	for i := 0; i < N; i++ {
  1533  		runtime.AddCleanup(new(*int), func(_ struct{}) {}, struct{}{})
  1534  	}
  1535  
  1536  	runtime.GC()
  1537  	runtime.BlockUntilEmptyCleanupQueue(int64(1 * time.Second))
  1538  
  1539  	metrics.Read(after[:])
  1540  
  1541  	if v0, v1 := before[0].Value.Uint64(), after[0].Value.Uint64(); v0+N != v1 {
  1542  		t.Errorf("expected %s difference to be exactly %d, got %d -> %d", before[0].Name, N, v0, v1)
  1543  	}
  1544  	if v0, v1 := before[1].Value.Uint64(), after[1].Value.Uint64(); v0+N != v1 {
  1545  		t.Errorf("expected %s difference to be exactly %d, got %d -> %d", before[1].Name, N, v0, v1)
  1546  	}
  1547  }
  1548  
  1549  func TestReadMetricsFinalizers(t *testing.T) {
  1550  	runtime.GC()                                                  // End any in-progress GC.
  1551  	runtime.BlockUntilEmptyFinalizerQueue(int64(1 * time.Second)) // Flush any queued finalizers.
  1552  
  1553  	var before [2]metrics.Sample
  1554  	before[0].Name = "/gc/finalizers/queued:finalizers"
  1555  	before[1].Name = "/gc/finalizers/executed:finalizers"
  1556  	after := before
  1557  
  1558  	metrics.Read(before[:])
  1559  
  1560  	const N = 10
  1561  	for i := 0; i < N; i++ {
  1562  		runtime.SetFinalizer(new(*int), func(_ **int) {})
  1563  	}
  1564  
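        	// Two GC cycles: the first discovers that the objects allocated above
        	// are unreachable and queues their finalizers; the second, presumably,
        	// guards against any objects the first cycle did not yet process.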
  1565  	runtime.GC()
  1566  	runtime.GC()
  1567  	runtime.BlockUntilEmptyFinalizerQueue(int64(1 * time.Second))
  1568  
  1569  	metrics.Read(after[:])
  1570  
  1571  	if v0, v1 := before[0].Value.Uint64(), after[0].Value.Uint64(); v0+N != v1 {
  1572  		t.Errorf("expected %s difference to be exactly %d, got %d -> %d", before[0].Name, N, v0, v1)
  1573  	}
  1574  	if v0, v1 := before[1].Value.Uint64(), after[1].Value.Uint64(); v0+N != v1 {
  1575  		t.Errorf("expected %s difference to be exactly %d, got %d -> %d", before[1].Name, N, v0, v1)
  1576  	}
  1577  }
  1578  