// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// CPU profiling.
// Based on algorithms and data structures used in
// https://github.com/google/pprof.
//
// The main difference between this code and the google-perftools
// code is that this code is written to allow copying the profile data
// to an arbitrary io.Writer, while the google-perftools code always
// writes to an operating system file.
//
// The signal handler for the profiling clock tick adds a new stack trace
// to a hash table tracking counts for recent traces. Most clock ticks
// hit in the cache. In the event of a cache miss, an entry must be
// evicted from the hash table and copied to a log that will eventually be
// written as profile data. The google-perftools code flushed the
// log itself during the signal handler. This code cannot do that, because
// the io.Writer might block or need system calls or locks that are not
// safe to use from within the signal handler. Instead, we split the log
// into two halves and let the signal handler fill one half while a goroutine
// is writing out the other half. When the signal handler fills its half, it
// offers to swap with the goroutine. If the writer is not done with its half,
// we lose the stack trace for this clock tick (and record that loss).
// The goroutine interacts with the signal handler by calling getprofile() to
// get the next log piece to write, implicitly handing back the last log
// piece it obtained.
//
// The state of this dance between the signal handler and the goroutine
// is encoded in the cpuProfile.handoff field. If handoff == 0, then the goroutine
// is not using either log half and is waiting (or will soon be waiting) for
// a new piece by sleeping on p.wait (see getprofile). If the signal handler
// changes handoff from 0 to non-zero, it must call notewakeup(&p.wait)
// to wake the goroutine. The value indicates the number of entries in the
// log half being handed off. The goroutine leaves the non-zero value in
// place until it has finished processing the log half and then flips the number
// back to zero. Setting the high bit in handoff means that profiling is over,
// and the goroutine is now in charge of flushing the data left in the hash table
// to the log and returning that data.
//
// The handoff field is manipulated using atomic operations.
// For the most part, the manipulation of handoff is orderly: if handoff == 0
// then the signal handler owns it and can change it to non-zero.
// If handoff != 0 then the goroutine owns it and can change it to zero.
// If that were the end of the story then we would not need to manipulate
// handoff using atomic operations. The operations are needed, however,
// in order to let the log closer set the high bit to indicate "EOF" safely
// in the situation when normally the goroutine "owns" handoff.
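
// As a compact restatement of the handoff protocol described above (the
// code in flushlog and getprofile below is authoritative):
//
//	handoff == 0            the signal handler side owns the word; flushlog
//	                        may CAS it from 0 to n, the number of words in
//	                        the freshly filled log half, and must then call
//	                        notewakeup(&p.wait).
//	handoff == n, n > 0     the goroutine owns the word; getprofile hands
//	                        log[wtoggle][:n] to its caller and, on its next
//	                        call, CASes handoff back to 0 to release the half.
//	high bit (0x80000000)   set by SetCPUProfileRate(0); getprofile switches
//	                        to flush mode and drains the hash table and log.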

package runtime

import (
	"runtime/internal/atomic"
	"unsafe"
)

const (
	numBuckets      = 1 << 10
	logSize         = 1 << 17
	assoc           = 4
	maxCPUProfStack = 64
)

type cpuprofEntry struct {
	count uintptr
	depth int
	stack [maxCPUProfStack]uintptr
}

//go:notinheap
type cpuProfile struct {
	on     bool    // profiling is on
	wait   note    // goroutine waits here
	count  uintptr // tick count
	evicts uintptr // eviction count
	lost   uintptr // lost ticks that need to be logged

	// Active recent stack traces.
	hash [numBuckets]struct {
		entry [assoc]cpuprofEntry
	}

	// Log of traces evicted from hash.
	// Signal handler has filled log[toggle][:nlog].
	// Goroutine is writing log[1-toggle][:handoff].
	log     [2][logSize / 2]uintptr
	nlog    int
	toggle  int32
	handoff uint32

	// Writer state.
	// Writer maintains its own toggle to avoid races
	// looking at signal handler's toggle.
	wtoggle  uint32
	wholding bool // holding & need to release a log half
	flushing bool // flushing hash table - profile is over
	eodSent  bool // special end-of-data record sent; => flushing
}

var (
	cpuprofLock mutex
	cpuprof     *cpuProfile

	eod = [3]uintptr{0, 1, 0}
)

func setcpuprofilerate(hz int32) {
	systemstack(func() {
		setcpuprofilerate_m(hz)
	})
}

// lostProfileData is a no-op function used in profiles
// to mark the number of profiling stack traces that were
// discarded due to slow data writers.
func lostProfileData() {}

// SetCPUProfileRate sets the CPU profiling rate to hz samples per second.
// If hz <= 0, SetCPUProfileRate turns off profiling.
// If the profiler is on, the rate cannot be changed without first turning it off.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.cpuprofile flag instead of calling
// SetCPUProfileRate directly.
func SetCPUProfileRate(hz int) {
	// Clamp hz to something reasonable.
	if hz < 0 {
		hz = 0
	}
	if hz > 1000000 {
		hz = 1000000
	}

	lock(&cpuprofLock)
	if hz > 0 {
		if cpuprof == nil {
			cpuprof = (*cpuProfile)(sysAlloc(unsafe.Sizeof(cpuProfile{}), &memstats.other_sys))
			if cpuprof == nil {
				print("runtime: cpu profiling cannot allocate memory\n")
				unlock(&cpuprofLock)
				return
			}
		}
		if cpuprof.on || cpuprof.handoff != 0 {
			print("runtime: cannot set cpu profile rate until previous profile has finished.\n")
			unlock(&cpuprofLock)
			return
		}

		cpuprof.on = true
		// pprof binary header format.
		// https://github.com/gperftools/gperftools/blob/master/src/profiledata.cc#L119
		p := &cpuprof.log[0]
		p[0] = 0                 // count for header
		p[1] = 3                 // depth for header
		p[2] = 0                 // version number
		p[3] = uintptr(1e6 / hz) // period (microseconds)
		p[4] = 0
		cpuprof.nlog = 5
		cpuprof.toggle = 0
		cpuprof.wholding = false
		cpuprof.wtoggle = 0
		cpuprof.flushing = false
		cpuprof.eodSent = false
		noteclear(&cpuprof.wait)

		setcpuprofilerate(int32(hz))
	} else if cpuprof != nil && cpuprof.on {
		setcpuprofilerate(0)
		cpuprof.on = false

		// Now add is not running anymore, and getprofile owns the entire log.
		// Set the high bit in cpuprof.handoff to tell getprofile.
		for {
			n := cpuprof.handoff
			if n&0x80000000 != 0 {
				print("runtime: setcpuprofile(off) twice\n")
			}
			if atomic.Cas(&cpuprof.handoff, n, n|0x80000000) {
				if n == 0 {
					// we did the transition from 0 -> nonzero so we wake getprofile
					notewakeup(&cpuprof.wait)
				}
				break
			}
		}
	}
	unlock(&cpuprofLock)
}
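
// For illustration, the words that end up in the profile stream are: the
// five-word header written above (count=0, depth=3, version, period in
// microseconds, 0), then sample records written by evict below (and the
// occasional lost-ticks record written by flushlog), each of the form
// count, depth, pc1..pcDepth, and finally the three-word end-of-data
// marker eod (0, 1, 0). A minimal decoder over such a []uintptr could look
// like the hypothetical sketch below; it is not part of the runtime and
// assumes the byte stream returned by CPUProfile has already been
// converted back into words (the inverse of uintptrBytes):
//
//	func decode(words []uintptr) {
//		words = words[5:] // skip the header record
//		for len(words) >= 3 {
//			count, depth := words[0], int(words[1])
//			if count == 0 && depth == 1 {
//				break // end-of-data marker
//			}
//			if len(words) < 2+depth {
//				break // truncated record
//			}
//			pcs := words[2 : 2+depth]
//			_ = pcs // a real decoder would symbolize pcs and weight them by count
//			words = words[2+depth:]
//		}
//	}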

// add adds the stack trace to the profile.
// It is called from signal handlers and other limited environments
// and cannot allocate memory or acquire locks that might be
// held at the time of the signal, nor can it use substantial amounts
// of stack. It is allowed to call evict.
//go:nowritebarrierrec
func (p *cpuProfile) add(pc []uintptr) {
	p.addWithFlushlog(pc, p.flushlog)
}

// addWithFlushlog implements add and addNonGo.
// It is called from signal handlers and other limited environments
// and cannot allocate memory or acquire locks that might be
// held at the time of the signal, nor can it use substantial amounts
// of stack. It may be called by a signal handler with no g or m.
// It is allowed to call evict, passing the flushlog parameter.
//go:nosplit
//go:nowritebarrierrec
func (p *cpuProfile) addWithFlushlog(pc []uintptr, flushlog func() bool) {
	if len(pc) > maxCPUProfStack {
		pc = pc[:maxCPUProfStack]
	}

	// Compute hash.
	h := uintptr(0)
	for _, x := range pc {
		h = h<<8 | (h >> (8 * (unsafe.Sizeof(h) - 1)))
		h += x * 41
	}
	p.count++

	// Add to entry count if already present in table.
	b := &p.hash[h%numBuckets]
Assoc:
	for i := range b.entry {
		e := &b.entry[i]
		if e.depth != len(pc) {
			continue
		}
		for j := range pc {
			if e.stack[j] != pc[j] {
				continue Assoc
			}
		}
		e.count++
		return
	}

	// Evict entry with smallest count.
	var e *cpuprofEntry
	for i := range b.entry {
		if e == nil || b.entry[i].count < e.count {
			e = &b.entry[i]
		}
	}
	if e.count > 0 {
		if !p.evict(e, flushlog) {
			// Could not evict entry. Record lost stack.
			p.lost++
			return
		}
		p.evicts++
	}

	// Reuse the newly evicted entry.
	e.depth = len(pc)
	e.count = 1
	copy(e.stack[:], pc)
}
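
// Note: the hash table used by addWithFlushlog behaves like a small 4-way
// set-associative cache keyed by the entire stack trace. With
// numBuckets = 1<<10 and assoc = 4 it holds at most 4096 distinct recent
// traces; a repeated trace only increments e.count, while a new trace
// arriving at a full bucket forces the bucket's least-counted entry out to
// the log via evict (or, if the current log half is full and cannot be
// flushed, the new trace is dropped and counted in p.lost).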

// evict copies the given entry's data into the log, so that
// the entry can be reused. evict is called from add, which
// is called from the profiling signal handler, so it must not
// allocate memory or block, and it may be called with no g or m.
// It is safe to call flushlog. evict returns true if the entry was
// copied to the log, false if there was no room available.
//go:nosplit
//go:nowritebarrierrec
func (p *cpuProfile) evict(e *cpuprofEntry, flushlog func() bool) bool {
	d := e.depth
	nslot := d + 2
	log := &p.log[p.toggle]
	if p.nlog+nslot > len(log) {
		if !flushlog() {
			return false
		}
		log = &p.log[p.toggle]
	}

	q := p.nlog
	log[q] = e.count
	q++
	log[q] = uintptr(d)
	q++
	copy(log[q:], e.stack[:d])
	q += d
	p.nlog = q
	e.count = 0
	return true
}

// flushlog tries to flush the current log and switch to the other one.
// flushlog is called from evict, called from add, called from the signal handler,
// so it cannot allocate memory or block. It can try to swap logs with
// the writing goroutine, as explained in the comment at the top of this file.
//go:nowritebarrierrec
func (p *cpuProfile) flushlog() bool {
	if !atomic.Cas(&p.handoff, 0, uint32(p.nlog)) {
		return false
	}
	notewakeup(&p.wait)

	p.toggle = 1 - p.toggle
	log := &p.log[p.toggle]
	q := 0
	if p.lost > 0 {
		lostPC := funcPC(lostProfileData)
		log[0] = p.lost
		log[1] = 1
		log[2] = lostPC
		q = 3
		p.lost = 0
	}
	p.nlog = q
	return true
}

// addNonGo is like add, but runs on a non-Go thread.
// It can't do anything that might need a g or an m.
// With this entry point, we don't try to flush the log when evicting an
// old entry. Instead, we just drop the stack trace if we're out of space.
//go:nosplit
//go:nowritebarrierrec
func (p *cpuProfile) addNonGo(pc []uintptr) {
	p.addWithFlushlog(pc, func() bool { return false })
}

// getprofile blocks until the next block of profiling data is available
// and returns it as a []byte. It is called from the writing goroutine.
func (p *cpuProfile) getprofile() []byte {
	if p == nil {
		return nil
	}

	if p.wholding {
		// Release previous log to signal handling side.
		// Loop because we are racing against SetCPUProfileRate(0).
		for {
			n := p.handoff
			if n == 0 {
				print("runtime: phase error during cpu profile handoff\n")
				return nil
			}
			if n&0x80000000 != 0 {
				p.wtoggle = 1 - p.wtoggle
				p.wholding = false
				p.flushing = true
				goto Flush
			}
			if atomic.Cas(&p.handoff, n, 0) {
				break
			}
		}
		p.wtoggle = 1 - p.wtoggle
		p.wholding = false
	}

	if p.flushing {
		goto Flush
	}

	if !p.on && p.handoff == 0 {
		return nil
	}

	// Wait for new log.
	notetsleepg(&p.wait, -1)
	noteclear(&p.wait)

	switch n := p.handoff; {
	case n == 0:
		print("runtime: phase error during cpu profile wait\n")
		return nil
	case n == 0x80000000:
		p.flushing = true
		goto Flush
	default:
		n &^= 0x80000000

		// Return new log to caller.
		p.wholding = true

		return uintptrBytes(p.log[p.wtoggle][:n])
	}

	// In flush mode.
	// Add is no longer being called. We own the log.
	// Also, p.handoff is non-zero, so flushlog will return false.
	// Evict the hash table into the log and return it.
Flush:
	for i := range p.hash {
		b := &p.hash[i]
		for j := range b.entry {
			e := &b.entry[j]
			if e.count > 0 && !p.evict(e, p.flushlog) {
				// Filled the log. Stop the loop and return what we've got.
				break Flush
			}
		}
	}

	// Return pending log data.
	if p.nlog > 0 {
		// Note that we're using toggle now, not wtoggle,
		// because we're working on the log directly.
		n := p.nlog
		p.nlog = 0
		return uintptrBytes(p.log[p.toggle][:n])
	}

	// Made it through the table without finding anything to log.
	if !p.eodSent {
		// We may not have space to append this to the partial log buf,
		// so we always return a new slice for the end-of-data marker.
		p.eodSent = true
		return uintptrBytes(eod[:])
	}

	// Finally done. Clean up and return nil.
	p.flushing = false
	if !atomic.Cas(&p.handoff, p.handoff, 0) {
		print("runtime: profile flush racing with something\n")
	}
	return nil
}

func uintptrBytes(p []uintptr) (ret []byte) {
	pp := (*slice)(unsafe.Pointer(&p))
	rp := (*slice)(unsafe.Pointer(&ret))

	rp.array = pp.array
	rp.len = pp.len * int(unsafe.Sizeof(p[0]))
	rp.cap = rp.len

	return
}

// CPUProfile returns the next chunk of binary CPU profiling stack trace data,
// blocking until data is available. If profiling is turned off and all the profile
// data accumulated while it was on has been returned, CPUProfile returns nil.
// The caller must save the returned data before calling CPUProfile again.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.cpuprofile flag instead of calling
// CPUProfile directly.
func CPUProfile() []byte {
	return cpuprof.getprofile()
}

//go:linkname runtime_pprof_runtime_cyclesPerSecond runtime_pprof.runtime_cyclesPerSecond
func runtime_pprof_runtime_cyclesPerSecond() int64 {
	return tickspersecond()
}
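
// A minimal sketch of how a client might drive CPUProfile, kept in a comment
// so that it does not become part of the runtime package. The function name
// writeProfile and the io.Writer parameter are illustrative only; most
// clients should let runtime/pprof manage this loop for them:
//
//	func writeProfile(w io.Writer) error {
//		// Assumes profiling was started elsewhere with SetCPUProfileRate(hz)
//		// and will eventually be stopped with SetCPUProfileRate(0).
//		for {
//			data := runtime.CPUProfile() // blocks until a chunk is available
//			if data == nil {
//				return nil // profiling is off and all buffered data has been returned
//			}
//			if _, err := w.Write(data); err != nil {
//				return err
//			}
//		}
//	}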