
Source file src/github.com/cilium/ebpf/perf/ring.go

Documentation: github.com/cilium/ebpf/perf

package perf

import (
	"errors"
	"fmt"
	"io"
	"math"
	"os"
	"runtime"
	"sync/atomic"
	"unsafe"

	"github.com/cilium/ebpf/internal/unix"
)

// perfEventRing is a page of metadata followed by
// a variable number of pages which form a ring buffer.
type perfEventRing struct {
	fd   int
	cpu  int
	mmap []byte
	*ringReader
}
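
// Editor's note: the mmap'd region backing a perfEventRing is laid out as
// one metadata page followed by a power-of-two number of data pages:
//
//	page 0:       struct perf_event_mmap_page (data_head, data_tail, offsets)
//	pages 1..2^n: the data ring itself, indexed modulo its size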

func newPerfEventRing(cpu, perCPUBuffer, watermark int) (*perfEventRing, error) {
	if watermark >= perCPUBuffer {
		return nil, errors.New("watermark must be smaller than perCPUBuffer")
	}

	fd, err := createPerfEvent(cpu, watermark)
	if err != nil {
		return nil, err
	}

	if err := unix.SetNonblock(fd, true); err != nil {
		unix.Close(fd)
		return nil, err
	}

	mmap, err := unix.Mmap(fd, 0, perfBufferSize(perCPUBuffer), unix.PROT_READ|unix.PROT_WRITE, unix.MAP_SHARED)
	if err != nil {
		unix.Close(fd)
		return nil, fmt.Errorf("can't mmap: %w", err)
	}

	// This relies on the fact that we allocate an extra metadata page,
	// and that the struct is smaller than an OS page.
	// This use of unsafe.Pointer isn't explicitly sanctioned by the
	// documentation, since a byte is smaller than PerfEventMmapPage.
	meta := (*unix.PerfEventMmapPage)(unsafe.Pointer(&mmap[0]))

	ring := &perfEventRing{
		fd:         fd,
		cpu:        cpu,
		mmap:       mmap,
		ringReader: newRingReader(meta, mmap[meta.Data_offset:meta.Data_offset+meta.Data_size]),
	}
	runtime.SetFinalizer(ring, (*perfEventRing).Close)

	return ring, nil
}
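
// A minimal usage sketch (editor's addition, not part of the original file):
// callers typically allocate one ring per CPU. allocateRings is a
// hypothetical helper illustrating the create-or-clean-up pattern.
func allocateRings(nCPU, perCPUBuffer int) ([]*perfEventRing, error) {
	rings := make([]*perfEventRing, 0, nCPU)
	for cpu := 0; cpu < nCPU; cpu++ {
		ring, err := newPerfEventRing(cpu, perCPUBuffer, 1)
		if err != nil {
			// Undo the partial allocation before returning the error.
			for _, r := range rings {
				r.Close()
			}
			return nil, err
		}
		rings = append(rings, ring)
	}
	return rings, nil
}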

// perfBufferSize returns a valid mmap buffer size for use with perf_event_open (1+2^n pages)
func perfBufferSize(perCPUBuffer int) int {
	pageSize := os.Getpagesize()

	// Smallest whole number of pages
	nPages := (perCPUBuffer + pageSize - 1) / pageSize

	// Round up to nearest power of two number of pages
	nPages = int(math.Pow(2, math.Ceil(math.Log2(float64(nPages)))))

	// Add one for metadata
	nPages += 1

	return nPages * pageSize
}
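
// Worked example (editor's note, assuming 4096-byte pages): a request for
// perCPUBuffer = 5000 needs ceil(5000/4096) = 2 data pages; 2 is already a
// power of two, so with the extra metadata page the mapping spans
// (2+1)*4096 = 12288 bytes.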

// Close unmaps the ring and closes the event file descriptor. Clearing the
// finalizer prevents it from running again after an explicit Close.
func (ring *perfEventRing) Close() {
	runtime.SetFinalizer(ring, nil)

	_ = unix.Close(ring.fd)
	_ = unix.Munmap(ring.mmap)

	ring.fd = -1
	ring.mmap = nil
}

// createPerfEvent opens a PERF_COUNT_SW_BPF_OUTPUT event on the given CPU.
// With PerfBitWatermark set, the kernel wakes up readers once at least
// watermark bytes are available in the ring.
func createPerfEvent(cpu, watermark int) (int, error) {
	if watermark == 0 {
		watermark = 1
	}

	attr := unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_SOFTWARE,
		Config:      unix.PERF_COUNT_SW_BPF_OUTPUT,
		Bits:        unix.PerfBitWatermark,
		Sample_type: unix.PERF_SAMPLE_RAW,
		Wakeup:      uint32(watermark),
	}

	attr.Size = uint32(unsafe.Sizeof(attr))
	fd, err := unix.PerfEventOpen(&attr, -1, cpu, -1, unix.PERF_FLAG_FD_CLOEXEC)
	if err != nil {
		return -1, fmt.Errorf("can't create perf event: %w", err)
	}
	return fd, nil
}
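
// Editor's note: the returned file descriptor is the channel a BPF program
// writes to via bpf_perf_event_output. Broadly, userspace stores the fd in
// a BPF_MAP_TYPE_PERF_EVENT_ARRAY at the index of the target CPU, and the
// kernel appends each output as a PERF_SAMPLE_RAW record to this ring.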

// ringReader implements io.Reader over the data pages of a perf ring
// buffer. head and tail cache the kernel's data_head and data_tail
// counters; tail is only written back via writeTail.
type ringReader struct {
	meta       *unix.PerfEventMmapPage
	head, tail uint64
	mask       uint64
	ring       []byte
}

func newRingReader(meta *unix.PerfEventMmapPage, ring []byte) *ringReader {
	return &ringReader{
		meta: meta,
		head: atomic.LoadUint64(&meta.Data_head),
		tail: atomic.LoadUint64(&meta.Data_tail),
		// cap is always a power of two
		mask: uint64(cap(ring) - 1),
		ring: ring,
	}
}
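
// Editor's note on the mask: because cap(ring) is a power of two, the
// in-ring position of a monotonically increasing counter is counter & mask.
// For example, with an 8192-byte ring (mask 8191) a tail of 12288 maps to
// offset 12288 & 8191 = 4096.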

// loadHead refreshes the cached copy of the kernel's write position
// (data_head).
func (rr *ringReader) loadHead() {
	rr.head = atomic.LoadUint64(&rr.meta.Data_head)
}

func (rr *ringReader) writeTail() {
	// Commit the new tail. This lets the kernel know that
	// the ring buffer has been consumed.
	atomic.StoreUint64(&rr.meta.Data_tail, rr.tail)
}

// Read implements io.Reader over the contents of the ring. It never
// blocks, may return short reads where the data wraps around the end of
// the ring, and returns io.EOF once the cached tail catches up with the
// cached head.
func (rr *ringReader) Read(p []byte) (int, error) {
	start := int(rr.tail & rr.mask)

	n := len(p)
	// Truncate if the read wraps in the ring buffer
	if remainder := cap(rr.ring) - start; n > remainder {
		n = remainder
	}

	// Truncate if there isn't enough data
	if remainder := int(rr.head - rr.tail); n > remainder {
		n = remainder
	}

	copy(p, rr.ring[start:start+n])
	rr.tail += uint64(n)

	if rr.tail == rr.head {
		return n, io.EOF
	}

	return n, nil
}
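
// A minimal consumption sketch (editor's addition): drainRing is a
// hypothetical helper showing the intended loadHead/Read/writeTail cycle.
// Short reads near the wrap point are normal, so callers loop until io.EOF.
func drainRing(rr *ringReader, buf []byte) (int, error) {
	rr.loadHead()        // observe the kernel's latest write position
	defer rr.writeTail() // commit what we consumed back to the kernel

	total := 0
	for total < len(buf) {
		n, err := rr.Read(buf[total:])
		total += n
		if err == io.EOF {
			break
		}
		if err != nil {
			return total, err
		}
	}
	return total, nil
}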
