...

Source file src/github.com/prometheus/procfs/blockdevice/stats.go

Documentation: github.com/prometheus/procfs/blockdevice

     1  // Copyright 2018 The Prometheus Authors
     2  // Licensed under the Apache License, Version 2.0 (the "License");
     3  // you may not use this file except in compliance with the License.
     4  // You may obtain a copy of the License at
     5  //
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package blockdevice
    15  
    16  import (
    17  	"bufio"
    18  	"errors"
    19  	"fmt"
    20  	"io"
    21  	"os"
    22  	"strings"
    23  
    24  	"github.com/prometheus/procfs/internal/fs"
    25  	"github.com/prometheus/procfs/internal/util"
    26  )
    27  
// Info contains identifying information for a block device such as a disk drive.
type Info struct {
	// MajorNumber is the first field scanned from each diskstats line.
	MajorNumber uint32
	// MinorNumber is the second field scanned from each diskstats line.
	MinorNumber uint32
	// DeviceName is the third field scanned from each diskstats line.
	DeviceName string
}
    34  
// IOStats models the iostats data described in the kernel documentation.
// - https://www.kernel.org/doc/Documentation/iostats.txt
// - https://www.kernel.org/doc/Documentation/block/stat.txt
// - https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats
type IOStats struct {
	// ReadIOs is the number of reads completed successfully.
	ReadIOs uint64
	// ReadMerges is the number of reads merged.  Reads and writes
	// which are adjacent to each other may be merged for efficiency.
	ReadMerges uint64
	// ReadSectors is the total number of sectors read successfully.
	ReadSectors uint64
	// ReadTicks is the total number of milliseconds spent by all reads.
	ReadTicks uint64
	// WriteIOs is the total number of writes completed successfully.
	WriteIOs uint64
	// WriteMerges is the number of writes merged.
	WriteMerges uint64
	// WriteSectors is the total number of sectors written successfully.
	WriteSectors uint64
	// WriteTicks is the total number of milliseconds spent by all writes.
	WriteTicks uint64
	// IOsInProgress is the number of I/Os currently in progress.
	IOsInProgress uint64
	// IOsTotalTicks is the number of milliseconds spent doing I/Os.
	// This field increases so long as IOsInProgress is nonzero.
	IOsTotalTicks uint64
	// WeightedIOTicks is the weighted number of milliseconds spent doing I/Os.
	// This can also be used to estimate average queue wait time for requests.
	WeightedIOTicks uint64
	// DiscardIOs is the total number of discards completed successfully.
	DiscardIOs uint64
	// DiscardMerges is the number of discards merged.
	DiscardMerges uint64
	// DiscardSectors is the total number of sectors discarded successfully.
	DiscardSectors uint64
	// DiscardTicks is the total number of milliseconds spent by all discards.
	DiscardTicks uint64
	// FlushRequestsCompleted is the total number of flush requests completed successfully.
	FlushRequestsCompleted uint64
	// TimeSpentFlushing is the total number of milliseconds spent flushing.
	TimeSpentFlushing uint64
}
    78  
// Diskstats combines the device Info and IOStats.
type Diskstats struct {
	Info
	IOStats
	// IoStatsCount contains the number of fields read from the diskstats line,
	// including the three Info fields (major, minor and device name).
	// For kernel versions 5.5+, there should be 20 fields read. For kernel
	// versions 4.18+, there should be 18 fields read. For earlier kernel
	// versions this will be 14 because the discard values are not available.
	IoStatsCount int
}
    89  
// BlockQueueStats models the queue files that are located in the sysfs tree for each block device
// and described in the kernel documentation:
// https://www.kernel.org/doc/Documentation/block/queue-sysfs.txt
// https://www.kernel.org/doc/html/latest/block/queue-sysfs.html
type BlockQueueStats struct {
	// AddRandom is the status of a disk entropy (1 is on, 0 is off).
	AddRandom uint64
	// DAX indicates whether the device supports Direct Access (DAX) (1 is on, 0 is off).
	DAX uint64
	// DiscardGranularity is the size of internal allocation of the device in bytes, 0 means device
	// does not support the discard functionality.
	DiscardGranularity uint64
	// DiscardMaxHWBytes is the hardware maximum number of bytes that can be discarded in a single operation,
	// 0 means device does not support the discard functionality.
	DiscardMaxHWBytes uint64
	// DiscardMaxBytes is the software maximum number of bytes that can be discarded in a single operation.
	DiscardMaxBytes uint64
	// HWSectorSize is the sector size of the device, in bytes.
	HWSectorSize uint64
	// IOPoll indicates if polling is enabled (1 is on, 0 is off).
	IOPoll uint64
	// IOPollDelay indicates how polling will be performed, -1 for classic polling, 0 for hybrid polling,
	// with greater than 0 the kernel will put process issuing IO to sleep for this amount of time in
	// microseconds before entering classic polling.
	IOPollDelay int64
	// IOTimeout is the request timeout in milliseconds.
	IOTimeout uint64
	// IOStats indicates if iostats accounting is used for the disk (1 is on, 0 is off).
	IOStats uint64
	// LogicalBlockSize is the logical block size of the device, in bytes.
	LogicalBlockSize uint64
	// MaxHWSectorsKB is the maximum number of kilobytes supported in a single data transfer.
	MaxHWSectorsKB uint64
	// MaxIntegritySegments is the max limit of integrity segments as set by block layer which a hardware controller
	// can handle.
	MaxIntegritySegments uint64
	// MaxSectorsKB is the maximum number of kilobytes that the block layer will allow for a filesystem request.
	MaxSectorsKB uint64
	// MaxSegments is the maximum number of segments of the device.
	MaxSegments uint64
	// MaxSegmentSize is the maximum segment size of the device.
	MaxSegmentSize uint64
	// MinimumIOSize is the smallest preferred IO size reported by the device.
	MinimumIOSize uint64
	// NoMerges shows the lookup logic involved with IO merging requests in the block layer. 0 all merges are
	// enabled, 1 only simple one hit merges are tried, 2 no merge algorithms will be tried.
	NoMerges uint64
	// NRRequests is the number of requests that may be allocated in the block layer for read or write requests.
	NRRequests uint64
	// OptimalIOSize is the optimal IO size reported by the device.
	OptimalIOSize uint64
	// PhysicalBlockSize is the physical block size of device, in bytes.
	PhysicalBlockSize uint64
	// ReadAHeadKB is the maximum number of kilobytes to read-ahead for filesystems on this block device.
	ReadAHeadKB uint64
	// Rotational indicates if the device is of rotational type or non-rotational type.
	Rotational uint64
	// RQAffinity indicates affinity policy of device, if 1 the block layer will migrate request completions to the
	// cpu "group" that originally submitted the request, if 2 forces the completion to run on the requesting cpu.
	RQAffinity uint64
	// SchedulerList contains list of available schedulers for this block device.
	SchedulerList []string
	// SchedulerCurrent is the current scheduler for this block device.
	SchedulerCurrent string
	// WriteCache shows the type of cache for block device, "write back" or "write through".
	WriteCache string
	// WriteSameMaxBytes is the number of bytes the device can write in a single write-same command.
	// A value of '0' means write-same is not supported by this device.
	WriteSameMaxBytes uint64
	// WBTLatUSec is the target minimum read latency, 0 means the feature is disabled.
	WBTLatUSec int64
	// ThrottleSampleTime is the time window that blk-throttle samples data, in milliseconds. Optional:
	// exists only if CONFIG_BLK_DEV_THROTTLING_LOW is enabled, nil otherwise.
	ThrottleSampleTime *uint64
	// Zoned indicates if the device is a zoned block device and the zone model of the device if it is indeed zoned.
	// Possible values are: none, host-aware, host-managed for zoned block devices.
	Zoned string
	// NRZones indicates the total number of zones of the device, always zero for regular block devices.
	NRZones uint64
	// ChunkSectors for RAID is the size in 512B sectors of the RAID volume stripe segment,
	// for zoned host device is the size in 512B sectors.
	ChunkSectors uint64
	// FUA indicates whether the device supports Force Unit Access for write requests.
	FUA uint64
	// MaxDiscardSegments is the maximum number of DMA entries in a discard request.
	MaxDiscardSegments uint64
	// WriteZeroesMaxBytes is the maximum number of bytes that can be zeroed at once.
	// The value 0 means that REQ_OP_WRITE_ZEROES is not supported.
	WriteZeroesMaxBytes uint64
}
   180  
// DeviceMapperInfo models the devicemapper files that are located in the sysfs tree for each block device
// and described in the kernel documentation:
// https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-block-dm
type DeviceMapperInfo struct {
	// Name is the string containing the mapped device name.
	Name string
	// RqBasedSeqIOMergeDeadline determines how long (in microseconds) a request that is a reasonable merge
	// candidate can be queued on the request queue.
	RqBasedSeqIOMergeDeadline uint64
	// Suspended indicates if the device is suspended (1 is on, 0 is off).
	Suspended uint64
	// UseBlkMQ indicates if the device is using the request-based blk-mq I/O path mode (1 is on, 0 is off).
	UseBlkMQ uint64
	// UUID is the DM-UUID string or empty string if DM-UUID is not set.
	UUID string
}
   197  
// UnderlyingDeviceInfo models the list of devices that this device is built from.
type UnderlyingDeviceInfo struct {
	// DeviceNames is the list of device names.
	DeviceNames []string
}
   203  
const (
	// procDiskstatsPath is the diskstats file name, relative to the proc mount point.
	procDiskstatsPath = "diskstats"
	// procDiskstatsFormat is the Sscanf layout of one diskstats line: major,
	// minor and device name followed by the 17 IOStats counters (20 fields).
	procDiskstatsFormat = "%d %d %s %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d"
	// sysBlockPath is the block directory name, relative to the sys mount point.
	sysBlockPath = "block"
	// sysBlockStatFormat is the Sscanf layout of a per-device stat file:
	// the 17 IOStats counters without any identifying columns.
	sysBlockStatFormat = "%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d"
	// sysBlockQueue is the per-device subdirectory holding the queue files.
	sysBlockQueue = "queue"
	// sysBlockDM is the per-device subdirectory holding the device-mapper files.
	sysBlockDM = "dm"
	// sysUnderlyingDev is the per-device subdirectory listing the underlying devices.
	sysUnderlyingDev = "slaves"
)
   213  
// FS represents the pseudo-filesystems proc and sys, which provides an
// interface to kernel data structures.
type FS struct {
	// proc is the handle used to build paths below the proc mount point.
	proc *fs.FS
	// sys is the handle used to build paths below the sys mount point.
	sys *fs.FS
}
   220  
   221  // NewDefaultFS returns a new blockdevice fs using the default mountPoints for proc and sys.
   222  // It will error if either of these mount points can't be read.
   223  func NewDefaultFS() (FS, error) {
   224  	return NewFS(fs.DefaultProcMountPoint, fs.DefaultSysMountPoint)
   225  }
   226  
   227  // NewFS returns a new blockdevice fs using the given mountPoints for proc and sys.
   228  // It will error if either of these mount points can't be read.
   229  func NewFS(procMountPoint string, sysMountPoint string) (FS, error) {
   230  	if strings.TrimSpace(procMountPoint) == "" {
   231  		procMountPoint = fs.DefaultProcMountPoint
   232  	}
   233  	procfs, err := fs.NewFS(procMountPoint)
   234  	if err != nil {
   235  		return FS{}, err
   236  	}
   237  	if strings.TrimSpace(sysMountPoint) == "" {
   238  		sysMountPoint = fs.DefaultSysMountPoint
   239  	}
   240  	sysfs, err := fs.NewFS(sysMountPoint)
   241  	if err != nil {
   242  		return FS{}, err
   243  	}
   244  	return FS{&procfs, &sysfs}, nil
   245  }
   246  
   247  // ProcDiskstats reads the diskstats file and returns
   248  // an array of Diskstats (one per line/device).
   249  func (fs FS) ProcDiskstats() ([]Diskstats, error) {
   250  	file, err := os.Open(fs.proc.Path(procDiskstatsPath))
   251  	if err != nil {
   252  		return nil, err
   253  	}
   254  	defer file.Close()
   255  	return parseProcDiskstats(file)
   256  }
   257  
   258  func parseProcDiskstats(r io.Reader) ([]Diskstats, error) {
   259  	var (
   260  		diskstats []Diskstats
   261  		scanner   = bufio.NewScanner(r)
   262  		err       error
   263  	)
   264  	for scanner.Scan() {
   265  		d := &Diskstats{}
   266  		d.IoStatsCount, err = fmt.Sscanf(scanner.Text(), procDiskstatsFormat,
   267  			&d.MajorNumber,
   268  			&d.MinorNumber,
   269  			&d.DeviceName,
   270  			&d.ReadIOs,
   271  			&d.ReadMerges,
   272  			&d.ReadSectors,
   273  			&d.ReadTicks,
   274  			&d.WriteIOs,
   275  			&d.WriteMerges,
   276  			&d.WriteSectors,
   277  			&d.WriteTicks,
   278  			&d.IOsInProgress,
   279  			&d.IOsTotalTicks,
   280  			&d.WeightedIOTicks,
   281  			&d.DiscardIOs,
   282  			&d.DiscardMerges,
   283  			&d.DiscardSectors,
   284  			&d.DiscardTicks,
   285  			&d.FlushRequestsCompleted,
   286  			&d.TimeSpentFlushing,
   287  		)
   288  		// The io.EOF error can be safely ignored because it just means we read fewer than
   289  		// the full 20 fields.
   290  		if err != nil && !errors.Is(err, io.EOF) {
   291  			return diskstats, err
   292  		}
   293  		if d.IoStatsCount >= 14 {
   294  			diskstats = append(diskstats, *d)
   295  		}
   296  	}
   297  	return diskstats, scanner.Err()
   298  }
   299  
   300  // SysBlockDevices lists the device names from /sys/block/<dev>.
   301  func (fs FS) SysBlockDevices() ([]string, error) {
   302  	deviceDirs, err := os.ReadDir(fs.sys.Path(sysBlockPath))
   303  	if err != nil {
   304  		return nil, err
   305  	}
   306  	devices := []string{}
   307  	for _, deviceDir := range deviceDirs {
   308  		devices = append(devices, deviceDir.Name())
   309  	}
   310  	return devices, nil
   311  }
   312  
   313  // SysBlockDeviceStat returns stats for the block device read from /sys/block/<device>/stat.
   314  // The number of stats read will be 15 if the discard stats are available (kernel 4.18+)
   315  // and 11 if they are not available.
   316  func (fs FS) SysBlockDeviceStat(device string) (IOStats, int, error) {
   317  	bytes, err := os.ReadFile(fs.sys.Path(sysBlockPath, device, "stat"))
   318  	if err != nil {
   319  		return IOStats{}, 0, err
   320  	}
   321  	return parseSysBlockDeviceStat(bytes)
   322  }
   323  
   324  func parseSysBlockDeviceStat(data []byte) (IOStats, int, error) {
   325  	stat := IOStats{}
   326  	count, err := fmt.Sscanf(strings.TrimSpace(string(data)), sysBlockStatFormat,
   327  		&stat.ReadIOs,
   328  		&stat.ReadMerges,
   329  		&stat.ReadSectors,
   330  		&stat.ReadTicks,
   331  		&stat.WriteIOs,
   332  		&stat.WriteMerges,
   333  		&stat.WriteSectors,
   334  		&stat.WriteTicks,
   335  		&stat.IOsInProgress,
   336  		&stat.IOsTotalTicks,
   337  		&stat.WeightedIOTicks,
   338  		&stat.DiscardIOs,
   339  		&stat.DiscardMerges,
   340  		&stat.DiscardSectors,
   341  		&stat.DiscardTicks,
   342  		&stat.FlushRequestsCompleted,
   343  		&stat.TimeSpentFlushing,
   344  	)
   345  	// An io.EOF error is ignored because it just means we read fewer than the full 15 fields.
   346  	if errors.Is(err, io.EOF) {
   347  		return stat, count, nil
   348  	}
   349  	return stat, count, err
   350  }
   351  
   352  // SysBlockDeviceQueueStats returns stats for /sys/block/xxx/queue where xxx is a device name.
   353  func (fs FS) SysBlockDeviceQueueStats(device string) (BlockQueueStats, error) {
   354  	stat := BlockQueueStats{}
   355  	// Files with uint64 fields
   356  	for file, p := range map[string]*uint64{
   357  		"add_random":             &stat.AddRandom,
   358  		"dax":                    &stat.DAX,
   359  		"discard_granularity":    &stat.DiscardGranularity,
   360  		"discard_max_hw_bytes":   &stat.DiscardMaxHWBytes,
   361  		"discard_max_bytes":      &stat.DiscardMaxBytes,
   362  		"hw_sector_size":         &stat.HWSectorSize,
   363  		"io_poll":                &stat.IOPoll,
   364  		"io_timeout":             &stat.IOTimeout,
   365  		"iostats":                &stat.IOStats,
   366  		"logical_block_size":     &stat.LogicalBlockSize,
   367  		"max_hw_sectors_kb":      &stat.MaxHWSectorsKB,
   368  		"max_integrity_segments": &stat.MaxIntegritySegments,
   369  		"max_sectors_kb":         &stat.MaxSectorsKB,
   370  		"max_segments":           &stat.MaxSegments,
   371  		"max_segment_size":       &stat.MaxSegmentSize,
   372  		"minimum_io_size":        &stat.MinimumIOSize,
   373  		"nomerges":               &stat.NoMerges,
   374  		"nr_requests":            &stat.NRRequests,
   375  		"optimal_io_size":        &stat.OptimalIOSize,
   376  		"physical_block_size":    &stat.PhysicalBlockSize,
   377  		"read_ahead_kb":          &stat.ReadAHeadKB,
   378  		"rotational":             &stat.Rotational,
   379  		"rq_affinity":            &stat.RQAffinity,
   380  		"write_same_max_bytes":   &stat.WriteSameMaxBytes,
   381  		"nr_zones":               &stat.NRZones,
   382  		"chunk_sectors":          &stat.ChunkSectors,
   383  		"fua":                    &stat.FUA,
   384  		"max_discard_segments":   &stat.MaxDiscardSegments,
   385  		"write_zeroes_max_bytes": &stat.WriteZeroesMaxBytes,
   386  	} {
   387  		val, err := util.ReadUintFromFile(fs.sys.Path(sysBlockPath, device, sysBlockQueue, file))
   388  		if err != nil {
   389  			return BlockQueueStats{}, err
   390  		}
   391  		*p = val
   392  	}
   393  	// Files with int64 fields
   394  	for file, p := range map[string]*int64{
   395  		"io_poll_delay": &stat.IOPollDelay,
   396  		"wbt_lat_usec":  &stat.WBTLatUSec,
   397  	} {
   398  		val, err := util.ReadIntFromFile(fs.sys.Path(sysBlockPath, device, sysBlockQueue, file))
   399  		if err != nil {
   400  			return BlockQueueStats{}, err
   401  		}
   402  		*p = val
   403  	}
   404  	// Files with string fields
   405  	for file, p := range map[string]*string{
   406  		"write_cache": &stat.WriteCache,
   407  		"zoned":       &stat.Zoned,
   408  	} {
   409  		val, err := util.SysReadFile(fs.sys.Path(sysBlockPath, device, sysBlockQueue, file))
   410  		if err != nil {
   411  			return BlockQueueStats{}, err
   412  		}
   413  		*p = val
   414  	}
   415  	scheduler, err := util.SysReadFile(fs.sys.Path(sysBlockPath, device, sysBlockQueue, "scheduler"))
   416  	if err != nil {
   417  		return BlockQueueStats{}, err
   418  	}
   419  	var schedulers []string
   420  	xs := strings.Split(scheduler, " ")
   421  	for _, s := range xs {
   422  		if strings.HasPrefix(s, "[") && strings.HasSuffix(s, "]") {
   423  			s = s[1 : len(s)-1]
   424  			stat.SchedulerCurrent = s
   425  		}
   426  		schedulers = append(schedulers, s)
   427  	}
   428  	stat.SchedulerList = schedulers
   429  	// optional
   430  	throttleSampleTime, err := util.ReadUintFromFile(fs.sys.Path(sysBlockPath, device, sysBlockQueue, "throttle_sample_time"))
   431  	if err == nil {
   432  		stat.ThrottleSampleTime = &throttleSampleTime
   433  	}
   434  	return stat, nil
   435  }
   436  
   437  func (fs FS) SysBlockDeviceMapperInfo(device string) (DeviceMapperInfo, error) {
   438  	info := DeviceMapperInfo{}
   439  	// Files with uint64 fields
   440  	for file, p := range map[string]*uint64{
   441  		"rq_based_seq_io_merge_deadline": &info.RqBasedSeqIOMergeDeadline,
   442  		"suspended":                      &info.Suspended,
   443  		"use_blk_mq":                     &info.UseBlkMQ,
   444  	} {
   445  		val, err := util.ReadUintFromFile(fs.sys.Path(sysBlockPath, device, sysBlockDM, file))
   446  		if err != nil {
   447  			return DeviceMapperInfo{}, err
   448  		}
   449  		*p = val
   450  	}
   451  	// Files with string fields
   452  	for file, p := range map[string]*string{
   453  		"name": &info.Name,
   454  		"uuid": &info.UUID,
   455  	} {
   456  		val, err := util.SysReadFile(fs.sys.Path(sysBlockPath, device, sysBlockDM, file))
   457  		if err != nil {
   458  			return DeviceMapperInfo{}, err
   459  		}
   460  		*p = val
   461  	}
   462  	return info, nil
   463  }
   464  
   465  func (fs FS) SysBlockDeviceUnderlyingDevices(device string) (UnderlyingDeviceInfo, error) {
   466  	underlyingDir, err := os.Open(fs.sys.Path(sysBlockPath, device, sysUnderlyingDev))
   467  	if err != nil {
   468  		return UnderlyingDeviceInfo{}, err
   469  	}
   470  	underlying, err := underlyingDir.Readdirnames(0)
   471  	if err != nil {
   472  		return UnderlyingDeviceInfo{}, err
   473  	}
   474  	return UnderlyingDeviceInfo{DeviceNames: underlying}, nil
   475  
   476  }
   477  

View as plain text