...

Source file src/github.com/prometheus/procfs/sysfs/class_infiniband.go

Documentation: github.com/prometheus/procfs/sysfs

     1  // Copyright 2019 The Prometheus Authors
     2  // Licensed under the Apache License, Version 2.0 (the "License");
     3  // you may not use this file except in compliance with the License.
     4  // You may obtain a copy of the License at
     5  //
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  //go:build linux
    15  // +build linux
    16  
    17  package sysfs
    18  
    19  import (
    20  	"errors"
    21  	"fmt"
    22  	"os"
    23  	"path/filepath"
    24  	"strconv"
    25  	"strings"
    26  
    27  	"github.com/prometheus/procfs/internal/util"
    28  )
    29  
// infinibandClassPath is the location of the InfiniBand class directory,
// relative to the sysfs root that fs.sys.Path resolves against.
const infinibandClassPath = "class/infiniband"
    31  
// InfiniBandCounters contains counter values from files in
// /sys/class/infiniband/<Name>/ports/<Port>/counters or
// /sys/class/infiniband/<Name>/ports/<Port>/counters_ext
// for a single port of one InfiniBand device.
//
// Every field is a pointer and stays nil unless the parser assigned a value
// read from the file named in the field's comment. The four data counters
// (port_rcv_data, port_xmit_data, port_rcv_data_64 and port_xmit_data_64) are
// stored multiplied by 4 relative to the raw file contents.
type InfiniBandCounters struct {
	LegacyPortMulticastRcvPackets  *uint64 // counters_ext/port_multicast_rcv_packets
	LegacyPortMulticastXmitPackets *uint64 // counters_ext/port_multicast_xmit_packets
	LegacyPortRcvData64            *uint64 // counters_ext/port_rcv_data_64
	LegacyPortRcvPackets64         *uint64 // counters_ext/port_rcv_packets_64
	LegacyPortUnicastRcvPackets    *uint64 // counters_ext/port_unicast_rcv_packets
	LegacyPortUnicastXmitPackets   *uint64 // counters_ext/port_unicast_xmit_packets
	LegacyPortXmitData64           *uint64 // counters_ext/port_xmit_data_64
	LegacyPortXmitPackets64        *uint64 // counters_ext/port_xmit_packets_64

	ExcessiveBufferOverrunErrors *uint64 // counters/excessive_buffer_overrun_errors
	LinkDowned                   *uint64 // counters/link_downed
	LinkErrorRecovery            *uint64 // counters/link_error_recovery
	LocalLinkIntegrityErrors     *uint64 // counters/local_link_integrity_errors
	MulticastRcvPackets          *uint64 // counters/multicast_rcv_packets
	MulticastXmitPackets         *uint64 // counters/multicast_xmit_packets
	PortRcvConstraintErrors      *uint64 // counters/port_rcv_constraint_errors
	PortRcvData                  *uint64 // counters/port_rcv_data
	PortRcvDiscards              *uint64 // counters/port_rcv_discards
	PortRcvErrors                *uint64 // counters/port_rcv_errors
	PortRcvPackets               *uint64 // counters/port_rcv_packets
	PortRcvRemotePhysicalErrors  *uint64 // counters/port_rcv_remote_physical_errors
	PortRcvSwitchRelayErrors     *uint64 // counters/port_rcv_switch_relay_errors
	PortXmitConstraintErrors     *uint64 // counters/port_xmit_constraint_errors
	PortXmitData                 *uint64 // counters/port_xmit_data
	PortXmitDiscards             *uint64 // counters/port_xmit_discards
	PortXmitPackets              *uint64 // counters/port_xmit_packets
	PortXmitWait                 *uint64 // counters/port_xmit_wait
	SymbolError                  *uint64 // counters/symbol_error
	UnicastRcvPackets            *uint64 // counters/unicast_rcv_packets
	UnicastXmitPackets           *uint64 // counters/unicast_xmit_packets
	VL15Dropped                  *uint64 // counters/VL15_dropped
}
    69  
// InfiniBandHwCounters contains counter values from files in
// /sys/class/infiniband/<Name>/ports/<Port>/hw_counters
// for a single port of one InfiniBand device.
//
// Every field is a pointer and stays nil unless the parser assigned a value
// read from the file named in the field's comment.
type InfiniBandHwCounters struct {
	DuplicateRequest        *uint64 // hw_counters/duplicate_request
	ImpliedNakSeqErr        *uint64 // hw_counters/implied_nak_seq_err
	Lifespan                *uint64 // hw_counters/lifespan
	LocalAckTimeoutErr      *uint64 // hw_counters/local_ack_timeout_err
	NpCnpSent               *uint64 // hw_counters/np_cnp_sent
	NpEcnMarkedRocePackets  *uint64 // hw_counters/np_ecn_marked_roce_packets
	OutOfBuffer             *uint64 // hw_counters/out_of_buffer
	OutOfSequence           *uint64 // hw_counters/out_of_sequence
	PacketSeqErr            *uint64 // hw_counters/packet_seq_err
	ReqCqeError             *uint64 // hw_counters/req_cqe_error
	ReqCqeFlushError        *uint64 // hw_counters/req_cqe_flush_error
	ReqRemoteAccessErrors   *uint64 // hw_counters/req_remote_access_errors
	ReqRemoteInvalidRequest *uint64 // hw_counters/req_remote_invalid_request
	RespCqeError            *uint64 // hw_counters/resp_cqe_error
	RespCqeFlushError       *uint64 // hw_counters/resp_cqe_flush_error
	RespLocalLengthError    *uint64 // hw_counters/resp_local_length_error
	RespRemoteAccessErrors  *uint64 // hw_counters/resp_remote_access_errors
	RnrNakRetryErr          *uint64 // hw_counters/rnr_nak_retry_err
	RoceAdpRetrans          *uint64 // hw_counters/roce_adp_retrans
	RoceAdpRetransTo        *uint64 // hw_counters/roce_adp_retrans_to
	RoceSlowRestart         *uint64 // hw_counters/roce_slow_restart
	RoceSlowRestartCnps     *uint64 // hw_counters/roce_slow_restart_cnps
	RoceSlowRestartTrans    *uint64 // hw_counters/roce_slow_restart_trans
	RpCnpHandled            *uint64 // hw_counters/rp_cnp_handled
	RpCnpIgnored            *uint64 // hw_counters/rp_cnp_ignored
	RxAtomicRequests        *uint64 // hw_counters/rx_atomic_requests
	RxDctConnect            *uint64 // hw_counters/rx_dct_connect
	RxIcrcEncapsulated      *uint64 // hw_counters/rx_icrc_encapsulated
	RxReadRequests          *uint64 // hw_counters/rx_read_requests
	RxWriteRequests         *uint64 // hw_counters/rx_write_requests
}
   105  
// InfiniBandPort contains info from files in
// /sys/class/infiniband/<Name>/ports/<Port>
// for a single port of one InfiniBand device.
//
// The state and phys_state files have the form "<id>: <string>"; the numeric
// and textual halves are stored in separate fields. Counters and HwCounters
// keep their zero value when the parser does not read the corresponding
// directory for this device.
type InfiniBandPort struct {
	Name        string // Name of the device this port belongs to
	Port        uint   // Port number, taken from the <Port> directory name
	State       string // String representation from /sys/class/infiniband/<Name>/ports/<Port>/state
	StateID     uint   // ID from /sys/class/infiniband/<Name>/ports/<Port>/state
	PhysState   string // String representation from /sys/class/infiniband/<Name>/ports/<Port>/phys_state
	PhysStateID uint   // ID from /sys/class/infiniband/<Name>/ports/<Port>/phys_state
	Rate        uint64 // in bytes/second from /sys/class/infiniband/<Name>/ports/<Port>/rate
	Counters    InfiniBandCounters
	HwCounters  InfiniBandHwCounters
}
   120  
// InfiniBandDevice contains info from files in /sys/class/infiniband for a
// single InfiniBand device.
//
// Name is the device's directory name under /sys/class/infiniband. BoardID
// and HCAType are left empty when the corresponding file does not exist.
// Ports is keyed by port number.
type InfiniBandDevice struct {
	Name            string
	BoardID         string // /sys/class/infiniband/<Name>/board_id
	FirmwareVersion string // /sys/class/infiniband/<Name>/fw_ver
	HCAType         string // /sys/class/infiniband/<Name>/hca_type
	Ports           map[uint]InfiniBandPort
}
   130  
// InfiniBandClass is a collection of every InfiniBand device in
// /sys/class/infiniband, as returned by FS.InfiniBandClass.
//
// The map keys are the names of the InfiniBand devices.
type InfiniBandClass map[string]InfiniBandDevice
   136  
   137  // InfiniBandClass returns info for all InfiniBand devices read from
   138  // /sys/class/infiniband.
   139  func (fs FS) InfiniBandClass() (InfiniBandClass, error) {
   140  	path := fs.sys.Path(infinibandClassPath)
   141  
   142  	dirs, err := os.ReadDir(path)
   143  	if err != nil {
   144  		return nil, err
   145  	}
   146  
   147  	ibc := make(InfiniBandClass, len(dirs))
   148  	for _, d := range dirs {
   149  		device, err := fs.parseInfiniBandDevice(d.Name())
   150  		if err != nil {
   151  			return nil, err
   152  		}
   153  
   154  		ibc[device.Name] = *device
   155  	}
   156  
   157  	return ibc, nil
   158  }
   159  
   160  // Parse one InfiniBand device.
   161  // Refer to https://www.kernel.org/doc/Documentation/ABI/stable/sysfs-class-infiniband
   162  func (fs FS) parseInfiniBandDevice(name string) (*InfiniBandDevice, error) {
   163  	path := fs.sys.Path(infinibandClassPath, name)
   164  	device := InfiniBandDevice{Name: name}
   165  
   166  	// fw_ver is exposed by all InfiniBand drivers since kernel version 4.10.
   167  	value, err := util.SysReadFile(filepath.Join(path, "fw_ver"))
   168  	if err != nil {
   169  		return nil, fmt.Errorf("failed to read HCA firmware version: %w", err)
   170  	}
   171  	device.FirmwareVersion = value
   172  
   173  	// Not all InfiniBand drivers expose all of these.
   174  	for _, f := range [...]string{"board_id", "hca_type"} {
   175  		name := filepath.Join(path, f)
   176  		value, err := util.SysReadFile(name)
   177  		if err != nil {
   178  			if os.IsNotExist(err) {
   179  				continue
   180  			}
   181  			return nil, fmt.Errorf("failed to read file %q: %w", name, err)
   182  		}
   183  
   184  		switch f {
   185  		case "board_id":
   186  			device.BoardID = value
   187  		case "hca_type":
   188  			device.HCAType = value
   189  		}
   190  	}
   191  
   192  	portsPath := filepath.Join(path, "ports")
   193  	ports, err := os.ReadDir(portsPath)
   194  	if err != nil {
   195  		return nil, fmt.Errorf("failed to list InfiniBand ports at %q: %w", portsPath, err)
   196  	}
   197  
   198  	device.Ports = make(map[uint]InfiniBandPort, len(ports))
   199  	for _, d := range ports {
   200  		port, err := fs.parseInfiniBandPort(name, d.Name())
   201  		if err != nil {
   202  			return nil, err
   203  		}
   204  
   205  		device.Ports[port.Port] = *port
   206  	}
   207  
   208  	return &device, nil
   209  }
   210  
   211  // Parse InfiniBand state. Expected format: "<id>: <string-representation>".
   212  func parseState(s string) (uint, string, error) {
   213  	parts := strings.Split(s, ":")
   214  	if len(parts) != 2 {
   215  		return 0, "", fmt.Errorf("failed to split %s into 'ID: NAME'", s)
   216  	}
   217  	name := strings.TrimSpace(parts[1])
   218  	value, err := strconv.ParseUint(strings.TrimSpace(parts[0]), 10, 32)
   219  	if err != nil {
   220  		return 0, name, fmt.Errorf("failed to convert %s into uint", strings.TrimSpace(parts[0]))
   221  	}
   222  	id := uint(value)
   223  	return id, name, nil
   224  }
   225  
   226  // Parse rate (example: "100 Gb/sec (4X EDR)") and return it as bytes/second.
   227  func parseRate(s string) (uint64, error) {
   228  	parts := strings.SplitAfterN(s, " ", 2)
   229  	if len(parts) != 2 {
   230  		return 0, fmt.Errorf("failed to split %q", s)
   231  	}
   232  	value, err := strconv.ParseFloat(strings.TrimSpace(parts[0]), 32)
   233  	if err != nil {
   234  		return 0, fmt.Errorf("failed to convert %s into uint", strings.TrimSpace(parts[0]))
   235  	}
   236  	// Convert Gb/s into bytes/s
   237  	rate := uint64(value * 125000000)
   238  	return rate, nil
   239  }
   240  
   241  // parseInfiniBandPort scans predefined files in /sys/class/infiniband/<device>/ports/<port>
   242  // directory and gets their contents.
   243  func (fs FS) parseInfiniBandPort(name string, port string) (*InfiniBandPort, error) {
   244  	portNumber, err := strconv.ParseUint(port, 10, 32)
   245  	if err != nil {
   246  		return nil, fmt.Errorf("failed to convert %s into uint", port)
   247  	}
   248  	ibp := InfiniBandPort{Name: name, Port: uint(portNumber)}
   249  
   250  	portPath := fs.sys.Path(infinibandClassPath, name, "ports", port)
   251  	content, err := os.ReadFile(filepath.Join(portPath, "state"))
   252  	if err != nil {
   253  		return nil, err
   254  	}
   255  	id, name, err := parseState(string(content))
   256  	if err != nil {
   257  		return nil, fmt.Errorf("could not parse state file in %q: %w", portPath, err)
   258  	}
   259  	ibp.State = name
   260  	ibp.StateID = id
   261  
   262  	content, err = os.ReadFile(filepath.Join(portPath, "phys_state"))
   263  	if err != nil {
   264  		return nil, err
   265  	}
   266  	id, name, err = parseState(string(content))
   267  	if err != nil {
   268  		return nil, fmt.Errorf("could not parse phys_state file in %q: %w", portPath, err)
   269  	}
   270  	ibp.PhysState = name
   271  	ibp.PhysStateID = id
   272  
   273  	content, err = os.ReadFile(filepath.Join(portPath, "rate"))
   274  	if err != nil {
   275  		return nil, err
   276  	}
   277  	ibp.Rate, err = parseRate(string(content))
   278  	if err != nil {
   279  		return nil, fmt.Errorf("could not parse rate file in %q: %w", portPath, err)
   280  	}
   281  
   282  	// Intel irdma module does not expose /sys/class/infiniband/<device>/ports/<port-num>/counters
   283  	if !strings.HasPrefix(ibp.Name, "irdma") {
   284  		counters, err := parseInfiniBandCounters(portPath)
   285  		if err != nil {
   286  			return nil, err
   287  		}
   288  		ibp.Counters = *counters
   289  	}
   290  
   291  	if strings.HasPrefix(ibp.Name, "irdma") || strings.HasPrefix(ibp.Name, "mlx5_") {
   292  		hwCounters, err := parseInfiniBandHwCounters(portPath)
   293  		if err != nil {
   294  			return nil, err
   295  		}
   296  		ibp.HwCounters = *hwCounters
   297  	}
   298  
   299  	return &ibp, nil
   300  }
   301  
   302  // parseInfiniBandCounters parses the counters exposed under
   303  // /sys/class/infiniband/<device>/ports/<port-num>/counters, which first appeared in kernel v2.6.12.
   304  // Prior to kernel v4.5, 64-bit counters were exposed separately under the "counters_ext" directory.
   305  func parseInfiniBandCounters(portPath string) (*InfiniBandCounters, error) {
   306  	var counters InfiniBandCounters
   307  
   308  	path := filepath.Join(portPath, "counters")
   309  	files, err := os.ReadDir(path)
   310  	if err != nil {
   311  		return nil, err
   312  	}
   313  
   314  	for _, f := range files {
   315  		if !f.Type().IsRegular() {
   316  			continue
   317  		}
   318  
   319  		name := filepath.Join(path, f.Name())
   320  		value, err := util.SysReadFile(name)
   321  		if err != nil {
   322  			if os.IsNotExist(err) || os.IsPermission(err) || err.Error() == "operation not supported" || errors.Is(err, os.ErrInvalid) {
   323  				continue
   324  			}
   325  			return nil, fmt.Errorf("failed to read file %q: %w", name, err)
   326  		}
   327  
   328  		// According to Mellanox, the metrics port_rcv_data, port_xmit_data,
   329  		// port_rcv_data_64, and port_xmit_data_64 "are divided by 4 unconditionally"
   330  		// as they represent the amount of data being transmitted and received per lane.
   331  		// Mellanox cards have 4 lanes per port, so all values must be multiplied by 4
   332  		// to get the expected value.
   333  
   334  		vp := util.NewValueParser(value)
   335  
   336  		switch f.Name() {
   337  		case "excessive_buffer_overrun_errors":
   338  			counters.ExcessiveBufferOverrunErrors = vp.PUInt64()
   339  		case "link_downed":
   340  			counters.LinkDowned = vp.PUInt64()
   341  		case "link_error_recovery":
   342  			counters.LinkErrorRecovery = vp.PUInt64()
   343  		case "local_link_integrity_errors":
   344  			counters.LocalLinkIntegrityErrors = vp.PUInt64()
   345  		case "multicast_rcv_packets":
   346  			counters.MulticastRcvPackets = vp.PUInt64()
   347  		case "multicast_xmit_packets":
   348  			counters.MulticastXmitPackets = vp.PUInt64()
   349  		case "port_rcv_constraint_errors":
   350  			counters.PortRcvConstraintErrors = vp.PUInt64()
   351  		case "port_rcv_data":
   352  			counters.PortRcvData = vp.PUInt64()
   353  			if counters.PortRcvData != nil {
   354  				*counters.PortRcvData *= 4
   355  			}
   356  		case "port_rcv_discards":
   357  			counters.PortRcvDiscards = vp.PUInt64()
   358  		case "port_rcv_errors":
   359  			counters.PortRcvErrors = vp.PUInt64()
   360  		case "port_rcv_packets":
   361  			counters.PortRcvPackets = vp.PUInt64()
   362  		case "port_rcv_remote_physical_errors":
   363  			counters.PortRcvRemotePhysicalErrors = vp.PUInt64()
   364  		case "port_rcv_switch_relay_errors":
   365  			counters.PortRcvSwitchRelayErrors = vp.PUInt64()
   366  		case "port_xmit_constraint_errors":
   367  			counters.PortXmitConstraintErrors = vp.PUInt64()
   368  		case "port_xmit_data":
   369  			counters.PortXmitData = vp.PUInt64()
   370  			if counters.PortXmitData != nil {
   371  				*counters.PortXmitData *= 4
   372  			}
   373  		case "port_xmit_discards":
   374  			counters.PortXmitDiscards = vp.PUInt64()
   375  		case "port_xmit_packets":
   376  			counters.PortXmitPackets = vp.PUInt64()
   377  		case "port_xmit_wait":
   378  			counters.PortXmitWait = vp.PUInt64()
   379  		case "symbol_error":
   380  			counters.SymbolError = vp.PUInt64()
   381  		case "unicast_rcv_packets":
   382  			counters.UnicastRcvPackets = vp.PUInt64()
   383  		case "unicast_xmit_packets":
   384  			counters.UnicastXmitPackets = vp.PUInt64()
   385  		case "VL15_dropped":
   386  			counters.VL15Dropped = vp.PUInt64()
   387  		}
   388  
   389  		if err := vp.Err(); err != nil {
   390  			// Ugly workaround for handling https://github.com/prometheus/node_exporter/issues/966
   391  			// when counters are `N/A (not available)`.
   392  			// This was already patched and submitted, see
   393  			// https://www.spinics.net/lists/linux-rdma/msg68596.html
   394  			// Remove this as soon as the fix lands in the enterprise distros.
   395  			if strings.Contains(value, "N/A (no PMA)") {
   396  				continue
   397  			}
   398  			return nil, err
   399  		}
   400  	}
   401  
   402  	// Parse pre-kernel-v4.5 64-bit counters.
   403  	path = filepath.Join(portPath, "counters_ext")
   404  	files, err = os.ReadDir(path)
   405  	if err != nil && !os.IsNotExist(err) {
   406  		return nil, err
   407  	}
   408  
   409  	for _, f := range files {
   410  		if !f.Type().IsRegular() {
   411  			continue
   412  		}
   413  
   414  		name := filepath.Join(path, f.Name())
   415  		value, err := util.SysReadFile(name)
   416  		if err != nil {
   417  			if os.IsNotExist(err) || os.IsPermission(err) || err.Error() == "operation not supported" || errors.Is(err, os.ErrInvalid) {
   418  				continue
   419  			}
   420  			return nil, fmt.Errorf("failed to read file %q: %w", name, err)
   421  		}
   422  
   423  		vp := util.NewValueParser(value)
   424  
   425  		switch f.Name() {
   426  		case "port_multicast_rcv_packets":
   427  			counters.LegacyPortMulticastRcvPackets = vp.PUInt64()
   428  		case "port_multicast_xmit_packets":
   429  			counters.LegacyPortMulticastXmitPackets = vp.PUInt64()
   430  		case "port_rcv_data_64":
   431  			counters.LegacyPortRcvData64 = vp.PUInt64()
   432  			if counters.LegacyPortRcvData64 != nil {
   433  				*counters.LegacyPortRcvData64 *= 4
   434  			}
   435  		case "port_rcv_packets_64":
   436  			counters.LegacyPortRcvPackets64 = vp.PUInt64()
   437  		case "port_unicast_rcv_packets":
   438  			counters.LegacyPortUnicastRcvPackets = vp.PUInt64()
   439  		case "port_unicast_xmit_packets":
   440  			counters.LegacyPortUnicastXmitPackets = vp.PUInt64()
   441  		case "port_xmit_data_64":
   442  			counters.LegacyPortXmitData64 = vp.PUInt64()
   443  			if counters.LegacyPortXmitData64 != nil {
   444  				*counters.LegacyPortXmitData64 *= 4
   445  			}
   446  		case "port_xmit_packets_64":
   447  			counters.LegacyPortXmitPackets64 = vp.PUInt64()
   448  		}
   449  
   450  		if err := vp.Err(); err != nil {
   451  			// Ugly workaround for handling https://github.com/prometheus/node_exporter/issues/966
   452  			// when counters are `N/A (not available)`.
   453  			// This was already patched and submitted, see
   454  			// https://www.spinics.net/lists/linux-rdma/msg68596.html
   455  			// Remove this as soon as the fix lands in the enterprise distros.
   456  			if strings.Contains(value, "N/A (no PMA)") {
   457  				continue
   458  			}
   459  			return nil, err
   460  		}
   461  	}
   462  
   463  	return &counters, nil
   464  }
   465  
   466  // parseInfiniBandHwCounters parses the optional counters exposed under
   467  // /sys/class/infiniband/<device>/ports/<port-num>/hw_counters, which first appeared in kernel v4.6.
   468  func parseInfiniBandHwCounters(portPath string) (*InfiniBandHwCounters, error) {
   469  	var hwCounters InfiniBandHwCounters
   470  
   471  	path := filepath.Join(portPath, "hw_counters")
   472  	files, err := os.ReadDir(path)
   473  	if err != nil {
   474  		return nil, err
   475  	}
   476  
   477  	for _, f := range files {
   478  		if !f.Type().IsRegular() {
   479  			continue
   480  		}
   481  
   482  		name := filepath.Join(path, f.Name())
   483  		value, err := util.SysReadFile(name)
   484  		if err != nil {
   485  			if os.IsNotExist(err) || os.IsPermission(err) || err.Error() == "operation not supported" || errors.Is(err, os.ErrInvalid) {
   486  				continue
   487  			}
   488  			return nil, fmt.Errorf("failed to read file %q: %w", name, err)
   489  		}
   490  
   491  		vp := util.NewValueParser(value)
   492  
   493  		switch f.Name() {
   494  		case "duplicate_request":
   495  			hwCounters.DuplicateRequest = vp.PUInt64()
   496  		case "implied_nak_seq_err":
   497  			hwCounters.ImpliedNakSeqErr = vp.PUInt64()
   498  		case "lifespan":
   499  			hwCounters.Lifespan = vp.PUInt64()
   500  		case "local_ack_timeout_err":
   501  			hwCounters.LocalAckTimeoutErr = vp.PUInt64()
   502  		case "np_cnp_sent":
   503  			hwCounters.NpCnpSent = vp.PUInt64()
   504  		case "np_ecn_marked_roce_packets":
   505  			hwCounters.NpEcnMarkedRocePackets = vp.PUInt64()
   506  		case "out_of_buffer":
   507  			hwCounters.OutOfBuffer = vp.PUInt64()
   508  		case "out_of_sequence":
   509  			hwCounters.OutOfSequence = vp.PUInt64()
   510  		case "packet_seq_err":
   511  			hwCounters.PacketSeqErr = vp.PUInt64()
   512  		case "req_cqe_error":
   513  			hwCounters.ReqCqeError = vp.PUInt64()
   514  		case "req_cqe_flush_error":
   515  			hwCounters.ReqCqeFlushError = vp.PUInt64()
   516  		case "req_remote_access_errors":
   517  			hwCounters.ReqRemoteAccessErrors = vp.PUInt64()
   518  		case "req_remote_invalid_request":
   519  			hwCounters.ReqRemoteInvalidRequest = vp.PUInt64()
   520  		case "resp_cqe_error":
   521  			hwCounters.RespCqeError = vp.PUInt64()
   522  		case "resp_cqe_flush_error":
   523  			hwCounters.RespCqeFlushError = vp.PUInt64()
   524  		case "resp_local_length_error":
   525  			hwCounters.RespLocalLengthError = vp.PUInt64()
   526  		case "resp_remote_access_errors":
   527  			hwCounters.RespRemoteAccessErrors = vp.PUInt64()
   528  		case "rnr_nak_retry_err":
   529  			hwCounters.RnrNakRetryErr = vp.PUInt64()
   530  		case "roce_adp_retrans":
   531  			hwCounters.RoceAdpRetrans = vp.PUInt64()
   532  		case "roce_adp_retrans_to":
   533  			hwCounters.RoceAdpRetransTo = vp.PUInt64()
   534  		case "roce_slow_restart":
   535  			hwCounters.RoceSlowRestart = vp.PUInt64()
   536  		case "roce_slow_restart_cnps":
   537  			hwCounters.RoceSlowRestartCnps = vp.PUInt64()
   538  		case "roce_slow_restart_trans":
   539  			hwCounters.RoceSlowRestartTrans = vp.PUInt64()
   540  		case "rp_cnp_handled":
   541  			hwCounters.RpCnpHandled = vp.PUInt64()
   542  		case "rp_cnp_ignored":
   543  			hwCounters.RpCnpIgnored = vp.PUInt64()
   544  		case "rx_atomic_requests":
   545  			hwCounters.RxAtomicRequests = vp.PUInt64()
   546  		case "rx_dct_connect":
   547  			hwCounters.RxDctConnect = vp.PUInt64()
   548  		case "rx_icrc_encapsulated":
   549  			hwCounters.RxIcrcEncapsulated = vp.PUInt64()
   550  		case "rx_read_requests":
   551  			hwCounters.RxReadRequests = vp.PUInt64()
   552  		case "rx_write_requests":
   553  			hwCounters.RxWriteRequests = vp.PUInt64()
   554  		}
   555  
   556  		if err := vp.Err(); err != nil {
   557  			// Ugly workaround for handling https://github.com/prometheus/node_exporter/issues/966
   558  			// when counters are `N/A (not available)`.
   559  			// This was already patched and submitted, see
   560  			// https://www.spinics.net/lists/linux-rdma/msg68596.html
   561  			// Remove this as soon as the fix lands in the enterprise distros.
   562  			if strings.Contains(value, "N/A (no PMA)") {
   563  				continue
   564  			}
   565  			return nil, err
   566  		}
   567  	}
   568  	return &hwCounters, nil
   569  }
   570  

View as plain text