cidr_set.go

Documentation: k8s.io/kubernetes/pkg/controller/nodeipam/ipam/cidrset

     1  /*
     2  Copyright 2016 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package cidrset
    18  
    19  import (
    20  	"encoding/binary"
    21  	"errors"
    22  	"fmt"
    23  	"math/big"
    24  	"math/bits"
    25  	"net"
    26  	"sync"
    27  )
    28  
    29  // CidrSet manages a set of CIDR ranges from which blocks of IPs can
    30  // be allocated from.
    31  type CidrSet struct {
    32  	sync.Mutex
    33  	// clusterCIDR is the CIDR assigned to the cluster
    34  	clusterCIDR *net.IPNet
    35  	// clusterMaskSize is the mask size, in bits, assigned to the cluster
    36  	// caches the mask size to avoid the penalty of calling clusterCIDR.Mask.Size()
    37  	clusterMaskSize int
    38  	// nodeMask is the network mask assigned to the nodes
    39  	nodeMask net.IPMask
    40  	// nodeMaskSize is the mask size, in bits,assigned to the nodes
    41  	// caches the mask size to avoid the penalty of calling nodeMask.Size()
    42  	nodeMaskSize int
    43  	// maxCIDRs is the maximum number of CIDRs that can be allocated
    44  	maxCIDRs int
    45  	// allocatedCIDRs counts the number of CIDRs allocated
    46  	allocatedCIDRs int
    47  	// nextCandidate points to the next CIDR that should be free
    48  	nextCandidate int
    49  	// used is a bitmap used to track the CIDRs allocated
    50  	used big.Int
    51  	// label is used to identify the metrics
    52  	label string
    53  }
    54  
    55  const (
    56  	// The subnet mask size cannot be greater than 16 more than the cluster mask size
    57  	// TODO: https://github.com/kubernetes/kubernetes/issues/44918
    58  	// clusterSubnetMaxDiff limited to 16 due to the uncompressed bitmap
    59  	// Due to this limitation the subnet mask for IPv6 cluster cidr needs to be >= 48
    60  	// as default mask size for IPv6 is 64.
    61  	clusterSubnetMaxDiff = 16
    62  	// halfIPv6Len is the half of the IPv6 length
    63  	halfIPv6Len = net.IPv6len / 2
    64  )
    65  
    66  var (
    67  	// ErrCIDRRangeNoCIDRsRemaining occurs when there is no more space
    68  	// to allocate CIDR ranges.
    69  	ErrCIDRRangeNoCIDRsRemaining = errors.New(
    70  		"CIDR allocation failed; there are no remaining CIDRs left to allocate in the accepted range")
    71  	// ErrCIDRSetSubNetTooBig occurs when the subnet mask size is too
    72  	// big compared to the CIDR mask size.
    73  	ErrCIDRSetSubNetTooBig = errors.New(
    74  		"New CIDR set failed; the node CIDR size is too big")
    75  )
    76  
    77  // NewCIDRSet creates a new CidrSet.
    78  func NewCIDRSet(clusterCIDR *net.IPNet, subNetMaskSize int) (*CidrSet, error) {
    79  	clusterMask := clusterCIDR.Mask
    80  	clusterMaskSize, bits := clusterMask.Size()
    81  
    82  	if (clusterCIDR.IP.To4() == nil) && (subNetMaskSize-clusterMaskSize > clusterSubnetMaxDiff) {
    83  		return nil, ErrCIDRSetSubNetTooBig
    84  	}
    85  
    86  	// register CidrSet metrics
    87  	registerCidrsetMetrics()
    88  
    89  	maxCIDRs := getMaxCIDRs(subNetMaskSize, clusterMaskSize)
    90  	cidrSet := &CidrSet{
    91  		clusterCIDR:     clusterCIDR,
    92  		nodeMask:        net.CIDRMask(subNetMaskSize, bits),
    93  		clusterMaskSize: clusterMaskSize,
    94  		maxCIDRs:        maxCIDRs,
    95  		nodeMaskSize:    subNetMaskSize,
    96  		label:           clusterCIDR.String(),
    97  	}
    98  	cidrSetMaxCidrs.WithLabelValues(cidrSet.label).Set(float64(maxCIDRs))
    99  
   100  	return cidrSet, nil
   101  }
   102  
   103  func (s *CidrSet) indexToCIDRBlock(index int) *net.IPNet {
   104  	var ip []byte
   105  	switch /*v4 or v6*/ {
   106  	case s.clusterCIDR.IP.To4() != nil:
   107  		{
   108  			j := uint32(index) << uint32(32-s.nodeMaskSize)
   109  			ipInt := (binary.BigEndian.Uint32(s.clusterCIDR.IP)) | j
   110  			ip = make([]byte, net.IPv4len)
   111  			binary.BigEndian.PutUint32(ip, ipInt)
   112  		}
   113  	case s.clusterCIDR.IP.To16() != nil:
   114  		{
   115  			// leftClusterIP      |     rightClusterIP
   116  			// 2001:0DB8:1234:0000:0000:0000:0000:0000
   117  			const v6NBits = 128
   118  			const halfV6NBits = v6NBits / 2
   119  			leftClusterIP := binary.BigEndian.Uint64(s.clusterCIDR.IP[:halfIPv6Len])
   120  			rightClusterIP := binary.BigEndian.Uint64(s.clusterCIDR.IP[halfIPv6Len:])
   121  
   122  			ip = make([]byte, net.IPv6len)
   123  
   124  			if s.nodeMaskSize <= halfV6NBits {
   125  				// We only care about left side IP
   126  				leftClusterIP |= uint64(index) << uint(halfV6NBits-s.nodeMaskSize)
   127  			} else {
   128  				if s.clusterMaskSize < halfV6NBits {
   129  					// see how many bits are needed to reach the left side
   130  					btl := uint(s.nodeMaskSize - halfV6NBits)
   131  					indexMaxBit := uint(64 - bits.LeadingZeros64(uint64(index)))
   132  					if indexMaxBit > btl {
   133  						leftClusterIP |= uint64(index) >> btl
   134  					}
   135  				}
   136  				// the right side will be calculated the same way either the
   137  				// subNetMaskSize affects both left and right sides
   138  				rightClusterIP |= uint64(index) << uint(v6NBits-s.nodeMaskSize)
   139  			}
   140  			binary.BigEndian.PutUint64(ip[:halfIPv6Len], leftClusterIP)
   141  			binary.BigEndian.PutUint64(ip[halfIPv6Len:], rightClusterIP)
   142  		}
   143  	}
   144  	return &net.IPNet{
   145  		IP:   ip,
   146  		Mask: s.nodeMask,
   147  	}
   148  }
   149  
   150  // AllocateNext allocates the next free CIDR range. This will set the range
   151  // as occupied and return the allocated range.
   152  func (s *CidrSet) AllocateNext() (*net.IPNet, error) {
   153  	s.Lock()
   154  	defer s.Unlock()
   155  
   156  	if s.allocatedCIDRs == s.maxCIDRs {
   157  		return nil, ErrCIDRRangeNoCIDRsRemaining
   158  	}
   159  	candidate := s.nextCandidate
   160  	var i int
   161  	for i = 0; i < s.maxCIDRs; i++ {
   162  		if s.used.Bit(candidate) == 0 {
   163  			break
   164  		}
   165  		candidate = (candidate + 1) % s.maxCIDRs
   166  	}
   167  
   168  	s.nextCandidate = (candidate + 1) % s.maxCIDRs
   169  	s.used.SetBit(&s.used, candidate, 1)
   170  	s.allocatedCIDRs++
   171  	// Update metrics
   172  	cidrSetAllocations.WithLabelValues(s.label).Inc()
   173  	cidrSetAllocationTriesPerRequest.WithLabelValues(s.label).Observe(float64(i))
   174  	cidrSetUsage.WithLabelValues(s.label).Set(float64(s.allocatedCIDRs) / float64(s.maxCIDRs))
   175  
   176  	return s.indexToCIDRBlock(candidate), nil
   177  }
   178  
   179  func (s *CidrSet) getBeginningAndEndIndices(cidr *net.IPNet) (begin, end int, err error) {
   180  	if cidr == nil {
   181  		return -1, -1, fmt.Errorf("error getting indices for cluster cidr %v, cidr is nil", s.clusterCIDR)
   182  	}
   183  	begin, end = 0, s.maxCIDRs-1
   184  	cidrMask := cidr.Mask
   185  	maskSize, _ := cidrMask.Size()
   186  	var ipSize int
   187  
   188  	if !s.clusterCIDR.Contains(cidr.IP.Mask(s.clusterCIDR.Mask)) && !cidr.Contains(s.clusterCIDR.IP.Mask(cidr.Mask)) {
   189  		return -1, -1, fmt.Errorf("cidr %v is out the range of cluster cidr %v", cidr, s.clusterCIDR)
   190  	}
   191  
   192  	if s.clusterMaskSize < maskSize {
   193  
   194  		ipSize = net.IPv4len
   195  		if cidr.IP.To4() == nil {
   196  			ipSize = net.IPv6len
   197  		}
   198  		begin, err = s.getIndexForIP(cidr.IP.Mask(s.nodeMask))
   199  		if err != nil {
   200  			return -1, -1, err
   201  		}
   202  		ip := make([]byte, ipSize)
   203  		if cidr.IP.To4() != nil {
   204  			ipInt := binary.BigEndian.Uint32(cidr.IP) | (^binary.BigEndian.Uint32(cidr.Mask))
   205  			binary.BigEndian.PutUint32(ip, ipInt)
   206  		} else {
   207  			// ipIntLeft          |         ipIntRight
   208  			// 2001:0DB8:1234:0000:0000:0000:0000:0000
   209  			ipIntLeft := binary.BigEndian.Uint64(cidr.IP[:net.IPv6len/2]) | (^binary.BigEndian.Uint64(cidr.Mask[:net.IPv6len/2]))
   210  			ipIntRight := binary.BigEndian.Uint64(cidr.IP[net.IPv6len/2:]) | (^binary.BigEndian.Uint64(cidr.Mask[net.IPv6len/2:]))
   211  			binary.BigEndian.PutUint64(ip[:net.IPv6len/2], ipIntLeft)
   212  			binary.BigEndian.PutUint64(ip[net.IPv6len/2:], ipIntRight)
   213  		}
   214  		end, err = s.getIndexForIP(net.IP(ip).Mask(s.nodeMask))
   215  		if err != nil {
   216  			return -1, -1, err
   217  		}
   218  	}
   219  	return begin, end, nil
   220  }
   221  
   222  // Release releases the given CIDR range.
   223  func (s *CidrSet) Release(cidr *net.IPNet) error {
   224  	begin, end, err := s.getBeginningAndEndIndices(cidr)
   225  	if err != nil {
   226  		return err
   227  	}
   228  	s.Lock()
   229  	defer s.Unlock()
   230  	for i := begin; i <= end; i++ {
   231  		// Only change the counters if we change the bit to prevent
   232  		// double counting.
   233  		if s.used.Bit(i) != 0 {
   234  			s.used.SetBit(&s.used, i, 0)
   235  			s.allocatedCIDRs--
   236  			cidrSetReleases.WithLabelValues(s.label).Inc()
   237  		}
   238  	}
   239  
   240  	cidrSetUsage.WithLabelValues(s.label).Set(float64(s.allocatedCIDRs) / float64(s.maxCIDRs))
   241  	return nil
   242  }
   243  
   244  // Occupy marks the given CIDR range as used. Occupy succeeds even if the CIDR
   245  // range was previously used.
   246  func (s *CidrSet) Occupy(cidr *net.IPNet) (err error) {
   247  	begin, end, err := s.getBeginningAndEndIndices(cidr)
   248  	if err != nil {
   249  		return err
   250  	}
   251  	s.Lock()
   252  	defer s.Unlock()
   253  	for i := begin; i <= end; i++ {
   254  		// Only change the counters if we change the bit to prevent
   255  		// double counting.
   256  		if s.used.Bit(i) == 0 {
   257  			s.used.SetBit(&s.used, i, 1)
   258  			s.allocatedCIDRs++
   259  			cidrSetAllocations.WithLabelValues(s.label).Inc()
   260  		}
   261  	}
   262  
   263  	cidrSetUsage.WithLabelValues(s.label).Set(float64(s.allocatedCIDRs) / float64(s.maxCIDRs))
   264  	return nil
   265  }
   266  
   267  func (s *CidrSet) getIndexForIP(ip net.IP) (int, error) {
   268  	if ip.To4() != nil {
   269  		cidrIndex := (binary.BigEndian.Uint32(s.clusterCIDR.IP) ^ binary.BigEndian.Uint32(ip.To4())) >> uint32(32-s.nodeMaskSize)
   270  		if cidrIndex >= uint32(s.maxCIDRs) {
   271  			return 0, fmt.Errorf("CIDR: %v/%v is out of the range of CIDR allocator", ip, s.nodeMaskSize)
   272  		}
   273  		return int(cidrIndex), nil
   274  	}
   275  	if ip.To16() != nil {
   276  		bigIP := big.NewInt(0).SetBytes(s.clusterCIDR.IP)
   277  		bigIP = bigIP.Xor(bigIP, big.NewInt(0).SetBytes(ip))
   278  		cidrIndexBig := bigIP.Rsh(bigIP, uint(net.IPv6len*8-s.nodeMaskSize))
   279  		cidrIndex := cidrIndexBig.Uint64()
   280  		if cidrIndex >= uint64(s.maxCIDRs) {
   281  			return 0, fmt.Errorf("CIDR: %v/%v is out of the range of CIDR allocator", ip, s.nodeMaskSize)
   282  		}
   283  		return int(cidrIndex), nil
   284  	}
   285  
   286  	return 0, fmt.Errorf("invalid IP: %v", ip)
   287  }
   288  
   289  // getMaxCIDRs returns the max number of CIDRs that can be obtained by subdividing a mask of size `clusterMaskSize`
   290  // into subnets with mask of size `subNetMaskSize`.
   291  func getMaxCIDRs(subNetMaskSize, clusterMaskSize int) int {
   292  	return 1 << uint32(subNetMaskSize-clusterMaskSize)
   293  }
   294
View as plain text