/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package config

import (
	"fmt"
	"sort"
	"strings"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	componentbaseconfig "k8s.io/component-base/config"
	logsapi "k8s.io/component-base/logs/api/v1"
)

// KubeProxyIPTablesConfiguration contains iptables-related configuration
// details for the Kubernetes proxy server.
type KubeProxyIPTablesConfiguration struct {
	// masqueradeBit is the bit of the iptables fwmark space to use for SNAT if using
	// the iptables or ipvs proxy mode. Values must be within the range [0, 31].
	MasqueradeBit *int32
	// masqueradeAll tells kube-proxy to SNAT all traffic sent to Service cluster IPs,
	// when using the iptables or ipvs proxy mode. This may be required with some CNI
	// plugins.
	MasqueradeAll bool
	// localhostNodePorts, if false, tells kube-proxy to disable the legacy behavior
	// of allowing NodePort services to be accessed via localhost. (Applies only to
	// iptables mode and IPv4; localhost NodePorts are never allowed with other proxy
	// modes or with IPv6.)
	LocalhostNodePorts *bool
	// syncPeriod is an interval (e.g. '5s', '1m', '2h22m') indicating how frequently
	// various re-synchronizing and cleanup operations are performed. Must be greater
	// than 0.
	SyncPeriod metav1.Duration
	// minSyncPeriod is the minimum period between iptables rule resyncs (e.g. '5s',
	// '1m', '2h22m'). A value of 0 means every Service or EndpointSlice change will
	// result in an immediate iptables resync.
	MinSyncPeriod metav1.Duration
}

// KubeProxyIPVSConfiguration contains ipvs-related configuration
// details for the Kubernetes proxy server.
type KubeProxyIPVSConfiguration struct {
	// syncPeriod is an interval (e.g. '5s', '1m', '2h22m') indicating how frequently
	// various re-synchronizing and cleanup operations are performed. Must be greater
	// than 0.
	SyncPeriod metav1.Duration
	// minSyncPeriod is the minimum period between IPVS rule resyncs (e.g. '5s', '1m',
	// '2h22m'). A value of 0 means every Service or EndpointSlice change will result
	// in an immediate IPVS resync.
	MinSyncPeriod metav1.Duration
	// scheduler is the IPVS scheduler to use.
	Scheduler string
	// excludeCIDRs is a list of CIDRs which the ipvs proxier should not touch
	// when cleaning up ipvs services.
	ExcludeCIDRs []string
	// strictARP configures arp_ignore and arp_announce to avoid answering ARP queries
	// from kube-ipvs0 interface.
	StrictARP bool
	// tcpTimeout is the timeout value used for idle IPVS TCP sessions.
	// The default value is 0, which preserves the current timeout value on the system.
	TCPTimeout metav1.Duration
	// tcpFinTimeout is the timeout value used for IPVS TCP sessions after receiving a FIN.
	// The default value is 0, which preserves the current timeout value on the system.
	TCPFinTimeout metav1.Duration
	// udpTimeout is the timeout value used for IPVS UDP packets.
	// The default value is 0, which preserves the current timeout value on the system.
	UDPTimeout metav1.Duration
}

// KubeProxyNFTablesConfiguration contains nftables-related configuration
// details for the Kubernetes proxy server.
type KubeProxyNFTablesConfiguration struct {
	// masqueradeBit is the bit of the iptables fwmark space to use for SNAT if using
	// the nftables proxy mode. Values must be within the range [0, 31].
	MasqueradeBit *int32
	// masqueradeAll tells kube-proxy to SNAT all traffic sent to Service cluster IPs,
	// when using the nftables mode. This may be required with some CNI plugins.
	MasqueradeAll bool
	// syncPeriod is an interval (e.g. '5s', '1m', '2h22m') indicating how frequently
	// various re-synchronizing and cleanup operations are performed. Must be greater
	// than 0.
	SyncPeriod metav1.Duration
	// minSyncPeriod is the minimum period between nftables rule resyncs (e.g. '5s',
	// '1m', '2h22m'). A value of 0 means every Service or EndpointSlice change will
	// result in an immediate nftables resync.
	MinSyncPeriod metav1.Duration
}

// KubeProxyConntrackConfiguration contains conntrack settings for
// the Kubernetes proxy server.
type KubeProxyConntrackConfiguration struct {
	// maxPerCore is the maximum number of NAT connections to track
	// per CPU core (0 to leave the limit as-is and ignore min).
	MaxPerCore *int32
	// min is the minimum value of connect-tracking records to allocate,
	// regardless of maxPerCore (set maxPerCore=0 to leave the limit as-is).
	Min *int32
	// tcpEstablishedTimeout is how long an idle TCP connection will be kept open
	// (e.g. '2s'). Must be greater than 0 to set.
	TCPEstablishedTimeout *metav1.Duration
	// tcpCloseWaitTimeout is how long an idle conntrack entry
	// in CLOSE_WAIT state will remain in the conntrack
	// table. (e.g. '60s'). Must be greater than 0 to set.
	TCPCloseWaitTimeout *metav1.Duration
	// tcpBeLiberal, if true, kube-proxy will configure conntrack
	// to run in liberal mode for TCP connections and packets with
	// out-of-window sequence numbers won't be marked INVALID.
	TCPBeLiberal bool
	// udpTimeout is how long an idle UDP conntrack entry in
	// UNREPLIED state will remain in the conntrack table
	// (e.g. '30s'). Must be greater than 0 to set.
	UDPTimeout metav1.Duration
	// udpStreamTimeout is how long an idle UDP conntrack entry in
	// ASSURED state will remain in the conntrack table
	// (e.g. '300s'). Must be greater than 0 to set.
	UDPStreamTimeout metav1.Duration
}

// KubeProxyWinkernelConfiguration contains Windows/HNS settings for
// the Kubernetes proxy server.
type KubeProxyWinkernelConfiguration struct {
	// networkName is the name of the network kube-proxy will use
	// to create endpoints and policies.
	NetworkName string
	// sourceVip is the IP address of the source VIP endpoint used for
	// NAT when loadbalancing.
	SourceVip string
	// enableDSR tells kube-proxy whether HNS policies should be created
	// with DSR.
	EnableDSR bool
	// rootHnsEndpointName is the name of hnsendpoint that is attached to
	// l2bridge for root network namespace.
	RootHnsEndpointName string
	// forwardHealthCheckVip forwards service VIP for health check port on
	// Windows.
	ForwardHealthCheckVip bool
}

// DetectLocalConfiguration contains optional settings related to the DetectLocalMode option.
type DetectLocalConfiguration struct {
	// bridgeInterface is a bridge interface name. When DetectLocalMode is set to
	// LocalModeBridgeInterface, kube-proxy will consider traffic to be local if
	// it originates from this bridge.
	BridgeInterface string
	// interfaceNamePrefix is an interface name prefix. When DetectLocalMode is set to
	// LocalModeInterfaceNamePrefix, kube-proxy will consider traffic to be local if
	// it originates from any interface whose name begins with this prefix.
	InterfaceNamePrefix string
}

// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object

// KubeProxyConfiguration contains everything necessary to configure the
// Kubernetes proxy server.
type KubeProxyConfiguration struct {
	metav1.TypeMeta

	// featureGates is a map of feature names to bools that enable or disable alpha/experimental features.
	FeatureGates map[string]bool

	// clientConnection specifies the kubeconfig file and client connection settings for the proxy
	// server to use when communicating with the apiserver.
	ClientConnection componentbaseconfig.ClientConnectionConfiguration
	// logging specifies the options of logging.
	// Refer to [Logs Options](https://github.com/kubernetes/component-base/blob/master/logs/options.go)
	// for more information.
	Logging logsapi.LoggingConfiguration

	// hostnameOverride, if non-empty, will be used as the name of the Node that
	// kube-proxy is running on. If unset, the node name is assumed to be the same as
	// the node's hostname.
	HostnameOverride string
	// bindAddress can be used to override kube-proxy's idea of what its node's
	// primary IP is. Note that the name is a historical artifact, and kube-proxy does
	// not actually bind any sockets to this IP.
	BindAddress string
	// healthzBindAddress is the IP address and port for the health check server to
	// serve on, defaulting to "0.0.0.0:10256" (if bindAddress is unset or IPv4), or
	// "[::]:10256" (if bindAddress is IPv6).
	HealthzBindAddress string
	// metricsBindAddress is the IP address and port for the metrics server to serve
	// on, defaulting to "127.0.0.1:10249" (if bindAddress is unset or IPv4), or
	// "[::1]:10249" (if bindAddress is IPv6). (Set to "0.0.0.0:10249" / "[::]:10249"
	// to bind on all interfaces.)
	MetricsBindAddress string
	// bindAddressHardFail, if true, tells kube-proxy to treat failure to bind to a
	// port as fatal and exit.
	BindAddressHardFail bool
	// enableProfiling enables profiling via web interface on /debug/pprof handler.
	// Profiling handlers will be handled by metrics server.
	EnableProfiling bool
	// showHiddenMetricsForVersion is the version for which you want to show hidden metrics.
	ShowHiddenMetricsForVersion string

	// mode specifies which proxy mode to use.
	Mode ProxyMode
	// iptables contains iptables-related configuration options.
	IPTables KubeProxyIPTablesConfiguration
	// ipvs contains ipvs-related configuration options.
	IPVS KubeProxyIPVSConfiguration
	// winkernel contains winkernel-related configuration options.
	Winkernel KubeProxyWinkernelConfiguration
	// nftables contains nftables-related configuration options.
	NFTables KubeProxyNFTablesConfiguration

	// detectLocalMode determines mode to use for detecting local traffic, defaults to LocalModeClusterCIDR.
	DetectLocalMode LocalMode
	// detectLocal contains optional configuration settings related to DetectLocalMode.
	DetectLocal DetectLocalConfiguration
	// clusterCIDR is the CIDR range of the pods in the cluster. (For dual-stack
	// clusters, this can be a comma-separated dual-stack pair of CIDR ranges.) When
	// DetectLocalMode is set to LocalModeClusterCIDR, kube-proxy will consider
	// traffic to be local if its source IP is in this range. (Otherwise it is not
	// used.)
	ClusterCIDR string

	// nodePortAddresses is a list of CIDR ranges that contain valid node IPs. If set,
	// connections to NodePort services will only be accepted on node IPs in one of
	// the indicated ranges. If unset, NodePort connections will be accepted on all
	// local IPs.
	NodePortAddresses []string

	// oomScoreAdj is the oom-score-adj value for kube-proxy process. Values must be within
	// the range [-1000, 1000].
	OOMScoreAdj *int32
	// conntrack contains conntrack-related configuration options.
	Conntrack KubeProxyConntrackConfiguration
	// configSyncPeriod is how often configuration from the apiserver is refreshed. Must be greater
	// than 0.
	ConfigSyncPeriod metav1.Duration

	// portRange was previously used to configure the userspace proxy, but is now unused.
	PortRange string
}

// ProxyMode represents modes used by the Kubernetes proxy server.
//
// Currently, three modes of proxy are available on Linux platforms: 'iptables', 'ipvs',
// and 'nftables'. One mode of proxy is available on Windows platforms: 'kernelspace'.
//
// If the proxy mode is unspecified, the best-available proxy mode will be used (currently this
// is `iptables` on Linux and `kernelspace` on Windows). If the selected proxy mode cannot be
// used (due to lack of kernel support, missing userspace components, etc) then kube-proxy
// will exit with an error.
type ProxyMode string

// Supported values for ProxyMode.
const (
	ProxyModeIPTables    ProxyMode = "iptables"
	ProxyModeIPVS        ProxyMode = "ipvs"
	ProxyModeNFTables    ProxyMode = "nftables"
	ProxyModeKernelspace ProxyMode = "kernelspace"
)

// LocalMode represents modes to detect local traffic from the node.
type LocalMode string

// Currently supported modes for LocalMode.
const (
	LocalModeClusterCIDR         LocalMode = "ClusterCIDR"
	LocalModeNodeCIDR            LocalMode = "NodeCIDR"
	LocalModeBridgeInterface     LocalMode = "BridgeInterface"
	LocalModeInterfaceNamePrefix LocalMode = "InterfaceNamePrefix"
)

// NOTE(review): the Set/String/Type triples below look like the method set of
// a command-line flag value (e.g. spf13/pflag's Value interface) — confirm
// against the callers that register these types as flags.

// Set stores s as the proxy mode. No validation is performed here, so the
// returned error is always nil.
func (m *ProxyMode) Set(s string) error {
	*m = ProxyMode(s)
	return nil
}

// String returns the proxy mode as a plain string, or "" for a nil receiver.
func (m *ProxyMode) String() string {
	if m != nil {
		return string(*m)
	}
	return ""
}

// Type returns the fixed type name "ProxyMode".
func (m *ProxyMode) Type() string {
	return "ProxyMode"
}

// Set stores s as the local-traffic detection mode. No validation is
// performed here, so the returned error is always nil.
func (m *LocalMode) Set(s string) error {
	*m = LocalMode(s)
	return nil
}

// String returns the local mode as a plain string, or "" for a nil receiver.
func (m *LocalMode) String() string {
	if m != nil {
		return string(*m)
	}
	return ""
}

// Type returns the fixed type name "LocalMode".
func (m *LocalMode) Type() string {
	return "LocalMode"
}

// ConfigurationMap is a string-to-string map that can be populated from a
// comma-separated list of "key=value" pairs (see Set) and rendered back in
// the same form (see String).
type ConfigurationMap map[string]string

// String renders the map as "k1=v1,k2=v2,...", with the pairs sorted so the
// output does not depend on map iteration order. A nil receiver or an empty
// map yields "".
func (m *ConfigurationMap) String() string {
	if m == nil {
		return ""
	}
	pairs := make([]string, 0, len(*m))
	for k, v := range *m {
		pairs = append(pairs, fmt.Sprintf("%s=%s", k, v))
	}
	sort.Strings(pairs)
	return strings.Join(pairs, ",")
}

// Set parses value as a comma-separated list of "key=value" pairs and merges
// them into the map, overwriting existing keys. Keys and values are trimmed
// of surrounding whitespace; an item without "=" is stored with an empty
// value; empty items are skipped. Only the first "=" of an item separates key
// from value, so values may themselves contain "=".
//
// The underlying map is allocated on demand, so Set is safe to call on a
// zero-value ConfigurationMap. It always returns nil.
func (m *ConfigurationMap) Set(value string) error {
	// Writing to a nil map panics; allocate before the first assignment.
	if *m == nil {
		*m = make(ConfigurationMap)
	}
	for _, s := range strings.Split(value, ",") {
		if len(s) == 0 {
			continue
		}
		arr := strings.SplitN(s, "=", 2)
		val := ""
		if len(arr) == 2 {
			val = strings.TrimSpace(arr[1])
		}
		(*m)[strings.TrimSpace(arr[0])] = val
	}
	return nil
}

// Type returns the fixed type name "mapStringString".
func (*ConfigurationMap) Type() string {
	return "mapStringString"
}