...

Source file src/go.etcd.io/etcd/server/v3/config/config.go

Documentation: go.etcd.io/etcd/server/v3/config

     1  // Copyright 2015 The etcd Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package config
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"path/filepath"
    21  	"sort"
    22  	"strings"
    23  	"time"
    24  
    25  	"go.etcd.io/etcd/client/pkg/v3/transport"
    26  	"go.etcd.io/etcd/client/pkg/v3/types"
    27  	"go.etcd.io/etcd/pkg/v3/netutil"
    28  	"go.etcd.io/etcd/server/v3/datadir"
    29  	"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
    30  
    31  	bolt "go.etcd.io/bbolt"
    32  	"go.uber.org/zap"
    33  )
    34  
    35  // ServerConfig holds the configuration of etcd as taken from the command line or discovery.
    36  type ServerConfig struct {
    37  	Name           string
    38  	DiscoveryURL   string
    39  	DiscoveryProxy string
    40  	ClientURLs     types.URLs
    41  	PeerURLs       types.URLs
    42  	DataDir        string
    43  	// DedicatedWALDir, when non-empty, makes etcd write the WAL to that directory
    44  	// rather than to the default dataDir/member/wal.
    45  	DedicatedWALDir string
    46  
    47  	SnapshotCount uint64
    48  
    49  	// SnapshotCatchUpEntries is the number of entries for a slow follower
    50  	// to catch up after compacting the raft storage entries.
    51  	// We expect the follower to have millisecond-level latency to the leader.
    52  	// The max throughput is around 10K. Keeping 5K entries is enough to help a
    53  	// follower catch up.
    54  	// WARNING: only change this for tests. Always use "DefaultSnapshotCatchUpEntries".
    55  	SnapshotCatchUpEntries uint64
    56  
    57  	MaxSnapFiles uint
    58  	MaxWALFiles  uint
    59  
    60  	// BackendBatchInterval is the maximum time before committing the backend transaction.
    61  	BackendBatchInterval time.Duration
    62  	// BackendBatchLimit is the maximum number of operations before committing the backend transaction.
    63  	BackendBatchLimit int
    64  
    65  	// BackendFreelistType is the type of the backend boltdb freelist.
    66  	BackendFreelistType bolt.FreelistType
    67  
    68  	InitialPeerURLsMap  types.URLsMap
    69  	InitialClusterToken string
    70  	NewCluster          bool
    71  	PeerTLSInfo         transport.TLSInfo
    72  
    73  	CORS map[string]struct{}
    74  
    75  	// HostWhitelist lists acceptable hostnames from client requests.
    76  	// If the server is insecure (no TLS), it only accepts requests
    77  	// whose Host header value exists in this whitelist.
    78  	HostWhitelist map[string]struct{}
    79  
    80  	TickMs        uint
    81  	ElectionTicks int
    82  
    83  	// If InitialElectionTickAdvance is true, the local member fast-forwards
    84  	// election ticks to speed up the "initial" leader election trigger. This
    85  	// benefits the case of larger election ticks. For instance, a cross
    86  	// datacenter deployment may require a longer election timeout of 10 seconds.
    87  	// If true, the local node does not need to wait up to 10 seconds. Instead, it
    88  	// forwards its election ticks to 8 seconds, leaving only 2 seconds
    89  	// before leader election.
    90  	//
    91  	// Major assumptions are that:
    92  	//  - the cluster has no active leader, thus advancing ticks enables faster
    93  	//    leader election, or
    94  	//  - the cluster already has an established leader, and the rejoining follower
    95  	//    is likely to receive heartbeats from the leader after tick advance
    96  	//    and before election timeout.
    97  	//
    98  	// However, when the network from leader to rejoining follower is congested,
    99  	// and the follower does not receive a leader heartbeat within the remaining
   100  	// election ticks, a disruptive election has to happen, thus affecting cluster
   101  	// availability.
   102  	//
   103  	// Disabling this would slow down the initial bootstrap process for cross
   104  	// datacenter deployments. Make your own tradeoffs by configuring
   105  	// --initial-election-tick-advance at the cost of slow initial bootstrap.
   106  	//
   107  	// If single-node, it advances ticks regardless.
   108  	//
   109  	// See https://github.com/etcd-io/etcd/issues/9333 for more detail.
   110  	InitialElectionTickAdvance bool
   111  
   112  	BootstrapTimeout time.Duration
   113  
   114  	AutoCompactionRetention time.Duration
   115  	AutoCompactionMode      string
   116  	CompactionBatchLimit    int
   117  	QuotaBackendBytes       int64
   118  	MaxTxnOps               uint
   119  
   120  	// MaxRequestBytes is the maximum request size to send over raft.
   121  	MaxRequestBytes uint
   122  
   123  	// MaxConcurrentStreams specifies the maximum number of concurrent
   124  	// streams that each client can open at a time.
   125  	MaxConcurrentStreams uint32
   126  
   127  	WarningApplyDuration time.Duration
   128  
   129  	StrictReconfigCheck bool
   130  
   131  	// ClientCertAuthEnabled is true when the cert has been signed by the client CA.
   132  	ClientCertAuthEnabled bool
   133  
   134  	AuthToken  string
   135  	BcryptCost uint
   136  	TokenTTL   uint
   137  
   138  	// InitialCorruptCheck is true to check for data corruption on boot
   139  	// before serving any peer/client traffic.
   140  	InitialCorruptCheck     bool
   141  	CorruptCheckTime        time.Duration
   142  	CompactHashCheckEnabled bool
   143  	CompactHashCheckTime    time.Duration
   144  
   145  	// PreVote is true to enable Raft Pre-Vote.
   146  	PreVote bool
   147  
   148  	// SocketOpts are socket options passed to listener config.
   149  	SocketOpts transport.SocketOpts
   150  
   151  	// Logger logs server-side operations.
   152  	Logger *zap.Logger
   153  
   154  	ForceNewCluster bool
   155  
   156  	// EnableLeaseCheckpoint enables the leader to send regular checkpoints to other members to prevent reset of remaining TTL on leader change.
   157  	EnableLeaseCheckpoint bool
   158  	// LeaseCheckpointInterval is the wait duration between lease checkpoints.
   159  	LeaseCheckpointInterval time.Duration
   160  	// LeaseCheckpointPersist enables persisting remainingTTL to prevent indefinite auto-renewal of long-lived leases. Always enabled in v3.6. Should be used to ensure a smooth upgrade from v3.5 clusters with this feature enabled.
   161  	LeaseCheckpointPersist bool
   162  
   163  	EnableGRPCGateway bool
   164  
   165  	// ExperimentalEnableDistributedTracing enables distributed tracing using the OpenTelemetry protocol.
   166  	ExperimentalEnableDistributedTracing bool
   167  	// ExperimentalTracerOptions are options for the OpenTelemetry gRPC interceptor.
   168  	ExperimentalTracerOptions []otelgrpc.Option
   169  
   170  	WatchProgressNotifyInterval time.Duration
   171  
   172  	// UnsafeNoFsync disables all uses of fsync.
   173  	// Setting this is unsafe and will cause data loss.
   174  	UnsafeNoFsync bool `json:"unsafe-no-fsync"`
   175  
   176  	DowngradeCheckTime time.Duration
   177  
   178  	// ExperimentalMemoryMlock enables mlocking of etcd-owned memory pages.
   179  	// The setting improves etcd tail latency in environments where:
   180  	//   - memory pressure might lead to swapping pages to disk
   181  	//   - disk latency might be unstable
   182  	// Currently all etcd memory gets mlocked, but in the future the flag can
   183  	// be refined to mlock only the in-use area of bbolt.
   184  	ExperimentalMemoryMlock bool `json:"experimental-memory-mlock"`
   185  
   186  	// ExperimentalTxnModeWriteWithSharedBuffer enables write transactions to use
   187  	// a shared buffer in their readonly check operations.
   188  	ExperimentalTxnModeWriteWithSharedBuffer bool `json:"experimental-txn-mode-write-with-shared-buffer"`
   189  
   190  	// ExperimentalBootstrapDefragThresholdMegabytes is the minimum number of megabytes that must be freed for the etcd server to
   191  	// consider running defrag during bootstrap. Needs to be set to a non-zero value to take effect.
   192  	ExperimentalBootstrapDefragThresholdMegabytes uint `json:"experimental-bootstrap-defrag-threshold-megabytes"`
   193  
   194  	// V2Deprecation defines a phase of the v2store deprecation process.
   195  	V2Deprecation V2DeprecationEnum `json:"v2-deprecation"`
   196  }
   197  
   198  // VerifyBootstrap sanity-checks the initial config for bootstrap case
   199  // and returns an error for things that should never happen.
   200  func (c *ServerConfig) VerifyBootstrap() error {
   201  	if err := c.hasLocalMember(); err != nil {
   202  		return err
   203  	}
   204  	if err := c.advertiseMatchesCluster(); err != nil {
   205  		return err
   206  	}
   207  	if CheckDuplicateURL(c.InitialPeerURLsMap) {
   208  		return fmt.Errorf("initial cluster %s has duplicate url", c.InitialPeerURLsMap)
   209  	}
   210  	if c.InitialPeerURLsMap.String() == "" && c.DiscoveryURL == "" {
   211  		return fmt.Errorf("initial cluster unset and no discovery URL found")
   212  	}
   213  	return nil
   214  }
   215  
   216  // VerifyJoinExisting sanity-checks the initial config for join existing cluster
   217  // case and returns an error for things that should never happen.
   218  func (c *ServerConfig) VerifyJoinExisting() error {
   219  	// The member has announced its peer urls to the cluster before starting; no need to
   220  	// set the configuration again.
   221  	if err := c.hasLocalMember(); err != nil {
   222  		return err
   223  	}
   224  	if CheckDuplicateURL(c.InitialPeerURLsMap) {
   225  		return fmt.Errorf("initial cluster %s has duplicate url", c.InitialPeerURLsMap)
   226  	}
   227  	if c.DiscoveryURL != "" {
   228  		return fmt.Errorf("discovery URL should not be set when joining existing initial cluster")
   229  	}
   230  	return nil
   231  }
   232  
   233  // hasLocalMember checks that the cluster at least contains the local server.
   234  func (c *ServerConfig) hasLocalMember() error {
   235  	if urls := c.InitialPeerURLsMap[c.Name]; urls == nil {
   236  		return fmt.Errorf("couldn't find local name %q in the initial cluster configuration", c.Name)
   237  	}
   238  	return nil
   239  }
   240  
   241  // advertiseMatchesCluster confirms peer URLs match those in the cluster peer list.
   242  func (c *ServerConfig) advertiseMatchesCluster() error {
   243  	urls, apurls := c.InitialPeerURLsMap[c.Name], c.PeerURLs.StringSlice()
   244  	urls.Sort()
   245  	sort.Strings(apurls)
   246  	ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
   247  	defer cancel()
   248  	ok, err := netutil.URLStringsEqual(ctx, c.Logger, apurls, urls.StringSlice())
   249  	if ok {
   250  		return nil
   251  	}
   252  
   253  	initMap, apMap := make(map[string]struct{}), make(map[string]struct{})
   254  	for _, url := range c.PeerURLs {
   255  		apMap[url.String()] = struct{}{}
   256  	}
   257  	for _, url := range c.InitialPeerURLsMap[c.Name] {
   258  		initMap[url.String()] = struct{}{}
   259  	}
   260  
   261  	missing := []string{}
   262  	for url := range initMap {
   263  		if _, ok := apMap[url]; !ok {
   264  			missing = append(missing, url)
   265  		}
   266  	}
   267  	if len(missing) > 0 {
   268  		for i := range missing {
   269  			missing[i] = c.Name + "=" + missing[i]
   270  		}
   271  		mstr := strings.Join(missing, ",")
   272  		apStr := strings.Join(apurls, ",")
   273  		return fmt.Errorf("--initial-cluster has %s but missing from --initial-advertise-peer-urls=%s (%v)", mstr, apStr, err)
   274  	}
   275  
   276  	for url := range apMap {
   277  		if _, ok := initMap[url]; !ok {
   278  			missing = append(missing, url)
   279  		}
   280  	}
   281  	if len(missing) > 0 {
   282  		mstr := strings.Join(missing, ",")
   283  		umap := types.URLsMap(map[string]types.URLs{c.Name: c.PeerURLs})
   284  		return fmt.Errorf("--initial-advertise-peer-urls has %s but missing from --initial-cluster=%s", mstr, umap.String())
   285  	}
   286  
   287  	// resolved URLs from "--initial-advertise-peer-urls" and "--initial-cluster" did not match or failed
   288  	apStr := strings.Join(apurls, ",")
   289  	umap := types.URLsMap(map[string]types.URLs{c.Name: c.PeerURLs})
   290  	return fmt.Errorf("failed to resolve %s to match --initial-cluster=%s (%v)", apStr, umap.String(), err)
   291  }
   292  
   293  func (c *ServerConfig) MemberDir() string { return datadir.ToMemberDir(c.DataDir) }
   294  
   295  func (c *ServerConfig) WALDir() string {
   296  	if c.DedicatedWALDir != "" {
   297  		return c.DedicatedWALDir
   298  	}
   299  	return datadir.ToWalDir(c.DataDir)
   300  }
   301  
   302  func (c *ServerConfig) SnapDir() string { return filepath.Join(c.MemberDir(), "snap") }
   303  
   304  func (c *ServerConfig) ShouldDiscover() bool { return c.DiscoveryURL != "" }
   305  
   306  // ReqTimeout returns timeout for request to finish.
   307  func (c *ServerConfig) ReqTimeout() time.Duration {
   308  	// 5s for queue waiting, computation and disk IO delay
   309  	// + 2 * election timeout for possible leader election
   310  	return 5*time.Second + 2*time.Duration(c.ElectionTicks*int(c.TickMs))*time.Millisecond
   311  }
   312  
   313  func (c *ServerConfig) ElectionTimeout() time.Duration {
   314  	return time.Duration(c.ElectionTicks*int(c.TickMs)) * time.Millisecond
   315  }
   316  
   317  func (c *ServerConfig) PeerDialTimeout() time.Duration {
   318  	// 1s for queue wait and election timeout
   319  	return time.Second + time.Duration(c.ElectionTicks*int(c.TickMs))*time.Millisecond
   320  }
   321  
   322  func CheckDuplicateURL(urlsmap types.URLsMap) bool {
   323  	um := make(map[string]bool)
   324  	for _, urls := range urlsmap {
   325  		for _, url := range urls {
   326  			u := url.String()
   327  			if um[u] {
   328  				return true
   329  			}
   330  			um[u] = true
   331  		}
   332  	}
   333  	return false
   334  }
   335  
   336  func (c *ServerConfig) BootstrapTimeoutEffective() time.Duration {
   337  	if c.BootstrapTimeout != 0 {
   338  		return c.BootstrapTimeout
   339  	}
   340  	return time.Second
   341  }
   342  
   343  func (c *ServerConfig) BackendPath() string { return datadir.ToBackendFileName(c.DataDir) }
   344  

View as plain text