...
1 package install
2
3 import (
4 "context"
5 "errors"
6 "fmt"
7 "time"
8
9 "go.etcd.io/etcd/api/v3/etcdserverpb"
10 "go.etcd.io/etcd/api/v3/v3rpc/rpctypes"
11
12 "edge-infra.dev/pkg/lib/fog"
13 "edge-infra.dev/pkg/sds/lib/etcd/client"
14 )
15
16
17 func (r *Reconciler) addMemberAsLearner(ctx context.Context, handlers *Handlers) (uint64, error) {
18 log := fog.FromContext(ctx)
19 failedAttempts := 0
20 for {
21 resp, err := r.EtcdRetryClient.SafeMemberAddAsLearner(ctx, []string{handlers.member.PeerURL()})
22
23
24
25
26 if errors.Is(err, rpctypes.ErrTooManyLearners) {
27 failedAttempts++
28
29 if failedAttempts%12 == 0 {
30 log.V(0).Info("too many learners in the cluster. Waiting for the current learner to be promoted or removed...")
31 }
32 time.Sleep(10 * time.Second)
33 continue
34 }
35
36 if errors.Is(err, rpctypes.ErrPeerURLExist) {
37 return r.memberByPeerURL(ctx, handlers)
38 }
39 if err != nil {
40 return 0, fmt.Errorf("failed to add member as learner: %w", err)
41 }
42
43 return resp.Member.ID, nil
44 }
45 }
46
47
48
49 func (r *Reconciler) memberByPeerURL(ctx context.Context, handlers *Handlers) (uint64, error) {
50 resp, err := r.EtcdRetryClient.SafeMemberList(ctx)
51 if err != nil {
52 return 0, fmt.Errorf("failed to retrieve etcd members: %w", err)
53 }
54
55 for _, member := range resp.Members {
56 if member.PeerURLs[0] == handlers.member.PeerURL() {
57 return r.deleteIfNameMismatch(ctx, member, handlers)
58 }
59 }
60 return 0, fmt.Errorf("failed to find member by peerURL (%s)", handlers.member.PeerURL())
61 }
62
63
64
65 func (r *Reconciler) deleteIfNameMismatch(ctx context.Context, member *etcdserverpb.Member, handlers *Handlers) (uint64, error) {
66
67 if member.Name != handlers.member.Name {
68 if _, err := r.EtcdRetryClient.SafeMemberRemove(ctx, member.ID); err != nil {
69 return 0, fmt.Errorf("failed to remove mismatched member: %w", err)
70 }
71 return 0, fmt.Errorf("member name mismatch: expected %s, got %s", handlers.member.Name, member.Name)
72 }
73 return member.ID, nil
74 }
75
76
77 func (r *Reconciler) promoteLearner(ctx context.Context, memberID uint64) error {
78 failedAttempts := 0
79 for {
80 _, err := r.EtcdRetryClient.SafeMemberPromote(ctx, memberID)
81
82
83
84 if errors.Is(err, rpctypes.ErrMemberLearnerNotReady) {
85 failedAttempts++
86
87
88 if failedAttempts > 12 {
89 return fmt.Errorf("failed to promote member after 2 minutes")
90 }
91 time.Sleep(10 * time.Second)
92 continue
93 }
94
95 if client.IgnoreMemberNotLearner(err) != nil {
96 return fmt.Errorf("failed to promote learner: %w", err)
97 }
98 return nil
99 }
100 }
101
View as plain text