1 package healthcheck
2
3 import (
4 "bufio"
5 "context"
6 "crypto/x509"
7 "errors"
8 "fmt"
9 "io"
10 "net"
11 "os"
12 "sort"
13 "strconv"
14 "strings"
15 "time"
16
17 controllerK8s "github.com/linkerd/linkerd2/controller/k8s"
18 l5dcharts "github.com/linkerd/linkerd2/pkg/charts/linkerd2"
19 "github.com/linkerd/linkerd2/pkg/config"
20 "github.com/linkerd/linkerd2/pkg/identity"
21 "github.com/linkerd/linkerd2/pkg/issuercerts"
22 "github.com/linkerd/linkerd2/pkg/k8s"
23 "github.com/linkerd/linkerd2/pkg/tls"
24 "github.com/linkerd/linkerd2/pkg/util"
25 "github.com/linkerd/linkerd2/pkg/version"
26 log "github.com/sirupsen/logrus"
27 admissionRegistration "k8s.io/api/admissionregistration/v1"
28 appsv1 "k8s.io/api/apps/v1"
29 corev1 "k8s.io/api/core/v1"
30 apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
31 kerrors "k8s.io/apimachinery/pkg/api/errors"
32 "k8s.io/apimachinery/pkg/api/meta"
33 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
34 "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
35 "k8s.io/apimachinery/pkg/labels"
36 "k8s.io/apimachinery/pkg/runtime"
37 "k8s.io/apimachinery/pkg/runtime/schema"
38 yamlDecoder "k8s.io/apimachinery/pkg/util/yaml"
39 k8sVersion "k8s.io/apimachinery/pkg/version"
40 "k8s.io/client-go/kubernetes"
41 apiregistrationv1client "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/typed/apiregistration/v1"
42 "sigs.k8s.io/yaml"
43 )
44
45
// CategoryID is the string identifier of a group of related health checks.
// Every Category carries one, and NewHealthChecker uses it to decide which
// categories to enable.
type CategoryID string
47
48 const (
49
50
51 KubernetesAPIChecks CategoryID = "kubernetes-api"
52
53
54
55 KubernetesVersionChecks CategoryID = "kubernetes-version"
56
57
58
59
60
61
62
63
64
65
66
67 LinkerdPreInstallChecks CategoryID = "pre-kubernetes-setup"
68
69
70
71
72 LinkerdCRDChecks CategoryID = "linkerd-crd"
73
74
75
76
77
78
79
80
81
82
83 LinkerdConfigChecks CategoryID = "linkerd-config"
84
85
86
87 LinkerdIdentity CategoryID = "linkerd-identity"
88
89
90
91 LinkerdWebhooksAndAPISvcTLS CategoryID = "linkerd-webhooks-and-apisvc-tls"
92
93
94
95
96
97 LinkerdIdentityDataPlane CategoryID = "linkerd-identity-data-plane"
98
99
100
101
102
103 LinkerdControlPlaneExistenceChecks CategoryID = "linkerd-existence"
104
105
106
107 LinkerdVersionChecks CategoryID = "linkerd-version"
108
109
110
111
112
113
114 LinkerdControlPlaneVersionChecks CategoryID = "control-plane-version"
115
116
117
118
119
120
121
122 LinkerdDataPlaneChecks CategoryID = "linkerd-data-plane"
123
124
125
126 LinkerdControlPlaneProxyChecks CategoryID = "linkerd-control-plane-proxy"
127
128
129
130 LinkerdHAChecks CategoryID = "linkerd-ha-checks"
131
132
133
134 LinkerdCNIPluginChecks CategoryID = "linkerd-cni-plugin"
135
136
137
138
139 LinkerdOpaquePortsDefinitionChecks CategoryID = "linkerd-opaque-ports-definition"
140
141
142
143 LinkerdExtensionChecks CategoryID = "linkerd-extension-checks"
144
145
146
147
148
149 LinkerdCNIResourceLabel = "linkerd.io/cni-resource"
150
151 linkerdCNIDisabledSkipReason = "skipping check because CNI is not enabled"
152 linkerdCNIResourceName = "linkerd-cni"
153 linkerdCNIConfigMapName = "linkerd-cni-config"
154
155 podCIDRUnavailableSkipReason = "skipping check because the nodes aren't exposing podCIDR"
156 configMapDoesNotExistSkipReason = "skipping check because ConigMap does not exist"
157
158 proxyInjectorOldTLSSecretName = "linkerd-proxy-injector-tls"
159 proxyInjectorTLSSecretName = "linkerd-proxy-injector-k8s-tls"
160
161 spValidatorOldTLSSecretName = "linkerd-sp-validator-tls"
162 spValidatorTLSSecretName = "linkerd-sp-validator-k8s-tls"
163
164 policyValidatorTLSSecretName = "linkerd-policy-validator-k8s-tls"
165 certOldKeyName = "crt.pem"
166 certKeyName = "tls.crt"
167 keyOldKeyName = "key.pem"
168 keyKeyName = "tls.key"
169 )
170
171
172
173
174
175
176
// AllowedClockSkew is the clock skew tolerance exported by this package: five
// minutes on top of tls.DefaultClockSkewAllowance.
// NOTE(review): presumably consumed by the clock skew check (checkClockSkew),
// which is not visible in this part of the file — confirm.
const AllowedClockSkew = 5*time.Minute + tls.DefaultClockSkewAllowance
178
// linkerdHAControlPlaneComponents lists the names of control plane components.
// NOTE(review): presumably these are the components inspected by the HA
// checks; the consumer is not visible in this part of the file — confirm.
var linkerdHAControlPlaneComponents = []string{
	"linkerd-destination",
	"linkerd-identity",
	"linkerd-proxy-injector",
}
184
185
186
187
// ExpectedServiceAccountNames lists the ServiceAccounts that the
// "control plane ServiceAccounts exist" check passes to checkServiceAccounts
// for the control plane namespace.
var ExpectedServiceAccountNames = []string{
	"linkerd-destination",
	"linkerd-identity",
	"linkerd-proxy-injector",
}
193
// Timing knobs. They are variables rather than constants, so they can be
// reassigned at runtime.
var (
	// retryWindow is a five second interval.
	// NOTE(review): presumably the pause between retries of a failing check;
	// its consumer is not visible in this part of the file — confirm.
	retryWindow = 5 * time.Second

	// RequestTimeout is the timeout handed to k8s.NewAPI when the Kubernetes
	// API client is created.
	RequestTimeout = 30 * time.Second
)
199
200
201
202
203 type Resource struct {
204 groupVersionKind schema.GroupVersionKind
205 name string
206 }
207
208
209
210 func (r *Resource) String() string {
211 return fmt.Sprintf("%s/%s", strings.ToLower(r.groupVersionKind.GroupKind().String()), r.name)
212 }
213
214
215
216
217 type ResourceError struct {
218 resourceName string
219 Resources []Resource
220 }
221
222
223
224 func (e ResourceError) Error() string {
225 names := []string{}
226 for _, res := range e.Resources {
227 names = append(names, res.name)
228 }
229 return fmt.Sprintf("%s found but should not exist: %s", e.resourceName, strings.Join(names, " "))
230 }
231
232
233
234 type CategoryError struct {
235 Category CategoryID
236 Err error
237 }
238
239
240 func (e CategoryError) Error() string {
241 return e.Err.Error()
242 }
243
244
245 func IsCategoryError(err error, categoryID CategoryID) bool {
246 var ce CategoryError
247 if errors.As(err, &ce) {
248 return ce.Category == categoryID
249 }
250 return false
251 }
252
253
// SkipError is returned by a check to signal that it was skipped rather than
// run; Reason says why.
type SkipError struct {
	Reason string
}

// Error implements the error interface; the message is the skip Reason.
func (s SkipError) Error() string {
	return s.Reason
}
262
263
264
// VerboseSuccess satisfies the error interface but carries a Message instead
// of an error text.
// NOTE(review): the name suggests checks return it to report a success with
// extra output; the consumer is not visible in this part of the file.
type VerboseSuccess struct {
	Message string
}

// Error implements the error interface. It always returns the empty string;
// the payload is read from Message, not from Error.
func (v VerboseSuccess) Error() string {
	return ""
}
274
275
// Checker describes a single health check. Build one with NewChecker and the
// chainable With*/Fatal/Warning/SurfaceErrorOnRetry methods, or with a struct
// literal from inside this package.
type Checker struct {
	// description is the short human-readable text of the check, e.g.
	// "can initialize the client".
	description string

	// hintAnchor is the anchor of the check's hint, e.g. "k8s-api".
	// NOTE(review): presumably appended to the category's hintBaseURL to form
	// CheckResult.HintURL; that code is not visible here.
	hintAnchor string

	// fatal marks the check as fatal.
	// NOTE(review): presumably a failing fatal check stops the remaining
	// checks; the runner is not visible here.
	fatal bool

	// warning marks the check as a warning.
	// NOTE(review): presumably a failure is reported without failing the
	// overall run; the runner is not visible here.
	warning bool

	// retryDeadline is the retry deadline of the check; the zero value is
	// what NewChecker leaves in place.
	retryDeadline time.Time

	// surfaceErrorOnRetry is a flag for retried checks.
	// NOTE(review): presumably makes the runner report the actual error on
	// each retry; the runner is not visible here.
	surfaceErrorOnRetry bool

	// check is the function that performs the check and returns its outcome
	// as an error.
	check func(context.Context) error
}

// NewChecker returns a Checker that has only its description set; every other
// field keeps its zero value.
func NewChecker(description string) *Checker {
	c := new(Checker)
	c.description = description
	return c
}

// WithHintAnchor sets the hint anchor and returns the same Checker so calls
// can be chained.
func (c *Checker) WithHintAnchor(hint string) *Checker {
	c.hintAnchor = hint
	return c
}

// Fatal marks the check as fatal and returns the same Checker.
func (c *Checker) Fatal() *Checker {
	c.fatal = true
	return c
}

// Warning marks the check as a warning and returns the same Checker.
func (c *Checker) Warning() *Checker {
	c.warning = true
	return c
}

// WithRetryDeadline sets the retry deadline and returns the same Checker.
func (c *Checker) WithRetryDeadline(retryDeadLine time.Time) *Checker {
	c.retryDeadline = retryDeadLine
	return c
}

// SurfaceErrorOnRetry sets the surfaceErrorOnRetry flag and returns the same
// Checker.
func (c *Checker) SurfaceErrorOnRetry() *Checker {
	c.surfaceErrorOnRetry = true
	return c
}

// WithCheck sets the function that performs the check and returns the same
// Checker.
func (c *Checker) WithCheck(check func(context.Context) error) *Checker {
	c.check = check
	return c
}
351
352
353
354
// CheckResult is the value handed to a CheckObserver.
// NOTE(review): the code that fills it in is not visible in this part of the
// file; the field notes below are read off the field names and the matching
// Checker/Category fields — confirm against the runner.
type CheckResult struct {
	// Category is the ID of the category the result belongs to.
	Category CategoryID
	// Description presumably mirrors the checker's description.
	Description string
	// HintURL presumably points at documentation for the check.
	HintURL string
	// Retry presumably signals that the check is going to be retried.
	Retry bool
	// Warning presumably mirrors the checker's warning flag.
	Warning bool
	// Err is the error attached to the result, if any.
	Err error
}
363
364
// CheckObserver is a callback that receives a CheckResult; Runner.RunChecks
// takes one as its argument.
type CheckObserver func(*CheckResult)
366
367
368 type Category struct {
369 ID CategoryID
370 checkers []Checker
371 enabled bool
372
373
374 hintBaseURL string
375 }
376
377
378 func NewCategory(id CategoryID, checkers []Checker, enabled bool) *Category {
379 return &Category{
380 ID: id,
381 checkers: checkers,
382 enabled: enabled,
383 hintBaseURL: HintBaseURL(version.Version),
384 }
385 }
386
387
388 func (c *Category) WithHintBaseURL(hintBaseURL string) *Category {
389 c.hintBaseURL = hintBaseURL
390 return c
391 }
392
393
// Options holds the caller-supplied settings of a HealthChecker. It is
// embedded in HealthChecker by pointer, so checks read these fields through
// the checker, and a write (see CNIEnabled) is visible to whoever holds the
// same Options value.
type Options struct {
	// IsMainCheckCommand is not read in the visible part of this file.
	IsMainCheckCommand bool
	// ControlPlaneNamespace is the namespace the control plane checks look at.
	ControlPlaneNamespace string
	// CNINamespace is the namespace in which the CNI plugin's ConfigMap,
	// ServiceAccount and DaemonSet are looked up.
	CNINamespace string
	// DataPlaneNamespace is not read in the visible part of this file.
	DataPlaneNamespace string
	// KubeConfig, KubeContext, Impersonate and ImpersonateGroup are forwarded
	// to k8s.NewAPI by InitializeKubeAPIClient.
	KubeConfig       string
	KubeContext      string
	Impersonate      string
	ImpersonateGroup []string
	// APIAddr is not read in the visible part of this file.
	APIAddr string
	// VersionOverride, when non-empty, is passed to version.NewChannels
	// instead of fetching the latest versions.
	VersionOverride string
	// RetryDeadline is copied into the retryDeadline of the checkers that are
	// declared with one.
	RetryDeadline time.Time
	// CNIEnabled gates the CNI plugin checks. InitializeLinkerdGlobalConfig
	// overwrites it with the value from the linkerd config when one is loaded.
	CNIEnabled bool
	// InstallManifest is not read in the visible part of this file.
	InstallManifest string
	// CRDManifest is passed to CheckCustomResourceDefinitions.
	CRDManifest string
	// ChartValues is not read in the visible part of this file.
	ChartValues *l5dcharts.Values
}
411
412
413
// HealthChecker holds the check categories together with the Options they
// run with, plus state that individual checks store for later checks.
type HealthChecker struct {
	// categories starts as the result of allCategories and can be extended
	// with AppendCategories.
	categories []*Category
	*Options

	// The fields below are filled in while checks run.

	// kubeAPI is set by InitializeKubeAPIClient.
	kubeAPI *k8s.KubernetesAPI
	// kubeVersion is set by the "can query the Kubernetes API" check.
	kubeVersion *k8sVersion.Info
	// controlPlanePods is set by the "control plane pods are ready" check.
	controlPlanePods []corev1.Pod
	// LatestVersions is set by the "can determine the latest version" check.
	LatestVersions version.Channels
	// serverVersion is set by the "can retrieve the control plane version"
	// check.
	serverVersion string
	// linkerdConfig and uuid are set by InitializeLinkerdGlobalConfig.
	linkerdConfig *l5dcharts.Values
	uuid          string
	// issuerCert and trustAnchors are set by the "certificate config is
	// valid" check.
	issuerCert   *tls.Cred
	trustAnchors []*x509.Certificate
	// cniDaemonSet is set by the CNI plugin DaemonSet checks.
	cniDaemonSet *appsv1.DaemonSet
}
430
431
// Runner is implemented by anything that can run checks and report each
// result to the given CheckObserver.
// NOTE(review): the meaning of the two boolean results is not visible in this
// part of the file — presumably overall success and whether warnings were
// raised; confirm against the RunChecks implementation.
type Runner interface {
	RunChecks(observer CheckObserver) (bool, bool)
}
435
436
437 func NewHealthChecker(categoryIDs []CategoryID, options *Options) *HealthChecker {
438 hc := &HealthChecker{
439 Options: options,
440 }
441
442 hc.categories = hc.allCategories()
443
444 checkMap := map[CategoryID]struct{}{}
445 for _, category := range categoryIDs {
446 checkMap[category] = struct{}{}
447 }
448 for i := range hc.categories {
449 if _, ok := checkMap[hc.categories[i].ID]; ok {
450 hc.categories[i].enabled = true
451 }
452 }
453
454 return hc
455 }
456
457 func NewWithCoreChecks(options *Options) *HealthChecker {
458 checks := []CategoryID{KubernetesAPIChecks, LinkerdControlPlaneExistenceChecks}
459 return NewHealthChecker(checks, options)
460 }
461
462
463
464
465 func (hc *HealthChecker) InitializeKubeAPIClient() error {
466 k8sAPI, err := k8s.NewAPI(hc.KubeConfig, hc.KubeContext, hc.Impersonate, hc.ImpersonateGroup, RequestTimeout)
467 if err != nil {
468 return err
469 }
470 hc.kubeAPI = k8sAPI
471
472 return nil
473 }
474
475
476
477
478 func (hc *HealthChecker) InitializeLinkerdGlobalConfig(ctx context.Context) error {
479 uuid, l5dConfig, err := hc.checkLinkerdConfigConfigMap(ctx)
480 if err != nil {
481 return err
482 }
483
484 if l5dConfig != nil {
485 hc.CNIEnabled = l5dConfig.CNIEnabled
486 }
487 hc.uuid = uuid
488 hc.linkerdConfig = l5dConfig
489
490 return nil
491 }
492
493
// AppendCategories adds the given categories after the ones the HealthChecker
// already holds and returns the same HealthChecker so calls can be chained.
func (hc *HealthChecker) AppendCategories(categories ...*Category) *HealthChecker {
	hc.categories = append(hc.categories, categories...)
	return hc
}
498
499
// GetCategories returns the HealthChecker's categories. The internal slice is
// returned as is, not a copy.
func (hc *HealthChecker) GetCategories() []*Category {
	return hc.categories
}
503
504
505
506
507
508
509
510
511
512
513
514
515 func (hc *HealthChecker) allCategories() []*Category {
516 return []*Category{
517 NewCategory(
518 KubernetesAPIChecks,
519 []Checker{
520 {
521 description: "can initialize the client",
522 hintAnchor: "k8s-api",
523 fatal: true,
524 check: func(context.Context) (err error) {
525 err = hc.InitializeKubeAPIClient()
526 return
527 },
528 },
529 {
530 description: "can query the Kubernetes API",
531 hintAnchor: "k8s-api",
532 fatal: true,
533 check: func(ctx context.Context) (err error) {
534 hc.kubeVersion, err = hc.kubeAPI.GetVersionInfo()
535 return
536 },
537 },
538 },
539 false,
540 ),
541 NewCategory(
542 KubernetesVersionChecks,
543 []Checker{
544 {
545 description: "is running the minimum Kubernetes API version",
546 hintAnchor: "k8s-version",
547 check: func(context.Context) error {
548 return hc.kubeAPI.CheckVersion(hc.kubeVersion)
549 },
550 },
551 },
552 false,
553 ),
554 NewCategory(
555 LinkerdPreInstallChecks,
556 []Checker{
557 {
558 description: "control plane namespace does not already exist",
559 hintAnchor: "pre-ns",
560 check: func(ctx context.Context) error {
561 return hc.CheckNamespace(ctx, hc.ControlPlaneNamespace, false)
562 },
563 },
564 {
565 description: "can create non-namespaced resources",
566 hintAnchor: "pre-k8s-cluster-k8s",
567 check: func(ctx context.Context) error {
568 return hc.checkCanCreateNonNamespacedResources(ctx)
569 },
570 },
571 {
572 description: "can create ServiceAccounts",
573 hintAnchor: "pre-k8s",
574 check: func(ctx context.Context) error {
575 return hc.checkCanCreate(ctx, hc.ControlPlaneNamespace, "", "v1", "serviceaccounts")
576 },
577 },
578 {
579 description: "can create Services",
580 hintAnchor: "pre-k8s",
581 check: func(ctx context.Context) error {
582 return hc.checkCanCreate(ctx, hc.ControlPlaneNamespace, "", "v1", "services")
583 },
584 },
585 {
586 description: "can create Deployments",
587 hintAnchor: "pre-k8s",
588 check: func(ctx context.Context) error {
589 return hc.checkCanCreate(ctx, hc.ControlPlaneNamespace, "apps", "v1", "deployments")
590 },
591 },
592 {
593 description: "can create CronJobs",
594 hintAnchor: "pre-k8s",
595 check: func(ctx context.Context) error {
596 return hc.checkCanCreate(ctx, hc.ControlPlaneNamespace, "batch", "v1beta1", "cronjobs")
597 },
598 },
599 {
600 description: "can create ConfigMaps",
601 hintAnchor: "pre-k8s",
602 check: func(ctx context.Context) error {
603 return hc.checkCanCreate(ctx, hc.ControlPlaneNamespace, "", "v1", "configmaps")
604 },
605 },
606 {
607 description: "can create Secrets",
608 hintAnchor: "pre-k8s",
609 check: func(ctx context.Context) error {
610 return hc.checkCanCreate(ctx, hc.ControlPlaneNamespace, "", "v1", "secrets")
611 },
612 },
613 {
614 description: "can read Secrets",
615 hintAnchor: "pre-k8s",
616 check: func(ctx context.Context) error {
617 return hc.checkCanGet(ctx, hc.ControlPlaneNamespace, "", "v1", "secrets")
618 },
619 },
620 {
621 description: "can read extension-apiserver-authentication configmap",
622 hintAnchor: "pre-k8s",
623 check: func(ctx context.Context) error {
624 return hc.checkExtensionAPIServerAuthentication(ctx)
625 },
626 },
627 {
628 description: "no clock skew detected",
629 hintAnchor: "pre-k8s-clock-skew",
630 warning: true,
631 check: func(ctx context.Context) error {
632 return hc.checkClockSkew(ctx)
633 },
634 },
635 },
636 false,
637 ),
638 NewCategory(
639 LinkerdCRDChecks,
640 []Checker{
641 {
642 description: "control plane CustomResourceDefinitions exist",
643 hintAnchor: "l5d-existence-crd",
644 fatal: true,
645 retryDeadline: hc.RetryDeadline,
646 check: func(ctx context.Context) error {
647 return CheckCustomResourceDefinitions(ctx, hc.kubeAPI, hc.CRDManifest)
648 },
649 },
650 },
651 false,
652 ),
653 NewCategory(
654 LinkerdControlPlaneExistenceChecks,
655 []Checker{
656 {
657 description: "'linkerd-config' config map exists",
658 hintAnchor: "l5d-existence-linkerd-config",
659 fatal: true,
660 check: func(ctx context.Context) (err error) {
661 err = hc.InitializeLinkerdGlobalConfig(ctx)
662 return
663 },
664 },
665 {
666 description: "heartbeat ServiceAccount exist",
667 hintAnchor: "l5d-existence-sa",
668 fatal: true,
669 check: func(ctx context.Context) error {
670 if hc.isHeartbeatDisabled() {
671 return nil
672 }
673 return hc.checkServiceAccounts(ctx, []string{"linkerd-heartbeat"}, hc.ControlPlaneNamespace, controlPlaneComponentsSelector())
674 },
675 },
676 {
677 description: "control plane replica sets are ready",
678 hintAnchor: "l5d-existence-replicasets",
679 retryDeadline: hc.RetryDeadline,
680 fatal: true,
681 check: func(ctx context.Context) error {
682 controlPlaneReplicaSet, err := hc.kubeAPI.GetReplicaSets(ctx, hc.ControlPlaneNamespace)
683 if err != nil {
684 return err
685 }
686 return checkControlPlaneReplicaSets(controlPlaneReplicaSet)
687 },
688 },
689 {
690 description: "no unschedulable pods",
691 hintAnchor: "l5d-existence-unschedulable-pods",
692 retryDeadline: hc.RetryDeadline,
693 surfaceErrorOnRetry: true,
694 warning: true,
695 check: func(ctx context.Context) error {
696
697
698 controlPlanePods, err := hc.kubeAPI.GetPodsByNamespace(ctx, hc.ControlPlaneNamespace)
699 if err != nil {
700 return err
701 }
702 return checkUnschedulablePods(controlPlanePods)
703 },
704 },
705 {
706 description: "control plane pods are ready",
707 hintAnchor: "l5d-api-control-ready",
708 retryDeadline: hc.RetryDeadline,
709 surfaceErrorOnRetry: true,
710 fatal: true,
711 check: func(ctx context.Context) error {
712 var err error
713 podList, err := hc.kubeAPI.CoreV1().Pods(hc.ControlPlaneNamespace).List(ctx, metav1.ListOptions{
714 LabelSelector: k8s.ControllerComponentLabel,
715 })
716 if err != nil {
717 return err
718 }
719 hc.controlPlanePods = podList.Items
720 return validateControlPlanePods(hc.controlPlanePods)
721 },
722 },
723 {
724 description: "cluster networks contains all node podCIDRs",
725 hintAnchor: "l5d-cluster-networks-cidr",
726 check: func(ctx context.Context) error {
727
728
729 err := hc.InitializeLinkerdGlobalConfig(ctx)
730 if err != nil {
731 return err
732 }
733 return hc.checkClusterNetworks(ctx)
734 },
735 },
736 {
737 description: "cluster networks contains all pods",
738 hintAnchor: "l5d-cluster-networks-pods",
739 check: func(ctx context.Context) error {
740 return hc.checkClusterNetworksContainAllPods(ctx)
741 },
742 },
743 {
744 description: "cluster networks contains all services",
745 hintAnchor: "l5d-cluster-networks-pods",
746 check: func(ctx context.Context) error {
747 return hc.checkClusterNetworksContainAllServices(ctx)
748 },
749 },
750 },
751 false,
752 ),
753 NewCategory(
754 LinkerdConfigChecks,
755 []Checker{
756 {
757 description: "control plane Namespace exists",
758 hintAnchor: "l5d-existence-ns",
759 fatal: true,
760 check: func(ctx context.Context) error {
761 return hc.CheckNamespace(ctx, hc.ControlPlaneNamespace, true)
762 },
763 },
764 {
765 description: "control plane ClusterRoles exist",
766 hintAnchor: "l5d-existence-cr",
767 fatal: true,
768 check: func(ctx context.Context) error {
769 return hc.checkClusterRoles(ctx, true, hc.expectedRBACNames(), controlPlaneComponentsSelector())
770 },
771 },
772 {
773 description: "control plane ClusterRoleBindings exist",
774 hintAnchor: "l5d-existence-crb",
775 fatal: true,
776 check: func(ctx context.Context) error {
777 return hc.checkClusterRoleBindings(ctx, true, hc.expectedRBACNames(), controlPlaneComponentsSelector())
778 },
779 },
780 {
781 description: "control plane ServiceAccounts exist",
782 hintAnchor: "l5d-existence-sa",
783 fatal: true,
784 check: func(ctx context.Context) error {
785 return hc.checkServiceAccounts(ctx, ExpectedServiceAccountNames, hc.ControlPlaneNamespace, controlPlaneComponentsSelector())
786 },
787 },
788 {
789 description: "control plane CustomResourceDefinitions exist",
790 hintAnchor: "l5d-existence-crd",
791 fatal: true,
792 check: func(ctx context.Context) error {
793 return CheckCustomResourceDefinitions(ctx, hc.kubeAPI, hc.CRDManifest)
794 },
795 },
796 {
797 description: "control plane MutatingWebhookConfigurations exist",
798 hintAnchor: "l5d-existence-mwc",
799 fatal: true,
800 check: func(ctx context.Context) error {
801 return hc.checkMutatingWebhookConfigurations(ctx, true)
802 },
803 },
804 {
805 description: "control plane ValidatingWebhookConfigurations exist",
806 hintAnchor: "l5d-existence-vwc",
807 fatal: true,
808 check: func(ctx context.Context) error {
809 return hc.checkValidatingWebhookConfigurations(ctx, true)
810 },
811 },
812 {
813 description: "proxy-init container runs as root user if docker container runtime is used",
814 hintAnchor: "l5d-proxy-init-run-as-root",
815 fatal: false,
816 check: func(ctx context.Context) error {
817
818
819 err := hc.InitializeLinkerdGlobalConfig(ctx)
820 if err != nil {
821 if kerrors.IsNotFound(err) {
822 return SkipError{Reason: configMapDoesNotExistSkipReason}
823 }
824 return err
825 }
826 config := hc.LinkerdConfig()
827 runAsRoot := config != nil && config.ProxyInit != nil && config.ProxyInit.RunAsRoot
828 if !runAsRoot {
829 return CheckNodesHaveNonDockerRuntime(ctx, hc.KubeAPIClient())
830 }
831 return nil
832 },
833 },
834 },
835 false,
836 ),
837 NewCategory(
838 LinkerdCNIPluginChecks,
839 []Checker{
840 {
841 description: "cni plugin ConfigMap exists",
842 hintAnchor: "cni-plugin-cm-exists",
843 fatal: true,
844 check: func(ctx context.Context) error {
845 if !hc.CNIEnabled {
846 return SkipError{Reason: linkerdCNIDisabledSkipReason}
847 }
848 _, err := hc.kubeAPI.CoreV1().ConfigMaps(hc.CNINamespace).Get(ctx, linkerdCNIConfigMapName, metav1.GetOptions{})
849 return err
850 },
851 },
852 {
853 description: "cni plugin ClusterRole exists",
854 hintAnchor: "cni-plugin-cr-exists",
855 fatal: true,
856 check: func(ctx context.Context) error {
857 if !hc.CNIEnabled {
858 return SkipError{Reason: linkerdCNIDisabledSkipReason}
859 }
860 _, err := hc.kubeAPI.RbacV1().ClusterRoles().Get(ctx, linkerdCNIResourceName, metav1.GetOptions{})
861 if kerrors.IsNotFound(err) {
862 return fmt.Errorf("missing ClusterRole: %s", linkerdCNIResourceName)
863 }
864 return err
865 },
866 },
867 {
868 description: "cni plugin ClusterRoleBinding exists",
869 hintAnchor: "cni-plugin-crb-exists",
870 fatal: true,
871 check: func(ctx context.Context) error {
872 if !hc.CNIEnabled {
873 return SkipError{Reason: linkerdCNIDisabledSkipReason}
874 }
875 _, err := hc.kubeAPI.RbacV1().ClusterRoleBindings().Get(ctx, linkerdCNIResourceName, metav1.GetOptions{})
876 if kerrors.IsNotFound(err) {
877 return fmt.Errorf("missing ClusterRoleBinding: %s", linkerdCNIResourceName)
878 }
879 return err
880 },
881 },
882 {
883 description: "cni plugin ServiceAccount exists",
884 hintAnchor: "cni-plugin-sa-exists",
885 fatal: true,
886 check: func(ctx context.Context) error {
887 if !hc.CNIEnabled {
888 return SkipError{Reason: linkerdCNIDisabledSkipReason}
889 }
890 _, err := hc.kubeAPI.CoreV1().ServiceAccounts(hc.CNINamespace).Get(ctx, linkerdCNIResourceName, metav1.GetOptions{})
891 if kerrors.IsNotFound(err) {
892 return fmt.Errorf("missing ServiceAccount: %s", linkerdCNIResourceName)
893 }
894 return err
895 },
896 },
897 {
898 description: "cni plugin DaemonSet exists",
899 hintAnchor: "cni-plugin-ds-exists",
900 fatal: true,
901 check: func(ctx context.Context) (err error) {
902 if !hc.CNIEnabled {
903 return SkipError{Reason: linkerdCNIDisabledSkipReason}
904 }
905 hc.cniDaemonSet, err = hc.kubeAPI.Interface.AppsV1().DaemonSets(hc.CNINamespace).Get(ctx, linkerdCNIResourceName, metav1.GetOptions{})
906 if kerrors.IsNotFound(err) {
907 return fmt.Errorf("missing DaemonSet: %s", linkerdCNIResourceName)
908 }
909 return err
910 },
911 },
912 {
913 description: "cni plugin pod is running on all nodes",
914 hintAnchor: "cni-plugin-ready",
915 retryDeadline: hc.RetryDeadline,
916 surfaceErrorOnRetry: true,
917 fatal: true,
918 check: func(ctx context.Context) (err error) {
919 if !hc.CNIEnabled {
920 return SkipError{Reason: linkerdCNIDisabledSkipReason}
921 }
922 hc.cniDaemonSet, err = hc.kubeAPI.Interface.AppsV1().DaemonSets(hc.CNINamespace).Get(ctx, linkerdCNIResourceName, metav1.GetOptions{})
923 if kerrors.IsNotFound(err) {
924 return fmt.Errorf("missing DaemonSet: %s", linkerdCNIResourceName)
925 }
926 scheduled := hc.cniDaemonSet.Status.DesiredNumberScheduled
927 ready := hc.cniDaemonSet.Status.NumberReady
928 if scheduled != ready {
929 return fmt.Errorf("number ready: %d, number scheduled: %d", ready, scheduled)
930 }
931 return nil
932 },
933 },
934 },
935 false,
936 ),
937 NewCategory(
938 LinkerdIdentity,
939 []Checker{
940 {
941 description: "certificate config is valid",
942 hintAnchor: "l5d-identity-cert-config-valid",
943 fatal: true,
944 check: func(ctx context.Context) (err error) {
945 hc.issuerCert, hc.trustAnchors, err = hc.checkCertificatesConfig(ctx)
946 return
947 },
948 },
949 {
950 description: "trust anchors are using supported crypto algorithm",
951 hintAnchor: "l5d-identity-trustAnchors-use-supported-crypto",
952 fatal: true,
953 check: func(context.Context) error {
954 var invalidAnchors []string
955 for _, anchor := range hc.trustAnchors {
956 if err := issuercerts.CheckTrustAnchorAlgoRequirements(anchor); err != nil {
957 invalidAnchors = append(invalidAnchors, fmt.Sprintf("* %v %s %s", anchor.SerialNumber, anchor.Subject.CommonName, err))
958 }
959 }
960 if len(invalidAnchors) > 0 {
961 return fmt.Errorf("Invalid trustAnchors:\n\t%s", strings.Join(invalidAnchors, "\n\t"))
962 }
963 return nil
964 },
965 },
966 {
967 description: "trust anchors are within their validity period",
968 hintAnchor: "l5d-identity-trustAnchors-are-time-valid",
969 fatal: true,
970 check: func(ctx context.Context) error {
971 var expiredAnchors []string
972 for _, anchor := range hc.trustAnchors {
973 if err := issuercerts.CheckCertValidityPeriod(anchor); err != nil {
974 expiredAnchors = append(expiredAnchors, fmt.Sprintf("* %v %s %s", anchor.SerialNumber, anchor.Subject.CommonName, err))
975 }
976 }
977 if len(expiredAnchors) > 0 {
978 return fmt.Errorf("Invalid anchors:\n\t%s", strings.Join(expiredAnchors, "\n\t"))
979 }
980
981 return nil
982 },
983 },
984 {
985 description: "trust anchors are valid for at least 60 days",
986 hintAnchor: "l5d-identity-trustAnchors-not-expiring-soon",
987 warning: true,
988 check: func(ctx context.Context) error {
989 var expiringAnchors []string
990 for _, anchor := range hc.trustAnchors {
991 if err := issuercerts.CheckExpiringSoon(anchor); err != nil {
992 expiringAnchors = append(expiringAnchors, fmt.Sprintf("* %v %s %s", anchor.SerialNumber, anchor.Subject.CommonName, err))
993 }
994 }
995 if len(expiringAnchors) > 0 {
996 return fmt.Errorf("Anchors expiring soon:\n\t%s", strings.Join(expiringAnchors, "\n\t"))
997 }
998 return nil
999 },
1000 },
1001 {
1002 description: "issuer cert is using supported crypto algorithm",
1003 hintAnchor: "l5d-identity-issuer-cert-uses-supported-crypto",
1004 fatal: true,
1005 check: func(context.Context) error {
1006 if err := issuercerts.CheckIssuerCertAlgoRequirements(hc.issuerCert.Certificate); err != nil {
1007 return fmt.Errorf("issuer certificate %w", err)
1008 }
1009 return nil
1010 },
1011 },
1012 {
1013 description: "issuer cert is within its validity period",
1014 hintAnchor: "l5d-identity-issuer-cert-is-time-valid",
1015 fatal: true,
1016 check: func(ctx context.Context) error {
1017 if err := issuercerts.CheckCertValidityPeriod(hc.issuerCert.Certificate); err != nil {
1018 return fmt.Errorf("issuer certificate is %w", err)
1019 }
1020 return nil
1021 },
1022 },
1023 {
1024 description: "issuer cert is valid for at least 60 days",
1025 warning: true,
1026 hintAnchor: "l5d-identity-issuer-cert-not-expiring-soon",
1027 check: func(context.Context) error {
1028 if err := issuercerts.CheckExpiringSoon(hc.issuerCert.Certificate); err != nil {
1029 return fmt.Errorf("issuer certificate %w", err)
1030 }
1031 return nil
1032 },
1033 },
1034 {
1035 description: "issuer cert is issued by the trust anchor",
1036 hintAnchor: "l5d-identity-issuer-cert-issued-by-trust-anchor",
1037 check: func(ctx context.Context) error {
1038 return hc.issuerCert.Verify(tls.CertificatesToPool(hc.trustAnchors), "", time.Time{})
1039 },
1040 },
1041 },
1042 false,
1043 ),
1044 NewCategory(
1045 LinkerdWebhooksAndAPISvcTLS,
1046 []Checker{
1047 {
1048 description: "proxy-injector webhook has valid cert",
1049 hintAnchor: "l5d-proxy-injector-webhook-cert-valid",
1050 fatal: true,
1051 check: func(ctx context.Context) (err error) {
1052 anchors, err := hc.fetchProxyInjectorCaBundle(ctx)
1053 if err != nil {
1054 return err
1055 }
1056 cert, err := hc.FetchCredsFromSecret(ctx, hc.ControlPlaneNamespace, proxyInjectorTLSSecretName)
1057 if kerrors.IsNotFound(err) {
1058 cert, err = hc.FetchCredsFromOldSecret(ctx, hc.ControlPlaneNamespace, proxyInjectorOldTLSSecretName)
1059 }
1060 if err != nil {
1061 return err
1062 }
1063
1064 identityName := fmt.Sprintf("linkerd-proxy-injector.%s.svc", hc.ControlPlaneNamespace)
1065 return hc.CheckCertAndAnchors(cert, anchors, identityName)
1066 },
1067 },
1068 {
1069 description: "proxy-injector cert is valid for at least 60 days",
1070 warning: true,
1071 hintAnchor: "l5d-proxy-injector-webhook-cert-not-expiring-soon",
1072 check: func(ctx context.Context) error {
1073 cert, err := hc.FetchCredsFromSecret(ctx, hc.ControlPlaneNamespace, proxyInjectorTLSSecretName)
1074 if kerrors.IsNotFound(err) {
1075 cert, err = hc.FetchCredsFromOldSecret(ctx, hc.ControlPlaneNamespace, proxyInjectorOldTLSSecretName)
1076 }
1077 if err != nil {
1078 return err
1079 }
1080 return hc.CheckCertAndAnchorsExpiringSoon(cert)
1081
1082 },
1083 },
1084 {
1085 description: "sp-validator webhook has valid cert",
1086 hintAnchor: "l5d-sp-validator-webhook-cert-valid",
1087 fatal: true,
1088 check: func(ctx context.Context) (err error) {
1089 anchors, err := hc.fetchWebhookCaBundle(ctx, k8s.SPValidatorWebhookConfigName)
1090 if err != nil {
1091 return err
1092 }
1093 cert, err := hc.FetchCredsFromSecret(ctx, hc.ControlPlaneNamespace, spValidatorTLSSecretName)
1094 if kerrors.IsNotFound(err) {
1095 cert, err = hc.FetchCredsFromOldSecret(ctx, hc.ControlPlaneNamespace, spValidatorOldTLSSecretName)
1096 }
1097 if err != nil {
1098 return err
1099 }
1100 identityName := fmt.Sprintf("linkerd-sp-validator.%s.svc", hc.ControlPlaneNamespace)
1101 return hc.CheckCertAndAnchors(cert, anchors, identityName)
1102 },
1103 },
1104 {
1105 description: "sp-validator cert is valid for at least 60 days",
1106 warning: true,
1107 hintAnchor: "l5d-sp-validator-webhook-cert-not-expiring-soon",
1108 check: func(ctx context.Context) error {
1109 cert, err := hc.FetchCredsFromSecret(ctx, hc.ControlPlaneNamespace, spValidatorTLSSecretName)
1110 if kerrors.IsNotFound(err) {
1111 cert, err = hc.FetchCredsFromOldSecret(ctx, hc.ControlPlaneNamespace, spValidatorOldTLSSecretName)
1112 }
1113 if err != nil {
1114 return err
1115 }
1116 return hc.CheckCertAndAnchorsExpiringSoon(cert)
1117
1118 },
1119 },
1120 {
1121 description: "policy-validator webhook has valid cert",
1122 hintAnchor: "l5d-policy-validator-webhook-cert-valid",
1123 fatal: true,
1124 check: func(ctx context.Context) (err error) {
1125 anchors, err := hc.fetchWebhookCaBundle(ctx, k8s.PolicyValidatorWebhookConfigName)
1126 if kerrors.IsNotFound(err) {
1127 return SkipError{Reason: "policy-validator not installed"}
1128 }
1129 if err != nil {
1130 return err
1131 }
1132 cert, err := hc.FetchCredsFromSecret(ctx, hc.ControlPlaneNamespace, policyValidatorTLSSecretName)
1133 if kerrors.IsNotFound(err) {
1134 return SkipError{Reason: "policy-validator not installed"}
1135 }
1136 if err != nil {
1137 return err
1138 }
1139 identityName := fmt.Sprintf("linkerd-policy-validator.%s.svc", hc.ControlPlaneNamespace)
1140 return hc.CheckCertAndAnchors(cert, anchors, identityName)
1141 },
1142 },
1143 {
1144 description: "policy-validator cert is valid for at least 60 days",
1145 warning: true,
1146 hintAnchor: "l5d-policy-validator-webhook-cert-not-expiring-soon",
1147 check: func(ctx context.Context) error {
1148 cert, err := hc.FetchCredsFromSecret(ctx, hc.ControlPlaneNamespace, policyValidatorTLSSecretName)
1149 if kerrors.IsNotFound(err) {
1150 return SkipError{Reason: "policy-validator not installed"}
1151 }
1152 if err != nil {
1153 return err
1154 }
1155 return hc.CheckCertAndAnchorsExpiringSoon(cert)
1156
1157 },
1158 },
1159 },
1160 false,
1161 ),
1162 NewCategory(
1163 LinkerdIdentityDataPlane,
1164 []Checker{
1165 {
1166 description: "data plane proxies certificate match CA",
1167 hintAnchor: "l5d-identity-data-plane-proxies-certs-match-ca",
1168 warning: true,
1169 check: func(ctx context.Context) error {
1170 return hc.checkDataPlaneProxiesCertificate(ctx)
1171 },
1172 },
1173 },
1174 false,
1175 ),
1176 NewCategory(
1177 LinkerdVersionChecks,
1178 []Checker{
1179 {
1180 description: "can determine the latest version",
1181 hintAnchor: "l5d-version-latest",
1182 warning: true,
1183 check: func(ctx context.Context) (err error) {
1184 if hc.VersionOverride != "" {
1185 hc.LatestVersions, err = version.NewChannels(hc.VersionOverride)
1186 } else {
1187 uuid := "unknown"
1188 if hc.uuid != "" {
1189 uuid = hc.uuid
1190 }
1191 hc.LatestVersions, err = version.GetLatestVersions(ctx, uuid, "cli")
1192 }
1193 return
1194 },
1195 },
1196 {
1197 description: "cli is up-to-date",
1198 hintAnchor: "l5d-version-cli",
1199 warning: true,
1200 check: func(context.Context) error {
1201 return hc.LatestVersions.Match(version.Version)
1202 },
1203 },
1204 },
1205 false,
1206 ),
1207 NewCategory(
1208 LinkerdControlPlaneVersionChecks,
1209 []Checker{
1210 {
1211 description: "can retrieve the control plane version",
1212 hintAnchor: "l5d-version-control",
1213 retryDeadline: hc.RetryDeadline,
1214 fatal: true,
1215 check: func(ctx context.Context) (err error) {
1216 hc.serverVersion, err = GetServerVersion(ctx, hc.ControlPlaneNamespace, hc.kubeAPI)
1217 return
1218 },
1219 },
1220 {
1221 description: "control plane is up-to-date",
1222 hintAnchor: "l5d-version-control",
1223 warning: true,
1224 check: func(context.Context) error {
1225 return hc.LatestVersions.Match(hc.serverVersion)
1226 },
1227 },
1228 {
1229 description: "control plane and cli versions match",
1230 hintAnchor: "l5d-version-control",
1231 warning: true,
1232 check: func(context.Context) error {
1233 if hc.serverVersion != version.Version {
1234 return fmt.Errorf("control plane running %s but cli running %s", hc.serverVersion, version.Version)
1235 }
1236 return nil
1237 },
1238 },
1239 },
1240 false,
1241 ),
1242 NewCategory(
1243 LinkerdControlPlaneProxyChecks,
1244 []Checker{
1245 {
1246 description: "control plane proxies are healthy",
1247 hintAnchor: "l5d-cp-proxy-healthy",
1248 retryDeadline: hc.RetryDeadline,
1249 surfaceErrorOnRetry: true,
1250 fatal: true,
1251 check: func(ctx context.Context) error {
1252 return hc.CheckProxyHealth(ctx, hc.ControlPlaneNamespace, hc.ControlPlaneNamespace)
1253 },
1254 },
1255 {
1256 description: "control plane proxies are up-to-date",
1257 hintAnchor: "l5d-cp-proxy-version",
1258 warning: true,
1259 check: func(ctx context.Context) error {
1260 podList, err := hc.kubeAPI.CoreV1().Pods(hc.ControlPlaneNamespace).List(ctx, metav1.ListOptions{LabelSelector: k8s.ControllerNSLabel})
1261 if err != nil {
1262 return err
1263 }
1264
1265 return hc.CheckProxyVersionsUpToDate(podList.Items)
1266 },
1267 },
1268 {
1269 description: "control plane proxies and cli versions match",
1270 hintAnchor: "l5d-cp-proxy-cli-version",
1271 warning: true,
1272 check: func(ctx context.Context) error {
1273 podList, err := hc.kubeAPI.CoreV1().Pods(hc.ControlPlaneNamespace).List(ctx, metav1.ListOptions{LabelSelector: k8s.ControllerNSLabel})
1274 if err != nil {
1275 return err
1276 }
1277
1278 return CheckIfProxyVersionsMatchWithCLI(podList.Items)
1279 },
1280 },
1281 },
1282 false,
1283 ),
1284 NewCategory(
1285 LinkerdDataPlaneChecks,
1286 []Checker{
1287 {
1288 description: "data plane namespace exists",
1289 hintAnchor: "l5d-data-plane-exists",
1290 fatal: true,
1291 check: func(ctx context.Context) error {
1292 if hc.DataPlaneNamespace == "" {
1293
1294 return nil
1295 }
1296 return hc.CheckNamespace(ctx, hc.DataPlaneNamespace, true)
1297 },
1298 },
1299 {
1300 description: "data plane proxies are ready",
1301 hintAnchor: "l5d-data-plane-ready",
1302 retryDeadline: hc.RetryDeadline,
1303 fatal: true,
1304 check: func(ctx context.Context) error {
1305 pods, err := hc.GetDataPlanePods(ctx)
1306 if err != nil {
1307 return err
1308 }
1309 return CheckPodsRunning(pods, hc.DataPlaneNamespace)
1310 },
1311 },
1312 {
1313 description: "data plane is up-to-date",
1314 hintAnchor: "l5d-data-plane-version",
1315 warning: true,
1316 check: func(ctx context.Context) error {
1317 pods, err := hc.GetDataPlanePods(ctx)
1318 if err != nil {
1319 return err
1320 }
1321
1322 return hc.CheckProxyVersionsUpToDate(pods)
1323 },
1324 },
1325 {
1326 description: "data plane and cli versions match",
1327 hintAnchor: "l5d-data-plane-cli-version",
1328 warning: true,
1329 check: func(ctx context.Context) error {
1330 pods, err := hc.GetDataPlanePods(ctx)
1331 if err != nil {
1332 return err
1333 }
1334
1335 return CheckIfProxyVersionsMatchWithCLI(pods)
1336 },
1337 },
1338 {
1339 description: "data plane pod labels are configured correctly",
1340 hintAnchor: "l5d-data-plane-pod-labels",
1341 warning: true,
1342 check: func(ctx context.Context) error {
1343 pods, err := hc.GetDataPlanePods(ctx)
1344 if err != nil {
1345 return err
1346 }
1347
1348 return checkMisconfiguredPodsLabels(pods)
1349 },
1350 },
1351 {
1352 description: "data plane service labels are configured correctly",
1353 hintAnchor: "l5d-data-plane-services-labels",
1354 warning: true,
1355 check: func(ctx context.Context) error {
1356 services, err := hc.GetServices(ctx)
1357 if err != nil {
1358 return err
1359 }
1360
1361 return checkMisconfiguredServiceLabels(services)
1362 },
1363 },
1364 {
1365 description: "data plane service annotations are configured correctly",
1366 hintAnchor: "l5d-data-plane-services-annotations",
1367 warning: true,
1368 check: func(ctx context.Context) error {
1369 services, err := hc.GetServices(ctx)
1370 if err != nil {
1371 return err
1372 }
1373
1374 return checkMisconfiguredServiceAnnotations(services)
1375 },
1376 },
1377 {
1378 description: "opaque ports are properly annotated",
1379 hintAnchor: "linkerd-opaque-ports-definition",
1380 warning: true,
1381 check: func(ctx context.Context) error {
1382 return hc.checkMisconfiguredOpaquePortAnnotations(ctx)
1383 },
1384 },
1385 },
1386 false,
1387 ),
1388 NewCategory(
1389 LinkerdHAChecks,
1390 []Checker{
1391 {
1392 description: "multiple replicas of control plane pods",
1393 hintAnchor: "l5d-control-plane-replicas",
1394 retryDeadline: hc.RetryDeadline,
1395 warning: true,
1396 check: func(ctx context.Context) error {
1397 if hc.isHA() {
1398 return hc.checkMinReplicasAvailable(ctx)
1399 }
1400 return SkipError{Reason: "not run for non HA installs"}
1401 },
1402 },
1403 },
1404 false,
1405 ),
1406 NewCategory(
1407 LinkerdExtensionChecks,
1408 []Checker{
1409 {
1410 description: "namespace configuration for extensions",
1411 warning: true,
1412 hintAnchor: "l5d-extension-namespaces",
1413 check: func(ctx context.Context) error {
1414 return hc.checkExtensionNsLabels(ctx)
1415 },
1416 },
1417 },
1418 false,
1419 ),
1420 }
1421 }
1422
1423
1424
1425 func (hc *HealthChecker) CheckProxyVersionsUpToDate(pods []corev1.Pod) error {
1426 return CheckProxyVersionsUpToDate(pods, hc.LatestVersions)
1427 }
1428
1429
1430
1431 func CheckProxyVersionsUpToDate(pods []corev1.Pod, versions version.Channels) error {
1432 outdatedPods := []string{}
1433 for _, pod := range pods {
1434 status := k8s.GetPodStatus(pod)
1435 if status == string(corev1.PodRunning) {
1436 proxyVersion := k8s.GetProxyVersion(pod)
1437 if proxyVersion == "" {
1438 continue
1439 }
1440 if err := versions.Match(proxyVersion); err != nil {
1441 outdatedPods = append(outdatedPods, fmt.Sprintf("\t* %s (%s)", pod.Name, proxyVersion))
1442 }
1443 }
1444 }
1445 if versions.Empty() {
1446 return errors.New("unable to determine version channel")
1447 }
1448 if len(outdatedPods) > 0 {
1449 podList := strings.Join(outdatedPods, "\n")
1450 return fmt.Errorf("some proxies are not running the current version:\n%s", podList)
1451 }
1452 return nil
1453 }
1454
1455
1456
1457 func CheckIfProxyVersionsMatchWithCLI(pods []corev1.Pod) error {
1458 for _, pod := range pods {
1459 status := k8s.GetPodStatus(pod)
1460 proxyVersion := k8s.GetProxyVersion(pod)
1461 if status == string(corev1.PodRunning) && proxyVersion != "" && proxyVersion != version.Version {
1462 return fmt.Errorf("%s running %s but cli running %s", pod.Name, proxyVersion, version.Version)
1463 }
1464 }
1465 return nil
1466 }
1467
1468
1469 func (hc *HealthChecker) CheckCertAndAnchors(cert *tls.Cred, trustAnchors []*x509.Certificate, identityName string) error {
1470
1471
1472 var expiredAnchors []string
1473 for _, anchor := range trustAnchors {
1474 if err := issuercerts.CheckCertValidityPeriod(anchor); err != nil {
1475 expiredAnchors = append(expiredAnchors, fmt.Sprintf("* %v %s %s", anchor.SerialNumber, anchor.Subject.CommonName, err))
1476 }
1477 }
1478 if len(expiredAnchors) > 0 {
1479 return fmt.Errorf("anchors not within their validity period:\n\t%s", strings.Join(expiredAnchors, "\n\t"))
1480 }
1481
1482
1483 if err := issuercerts.CheckCertValidityPeriod(cert.Certificate); err != nil {
1484 return fmt.Errorf("certificate is %w", err)
1485 }
1486
1487 if err := cert.Verify(tls.CertificatesToPool(trustAnchors), identityName, time.Time{}); err != nil {
1488 return fmt.Errorf("cert is not issued by the trust anchor: %w", err)
1489 }
1490
1491 return nil
1492 }
1493
1494
1495
1496 func (hc *HealthChecker) CheckProxyHealth(ctx context.Context, controlPlaneNamespace, namespace string) error {
1497 podList, err := hc.kubeAPI.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{LabelSelector: k8s.ControllerNSLabel})
1498 if err != nil {
1499 return err
1500 }
1501
1502
1503 err = CheckPodsRunning(podList.Items, controlPlaneNamespace)
1504 if err != nil {
1505 return err
1506 }
1507
1508
1509 return checkPodsProxiesCertificate(ctx, *hc.kubeAPI, namespace, controlPlaneNamespace)
1510 }
1511
1512
1513
1514 func (hc *HealthChecker) CheckCertAndAnchorsExpiringSoon(cert *tls.Cred) error {
1515
1516 var expiringAnchors []string
1517 for _, anchor := range cert.TrustChain {
1518 anchor := anchor
1519 if err := issuercerts.CheckExpiringSoon(anchor); err != nil {
1520 expiringAnchors = append(expiringAnchors, fmt.Sprintf("* %v %s %s", anchor.SerialNumber, anchor.Subject.CommonName, err))
1521 }
1522 }
1523 if len(expiringAnchors) > 0 {
1524 return fmt.Errorf("Anchors expiring soon:\n\t%s", strings.Join(expiringAnchors, "\n\t"))
1525 }
1526
1527
1528 if err := issuercerts.CheckExpiringSoon(cert.Certificate); err != nil {
1529 return fmt.Errorf("certificate %w", err)
1530 }
1531 return nil
1532 }
1533
1534
1535 func (hc *HealthChecker) CheckAPIService(ctx context.Context, serviceName string) error {
1536 apiServiceClient, err := apiregistrationv1client.NewForConfig(hc.kubeAPI.Config)
1537 if err != nil {
1538 return err
1539 }
1540
1541 apiStatus, err := apiServiceClient.APIServices().Get(ctx, serviceName, metav1.GetOptions{})
1542 if err != nil {
1543 return err
1544 }
1545
1546 for _, condition := range apiStatus.Status.Conditions {
1547 if condition.Type == "Available" {
1548 if condition.Status == "True" {
1549 return nil
1550 }
1551 return fmt.Errorf("%s: %s", condition.Reason, condition.Message)
1552 }
1553 }
1554
1555 return fmt.Errorf("%s service not available", apiStatus.Name)
1556 }
1557
1558 func (hc *HealthChecker) checkMinReplicasAvailable(ctx context.Context) error {
1559 faulty := []string{}
1560
1561 for _, component := range linkerdHAControlPlaneComponents {
1562 conf, err := hc.kubeAPI.AppsV1().Deployments(hc.ControlPlaneNamespace).Get(ctx, component, metav1.GetOptions{})
1563 if err != nil {
1564 return err
1565 }
1566
1567 if conf.Status.AvailableReplicas <= 1 {
1568 faulty = append(faulty, component)
1569 }
1570 }
1571
1572 if len(faulty) > 0 {
1573 return fmt.Errorf("not enough replicas available for %v", faulty)
1574 }
1575 return nil
1576 }
1577
1578
1579
1580
1581
1582
1583 func (hc *HealthChecker) RunChecks(observer CheckObserver) (bool, bool) {
1584 success := true
1585 warning := false
1586 for _, c := range hc.categories {
1587 if c.enabled {
1588 for _, checker := range c.checkers {
1589 checker := checker
1590 if checker.check != nil {
1591 if !hc.runCheck(c, &checker, observer) {
1592 if !checker.warning {
1593 success = false
1594 } else {
1595 warning = true
1596 }
1597 if checker.fatal {
1598 return success, warning
1599 }
1600 }
1601 }
1602 }
1603 }
1604 }
1605
1606 return success, warning
1607 }
1608
1609 func (hc *HealthChecker) RunWithExitOnError() (bool, bool) {
1610 return hc.RunChecks(func(result *CheckResult) {
1611 if result.Retry {
1612 fmt.Fprintln(os.Stderr, "Waiting for control plane to become available")
1613 return
1614 }
1615
1616 if result.Err != nil && !result.Warning {
1617 var msg string
1618 switch result.Category {
1619 case KubernetesAPIChecks:
1620 msg = "Cannot connect to Kubernetes"
1621 case LinkerdControlPlaneExistenceChecks:
1622 msg = "Cannot find Linkerd"
1623 }
1624 fmt.Fprintf(os.Stderr, "%s: %s\nValidate the install with: 'linkerd check'\n",
1625 msg, result.Err)
1626 os.Exit(1)
1627 }
1628 })
1629 }
1630
1631
1632 func (hc *HealthChecker) LinkerdConfig() *l5dcharts.Values {
1633 return hc.linkerdConfig
1634 }
1635
1636 func (hc *HealthChecker) runCheck(category *Category, c *Checker, observer CheckObserver) bool {
1637 for {
1638 ctx, cancel := context.WithTimeout(context.Background(), RequestTimeout)
1639 err := c.check(ctx)
1640 cancel()
1641 var se SkipError
1642 if errors.As(err, &se) {
1643 log.Debugf("Skipping check: %s. Reason: %s", c.description, se.Reason)
1644 return true
1645 }
1646
1647 checkResult := &CheckResult{
1648 Category: category.ID,
1649 Description: c.description,
1650 Warning: c.warning,
1651 HintURL: fmt.Sprintf("%s%s", category.hintBaseURL, c.hintAnchor),
1652 }
1653 var vs VerboseSuccess
1654 if errors.As(err, &vs) {
1655 checkResult.Description = fmt.Sprintf("%s\n%s", checkResult.Description, vs.Message)
1656 } else if err != nil {
1657 checkResult.Err = CategoryError{category.ID, err}
1658 }
1659
1660 if checkResult.Err != nil && time.Now().Before(c.retryDeadline) {
1661 checkResult.Retry = true
1662 if !c.surfaceErrorOnRetry {
1663 checkResult.Err = errors.New("waiting for check to complete")
1664 }
1665 log.Debugf("Retrying on error: %s", err)
1666
1667 observer(checkResult)
1668 time.Sleep(retryWindow)
1669 continue
1670 }
1671
1672 observer(checkResult)
1673 return checkResult.Err == nil
1674 }
1675 }
1676
1677 func controlPlaneComponentsSelector() string {
1678 return fmt.Sprintf("%s,!%s", k8s.ControllerNSLabel, LinkerdCNIResourceLabel)
1679 }
1680
1681
1682
1683 func (hc *HealthChecker) KubeAPIClient() *k8s.KubernetesAPI {
1684 return hc.kubeAPI
1685 }
1686
1687
1688 func (hc *HealthChecker) UUID() string {
1689 return hc.uuid
1690 }
1691
1692 func (hc *HealthChecker) checkLinkerdConfigConfigMap(ctx context.Context) (string, *l5dcharts.Values, error) {
1693 configMap, values, err := FetchCurrentConfiguration(ctx, hc.kubeAPI, hc.ControlPlaneNamespace)
1694 if err != nil {
1695 return "", nil, err
1696 }
1697
1698 return string(configMap.GetUID()), values, nil
1699 }
1700
1701
1702
1703
1704
1705
1706 func (hc *HealthChecker) checkCertificatesConfig(ctx context.Context) (*tls.Cred, []*x509.Certificate, error) {
1707 _, values, err := FetchCurrentConfiguration(ctx, hc.kubeAPI, hc.ControlPlaneNamespace)
1708 if err != nil {
1709 return nil, nil, err
1710 }
1711
1712 var data *issuercerts.IssuerCertData
1713
1714 if values.Identity.Issuer.Scheme == "" || values.Identity.Issuer.Scheme == k8s.IdentityIssuerSchemeLinkerd {
1715 data, err = issuercerts.FetchIssuerData(ctx, hc.kubeAPI, values.IdentityTrustAnchorsPEM, hc.ControlPlaneNamespace)
1716 } else {
1717 data, err = issuercerts.FetchExternalIssuerData(ctx, hc.kubeAPI, hc.ControlPlaneNamespace)
1718 }
1719
1720 if err != nil {
1721 return nil, nil, err
1722 }
1723
1724 issuerCreds, err := tls.ValidateAndCreateCreds(data.IssuerCrt, data.IssuerKey)
1725 if err != nil {
1726 return nil, nil, err
1727 }
1728
1729 anchors, err := tls.DecodePEMCertificates(data.TrustAnchors)
1730 if err != nil {
1731 return nil, nil, err
1732 }
1733
1734 return issuerCreds, anchors, nil
1735 }
1736
1737
1738 func FetchCurrentConfiguration(ctx context.Context, k kubernetes.Interface, controlPlaneNamespace string) (*corev1.ConfigMap, *l5dcharts.Values, error) {
1739
1740 configMap, err := config.FetchLinkerdConfigMap(ctx, k, controlPlaneNamespace)
1741 if err != nil {
1742 return nil, nil, err
1743 }
1744
1745 rawValues := configMap.Data["values"]
1746 if rawValues == "" {
1747 return configMap, nil, nil
1748 }
1749
1750
1751 rawValuesBytes, err := config.RemoveGlobalFieldIfPresent([]byte(rawValues))
1752 if err != nil {
1753 return nil, nil, err
1754 }
1755 rawValues = string(rawValuesBytes)
1756 var fullValues l5dcharts.Values
1757
1758 err = yaml.Unmarshal([]byte(rawValues), &fullValues)
1759 if err != nil {
1760 return nil, nil, err
1761 }
1762 return configMap, &fullValues, nil
1763 }
1764
1765 func (hc *HealthChecker) fetchProxyInjectorCaBundle(ctx context.Context) ([]*x509.Certificate, error) {
1766 mwh, err := hc.getProxyInjectorMutatingWebhook(ctx)
1767 if err != nil {
1768 return nil, err
1769 }
1770
1771 caBundle, err := tls.DecodePEMCertificates(string(mwh.ClientConfig.CABundle))
1772 if err != nil {
1773 return nil, err
1774 }
1775 return caBundle, nil
1776 }
1777
1778 func (hc *HealthChecker) fetchWebhookCaBundle(ctx context.Context, webhook string) ([]*x509.Certificate, error) {
1779 vwc, err := hc.kubeAPI.AdmissionregistrationV1().ValidatingWebhookConfigurations().Get(ctx, webhook, metav1.GetOptions{})
1780 if err != nil {
1781 return nil, err
1782 }
1783
1784 if len(vwc.Webhooks) != 1 {
1785 return nil, fmt.Errorf("expected 1 webhooks, found %d", len(vwc.Webhooks))
1786 }
1787
1788 caBundle, err := tls.DecodePEMCertificates(string(vwc.Webhooks[0].ClientConfig.CABundle))
1789 if err != nil {
1790 return nil, err
1791 }
1792 return caBundle, nil
1793 }
1794
1795
1796 func FetchTrustBundle(ctx context.Context, kubeAPI k8s.KubernetesAPI, controlPlaneNamespace string) (string, error) {
1797 configMap, err := kubeAPI.CoreV1().ConfigMaps(controlPlaneNamespace).Get(ctx, "linkerd-identity-trust-roots", metav1.GetOptions{})
1798
1799 return configMap.Data["ca-bundle.crt"], err
1800 }
1801
1802
1803 func (hc *HealthChecker) FetchCredsFromSecret(ctx context.Context, namespace string, secretName string) (*tls.Cred, error) {
1804 secret, err := hc.kubeAPI.CoreV1().Secrets(namespace).Get(ctx, secretName, metav1.GetOptions{})
1805 if err != nil {
1806 return nil, err
1807 }
1808
1809 crt, ok := secret.Data[certKeyName]
1810 if !ok {
1811 return nil, fmt.Errorf("key %s needs to exist in secret %s", certKeyName, secretName)
1812 }
1813
1814 key, ok := secret.Data[keyKeyName]
1815 if !ok {
1816 return nil, fmt.Errorf("key %s needs to exist in secret %s", keyKeyName, secretName)
1817 }
1818
1819 cred, err := tls.ValidateAndCreateCreds(string(crt), string(key))
1820 if err != nil {
1821 return nil, err
1822 }
1823
1824 return cred, nil
1825 }
1826
1827
1828
1829
1830 func (hc *HealthChecker) FetchCredsFromOldSecret(ctx context.Context, namespace string, secretName string) (*tls.Cred, error) {
1831 secret, err := hc.kubeAPI.CoreV1().Secrets(namespace).Get(ctx, secretName, metav1.GetOptions{})
1832 if err != nil {
1833 return nil, err
1834 }
1835
1836 crt, ok := secret.Data[certOldKeyName]
1837 if !ok {
1838 return nil, fmt.Errorf("key %s needs to exist in secret %s", certOldKeyName, secretName)
1839 }
1840
1841 key, ok := secret.Data[keyOldKeyName]
1842 if !ok {
1843 return nil, fmt.Errorf("key %s needs to exist in secret %s", keyOldKeyName, secretName)
1844 }
1845
1846 cred, err := tls.ValidateAndCreateCreds(string(crt), string(key))
1847 if err != nil {
1848 return nil, err
1849 }
1850
1851 return cred, nil
1852 }
1853
1854
1855
1856 func (hc *HealthChecker) CheckNamespace(ctx context.Context, namespace string, shouldExist bool) error {
1857 exists, err := hc.kubeAPI.NamespaceExists(ctx, namespace)
1858 if err != nil {
1859 return err
1860 }
1861 if shouldExist && !exists {
1862 return fmt.Errorf("The \"%s\" namespace does not exist", namespace)
1863 }
1864 if !shouldExist && exists {
1865 return fmt.Errorf("The \"%s\" namespace already exists", namespace)
1866 }
1867 return nil
1868 }
1869
1870 func (hc *HealthChecker) checkClusterNetworks(ctx context.Context) error {
1871 nodes, err := hc.kubeAPI.GetNodes(ctx)
1872 if err != nil {
1873 return err
1874 }
1875 clusterNetworks := strings.Split(hc.linkerdConfig.ClusterNetworks, ",")
1876 clusterIPNets := make([]*net.IPNet, len(clusterNetworks))
1877 for i, clusterNetwork := range clusterNetworks {
1878 _, clusterIPNets[i], err = net.ParseCIDR(clusterNetwork)
1879 if err != nil {
1880 return err
1881 }
1882 }
1883 var badPodCIDRS []string
1884 var podCIDRExists bool
1885 for _, node := range nodes {
1886 podCIDR := node.Spec.PodCIDR
1887 if podCIDR == "" {
1888 continue
1889 }
1890 podCIDRExists = true
1891 podIP, podIPNet, err := net.ParseCIDR(podCIDR)
1892 if err != nil {
1893 return err
1894 }
1895 exists := cluterNetworksContainCIDR(clusterIPNets, podIPNet, podIP)
1896 if !exists {
1897 badPodCIDRS = append(badPodCIDRS, podCIDR)
1898 }
1899 }
1900
1901 if !podCIDRExists {
1902
1903 return SkipError{Reason: podCIDRUnavailableSkipReason}
1904 }
1905 if len(badPodCIDRS) > 0 {
1906 sort.Strings(badPodCIDRS)
1907 return fmt.Errorf("node has podCIDR(s) %v which are not contained in the Linkerd clusterNetworks.\n\tTry installing linkerd via --set clusterNetworks=\"%s\"",
1908 badPodCIDRS, strings.Join(badPodCIDRS, "\\,"))
1909 }
1910 return nil
1911 }
1912
// cluterNetworksContainCIDR reports whether the pod CIDR (podIPNet, with
// address podIP) lies within at least one of clusterIPNets. A cluster network
// qualifies when it contains podIP and its mask is no longer than the pod
// CIDR's mask, i.e. the pod network is at most as wide as the cluster network.
func cluterNetworksContainCIDR(clusterIPNets []*net.IPNet, podIPNet *net.IPNet, podIP net.IP) bool {
	for i := range clusterIPNets {
		network := clusterIPNets[i]
		networkBits, _ := network.Mask.Size()
		podBits, _ := podIPNet.Mask.Size()
		if podBits < networkBits || !network.Contains(podIP) {
			continue
		}
		return true
	}
	return false
}
1923
// clusterNetworksContainIP reports whether the textual IP address ip belongs
// to at least one of clusterIPNets. An address that cannot be parsed is
// contained in no network, so the result is false.
func clusterNetworksContainIP(clusterIPNets []*net.IPNet, ip string) bool {
	// Parse once: the address is the same for every network.
	parsed := net.ParseIP(ip)
	for _, clusterIPNet := range clusterIPNets {
		if clusterIPNet.Contains(parsed) {
			return true
		}
	}
	return false
}
1932
1933 func (hc *HealthChecker) checkClusterNetworksContainAllPods(ctx context.Context) error {
1934 clusterNetworks := strings.Split(hc.linkerdConfig.ClusterNetworks, ",")
1935 clusterIPNets := make([]*net.IPNet, len(clusterNetworks))
1936 var err error
1937 for i, clusterNetwork := range clusterNetworks {
1938 _, clusterIPNets[i], err = net.ParseCIDR(clusterNetwork)
1939 if err != nil {
1940 return err
1941 }
1942 }
1943 pods, err := hc.kubeAPI.CoreV1().Pods(corev1.NamespaceAll).List(ctx, metav1.ListOptions{})
1944 if err != nil {
1945 return err
1946 }
1947 for _, pod := range pods.Items {
1948 if pod.Spec.HostNetwork {
1949 continue
1950 }
1951 if len(pod.Status.PodIP) == 0 {
1952 continue
1953 }
1954 if !clusterNetworksContainIP(clusterIPNets, pod.Status.PodIP) {
1955 return fmt.Errorf("the Linkerd clusterNetworks [%q] do not include pod %s/%s (%s)", hc.linkerdConfig.ClusterNetworks, pod.Namespace, pod.Name, pod.Status.PodIP)
1956 }
1957 }
1958 return nil
1959 }
1960
1961 func (hc *HealthChecker) checkClusterNetworksContainAllServices(ctx context.Context) error {
1962 clusterNetworks := strings.Split(hc.linkerdConfig.ClusterNetworks, ",")
1963 clusterIPNets := make([]*net.IPNet, len(clusterNetworks))
1964 var err error
1965 for i, clusterNetwork := range clusterNetworks {
1966 _, clusterIPNets[i], err = net.ParseCIDR(clusterNetwork)
1967 if err != nil {
1968 return err
1969 }
1970 }
1971 svcs, err := hc.kubeAPI.CoreV1().Services(corev1.NamespaceAll).List(ctx, metav1.ListOptions{})
1972 if err != nil {
1973 return err
1974 }
1975 for _, svc := range svcs.Items {
1976 clusterIP := svc.Spec.ClusterIP
1977 if clusterIP != "" && clusterIP != "None" && !clusterNetworksContainIP(clusterIPNets, svc.Spec.ClusterIP) {
1978 return fmt.Errorf("the Linkerd clusterNetworks [%q] do not include svc %s/%s (%s)", hc.linkerdConfig.ClusterNetworks, svc.Namespace, svc.Name, svc.Spec.ClusterIP)
1979 }
1980 }
1981 return nil
1982 }
1983
1984 func (hc *HealthChecker) expectedRBACNames() []string {
1985 return []string{
1986 fmt.Sprintf("linkerd-%s-identity", hc.ControlPlaneNamespace),
1987 fmt.Sprintf("linkerd-%s-proxy-injector", hc.ControlPlaneNamespace),
1988 }
1989 }
1990
1991 func (hc *HealthChecker) checkClusterRoles(ctx context.Context, shouldExist bool, expectedNames []string, labelSelector string) error {
1992 return CheckClusterRoles(ctx, hc.kubeAPI, shouldExist, expectedNames, labelSelector)
1993 }
1994
1995
1996 func CheckClusterRoles(ctx context.Context, kubeAPI *k8s.KubernetesAPI, shouldExist bool, expectedNames []string, labelSelector string) error {
1997 options := metav1.ListOptions{
1998 LabelSelector: labelSelector,
1999 }
2000 crList, err := kubeAPI.RbacV1().ClusterRoles().List(ctx, options)
2001 if err != nil {
2002 return err
2003 }
2004
2005 objects := []runtime.Object{}
2006
2007 for _, item := range crList.Items {
2008 item := item
2009 objects = append(objects, &item)
2010 }
2011
2012 return checkResources("ClusterRoles", objects, expectedNames, shouldExist)
2013 }
2014
2015 func (hc *HealthChecker) checkClusterRoleBindings(ctx context.Context, shouldExist bool, expectedNames []string, labelSelector string) error {
2016 return CheckClusterRoleBindings(ctx, hc.kubeAPI, shouldExist, expectedNames, labelSelector)
2017 }
2018
2019
2020 func CheckClusterRoleBindings(ctx context.Context, kubeAPI *k8s.KubernetesAPI, shouldExist bool, expectedNames []string, labelSelector string) error {
2021 options := metav1.ListOptions{
2022 LabelSelector: labelSelector,
2023 }
2024 crbList, err := kubeAPI.RbacV1().ClusterRoleBindings().List(ctx, options)
2025 if err != nil {
2026 return err
2027 }
2028
2029 objects := []runtime.Object{}
2030
2031 for _, item := range crbList.Items {
2032 item := item
2033 objects = append(objects, &item)
2034 }
2035
2036 return checkResources("ClusterRoleBindings", objects, expectedNames, shouldExist)
2037 }
2038
2039
2040 func CheckConfigMaps(ctx context.Context, kubeAPI *k8s.KubernetesAPI, namespace string, shouldExist bool, expectedNames []string, labelSelector string) error {
2041 options := metav1.ListOptions{
2042 LabelSelector: labelSelector,
2043 }
2044 crbList, err := kubeAPI.CoreV1().ConfigMaps(namespace).List(ctx, options)
2045 if err != nil {
2046 return err
2047 }
2048
2049 objects := []runtime.Object{}
2050
2051 for _, item := range crbList.Items {
2052 item := item
2053 objects = append(objects, &item)
2054 }
2055
2056 return checkResources("ConfigMaps", objects, expectedNames, shouldExist)
2057 }
2058
2059 func (hc *HealthChecker) isHA() bool {
2060 return hc.linkerdConfig.HighAvailability
2061 }
2062
2063 func (hc *HealthChecker) isHeartbeatDisabled() bool {
2064 return hc.linkerdConfig.DisableHeartBeat
2065 }
2066
2067 func (hc *HealthChecker) checkServiceAccounts(ctx context.Context, saNames []string, ns, labelSelector string) error {
2068 return CheckServiceAccounts(ctx, hc.kubeAPI, saNames, ns, labelSelector)
2069 }
2070
2071
2072 func CheckServiceAccounts(ctx context.Context, api *k8s.KubernetesAPI, saNames []string, ns, labelSelector string) error {
2073 options := metav1.ListOptions{
2074 LabelSelector: labelSelector,
2075 }
2076 saList, err := api.CoreV1().ServiceAccounts(ns).List(ctx, options)
2077 if err != nil {
2078 return err
2079 }
2080
2081 objects := []runtime.Object{}
2082
2083 for _, item := range saList.Items {
2084 item := item
2085 objects = append(objects, &item)
2086 }
2087
2088 return checkResources("ServiceAccounts", objects, saNames, true)
2089 }
2090
2091
2092 func CheckIfLinkerdExists(ctx context.Context, kubeAPI *k8s.KubernetesAPI, controlPlaneNamespace string) (bool, error) {
2093 _, err := kubeAPI.CoreV1().Namespaces().Get(ctx, controlPlaneNamespace, metav1.GetOptions{})
2094 if err != nil {
2095 if kerrors.IsNotFound(err) {
2096 return false, nil
2097 }
2098 return false, err
2099 }
2100
2101 _, _, err = FetchCurrentConfiguration(ctx, kubeAPI, controlPlaneNamespace)
2102 if err != nil {
2103 if kerrors.IsNotFound(err) {
2104 return false, nil
2105 }
2106 return false, err
2107 }
2108
2109 return true, nil
2110 }
2111
2112 func (hc *HealthChecker) getProxyInjectorMutatingWebhook(ctx context.Context) (*admissionRegistration.MutatingWebhook, error) {
2113 mwc, err := hc.kubeAPI.AdmissionregistrationV1().MutatingWebhookConfigurations().Get(ctx, k8s.ProxyInjectorWebhookConfigName, metav1.GetOptions{})
2114 if err != nil {
2115 return nil, err
2116 }
2117 if len(mwc.Webhooks) != 1 {
2118 return nil, fmt.Errorf("expected 1 webhooks, found %d", len(mwc.Webhooks))
2119 }
2120 return &mwc.Webhooks[0], nil
2121 }
2122
2123 func (hc *HealthChecker) checkMutatingWebhookConfigurations(ctx context.Context, shouldExist bool) error {
2124 options := metav1.ListOptions{
2125 LabelSelector: controlPlaneComponentsSelector(),
2126 }
2127 mwc, err := hc.kubeAPI.AdmissionregistrationV1().MutatingWebhookConfigurations().List(ctx, options)
2128 if err != nil {
2129 return err
2130 }
2131
2132 objects := []runtime.Object{}
2133 for _, item := range mwc.Items {
2134 item := item
2135 objects = append(objects, &item)
2136 }
2137
2138 return checkResources("MutatingWebhookConfigurations", objects, []string{k8s.ProxyInjectorWebhookConfigName}, shouldExist)
2139 }
2140
2141 func (hc *HealthChecker) checkValidatingWebhookConfigurations(ctx context.Context, shouldExist bool) error {
2142 options := metav1.ListOptions{
2143 LabelSelector: controlPlaneComponentsSelector(),
2144 }
2145 vwc, err := hc.kubeAPI.AdmissionregistrationV1().ValidatingWebhookConfigurations().List(ctx, options)
2146 if err != nil {
2147 return err
2148 }
2149
2150 objects := []runtime.Object{}
2151 for _, item := range vwc.Items {
2152 item := item
2153 objects = append(objects, &item)
2154 }
2155
2156 return checkResources("ValidatingWebhookConfigurations", objects, []string{k8s.SPValidatorWebhookConfigName}, shouldExist)
2157 }
2158
2159
2160
2161 func CheckCustomResourceDefinitions(ctx context.Context, k8sAPI *k8s.KubernetesAPI, expectedCRDManifests string) error {
2162
2163 crdYamls := strings.Split(expectedCRDManifests, "\n---\n")
2164 crdVersions := []struct{ name, version string }{}
2165 for _, crdYaml := range crdYamls {
2166 var crd apiextv1.CustomResourceDefinition
2167 err := yaml.Unmarshal([]byte(crdYaml), &crd)
2168 if err != nil {
2169 return err
2170 }
2171 if len(crd.Spec.Versions) == 0 {
2172 continue
2173 }
2174 versionIndex := len(crd.Spec.Versions) - 1
2175 crdVersions = append(crdVersions, struct{ name, version string }{
2176 name: crd.Name,
2177 version: crd.Spec.Versions[versionIndex].Name,
2178 })
2179 }
2180
2181 errMsgs := []string{}
2182
2183 for _, crdVersion := range crdVersions {
2184 name := crdVersion.name
2185 version := crdVersion.version
2186
2187 crd, err := k8sAPI.Apiextensions.ApiextensionsV1().CustomResourceDefinitions().Get(ctx, name, metav1.GetOptions{})
2188 if err != nil && kerrors.IsNotFound(err) {
2189 errMsgs = append(errMsgs, fmt.Sprintf("missing %s", name))
2190 continue
2191 } else if err != nil {
2192 return err
2193 }
2194 if !crdHasVersion(crd, version) {
2195 errMsgs = append(errMsgs, fmt.Sprintf("CRD %s is missing version %s", name, version))
2196 }
2197 }
2198 if len(errMsgs) > 0 {
2199 return errors.New(strings.Join(errMsgs, ", "))
2200 }
2201 return nil
2202 }
2203
2204 func crdHasVersion(crd *apiextv1.CustomResourceDefinition, version string) bool {
2205 for _, crdVersion := range crd.Spec.Versions {
2206 if crdVersion.Name == version {
2207 return true
2208 }
2209 }
2210 return false
2211 }
2212
2213
2214
2215
2216 func CheckNodesHaveNonDockerRuntime(ctx context.Context, k8sAPI *k8s.KubernetesAPI) error {
2217 hasDockerNodes := false
2218 continueToken := ""
2219 for {
2220 nodes, err := k8sAPI.CoreV1().Nodes().List(ctx, metav1.ListOptions{Continue: continueToken})
2221 if err != nil {
2222 return err
2223 }
2224 continueToken = nodes.Continue
2225 for _, node := range nodes.Items {
2226 crv := node.Status.NodeInfo.ContainerRuntimeVersion
2227 if strings.HasPrefix(crv, "docker:") {
2228 hasDockerNodes = true
2229 break
2230 }
2231 }
2232 if continueToken == "" {
2233 break
2234 }
2235 }
2236 if hasDockerNodes {
2237 return fmt.Errorf("there are nodes using the docker container runtime and proxy-init container must run as root user.\ntry installing linkerd via --set proxyInit.runAsRoot=true")
2238 }
2239 return nil
2240 }
2241
2242
// MeshedPodIdentityData describes a meshed pod together with the trust
// anchors configured on its proxy container.
type MeshedPodIdentityData struct {
	// Name is the pod's name.
	Name string
	// Namespace is the pod's namespace.
	Namespace string
	// Anchors is the whitespace-trimmed value of the proxy container's
	// identity.EnvTrustAnchors environment variable.
	Anchors string
}
2248
2249
2250 func GetMeshedPodsIdentityData(ctx context.Context, api kubernetes.Interface, dataPlaneNamespace string) ([]MeshedPodIdentityData, error) {
2251 podList, err := api.CoreV1().Pods(dataPlaneNamespace).List(ctx, metav1.ListOptions{LabelSelector: k8s.ControllerNSLabel})
2252 if err != nil {
2253 return nil, err
2254 }
2255 if len(podList.Items) == 0 {
2256 return nil, nil
2257 }
2258 pods := []MeshedPodIdentityData{}
2259 for _, pod := range podList.Items {
2260 containers := append(pod.Spec.InitContainers, pod.Spec.Containers...)
2261 for _, containerSpec := range containers {
2262 if containerSpec.Name != k8s.ProxyContainerName {
2263 continue
2264 }
2265 for _, envVar := range containerSpec.Env {
2266 if envVar.Name != identity.EnvTrustAnchors {
2267 continue
2268 }
2269 pods = append(pods, MeshedPodIdentityData{
2270 pod.Name, pod.Namespace, strings.TrimSpace(envVar.Value),
2271 })
2272 }
2273 }
2274 }
2275 return pods, nil
2276 }
2277
2278 func (hc *HealthChecker) checkDataPlaneProxiesCertificate(ctx context.Context) error {
2279 return checkPodsProxiesCertificate(ctx, *hc.kubeAPI, hc.DataPlaneNamespace, hc.ControlPlaneNamespace)
2280 }
2281
2282 func checkPodsProxiesCertificate(ctx context.Context, kubeAPI k8s.KubernetesAPI, targetNamespace, controlPlaneNamespace string) error {
2283 meshedPods, err := GetMeshedPodsIdentityData(ctx, kubeAPI, targetNamespace)
2284 if err != nil {
2285 return err
2286 }
2287
2288 trustAnchorsPem, err := FetchTrustBundle(ctx, kubeAPI, controlPlaneNamespace)
2289 if err != nil {
2290 return err
2291 }
2292
2293 offendingPods := []string{}
2294 for _, pod := range meshedPods {
2295
2296 if pod.Namespace == controlPlaneNamespace {
2297 continue
2298 }
2299 if strings.TrimSpace(pod.Anchors) != strings.TrimSpace(trustAnchorsPem) {
2300 if targetNamespace == "" {
2301 offendingPods = append(offendingPods, fmt.Sprintf("* %s/%s", pod.Namespace, pod.Name))
2302 } else {
2303 offendingPods = append(offendingPods, fmt.Sprintf("* %s", pod.Name))
2304 }
2305 }
2306 }
2307 if len(offendingPods) == 0 {
2308 return nil
2309 }
2310 return fmt.Errorf("Some pods do not have the current trust bundle and must be restarted:\n\t%s", strings.Join(offendingPods, "\n\t"))
2311 }
2312
2313 func checkResources(resourceName string, objects []runtime.Object, expectedNames []string, shouldExist bool) error {
2314 if !shouldExist {
2315 if len(objects) > 0 {
2316 resources := []Resource{}
2317 for _, obj := range objects {
2318 m, err := meta.Accessor(obj)
2319 if err != nil {
2320 return err
2321 }
2322
2323 res := Resource{name: m.GetName()}
2324 gvks, _, err := k8s.ObjectKinds(obj)
2325 if err == nil && len(gvks) > 0 {
2326 res.groupVersionKind = gvks[0]
2327 }
2328 resources = append(resources, res)
2329 }
2330 return ResourceError{resourceName, resources}
2331 }
2332 return nil
2333 }
2334
2335 expected := map[string]bool{}
2336 for _, name := range expectedNames {
2337 expected[name] = false
2338 }
2339
2340 for _, obj := range objects {
2341 metaObj, err := meta.Accessor(obj)
2342 if err != nil {
2343 return err
2344 }
2345
2346 if _, ok := expected[metaObj.GetName()]; ok {
2347 expected[metaObj.GetName()] = true
2348 }
2349 }
2350
2351 missing := []string{}
2352 for name, found := range expected {
2353 if !found {
2354 missing = append(missing, name)
2355 }
2356 }
2357 if len(missing) > 0 {
2358 sort.Strings(missing)
2359 return fmt.Errorf("missing %s: %s", resourceName, strings.Join(missing, ", "))
2360 }
2361
2362 return nil
2363 }
2364
2365
2366
2367 func (hc *HealthChecker) checkMisconfiguredOpaquePortAnnotations(ctx context.Context) error {
2368
2369
2370
2371
2372 kubeAPI := controllerK8s.NewClusterScopedAPI(hc.kubeAPI, nil, nil, "local", controllerK8s.Endpoint, controllerK8s.Pod, controllerK8s.Svc)
2373 kubeAPI.Sync(ctx.Done())
2374
2375 services, err := kubeAPI.Svc().Lister().Services(hc.DataPlaneNamespace).List(labels.Everything())
2376 if err != nil {
2377 return err
2378 }
2379
2380 var errStrings []string
2381 for _, service := range services {
2382 if service.Spec.ClusterIP == "None" {
2383
2384 continue
2385 }
2386
2387 endpoints, err := kubeAPI.Endpoint().Lister().Endpoints(service.Namespace).Get(service.Name)
2388 if err != nil {
2389 return err
2390 }
2391
2392 pods, err := getEndpointsPods(endpoints, kubeAPI, service.Namespace)
2393 if err != nil {
2394 return err
2395 }
2396
2397 for pod := range pods {
2398 err := misconfiguredOpaqueAnnotation(service, pod)
2399 if err != nil {
2400 errStrings = append(errStrings, fmt.Sprintf("\t* %s", err.Error()))
2401 }
2402 }
2403 }
2404
2405 if len(errStrings) >= 1 {
2406 return fmt.Errorf(strings.Join(errStrings, "\n "))
2407 }
2408
2409 return nil
2410 }
2411
2412
2413
2414 func getEndpointsPods(endpoints *corev1.Endpoints, kubeAPI *controllerK8s.API, namespace string) (map[*corev1.Pod]struct{}, error) {
2415 pods := make(map[*corev1.Pod]struct{})
2416 for _, subset := range endpoints.Subsets {
2417 for _, addr := range subset.Addresses {
2418 if addr.TargetRef != nil && addr.TargetRef.Kind == "Pod" {
2419 pod, err := kubeAPI.Pod().Lister().Pods(namespace).Get(addr.TargetRef.Name)
2420 if err != nil {
2421 return nil, err
2422 }
2423 if _, ok := pods[pod]; !ok {
2424 pods[pod] = struct{}{}
2425 }
2426 }
2427 }
2428 }
2429 return pods, nil
2430 }
2431
2432 func misconfiguredOpaqueAnnotation(service *corev1.Service, pod *corev1.Pod) error {
2433 var svcPorts, podPorts []string
2434 if v, ok := service.Annotations[k8s.ProxyOpaquePortsAnnotation]; ok {
2435 svcPorts = strings.Split(v, ",")
2436 }
2437 if v, ok := pod.Annotations[k8s.ProxyOpaquePortsAnnotation]; ok {
2438 podPorts = strings.Split(v, ",")
2439 }
2440
2441
2442
2443
2444 for _, p := range svcPorts {
2445 port, err := strconv.Atoi(p)
2446 if err != nil {
2447 return fmt.Errorf("failed to convert %s to port number for pod %s", p, pod.Name)
2448 }
2449 err = checkPodPorts(service, pod, podPorts, port)
2450 if err != nil {
2451 return err
2452 }
2453 }
2454
2455
2456
2457
2458 for _, p := range podPorts {
2459 if util.ContainsString(p, svcPorts) {
2460
2461 continue
2462 }
2463 port, err := strconv.Atoi(p)
2464 if err != nil {
2465 return fmt.Errorf("failed to convert %s to port number for pod %s", p, pod.Name)
2466 }
2467
2468
2469
2470
2471 ok, err := checkServiceIntPorts(service, svcPorts, port)
2472 if err != nil {
2473 return err
2474 }
2475 if ok {
2476
2477
2478 continue
2479 }
2480
2481
2482
2483
2484 err = checkServiceNamePorts(service, pod, port, svcPorts)
2485 if err != nil {
2486 return err
2487 }
2488 }
2489 return nil
2490 }
2491
2492 func checkPodPorts(service *corev1.Service, pod *corev1.Pod, podPorts []string, port int) error {
2493 for _, sp := range service.Spec.Ports {
2494 if int(sp.Port) == port {
2495 for _, c := range pod.Spec.Containers {
2496 for _, cp := range c.Ports {
2497 if cp.ContainerPort == sp.TargetPort.IntVal || cp.Name == sp.TargetPort.StrVal {
2498
2499
2500 var strPort string
2501 if sp.TargetPort.Type == 0 {
2502 strPort = strconv.Itoa(int(sp.TargetPort.IntVal))
2503 } else {
2504 strPort = strconv.Itoa(int(cp.ContainerPort))
2505 }
2506 if util.ContainsString(strPort, podPorts) {
2507 return nil
2508 }
2509 return fmt.Errorf("service %s expects target port %s to be opaque; add it to pod %s %s annotation", service.Name, strPort, pod.Name, k8s.ProxyOpaquePortsAnnotation)
2510 }
2511 }
2512 }
2513 }
2514 }
2515 return nil
2516 }
2517
2518 func checkServiceIntPorts(service *corev1.Service, svcPorts []string, port int) (bool, error) {
2519 for _, p := range service.Spec.Ports {
2520 if p.TargetPort.Type == 0 && p.TargetPort.IntVal == 0 {
2521 if int(p.Port) == port {
2522
2523
2524 return false, fmt.Errorf("service %s targets the opaque port %d; add it to its %s annotation", service.Name, port, k8s.ProxyOpaquePortsAnnotation)
2525 }
2526 }
2527 if int(p.TargetPort.IntVal) == port {
2528 svcPort := strconv.Itoa(int(p.Port))
2529 if util.ContainsString(svcPort, svcPorts) {
2530
2531
2532 return true, nil
2533 }
2534 return false, fmt.Errorf("service %s targets the opaque port %d through %d; add %d to its %s annotation", service.Name, port, p.Port, p.Port, k8s.ProxyOpaquePortsAnnotation)
2535 }
2536 }
2537 return false, nil
2538 }
2539
2540 func checkServiceNamePorts(service *corev1.Service, pod *corev1.Pod, port int, svcPorts []string) error {
2541 for _, p := range service.Spec.Ports {
2542 if p.TargetPort.StrVal == "" {
2543
2544
2545 continue
2546 }
2547 for _, c := range pod.Spec.Containers {
2548 for _, cp := range c.Ports {
2549 if int(cp.ContainerPort) == port {
2550
2551
2552 if cp.Name == p.TargetPort.StrVal {
2553 svcPort := strconv.Itoa(int(p.Port))
2554 if util.ContainsString(svcPort, svcPorts) {
2555
2556
2557 return nil
2558 }
2559 return fmt.Errorf("service %s targets the opaque port %s through %d; add %d to its %s annotation", service.Name, cp.Name, p.Port, p.Port, k8s.ProxyOpaquePortsAnnotation)
2560 }
2561 }
2562 }
2563 }
2564 }
2565 return nil
2566 }
2567
2568
2569 func (hc *HealthChecker) GetDataPlanePods(ctx context.Context) ([]corev1.Pod, error) {
2570 selector := fmt.Sprintf("%s=%s", k8s.ControllerNSLabel, hc.ControlPlaneNamespace)
2571 podList, err := hc.kubeAPI.CoreV1().Pods(hc.DataPlaneNamespace).List(ctx, metav1.ListOptions{LabelSelector: selector})
2572 if err != nil {
2573 return nil, err
2574 }
2575 return podList.Items, nil
2576 }
2577
2578
2579 func (hc *HealthChecker) GetServices(ctx context.Context) ([]corev1.Service, error) {
2580 svcList, err := hc.kubeAPI.CoreV1().Services(hc.DataPlaneNamespace).List(ctx, metav1.ListOptions{})
2581 if err != nil {
2582 return nil, err
2583 }
2584 return svcList.Items, nil
2585 }
2586
2587 func (hc *HealthChecker) checkCanCreate(ctx context.Context, namespace, group, version, resource string) error {
2588 return CheckCanPerformAction(ctx, hc.kubeAPI, "create", namespace, group, version, resource)
2589 }
2590
2591 func (hc *HealthChecker) checkCanCreateNonNamespacedResources(ctx context.Context) error {
2592 var errs []string
2593 dryRun := metav1.CreateOptions{DryRun: []string{metav1.DryRunAll}}
2594
2595
2596 installManifestReader := strings.NewReader(hc.Options.InstallManifest)
2597 yamlReader := yamlDecoder.NewYAMLReader(bufio.NewReader(installManifestReader))
2598 for {
2599
2600 objYAML, err := yamlReader.Read()
2601 if err != nil {
2602 if errors.Is(err, io.EOF) {
2603 break
2604 }
2605 return fmt.Errorf("error reading install manifest: %w", err)
2606 }
2607
2608
2609 objMap := map[string]interface{}{}
2610 err = yaml.Unmarshal(objYAML, &objMap)
2611 if err != nil {
2612 return fmt.Errorf("error unmarshaling yaml object %s: %w", objYAML, err)
2613 }
2614 if len(objMap) == 0 {
2615
2616 continue
2617 }
2618 obj := &unstructured.Unstructured{Object: objMap}
2619
2620
2621 if obj.GetNamespace() != "" {
2622 continue
2623 }
2624
2625 resource, _ := meta.UnsafeGuessKindToResource(obj.GroupVersionKind())
2626 _, err = hc.kubeAPI.DynamicClient.Resource(resource).Create(ctx, obj, dryRun)
2627 if err != nil {
2628 errs = append(errs, fmt.Sprintf("cannot create %s/%s: %v", obj.GetKind(), obj.GetName(), err))
2629 }
2630 }
2631
2632 if len(errs) > 0 {
2633 return errors.New(strings.Join(errs, "\n "))
2634 }
2635 return nil
2636 }
2637
2638 func (hc *HealthChecker) checkCanGet(ctx context.Context, namespace, group, version, resource string) error {
2639 return CheckCanPerformAction(ctx, hc.kubeAPI, "get", namespace, group, version, resource)
2640 }
2641
2642 func (hc *HealthChecker) checkExtensionAPIServerAuthentication(ctx context.Context) error {
2643 if hc.kubeAPI == nil {
2644 return fmt.Errorf("unexpected error: Kubernetes ClientSet not initialized")
2645 }
2646 m, err := hc.kubeAPI.CoreV1().ConfigMaps(metav1.NamespaceSystem).Get(ctx, k8s.ExtensionAPIServerAuthenticationConfigMapName, metav1.GetOptions{})
2647 if err != nil {
2648 return err
2649 }
2650 if v, exists := m.Data[k8s.ExtensionAPIServerAuthenticationRequestHeaderClientCAFileKey]; !exists || v == "" {
2651 return fmt.Errorf("--%s is not configured", k8s.ExtensionAPIServerAuthenticationRequestHeaderClientCAFileKey)
2652 }
2653 return nil
2654 }
2655 func (hc *HealthChecker) checkClockSkew(ctx context.Context) error {
2656 if hc.kubeAPI == nil {
2657
2658 return errors.New("unexpected error: Kubernetes ClientSet not initialized")
2659 }
2660
2661 var clockSkewNodes []string
2662
2663 nodeList, err := hc.kubeAPI.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
2664 if err != nil {
2665 return err
2666 }
2667
2668 for _, node := range nodeList.Items {
2669 for _, condition := range node.Status.Conditions {
2670
2671 if condition.Type == corev1.NodeReady && condition.Status == corev1.ConditionTrue {
2672 since := time.Since(condition.LastHeartbeatTime.Time)
2673 if (since > AllowedClockSkew) || (since < -AllowedClockSkew) {
2674 clockSkewNodes = append(clockSkewNodes, node.Name)
2675 }
2676 }
2677 }
2678 }
2679
2680 if len(clockSkewNodes) > 0 {
2681 return fmt.Errorf("clock skew detected for node(s): %s", strings.Join(clockSkewNodes, ", "))
2682 }
2683
2684 return nil
2685 }
2686
2687 func (hc *HealthChecker) checkExtensionNsLabels(ctx context.Context) error {
2688 if hc.kubeAPI == nil {
2689
2690 return errors.New("unexpected error: Kubernetes ClientSet not initialized")
2691 }
2692
2693 namespaces, err := hc.kubeAPI.GetAllNamespacesWithExtensionLabel(ctx)
2694 if err != nil {
2695 return fmt.Errorf("unexpected error when retrieving namespaces: %w", err)
2696 }
2697
2698 freq := make(map[string][]string)
2699 for _, ns := range namespaces {
2700
2701
2702 ext := ns.Labels[k8s.LinkerdExtensionLabel]
2703
2704
2705 freq[ext] = append(freq[ext], fmt.Sprintf("\t\t* %s", ns.Name))
2706 }
2707
2708 errs := []string{}
2709 for ext, namespaces := range freq {
2710 if len(namespaces) == 1 {
2711 continue
2712 }
2713 errs = append(errs, fmt.Sprintf("\t* label \"%s=%s\" is present on more than one namespace:\n%s", k8s.LinkerdExtensionLabel, ext, strings.Join(namespaces, "\n")))
2714 }
2715
2716 if len(errs) > 0 {
2717 return errors.New(strings.Join(
2718 append([]string{"some extensions have invalid configuration"}, errs...), "\n"))
2719 }
2720
2721 return nil
2722 }
2723
2724
2725 func CheckRoles(ctx context.Context, kubeAPI *k8s.KubernetesAPI, shouldExist bool, namespace string, expectedNames []string, labelSelector string) error {
2726 options := metav1.ListOptions{
2727 LabelSelector: labelSelector,
2728 }
2729 crList, err := kubeAPI.RbacV1().Roles(namespace).List(ctx, options)
2730 if err != nil {
2731 return err
2732 }
2733
2734 objects := []runtime.Object{}
2735
2736 for _, item := range crList.Items {
2737 item := item
2738 objects = append(objects, &item)
2739 }
2740
2741 return checkResources("Roles", objects, expectedNames, shouldExist)
2742 }
2743
2744
2745 func CheckRoleBindings(ctx context.Context, kubeAPI *k8s.KubernetesAPI, shouldExist bool, namespace string, expectedNames []string, labelSelector string) error {
2746 options := metav1.ListOptions{
2747 LabelSelector: labelSelector,
2748 }
2749 crbList, err := kubeAPI.RbacV1().RoleBindings(namespace).List(ctx, options)
2750 if err != nil {
2751 return err
2752 }
2753
2754 objects := []runtime.Object{}
2755
2756 for _, item := range crbList.Items {
2757 item := item
2758 objects = append(objects, &item)
2759 }
2760
2761 return checkResources("RoleBindings", objects, expectedNames, shouldExist)
2762 }
2763
2764
2765 func CheckCanPerformAction(ctx context.Context, api *k8s.KubernetesAPI, verb, namespace, group, version, resource string) error {
2766 if api == nil {
2767
2768 return fmt.Errorf("unexpected error: Kubernetes ClientSet not initialized")
2769 }
2770
2771 return k8s.ResourceAuthz(
2772 ctx,
2773 api,
2774 namespace,
2775 verb,
2776 group,
2777 version,
2778 resource,
2779 "",
2780 )
2781 }
2782
2783
2784
2785
2786
2787
2788 func getPodStatuses(pods []corev1.Pod) map[string]map[string][]corev1.ContainerStatus {
2789 statuses := make(map[string]map[string][]corev1.ContainerStatus)
2790
2791 for _, pod := range pods {
2792 if pod.Status.Phase == corev1.PodRunning && strings.HasPrefix(pod.Name, "linkerd-") {
2793 parts := strings.Split(pod.Name, "-")
2794
2795
2796 if len(parts) >= 4 {
2797 name := strings.Join(parts[1:len(parts)-2], "-")
2798 if _, found := statuses[name]; !found {
2799 statuses[name] = make(map[string][]corev1.ContainerStatus)
2800 }
2801 statuses[name][pod.Name] = pod.Status.ContainerStatuses
2802 }
2803 }
2804 }
2805
2806 return statuses
2807 }
2808
2809 func validateControlPlanePods(pods []corev1.Pod) error {
2810 statuses := getPodStatuses(pods)
2811
2812 names := []string{"destination", "identity", "proxy-injector"}
2813
2814 for _, name := range names {
2815 pods, found := statuses[name]
2816 if !found {
2817 return fmt.Errorf("No running pods for \"linkerd-%s\"", name)
2818 }
2819 var err error
2820 var ready bool
2821 for pod, containers := range pods {
2822 containersReady := true
2823 for _, container := range containers {
2824 if !container.Ready {
2825
2826
2827
2828
2829
2830 err = fmt.Errorf("pod/%s container %s is not ready", pod, container.Name)
2831 containersReady = false
2832 }
2833 }
2834 if containersReady {
2835
2836 ready = true
2837 break
2838 }
2839 }
2840 if !ready {
2841 return err
2842 }
2843 }
2844
2845 return nil
2846 }
2847
2848 func checkUnschedulablePods(pods []corev1.Pod) error {
2849 for _, pod := range pods {
2850 for _, condition := range pod.Status.Conditions {
2851 if condition.Reason == corev1.PodReasonUnschedulable {
2852 return fmt.Errorf("%s: %s", pod.Name, condition.Message)
2853 }
2854 }
2855 }
2856
2857 return nil
2858 }
2859
2860 func checkControlPlaneReplicaSets(rst []appsv1.ReplicaSet) error {
2861 var errors []string
2862 for _, rs := range rst {
2863 for _, r := range rs.Status.Conditions {
2864 if r.Type == appsv1.ReplicaSetReplicaFailure && r.Status == corev1.ConditionTrue {
2865 errors = append(errors, fmt.Sprintf("%s: %s", r.Reason, r.Message))
2866 }
2867 }
2868 }
2869
2870 if len(errors) > 0 {
2871 return fmt.Errorf("%s", strings.Join(errors, "\n "))
2872 }
2873
2874 return nil
2875 }
2876
2877
2878 func CheckForPods(pods []corev1.Pod, deployNames []string) error {
2879 exists := make(map[string]bool)
2880
2881 for _, pod := range pods {
2882 for label, value := range pod.Labels {
2883
2884
2885
2886 if label == k8s.ControllerComponentLabel || label == "component" {
2887 exists[value] = true
2888 }
2889 }
2890 }
2891
2892 for _, expected := range deployNames {
2893 if !exists[expected] {
2894 return fmt.Errorf("Could not find pods for deployment %s", expected)
2895 }
2896 }
2897
2898 return nil
2899 }
2900
2901
2902
2903 func CheckPodsRunning(pods []corev1.Pod, namespace string) error {
2904 if len(pods) == 0 {
2905 msg := fmt.Sprintf("no \"%s\" containers found", k8s.ProxyContainerName)
2906 if namespace != "" {
2907 msg += fmt.Sprintf(" in the \"%s\" namespace", namespace)
2908 }
2909 return fmt.Errorf(msg)
2910 }
2911 for _, pod := range pods {
2912 status := k8s.GetPodStatus(pod)
2913
2914
2915
2916 switch status {
2917 case "Completed", "NodeShutdown", "Shutdown", "Terminated":
2918 continue
2919 }
2920 if status != string(corev1.PodRunning) && status != "Evicted" {
2921 return fmt.Errorf("pod \"%s\" status is %s", pod.Name, pod.Status.Phase)
2922 }
2923 if !k8s.GetProxyReady(pod) {
2924 return fmt.Errorf("container \"%s\" in pod \"%s\" is not ready", k8s.ProxyContainerName, pod.Name)
2925 }
2926 }
2927 return nil
2928 }
2929
2930
2931 func CheckIfDataPlanePodsExist(pods []corev1.Pod) error {
2932 for _, pod := range pods {
2933 if !containsProxy(pod) {
2934 return fmt.Errorf("could not find proxy container for %s pod", pod.Name)
2935 }
2936 }
2937 return nil
2938 }
2939
2940 func containsProxy(pod corev1.Pod) bool {
2941 containers := append(pod.Spec.InitContainers, pod.Spec.Containers...)
2942 for _, containerSpec := range containers {
2943 if containerSpec.Name == k8s.ProxyContainerName {
2944 return true
2945 }
2946 }
2947 return false
2948 }
2949
View as plain text