/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package scheduler

// This file tests the scheduler.

import (
	"context"
	"fmt"
	"net/http"
	"strings"
	"sync/atomic"
	"testing"
	"time"

	"github.com/google/go-cmp/cmp"
	v1 "k8s.io/api/core/v1"
	resourcev1alpha2 "k8s.io/api/resource/v1alpha2"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/apimachinery/pkg/watch"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	clientset "k8s.io/client-go/kubernetes"
	corelisters "k8s.io/client-go/listers/core/v1"
	"k8s.io/client-go/tools/cache"
	featuregatetesting "k8s.io/component-base/featuregate/testing"
	configv1 "k8s.io/kube-scheduler/config/v1"
	"k8s.io/kubernetes/pkg/features"
	"k8s.io/kubernetes/pkg/scheduler"
	configtesting "k8s.io/kubernetes/pkg/scheduler/apis/config/testing"
	st "k8s.io/kubernetes/pkg/scheduler/testing"
	testutils "k8s.io/kubernetes/test/integration/util"
	"k8s.io/kubernetes/test/utils/format"
	"k8s.io/utils/pointer"
)

type nodeMutationFunc func(t *testing.T, n *v1.Node, nodeLister corelisters.NodeLister, c clientset.Interface)

type nodeStateManager struct {
	makeSchedulable   nodeMutationFunc
	makeUnSchedulable nodeMutationFunc
}

func TestUnschedulableNodes(t *testing.T) {
	testCtx := testutils.InitTestSchedulerWithNS(t, "unschedulable-nodes")

	nodeLister := testCtx.InformerFactory.Core().V1().Nodes().Lister()
	// NOTE: This test cannot run in parallel, because it is creating and deleting
	// non-namespaced objects (Nodes).
	defer testCtx.ClientSet.CoreV1().Nodes().DeleteCollection(context.TODO(), metav1.DeleteOptions{}, metav1.ListOptions{})

	goodCondition := v1.NodeCondition{
		Type:              v1.NodeReady,
		Status:            v1.ConditionTrue,
		Reason:            fmt.Sprintf("schedulable condition"),
		LastHeartbeatTime: metav1.Time{Time: time.Now()},
	}
	// Create a new schedulable node, since we're first going to apply
	// the unschedulable condition and verify that pods aren't scheduled.
	node := &v1.Node{
		ObjectMeta: metav1.ObjectMeta{Name: "node-scheduling-test-node"},
		Spec:       v1.NodeSpec{Unschedulable: false},
		Status: v1.NodeStatus{
			Capacity: v1.ResourceList{
				v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
			},
			Conditions: []v1.NodeCondition{goodCondition},
		},
	}
	nodeKey, err := cache.MetaNamespaceKeyFunc(node)
	if err != nil {
		t.Fatalf("Couldn't retrieve key for node %v", node.Name)
	}

	// The test does the following for each nodeStateManager in this list:
	//	1. Create a new node
	//	2. Apply the makeUnSchedulable function
	//	3. Create a new pod
	//  4. Check that the pod doesn't get assigned to the node
	//  5. Apply the schedulable function
	//  6. Check that the pod *does* get assigned to the node
	//  7. Delete the pod and node.

	nodeModifications := []nodeStateManager{
		// Test node.Spec.Unschedulable=true/false
		{
			makeUnSchedulable: func(t *testing.T, n *v1.Node, nodeLister corelisters.NodeLister, c clientset.Interface) {
				n.Spec.Unschedulable = true
				if _, err := c.CoreV1().Nodes().Update(context.TODO(), n, metav1.UpdateOptions{}); err != nil {
					t.Fatalf("Failed to update node with unschedulable=true: %v", err)
				}
				err = testutils.WaitForReflection(t, nodeLister, nodeKey, func(node interface{}) bool {
					// An unschedulable node should still be present in the store
					// Nodes that are unschedulable or that are not ready or
					// have their disk full (Node.Spec.Conditions) are excluded
					// based on NodeConditionPredicate, a separate check
					return node != nil && node.(*v1.Node).Spec.Unschedulable
				})
				if err != nil {
					t.Fatalf("Failed to observe reflected update for setting unschedulable=true: %v", err)
				}
			},
			makeSchedulable: func(t *testing.T, n *v1.Node, nodeLister corelisters.NodeLister, c clientset.Interface) {
				n.Spec.Unschedulable = false
				if _, err := c.CoreV1().Nodes().Update(context.TODO(), n, metav1.UpdateOptions{}); err != nil {
					t.Fatalf("Failed to update node with unschedulable=false: %v", err)
				}
				err = testutils.WaitForReflection(t, nodeLister, nodeKey, func(node interface{}) bool {
					return node != nil && node.(*v1.Node).Spec.Unschedulable == false
				})
				if err != nil {
					t.Fatalf("Failed to observe reflected update for setting unschedulable=false: %v", err)
				}
			},
		},
	}

	for i, mod := range nodeModifications {
		unSchedNode, err := testutils.CreateNode(testCtx.ClientSet, node)
		if err != nil {
			t.Fatalf("Failed to create node: %v", err)
		}

		// Apply the unschedulable modification to the node, and wait for the reflection
		mod.makeUnSchedulable(t, unSchedNode, nodeLister, testCtx.ClientSet)

		// Create the new pod, note that this needs to happen post unschedulable
		// modification or we have a race in the test.
		myPod, err := testutils.CreatePausePodWithResource(testCtx.ClientSet, "node-scheduling-test-pod", testCtx.NS.Name, nil)
		if err != nil {
			t.Fatalf("Failed to create pod: %v", err)
		}

		// There are no schedulable nodes - the pod shouldn't be scheduled.
		err = testutils.WaitForPodToScheduleWithTimeout(testCtx.ClientSet, myPod, 2*time.Second)
		if err == nil {
			t.Errorf("Test %d: Pod scheduled successfully on unschedulable nodes", i)
		}
		if !wait.Interrupted(err) {
			t.Errorf("Test %d: failed while trying to confirm the pod does not get scheduled on the node: %v", i, err)
		} else {
			t.Logf("Test %d: Pod did not get scheduled on an unschedulable node", i)
		}

		// Apply the schedulable modification to the node, and wait for the reflection
		schedNode, err := testCtx.ClientSet.CoreV1().Nodes().Get(context.TODO(), unSchedNode.Name, metav1.GetOptions{})
		if err != nil {
			t.Fatalf("Failed to get node: %v", err)
		}
		mod.makeSchedulable(t, schedNode, nodeLister, testCtx.ClientSet)

		// Wait until the pod is scheduled.
		if err := testutils.WaitForPodToSchedule(testCtx.ClientSet, myPod); err != nil {
			t.Errorf("Test %d: failed to schedule a pod: %v", i, err)
		} else {
			t.Logf("Test %d: Pod got scheduled on a schedulable node", i)
		}
		// Clean up.
		if err := testutils.DeletePod(testCtx.ClientSet, myPod.Name, myPod.Namespace); err != nil {
			t.Errorf("Failed to delete pod: %v", err)
		}
		err = testCtx.ClientSet.CoreV1().Nodes().Delete(context.TODO(), schedNode.Name, metav1.DeleteOptions{})
		if err != nil {
			t.Errorf("Failed to delete node: %v", err)
		}
	}
}

func TestMultipleSchedulers(t *testing.T) {
	// This integration tests the multi-scheduler feature in the following way:
	// 1. create a default scheduler
	// 2. create a node
	// 3. create 3 pods: testPodNoAnnotation, testPodWithAnnotationFitsDefault and testPodWithAnnotationFitsFoo
	//	  - note: the first two should be picked and scheduled by default scheduler while the last one should be
	//	          picked by scheduler of name "foo-scheduler" which does not exist yet.
	// 4. **check point-1**:
	//	   - testPodNoAnnotation, testPodWithAnnotationFitsDefault should be scheduled
	//	   - testPodWithAnnotationFitsFoo should NOT be scheduled
	// 5. create a scheduler with name "foo-scheduler"
	// 6. **check point-2**:
	//     - testPodWithAnnotationFitsFoo should be scheduled

	// 1. create and start default-scheduler
	testCtx := testutils.InitTestSchedulerWithNS(t, "multi-scheduler")

	// 2. create a node
	node := &v1.Node{
		ObjectMeta: metav1.ObjectMeta{Name: "node-multi-scheduler-test-node"},
		Spec:       v1.NodeSpec{Unschedulable: false},
		Status: v1.NodeStatus{
			Capacity: v1.ResourceList{
				v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
			},
		},
	}
	testutils.CreateNode(testCtx.ClientSet, node)

	// 3. create 3 pods for testing
	t.Logf("create 3 pods for testing")
	testPod, err := testutils.CreatePausePodWithResource(testCtx.ClientSet, "pod-without-scheduler-name", testCtx.NS.Name, nil)
	if err != nil {
		t.Fatalf("Failed to create pod: %v", err)
	}

	defaultScheduler := "default-scheduler"
	testPodFitsDefault, err := testutils.CreatePausePod(testCtx.ClientSet, testutils.InitPausePod(&testutils.PausePodConfig{Name: "pod-fits-default", Namespace: testCtx.NS.Name, SchedulerName: defaultScheduler}))
	if err != nil {
		t.Fatalf("Failed to create pod: %v", err)
	}

	fooScheduler := "foo-scheduler"
	testPodFitsFoo, err := testutils.CreatePausePod(testCtx.ClientSet, testutils.InitPausePod(&testutils.PausePodConfig{Name: "pod-fits-foo", Namespace: testCtx.NS.Name, SchedulerName: fooScheduler}))
	if err != nil {
		t.Fatalf("Failed to create pod: %v", err)
	}

	// 4. **check point-1**:
	//		- testPod, testPodFitsDefault should be scheduled
	//		- testPodFitsFoo should NOT be scheduled
	t.Logf("wait for pods scheduled")
	if err := testutils.WaitForPodToSchedule(testCtx.ClientSet, testPod); err != nil {
		t.Errorf("Test MultiScheduler: %s Pod not scheduled: %v", testPod.Name, err)
	} else {
		t.Logf("Test MultiScheduler: %s Pod scheduled", testPod.Name)
	}

	if err := testutils.WaitForPodToSchedule(testCtx.ClientSet, testPodFitsDefault); err != nil {
		t.Errorf("Test MultiScheduler: %s Pod not scheduled: %v", testPodFitsDefault.Name, err)
	} else {
		t.Logf("Test MultiScheduler: %s Pod scheduled", testPodFitsDefault.Name)
	}

	if err := testutils.WaitForPodToScheduleWithTimeout(testCtx.ClientSet, testPodFitsFoo, time.Second*5); err == nil {
		t.Errorf("Test MultiScheduler: %s Pod got scheduled, %v", testPodFitsFoo.Name, err)
	} else {
		t.Logf("Test MultiScheduler: %s Pod not scheduled", testPodFitsFoo.Name)
	}

	// 5. create and start a scheduler with name "foo-scheduler"
	cfg := configtesting.V1ToInternalWithDefaults(t, configv1.KubeSchedulerConfiguration{
		Profiles: []configv1.KubeSchedulerProfile{{
			SchedulerName: pointer.String(fooScheduler),
			PluginConfig: []configv1.PluginConfig{
				{
					Name: "VolumeBinding",
					Args: runtime.RawExtension{
						Object: &configv1.VolumeBindingArgs{
							BindTimeoutSeconds: pointer.Int64(30),
						},
					},
				},
			}},
		},
	})
	testCtx = testutils.InitTestSchedulerWithOptions(t, testCtx, 0, scheduler.WithProfiles(cfg.Profiles...))
	testutils.SyncSchedulerInformerFactory(testCtx)
	go testCtx.Scheduler.Run(testCtx.Ctx)

	//	6. **check point-2**:
	//		- testPodWithAnnotationFitsFoo should be scheduled
	err = testutils.WaitForPodToSchedule(testCtx.ClientSet, testPodFitsFoo)
	if err != nil {
		t.Errorf("Test MultiScheduler: %s Pod not scheduled, %v", testPodFitsFoo.Name, err)
	} else {
		t.Logf("Test MultiScheduler: %s Pod scheduled", testPodFitsFoo.Name)
	}
}

func TestMultipleSchedulingProfiles(t *testing.T) {
	cfg := configtesting.V1ToInternalWithDefaults(t, configv1.KubeSchedulerConfiguration{
		Profiles: []configv1.KubeSchedulerProfile{
			{SchedulerName: pointer.String("default-scheduler")},
			{SchedulerName: pointer.String("custom-scheduler")},
		},
	})

	testCtx := testutils.InitTestSchedulerWithNS(t, "multi-scheduler", scheduler.WithProfiles(cfg.Profiles...))

	node := &v1.Node{
		ObjectMeta: metav1.ObjectMeta{Name: "node-multi-scheduler-test-node"},
		Spec:       v1.NodeSpec{Unschedulable: false},
		Status: v1.NodeStatus{
			Capacity: v1.ResourceList{
				v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
			},
		},
	}
	if _, err := testutils.CreateNode(testCtx.ClientSet, node); err != nil {
		t.Fatal(err)
	}

	evs, err := testCtx.ClientSet.CoreV1().Events(testCtx.NS.Name).Watch(testCtx.Ctx, metav1.ListOptions{})
	if err != nil {
		t.Fatal(err)
	}
	defer evs.Stop()

	for _, pc := range []*testutils.PausePodConfig{
		{Name: "foo", Namespace: testCtx.NS.Name},
		{Name: "bar", Namespace: testCtx.NS.Name, SchedulerName: "unknown-scheduler"},
		{Name: "baz", Namespace: testCtx.NS.Name, SchedulerName: "default-scheduler"},
		{Name: "zet", Namespace: testCtx.NS.Name, SchedulerName: "custom-scheduler"},
	} {
		if _, err := testutils.CreatePausePod(testCtx.ClientSet, testutils.InitPausePod(pc)); err != nil {
			t.Fatal(err)
		}
	}

	wantProfiles := map[string]string{
		"foo": "default-scheduler",
		"baz": "default-scheduler",
		"zet": "custom-scheduler",
	}

	gotProfiles := make(map[string]string)
	if err := wait.PollUntilContextTimeout(testCtx.Ctx, 100*time.Millisecond, 30*time.Second, false, func(ctx context.Context) (bool, error) {
		var ev watch.Event
		select {
		case ev = <-evs.ResultChan():
		case <-time.After(30 * time.Second):
			return false, nil
		}
		e, ok := ev.Object.(*v1.Event)
		if !ok || e.Reason != "Scheduled" {
			return false, nil
		}
		gotProfiles[e.InvolvedObject.Name] = e.ReportingController
		return len(gotProfiles) >= len(wantProfiles), nil
	}); err != nil {
		t.Errorf("waiting for scheduling events: %v", err)
	}

	if diff := cmp.Diff(wantProfiles, gotProfiles); diff != "" {
		t.Errorf("pods scheduled by the wrong profile (-want, +got):\n%s", diff)
	}
}

// This test will verify scheduler can work well regardless of whether kubelet is allocatable aware or not.
func TestAllocatable(t *testing.T) {
	testCtx := testutils.InitTestSchedulerWithNS(t, "allocatable")

	// 2. create a node without allocatable awareness
	nodeRes := map[v1.ResourceName]string{
		v1.ResourcePods:   "32",
		v1.ResourceCPU:    "30m",
		v1.ResourceMemory: "30",
	}
	allocNode, err := testutils.CreateNode(testCtx.ClientSet, st.MakeNode().Name("node-allocatable-scheduler-test-node").Capacity(nodeRes).Obj())
	if err != nil {
		t.Fatalf("Failed to create node: %v", err)
	}

	// 3. create resource pod which requires less than Capacity
	podName := "pod-test-allocatable"
	podRes := &v1.ResourceList{
		v1.ResourceCPU:    *resource.NewMilliQuantity(20, resource.DecimalSI),
		v1.ResourceMemory: *resource.NewQuantity(20, resource.BinarySI),
	}
	testAllocPod, err := testutils.CreatePausePodWithResource(testCtx.ClientSet, podName, testCtx.NS.Name, podRes)
	if err != nil {
		t.Fatalf("Test allocatable unawareness failed to create pod: %v", err)
	}

	// 4. Test: this test pod should be scheduled since api-server will use Capacity as Allocatable
	err = testutils.WaitForPodToScheduleWithTimeout(testCtx.ClientSet, testAllocPod, time.Second*5)
	if err != nil {
		t.Errorf("Test allocatable unawareness: %s Pod not scheduled: %v", testAllocPod.Name, err)
	} else {
		t.Logf("Test allocatable unawareness: %s Pod scheduled", testAllocPod.Name)
	}

	// 5. Change the node status to allocatable aware, note that Allocatable is less than Pod's requirement
	allocNode.Status = v1.NodeStatus{
		Capacity: v1.ResourceList{
			v1.ResourcePods:   *resource.NewQuantity(32, resource.DecimalSI),
			v1.ResourceCPU:    *resource.NewMilliQuantity(30, resource.DecimalSI),
			v1.ResourceMemory: *resource.NewQuantity(30, resource.BinarySI),
		},
		Allocatable: v1.ResourceList{
			v1.ResourcePods:   *resource.NewQuantity(32, resource.DecimalSI),
			v1.ResourceCPU:    *resource.NewMilliQuantity(10, resource.DecimalSI),
			v1.ResourceMemory: *resource.NewQuantity(10, resource.BinarySI),
		},
	}

	if _, err := testCtx.ClientSet.CoreV1().Nodes().UpdateStatus(context.TODO(), allocNode, metav1.UpdateOptions{}); err != nil {
		t.Fatalf("Failed to update node with Status.Allocatable: %v", err)
	}

	if err := testutils.DeletePod(testCtx.ClientSet, testAllocPod.Name, testCtx.NS.Name); err != nil {
		t.Fatalf("Failed to remove the first pod: %v", err)
	}

	// 6. Make another pod with different name, same resource request
	podName2 := "pod-test-allocatable2"
	testAllocPod2, err := testutils.CreatePausePodWithResource(testCtx.ClientSet, podName2, testCtx.NS.Name, podRes)
	if err != nil {
		t.Fatalf("Test allocatable awareness failed to create pod: %v", err)
	}

	// 7. Test: this test pod should not be scheduled since it request more than Allocatable
	if err := testutils.WaitForPodToScheduleWithTimeout(testCtx.ClientSet, testAllocPod2, time.Second*5); err == nil {
		t.Errorf("Test allocatable awareness: %s Pod got scheduled unexpectedly, %v", testAllocPod2.Name, err)
	} else {
		t.Logf("Test allocatable awareness: %s Pod not scheduled as expected", testAllocPod2.Name)
	}
}

// TestSchedulerInformers tests that scheduler receives informer events and updates its cache when
// pods are scheduled by other schedulers.
func TestSchedulerInformers(t *testing.T) {
	// Initialize scheduler.
	testCtx := testutils.InitTestSchedulerWithNS(t, "scheduler-informer")
	cs := testCtx.ClientSet

	defaultPodRes := &v1.ResourceRequirements{Requests: v1.ResourceList{
		v1.ResourceCPU:    *resource.NewMilliQuantity(200, resource.DecimalSI),
		v1.ResourceMemory: *resource.NewQuantity(200, resource.BinarySI)},
	}
	defaultNodeRes := map[v1.ResourceName]string{
		v1.ResourcePods:   "32",
		v1.ResourceCPU:    "500m",
		v1.ResourceMemory: "500",
	}

	type nodeConfig struct {
		name string
		res  map[v1.ResourceName]string
	}

	tests := []struct {
		name                string
		nodes               []*nodeConfig
		existingPods        []*v1.Pod
		pod                 *v1.Pod
		preemptedPodIndexes map[int]struct{}
	}{
		{
			name:  "Pod cannot be scheduled when node is occupied by pods scheduled by other schedulers",
			nodes: []*nodeConfig{{name: "node-1", res: defaultNodeRes}},
			existingPods: []*v1.Pod{
				testutils.InitPausePod(&testutils.PausePodConfig{
					Name:          "pod1",
					Namespace:     testCtx.NS.Name,
					Resources:     defaultPodRes,
					Labels:        map[string]string{"foo": "bar"},
					NodeName:      "node-1",
					SchedulerName: "foo-scheduler",
				}),
				testutils.InitPausePod(&testutils.PausePodConfig{
					Name:          "pod2",
					Namespace:     testCtx.NS.Name,
					Resources:     defaultPodRes,
					Labels:        map[string]string{"foo": "bar"},
					NodeName:      "node-1",
					SchedulerName: "bar-scheduler",
				}),
			},
			pod: testutils.InitPausePod(&testutils.PausePodConfig{
				Name:      "unschedulable-pod",
				Namespace: testCtx.NS.Name,
				Resources: defaultPodRes,
			}),
			preemptedPodIndexes: map[int]struct{}{2: {}},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			for _, nodeConf := range test.nodes {
				_, err := testutils.CreateNode(cs, st.MakeNode().Name(nodeConf.name).Capacity(nodeConf.res).Obj())
				if err != nil {
					t.Fatalf("Error creating node %v: %v", nodeConf.name, err)
				}
			}
			// Ensure nodes are present in scheduler cache.
			if err := testutils.WaitForNodesInCache(testCtx.Scheduler, len(test.nodes)); err != nil {
				t.Fatal(err)
			}

			pods := make([]*v1.Pod, len(test.existingPods))
			var err error
			// Create and run existingPods.
			for i, p := range test.existingPods {
				if pods[i], err = testutils.RunPausePod(cs, p); err != nil {
					t.Fatalf("Error running pause pod: %v", err)
				}
			}
			// Create the new "pod".
			unschedulable, err := testutils.CreatePausePod(cs, test.pod)
			if err != nil {
				t.Errorf("Error while creating new pod: %v", err)
			}
			if err := testutils.WaitForPodUnschedulable(cs, unschedulable); err != nil {
				t.Errorf("Pod %v got scheduled: %v", unschedulable.Name, err)
			}

			// Cleanup
			pods = append(pods, unschedulable)
			testutils.CleanupPods(testCtx.Ctx, cs, t, pods)
			if err := cs.PolicyV1().PodDisruptionBudgets(testCtx.NS.Name).DeleteCollection(testCtx.Ctx, metav1.DeleteOptions{}, metav1.ListOptions{}); err != nil {
				t.Errorf("error whiling deleting PDBs, error: %v", err)
			}
			if err := cs.CoreV1().Nodes().DeleteCollection(testCtx.Ctx, metav1.DeleteOptions{}, metav1.ListOptions{}); err != nil {
				t.Errorf("error whiling deleting nodes, error: %v", err)
			}
		})
	}
}

func TestNodeEvents(t *testing.T) {
	// The test verifies that unschedulable pods are re-queued
	// on node update events. The scenario we are testing is the following:
	// 1. Create pod1 and node1 that is small enough to only fit pod1; pod1 schedules on node1
	// 2. Create pod2, it should be unschedulable due to insufficient cpu
	// 3. Create node2 with a taint, pod2 should still not schedule
	// 4. Remove the taint from node2; pod2 should now schedule on node2

	testCtx := testutils.InitTestSchedulerWithNS(t, "node-events")
	defer testCtx.ClientSet.CoreV1().Nodes().DeleteCollection(context.TODO(), metav1.DeleteOptions{}, metav1.ListOptions{})

	// 1.1 create pod1
	pod1, err := testutils.CreatePausePodWithResource(testCtx.ClientSet, "pod1", testCtx.NS.Name, &v1.ResourceList{
		v1.ResourceCPU: *resource.NewMilliQuantity(80, resource.DecimalSI),
	})
	if err != nil {
		t.Fatalf("Failed to create pod: %v", err)
	}

	// 1.2 Create node1
	node1, err := testutils.CreateNode(testCtx.ClientSet, st.MakeNode().
		Name("node-events-test-node1").
		Capacity(map[v1.ResourceName]string{
			v1.ResourcePods:   "32",
			v1.ResourceCPU:    "100m",
			v1.ResourceMemory: "30",
		}).Obj())
	if err != nil {
		t.Fatalf("Failed to create %s: %v", node1.Name, err)
	}

	// 1.3 verify pod1 is scheduled
	err = testutils.WaitForPodToScheduleWithTimeout(testCtx.ClientSet, pod1, time.Second*5)
	if err != nil {
		t.Errorf("Pod %s didn't schedule: %v", pod1.Name, err)
	}

	// 2. create pod2
	pod2, err := testutils.CreatePausePodWithResource(testCtx.ClientSet, "pod2", testCtx.NS.Name, &v1.ResourceList{
		v1.ResourceCPU: *resource.NewMilliQuantity(40, resource.DecimalSI),
	})
	if err != nil {
		t.Fatalf("Failed to create pod %v: %v", pod2.Name, err)
	}

	if err := testutils.WaitForPodUnschedulable(testCtx.ClientSet, pod2); err != nil {
		t.Errorf("Pod %v got scheduled: %v", pod2.Name, err)
	}

	// 3.1 Create node2 with a taint
	node2 := st.MakeNode().
		Name("node-events-test-node2").
		Capacity(map[v1.ResourceName]string{
			v1.ResourcePods:   "32",
			v1.ResourceCPU:    "100m",
			v1.ResourceMemory: "30",
		}).
		Label("affinity-key", "affinity-value").
		Taints([]v1.Taint{{Key: "taint-key", Effect: v1.TaintEffectNoSchedule}}).Obj()
	node2, err = testutils.CreateNode(testCtx.ClientSet, node2)
	if err != nil {
		t.Fatalf("Failed to create %s: %v", node2.Name, err)
	}
	// make sure the scheduler received the node add event by creating a pod that only fits node2
	plugPod := st.MakePod().Name("plug-pod").Namespace(testCtx.NS.Name).Container("pause").
		Req(map[v1.ResourceName]string{v1.ResourceCPU: "40m"}).
		NodeAffinityIn("affinity-key", []string{"affinity-value"}).
		Toleration("taint-key").Obj()
	plugPod, err = testCtx.ClientSet.CoreV1().Pods(plugPod.Namespace).Create(context.TODO(), plugPod, metav1.CreateOptions{})
	if err != nil {
		t.Fatalf("Failed to create pod %v: %v", plugPod.Name, err)
	}
	err = testutils.WaitForPodToScheduleWithTimeout(testCtx.ClientSet, plugPod, time.Second*5)
	if err != nil {
		t.Errorf("Pod %s didn't schedule: %v", plugPod.Name, err)
	}

	// 3.2 pod2 still unschedulable
	if err := testutils.WaitForPodUnschedulable(testCtx.ClientSet, pod2); err != nil {
		t.Errorf("Pod %v got scheduled: %v", pod2.Name, err)
	}

	// 4. Remove node taint, pod2 should schedule
	node2.Spec.Taints = nil
	node2, err = testutils.UpdateNode(testCtx.ClientSet, node2)
	if err != nil {
		t.Fatalf("Failed to update %s: %v", node2.Name, err)
	}

	err = testutils.WaitForPodToScheduleWithTimeout(testCtx.ClientSet, pod2, time.Second*5)
	if err != nil {
		t.Errorf("Pod %s didn't schedule: %v", pod2.Name, err)
	}

}

// TestPodSchedulingContextSSA checks that the dynamicresources plugin falls
// back to SSA successfully when the normal Update call encountered
// a conflict.
//
// This is an integration test because:
//   - Unit testing does not cover RBAC rules.
//   - Triggering this particular race is harder in E2E testing
//     and harder to verify (needs apiserver metrics and there's
//     no standard API for those).
func TestPodSchedulingContextSSA(t *testing.T) {
	defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DynamicResourceAllocation, true)()

	testCtx := testutils.InitTestAPIServer(t, "podschedulingcontext-ssa", nil)
	testCtx.DisableEventSink = true
	testCtx = testutils.InitTestSchedulerWithOptions(t, testCtx, 0)
	testutils.SyncSchedulerInformerFactory(testCtx)
	go testCtx.Scheduler.Run(testCtx.SchedulerCtx)

	// Set up enough objects that the scheduler will start trying to
	// schedule the pod and create the PodSchedulingContext.
	nodeRes := map[v1.ResourceName]string{
		v1.ResourcePods:   "32",
		v1.ResourceCPU:    "30m",
		v1.ResourceMemory: "30",
	}
	for _, name := range []string{"node-a", "node-b"} {
		if _, err := testutils.CreateNode(testCtx.ClientSet, st.MakeNode().Name(name).Capacity(nodeRes).Obj()); err != nil {
			t.Fatalf("Failed to create node: %v", err)
		}
	}

	defer func() {
		if err := testCtx.ClientSet.ResourceV1alpha2().ResourceClasses().DeleteCollection(testCtx.Ctx, metav1.DeleteOptions{}, metav1.ListOptions{}); err != nil {
			t.Errorf("Unexpected error deleting ResourceClasses: %v", err)
		}
	}()
	class := &resourcev1alpha2.ResourceClass{
		ObjectMeta: metav1.ObjectMeta{
			Name: "my-class",
		},
		DriverName: "does-not-matter",
	}
	if _, err := testCtx.ClientSet.ResourceV1alpha2().ResourceClasses().Create(testCtx.Ctx, class, metav1.CreateOptions{}); err != nil {
		t.Fatalf("Failed to create class: %v", err)
	}

	claim := &resourcev1alpha2.ResourceClaim{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "my-claim",
			Namespace: testCtx.NS.Name,
		},
		Spec: resourcev1alpha2.ResourceClaimSpec{
			ResourceClassName: class.Name,
		},
	}
	if _, err := testCtx.ClientSet.ResourceV1alpha2().ResourceClaims(claim.Namespace).Create(testCtx.Ctx, claim, metav1.CreateOptions{}); err != nil {
		t.Fatalf("Failed to create claim: %v", err)
	}

	podConf := testutils.PausePodConfig{
		Name:      "testpod",
		Namespace: testCtx.NS.Name,
	}
	pod := testutils.InitPausePod(&podConf)
	podClaimName := "myclaim"
	pod.Spec.Containers[0].Resources.Claims = []v1.ResourceClaim{{Name: podClaimName}}
	pod.Spec.ResourceClaims = []v1.PodResourceClaim{{Name: podClaimName, Source: v1.ClaimSource{ResourceClaimName: &claim.Name}}}
	if _, err := testCtx.ClientSet.CoreV1().Pods(pod.Namespace).Create(testCtx.Ctx, pod, metav1.CreateOptions{}); err != nil {
		t.Fatalf("Failed to create pod: %v", err)
	}

	// Check that the PodSchedulingContext exists and has a selected node.
	var schedulingCtx *resourcev1alpha2.PodSchedulingContext
	if err := wait.PollUntilContextTimeout(testCtx.Ctx, 10*time.Microsecond, 30*time.Second, true,
		func(context.Context) (bool, error) {
			var err error
			schedulingCtx, err = testCtx.ClientSet.ResourceV1alpha2().PodSchedulingContexts(pod.Namespace).Get(testCtx.Ctx, pod.Name, metav1.GetOptions{})
			if apierrors.IsNotFound(err) {
				return false, nil
			}
			if err == nil && schedulingCtx.Spec.SelectedNode != "" {
				return true, nil
			}
			return false, err
		}); err != nil {
		t.Fatalf("Failed while waiting for PodSchedulingContext with selected node: %v\nLast PodSchedulingContext:\n%s", err, format.Object(schedulingCtx, 1))
	}

	// Force the plugin to use SSA.
	var podSchedulingContextPatchCounter atomic.Int64
	roundTrip := testutils.RoundTripWrapper(func(transport http.RoundTripper, req *http.Request) (*http.Response, error) {
		if strings.HasPrefix(req.URL.Path, "/apis/resource.k8s.io/") &&
			strings.HasSuffix(req.URL.Path, "/podschedulingcontexts/"+pod.Name) {
			switch req.Method {
			case http.MethodPut, http.MethodPost:
				return &http.Response{
					Status:     fmt.Sprintf("%d %s", http.StatusConflict, metav1.StatusReasonConflict),
					StatusCode: http.StatusConflict,
				}, nil
			case http.MethodPatch:
				podSchedulingContextPatchCounter.Add(1)
			}
		}
		return transport.RoundTrip(req)
	})
	testCtx.RoundTrip.Store(&roundTrip)

	// Now force the scheduler to update the PodSchedulingContext by setting UnsuitableNodes so that
	// the selected node is not suitable.
	schedulingCtx.Status.ResourceClaims = []resourcev1alpha2.ResourceClaimSchedulingStatus{{
		Name:            podClaimName,
		UnsuitableNodes: []string{schedulingCtx.Spec.SelectedNode},
	}}

	if _, err := testCtx.ClientSet.ResourceV1alpha2().PodSchedulingContexts(pod.Namespace).UpdateStatus(testCtx.Ctx, schedulingCtx, metav1.UpdateOptions{}); err != nil {
		t.Fatalf("Unexpected PodSchedulingContext status update error: %v", err)
	}

	// We know that the scheduler has to use SSA because above we inject a conflict
	// error whenever it tries to use a plain update. We just need to wait for it...
	if err := wait.PollUntilContextTimeout(testCtx.Ctx, 10*time.Microsecond, time.Minute, true,
		func(context.Context) (bool, error) {
			return podSchedulingContextPatchCounter.Load() > 0, nil
		}); err != nil {
		t.Fatalf("Failed while waiting for PodSchedulingContext Patch: %v", err)
	}
}