Skip to content

Commit 3ebe892

Browse files
ffromani authored and haircommander committed
UPSTREAM: <carry>: e2e_node: add a test to verify the kubelet starts
with systemd cgroup driver and cpumanager none policy. This was originally planned to be a correctness check for https://issues.k8s.io/125923, but it was difficult to reproduce the bug, so it's now a regression test against it. Signed-off-by: Francesco Romani <fromani@redhat.com> Signed-off-by: Peter Hunt <pehunt@redhat.com>
1 parent b0a8ba6 commit 3ebe892

File tree

1 file changed

+75
-0
lines changed

1 file changed

+75
-0
lines changed

test/e2e_node/node_container_manager_test.go

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,81 @@ var _ = SIGDescribe("Node Container Manager [Serial]", func() {
7171
framework.ExpectNoError(runTest(ctx, f))
7272
})
7373
})
74+
ginkgo.Context("Validate CGroup management", func() {
75+
// Regression test for https://issues.k8s.io/125923
76+
// That issue involves a race with systemd which seems to manifest most likely, or perhaps only
77+
// (data gathered so far is inconclusive), on the very first boot of the machine, so restarting the kubelet
78+
// does not seem sufficient to trigger it. OTOH, the exact reproducer seems to require a dedicated lane with
79+
// only this test, or rebooting the machine before running this test. Both are practically unrealistic in CI.
80+
// The closest approximation is this test in its current form, using a kubelet restart. This at least
81+
// acts as non-regression testing, so it still brings value.
82+
ginkgo.It("should correctly start with cpumanager none policy in use with systemd", func(ctx context.Context) {
83+
if !IsCgroup2UnifiedMode() {
84+
ginkgo.Skip("this test requires cgroups v2")
85+
}
86+
87+
var err error
88+
var oldCfg *kubeletconfig.KubeletConfiguration
89+
// Capture the current kubelet configuration so the cleanup below can restore it.
90+
oldCfg, err = getCurrentKubeletConfig(ctx)
91+
framework.ExpectNoError(err)
92+
93+
// Registered before the kubelet is touched, so the original configuration is written back
94+
// by the cleanup regardless of where the test body stops.
ginkgo.DeferCleanup(func(ctx context.Context) {
94+
if oldCfg != nil {
95+
// Restore the original kubelet configuration: stop the kubelet, rewrite the config file, start it again.
96+
ginkgo.By("Stopping the kubelet")
97+
// stopKubelet hands back the function used further down to start the kubelet again.
startKubelet := stopKubelet()
98+
99+
// Wait (up to one minute, polling every second) until the kubelet health check fails.
100+
gomega.Eventually(ctx, func() bool {
101+
return kubeletHealthCheck(kubeletHealthCheckURL)
102+
}).WithTimeout(time.Minute).WithPolling(time.Second).Should(gomega.BeFalseBecause("expected kubelet health check to be failed"))
103+
ginkgo.By("Stopped the kubelet")
104+
105+
framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(oldCfg))
106+
107+
ginkgo.By("Starting the kubelet")
108+
startKubelet()
109+
110+
// Wait (up to two minutes, polling every five seconds) until the kubelet health check succeeds.
111+
gomega.Eventually(ctx, func(ctx context.Context) bool {
112+
return kubeletHealthCheck(kubeletHealthCheckURL)
113+
}).WithTimeout(2 * time.Minute).WithPolling(5 * time.Second).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state"))
114+
ginkgo.By("Started the kubelet")
115+
}
116+
})
117+
118+
// Work on a deep copy so that oldCfg stays untouched for the cleanup above.
newCfg := oldCfg.DeepCopy()
119+
// Configuration under test: "none" CPU manager policy together with the "systemd" cgroup driver.
120+
newCfg.CPUManagerPolicy = "none"
121+
newCfg.CgroupDriver = "systemd"
122+
123+
// Apply the new kubelet configuration: stop the kubelet, rewrite the config file, start it again.
124+
ginkgo.By("Stopping the kubelet")
125+
startKubelet := stopKubelet()
126+
127+
// Wait (up to one minute, polling every second) until the kubelet health check fails.
128+
gomega.Eventually(ctx, func() bool {
129+
return kubeletHealthCheck(kubeletHealthCheckURL)
130+
}).WithTimeout(time.Minute).WithPolling(time.Second).Should(gomega.BeFalseBecause("expected kubelet health check to be failed"))
131+
ginkgo.By("Stopped the kubelet")
132+
133+
framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(newCfg))
134+
135+
ginkgo.By("Starting the kubelet")
136+
startKubelet()
137+
138+
// Wait (up to two minutes, polling every five seconds) until both the node ready status check
// and the kubelet health check succeed.
139+
gomega.Eventually(ctx, func() bool {
140+
return getNodeReadyStatus(ctx, f) && kubeletHealthCheck(kubeletHealthCheckURL)
141+
}).WithTimeout(2 * time.Minute).WithPolling(5 * time.Second).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state"))
142+
ginkgo.By("Started the kubelet")
143+
144+
// Both checks must then keep passing for two minutes, sampled every two seconds.
gomega.Consistently(ctx, func(ctx context.Context) bool {
145+
return getNodeReadyStatus(ctx, f) && kubeletHealthCheck(kubeletHealthCheckURL)
146+
}).WithTimeout(2 * time.Minute).WithPolling(2 * time.Second).Should(gomega.BeTrueBecause("node keeps reporting ready status"))
147+
})
148+
})
74149
})
75150

76151
func expectFileValToEqual(filePath string, expectedValue, delta int64) error {

0 commit comments

Comments
 (0)