Skip to content

Commit 2712f53

Browse files
committed
Revert "Update Node Monitor Grace Period default duration to 50s"
This reverts commit f8bf6b9.
1 parent 38c60a5 commit 2712f53

File tree

7 files changed

+7
-19
lines changed

7 files changed

+7
-19
lines changed

cmd/kube-controller-manager/app/options/nodelifecyclecontroller.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,7 @@ func (o *NodeLifecycleControllerOptions) AddFlags(fs *pflag.FlagSet) {
4141
fs.DurationVar(&o.NodeMonitorGracePeriod.Duration, "node-monitor-grace-period", o.NodeMonitorGracePeriod.Duration,
4242
"Amount of time which we allow running Node to be unresponsive before marking it unhealthy. "+
4343
"Must be N times more than kubelet's nodeStatusUpdateFrequency, "+
44-
"where N means number of retries allowed for kubelet to post node status. "+
45-
"This value should also be greater than the sum of HTTP2_PING_TIMEOUT_SECONDS and HTTP2_READ_IDLE_TIMEOUT_SECONDS")
44+
"where N means number of retries allowed for kubelet to post node status.")
4645
fs.Float32Var(&o.NodeEvictionRate, "node-eviction-rate", 0.1, "Number of nodes per second on which pods are deleted in case of node failure when a zone is healthy (see --unhealthy-zone-threshold for definition of healthy/unhealthy). Zone refers to entire cluster in non-multizone clusters.")
4746
fs.Float32Var(&o.SecondaryNodeEvictionRate, "secondary-node-eviction-rate", 0.01, "Number of nodes per second on which pods are deleted in case of node failure when a zone is unhealthy (see --unhealthy-zone-threshold for definition of healthy/unhealthy). Zone refers to entire cluster in non-multizone clusters. This value is implicitly overridden to 0 if the cluster size is smaller than --large-cluster-size-threshold.")
4847
fs.Int32Var(&o.LargeClusterSizeThreshold, "large-cluster-size-threshold", 50, fmt.Sprintf("Number of nodes from which %s treats the cluster as large for the eviction logic purposes. --secondary-node-eviction-rate is implicitly overridden to 0 for clusters this size or smaller. Notice: If nodes reside in multiple zones, this threshold will be considered as zone node size threshold for each zone to determine node eviction rate independently.", names.NodeLifecycleController))

pkg/controller/nodelifecycle/config/types.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,7 @@ type NodeLifecycleControllerConfiguration struct {
3232
// NodeMonitorGracePeriod is the amount of time which we allow a running node to be
3333
// unresponsive before marking it unhealthy. Must be N times more than kubelet's
3434
// nodeStatusUpdateFrequency, where N means number of retries allowed for kubelet
35-
// to post node status. This value should also be greater than the sum of
36-
// HTTP2_PING_TIMEOUT_SECONDS and HTTP2_READ_IDLE_TIMEOUT_SECONDS.
35+
// to post node status.
3736
NodeMonitorGracePeriod metav1.Duration
3837
// secondaryNodeEvictionRate is implicitly overridden to 0 for clusters smaller than or equal to largeClusterSizeThreshold
3938
LargeClusterSizeThreshold int32

pkg/controller/nodelifecycle/config/v1alpha1/defaults.go

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,8 @@ func RecommendedDefaultNodeLifecycleControllerConfiguration(obj *kubectrlmgrconf
3737
if obj.PodEvictionTimeout == zero {
3838
obj.PodEvictionTimeout = metav1.Duration{Duration: 5 * time.Minute}
3939
}
40-
// NodeMonitorGracePeriod is set to a default value of 50 seconds.
41-
// This value should be greater than the sum of HTTP2_PING_TIMEOUT_SECONDS (30s)
42-
// and HTTP2_READ_IDLE_TIMEOUT_SECONDS (15s) from the http2 health check
43-
// to ensure that the server has adequate time to handle slow or idle connections
44-
// properly before marking a node as unhealthy.
4540
if obj.NodeMonitorGracePeriod == zero {
46-
obj.NodeMonitorGracePeriod = metav1.Duration{Duration: 50 * time.Second}
41+
obj.NodeMonitorGracePeriod = metav1.Duration{Duration: 40 * time.Second}
4742
}
4843
if obj.NodeStartupGracePeriod == zero {
4944
obj.NodeStartupGracePeriod = metav1.Duration{Duration: 60 * time.Second}

pkg/controller/nodelifecycle/node_lifecycle_controller.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -282,11 +282,7 @@ type Controller struct {
282282
// be less than the node health signal update frequency, since there will
283283
// only be fresh values from Kubelet at an interval of node health signal
284284
// update frequency.
285-
// 2. nodeMonitorGracePeriod should be greater than the sum of HTTP2_PING_TIMEOUT_SECONDS (30s)
286-
// and HTTP2_READ_IDLE_TIMEOUT_SECONDS (15s) from the http2 health check
287-
// to ensure that the server has adequate time to handle slow or idle connections
288-
// properly before marking a node as unhealthy.
289-
// 3. nodeMonitorGracePeriod can't be too large for user experience - larger
285+
// 2. nodeMonitorGracePeriod can't be too large for user experience - larger
290286
// value takes longer for user to see up-to-date node health.
291287
nodeMonitorGracePeriod time.Duration
292288

pkg/controller/nodelifecycle/node_lifecycle_controller_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ import (
5252
)
5353

5454
const (
55-
testNodeMonitorGracePeriod = 50 * time.Second
55+
testNodeMonitorGracePeriod = 40 * time.Second
5656
testNodeStartupGracePeriod = 60 * time.Second
5757
testNodeMonitorPeriod = 5 * time.Second
5858
testRateLimiterQPS = float32(100000)

pkg/generated/openapi/zz_generated.openapi.go

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

staging/src/k8s.io/kube-controller-manager/config/v1alpha1/types.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -403,8 +403,7 @@ type NodeLifecycleControllerConfiguration struct {
403403
// nodeMonitorGracePeriod is the amount of time which we allow a running node to be
404404
// unresponsive before marking it unhealthy. Must be N times more than kubelet's
405405
// nodeStatusUpdateFrequency, where N means number of retries allowed for kubelet
406-
// to post node status. This value should also be greater than the sum of
407-
// HTTP2_PING_TIMEOUT_SECONDS and HTTP2_READ_IDLE_TIMEOUT_SECONDS.
406+
// to post node status.
408407
NodeMonitorGracePeriod metav1.Duration
409408
// podEvictionTimeout is the grace period for deleting pods on failed nodes.
410409
PodEvictionTimeout metav1.Duration

0 commit comments

Comments
 (0)