From e68e966468621ad516a2debaca5193bca22c2f1e Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Thu, 23 Nov 2017 13:55:42 +0100 Subject: [PATCH 1/2] UPSTREAM: 53135: Fixed counting of unbound PVCs towards limit of attached volumes --- .../pkg/scheduler/algorithm/predicates/BUILD | 1 + .../algorithm/predicates/predicates.go | 58 +++---- .../algorithm/predicates/predicates_test.go | 151 ++++++++++++++++++ 3 files changed, 182 insertions(+), 28 deletions(-) diff --git a/vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/BUILD b/vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/BUILD index ff893c3c4165..c940d2f538fb 100644 --- a/vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/BUILD +++ b/vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/BUILD @@ -29,6 +29,7 @@ go_library( "//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library", "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//vendor/k8s.io/apimachinery/pkg/labels:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/util/rand:go_default_library", "//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library", "//vendor/k8s.io/client-go/listers/core/v1:go_default_library", "//vendor/k8s.io/client-go/util/workqueue:go_default_library", diff --git a/vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/predicates.go b/vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/predicates.go index d047c0ffcdfe..efe1fac490f0 100644 --- a/vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/predicates.go +++ b/vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/predicates.go @@ -18,15 +18,13 @@ package predicates import ( "fmt" - "math/rand" - "strconv" "sync" - "time" "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/util/rand" utilfeature "k8s.io/apiserver/pkg/util/feature" corelisters "k8s.io/client-go/listers/core/v1" "k8s.io/client-go/util/workqueue" @@ -206,7 +204,7 @@ func NewMaxPDVolumeCountPredicate(filter VolumeFilter, maxVolumes int, pvInfo Pe return c.predicate } -func (c *MaxPDVolumeCountChecker) filterVolumes(volumes []v1.Volume, namespace string, filteredVolumes map[string]bool) error { +func (c *MaxPDVolumeCountChecker) filterVolumes(volumes []v1.Volume, namespace string, randomPrefix string, filteredVolumes map[string]bool) error { for i := range volumes { vol := &volumes[i] if id, ok := c.filter.FilterVolume(vol); ok { @@ -216,40 +214,37 @@ func (c *MaxPDVolumeCountChecker) filterVolumes(volumes []v1.Volume, namespace s if pvcName == "" { return fmt.Errorf("PersistentVolumeClaim had no name") } + + // Until we know real ID of the volume use namespace/pvcName as substitute + // With a random prefix so it can't conflict with existing volume ID. + pvId := fmt.Sprintf("%s-%s/%s", randomPrefix, namespace, pvcName) + pvc, err := c.pvcInfo.GetPersistentVolumeClaimInfo(namespace, pvcName) - if err != nil { + if err != nil || pvc == nil { // if the PVC is not found, log the error and count the PV towards the PV limit - // generate a random volume ID since its required for de-dup glog.V(4).Infof("Unable to look up PVC info for %s/%s, assuming PVC matches predicate when counting limits: %v", namespace, pvcName, err) - source := rand.NewSource(time.Now().UnixNano()) - generatedID := "missingPVC" + strconv.Itoa(rand.New(source).Intn(1000000)) - filteredVolumes[generatedID] = true - return nil + filteredVolumes[pvId] = true + continue } - if pvc == nil { - return fmt.Errorf("PersistentVolumeClaim not found: %q", pvcName) + if pvc.Spec.VolumeName == "" { + // PVC is not bound. It was either deleted and created again or + // it was forcefuly unbound by admin. The pod can still use the + // original PV where it was bound to -> log the error and count + // the PV towards the PV limit + glog.V(4).Infof("PVC %s/%s is not bound, assuming PVC matches predicate when counting limits", namespace, pvcName) + filteredVolumes[pvId] = true + continue } pvName := pvc.Spec.VolumeName - if pvName == "" { - return fmt.Errorf("PersistentVolumeClaim is not bound: %q", pvcName) - } - pv, err := c.pvInfo.GetPersistentVolumeInfo(pvName) - if err != nil { + if err != nil || pv == nil { // if the PV is not found, log the error // and count the PV towards the PV limit - // generate a random volume ID since it is required for de-dup glog.V(4).Infof("Unable to look up PV info for %s/%s/%s, assuming PV matches predicate when counting limits: %v", namespace, pvcName, pvName, err) - source := rand.NewSource(time.Now().UnixNano()) - generatedID := "missingPV" + strconv.Itoa(rand.New(source).Intn(1000000)) - filteredVolumes[generatedID] = true - return nil - } - - if pv == nil { - return fmt.Errorf("PersistentVolume not found: %q", pvName) + filteredVolumes[pvId] = true + continue } if id, ok := c.filter.FilterPersistentVolume(pv); ok { @@ -268,8 +263,15 @@ func (c *MaxPDVolumeCountChecker) predicate(pod *v1.Pod, meta algorithm.Predicat return true, nil, nil } + // randomPrefix is a prefix of auxiliary volume IDs when we don't know the + // real volume ID, e.g. because the corresponding PV or PVC was deleted. It + // is random to avoid conflicts with real volume IDs and it needs to be + // stable in whole predicate() call so a deleted PVC used by two pods is + // counted as one volume and not as two. + randomPrefix := rand.String(32) + newVolumes := make(map[string]bool) - if err := c.filterVolumes(pod.Spec.Volumes, pod.Namespace, newVolumes); err != nil { + if err := c.filterVolumes(pod.Spec.Volumes, pod.Namespace, randomPrefix, newVolumes); err != nil { return false, nil, err } @@ -281,7 +283,7 @@ func (c *MaxPDVolumeCountChecker) predicate(pod *v1.Pod, meta algorithm.Predicat // count unique volumes existingVolumes := make(map[string]bool) for _, existingPod := range nodeInfo.Pods() { - if err := c.filterVolumes(existingPod.Spec.Volumes, existingPod.Namespace, existingVolumes); err != nil { + if err := c.filterVolumes(existingPod.Spec.Volumes, existingPod.Namespace, randomPrefix, existingVolumes); err != nil { return false, nil, err } } diff --git a/vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/predicates_test.go b/vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/predicates_test.go index d085d9d6f6e4..a148e112bc28 100644 --- a/vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/predicates_test.go +++ b/vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/predicates_test.go @@ -1708,6 +1708,26 @@ func TestEBSVolumeCountConflicts(t *testing.T) { }, }, } + twoDeletedPVCPod := &v1.Pod{ + Spec: v1.PodSpec{ + Volumes: []v1.Volume{ + { + VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "deletedPVC", + }, + }, + }, + { + VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "anotherDeletedPVC", + }, + }, + }, + }, + }, + } deletedPVPod := &v1.Pod{ Spec: v1.PodSpec{ Volumes: []v1.Volume{ @@ -1721,9 +1741,79 @@ func TestEBSVolumeCountConflicts(t *testing.T) { }, }, } + // deletedPVPod2 is a different pod than deletedPVPod but using the same PVC + deletedPVPod2 := &v1.Pod{ + Spec: v1.PodSpec{ + Volumes: []v1.Volume{ + { + VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "pvcWithDeletedPV", + }, + }, + }, + }, + }, + } + // anotherDeletedPVPod is a different pod than deletedPVPod and uses another PVC + anotherDeletedPVPod := &v1.Pod{ + Spec: v1.PodSpec{ + Volumes: []v1.Volume{ + { + VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "anotherPVCWithDeletedPV", + }, + }, + }, + }, + }, + } emptyPod := &v1.Pod{ Spec: v1.PodSpec{}, } + unboundPVCPod := &v1.Pod{ + Spec: v1.PodSpec{ + Volumes: []v1.Volume{ + { + VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "unboundPVC", + }, + }, + }, + }, + }, + } + // Different pod than unboundPVCPod, but using the same unbound PVC + unboundPVCPod2 := &v1.Pod{ + Spec: v1.PodSpec{ + Volumes: []v1.Volume{ + { + VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "unboundPVC", + }, + }, + }, + }, + }, + } + + // pod with unbound PVC that's different to unboundPVC + anotherUnboundPVCPod := &v1.Pod{ + Spec: v1.PodSpec{ + Volumes: []v1.Volume{ + { + VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "anotherUnboundPVC", + }, + }, + }, + }, + }, + } tests := []struct { newPod *v1.Pod @@ -1809,6 +1899,13 @@ func TestEBSVolumeCountConflicts(t *testing.T) { fits: true, test: "pod with missing PVC is counted towards the PV limit", }, + { + newPod: ebsPVCPod, + existingPods: []*v1.Pod{oneVolPod, twoDeletedPVCPod}, + maxVols: 3, + fits: false, + test: "pod with missing two PVCs is counted towards the PV limit twice", + }, { newPod: ebsPVCPod, existingPods: []*v1.Pod{oneVolPod, deletedPVPod}, @@ -1823,6 +1920,48 @@ func TestEBSVolumeCountConflicts(t *testing.T) { fits: true, test: "pod with missing PV is counted towards the PV limit", }, + { + newPod: deletedPVPod2, + existingPods: []*v1.Pod{oneVolPod, deletedPVPod}, + maxVols: 2, + fits: true, + test: "two pods missing the same PV are counted towards the PV limit only once", + }, + { + newPod: anotherDeletedPVPod, + existingPods: []*v1.Pod{oneVolPod, deletedPVPod}, + maxVols: 2, + fits: false, + test: "two pods missing different PVs are counted towards the PV limit twice", + }, + { + newPod: ebsPVCPod, + existingPods: []*v1.Pod{oneVolPod, unboundPVCPod}, + maxVols: 2, + fits: false, + test: "pod with unbound PVC is counted towards the PV limit", + }, + { + newPod: ebsPVCPod, + existingPods: []*v1.Pod{oneVolPod, unboundPVCPod}, + maxVols: 3, + fits: true, + test: "pod with unbound PVC is counted towards the PV limit", + }, + { + newPod: unboundPVCPod2, + existingPods: []*v1.Pod{oneVolPod, unboundPVCPod}, + maxVols: 2, + fits: true, + test: "the same unbound PVC in multiple pods is counted towards the PV limit only once", + }, + { + newPod: anotherUnboundPVCPod, + existingPods: []*v1.Pod{oneVolPod, unboundPVCPod}, + maxVols: 2, + fits: false, + test: "two different unbound PVCs are counted towards the PV limit as two volumes", + }, } pvInfo := FakePersistentVolumeInfo{ @@ -1855,6 +1994,18 @@ func TestEBSVolumeCountConflicts(t *testing.T) { ObjectMeta: metav1.ObjectMeta{Name: "pvcWithDeletedPV"}, Spec: v1.PersistentVolumeClaimSpec{VolumeName: "pvcWithDeletedPV"}, }, + { + ObjectMeta: metav1.ObjectMeta{Name: "anotherPVCWithDeletedPV"}, + Spec: v1.PersistentVolumeClaimSpec{VolumeName: "anotherPVCWithDeletedPV"}, + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "unboundPVC"}, + Spec: v1.PersistentVolumeClaimSpec{VolumeName: ""}, + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "anotherUnboundPVC"}, + Spec: v1.PersistentVolumeClaimSpec{VolumeName: ""}, + }, } filter := VolumeFilter{ From 41001e574e4c63e1587c60bf7f6603eb4624360d Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Thu, 23 Nov 2017 14:07:20 +0100 Subject: [PATCH 2/2] UPSTREAM: 53989: Remove repeated random string generations in scheduler volume predicate --- .../algorithm/predicates/predicates.go | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/predicates.go b/vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/predicates.go index efe1fac490f0..4282fb82cdd2 100644 --- a/vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/predicates.go +++ b/vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/predicates.go @@ -177,6 +177,11 @@ type MaxPDVolumeCountChecker struct { maxVolumes int pvInfo PersistentVolumeInfo pvcInfo PersistentVolumeClaimInfo + + // The string below is generated randomly during the struct's initialization. + // It is used to prefix volumeID generated inside the predicate() method to + // avoid conflicts with any real volume. + randomVolumeIDPrefix string } // VolumeFilter contains information on how to filter PD Volumes when checking PD Volume caps @@ -195,16 +200,17 @@ type VolumeFilter struct { // the maximum. func NewMaxPDVolumeCountPredicate(filter VolumeFilter, maxVolumes int, pvInfo PersistentVolumeInfo, pvcInfo PersistentVolumeClaimInfo) algorithm.FitPredicate { c := &MaxPDVolumeCountChecker{ - filter: filter, - maxVolumes: maxVolumes, - pvInfo: pvInfo, - pvcInfo: pvcInfo, + filter: filter, + maxVolumes: maxVolumes, + pvInfo: pvInfo, + pvcInfo: pvcInfo, + randomVolumeIDPrefix: rand.String(32), } return c.predicate } -func (c *MaxPDVolumeCountChecker) filterVolumes(volumes []v1.Volume, namespace string, randomPrefix string, filteredVolumes map[string]bool) error { +func (c *MaxPDVolumeCountChecker) filterVolumes(volumes []v1.Volume, namespace string, filteredVolumes map[string]bool) error { for i := range volumes { vol := &volumes[i] if id, ok := c.filter.FilterVolume(vol); ok { @@ -216,8 +222,9 @@ func (c *MaxPDVolumeCountChecker) filterVolumes(volumes []v1.Volume, namespace s } // Until we know real ID of the volume use namespace/pvcName as substitute - // With a random prefix so it can't conflict with existing volume ID. - pvId := fmt.Sprintf("%s-%s/%s", randomPrefix, namespace, pvcName) + // with a random prefix (calculated and stored inside 'c' during initialization) + // to avoid conflicts with existing volume IDs. + pvId := fmt.Sprintf("%s-%s/%s", c.randomVolumeIDPrefix, namespace, pvcName) pvc, err := c.pvcInfo.GetPersistentVolumeClaimInfo(namespace, pvcName) if err != nil || pvc == nil { @@ -263,15 +270,8 @@ func (c *MaxPDVolumeCountChecker) predicate(pod *v1.Pod, meta algorithm.Predicat return true, nil, nil } - // randomPrefix is a prefix of auxiliary volume IDs when we don't know the - // real volume ID, e.g. because the corresponding PV or PVC was deleted. It - // is random to avoid conflicts with real volume IDs and it needs to be - // stable in whole predicate() call so a deleted PVC used by two pods is - // counted as one volume and not as two. - randomPrefix := rand.String(32) - newVolumes := make(map[string]bool) - if err := c.filterVolumes(pod.Spec.Volumes, pod.Namespace, randomPrefix, newVolumes); err != nil { + if err := c.filterVolumes(pod.Spec.Volumes, pod.Namespace, newVolumes); err != nil { return false, nil, err } @@ -283,7 +283,7 @@ func (c *MaxPDVolumeCountChecker) predicate(pod *v1.Pod, meta algorithm.Predicat // count unique volumes existingVolumes := make(map[string]bool) for _, existingPod := range nodeInfo.Pods() { - if err := c.filterVolumes(existingPod.Spec.Volumes, existingPod.Namespace, randomPrefix, existingVolumes); err != nil { + if err := c.filterVolumes(existingPod.Spec.Volumes, existingPod.Namespace, existingVolumes); err != nil { return false, nil, err } }