diff --git a/pkg/build/controller/buildpod/controller.go b/pkg/build/controller/buildpod/controller.go
index 8b6d1dea2721..1e5eb7d7dd96 100644
--- a/pkg/build/controller/buildpod/controller.go
+++ b/pkg/build/controller/buildpod/controller.go
@@ -121,6 +121,9 @@ func (bc *BuildPodController) HandlePod(pod *kapi.Pod) error {
 	nextStatus := build.Status.Phase
 	currentReason := build.Status.Reason
 
+	// if the build is marked failed, the build status reason has already been
+	// set (probably by the build pod itself), so don't do any updating here
+	// or we'll overwrite the correct value.
 	if build.Status.Phase != buildapi.BuildPhaseFailed {
 		switch pod.Status.Phase {
 		case kapi.PodPending:
@@ -176,9 +179,14 @@ func (bc *BuildPodController) HandlePod(pod *kapi.Pod) error {
 			build.Status.Message = ""
 		}
 	}
+
+	needsUpdate := false
 	// Update the build object when it progress to a next state or the reason for
-	// the current state changed.
-	if (!common.HasBuildPodNameAnnotation(build) || build.Status.Phase != nextStatus || build.Status.Phase == buildapi.BuildPhaseFailed) && !buildutil.IsBuildComplete(build) {
+	// the current state changed. Do not touch builds that are complete
+	// because we'd potentially be overwriting build state information set by the
+	// build pod directly.
+	if (!common.HasBuildPodNameAnnotation(build) || build.Status.Phase != nextStatus) && !buildutil.IsBuildComplete(build) {
+		needsUpdate = true
 		common.SetBuildPodNameAnnotation(build, pod.Name)
 		reason := ""
 		if len(build.Status.Reason) > 0 {
@@ -186,23 +194,33 @@ func (bc *BuildPodController) HandlePod(pod *kapi.Pod) error {
 		}
 		glog.V(4).Infof("Updating build %s/%s status %s -> %s%s", build.Namespace, build.Name, build.Status.Phase, nextStatus, reason)
 		build.Status.Phase = nextStatus
-
-		if buildutil.IsBuildComplete(build) {
-			common.SetBuildCompletionTimeAndDuration(build)
-		}
 		if build.Status.Phase == buildapi.BuildPhaseRunning {
 			now := unversioned.Now()
 			build.Status.StartTimestamp = &now
 		}
+	}
+
+	// we're going to get pod relist events for completed/failed pods, so
+	// there's no reason to re-update the build and rerun
+	// HandleBuildCompletion if we've already done it for this
+	// build previously.
+	buildWasComplete := build.Status.CompletionTimestamp != nil
+	if !buildWasComplete && buildutil.IsBuildComplete(build) && build.Status.Phase != buildapi.BuildPhaseCancelled {
+		needsUpdate = common.SetBuildCompletionTimeAndDuration(build)
+	}
+	if needsUpdate {
 		if err := bc.buildUpdater.Update(build.Namespace, build); err != nil {
 			return fmt.Errorf("failed to update build %s/%s: %v", build.Namespace, build.Name, err)
 		}
		glog.V(4).Infof("Build %s/%s status was updated to %s", build.Namespace, build.Name, build.Status.Phase)
-
-		if buildutil.IsBuildComplete(build) {
-			common.HandleBuildCompletion(build, bc.runPolicies)
-		}
 	}
+	// if the build was not previously marked complete but it's complete now,
+	// handle completion for it. otherwise ignore it because we've already
+	// handled its completion previously.
+	if !buildWasComplete && buildutil.IsBuildComplete(build) {
+		common.HandleBuildCompletion(build, bc.runPolicies)
+	}
+
 	return nil
 }
 
@@ -244,6 +262,7 @@ func (bc *BuildPodController) HandleBuildPodDeletion(pod *kapi.Pod) error {
 		if err := bc.buildUpdater.Update(build.Namespace, build); err != nil {
 			return fmt.Errorf("Failed to update build %s/%s: %v", build.Namespace, build.Name, err)
 		}
+		common.HandleBuildCompletion(build, bc.runPolicies)
 	}
 	return nil
 }
diff --git a/pkg/build/controller/buildpod/controller_test.go b/pkg/build/controller/buildpod/controller_test.go
index 5eaa3e9c0e25..d418f93ef815 100644
--- a/pkg/build/controller/buildpod/controller_test.go
+++ b/pkg/build/controller/buildpod/controller_test.go
@@ -158,7 +158,7 @@ func TestHandlePod(t *testing.T) {
 			outStatus:           buildapi.BuildPhaseComplete,
 			podStatus:           kapi.PodSucceeded,
 			exitCode:            0,
-			startTimestamp:      nil,
+			startTimestamp:      curtime,
 			completionTimestamp: curtime,
 		},
 		{ // 4
@@ -167,7 +167,7 @@ func TestHandlePod(t *testing.T) {
 			outStatus:           buildapi.BuildPhaseFailed,
 			podStatus:           kapi.PodFailed,
 			exitCode:            -1,
-			startTimestamp:      nil,
+			startTimestamp:      curtime,
 			completionTimestamp: curtime,
 		},
 		{ // 5
@@ -177,7 +177,7 @@ func TestHandlePod(t *testing.T) {
 			outStatus:           buildapi.BuildPhaseComplete,
 			podStatus:           kapi.PodSucceeded,
 			exitCode:            0,
 			buildUpdater:        &errBuildUpdater{},
-			startTimestamp:      nil,
+			startTimestamp:      curtime,
 			completionTimestamp: curtime,
 		},
 		{ // 6
diff --git a/pkg/build/controller/common/util.go b/pkg/build/controller/common/util.go
index f9f9ae037804..afd3841a7e91 100644
--- a/pkg/build/controller/common/util.go
+++ b/pkg/build/controller/common/util.go
@@ -11,12 +11,23 @@ import (
 	utilruntime "k8s.io/kubernetes/pkg/util/runtime"
 )
 
-func SetBuildCompletionTimeAndDuration(build *buildapi.Build) {
+// SetBuildCompletionTimeAndDuration will set the build completion timestamp
+// to the current time if it is nil. It will also set the start timestamp to
+// the same value if it is nil. Returns true if the build object was
+// modified.
+func SetBuildCompletionTimeAndDuration(build *buildapi.Build) bool {
+	if build.Status.CompletionTimestamp != nil {
+		return false
+	}
 	now := unversioned.Now()
 	build.Status.CompletionTimestamp = &now
-	if build.Status.StartTimestamp != nil {
-		build.Status.Duration = build.Status.CompletionTimestamp.Rfc3339Copy().Time.Sub(build.Status.StartTimestamp.Rfc3339Copy().Time)
+	// apparently this build completed so fast we didn't see the pod running event,
+	// so just use the completion time as the start time.
+	if build.Status.StartTimestamp == nil {
+		build.Status.StartTimestamp = &now
 	}
+	build.Status.Duration = build.Status.CompletionTimestamp.Rfc3339Copy().Time.Sub(build.Status.StartTimestamp.Rfc3339Copy().Time)
+	return true
 }
 
 func HandleBuildCompletion(build *buildapi.Build, runPolicies []policy.RunPolicy) {
diff --git a/pkg/build/controller/controller_test.go b/pkg/build/controller/controller_test.go
index 0ff0f3cfb683..ac69779b9d48 100644
--- a/pkg/build/controller/controller_test.go
+++ b/pkg/build/controller/controller_test.go
@@ -447,7 +447,7 @@ func TestCancelBuild(t *testing.T) {
 			inStatus:            buildapi.BuildPhaseNew,
 			outStatus:           buildapi.BuildPhaseCancelled,
 			exitCode:            0,
-			startTimestamp:      nil,
+			startTimestamp:      curtime,
 			completionTimestamp: curtime,
 		},
 		{ // 1
@@ -455,7 +455,7 @@ func TestCancelBuild(t *testing.T) {
 			outStatus:           buildapi.BuildPhaseCancelled,
 			podStatus:           kapi.PodRunning,
 			exitCode:            0,
-			startTimestamp:      nil,
+			startTimestamp:      curtime,
 			completionTimestamp: curtime,
 		},
 		{ // 2
@@ -463,7 +463,7 @@ func TestCancelBuild(t *testing.T) {
 			outStatus:           buildapi.BuildPhaseCancelled,
 			podStatus:           kapi.PodRunning,
 			exitCode:            0,
-			startTimestamp:      nil,
+			startTimestamp:      curtime,
 			completionTimestamp: curtime,
 		},
 		{ // 3
diff --git a/pkg/build/controller/policy/policy.go b/pkg/build/controller/policy/policy.go
index f3bf2b568cfb..e7b163299688 100644
--- a/pkg/build/controller/policy/policy.go
+++ b/pkg/build/controller/policy/policy.go
@@ -142,17 +142,21 @@ func handleComplete(lister buildclient.BuildLister, updater buildclient.BuildUpd
 	}
 	for _, build := range nextBuilds {
 		// TODO: replace with informer notification requeueing in the future
-		build.Annotations[buildapi.BuildAcceptedAnnotation] = uuid.NewRandom().String()
-		err := wait.Poll(500*time.Millisecond, 5*time.Second, func() (bool, error) {
-			err := updater.Update(build.Namespace, build)
-			if err != nil && errors.IsConflict(err) {
-				glog.V(5).Infof("Error updating build %s/%s: %v (will retry)", build.Namespace, build.Name, err)
-				return false, nil
+
+		// only set the annotation once.
+		if _, ok := build.Annotations[buildapi.BuildAcceptedAnnotation]; !ok {
+			build.Annotations[buildapi.BuildAcceptedAnnotation] = uuid.NewRandom().String()
+			err := wait.Poll(500*time.Millisecond, 5*time.Second, func() (bool, error) {
+				err := updater.Update(build.Namespace, build)
+				if err != nil && errors.IsConflict(err) {
+					glog.V(5).Infof("Error updating build %s/%s: %v (will retry)", build.Namespace, build.Name, err)
+					return false, nil
+				}
+				return true, err
+			})
+			if err != nil {
+				return err
 			}
-			return true, err
-		})
-		if err != nil {
-			return err
 		}
 	}
 	return nil
diff --git a/test/extended/builds/run_policy.go b/test/extended/builds/run_policy.go
index 2e7434333c7d..42a32c8c3347 100644
--- a/test/extended/builds/run_policy.go
+++ b/test/extended/builds/run_policy.go
@@ -132,10 +132,21 @@ var _ = g.Describe("[builds][Slow] using build configuration runPolicy", func()
 			defer buildWatch.Stop()
 			o.Expect(err).NotTo(o.HaveOccurred())
 
+			sawCompletion := false
+			var lastCompletion time.Time
 			for {
 				event := <-buildWatch.ResultChan()
 				build := event.Object.(*buildapi.Build)
+				if build.Status.Phase == buildapi.BuildPhaseComplete {
+					lastCompletion = time.Now()
+					o.Expect(build.Status.StartTimestamp).ToNot(o.BeNil(), "completed builds should have a valid start time")
+					o.Expect(build.Status.CompletionTimestamp).ToNot(o.BeNil(), "completed builds should have a valid completion time")
+					sawCompletion = true
+				}
 				if build.Status.Phase == buildapi.BuildPhaseRunning {
+					if !lastCompletion.IsZero() {
+						o.Expect(time.Now().Sub(lastCompletion)).To(o.BeNumerically("<", 10*time.Second), "next build should have started less than 10s after last completed build")
+					}
 					// Ignore events from complete builds (if there are any) if we already
 					// verified the build.
 					if _, exists := buildVerified[build.Name]; exists {
@@ -166,6 +177,7 @@ var _ = g.Describe("[builds][Slow] using build configuration runPolicy", func()
 				break
 			}
 		}
+		o.Expect(sawCompletion).To(o.BeTrue(), "should have seen at least one build complete")
 	})
 })
 
@@ -212,6 +224,78 @@ var _ = g.Describe("[builds][Slow] using build configuration runPolicy", func()
 	})
 })
 
+	g.Describe("build configuration with Serial build run policy handling failure", func() {
+		g.It("starts the next build immediately after one fails", func() {
+			g.By("starting multiple builds")
+			bcName := "sample-serial-build-fail"
+
+			for i := 0; i < 3; i++ {
+				_, _, err := exutil.StartBuild(oc, bcName)
+				o.Expect(err).NotTo(o.HaveOccurred())
+			}
+
+			buildWatch, err := oc.Client().Builds(oc.Namespace()).Watch(kapi.ListOptions{
+				LabelSelector: buildutil.BuildConfigSelector(bcName),
+			})
+			defer buildWatch.Stop()
+			o.Expect(err).NotTo(o.HaveOccurred())
+
+			var failTime, failTime2 time.Time
+			done, timestamps1, timestamps2, timestamps3 := false, false, false, false
+
+			for !done {
+				select {
+				case event := <-buildWatch.ResultChan():
+					build := event.Object.(*buildapi.Build)
+					if build.Status.Phase == buildapi.BuildPhaseFailed {
+						if build.Name == "sample-serial-build-fail-1" {
+							// the timestamps may not be set on the first event that reports this build
+							// as failed, because the build gets marked failed by the build pod but the
+							// timestamps get set by the buildpod controller.
+							if build.Status.CompletionTimestamp != nil {
+								o.Expect(build.Status.StartTimestamp).ToNot(o.BeNil(), "failed builds should have a valid start time")
+								o.Expect(build.Status.CompletionTimestamp).ToNot(o.BeNil(), "failed builds should have a valid completion time")
+								timestamps1 = true
+							}
+							failTime = time.Now()
+						}
+						if build.Name == "sample-serial-build-fail-2" {
+							duration := time.Now().Sub(failTime)
+							o.Expect(duration).To(o.BeNumerically("<", 10*time.Second), "next build should have started less than 10s after failed build")
+							if build.Status.CompletionTimestamp != nil {
+								o.Expect(build.Status.StartTimestamp).ToNot(o.BeNil(), "failed builds should have a valid start time")
+								o.Expect(build.Status.CompletionTimestamp).ToNot(o.BeNil(), "failed builds should have a valid completion time")
+								timestamps2 = true
+							}
+							failTime2 = time.Now()
+						}
+						if build.Name == "sample-serial-build-fail-3" {
+							duration := time.Now().Sub(failTime2)
+							o.Expect(duration).To(o.BeNumerically("<", 10*time.Second), "next build should have started less than 10s after failed build")
+							if build.Status.CompletionTimestamp != nil {
+								o.Expect(build.Status.StartTimestamp).ToNot(o.BeNil(), "failed builds should have a valid start time")
+								o.Expect(build.Status.CompletionTimestamp).ToNot(o.BeNil(), "failed builds should have a valid completion time")
+								timestamps3 = true
+							}
+						}
+					}
+					// once we have all the expected timestamps, or we run out of time, we can bail.
+					if timestamps1 && timestamps2 && timestamps3 {
+						done = true
+					}
+				case <-time.After(2 * time.Minute):
+					// we've waited as long as we dare, go see if we got all the timestamp data we expected.
+					// if we have the timestamp data, we also know that we checked the next build start latency.
+					done = true
+				}
+			}
+			o.Expect(timestamps1).To(o.BeTrue(), "failed builds should have start and completion timestamps set")
+			o.Expect(timestamps2).To(o.BeTrue(), "failed builds should have start and completion timestamps set")
+			o.Expect(timestamps3).To(o.BeTrue(), "failed builds should have start and completion timestamps set")
+
+		})
+	})
+
 	g.Describe("build configuration with SerialLatestOnly build run policy", func() {
 		g.It("runs the builds in serial order but cancel previous builds", func() {
 			g.By("starting multiple builds")
diff --git a/test/extended/testdata/bindata.go b/test/extended/testdata/bindata.go
index eee3d287d49e..ad8a2c3b067b 100644
--- a/test/extended/testdata/bindata.go
+++ b/test/extended/testdata/bindata.go
@@ -5626,10 +5626,27 @@ var _testExtendedTestdataRun_policySerialBcYaml = []byte(`---
           from:
             kind: "DockerImage"
            name: "centos/ruby-22-centos7"
-      resources: {}
-    status:
-      lastVersion: 0
-
+  -
+    kind: "BuildConfig"
+    apiVersion: "v1"
+    metadata:
+      name: "sample-serial-build-fail"
+    spec:
+      runPolicy: "Serial"
+      triggers:
+        -
+          type: "imageChange"
+          imageChange: {}
+      source:
+        type: "Git"
+        git:
+          uri: "git://github.com/openshift/invalidrepo.git"
+      strategy:
+        type: "Source"
+        sourceStrategy:
+          from:
+            kind: "DockerImage"
+            name: "centos/ruby-22-centos7"
 `)
 
 func testExtendedTestdataRun_policySerialBcYamlBytes() ([]byte, error) {
diff --git a/test/extended/testdata/run_policy/serial-bc.yaml b/test/extended/testdata/run_policy/serial-bc.yaml
index 3b348bd40649..654941ad6463 100644
--- a/test/extended/testdata/run_policy/serial-bc.yaml
+++ b/test/extended/testdata/run_policy/serial-bc.yaml
@@ -24,7 +24,24 @@
           from:
            kind: "DockerImage"
            name: "centos/ruby-22-centos7"
-      resources: {}
-    status:
-      lastVersion: 0
-
+  -
+    kind: "BuildConfig"
+    apiVersion: "v1"
"v1" + metadata: + name: "sample-serial-build-fail" + spec: + runPolicy: "Serial" + triggers: + - + type: "imageChange" + imageChange: {} + source: + type: "Git" + git: + uri: "git://github.com/openshift/invalidrepo.git" + strategy: + type: "Source" + sourceStrategy: + from: + kind: "DockerImage" + name: "centos/ruby-22-centos7"