Commit 31e2c89 (2 parents: a424b03 + a71079a)

Merge pull request #563 from k82cn/automated-cherry-pick-of-#562-#560-upstream-release-0.4

Automated cherry pick of #562: Update PodGroup status. #560: Update PodGroup status.

16 files changed: +400 -420 lines

doc/design/podgroup-status.md (+62 -39)
@@ -31,67 +31,91 @@ const (
     // PodPending means the pod group has been accepted by the system, but scheduler can not allocate
     // enough resources to it.
     PodGroupPending PodGroupPhase = "Pending"
+
     // PodRunning means `spec.minMember` pods of PodGroups has been in running phase.
     PodGroupRunning PodGroupPhase = "Running"
-    // PodGroupRecovering means part of `spec.minMember` pods have exception, e.g. killed; scheduler will
-    // wait for related controller to recover it.
-    PodGroupRecovering PodGroupPhase = "Recovering"
-    // PodGroupUnschedulable means part of `spec.minMember` pods are running but the other part can not
-    // be scheduled, e.g. not enough resource; scheduler will wait for related controller to recover it.
-    PodGroupUnschedulable PodGroupPhase = "Unschedulable"
+
+    // PodGroupUnknown means part of `spec.minMember` pods are running but the other part can not
+    // be scheduled, e.g. not enough resource; scheduler will wait for related controller to recover it.
+    PodGroupUnknown PodGroupPhase = "Unknown"
 )
 
+type PodGroupConditionType string
+
 const (
-    // PodFailedReason is probed if pod of PodGroup failed
-    PodFailedReason string = "PodFailed"
-    // PodDeletedReason is probed if pod of PodGroup deleted
-    PodDeletedReason string = "PodDeleted"
-    // NotEnoughResourcesReason is probed if there're not enough resources to schedule pods
-    NotEnoughResourcesReason string = "NotEnoughResources"
-    // NotEnoughPodsReason is probed if there're not enough tasks compared to `spec.minMember`
-    NotEnoughPodsReason string = "NotEnoughTasks"
+    PodGroupUnschedulableType PodGroupConditionType = "Unschedulable"
 )
 
-// PodGroupState contains details for the current state of this pod group.
-type PodGroupState struct {
-    // Current phase of PodGroup.
-    Phase PodGroupPhase `json:"phase,omitempty" protobuf:"bytes,1,opt,name=phase"`
-
-    // Last time we probed to this Phase.
-    // +optional
-    LastProbeTime metav1.Time `json:"lastProbeTime,omitempty" protobuf:"bytes,2,opt,name=lastProbeTime"`
+// PodGroupCondition contains details for the current state of this pod group.
+type PodGroupCondition struct {
+    // Type is the type of the condition
+    Type PodGroupConditionType `json:"type,omitempty" protobuf:"bytes,1,opt,name=type"`
+
+    // Status is the status of the condition.
+    Status v1.ConditionStatus `json:"status,omitempty" protobuf:"bytes,2,opt,name=status"`
+
+    // The ID of condition transition.
+    TransitionID string `json:"transitionID,omitempty" protobuf:"bytes,3,opt,name=transitionID"`
 
     // Last time the phase transitioned from another to current phase.
     // +optional
-    LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty" protobuf:"bytes,3,opt,name=lastTransitionTime"`
+    LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty" protobuf:"bytes,4,opt,name=lastTransitionTime"`
 
     // Unique, one-word, CamelCase reason for the phase's last transition.
     // +optional
-    Reason string `json:"reason,omitempty" protobuf:"bytes,4,opt,name=reason"`
+    Reason string `json:"reason,omitempty" protobuf:"bytes,5,opt,name=reason"`
 
     // Human-readable message indicating details about last transition.
     // +optional
-    Message string `json:"message,omitempty" protobuf:"bytes,5,opt,name=message"`
+    Message string `json:"message,omitempty" protobuf:"bytes,6,opt,name=message"`
 }
 
+const (
+    // PodFailedReason is probed if pod of PodGroup failed
+    PodFailedReason string = "PodFailed"
+
+    // PodDeletedReason is probed if pod of PodGroup deleted
+    PodDeletedReason string = "PodDeleted"
+
+    // NotEnoughResourcesReason is probed if there're not enough resources to schedule pods
+    NotEnoughResourcesReason string = "NotEnoughResources"
+
+    // NotEnoughPodsReason is probed if there're not enough tasks compared to `spec.minMember`
+    NotEnoughPodsReason string = "NotEnoughTasks"
+)
+
+// PodGroupStatus represents the current state of a pod group.
 type PodGroupStatus struct {
-    ......
+    // Current phase of PodGroup.
+    Phase PodGroupPhase `json:"phase,omitempty" protobuf:"bytes,1,opt,name=phase"`
+
+    // The conditions of PodGroup.
+    // +optional
+    Conditions []PodGroupCondition `json:"conditions,omitempty" protobuf:"bytes,2,opt,name=conditions"`
 
+    // The number of actively running pods.
     // +optional
-    State PodGroupState `json:"state,omitempty" protobuf:"bytes,1,opt,name=state,casttype=State"`
+    Running int32 `json:"running,omitempty" protobuf:"bytes,3,opt,name=running"`
+
+    // The number of pods which reached phase Succeeded.
+    // +optional
+    Succeeded int32 `json:"succeeded,omitempty" protobuf:"bytes,4,opt,name=succeeded"`
+
+    // The number of pods which reached phase Failed.
+    // +optional
+    Failed int32 `json:"failed,omitempty" protobuf:"bytes,5,opt,name=failed"`
 }
+
 ```
 
 According to the PodGroup's lifecycle, the following phase/state transactions are reasonable. And related
 reasons will be appended to `Reason` field.
 
-| From          | To            | Reason  |
-|---------------|---------------|---------|
-| Pending       | Running       | When every pods of `spec.minMember` are running |
-| Pending       | Recovering    | When only part of `spec.minMember` are running and the other part pod are rejected by kubelet |
-| Running       | Recovering    | When part of `spec.minMember` have exception, e.g. kill |
-| Recovering    | Running       | When the failed pods re-run successfully |
-| Recovering    | Unschedulable | When the new pod can not be scheduled |
-| Unschedulable | Pending       | When all pods (`spec.minMember`) in PodGroups are deleted |
-| Unschedulable | Running       | When all pods (`spec.minMember`) are deleted |
-
+| From    | To      | Reason  |
+|---------|---------|---------|
+| Pending | Running | When every pods of `spec.minMember` are running |
+| Running | Unknown | When some pods of `spec.minMember` are restarted but can not be rescheduled |
+| Unknown | Pending | When all pods (`spec.minMember`) in PodGroups are deleted |
 
 ## Feature Interaction
 
@@ -110,8 +134,7 @@ Cluster-Autoscaler right now. Alternative solution will be proposed later for th
 ### Operators/Controllers
 
 The lifecycle of `PodGroup` are managed by operators/controllers, the scheduler only probes related state for
-controllers. For example, if `PodGroup` is `Unschedulable` for MPI job, the controller need to re-start all
-pods in `PodGroup`.
+controllers. For example, if `PodGroup` is `Unknown` for MPI job, the controller need to re-start all pods in `PodGroup`.
 
 ## Reference
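To make the new status API concrete, here is a minimal sketch of how a scheduler or controller might record that a gang cannot make progress, using only the types and constants defined above. The helper name markUnschedulable, the caller-supplied transitionID, and the choice of NotEnoughResourcesReason are illustrative assumptions, not part of this commit:

package example

import (
    "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

    kbv1 "github.com/kubernetes-sigs/kube-batch/pkg/apis/scheduling/v1alpha1"
)

// markUnschedulable is a hypothetical helper: it moves the PodGroup to the
// Unknown phase and appends an Unschedulable condition describing why, so the
// owning controller can decide how to recover the gang.
func markUnschedulable(pg *kbv1.PodGroup, transitionID, msg string) {
    pg.Status.Phase = kbv1.PodGroupUnknown
    pg.Status.Conditions = append(pg.Status.Conditions, kbv1.PodGroupCondition{
        Type:               kbv1.PodGroupUnschedulableType,
        Status:             v1.ConditionTrue,
        TransitionID:       transitionID, // e.g. an ID for the scheduling cycle (assumption)
        LastTransitionTime: metav1.Now(),
        Reason:             kbv1.NotEnoughResourcesReason,
        Message:            msg,
    })
}

The scheduler only records this state; acting on it is left to the related controller, which is the division of labor the Operators/Controllers section above describes.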
pkg/apis/scheduling/v1alpha1/types.go (+29 -30)
@@ -17,18 +17,10 @@ limitations under the License.
 package v1alpha1
 
 import (
+    "k8s.io/api/core/v1"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
-// Event represent the phase of PodGroup, e.g. pod-failed.
-type Event string
-
-const (
-    EvictEvent            Event = "Evict"
-    UnschedulableEvent    Event = "Unschedulable"
-    FailedSchedulingEvent Event = "FailedScheduling"
-)
-
 // PodGroupPhase is the phase of a pod group at the current time.
 type PodGroupPhase string
 
@@ -41,35 +33,39 @@ const (
     // PodRunning means `spec.minMember` pods of PodGroups has been in running phase.
     PodGroupRunning PodGroupPhase = "Running"
 
-    // PodGroupRecovering means part of `spec.minMember` pods have exception, e.g. killed; scheduler will
-    // wait for related controller to recover it.
-    PodGroupRecovering PodGroupPhase = "Recovering"
-
-    // PodGroupUnschedulable means part of `spec.minMember` pods are running but the other part can not
+    // PodGroupUnknown means part of `spec.minMember` pods are running but the other part can not
     // be scheduled, e.g. not enough resource; scheduler will wait for related controller to recover it.
-    PodGroupUnschedulable PodGroupPhase = "Unschedulable"
+    PodGroupUnknown PodGroupPhase = "Unknown"
 )
 
-// PodGroupState contains details for the current state of this pod group.
-type PodGroupState struct {
-    // Current phase of PodGroup.
-    Phase PodGroupPhase `json:"phase,omitempty" protobuf:"bytes,1,opt,name=phase"`
+type PodGroupConditionType string
 
-    // Last time we probed to this Phase.
-    // +optional
-    LastProbeTime metav1.Time `json:"lastProbeTime,omitempty" protobuf:"bytes,2,opt,name=lastProbeTime"`
+const (
+    PodGroupUnschedulableType PodGroupConditionType = "Unschedulable"
+)
+
+// PodGroupCondition contains details for the current state of this pod group.
+type PodGroupCondition struct {
+    // Type is the type of the condition
+    Type PodGroupConditionType `json:"type,omitempty" protobuf:"bytes,1,opt,name=type"`
+
+    // Status is the status of the condition.
+    Status v1.ConditionStatus `json:"status,omitempty" protobuf:"bytes,2,opt,name=status"`
+
+    // The ID of condition transition.
+    TransitionID string `json:"transitionID,omitempty" protobuf:"bytes,3,opt,name=transitionID"`
 
     // Last time the phase transitioned from another to current phase.
     // +optional
-    LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty" protobuf:"bytes,3,opt,name=lastTransitionTime"`
+    LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty" protobuf:"bytes,4,opt,name=lastTransitionTime"`
 
     // Unique, one-word, CamelCase reason for the phase's last transition.
     // +optional
-    Reason string `json:"reason,omitempty" protobuf:"bytes,4,opt,name=reason"`
+    Reason string `json:"reason,omitempty" protobuf:"bytes,5,opt,name=reason"`
 
     // Human-readable message indicating details about last transition.
     // +optional
-    Message string `json:"message,omitempty" protobuf:"bytes,5,opt,name=message"`
+    Message string `json:"message,omitempty" protobuf:"bytes,6,opt,name=message"`
 }
 
 const (
@@ -122,21 +118,24 @@ type PodGroupSpec struct {
 
 // PodGroupStatus represents the current state of a pod group.
 type PodGroupStatus struct {
-    // The state of PodGroup.
+    // Current phase of PodGroup.
+    Phase PodGroupPhase `json:"phase,omitempty" protobuf:"bytes,1,opt,name=phase"`
+
+    // The conditions of PodGroup.
     // +optional
-    State PodGroupState `json:"state,omitempty" protobuf:"bytes,1,opt,name=state,casttype=State"`
+    Conditions []PodGroupCondition `json:"conditions,omitempty" protobuf:"bytes,2,opt,name=conditions"`
 
     // The number of actively running pods.
     // +optional
-    Running int32 `json:"running,omitempty" protobuf:"bytes,2,opt,name=running"`
+    Running int32 `json:"running,omitempty" protobuf:"bytes,3,opt,name=running"`
 
     // The number of pods which reached phase Succeeded.
     // +optional
-    Succeeded int32 `json:"succeeded,omitempty" protobuf:"bytes,3,opt,name=succeeded"`
+    Succeeded int32 `json:"succeeded,omitempty" protobuf:"bytes,4,opt,name=succeeded"`
 
     // The number of pods which reached phase Failed.
     // +optional
-    Failed int32 `json:"failed,omitempty" protobuf:"bytes,4,opt,name=failed"`
+    Failed int32 `json:"failed,omitempty" protobuf:"bytes,5,opt,name=failed"`
 }
 
 // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
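Given these types, a consumer such as a job controller would typically inspect Status.Conditions rather than the phase alone. The following helper is a sketch written as if it lived alongside the types in package v1alpha1; the function itself is hypothetical and not part of this commit:

// podGroupUnschedulable reports whether the PodGroup currently carries an
// active Unschedulable condition, e.g. so an MPI-job controller can decide
// to restart the whole gang.
func podGroupUnschedulable(pg *PodGroup) bool {
    for _, c := range pg.Status.Conditions {
        if c.Type == PodGroupUnschedulableType && c.Status == v1.ConditionTrue {
            return true
        }
    }
    return false
}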

pkg/apis/scheduling/v1alpha1/zz_generated.deepcopy.go (+24 -19)

Generated deepcopy code; the diff is not rendered here.

pkg/scheduler/actions/allocate/allocate_test.go (+17 -9)
@@ -28,6 +28,7 @@ import (
     "k8s.io/apimachinery/pkg/api/resource"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/apimachinery/pkg/types"
+    "k8s.io/client-go/tools/record"
 
     kbv1 "github.com/kubernetes-sigs/kube-batch/pkg/apis/scheduling/v1alpha1"
     "github.com/kubernetes-sigs/kube-batch/pkg/scheduler/api"
@@ -113,12 +114,17 @@ func (fb *fakeBinder) Bind(p *v1.Pod, hostname string) error {
     return nil
 }
 
-type fakeTaskStatusUpdater struct {
+type fakeStatusUpdater struct {
 }
 
-func (ftsu *fakeTaskStatusUpdater) Update(pod *v1.Pod, podCondition *v1.PodCondition) error {
+func (ftsu *fakeStatusUpdater) UpdatePod(pod *v1.Pod, podCondition *v1.PodCondition) (*v1.Pod, error) {
     // do nothing here
-    return nil
+    return nil, nil
+}
+
+func (ftsu *fakeStatusUpdater) UpdatePodGroup(pg *kbv1.PodGroup) (*kbv1.PodGroup, error) {
+    // do nothing here
+    return nil, nil
 }
 
 type fakeVolumeBinder struct {
@@ -239,12 +245,14 @@ func TestAllocate(t *testing.T) {
         c: make(chan string),
     }
     schedulerCache := &cache.SchedulerCache{
-        Nodes:             make(map[string]*api.NodeInfo),
-        Jobs:              make(map[api.JobID]*api.JobInfo),
-        Queues:            make(map[api.QueueID]*api.QueueInfo),
-        Binder:            binder,
-        TaskStatusUpdater: &fakeTaskStatusUpdater{},
-        VolumeBinder:      &fakeVolumeBinder{},
+        Nodes:         make(map[string]*api.NodeInfo),
+        Jobs:          make(map[api.JobID]*api.JobInfo),
+        Queues:        make(map[api.QueueID]*api.QueueInfo),
+        Binder:        binder,
+        StatusUpdater: &fakeStatusUpdater{},
+        VolumeBinder:  &fakeVolumeBinder{},
+
+        Recorder: record.NewFakeRecorder(100),
     }
     for _, node := range test.nodes {
         schedulerCache.AddNode(node)
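The fake above also pins down the shape of the StatusUpdater interface that replaced TaskStatusUpdater in the scheduler cache. Reconstructed here from the fake's method set for reference; the real definition lives in the cache package and is not shown in this diff:

// StatusUpdater, as implied by fakeStatusUpdater: the scheduler can now write
// status back for individual pods and for whole PodGroups.
type StatusUpdater interface {
    UpdatePod(pod *v1.Pod, condition *v1.PodCondition) (*v1.Pod, error)
    UpdatePodGroup(pg *kbv1.PodGroup) (*kbv1.PodGroup, error)
}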

pkg/scheduler/api/helpers.go (-1)
@@ -99,6 +99,5 @@ func MergeErrors(errs ...error) error {
 // JobTerminated checkes whether job was terminated.
 func JobTerminated(job *JobInfo) bool {
     return job.PodGroup == nil &&
-        job.PDB == nil &&
         len(job.Tasks) == 0
 }
