Skip to content

Commit 713c47b

Browse files
Disruptive worker tests delete machines and ensure recovery
1 parent abe4e29 commit 713c47b

File tree

1 file changed

+210
-0
lines changed

1 file changed

+210
-0
lines changed

test/extended/machines/workers.go

Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
package operators
2+
3+
import (
4+
"bytes"
5+
"fmt"
6+
"text/tabwriter"
7+
"time"
8+
9+
g "github.com/onsi/ginkgo"
10+
o "github.com/onsi/gomega"
11+
"github.com/stretchr/objx"
12+
13+
corev1 "k8s.io/api/core/v1"
14+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
15+
"k8s.io/apimachinery/pkg/runtime/schema"
16+
"k8s.io/apimachinery/pkg/util/sets"
17+
"k8s.io/apimachinery/pkg/util/wait"
18+
"k8s.io/client-go/dynamic"
19+
e2e "k8s.io/kubernetes/test/e2e/framework"
20+
)
21+
22+
const (
	// machineLabelSelectorWorker selects machines created for the worker role.
	machineLabelSelectorWorker = "machine.openshift.io/cluster-api-machine-role=worker"
	// machineAPINamespace is the namespace the machine API operator manages machines in.
	machineAPINamespace = "openshift-machine-api"
	// nodeLabelSelectorWorker selects nodes carrying the worker role label.
	nodeLabelSelectorWorker = "node-role.kubernetes.io/worker"

	// time after purge of machine to wait for replacement and ready node
	// TODO: tighten this further based on node lifecycle controller [appears to be ~5m30s]
	machineRepairWait = 7 * time.Minute
)
31+
32+
// machineClient returns a client for machines scoped to the proper namespace
33+
func machineClient(dc dynamic.Interface) dynamic.ResourceInterface {
34+
machineClient := dc.Resource(schema.GroupVersionResource{Group: "machine.openshift.io", Resource: "machines", Version: "v1beta1"})
35+
return machineClient.Namespace(machineAPINamespace)
36+
}
37+
38+
// listMachines list all machines scoped by selector
39+
func listMachines(dc dynamic.Interface, labelSelector string) ([]objx.Map, error) {
40+
mc := machineClient(dc)
41+
obj, err := mc.List(metav1.ListOptions{
42+
LabelSelector: labelSelector,
43+
})
44+
if err != nil {
45+
return nil, err
46+
}
47+
machines := objx.Map(obj.UnstructuredContent())
48+
items := objects(machines.Get("items"))
49+
return items, nil
50+
}
51+
52+
// deleteMachine deletes the named machine
53+
func deleteMachine(dc dynamic.Interface, machineName string) error {
54+
mc := machineClient(dc)
55+
return mc.Delete(machineName, &metav1.DeleteOptions{})
56+
}
57+
58+
// machineName returns the machine name
59+
func machineName(item objx.Map) string {
60+
return item.Get("metadata.name").String()
61+
}
62+
63+
// nodeNames returns the names of nodes
64+
func nodeNames(nodes []corev1.Node) sets.String {
65+
result := sets.NewString()
66+
for i := range nodes {
67+
result.Insert(nodes[i].Name)
68+
}
69+
return result
70+
}
71+
72+
// nodeNames returns the names of nodes
73+
func machineNames(machines []objx.Map) sets.String {
74+
result := sets.NewString()
75+
for i := range machines {
76+
result.Insert(machineName(machines[i]))
77+
}
78+
return result
79+
}
80+
81+
// mapNodeNameToMachineName returns a tuple (map node to machine by name, true if a match is found for every node)
82+
func mapNodeNameToMachineName(nodes []corev1.Node, machines []objx.Map) (map[string]string, bool) {
83+
result := map[string]string{}
84+
for i := range nodes {
85+
for j := range machines {
86+
if nodes[i].Name == nodeNameFromNodeRef(machines[j]) {
87+
result[nodes[i].Name] = machineName(machines[j])
88+
break
89+
}
90+
}
91+
}
92+
return result, len(nodes) == len(result)
93+
}
94+
95+
// mapMachineNameToNodeName returns a tuple (map node to machine by name, true if a match is found for every node)
96+
func mapMachineNameToNodeName(machines []objx.Map, nodes []corev1.Node) (map[string]string, bool) {
97+
result := map[string]string{}
98+
for i := range machines {
99+
for j := range nodes {
100+
if nodes[j].Name == nodeNameFromNodeRef(machines[i]) {
101+
result[machineName(machines[i])] = nodes[j].Name
102+
break
103+
}
104+
}
105+
}
106+
return result, len(machines) == len(result)
107+
}
108+
109+
var _ = g.Describe("[Feature:Machines][Disruptive] Managed cluster should", func() {
110+
defer g.GinkgoRecover()
111+
112+
g.It("recover from deleted worker machines", func() {
113+
cfg, err := e2e.LoadConfig()
114+
o.Expect(err).NotTo(o.HaveOccurred())
115+
c, err := e2e.LoadClientset()
116+
o.Expect(err).NotTo(o.HaveOccurred())
117+
dc, err := dynamic.NewForConfig(cfg)
118+
o.Expect(err).NotTo(o.HaveOccurred())
119+
120+
g.By("checking for the openshift machine api operator")
121+
// TODO: skip if platform != aws
122+
skipUnlessMachineAPIOperator(c.Core().Namespaces())
123+
124+
g.By("validating node and machine invariants")
125+
// fetch machines
126+
machines, err := listMachines(dc, machineLabelSelectorWorker)
127+
if err != nil {
128+
e2e.Failf("unable to fetch worker machines: %v", err)
129+
}
130+
numMachineWorkers := len(machines)
131+
if numMachineWorkers == 0 {
132+
e2e.Failf("cluster should have worker machines")
133+
}
134+
135+
// fetch nodes
136+
nodes, err := c.CoreV1().Nodes().List(metav1.ListOptions{
137+
LabelSelector: nodeLabelSelectorWorker,
138+
})
139+
o.Expect(err).NotTo(o.HaveOccurred())
140+
// map node -> machine
141+
nodeToMachine, nodeMatch := mapNodeNameToMachineName(nodes.Items, machines)
142+
if !nodeMatch {
143+
e2e.Failf("unable to map every node to machine. nodeToMachine: %v, nodeName: %v", nodeToMachine, nodeNames(nodes.Items))
144+
}
145+
machineToNode, machineMatch := mapMachineNameToNodeName(machines, nodes.Items)
146+
if !machineMatch {
147+
e2e.Failf("unable to map every machine to node. machineToNode: %v, machineNames: %v", machineToNode, machineNames(machines))
148+
}
149+
150+
g.By("deleting all worker nodes")
151+
for _, machine := range machines {
152+
machineName := machine.Get("metadata.name").String()
153+
if err := deleteMachine(dc, machineName); err != nil {
154+
e2e.Failf("Unable to delete machine %s/%s with error: %v", machineAPINamespace, machineName, err)
155+
}
156+
}
157+
158+
g.By("waiting for cluster to replace and recover workers")
159+
if pollErr := wait.PollImmediate(3*time.Second, machineRepairWait, func() (bool, error) {
160+
machines, err = listMachines(dc, machineLabelSelectorWorker)
161+
if err != nil {
162+
return false, nil
163+
}
164+
if numMachineWorkers != len(machines) {
165+
e2e.Logf("Waiting for %v machines, but only found: %v", numMachineWorkers, len(machines))
166+
return false, nil
167+
}
168+
nodes, err = c.CoreV1().Nodes().List(metav1.ListOptions{
169+
LabelSelector: nodeLabelSelectorWorker,
170+
})
171+
if err != nil {
172+
return false, nil
173+
}
174+
// map both data sets for easy comparison now
175+
nodeToMachine, nodeMatch = mapNodeNameToMachineName(nodes.Items, machines)
176+
machineToNode, machineMatch = mapMachineNameToNodeName(machines, nodes.Items)
177+
if !nodeMatch {
178+
e2e.Logf("unable to map every node to machine. nodeToMachine: %v\n, \tnodeName: %v", nodeToMachine, nodeNames(nodes.Items))
179+
return false, nil
180+
}
181+
if !machineMatch {
182+
e2e.Logf("unable to map every machine to node. machineToNode: %v\n, \tmachineNames: %v", machineToNode, machineNames(machines))
183+
return false, nil
184+
}
185+
return true, nil
186+
}); pollErr != nil {
187+
buf := &bytes.Buffer{}
188+
w := tabwriter.NewWriter(buf, 0, 4, 1, ' ', 0)
189+
fmt.Fprintf(w, "NAMESPACE\tNAME\tNODE NAME\n")
190+
for _, machine := range machines {
191+
ns := machine.Get("metadata.namespace").String()
192+
name := machine.Get("metadata.name").String()
193+
nodeName := nodeNameFromNodeRef(machine)
194+
fmt.Fprintf(w, "%s\t%s\t%s\n",
195+
ns,
196+
name,
197+
nodeName,
198+
)
199+
}
200+
w.Flush()
201+
e2e.Logf("Machines:\n%s", buf.String())
202+
e2e.Logf("Machines to nodes:\n%v", machineToNode)
203+
e2e.Logf("Node to machines:\n%v", nodeToMachine)
204+
e2e.Failf("Worker machines were not replaced as expected: %v", pollErr)
205+
}
206+
207+
// TODO: ensure all nodes are ready
208+
// TODO: ensure no pods pending
209+
})
210+
})

0 commit comments

Comments
 (0)