package operators

import (
	"bytes"
	"fmt"
	"text/tabwriter"
	"time"

	g "github.com/onsi/ginkgo"
	o "github.com/onsi/gomega"
	"github.com/stretchr/objx"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/dynamic"
	e2e "k8s.io/kubernetes/test/e2e/framework"
)

const (
	machineLabelSelectorWorker = "machine.openshift.io/cluster-api-machine-role=worker"
	machineAPINamespace        = "openshift-machine-api"
	nodeLabelSelectorWorker    = "node-role.kubernetes.io/worker"

	// time to wait after deleting a machine for the replacement machine and a ready node to appear
	// TODO: tighten this further based on the node lifecycle controller (appears to be ~5m30s)
	machineRepairWait = 7 * time.Minute
)

// machineClient returns a client for machines scoped to the proper namespace
func machineClient(dc dynamic.Interface) dynamic.ResourceInterface {
	machineClient := dc.Resource(schema.GroupVersionResource{Group: "machine.openshift.io", Resource: "machines", Version: "v1beta1"})
	return machineClient.Namespace(machineAPINamespace)
}

// listMachines lists all machines matching the given label selector
func listMachines(dc dynamic.Interface, labelSelector string) ([]objx.Map, error) {
	mc := machineClient(dc)
	obj, err := mc.List(metav1.ListOptions{
		LabelSelector: labelSelector,
	})
	if err != nil {
		return nil, err
	}
	machines := objx.Map(obj.UnstructuredContent())
	items := objects(machines.Get("items"))
	return items, nil
}
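
// objects is referenced above but not defined in this diff; it is assumed to
// be a small helper defined elsewhere in this package that converts the objx
// value under "items" into a slice of objx.Map. A minimal sketch (the body
// and the objx accessors used are assumptions, not confirmed by this diff):
//
//	func objects(from *objx.Value) []objx.Map {
//		var values []objx.Map
//		switch {
//		case from.IsObjxMapSlice():
//			return from.ObjxMapSlice()
//		case from.IsInterSlice():
//			for _, i := range from.InterSlice() {
//				if msi, ok := i.(map[string]interface{}); ok {
//					values = append(values, objx.Map(msi))
//				}
//			}
//		}
//		return values
//	}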

// deleteMachine deletes the named machine
func deleteMachine(dc dynamic.Interface, machineName string) error {
	mc := machineClient(dc)
	return mc.Delete(machineName, &metav1.DeleteOptions{})
}

// machineName returns the machine name
func machineName(item objx.Map) string {
	return item.Get("metadata.name").String()
}

// nodeNames returns the names of nodes
func nodeNames(nodes []corev1.Node) sets.String {
	result := sets.NewString()
	for i := range nodes {
		result.Insert(nodes[i].Name)
	}
	return result
}

// machineNames returns the names of machines
func machineNames(machines []objx.Map) sets.String {
	result := sets.NewString()
	for i := range machines {
		result.Insert(machineName(machines[i]))
	}
	return result
}

// mapNodeNameToMachineName returns a tuple (map node to machine by name, true if a match is found for every node)
func mapNodeNameToMachineName(nodes []corev1.Node, machines []objx.Map) (map[string]string, bool) {
	result := map[string]string{}
	for i := range nodes {
		for j := range machines {
			if nodes[i].Name == nodeNameFromNodeRef(machines[j]) {
				result[nodes[i].Name] = machineName(machines[j])
				break
			}
		}
	}
	return result, len(nodes) == len(result)
}

// mapMachineNameToNodeName returns a tuple (map machine to node by name, true if a match is found for every machine)
func mapMachineNameToNodeName(machines []objx.Map, nodes []corev1.Node) (map[string]string, bool) {
	result := map[string]string{}
	for i := range machines {
		for j := range nodes {
			if nodes[j].Name == nodeNameFromNodeRef(machines[i]) {
				result[machineName(machines[i])] = nodes[j].Name
				break
			}
		}
	}
	return result, len(machines) == len(result)
}
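
// nodeNameFromNodeRef is referenced above but not defined in this diff; it is
// assumed to be defined elsewhere in this package. A minimal sketch, assuming
// the machine API records the backing node under status.nodeRef:
//
//	func nodeNameFromNodeRef(item objx.Map) string {
//		return item.Get("status.nodeRef.name").String()
//	}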

var _ = g.Describe("[Feature:Machines][Disruptive] Managed cluster should", func() {
	defer g.GinkgoRecover()

	g.It("recover from deleted worker machines", func() {
		cfg, err := e2e.LoadConfig()
		o.Expect(err).NotTo(o.HaveOccurred())
		c, err := e2e.LoadClientset()
		o.Expect(err).NotTo(o.HaveOccurred())
		dc, err := dynamic.NewForConfig(cfg)
		o.Expect(err).NotTo(o.HaveOccurred())

		g.By("checking for the openshift machine api operator")
		// TODO: skip if platform != aws
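		// skipUnlessMachineAPIOperator is not defined in this diff; it is assumed to
		// skip the test when the machine API operator is absent, for example by
		// probing for the openshift-machine-api namespace via the namespace client.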
		skipUnlessMachineAPIOperator(c.CoreV1().Namespaces())

		g.By("validating node and machine invariants")
		// fetch machines
		machines, err := listMachines(dc, machineLabelSelectorWorker)
		if err != nil {
			e2e.Failf("unable to fetch worker machines: %v", err)
		}
		numMachineWorkers := len(machines)
		if numMachineWorkers == 0 {
			e2e.Failf("cluster should have worker machines")
		}

		// fetch nodes
		nodes, err := c.CoreV1().Nodes().List(metav1.ListOptions{
			LabelSelector: nodeLabelSelectorWorker,
		})
		o.Expect(err).NotTo(o.HaveOccurred())
		// map node -> machine
		nodeToMachine, nodeMatch := mapNodeNameToMachineName(nodes.Items, machines)
		if !nodeMatch {
			e2e.Failf("unable to map every node to a machine. nodeToMachine: %v, nodeNames: %v", nodeToMachine, nodeNames(nodes.Items))
		}
		machineToNode, machineMatch := mapMachineNameToNodeName(machines, nodes.Items)
		if !machineMatch {
			e2e.Failf("unable to map every machine to a node. machineToNode: %v, machineNames: %v", machineToNode, machineNames(machines))
		}

		g.By("deleting all worker machines")
		for _, machine := range machines {
			name := machineName(machine)
			if err := deleteMachine(dc, name); err != nil {
				e2e.Failf("Unable to delete machine %s/%s with error: %v", machineAPINamespace, name, err)
			}
		}

		g.By("waiting for cluster to replace and recover workers")
		if pollErr := wait.PollImmediate(3*time.Second, machineRepairWait, func() (bool, error) {
			machines, err = listMachines(dc, machineLabelSelectorWorker)
			if err != nil {
				return false, nil
			}
			if numMachineWorkers != len(machines) {
				e2e.Logf("Waiting for %v machines, but only found: %v", numMachineWorkers, len(machines))
				return false, nil
			}
			nodes, err = c.CoreV1().Nodes().List(metav1.ListOptions{
				LabelSelector: nodeLabelSelectorWorker,
			})
			if err != nil {
				return false, nil
			}
			// map both data sets for easy comparison now
			nodeToMachine, nodeMatch = mapNodeNameToMachineName(nodes.Items, machines)
			machineToNode, machineMatch = mapMachineNameToNodeName(machines, nodes.Items)
			if !nodeMatch {
				e2e.Logf("unable to map every node to a machine. nodeToMachine: %v, nodeNames: %v", nodeToMachine, nodeNames(nodes.Items))
				return false, nil
			}
			if !machineMatch {
				e2e.Logf("unable to map every machine to a node. machineToNode: %v, machineNames: %v", machineToNode, machineNames(machines))
				return false, nil
			}
			return true, nil
		}); pollErr != nil {
			buf := &bytes.Buffer{}
			w := tabwriter.NewWriter(buf, 0, 4, 1, ' ', 0)
			fmt.Fprintf(w, "NAMESPACE\tNAME\tNODE NAME\n")
			for _, machine := range machines {
				ns := machine.Get("metadata.namespace").String()
				name := machineName(machine)
				nodeName := nodeNameFromNodeRef(machine)
				fmt.Fprintf(w, "%s\t%s\t%s\n", ns, name, nodeName)
			}
			w.Flush()
			e2e.Logf("Machines:\n%s", buf.String())
			e2e.Logf("Machines to nodes:\n%v", machineToNode)
			e2e.Logf("Node to machines:\n%v", nodeToMachine)
			e2e.Failf("Worker machines were not replaced as expected: %v", pollErr)
		}

		// TODO: ensure all nodes are ready
		// TODO: ensure no pods pending
	})
})