Skip to content

Commit a9d9884

Browse files
authored
Merge branch 'main' into chore/more-linters
2 parents 9ea6b24 + aee7a26 commit a9d9884

File tree

9 files changed

+162
-142
lines changed

9 files changed

+162
-142
lines changed

.github/workflows/functional.yml

+7
Original file line number | Diff line number | Diff line change
@@ -119,6 +119,13 @@ jobs:
119119
make test${{ inputs.image == 'plus' && '-with-plus' || ''}} PREFIX=${ngf_prefix} TAG=${ngf_tag} GINKGO_LABEL=telemetry GW_SERVICE_TYPE=LoadBalancer
120120
working-directory: ./tests
121121

122+
- name: Run functional graceful-recovery tests
123+
run: |
124+
ngf_prefix=ghcr.io/nginxinc/nginx-gateway-fabric
125+
ngf_tag=${{ steps.ngf-meta.outputs.version }}
126+
make test${{ inputs.image == 'plus' && '-with-plus' || ''}} PREFIX=${ngf_prefix} TAG=${ngf_tag} GINKGO_LABEL=graceful-recovery GW_SERVICE_TYPE=LoadBalancer CLUSTER_NAME=${{ github.run_id }}
127+
working-directory: ./tests
128+
122129
- name: Run functional tests
123130
run: |
124131
ngf_prefix=ghcr.io/nginxinc/nginx-gateway-fabric

tests/Makefile

+1-1
Original file line number | Diff line number | Diff line change
@@ -124,7 +124,7 @@ test: ## Runs the functional tests on your default k8s cluster
124124
--image-tag=$(TAG) --version-under-test=$(NGF_VERSION) \
125125
--plus-enabled=$(PLUS_ENABLED) --ngf-image-repo=$(PREFIX) --nginx-image-repo=$(NGINX_PREFIX) --nginx-plus-image-repo=$(NGINX_PLUS_PREFIX) \
126126
--pull-policy=$(PULL_POLICY) --service-type=$(GW_SERVICE_TYPE) \
127-
--is-gke-internal-lb=$(GW_SVC_GKE_INTERNAL)
127+
--is-gke-internal-lb=$(GW_SVC_GKE_INTERNAL) --cluster-name=$(CLUSTER_NAME)
128128

129129
.PHONY: test-with-plus
130130
test-with-plus: PLUS_ENABLED=true

tests/graceful-recovery/graceful-recovery.md

-101
This file was deleted.

tests/suite/graceful_recovery_test.go

+149-40
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@ import (
55
"errors"
66
"fmt"
77
"net/http"
8+
"os/exec"
89
"strings"
910
"time"
1011

@@ -15,6 +16,7 @@ import (
1516
core "k8s.io/api/core/v1"
1617
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1718
"k8s.io/apimachinery/pkg/types"
19+
ctlr "sigs.k8s.io/controller-runtime"
1820
"sigs.k8s.io/controller-runtime/pkg/client"
1921
"sigs.k8s.io/yaml"
2022

@@ -28,7 +30,7 @@ const (
2830

2931
// Since checkContainerLogsForErrors may experience interference from previous tests (as explained in the function
3032
// documentation), it is recommended that this test be run separately from other tests.
31-
var _ = Describe("Graceful Recovery test", Ordered, Label("functional", "graceful-recovery"), func() {
33+
var _ = Describe("Graceful Recovery test", Ordered, Label("graceful-recovery"), func() {
3234
files := []string{
3335
"graceful-recovery/cafe.yaml",
3436
"graceful-recovery/cafe-secret.yaml",
@@ -45,10 +47,11 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("functional", "gracefu
4547

4648
var ngfPodName string
4749

48-
BeforeAll(func() {
50+
BeforeEach(func() {
4951
// this test is unique in that it will check the entire log of both ngf and nginx containers
5052
// for any errors, so in order to avoid errors generated in previous tests we will uninstall
51-
// NGF installed at the suite level, then re-deploy our own
53+
// NGF installed at the suite level, then re-deploy our own. We will also uninstall and re-install
54+
// NGF between each graceful-recovery test for the same reason.
5255
teardown(releaseName)
5356

5457
setup(getDefaultSetupCfg())
@@ -64,9 +67,7 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("functional", "gracefu
6467
if portFwdHTTPSPort != 0 {
6568
teaURL = fmt.Sprintf("%s:%d/tea", baseHTTPSURL, portFwdHTTPSPort)
6669
}
67-
})
6870

69-
BeforeEach(func() {
7071
ns = core.Namespace{
7172
ObjectMeta: metav1.ObjectMeta{
7273
Name: "graceful-recovery",
@@ -98,8 +99,97 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("functional", "gracefu
9899
It("recovers when nginx container is restarted", func() {
99100
runRecoveryTest(teaURL, coffeeURL, ngfPodName, nginxContainerName, files, &ns)
100101
})
102+
103+
It("recovers when drained node is restarted", func() {
104+
runRestartNodeWithDrainingTest(teaURL, coffeeURL, files, &ns)
105+
})
106+
107+
It("recovers when node is restarted abruptly", func() {
108+
runRestartNodeAbruptlyTest(teaURL, coffeeURL, files, &ns)
109+
})
101110
})
102111

112+
func runRestartNodeWithDrainingTest(teaURL, coffeeURL string, files []string, ns *core.Namespace) {
113+
runRestartNodeTest(teaURL, coffeeURL, files, ns, true)
114+
}
115+
116+
func runRestartNodeAbruptlyTest(teaURL, coffeeURL string, files []string, ns *core.Namespace) {
117+
runRestartNodeTest(teaURL, coffeeURL, files, ns, false)
118+
}
119+
120+
func runRestartNodeTest(teaURL, coffeeURL string, files []string, ns *core.Namespace, drain bool) {
121+
nodeNames, err := getNodeNames()
122+
Expect(err).ToNot(HaveOccurred())
123+
Expect(nodeNames).To(HaveLen(1))
124+
125+
kindNodeName := nodeNames[0]
126+
127+
Expect(clusterName).ToNot(BeNil(), "clusterName variable not set")
128+
Expect(*clusterName).ToNot(BeEmpty())
129+
containerName := *clusterName + "-control-plane"
130+
131+
if portFwdPort != 0 {
132+
close(portForwardStopCh)
133+
}
134+
135+
if drain {
136+
_, err := exec.Command(
137+
"kubectl",
138+
"drain",
139+
kindNodeName,
140+
"--ignore-daemonsets",
141+
"--delete-local-data",
142+
).CombinedOutput()
143+
Expect(err).ToNot(HaveOccurred())
144+
145+
_, err = exec.Command("kubectl", "delete", "node", kindNodeName).CombinedOutput()
146+
Expect(err).ToNot(HaveOccurred())
147+
}
148+
149+
_, err = exec.Command("docker", "restart", containerName).CombinedOutput()
150+
Expect(err).ToNot(HaveOccurred())
151+
152+
// need to wait for docker container to restart and be running before polling for ready NGF Pods or else we will error
153+
Eventually(
154+
func() bool {
155+
output, err := exec.Command(
156+
"docker",
157+
"inspect",
158+
"-f",
159+
"{{.State.Running}}",
160+
containerName,
161+
).CombinedOutput()
162+
return strings.TrimSpace(string(output)) == "true" && err == nil
163+
}).
164+
WithTimeout(timeoutConfig.CreateTimeout).
165+
WithPolling(500 * time.Millisecond).
166+
Should(BeTrue())
167+
168+
// ngf can often oscillate between ready and error, so we wait for a stable readiness in ngf
169+
var podNames []string
170+
Eventually(
171+
func() bool {
172+
podNames, err = framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetStatusTimeout)
173+
return len(podNames) == 1 && err == nil
174+
}).
175+
WithTimeout(timeoutConfig.CreateTimeout * 2).
176+
WithPolling(500 * time.Millisecond).
177+
MustPassRepeatedly(20).
178+
Should(BeTrue())
179+
180+
ngfPodName := podNames[0]
181+
Expect(ngfPodName).ToNot(BeEmpty())
182+
183+
if portFwdPort != 0 {
184+
ports := []string{fmt.Sprintf("%d:80", ngfHTTPForwardedPort), fmt.Sprintf("%d:443", ngfHTTPSForwardedPort)}
185+
portForwardStopCh = make(chan struct{})
186+
err = framework.PortForward(ctlr.GetConfigOrDie(), ngfNamespace, ngfPodName, ports, portForwardStopCh)
187+
Expect(err).ToNot(HaveOccurred())
188+
}
189+
190+
checkNGFFunctionality(teaURL, coffeeURL, ngfPodName, "", files, ns)
191+
}
192+
103193
func runRecoveryTest(teaURL, coffeeURL, ngfPodName, containerName string, files []string, ns *core.Namespace) {
104194
var (
105195
err error
@@ -127,36 +217,7 @@ func runRecoveryTest(teaURL, coffeeURL, ngfPodName, containerName string, files
127217
Should(Succeed())
128218
}
129219

130-
Eventually(
131-
func() error {
132-
return checkForWorkingTraffic(teaURL, coffeeURL)
133-
}).
134-
WithTimeout(timeoutConfig.RequestTimeout).
135-
WithPolling(500 * time.Millisecond).
136-
Should(Succeed())
137-
138-
Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed())
139-
140-
Eventually(
141-
func() error {
142-
return checkForFailingTraffic(teaURL, coffeeURL)
143-
}).
144-
WithTimeout(timeoutConfig.RequestTimeout).
145-
WithPolling(500 * time.Millisecond).
146-
Should(Succeed())
147-
148-
Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed())
149-
Expect(resourceManager.WaitForAppsToBeReadyWithPodCount(ns.Name, 2)).To(Succeed())
150-
151-
Eventually(
152-
func() error {
153-
return checkForWorkingTraffic(teaURL, coffeeURL)
154-
}).
155-
WithTimeout(timeoutConfig.RequestTimeout * 2).
156-
WithPolling(500 * time.Millisecond).
157-
Should(Succeed())
158-
159-
checkContainerLogsForErrors(ngfPodName, containerName == nginxContainerName)
220+
checkNGFFunctionality(teaURL, coffeeURL, ngfPodName, containerName, files, ns)
160221
}
161222

162223
func restartContainer(ngfPodName, containerName string) {
@@ -254,11 +315,41 @@ func expectRequestToFail(appURL, address string) error {
254315
return nil
255316
}
256317

257-
// checkContainerLogsForErrors checks both nginx and ngf container's logs for any possible errors.
258-
// Since this function retrieves all the logs from both containers and the NGF pod may be shared between tests,
259-
// the logs retrieved may contain log messages from previous tests, thus any errors in the logs from previous tests
260-
// may cause an interference with this test and cause this test to fail.
261-
// Additionally, when the NGINX process is killed, some errors are expected in the NGF logs while we wait for the
318+
func checkNGFFunctionality(teaURL, coffeeURL, ngfPodName, containerName string, files []string, ns *core.Namespace) {
319+
Eventually(
320+
func() error {
321+
return checkForWorkingTraffic(teaURL, coffeeURL)
322+
}).
323+
WithTimeout(timeoutConfig.RequestTimeout * 2).
324+
WithPolling(500 * time.Millisecond).
325+
Should(Succeed())
326+
327+
Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed())
328+
329+
Eventually(
330+
func() error {
331+
return checkForFailingTraffic(teaURL, coffeeURL)
332+
}).
333+
WithTimeout(timeoutConfig.RequestTimeout).
334+
WithPolling(500 * time.Millisecond).
335+
Should(Succeed())
336+
337+
Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed())
338+
Expect(resourceManager.WaitForAppsToBeReadyWithPodCount(ns.Name, 2)).To(Succeed())
339+
340+
Eventually(
341+
func() error {
342+
return checkForWorkingTraffic(teaURL, coffeeURL)
343+
}).
344+
WithTimeout(timeoutConfig.RequestTimeout * 2).
345+
WithPolling(500 * time.Millisecond).
346+
Should(Succeed())
347+
348+
checkContainerLogsForErrors(ngfPodName, containerName == nginxContainerName)
349+
}
350+
351+
// checkContainerLogsForErrors checks the logs of both the nginx and NGF containers for any possible errors.
352+
// When the NGINX process is killed, some errors are expected in the NGF logs while we wait for the
262353
// NGINX container to be restarted.
263354
func checkContainerLogsForErrors(ngfPodName string, checkNginxLogsOnly bool) {
264355
nginxLogs, err := resourceManager.GetPodLogs(
@@ -349,6 +440,24 @@ func getContainerRestartCount(ngfPodName, containerName string) (int, error) {
349440
return restartCount, nil
350441
}
351442

443+
func getNodeNames() ([]string, error) {
444+
ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout)
445+
defer cancel()
446+
var nodes core.NodeList
447+
448+
if err := k8sClient.List(ctx, &nodes); err != nil {
449+
return nil, fmt.Errorf("error listing nodes: %w", err)
450+
}
451+
452+
names := make([]string, 0, len(nodes.Items))
453+
454+
for _, node := range nodes.Items {
455+
names = append(names, node.Name)
456+
}
457+
458+
return names, nil
459+
}
460+
352461
func runNodeDebuggerJob(ngfPodName, jobScript string) (*v1.Job, error) {
353462
ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout)
354463
defer cancel()

0 commit comments

Comments
 (0)