
Commit 3c4e514

Merge pull request #3800 from telepresenceio/thallgren/recursion-check-config
Make the DNS recursion check configurable and turn it off by default.
2 parents: d439fdc + 4a5846c

22 files changed: +324 -249 lines

CHANGELOG.yml (+6)

@@ -90,6 +90,12 @@ items:
     body: >-
       The output of the `telepresence list` command will now include the workload kind (deployment, replicaset,
       statefulset, or rollout) in all entries.
+  - type: change
+    title: Make the DNS recursion check configurable and turn it off by default.
+    body: >-
+      Very few systems experience a DNS recursion lookup problem. It can only occur when the cluster runs locally
+      and the cluster's DNS is configured to somehow use a DNS server that is started by Telepresence. The check
+      is therefore now configurable through the client setting `dns.recursionCheck`, and it is `false` by default.
  - type: change
    title: Trigger the mutating webhook with Kubernetes eviction objects instead of patching workloads.
    body: >-
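The new setting lands under the client's `dns` configuration. As a minimal sketch of turning the check back on (assuming the standard client `config.yml`; the file location varies by OS, e.g. `~/.config/telepresence/config.yml` on Linux):

    # Hedged sketch of a client config.yml re-enabling the check,
    # which this commit turns off by default.
    dns:
      recursionCheck: true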

docs/release-notes.md (+6)

@@ -72,6 +72,12 @@ namespaceSelector:
 The output of the `telepresence list` command will now include the workload kind (deployment, replicaset, statefulset, or rollout) in all entries.
 </div>

+## <div style="display:flex;"><img src="images/change.png" alt="change" style="width:30px;height:fit-content;"/><div style="display:flex;margin-left:7px;">Make the DNS recursion check configurable and turn it off by default.</div></div>
+<div style="margin-left: 15px">
+
+Very few systems experience a DNS recursion lookup problem. It can only occur when the cluster runs locally and the cluster's DNS is configured to somehow use a DNS server that is started by Telepresence. The check is therefore now configurable through the client setting `dns.recursionCheck`, and it is `false` by default.
+</div>
+
 ## <div style="display:flex;"><img src="images/change.png" alt="change" style="width:30px;height:fit-content;"/><div style="display:flex;margin-left:7px;">Trigger the mutating webhook with Kubernetes eviction objects instead of patching workloads.</div></div>
 <div style="margin-left: 15px">

docs/release-notes.mdx (+4)

@@ -68,6 +68,10 @@ namespaceSelector:
 <Title type="feature">List output includes workload kind.</Title>
 <Body>The output of the `telepresence list` command will now include the workload kind (deployment, replicaset, statefulset, or rollout) in all entries.</Body>
 </Note>
+<Note>
+<Title type="change">Make the DNS recursion check configurable and turn it off by default.</Title>
+<Body>Very few systems experience a DNS recursion lookup problem. It can only occur when the cluster runs locally and the cluster's DNS is configured to somehow use a DNS server that is started by Telepresence. The check is therefore now configurable through the client setting `dns.recursionCheck`, and it is `false` by default.</Body>
+</Note>
 <Note>
 <Title type="change">Trigger the mutating webhook with Kubernetes eviction objects instead of patching workloads.</Title>
 <Body>Instead of patching workloads, or scaling the workloads down to zero and up again, Telepresence will now create policy/v1 Eviction objects to trigger the mutating webhook. This causes a slight change in the traffic-manager RBAC. The `patch` permissions are no longer needed. Instead, the traffic-manager must be able to create "pod/eviction" objects.</Body>
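To make the RBAC note concrete: Kubernetes evictions are created through the core API group's `pods/eviction` subresource, so the rule the traffic-manager needs would look roughly like the fragment below. This is a hedged sketch, not copied from this commit's Helm chart.

    # Hypothetical Role/ClusterRole rule fragment: "create" on pods/eviction
    # replaces the former "patch" permission on workload resources.
    - apiGroups: [""]
      resources: ["pods/eviction"]
      verbs: ["create"]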

integration_test/inject_policy_test.go (+17 -6)

@@ -9,6 +9,8 @@ import (
 	"sync"
 	"time"

+	labels2 "k8s.io/apimachinery/pkg/labels"
+
 	"github.com/datawire/dlib/dlog"
 	"github.com/telepresenceio/telepresence/v2/integration_test/itest"
 	"github.com/telepresenceio/telepresence/v2/pkg/agentconfig"
@@ -105,14 +107,22 @@ func (is *installSuite) TestInjectPolicy() {
 }

 func (is *installSuite) applyMultipleServices(svcCount int) {
-	is.applyOrDeleteMultipleServices(svcCount, is.ApplyTemplate, true)
+	is.applyOrDeleteMultipleServices(svcCount, is.ApplyTemplate)
+	// And check that all pods receive a traffic-agent
+	is.Eventually(func() bool {
+		pods := itest.RunningPodsSelector(is.Context(), is.AppNamespace(), labels2.SelectorFromSet(map[string]string{
+			"multi-service-test": "inject",
+		}))
+		dlog.Infof(is.Context(), "pod count %d, expected %d", len(pods), svcCount)
+		return len(pods) == svcCount
+	}, 120*time.Second, 5*time.Second)
 }

 func (is *installSuite) deleteMultipleServices(svcCount int) {
-	is.applyOrDeleteMultipleServices(svcCount, is.DeleteTemplate, false)
+	is.applyOrDeleteMultipleServices(svcCount, is.DeleteTemplate)
 }

-func (is *installSuite) applyOrDeleteMultipleServices(svcCount int, applyOrDelete func(context.Context, string, any), wait bool) {
+func (is *installSuite) applyOrDeleteMultipleServices(svcCount int, applyOrDelete func(context.Context, string, any)) {
 	ctx := is.Context()
 	wg := sync.WaitGroup{}
 	wg.Add(svcCount)
@@ -128,10 +138,10 @@ func (is *installSuite) applyOrDeleteMultipleServices(svcCount int, applyOrDelete
 				Annotations: map[string]string{
 					agentconfig.InjectAnnotation: "enabled",
 				},
+				Labels: map[string]string{
+					"multi-service-test": "inject",
+				},
 			})
-			if wait {
-				is.NoError(is.RolloutStatusWait(ctx, "deploy/"+svc))
-			}
 		}()
 	}
 	wg.Wait()
@@ -181,6 +191,7 @@ func (is *installSuite) Test_MultiOnDemandInjectOnApply() {

 	// First install the traffic-manager
 	is.TelepresenceHelmInstallOK(ctx, false)
+	time.Sleep(3 * time.Second)
 	defer func() {
 		is.UninstallTrafficManager(ctx, is.ManagerNamespace())
 		is.Eventually(func() bool {

integration_test/injector_test.go (+26 -37)

@@ -18,43 +18,12 @@ import (
 // injection of a traffic-agent.
 // See ticket https://github.com/telepresenceio/telepresence/issues/3441 for more info.
 func (s *singleServiceSuite) Test_InterceptOperationRestoredAfterFailingInject() {
-	if !s.ClientIsVersion(">2.21.x") {
-		s.T().Skip("Not part of compatibility tests.")
+	if s.ClientIsVersion("<2.22.0") && s.ManagerIsVersion(">=2.22.0") {
+		s.T().Skip("Not part of compatibility tests. Clients < 2.22.0 cannot uninstall agents with traffic-manager >= 2.22.0")
 	}
 	ctx := s.Context()
 	rq := s.Require()

-	// Create an intercept and ensure that it lists as intercepted
-	stdout := itest.TelepresenceOk(ctx, "intercept", s.ServiceName(), "--mount=false")
-	rq.Contains(stdout, "Using Deployment "+s.ServiceName())
-	rq.Eventually(func() bool {
-		stdout, _, err := itest.Telepresence(ctx, "list", "--intercepts")
-		return err == nil && regexp.MustCompile(s.ServiceName()+`\s*: intercepted`).MatchString(stdout)
-	}, 12*time.Second, 3*time.Second)
-
-	// Leave the intercept. We are now 100% sure that an agent is present in the
-	// pod.
-	itest.TelepresenceOk(ctx, "leave", s.ServiceName())
-
-	// Break the TLS by temporarily disabling the agent-injector service. We do this by changing the port of the
-	// service that the webhook is calling.
-	portRestored := false
-	wh := "agent-injector-webhook-" + s.ManagerNamespace()
-	pmf := `{"webhooks":[{"name": "agent-injector-%s.getambassador.io", "clientConfig": {"service": {"name": "agent-injector", "port": %d}}}]}`
-	rq.NoError(itest.Kubectl(ctx, s.ManagerNamespace(), "patch", "mutatingwebhookconfiguration", wh,
-		"--patch", fmt.Sprintf(pmf, s.ManagerNamespace(), 8443)))
-
-	// Restore the webhook port when this test ends in case an error occurred that prevented it
-	defer func() {
-		if !portRestored {
-			s.NoError(itest.Kubectl(ctx, s.ManagerNamespace(), "patch", "mutatingwebhookconfiguration", wh,
-				"--patch", fmt.Sprintf(pmf, s.ManagerNamespace(), 443)))
-		}
-	}()
-
-	// Uninstall the agent.
-	itest.TelepresenceOk(ctx, "uninstall", s.ServiceName())
-
 	oneContainer := func() bool {
 		pods := itest.RunningPodNames(ctx, s.ServiceName(), s.AppNamespace())
 		if len(pods) != 1 {
@@ -80,11 +49,31 @@ func (s *singleServiceSuite) Test_InterceptOperationRestoredAfterFailingInject()
 		return false
 	}

-	// Verify that the pod has no agent
-	rq.Eventually(oneContainer, 30*time.Second, 3*time.Second)
+	// Ensure that agent is uninstalled.
+	so, se, err := itest.Telepresence(ctx, "uninstall", s.ServiceName())
+	// We don't care if it succeeds, but the output and error might be of interest when debugging.
+	dlog.Debugf(ctx, "stdout: %s, stderr %s, err: %v", so, se, err)
+
+	rq.Eventually(oneContainer, 60*time.Second, 3*time.Second)
+
+	// Break the TLS by temporarily disabling the agent-injector service. We do this by changing the port of the
+	// service that the webhook is calling.
+	wh := "agent-injector-webhook-" + s.ManagerNamespace()
+	pmf := `{"webhooks":[{"name": "agent-injector-%s.getambassador.io", "clientConfig": {"service": {"name": "agent-injector", "port": %d}}}]}`
+	rq.NoError(itest.Kubectl(ctx, s.ManagerNamespace(), "patch", "mutatingwebhookconfiguration", wh,
+		"--patch", fmt.Sprintf(pmf, s.ManagerNamespace(), 8443)))
+	portRestored := false
+
+	// Restore the webhook port when this test ends in case an error occurred that prevented it
+	defer func() {
+		if !portRestored {
+			s.NoError(itest.Kubectl(ctx, s.ManagerNamespace(), "patch", "mutatingwebhookconfiguration", wh,
+				"--patch", fmt.Sprintf(pmf, s.ManagerNamespace(), 443)))
+		}
+	}()

 	// Now try to intercept. This attempt will timeout because the agent is never injected.
-	_, _, err := itest.Telepresence(ctx, "intercept", s.ServiceName(), "--mount=false")
+	_, _, err = itest.Telepresence(ctx, "intercept", s.ServiceName(), "--mount=false")
 	// Wait for the intercept call to return. It must return an error.
 	rq.Error(err)

@@ -97,7 +86,7 @@ func (s *singleServiceSuite) Test_InterceptOperationRestoredAfterFailingInject()
 	portRestored = true

 	// Verify that intercept works OK again.
-	stdout = itest.TelepresenceOk(ctx, "intercept", s.ServiceName(), "--mount=false")
+	stdout := itest.TelepresenceOk(ctx, "intercept", s.ServiceName(), "--mount=false")
 	rq.Contains(stdout, "Using Deployment "+s.ServiceName())
 	rq.Eventually(func() bool {
 		stdout, _, err := itest.Telepresence(ctx, "list", "--intercepts")
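For readers following the test: the `pmf` patch above, expressed as YAML, amounts to roughly the fragment below (the namespace value is a placeholder). Pointing the webhook at service port 8443 breaks TLS to the agent-injector; restoring port 443 repairs it.

    # Hypothetical YAML form of the JSON patch the test applies with kubectl patch:
    webhooks:
      - name: agent-injector-<manager-namespace>.getambassador.io
        clientConfig:
          service:
            name: agent-injector
            port: 8443   # 443 restores normal operation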

integration_test/install_test.go (+7 -1)

@@ -66,6 +66,9 @@ func (is *installSuite) AmendSuiteContext(ctx context.Context) context.Context {
 }

 func (is *installSuite) Test_UpgradeRetainsValues() {
+	if is.ClientIsVersion("<2.22.0") && !is.ManagerVersion().EQ(is.ClientVersion()) {
+		is.T().Skip("Not part of compatibility tests. Client < 2.22.0 cannot handle helm --version flag.")
+	}
 	ctx := is.Context()
 	rq := is.Require()
 	is.TelepresenceHelmInstallOK(ctx, false, "--set", "logLevel=debug")
@@ -86,7 +89,7 @@ func (is *installSuite) Test_UpgradeRetainsValues() {
 	oldValues, err := getValues()
 	rq.NoError(err)
 	args := []string{"helm", "upgrade", "--namespace", is.ManagerNamespace()}
-	if !(is.ManagerVersion().EQ(is.ClientVersion()) || is.ManagerVersion().LT(version.Structured)) {
+	if !is.ManagerVersion().EQ(is.ClientVersion()) {
 		args = append(args, "--version", is.ManagerVersion().String())
 	}

@@ -130,6 +133,9 @@ func (is *installSuite) Test_UpgradeRetainsValues() {
 }

 func (is *installSuite) Test_HelmTemplateInstall() {
+	if !(is.ManagerVersion().EQ(version.Structured) && is.ClientVersion().EQ(version.Structured)) {
+		is.T().Skip("Not part of compatibility tests. PackageHelmChart assumes current version.")
+	}
 	ctx := is.Context()
 	require := is.Require()

integration_test/itest/cluster.go (+36 -7)

@@ -29,6 +29,7 @@ import (
 	"github.com/stretchr/testify/require"
 	core "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
+	"k8s.io/apimachinery/pkg/labels"
 	k8sruntime "k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/client-go/tools/clientcmd"
 	"k8s.io/client-go/tools/clientcmd/api"
@@ -52,7 +53,9 @@ import (
 )

 const (
-	TestUser = "telepresence-test-developer"
+	purposeLabel       = "tp-cli-testing"
+	AssignPurposeLabel = "purpose=" + purposeLabel
+	TestUser           = "telepresence-test-developer"
 )

 type Cluster interface {
@@ -285,7 +288,12 @@ func (s *cluster) Initialize(ctx context.Context) context.Context {
 	}

 	s.ensureQuit(ctx)
-	_ = Run(ctx, "kubectl", "delete", "ns", "-l", "purpose=tp-cli-testing")
+	s.ensureNoManager(ctx)
+	_ = Run(ctx, "kubectl", "delete", "ns", "-l", AssignPurposeLabel)
+	_ = Run(ctx, "kubectl", "delete", "-f", filepath.Join("testdata", "k8s", "client_rbac.yaml"))
+	_ = Run(ctx, "kubectl", "delete", "ns", "-l", AssignPurposeLabel)
+	_ = Run(ctx, "kubectl", "delete", "pv", "-l", AssignPurposeLabel)
+	_ = Run(ctx, "kubectl", "delete", "storageclass", "-l", AssignPurposeLabel)
 	return ctx
 }

@@ -345,9 +353,10 @@ func (s *cluster) tearDown(ctx context.Context) {
 	if s.kubeConfig != "" {
 		ctx = WithWorkingDir(ctx, GetOSSRoot(ctx))
 		_ = Run(ctx, "kubectl", "delete", "-f", filepath.Join("testdata", "k8s", "client_rbac.yaml"))
-		_ = Run(ctx, "kubectl", "delete", "--wait=false", "ns", "-l", "purpose=tp-cli-testing")
-		_ = Run(ctx, "kubectl", "delete", "--wait=false", "pv", "-l", "purpose=tp-cli-testing")
-		_ = Run(ctx, "kubectl", "delete", "--wait=false", "storageclass", "-l", "purpose=tp-cli-testing")
+		_ = Run(ctx, "kubectl", "delete", "--wait=false", "ns", "-l", AssignPurposeLabel)
+		_ = Run(ctx, "kubectl", "delete", "--wait=false", "pv", "-l", AssignPurposeLabel)
+		_ = Run(ctx, "kubectl", "delete", "--wait=false", "storageclass", "-l", AssignPurposeLabel)
+		_ = Run(ctx, "kubectl", "delete", "--wait=false", "mutatingwebhookconfigurations", "-l", AssignPurposeLabel)
 	}
 }

@@ -359,6 +368,22 @@ func (s *cluster) ensureQuit(ctx context.Context) {
 	_ = rmAsRoot(ctx, socket.RootDaemonPath(ctx))
 }

+func (s *cluster) ensureNoManager(ctx context.Context) {
+	out, err := Output(ctx, "helm", "list", "-A", "--output", "json")
+	t := getT(ctx)
+	require.NoError(t, err)
+	var es []map[string]any
+	err = json.Unmarshal([]byte(out), &es)
+	require.NoError(t, err)
+	ix := slices.IndexFunc(es, func(v map[string]any) bool {
+		return v["name"] == "traffic-manager"
+	})
+	if ix >= 0 {
+		e := es[ix]
+		t.Fatalf("%s is already installed in namespace %s. Please uninstall before testing.", e["chart"], e["namespace"])
+	}
+}
+
 // PodCreateTimeout will return a timeout suitable for operations that create pods.
 // This is longer when running against clusters that scale up nodes on demand for new pods.
 func PodCreateTimeout(c context.Context) time.Duration {
@@ -910,7 +935,7 @@ func CreateNamespaces(ctx context.Context, namespaces ...string) {
 		go func(ns string) {
 			defer wg.Done()
 			assert.NoError(t, Kubectl(ctx, "", "create", "namespace", ns), "failed to create namespace %q", ns)
-			assert.NoError(t, Kubectl(ctx, "", "label", "namespace", ns, "purpose="+purposeLabel, fmt.Sprintf("app.kubernetes.io/name=%s", ns)))
+			assert.NoError(t, Kubectl(ctx, "", "label", "namespace", ns, AssignPurposeLabel, fmt.Sprintf("app.kubernetes.io/name=%s", ns)))
 		}(ns)
 	}
 	wg.Wait()
@@ -1089,7 +1114,11 @@ func WithKubeConfig(ctx context.Context, cfg *api.Config) context.Context {
 }

 func RunningPods(ctx context.Context, svc, ns string) []core.Pod {
-	out, err := KubectlOut(ctx, ns, "get", "pods", "-o", "json", "--field-selector", "status.phase==Running", "-l", "app="+svc)
+	return RunningPodsSelector(ctx, ns, labels.SelectorFromSet(map[string]string{"app": svc}))
+}
+
+func RunningPodsSelector(ctx context.Context, ns string, selector labels.Selector) []core.Pod {
+	out, err := KubectlOut(ctx, ns, "get", "pods", "-o", "json", "--field-selector", "status.phase==Running", "-l", selector.String())
 	if err != nil {
 		getT(ctx).Log(err.Error())
 		return nil

integration_test/itest/helm.go (+3)

@@ -310,6 +310,9 @@ func (s *cluster) TelepresenceHelmInstall(ctx context.Context, upgrade bool, set
 		// Give the manager time to perform rollouts, listen to telepresence-agents configmap, etc.
 		time.Sleep(2 * time.Second)
 	}
+	if err != nil {
+		return "", err
+	}
 	return logFileName, nil
 }

integration_test/itest/namespace.go (-2)

@@ -96,8 +96,6 @@ func WithNamespacePair(ctx context.Context, suffix string, f func(NamespacePair)
 	})
 }

-const purposeLabel = "tp-cli-testing"
-
 func (s *nsPair) setup(ctx context.Context) bool {
 	CreateNamespaces(ctx, s.AppNamespace(), s.Namespace)
 	t := getT(ctx)

integration_test/itest/template.go (+1)

@@ -29,6 +29,7 @@ type ServicePort struct {
 type Generic struct {
 	Name         string
 	Annotations  map[string]string
+	Labels       map[string]string
 	Environment  []core.EnvVar
 	TargetPort   string
 	ServicePorts []ServicePort

integration_test/multiple_intercepts_test.go (+11)

@@ -68,6 +68,17 @@ func (s *multipleInterceptsSuite) TearDownSuite() {
 			cancel()
 		}
 	}
+	// Ensure that we have OK statuses on our services after leaving the intercept.
+	s.Eventually(func() bool {
+		stdout := itest.TelepresenceOk(ctx, "-n", s.AppNamespace(), "list")
+		for i := 0; i < s.ServiceCount(); i++ {
+			rx := regexp.MustCompile(fmt.Sprintf(`%s-%d\s*: ready to (engage|intercept)`, s.Name(), i))
+			if !rx.MatchString(stdout) {
+				return false
+			}
+		}
+		return true
+	}, 30*time.Second, 2*time.Second)
 }

 func (s *multipleInterceptsSuite) Test_Intercepts() {

integration_test/multiple_services_test.go (+2 -2)

@@ -49,9 +49,9 @@ func (s *multipleServicesSuite) Test_LargeRequest() {
 		s.TelepresenceConnect(s.Context())
 	}()

-	const sendSize = 1024 * 1024 * 16
+	const sendSize = 1024 * 1024 * 12
 	const varyMax = 1024 * 1024 * 4 // vary last 4Mi
-	const concurrentRequests = 100
+	const concurrentRequests = 64

 	tb := [sendSize + varyMax]byte{}
 	tb[0] = '!'

integration_test/testdata/k8s/generic.goyaml (+3)

@@ -41,6 +41,9 @@ spec:
     metadata:
       labels:
         app: {{ .Name }}
+        {{- with .Labels }}
+        {{- toYaml . | nindent 8 }}
+        {{- end}}
       {{- with .Annotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
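For illustration, with the `multi-service-test: inject` label that the updated test passes via `.Labels`, the template would render pod metadata roughly as below (the `app` value stands in for a hypothetical `.Name`; exact indentation depends on the surrounding manifest):

    metadata:
      labels:
        app: hello-0                 # from {{ .Name }}
        multi-service-test: inject   # rendered from .Labels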
