Skip to content

Commit 534e891

Browse files
committed
Fix DNS problem when sending single-label names to the traffic-manager.
Allow the use of a DNS search-path when performing `<name.namespace>` lookups from the traffic-manager. Signed-off-by: Thomas Hallgren <[email protected]>
1 parent 93f9036 commit 534e891

File tree

5 files changed

+79
-41
lines changed

5 files changed

+79
-41
lines changed

CHANGELOG.yml

+7
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@ items:
3333
A shared informer was sometimes reused when namespaces were removed and then later added again, leading
3434
to errors like "handler ... was not added to shared informer because it has stopped already".
3535
docs: https://github.com/telepresenceio/telepresence/issues/3831
36+
- type: bugfix
37+
title: Single label name DNS lookups didn't work unless at least one traffic-agent was installed
38+
body: >-
39+
A problem with incorrect handling of single label names in the traffic-manager's DNS resolver was fixed. The
40+
problem would cause lookups like `curl echo` to fail, even though telepresence was connected to a namespace
41+
containing an "echo" service, unless at least one of the workloads in the connected namespace had a
42+
traffic-agent.
3643
- version: 2.22.2
3744
date: 2025-03-28
3845
notes:

cmd/traffic/cmd/manager/service.go

+47-41
Original file line numberDiff line numberDiff line change
@@ -841,7 +841,7 @@ func (s *service) LookupDNS(ctx context.Context, request *rpc.DNSRequest) (respo
841841
default:
842842
result = rrs.String()
843843
}
844-
dlog.Debugf(ctx, "LookupDNS: %s %s -> %s", request.Name, qtn, result)
844+
dlog.Debugf(ctx, "%s %s -> %s", request.Name, qtn, result)
845845
}()
846846
}
847847

@@ -873,58 +873,64 @@ func (s *service) LookupDNS(ctx context.Context, request *rpc.DNSRequest) (respo
873873
dlog.Errorf(ctx, "AgentsLookupDNS %s %s: %v", request.Name, qtn, err)
874874
} else if rCode != state.RcodeNoAgents {
875875
if len(rrs) == 0 {
876-
dlog.Tracef(ctx, "LookupDNS on agents: %s %s -> %s", request.Name, qtn, dns2.RcodeToString[rCode])
876+
dlog.Tracef(ctx, "agents: %s %s -> %s", request.Name, qtn, dns2.RcodeToString[rCode])
877877
} else {
878-
dlog.Tracef(ctx, "LookupDNS on agents: %s %s -> %s", request.Name, qtn, rrs)
878+
dlog.Tracef(ctx, "agents: %s %s -> %s", request.Name, qtn, rrs)
879879
}
880880
}
881881
}
882882

883883
if rCode == state.RcodeNoAgents {
884-
client := s.state.GetClient(sessionID)
885-
name := request.Name
886-
restoreName := false
887-
nDots := 0
888-
if client != nil {
889-
for _, c := range name {
890-
if c == '.' {
891-
nDots++
892-
}
893-
}
894-
if nDots == 1 && client.Namespace != tmNamespace {
895-
noSearchDomain = client.Namespace + "."
896-
name += noSearchDomain
897-
restoreName = true
884+
rrs, rCode = s.lookupFromManager(ctx, sessionID, qType, request.Name, noSearchDomain)
885+
}
886+
return dnsproxy.ToRPC(rrs, rCode)
887+
}
888+
889+
func (s *service) lookupFromManager(ctx context.Context, sessionID tunnel.SessionID, qType uint16, qName, noSearchDomain string) (dnsproxy.RRs, int) {
890+
name := qName
891+
client := s.state.GetClient(sessionID)
892+
tmNamespace := managerutil.GetEnv(ctx).ManagerNamespace
893+
restoreName := false
894+
nDots := 0
895+
if client != nil {
896+
for _, c := range name {
897+
if c == '.' {
898+
nDots++
898899
}
899900
}
900-
dlog.Tracef(ctx, "LookupDNS on traffic-manager: %s", name)
901-
rrs, rCode, err = dnsproxy.Lookup(ctx, qType, name, noSearchDomain)
902-
if err != nil {
903-
// Could still be x.y.<client namespace>, but let's avoid x.<cluster domain>.<client namespace> and x.<client-namespace>.<client namespace>
904-
if client != nil && nDots > 1 && client.Namespace != tmNamespace && !strings.HasSuffix(name, s.dotClusterDomain) && !hasDomainSuffix(name, client.Namespace) {
905-
name += client.Namespace + "."
906-
restoreName = true
907-
dlog.Debugf(ctx, "LookupDNS on traffic-manager: %s", name)
908-
rrs, rCode, err = dnsproxy.Lookup(ctx, qType, name, noSearchDomain)
909-
}
910-
if err != nil {
911-
dlog.Tracef(ctx, "LookupDNS on traffic-manager: %s %s -> %s %s", request.Name, qtn, dns2.RcodeToString[rCode], err)
912-
return nil, err
913-
}
901+
if nDots == 1 && client.Namespace != tmNamespace {
902+
name += client.Namespace + "."
903+
restoreName = true
914904
}
915-
if len(rrs) == 0 {
916-
dlog.Tracef(ctx, "LookupDNS on traffic-manager: %s %s -> %s", request.Name, qtn, dns2.RcodeToString[rCode])
917-
} else {
918-
if restoreName {
919-
dlog.Tracef(ctx, "LookupDNS on traffic-manager: restore %s to %s", name, request.Name)
920-
for _, rr := range rrs {
921-
rr.Header().Name = request.Name
922-
}
905+
}
906+
dlog.Tracef(ctx, "traffic-manager: %s", name)
907+
qtn := dns2.TypeToString[qType]
908+
rrs, rCode, err := dnsproxy.Lookup(ctx, qType, name, noSearchDomain)
909+
if err == nil && rCode == dns2.RcodeNameError {
910+
// Could still be x.y.<client namespace>, but let's avoid x.<cluster domain>.<client namespace> and x.<client-namespace>.<client namespace>
911+
if client != nil && nDots > 1 && client.Namespace != tmNamespace && !strings.HasSuffix(name, s.dotClusterDomain) && !hasDomainSuffix(name, client.Namespace) {
912+
name += client.Namespace + "."
913+
restoreName = true
914+
dlog.Debugf(ctx, "traffic-manager: %s", name)
915+
rrs, rCode, err = dnsproxy.Lookup(ctx, qType, name, noSearchDomain)
916+
}
917+
}
918+
if err != nil {
919+
dlog.Errorf(ctx, "traffic-manager: %s %s -> %s %s", qName, qtn, dns2.RcodeToString[rCode], err)
920+
return nil, dns2.RcodeServerFailure
921+
}
922+
if len(rrs) == 0 {
923+
dlog.Tracef(ctx, "traffic-manager: %s %s -> %s", qName, qtn, dns2.RcodeToString[rCode])
924+
} else {
925+
if restoreName {
926+
dlog.Tracef(ctx, "traffic-manager: restore %s to %s", name, qName)
927+
for _, rr := range rrs {
928+
rr.Header().Name = qName
923929
}
924-
dlog.Tracef(ctx, "LookupDNS on traffic-manager: %s %s -> %s", request.Name, qtn, rrs)
925930
}
931+
dlog.Tracef(ctx, "traffic-manager: %s %s -> %s", qName, qtn, rrs)
926932
}
927-
return dnsproxy.ToRPC(rrs, rCode)
933+
return rrs, rCode
928934
}
929935

930936
func (s *service) AgentLookupDNSResponse(ctx context.Context, response *rpc.DNSAgentResponse) (*empty.Empty, error) {

docs/release-notes.md

+6
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@
88
A shared informer was sometimes reused when namespaces were removed and then later added again, leading to errors like "handler ... was not added to shared informer because it has stopped already".
99
</div>
1010

11+
## <div style="display:flex;"><img src="images/bugfix.png" alt="bugfix" style="width:30px;height:fit-content;"/><div style="display:flex;margin-left:7px;">Single label name DNS lookups didn't work unless at least one traffic-agent was installed</div></div>
12+
<div style="margin-left: 15px">
13+
14+
A problem with incorrect handling of single label names in the traffic-manager's DNS resolver was fixed. The problem would cause lookups like `curl echo` to fail, even though telepresence was connected to a namespace containing an "echo" service, unless at least one of the workloads in the connected namespace had a traffic-agent.
15+
</div>
16+
1117
## Version 2.22.2 <span style="font-size: 16px;">(March 28)</span>
1218
## <div style="display:flex;"><img src="images/bugfix.png" alt="bugfix" style="width:30px;height:fit-content;"/><div style="display:flex;margin-left:7px;">[Panic when using telepresence replace in an IPv6-only cluster](https://github.com/telepresenceio/telepresence/issues/3828)</div></div>
1319
<div style="margin-left: 15px">

docs/release-notes.mdx

+6
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,12 @@ import { Note, Title, Body } from '@site/src/components/ReleaseNotes'
1414
A shared informer was sometimes reused when namespaces were removed and then later added again, leading to errors like "handler ... was not added to shared informer because it has stopped already".
1515
</Body>
1616
</Note>
17+
<Note>
18+
<Title type="bugfix">Single label name DNS lookups didn't work unless at least one traffic-agent was installed</Title>
19+
<Body>
20+
A problem with incorrect handling of single label names in the traffic-manager's DNS resolver was fixed. The problem would cause lookups like `curl echo` to fail, even though telepresence was connected to a namespace containing an "echo" service, unless at least one of the workloads in the connected namespace had a traffic-agent.
21+
</Body>
22+
</Note>
1723
## Version 2.22.2 <span style={{fontSize:'16px'}}>(March 28)</span>
1824
<Note>
1925
<Title type="bugfix" docs="https://github.com/telepresenceio/telepresence/issues/3828">Panic when using telepresence replace in an IPv6-only cluster</Title>

integration_test/docker_daemon_test.go

+13
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,19 @@ func (s *dockerDaemonSuite) Test_DockerDaemon_daemonHostNotConflict() {
118118
s.TelepresenceConnect(ctx)
119119
}
120120

121+
func (s *dockerDaemonSuite) Test_DockerDaemon_singleNameLookup() {
122+
ctx := s.Context()
123+
const svc = "echo-easy"
124+
s.ApplyApp(ctx, svc, "deploy/"+svc)
125+
defer s.DeleteSvcAndWorkload(ctx, "deploy", svc)
126+
out := s.TelepresenceConnect(ctx, "--docker", "--", itest.GetExecutable(ctx), "curl", "--silent", "--max-time", "1", svc)
127+
s.Contains(out, "Request served by "+svc)
128+
so, err := itest.TelepresenceStatus(ctx)
129+
s.NoError(err)
130+
s.Nil(so.ContainerizedDaemon)
131+
s.False(so.UserDaemon.Running)
132+
}
133+
121134
func (s *dockerDaemonSuite) Test_DockerDaemon_cacheFiles() {
122135
ctx := s.Context()
123136
rq := s.Require()

0 commit comments

Comments
 (0)