Skip to content

Commit 31e40da

Browse files
committed
On Windows, also set the global DNS suffix search path
This ensures that the search path works as intended on Windows Server 2019 (and hopefully on other windows platforms as well) where it hasn't been enough to just set the search-path for the Telepresence VIF. Signed-off-by: Thomas Hallgren <[email protected]>
1 parent e79135d commit 31e40da

File tree

6 files changed

+138
-42
lines changed

6 files changed

+138
-42
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99

1010
- Feature: There's a new --address flag to the intercept command allowing users to set the target IP of the intercept.
1111

12+
- Bugfix: DNS on windows is more reliable and performant.
13+
1214
- Bugfix: The kubeconfig is made self-contained before running Telepresence daemon in a Docker container.
1315

1416
- Bugfix: The client will no longer need cluster wide permissions when connected to a namespace scoped Traffic Manager.

pkg/client/rootd/dns/server_windows.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ func (s *Server) updateRouterDNS(c context.Context, paths []string, dev vif.Devi
4545
s.namespaces = namespaces
4646
s.search = search
4747
s.domainsLock.Unlock()
48-
err := dev.SetDNS(c, s.config.RemoteIp, search)
48+
err := dev.SetDNS(c, s.clusterDomain, s.config.RemoteIp, search)
4949
s.flushDNS()
5050
if err != nil {
5151
return fmt.Errorf("failed to set DNS: %w", err)

pkg/proc/cmd.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
package proc
22

33
import (
4+
"bytes"
45
"context"
6+
"fmt"
7+
"strings"
58

69
"github.com/datawire/dlib/dexec"
710
"github.com/datawire/dlib/dlog"
@@ -19,3 +22,24 @@ func StdCommand(ctx context.Context, exe string, args ...string) *dexec.Cmd {
1922
dlog.Debugf(ctx, shellquote.ShellString(exe, args))
2023
return cmd
2124
}
25+
26+
// CaptureErr disables command logging, captures Stdout and Stderr to two different buffers.
27+
// If an error occurs, the stdout output is discarded and the stderr output is included in the
28+
// returned error unless the error itself already contains that output.
29+
// On success, any output on stderr is discarded and the stdout output is returned.
30+
func CaptureErr(ctx context.Context, cmd *dexec.Cmd) ([]byte, error) {
31+
var stdOut, stdErr bytes.Buffer
32+
cmd.DisableLogging = true
33+
cmd.Stdout = &stdOut
34+
cmd.Stderr = &stdErr
35+
dlog.Debugf(ctx, shellquote.ShellArgsString(cmd.Args))
36+
if err := cmd.Run(); err != nil {
37+
if es := strings.TrimSpace(stdErr.String()); es != "" {
38+
if !strings.Contains(err.Error(), es) {
39+
err = fmt.Errorf("%s: %w", es, err)
40+
}
41+
}
42+
return nil, err
43+
}
44+
return stdOut.Bytes(), nil
45+
}

pkg/vif/device.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ type Device interface {
3636
Name() string
3737
AddSubnet(context.Context, *net.IPNet) error
3838
RemoveSubnet(context.Context, *net.IPNet) error
39-
SetDNS(context.Context, net.IP, []string) (err error)
39+
SetDNS(context.Context, string, net.IP, []string) (err error)
4040
}
4141

4242
const defaultDevMtu = 1500
@@ -99,8 +99,8 @@ func (d *device) Name() string {
9999
}
100100

101101
// SetDNS sets the DNS configuration for the device on the windows platform.
102-
func (d *device) SetDNS(ctx context.Context, server net.IP, domains []string) (err error) {
103-
return d.dev.setDNS(ctx, server, domains)
102+
func (d *device) SetDNS(ctx context.Context, clusterDomain string, server net.IP, domains []string) (err error) {
103+
return d.dev.setDNS(ctx, clusterDomain, server, domains)
104104
}
105105

106106
func (d *device) SetMTU(mtu int) error {

pkg/vif/device_unix.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import (
1111
"golang.org/x/sys/unix"
1212
)
1313

14-
func (t *nativeDevice) setDNS(_ context.Context, server net.IP, domains []string) (err error) {
14+
func (t *nativeDevice) setDNS(context.Context, string, net.IP, []string) (err error) {
1515
// DNS is configured by other means than through the actual device
1616
return nil
1717
}

pkg/vif/device_windows.go

Lines changed: 107 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,19 @@ import (
66
"fmt"
77
"net"
88
"net/netip"
9+
"os"
910
"strings"
11+
"time"
1012

1113
"golang.org/x/sys/windows"
14+
"golang.org/x/sys/windows/registry"
1215
"golang.zx2c4.com/wireguard/tun"
1316
"golang.zx2c4.com/wireguard/windows/tunnel/winipcfg"
1417

18+
"github.com/datawire/dlib/dcontext"
1519
"github.com/datawire/dlib/derror"
1620
"github.com/datawire/dlib/dlog"
1721
"github.com/telepresenceio/telepresence/v2/pkg/proc"
18-
"github.com/telepresenceio/telepresence/v2/pkg/shellquote"
1922
"github.com/telepresenceio/telepresence/v2/pkg/vif/buffer"
2023
)
2124

@@ -115,7 +118,17 @@ func (t *nativeDevice) removeSubnet(_ context.Context, subnet *net.IPNet) error
115118
return t.getLUID().DeleteIPAddress(prefixFromIPNet(subnet))
116119
}
117120

118-
func (t *nativeDevice) setDNS(ctx context.Context, server net.IP, domains []string) (err error) {
121+
func (t *nativeDevice) setDNS(ctx context.Context, clusterDomain string, server net.IP, searchList []string) (err error) {
122+
// This function must not be interrupted by a context cancellation, so we give it a timeout instead.
123+
parentCtx := ctx
124+
ctx, cancel := context.WithCancel(dcontext.WithoutCancel(ctx))
125+
go func() {
126+
<-parentCtx.Done()
127+
// Give this function some time to complete its task. Configuring DSN on windows is slow.
128+
time.Sleep(10 * time.Second)
129+
cancel()
130+
}()
131+
119132
ipFamily := func(ip net.IP) winipcfg.AddressFamily {
120133
f := winipcfg.AddressFamily(windows.AF_INET6)
121134
if ip4 := ip.To4(); ip4 != nil {
@@ -130,46 +143,103 @@ func (t *nativeDevice) setDNS(ctx context.Context, server net.IP, domains []stri
130143
_ = luid.FlushDNS(oldFamily)
131144
}
132145
}
133-
if err = luid.SetDNS(family, []netip.Addr{addrFromIP(server)}, domains); err != nil {
146+
serverStr := server.String()
147+
servers16, err := windows.UTF16PtrFromString(serverStr)
148+
if err != nil {
134149
return err
135150
}
151+
searchList16, err := windows.UTF16PtrFromString(strings.Join(searchList, ","))
152+
if err != nil {
153+
return err
154+
}
155+
guid, err := luid.GUID()
156+
if err != nil {
157+
return err
158+
}
159+
dnsInterfaceSettings := &winipcfg.DnsInterfaceSettings{
160+
Version: winipcfg.DnsInterfaceSettingsVersion1,
161+
Flags: winipcfg.DnsInterfaceSettingsFlagNameserver | winipcfg.DnsInterfaceSettingsFlagSearchList,
162+
NameServer: servers16,
163+
SearchList: searchList16,
164+
}
165+
if family == windows.AF_INET6 {
166+
dnsInterfaceSettings.Flags |= winipcfg.DnsInterfaceSettingsFlagIPv6
167+
}
168+
if err = winipcfg.SetInterfaceDnsSettings(*guid, dnsInterfaceSettings); err != nil {
169+
return err
170+
}
171+
172+
// Unless we also update the global DNS search path, the one for the device doesn't work on some platforms.
173+
// This behavior is mainly observed on Windows Server editions.
136174

137-
// On some systems (e.g. CircleCI but not josecv's windows box), SetDNS isn't enough to allow the domains to be resolved,
138-
// and the network adapter's domain has to be set explicitly.
139-
// It's actually way easier to do this via powershell than any system calls that can be run from go code
140-
domain := ""
141-
if len(domains) > 0 {
142-
// Quote the domain to prevent powershell injection
143-
domain = shellquote.ShellArgsString([]string{strings.TrimSuffix(domains[0], ".")})
144-
}
145-
// It's apparently well known that WMI queries can hang under various conditions, so we add a timeout here to prevent hanging the daemon
146-
// Fun fact: terminating the context that powershell is running in will not stop a hanging WMI call (!) perhaps because it is considered uninterruptible
147-
// For more on WMI queries hanging, see:
148-
// * http://www.yusufozturk.info/windows-powershell/how-to-avoid-wmi-query-hangs-in-powershell.html
149-
// * https://theolddogscriptingblog.wordpress.com/2012/05/11/wmi-hangs-and-how-to-avoid-them/
150-
// * https://stackoverflow.com/questions/24294408/gwmi-query-hangs-powershell-script
151-
// * http://use-powershell.blogspot.com/2018/03/get-wmiobject-hangs.html
152-
pshScript := fmt.Sprintf(`
153-
$job = Get-WmiObject Win32_NetworkAdapterConfiguration -filter "interfaceindex='%d'" -AsJob | Wait-Job -Timeout 30
154-
if ($job.State -ne 'Completed') {
155-
throw "timed out getting network adapter after 30 seconds."
156-
}
157-
$obj = $job | Receive-Job
158-
$job = Invoke-WmiMethod -InputObject $obj -Name SetDNSDomain -ArgumentList "%s" -AsJob | Wait-Job -Timeout 30
159-
if ($job.State -ne 'Completed') {
160-
throw "timed out setting network adapter DNS Domain after 30 seconds."
161-
}
162-
$job | Receive-Job
163-
`, t.interfaceIndex, domain)
164-
cmd := proc.CommandContext(ctx, "powershell.exe", "-NoProfile", "-NonInteractive", pshScript)
165-
cmd.DisableLogging = true // disable chatty logging
166-
dlog.Debugf(ctx, "Calling powershell's SetDNSDomain %q", domain)
167-
if err := cmd.Run(); err != nil {
168-
// Log the error, but don't actually fail on it: This is all just a fallback for SetDNS, so the domains might actually be working
169-
dlog.Errorf(ctx, "Failed to set NetworkAdapterConfiguration DNS Domain: %v. Will proceed, but namespace mapping might not be functional.", err)
175+
// Retrieve the current global search paths so that paths that aren't related to
176+
// the cluster domain (i.e. not managed by us) can be retained.
177+
gss, err := getGlobalSearchList()
178+
if err != nil {
179+
return err
180+
}
181+
if oldLen := len(gss); oldLen > 0 {
182+
// Windows does not use a dot suffix in the search path.
183+
clusterDomain = strings.TrimSuffix(clusterDomain, ".")
184+
185+
// Put our new search path in front of other entries. Then include those
186+
// that don't end with our cluster domain (these are entries that aren't
187+
// managed by Telepresence).
188+
newGss := make([]string, len(searchList), oldLen)
189+
copy(newGss, searchList)
190+
for _, gs := range gss {
191+
if !strings.HasSuffix(gs, clusterDomain) {
192+
newGss = append(newGss, gs)
193+
}
194+
}
195+
gss = newGss
196+
} else {
197+
gss = searchList
170198
}
171199
t.dns = server
172-
return nil
200+
return setGlobalSearchList(ctx, gss)
201+
}
202+
203+
func psList(values []string) string {
204+
var sb strings.Builder
205+
sb.WriteString("@(")
206+
for i, gs := range values {
207+
if i > 0 {
208+
sb.WriteByte(',')
209+
}
210+
sb.WriteByte('"')
211+
sb.WriteString(strings.TrimSuffix(gs, "."))
212+
sb.WriteByte('"')
213+
}
214+
sb.WriteByte(')')
215+
return sb.String()
216+
}
217+
218+
func getGlobalSearchList() ([]string, error) {
219+
rk, err := registry.OpenKey(registry.LOCAL_MACHINE, `System\CurrentControlSet\Services\Tcpip\Parameters`, registry.QUERY_VALUE)
220+
if err != nil {
221+
if os.IsNotExist(err) {
222+
err = nil
223+
}
224+
return nil, err
225+
}
226+
csv, _, err := rk.GetStringValue("SearchList")
227+
if err != nil {
228+
if os.IsNotExist(err) {
229+
err = nil
230+
}
231+
return nil, err
232+
}
233+
if csv == "" {
234+
return nil, nil
235+
}
236+
return strings.Split(csv, ","), nil
237+
}
238+
239+
func setGlobalSearchList(ctx context.Context, gss []string) error {
240+
cmd := proc.CommandContext(ctx, "powershell.exe", "Set-DnsClientGlobalSetting", "-SuffixSearchList", psList(gss))
241+
_, err := proc.CaptureErr(ctx, cmd)
242+
return err
173243
}
174244

175245
func (t *nativeDevice) setMTU(int) error {

0 commit comments

Comments
 (0)