Skip to content

Commit 7030008

Browse files
committed
feat: Automatically check for DNS changes periodically. On change, close all connections and create a new dialer.
chore: Expose the refresh strategy UseIAMAuthN() value to the dialer. Part of #842 chore: Add domain name to the cloudsql.ConnName struct Feat: Check for DNS changes on connect. On change, close all connections and create a new dialer. feat: Automatically check for DNS changes periodically. On change, close all connections and create a new dialer. wip: eno changes wip: eno interface cleanup wip: convert monitoredInstance to *monitoredInstance
1 parent d380389 commit 7030008

10 files changed

+617
-66
lines changed

README.md

+37-1
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,8 @@ func connect() {
234234
// ... etc
235235
}
236236
```
237-
### Using DNS to identify an instance
237+
238+
### Using DNS domain names to identify instances
238239

239240
The connector can be configured to use DNS to look up an instance. This would
240241
allow you to configure your application to connect to a database instance, and
@@ -292,6 +293,41 @@ func connect() {
292293
}
293294
```
294295

296+
### Automatic fail-over using DNS domain names
297+
298+
When the connector is configured using a domain name, the connector will
299+
periodically check if the DNS record for an instance changes. When the connector
300+
detects that the domain name refers to a different instance, the connector will
301+
close all open connections to the old instance. Subsequent connection attempts
302+
will be directed to the new instance.
303+
304+
For example: suppose an application is configured to connect using the
305+
domain name `prod-db.mycompany.example.com`. Initially the corporate DNS
306+
zone has a TXT record with the value `my-project:region:my-instance`. The
307+
application establishes connections to the `my-project:region:my-instance`
308+
Cloud SQL instance.
309+
310+
Then, to reconfigure the application to use a different database
311+
instance, `my-project:other-region:my-instance-2`, you update the DNS record
312+
for `prod-db.mycompany.example.com` with the target
313+
`my-project:other-region:my-instance-2`.
314+
315+
The connector inside the application detects the change to this
316+
DNS entry. Now, when the application connects to its database using the
317+
domain name `prod-db.mycompany.example.com`, it will connect to the
318+
`my-project:other-region:my-instance-2` Cloud SQL instance.
319+
320+
The connector will automatically close all existing connections to
321+
`my-project:region:my-instance`. This will force the connection pools to
322+
establish new connections. Also, it may cause database queries in progress
323+
to fail.
324+
325+
The connector will poll for changes to the DNS name every 30 seconds by default.
326+
You may configure how often the connector polls for DNS changes using the option
327+
`WithFailoverPeriod(d time.Duration)`. When this is set to 0, the connector will
328+
disable polling and only check if the DNS record changed when it is
329+
creating a new connection.
330+
295331

296332
### Using Options
297333

dialer.go

+27-37
Original file line numberDiff line numberDiff line change
@@ -107,41 +107,10 @@ type connectionInfoCache interface {
107107
ConnectionInfo(context.Context) (cloudsql.ConnectionInfo, error)
108108
UpdateRefresh(*bool)
109109
ForceRefresh()
110+
UseIAMAuthN() bool
110111
io.Closer
111112
}
112113

113-
// monitoredCache is a wrapper around a connectionInfoCache that tracks the
114-
// number of connections to the associated instance.
115-
type monitoredCache struct {
116-
openConnsCount *uint64
117-
118-
connectionInfoCache
119-
}
120-
121-
func (c *monitoredCache) Close() error {
122-
return c.connectionInfoCache.Close()
123-
}
124-
125-
func (c *monitoredCache) ForceRefresh() {
126-
if c == nil || c.connectionInfoCache == nil {
127-
return
128-
}
129-
c.connectionInfoCache.ForceRefresh()
130-
}
131-
132-
func (c *monitoredCache) UpdateRefresh(b *bool) {
133-
if c == nil || c.connectionInfoCache == nil {
134-
return
135-
}
136-
c.connectionInfoCache.UpdateRefresh(b)
137-
}
138-
func (c *monitoredCache) ConnectionInfo(ctx context.Context) (cloudsql.ConnectionInfo, error) {
139-
if c == nil || c.connectionInfoCache == nil {
140-
return cloudsql.ConnectionInfo{}, nil
141-
}
142-
return c.connectionInfoCache.ConnectionInfo(ctx)
143-
}
144-
145114
// A Dialer is used to create connections to Cloud SQL instances.
146115
//
147116
// Use NewDialer to initialize a Dialer.
@@ -178,7 +147,8 @@ type Dialer struct {
178147
iamTokenSource oauth2.TokenSource
179148

180149
// resolver converts instance names into DNS names.
181-
resolver instance.ConnectionNameResolver
150+
resolver instance.ConnectionNameResolver
151+
failoverPeriod time.Duration
182152
}
183153

184154
var (
@@ -202,6 +172,7 @@ func NewDialer(ctx context.Context, opts ...Option) (*Dialer, error) {
202172
logger: nullLogger{},
203173
useragents: []string{userAgent},
204174
serviceUniverse: "googleapis.com",
175+
failoverPeriod: cloudsql.FailoverPeriod,
205176
}
206177
for _, opt := range opts {
207178
opt(cfg)
@@ -215,6 +186,7 @@ func NewDialer(ctx context.Context, opts ...Option) (*Dialer, error) {
215186
if cfg.setIAMAuthNTokenSource && !cfg.useIAMAuthN {
216187
return nil, errUseTokenSource
217188
}
189+
218190
// Add this to the end to make sure it's not overridden
219191
cfg.sqladminOpts = append(cfg.sqladminOpts, option.WithUserAgent(strings.Join(cfg.useragents, " ")))
220192

@@ -297,7 +269,9 @@ func NewDialer(ctx context.Context, opts ...Option) (*Dialer, error) {
297269
iamTokenSource: cfg.iamLoginTokenSource,
298270
dialFunc: cfg.dialFunc,
299271
resolver: r,
272+
failoverPeriod: cfg.failoverPeriod,
300273
}
274+
301275
return d, nil
302276
}
303277

@@ -413,6 +387,13 @@ func (d *Dialer) Dial(ctx context.Context, icn string, opts ...DialOption) (conn
413387
trace.RecordOpenConnections(context.Background(), int64(n), d.dialerID, cn.String())
414388
}, d.dialerID, cn.String())
415389

390+
// If this connection was opened using a Domain Name, then store it for later
391+
// in case it needs to be forcibly closed.
392+
if cn.DomainName() != "" {
393+
c.mu.Lock()
394+
c.openConns = append(c.openConns, iConn)
395+
c.mu.Unlock()
396+
}
416397
return iConn, nil
417398
}
418399

@@ -514,6 +495,7 @@ func newInstrumentedConn(conn net.Conn, closeFunc func(), dialerID, connName str
514495
type instrumentedConn struct {
515496
net.Conn
516497
closeFunc func()
498+
mu sync.RWMutex
517499
closed bool
518500
dialerID string
519501
connName string
@@ -539,9 +521,18 @@ func (i *instrumentedConn) Write(b []byte) (int, error) {
539521
return bytesWritten, err
540522
}
541523

524+
// isClosed returns true if this connection is closing or is already closed.
525+
func (i *instrumentedConn) isClosed() bool {
526+
i.mu.RLock()
527+
defer i.mu.RUnlock()
528+
return i.closed
529+
}
530+
542531
// Close delegates to the underlying net.Conn interface and reports the close
543532
// to the provided closeFunc only when Close returns no error.
544533
func (i *instrumentedConn) Close() error {
534+
i.mu.Lock()
535+
defer i.mu.Unlock()
545536
i.closed = true
546537
err := i.Conn.Close()
547538
if err != nil {
@@ -582,6 +573,8 @@ func (d *Dialer) connectionInfoCache(
582573
})
583574

584575
if old != nil {
576+
// ensure that the old cache entry is closed.
577+
// monitoredCache.Close() may be called safely, even if old is closed.
585578
old.Close()
586579
}
587580

@@ -621,10 +614,7 @@ func (d *Dialer) createConnectionInfoCache(
621614
d.dialerID, useIAMAuthNDial,
622615
)
623616
}
624-
c := &monitoredCache{
625-
openConnsCount: new(uint64),
626-
connectionInfoCache: cache,
627-
}
617+
c := newMonitoredCache(ctx, cache, cn, d.failoverPeriod, d.resolver, d.logger)
628618

629619
c.UpdateRefresh(useIAMAuthN)
630620

dialer_cache.go

+7-2
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ func (d *dialerCache) getOrAdd(cn instance.ConnName, f func() (*monitoredCache,
8989
d.mu.RLock()
9090
c, ok := d.cache[cn]
9191
d.mu.RUnlock()
92-
if ok {
92+
if ok && !c.isClosed() {
9393
return c, oldC, nil
9494
}
9595

@@ -100,7 +100,12 @@ func (d *dialerCache) getOrAdd(cn instance.ConnName, f func() (*monitoredCache,
100100
// Look up in the map by CN again
101101
c, ok = d.cache[cn]
102102
if ok {
103-
return c, nil, nil
103+
if !c.isClosed() {
104+
return c, nil, nil
105+
}
106+
// c is closed, therefore remove it from the cache.
107+
oldC = c
108+
delete(d.cache, cn)
104109
}
105110

106111
// Try to get an instance with the same domain name but different instance

dialer_cache_test.go

+40
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,46 @@ func TestDialerCache_Get_UsesExistingInstance(t *testing.T) {
7676

7777
}
7878

79+
func TestDialerCache_Get_ReplacesClosedInstance(t *testing.T) {
80+
cache := newDialerCache(&testLog{t: t})
81+
var wantOpenConns uint64 = 10
82+
var wantOpenConns2 uint64 = 20
83+
wantCn, _ := instance.ParseConnNameWithDomainName("project:region:instance", "d1.example.com")
84+
85+
// First, put d1.example.com = project:region:instance
86+
c, _, err := cache.getOrAdd(wantCn, func() (*monitoredCache, error) {
87+
return &monitoredCache{openConnsCount: &wantOpenConns}, nil
88+
})
89+
if err != nil {
90+
t.Error("Got error, want no error", err)
91+
}
92+
if *c.openConnsCount != wantOpenConns {
93+
t.Fatal("Got wrong cache instance")
94+
}
95+
96+
// Close the instance.
97+
c.closed = true
98+
99+
// Attempt to get the instance again after it closed. This will create
100+
// a new cache entry.
101+
c2, oldC, err := cache.getOrAdd(wantCn, func() (*monitoredCache, error) {
102+
return &monitoredCache{openConnsCount: &wantOpenConns2}, nil
103+
})
104+
if err != nil {
105+
t.Error("Got error, want no error", err)
106+
}
107+
if *oldC.openConnsCount != wantOpenConns {
108+
t.Fatal("Got wrong cache instance")
109+
}
110+
if *c2.openConnsCount != wantOpenConns2 {
111+
t.Fatal("Got wrong value for cache")
112+
}
113+
if len(cache.cache) != 1 {
114+
t.Fatal("Got wrong number of cache entries: want 1, got ", len(cache.cache))
115+
}
116+
117+
}
118+
79119
func TestDialerCache_Get_ErrorOnInstanceCreate(t *testing.T) {
80120
cache := newDialerCache(&testLog{t: t})
81121
wantCn, _ := instance.ParseConnName("myproject:region:instance")

0 commit comments

Comments
 (0)