Skip to content

Commit d7dbc56

Browse files
committed
chore: Expose the refresh strategy UseIAMAuthN() value to the dialer.
Part of #842 chore: Add domain name to the cloudsql.ConnName struct feat: Check for DNS changes on connect. On change, close all connections and create a new dialer. feat: Automatically check for DNS changes periodically. On change, close all connections and create a new dialer. wip: eno changes wip: eno interface cleanup wip: convert monitoredInstance to *monitoredInstance
1 parent 26dc9f7 commit d7dbc56

File tree

6 files changed

+311
-13
lines changed

6 files changed

+311
-13
lines changed

README.md

+37-1
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,8 @@ func connect() {
234234
// ... etc
235235
}
236236
```
237-
### Using DNS to identify an instance
237+
238+
### Using DNS domain names to identify instances
238239

239240
The connector can be configured to use DNS to look up an instance. This would
240241
allow you to configure your application to connect to a database instance, and
@@ -292,6 +293,41 @@ func connect() {
292293
}
293294
```
294295

296+
### Automatic fail-over using DNS domain names
297+
298+
When the connector is configured using a domain name, the connector will
299+
periodically check if the DNS record for an instance changes. When the connector
300+
detects that the domain name refers to a different instance, the connector will
301+
close all open connections to the old instance. Subsequent connection attempts
302+
will be directed to the new instance.
303+
304+
For example: suppose an application is configured to connect using the
305+
domain name `prod-db.mycompany.example.com`. Initially the corporate DNS
306+
zone has a TXT record with the value `my-project:region:my-instance`. The
307+
application establishes connections to the `my-project:region:my-instance`
308+
Cloud SQL instance.
309+
310+
Then, to reconfigure the application to use a different database
311+
instance, `my-project:other-region:my-instance-2`, you update the DNS record
312+
for `prod-db.mycompany.example.com` with the target
313+
`my-project:other-region:my-instance-2`.
314+
315+
The connector inside the application detects the change to this
316+
DNS entry. Now, when the application connects to its database using the
317+
domain name `prod-db.mycompany.example.com`, it will connect to the
318+
`my-project:other-region:my-instance-2` Cloud SQL instance.
319+
320+
The connector will automatically close all existing connections to
321+
`my-project:region:my-instance`. This will force the connection pools to
322+
establish new connections. Also, it may cause database queries in progress
323+
to fail.
324+
325+
The connector will poll for changes to the DNS name every 30 seconds by default.
326+
You may configure the frequency of the DNS checks using the option
327+
`WithFailoverPeriod(d time.Duration)`. When this is set to 0, the connector will
328+
disable polling and only check if the DNS record changed when it is
329+
creating a new connection.
330+
295331

296332
### Using Options
297333

dialer.go

+114-1
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ type connectionInfoCache interface {
107107
ConnectionInfo(context.Context) (cloudsql.ConnectionInfo, error)
108108
UpdateRefresh(*bool)
109109
ForceRefresh()
110+
UseIAMAuthN() bool
110111
io.Closer
111112
}
112113

@@ -115,13 +116,26 @@ type connectionInfoCache interface {
115116
type monitoredCache struct {
116117
// openConnsCount is read and reset through sync/atomic in Close.
// NOTE(review): presumably the number of currently open connections for
// this instance; confirm against the code that increments it.
openConnsCount *uint64
117118

119+
// mu guards openConns.
mu sync.Mutex
120+
// openConns holds the connections that Dial opened using a domain name,
// kept so that they can be forcibly closed later.
openConns []*instrumentedConn
121+
118122
// connectionInfoCache is the wrapped cache; Close delegates to it after
// closing the tracked connections.
connectionInfoCache
119123
}
120124

121125
func (c *monitoredCache) Close() error {
122126
if c == nil || c.connectionInfoCache == nil {
123127
return nil
124128
}
129+
130+
if atomic.LoadUint64(c.openConnsCount) > 0 {
131+
for _, socket := range c.openConns {
132+
if !socket.isClosed() {
133+
_ = socket.Close() // force socket closed, ok to ignore error.
134+
}
135+
}
136+
atomic.StoreUint64(c.openConnsCount, 0)
137+
}
138+
125139
return c.connectionInfoCache.Close()
126140
}
127141

@@ -145,6 +159,21 @@ func (c *monitoredCache) ConnectionInfo(ctx context.Context) (cloudsql.Connectio
145159
return c.connectionInfoCache.ConnectionInfo(ctx)
146160
}
147161

162+
func (c *monitoredCache) purgeClosedConns() {
163+
if c == nil || c.connectionInfoCache == nil {
164+
return
165+
}
166+
c.mu.Lock()
167+
var open []*instrumentedConn
168+
for _, s := range c.openConns {
169+
if !s.isClosed() {
170+
open = append(open, s)
171+
}
172+
}
173+
c.openConns = open
174+
c.mu.Unlock()
175+
}
176+
148177
// A Dialer is used to create connections to Cloud SQL instances.
149178
//
150179
// Use NewDialer to initialize a Dialer.
@@ -182,6 +211,10 @@ type Dialer struct {
182211

183212
// resolver converts instance names into DNS names.
184213
resolver instance.ConnectionNameResolver
214+
215+
// domainNameTicker periodically checks any domain names to see if they
216+
// changed.
217+
domainNameTicker *time.Ticker
185218
}
186219

187220
var (
@@ -205,6 +238,7 @@ func NewDialer(ctx context.Context, opts ...Option) (*Dialer, error) {
205238
logger: nullLogger{},
206239
useragents: []string{userAgent},
207240
serviceUniverse: "googleapis.com",
241+
failoverPeriod: cloudsql.FailoverPeriod,
208242
}
209243
for _, opt := range opts {
210244
opt(cfg)
@@ -218,6 +252,7 @@ func NewDialer(ctx context.Context, opts ...Option) (*Dialer, error) {
218252
if cfg.setIAMAuthNTokenSource && !cfg.useIAMAuthN {
219253
return nil, errUseTokenSource
220254
}
255+
221256
// Add this to the end to make sure it's not overridden
222257
cfg.sqladminOpts = append(cfg.sqladminOpts, option.WithUserAgent(strings.Join(cfg.useragents, " ")))
223258

@@ -231,7 +266,7 @@ func NewDialer(ctx context.Context, opts ...Option) (*Dialer, error) {
231266
}
232267
ud, err := c.GetUniverseDomain()
233268
if err != nil {
234-
return nil, fmt.Errorf("failed to getOrAdd universe domain: %v", err)
269+
return nil, fmt.Errorf("failed to get universe domain: %v", err)
235270
}
236271
cfg.credentialsUniverse = ud
237272
cfg.sqladminOpts = append(cfg.sqladminOpts, option.WithTokenSource(c.TokenSource))
@@ -301,8 +336,28 @@ func NewDialer(ctx context.Context, opts ...Option) (*Dialer, error) {
301336
dialFunc: cfg.dialFunc,
302337
resolver: r,
303338
}
339+
340+
// If the failover period is set, start a goroutine to periodically
341+
// check for DNS changes.
342+
if cfg.failoverPeriod > 0 {
343+
d.initFailoverRoutine(ctx, cfg.failoverPeriod)
344+
}
345+
304346
return d, nil
305347
}
348+
func (d *Dialer) initFailoverRoutine(ctx context.Context, p time.Duration) {
349+
d.domainNameTicker = time.NewTicker(p)
350+
go func() {
351+
for {
352+
select {
353+
case <-d.domainNameTicker.C:
354+
d.pollDomainNames(ctx)
355+
case <-d.closed:
356+
return
357+
}
358+
}
359+
}()
360+
}
306361

307362
// Dial returns a net.Conn connected to the specified Cloud SQL instance. The
308363
// icn argument must be the instance's connection name, which is in the format
@@ -416,6 +471,13 @@ func (d *Dialer) Dial(ctx context.Context, icn string, opts ...DialOption) (conn
416471
trace.RecordOpenConnections(context.Background(), int64(n), d.dialerID, cn.String())
417472
}, d.dialerID, cn.String())
418473

474+
// If this connection was opened using a Domain Name, then store it for later
475+
// in case it needs to be forcibly closed.
476+
if cn.DomainName() != "" {
477+
c.mu.Lock()
478+
c.openConns = append(c.openConns, iConn)
479+
c.mu.Unlock()
480+
}
419481
return iConn, nil
420482
}
421483

@@ -517,6 +579,7 @@ func newInstrumentedConn(conn net.Conn, closeFunc func(), dialerID, connName str
517579
type instrumentedConn struct {
518580
net.Conn
519581
closeFunc func()
582+
mu sync.RWMutex
520583
closed bool
521584
dialerID string
522585
connName string
@@ -542,6 +605,13 @@ func (i *instrumentedConn) Write(b []byte) (int, error) {
542605
return bytesWritten, err
543606
}
544607

608+
// isClosed returns true if this connection is closing or is already closed.
609+
func (i *instrumentedConn) isClosed() bool {
610+
i.mu.RLock()
611+
defer i.mu.RUnlock()
612+
return i.closed
613+
}
614+
545615
// Close delegates to the underlying net.Conn interface and reports the close
546616
// to the provided closeFunc only when Close returns no error.
547617
func (i *instrumentedConn) Close() error {
@@ -565,13 +635,56 @@ func (d *Dialer) Close() error {
565635
}
566636
close(d.closed)
567637

638+
if d.domainNameTicker != nil {
639+
d.domainNameTicker.Stop()
640+
}
641+
568642
d.cache.replaceAll(func(cn instance.ConnName, c *monitoredCache) (instance.ConnName, *monitoredCache) {
569643
c.Close() // close the monitoredCache
570644
return instance.ConnName{}, nil // Remove from cache
571645
})
572646
return nil
573647
}
574648

649+
func (d *Dialer) pollDomainNames(ctx context.Context) {
650+
d.cache.replaceAll(func(cn instance.ConnName, cache *monitoredCache) (instance.ConnName, *monitoredCache) {
651+
if cn.DomainName() == "" {
652+
return cn, cache
653+
}
654+
655+
// Resolve the domain name.
656+
newCn, err := d.resolver.Resolve(ctx, cn.DomainName())
657+
658+
if err != nil {
659+
// the domain name no longer resolves to a valid instance
660+
d.logger.Debugf(ctx, "[failover] unable to resolve DNS for instance %s: %v", cn.DomainName(), err)
661+
cache.Close()
662+
return instance.ConnName{}, nil
663+
} else if newCn != cn {
664+
d.logger.Debugf(ctx, "domain name %s changed from old instance %s to new instance %s",
665+
cn.DomainName(), cn.String(), newCn.String())
666+
667+
useIamAuthn := cache.UseIAMAuthN()
668+
// The domain name points to a different instance.
669+
cache.Close()
670+
671+
newC, err := d.createConnectionInfoCache(ctx, cn, &useIamAuthn)
672+
if err != nil {
673+
d.logger.Debugf(ctx, "error connecting to new instance %s, %s: %v",
674+
cn.DomainName(), newCn.String(), err)
675+
return instance.ConnName{}, nil
676+
}
677+
return newCn, newC
678+
}
679+
680+
// Remove closed sockets from cache.openConns
681+
cache.purgeClosedConns()
682+
return cn, cache
683+
684+
})
685+
686+
}
687+
575688
// connectionInfoCache is a helper function for returning the appropriate
576689
// connection info Cache in a threadsafe way. It will create a new cache,
577690
// modify the existing one, or leave it unchanged as needed.

0 commit comments

Comments
 (0)