Skip to content

Commit 63469ab

Browse files
committed
chore: Expose the refresh strategy UseIAMAuthN() value to the dialer.
Part of #842 chore: Add domain name to the cloudsql.ConnName struct feat: Check for DNS changes on connect. On change, close all connections and create a new dialer. feat: Automatically check for DNS changes periodically. On change, close all connections and create a new dialer. wip: eno changes wip: eno interface cleanup wip: convert monitoredInstance to *monitoredInstance
1 parent 5af311b commit 63469ab

File tree

6 files changed

+318
-20
lines changed

6 files changed

+318
-20
lines changed

README.md

+37-1
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,8 @@ func connect() {
234234
// ... etc
235235
}
236236
```
237-
### Using DNS to identify an instance
237+
238+
### Using DNS domain names to identify instances
238239

239240
The connector can be configured to use DNS to look up an instance. This would
240241
allow you to configure your application to connect to a database instance, and
@@ -292,6 +293,41 @@ func connect() {
292293
}
293294
```
294295

296+
### Automatic fail-over using DNS domain names
297+
298+
When the connector is configured using a domain name, the connector will
299+
periodically check if the DNS record for an instance changes. When the connector
300+
detects that the domain name refers to a different instance, the connector will
301+
close all open connections to the old instance. Subsequent connection attempts
302+
will be directed to the new instance.
303+
304+
For example, suppose an application is configured to connect using the
305+
domain name `prod-db.mycompany.example.com`. Initially the corporate DNS
306+
zone has a TXT record with the value `my-project:region:my-instance`. The
307+
application establishes connections to the `my-project:region:my-instance`
308+
Cloud SQL instance.
309+
310+
Then, to reconfigure the application to use a different database
311+
instance, `my-project:other-region:my-instance-2`, you update the DNS record
312+
for `prod-db.mycompany.example.com` with the target
313+
`my-project:other-region:my-instance-2`.
314+
315+
The connector inside the application detects the change to this
316+
DNS entry. Now, when the application connects to its database using the
317+
domain name `prod-db.mycompany.example.com`, it will connect to the
318+
`my-project:other-region:my-instance-2` Cloud SQL instance.
319+
320+
The connector will automatically close all existing connections to
321+
`my-project:region:my-instance`. This will force the connection pools to
322+
establish new connections. Also, it may cause database queries in progress
323+
to fail.
324+
325+
The connector will poll for changes to the DNS name every 30 seconds by default.
326+
You may configure the frequency of these DNS checks using the option
327+
`WithFailoverPeriod(d time.Duration)`. When this is set to 0, the connector will
328+
disable polling and only check if the DNS record changed when it is
329+
creating a new connection.
330+
295331

296332
### Using Options
297333

dialer.go

+121-8
Original file line numberDiff line numberDiff line change
@@ -107,13 +107,17 @@ type connectionInfoCache interface {
107107
ConnectionInfo(context.Context) (cloudsql.ConnectionInfo, error)
108108
UpdateRefresh(*bool)
109109
ForceRefresh()
110+
UseIAMAuthN() bool
110111
io.Closer
111112
}
112113

113114
// monitoredCache is a wrapper around a connectionInfoCache that tracks the
114115
// number of connections to the associated instance.
115116
type monitoredCache struct {
116-
openConns *uint64
117+
openConnsCount *uint64
118+
119+
mu sync.Mutex
120+
openConns []*instrumentedConn
117121

118122
connectionInfoCache
119123
}
@@ -122,6 +126,16 @@ func (c *monitoredCache) Close() error {
122126
if c == nil || c.connectionInfoCache == nil {
123127
return nil
124128
}
129+
130+
if atomic.LoadUint64(c.openConnsCount) > 0 {
131+
for _, socket := range c.openConns {
132+
if !socket.isClosed() {
133+
_ = socket.Close() // force socket closed, ok to ignore error.
134+
}
135+
}
136+
atomic.StoreUint64(c.openConnsCount, 0)
137+
}
138+
125139
return c.connectionInfoCache.Close()
126140
}
127141

@@ -145,6 +159,21 @@ func (c *monitoredCache) ConnectionInfo(ctx context.Context) (cloudsql.Connectio
145159
return c.connectionInfoCache.ConnectionInfo(ctx)
146160
}
147161

162+
func (c *monitoredCache) purgeClosedConns() {
163+
if c == nil || c.connectionInfoCache == nil {
164+
return
165+
}
166+
c.mu.Lock()
167+
var open []*instrumentedConn
168+
for _, s := range c.openConns {
169+
if !s.isClosed() {
170+
open = append(open, s)
171+
}
172+
}
173+
c.openConns = open
174+
c.mu.Unlock()
175+
}
176+
148177
// A Dialer is used to create connections to Cloud SQL instances.
149178
//
150179
// Use NewDialer to initialize a Dialer.
@@ -182,6 +211,10 @@ type Dialer struct {
182211

183212
// resolver converts instance names into DNS names.
184213
resolver instance.ConnectionNameResolver
214+
215+
// domainNameTicker periodically checks any domain names to see if they
216+
// changed.
217+
domainNameTicker *time.Ticker
185218
}
186219

187220
var (
@@ -205,6 +238,7 @@ func NewDialer(ctx context.Context, opts ...Option) (*Dialer, error) {
205238
logger: nullLogger{},
206239
useragents: []string{userAgent},
207240
serviceUniverse: "googleapis.com",
241+
failoverPeriod: cloudsql.FailoverPeriod,
208242
}
209243
for _, opt := range opts {
210244
opt(cfg)
@@ -218,6 +252,7 @@ func NewDialer(ctx context.Context, opts ...Option) (*Dialer, error) {
218252
if cfg.setIAMAuthNTokenSource && !cfg.useIAMAuthN {
219253
return nil, errUseTokenSource
220254
}
255+
221256
// Add this to the end to make sure it's not overridden
222257
cfg.sqladminOpts = append(cfg.sqladminOpts, option.WithUserAgent(strings.Join(cfg.useragents, " ")))
223258

@@ -231,7 +266,7 @@ func NewDialer(ctx context.Context, opts ...Option) (*Dialer, error) {
231266
}
232267
ud, err := c.GetUniverseDomain()
233268
if err != nil {
234-
return nil, fmt.Errorf("failed to getOrAdd universe domain: %v", err)
269+
return nil, fmt.Errorf("failed to get universe domain: %v", err)
235270
}
236271
cfg.credentialsUniverse = ud
237272
cfg.sqladminOpts = append(cfg.sqladminOpts, option.WithTokenSource(c.TokenSource))
@@ -301,8 +336,28 @@ func NewDialer(ctx context.Context, opts ...Option) (*Dialer, error) {
301336
dialFunc: cfg.dialFunc,
302337
resolver: r,
303338
}
339+
340+
// If the failover period is set, start a goroutine to periodically
341+
// check for DNS changes.
342+
if cfg.failoverPeriod > 0 {
343+
d.initFailoverRoutine(ctx, cfg.failoverPeriod)
344+
}
345+
304346
return d, nil
305347
}
348+
func (d *Dialer) initFailoverRoutine(ctx context.Context, p time.Duration) {
349+
d.domainNameTicker = time.NewTicker(p)
350+
go func() {
351+
for {
352+
select {
353+
case <-d.domainNameTicker.C:
354+
d.pollDomainNames(ctx)
355+
case <-d.closed:
356+
return
357+
}
358+
}
359+
}()
360+
}
306361

307362
// Dial returns a net.Conn connected to the specified Cloud SQL instance. The
308363
// icn argument must be the instance's connection name, which is in the format
@@ -406,16 +461,23 @@ func (d *Dialer) Dial(ctx context.Context, icn string, opts ...DialOption) (conn
406461

407462
latency := time.Since(startTime).Milliseconds()
408463
go func() {
409-
n := atomic.AddUint64(c.openConns, 1)
464+
n := atomic.AddUint64(c.openConnsCount, 1)
410465
trace.RecordOpenConnections(ctx, int64(n), d.dialerID, cn.String())
411466
trace.RecordDialLatency(ctx, icn, d.dialerID, latency)
412467
}()
413468

414469
iConn := newInstrumentedConn(tlsConn, func() {
415-
n := atomic.AddUint64(c.openConns, ^uint64(0))
470+
n := atomic.AddUint64(c.openConnsCount, ^uint64(0))
416471
trace.RecordOpenConnections(context.Background(), int64(n), d.dialerID, cn.String())
417472
}, d.dialerID, cn.String())
418473

474+
// If this connection was opened using a Domain Name, then store it for later
475+
// in case it needs to be forcibly closed.
476+
if cn.DomainName() != "" {
477+
c.mu.Lock()
478+
c.openConns = append(c.openConns, iConn)
479+
c.mu.Unlock()
480+
}
419481
return iConn, nil
420482
}
421483

@@ -520,6 +582,7 @@ func newInstrumentedConn(conn net.Conn, closeFunc func(), dialerID, connName str
520582
type instrumentedConn struct {
521583
net.Conn
522584
closeFunc func()
585+
mu sync.RWMutex
523586
closed bool
524587
dialerID string
525588
connName string
@@ -545,6 +608,13 @@ func (i *instrumentedConn) Write(b []byte) (int, error) {
545608
return bytesWritten, err
546609
}
547610

611+
// isClosed returns true if this connection is closing or is already closed.
612+
func (i *instrumentedConn) isClosed() bool {
613+
i.mu.RLock()
614+
defer i.mu.RUnlock()
615+
return i.closed
616+
}
617+
548618
// Close delegates to the underlying net.Conn interface and reports the close
549619
// to the provided closeFunc only when Close returns no error.
550620
func (i *instrumentedConn) Close() error {
@@ -568,13 +638,56 @@ func (d *Dialer) Close() error {
568638
}
569639
close(d.closed)
570640

571-
d.cache.replaceAll(func(cn instance.ConnName, c monitoredCache) (instance.ConnName, monitoredCache) {
572-
c.Close() // close the monitoredCache
573-
return instance.ConnName{}, monitoredCache{} // Remove from cache
641+
if d.domainNameTicker != nil {
642+
d.domainNameTicker.Stop()
643+
}
644+
645+
d.cache.replaceAll(func(cn instance.ConnName, c *monitoredCache) (instance.ConnName, *monitoredCache) {
646+
c.Close() // close the monitoredCache
647+
return instance.ConnName{}, nil // Remove from cache
574648
})
575649
return nil
576650
}
577651

652+
func (d *Dialer) pollDomainNames(ctx context.Context) {
653+
d.cache.replaceAll(func(cn instance.ConnName, cache *monitoredCache) (instance.ConnName, *monitoredCache) {
654+
if cn.DomainName() == "" {
655+
return cn, cache
656+
}
657+
658+
// Resolve the domain name.
659+
newCn, err := d.resolver.Resolve(ctx, cn.DomainName())
660+
661+
if err != nil {
662+
// the domain name no longer resolves to a valid instance
663+
d.logger.Debugf(ctx, "[failover] unable to resolve DNS for instance %s: %v", cn.DomainName(), err)
664+
cache.Close()
665+
return instance.ConnName{}, nil
666+
} else if newCn != cn {
667+
d.logger.Debugf(ctx, "domain name %s changed from old instance %s to new instance %s",
668+
cn.DomainName(), cn.String(), newCn.String())
669+
670+
useIamAuthn := cache.UseIAMAuthN()
671+
// The domain name points to a different instance.
672+
cache.Close()
673+
674+
newC, err := d.createConnectionInfoCache(ctx, cn, &useIamAuthn)
675+
if err != nil {
676+
d.logger.Debugf(ctx, "error connecting to new instance %s, %s: %v",
677+
cn.DomainName(), newCn.String(), err)
678+
return instance.ConnName{}, nil
679+
}
680+
return newCn, newC
681+
}
682+
683+
// Remove closed sockets from cache.openConns
684+
cache.purgeClosedConns()
685+
return cn, cache
686+
687+
})
688+
689+
}
690+
578691
// connectionInfoCache is a helper function for returning the appropriate
579692
// connection info Cache in a threadsafe way. It will create a new cache,
580693
// modify the existing one, or leave it unchanged as needed.
@@ -624,7 +737,7 @@ func (d *Dialer) createConnectionInfoCache(
624737
)
625738
}
626739
c := &monitoredCache{
627-
openConns: new(uint64),
740+
openConnsCount: new(uint64),
628741
connectionInfoCache: cache,
629742
}
630743

0 commit comments

Comments
 (0)