Skip to content
This repository was archived by the owner on Jul 11, 2023. It is now read-only.

Commit 28b3238

Browse files
authored
fix(certs): update checkAndRotate to use current durations (#4800)
Updates the checkAndRotate method to update certs with the current validity duration specified in the MeshConfig for their respective cert type. Adds a certType to the certificate struct that is used to lookup validity duration when calling IssuingCertificates. Updates shouldRotate to check if the cert needs to be rotated due to an in-progress root certificate rotation. Signed-off-by: jaellio <[email protected]>
1 parent 905005f commit 28b3238

27 files changed

+319
-157
lines changed

cmd/osm-controller/osm-controller.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ func main() {
242242
proxyRegistry := registry.NewProxyRegistry(proxyMapper, msgBroker)
243243
go proxyRegistry.ReleaseCertificateHandler(certManager, stop)
244244

245-
adsCert, err := certManager.IssueCertificate(xdsServerCertificateCommonName, certificate.WithValidityPeriod(constants.XDSCertificateValidityPeriod))
245+
adsCert, err := certManager.IssueCertificate(xdsServerCertificateCommonName, certificate.Internal)
246246
if err != nil {
247247
events.GenericEventRecorder().FatalEvent(err, events.CertificateIssuanceFailure, "Error issuing XDS certificate to ADS server")
248248
}

go.mod

+1-1
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ require (
7575
github.com/moby/sys/mountinfo v0.6.2 // indirect
7676
github.com/stretchr/objx v0.3.0 // indirect
7777
golang.org/x/net v0.0.0-20220607020251-c690dde0001d // indirect
78+
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c
7879
golang.org/x/sys v0.0.0-20220610221304-9f5ed59c137d // indirect
7980
google.golang.org/genproto v0.0.0-20220608133413-ed9918b62aac // indirect
8081
honnef.co/go/tools v0.1.1 // indirect
@@ -356,7 +357,6 @@ require (
356357
golang.org/x/crypto v0.0.0-20220315160706-3147a52a75dd // indirect
357358
golang.org/x/mod v0.4.2 // indirect
358359
golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8 // indirect
359-
golang.org/x/sync v0.0.0-20220601150217-0de741cfad7f // indirect
360360
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect
361361
golang.org/x/text v0.3.7 // indirect
362362
golang.org/x/time v0.0.0-20211116232009-f0f3c7e86c11 // indirect

go.sum

+1-2
Original file line numberDiff line numberDiff line change
@@ -2341,9 +2341,8 @@ golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJ
23412341
golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
23422342
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
23432343
golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
2344+
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c h1:5KslGYwFpkhGh+Q16bwMP3cOontH8FOep7tGV86Y7SQ=
23442345
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
2345-
golang.org/x/sync v0.0.0-20220601150217-0de741cfad7f h1:Ax0t5p6N38Ga0dThY21weqDEyz2oklo4IvDkpigvkD8=
2346-
golang.org/x/sync v0.0.0-20220601150217-0de741cfad7f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
23472346
golang.org/x/sys v0.0.0-20170830134202-bb24a47a89ea/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
23482347
golang.org/x/sys v0.0.0-20180117170059-2c42eef0765b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
23492348
golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=

pkg/certificate/certificate.go

-13
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package certificate
22

33
import (
4-
"math/rand"
54
time "time"
65

76
"github.com/rs/zerolog/log"
@@ -69,18 +68,6 @@ func (c *Certificate) GetTrustedCAs() pem.RootCertificate {
6968
return c.TrustedCAs
7069
}
7170

72-
// ShouldRotate determines whether a certificate should be rotated.
73-
func (c *Certificate) ShouldRotate() bool {
74-
// The certificate is going to expire at a timestamp T
75-
// We want to renew earlier. How much earlier is defined in renewBeforeCertExpires.
76-
// We add a few seconds noise to the early renew period so that certificates that may have been
77-
// created at the same time are not renewed at the exact same time.
78-
79-
intNoise := rand.Intn(noiseSeconds) // #nosec G404
80-
secondsNoise := time.Duration(intNoise) * time.Second
81-
return time.Until(c.GetExpiration()) <= (RenewBeforeCertExpires + secondsNoise)
82-
}
83-
8471
// NewFromPEM is a helper returning a *certificate.Certificate from the PEM components given.
8572
func NewFromPEM(pemCert pem.Certificate, pemKey pem.PrivateKey) (*Certificate, error) {
8673
x509Cert, err := DecodePEMCertificate(pemCert)

pkg/certificate/certificate_test.go

-13
Original file line numberDiff line numberDiff line change
@@ -14,19 +14,6 @@ import (
1414
"github.com/openservicemesh/osm/pkg/certificate/pem"
1515
)
1616

17-
func TestShouldRotate(t *testing.T) {
18-
assert := tassert.New(t)
19-
cert := &Certificate{
20-
Expiration: time.Now().Add(-1 * time.Hour),
21-
}
22-
assert.True(cert.ShouldRotate())
23-
24-
cert = &Certificate{
25-
Expiration: time.Now().Add(time.Hour),
26-
}
27-
assert.False(cert.ShouldRotate())
28-
}
29-
3017
func TestNewFromPEM(t *testing.T) {
3118
assert := tassert.New(t)
3219
cn := CommonName("Test CA")

pkg/certificate/fake_manager.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,12 @@ func (i *fakeIssuer) IssueCertificate(cn CommonName, validityPeriod time.Duratio
9292

9393
// FakeCertManager is a testing helper that returns a *certificate.Manager
9494
func FakeCertManager() (*Manager, error) {
95+
getCertValidityDuration := func() time.Duration { return validity }
9596
cm, err := NewManager(
9697
context.Background(),
9798
&fakeMRCClient{},
98-
validity,
99+
getCertValidityDuration,
100+
getCertValidityDuration,
99101
nil,
100102
1*time.Hour,
101103
)

pkg/certificate/manager.go

+117-58
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package certificate
33
import (
44
"context"
55
"errors"
6+
"math/rand"
67
"sync"
78
"time"
89

@@ -16,9 +17,10 @@ import (
1617
)
1718

1819
// NewManager creates a new CertificateManager with the passed MRCClient and options
19-
func NewManager(ctx context.Context, mrcClient MRCClient, serviceCertValidityDuration time.Duration, msgBroker *messaging.Broker, checkInterval time.Duration) (*Manager, error) {
20+
func NewManager(ctx context.Context, mrcClient MRCClient, getServiceCertValidityPeriod func() time.Duration, getIngressCertValidityDuration func() time.Duration, msgBroker *messaging.Broker, checkInterval time.Duration) (*Manager, error) {
2021
m := &Manager{
21-
serviceCertValidityDuration: serviceCertValidityDuration,
22+
serviceCertValidityDuration: getServiceCertValidityPeriod,
23+
ingressCertValidityDuration: getIngressCertValidityDuration,
2224
msgBroker: msgBroker,
2325
}
2426

@@ -152,75 +154,119 @@ func (m *Manager) GetTrustDomain() string {
152154
return m.signingIssuer.TrustDomain
153155
}
154156

157+
// shouldRotate determines whether a certificate should be rotated.
158+
func (m *Manager) shouldRotate(c *Certificate) bool {
159+
// The certificate is going to expire at a timestamp T
160+
// We want to renew earlier. How much earlier is defined in renewBeforeCertExpires.
161+
// We add a few seconds noise to the early renew period so that certificates that may have been
162+
// created at the same time are not renewed at the exact same time.
163+
intNoise := rand.Intn(noiseSeconds) // #nosec G404
164+
secondsNoise := time.Duration(intNoise) * time.Second
165+
renewBefore := RenewBeforeCertExpires + secondsNoise
166+
if time.Until(c.GetExpiration()) <= renewBefore {
167+
log.Info().Msgf("Cert %s should be rotated; expires in %+v; renewBefore is %+v",
168+
c.GetCommonName(),
169+
time.Until(c.GetExpiration()),
170+
renewBefore)
171+
return true
172+
}
173+
174+
m.mu.Lock()
175+
validatingIssuer := m.validatingIssuer
176+
signingIssuer := m.signingIssuer
177+
m.mu.Unlock()
178+
179+
// During root certificate rotation the Issuers will change. If the Manager's Issuers are
180+
// different than the validating Issuer and signing Issuer IDs in the certificate, the
181+
// certificate must be reissued with the correct Issuers for the current rotation stage and
182+
// state. If there is no root certificate rotation in progress, the cert and Manager Issuers
183+
// will match.
184+
if c.signingIssuerID != signingIssuer.ID || c.validatingIssuerID != validatingIssuer.ID {
185+
log.Info().Msgf("Cert %s should be rotated; in progress root certificate rotation",
186+
c.GetCommonName())
187+
return true
188+
}
189+
return false
190+
}
191+
155192
func (m *Manager) checkAndRotate() {
156193
// NOTE: checkAndRotate can reintroduce a certificate that has been released, thereby creating an unbounded cache.
157194
// A certificate can also have been rotated already, leaving the list of issued certs stale, and we re-rotate.
158195
// the latter is not a bug, but a source of inefficiency.
159-
160196
certs := map[string]*Certificate{}
161197
m.cache.Range(func(keyIface interface{}, certInterface interface{}) bool {
162198
key := keyIface.(string)
163199
certs[key] = certInterface.(*Certificate)
164200
return true // continue the iteration
165201
})
166-
for key, cert := range certs {
167-
shouldRotate := cert.ShouldRotate()
168-
169-
word := map[bool]string{true: "will", false: "will not"}[shouldRotate]
170-
log.Trace().Msgf("Cert %s %s be rotated; expires in %+v; renewBeforeCertExpires is %+v",
171-
cert.GetCommonName(),
172-
word,
173-
time.Until(cert.GetExpiration()),
174-
RenewBeforeCertExpires)
175-
176-
if shouldRotate {
177-
opts := []IssueOption{WithValidityPeriod(m.serviceCertValidityDuration)}
178-
// if the key is equal to the common name, then it was issued with FullCNProvided(). This will prevent
179-
// an additional trust domain from being appended. We don't do this in every case, in case the trust domain
180-
// has changed since the last issue.
181-
if key == cert.CommonName.String() {
182-
opts = append(opts, FullCNProvided())
183-
}
184-
newCert, err := m.IssueCertificate(key, opts...)
185-
if err != nil {
186-
// TODO(#3962): metric might not be scraped before process restart resulting from this error
187-
log.Error().Err(err).Str(errcode.Kind, errcode.GetErrCodeWithMetric(errcode.ErrRotatingCert)).
188-
Msgf("Error rotating cert SerialNumber=%s", cert.GetSerialNumber())
189-
continue
190-
}
191202

192-
m.msgBroker.GetCertPubSub().Pub(events.PubSubMessage{
193-
Kind: announcements.CertificateRotated,
194-
NewObj: newCert,
195-
OldObj: cert,
196-
}, announcements.CertificateRotated.String())
203+
for key, cert := range certs {
204+
opts := []IssueOption{}
205+
if key == cert.CommonName.String() {
206+
opts = append(opts, FullCNProvided())
207+
}
197208

198-
log.Debug().Msgf("Rotated certificate (old SerialNumber=%s) with new SerialNumber=%s", cert.SerialNumber, newCert.SerialNumber)
209+
_, err := m.IssueCertificate(key, cert.certType, opts...)
210+
if err != nil {
211+
log.Error().Err(err).Str(errcode.Kind, errcode.GetErrCodeWithMetric(errcode.ErrRotatingCert)).
212+
Msgf("Error rotating cert SerialNumber=%s", cert.GetSerialNumber())
199213
}
200214
}
201215
}
202216

217+
func (m *Manager) getValidityDurationForCertType(ct CertType) time.Duration {
218+
switch ct {
219+
case Internal:
220+
return constants.OSMCertificateValidityPeriod
221+
case IngressGateway:
222+
return m.ingressCertValidityDuration()
223+
case Service:
224+
return m.serviceCertValidityDuration()
225+
default:
226+
log.Debug().Msgf("Unknown certificate type %s provided when getting validity duration", ct)
227+
return constants.OSMCertificateValidityPeriod
228+
}
229+
}
230+
231+
// getFromCache returns the certificate with the specified cn from cache if it exists.
232+
// Note: getFromCache might return an expired or invalid certificate.
203233
func (m *Manager) getFromCache(key string) *Certificate {
204234
certInterface, exists := m.cache.Load(key)
205235
if !exists {
206236
return nil
207237
}
208238
cert := certInterface.(*Certificate)
209239
log.Trace().Msgf("Certificate found in cache SerialNumber=%s", cert.GetSerialNumber())
210-
if cert.ShouldRotate() {
211-
log.Trace().Msgf("Certificate found in cache but has expired SerialNumber=%s", cert.GetSerialNumber())
212-
return nil
213-
}
214240
return cert
215241
}
216242

217-
// IssueCertificate implements Manager and returns a newly issued certificate from the given client.
218-
func (m *Manager) IssueCertificate(prefix string, opts ...IssueOption) (*Certificate, error) {
219-
var err error
220-
cert := m.getFromCache(prefix) // Don't call this while holding the lock
243+
// IssueCertificate returns a newly issued certificate from the given client
244+
// or an existing valid certificate from the local cache.
245+
func (m *Manager) IssueCertificate(prefix string, ct CertType, opts ...IssueOption) (*Certificate, error) {
246+
// a singleflight group is used here to ensure that only one issueCertificate is in
247+
// flight at a time for a given certificate prefix. Helps avoid a race condition if
248+
// issueCertificate is called multiple times in a row for the same certificate prefix.
249+
cert, err, _ := m.group.Do(prefix, func() (interface{}, error) {
250+
return m.issueCertificate(prefix, ct, opts...)
251+
})
252+
if err != nil {
253+
return nil, err
254+
}
255+
return cert.(*Certificate), nil
256+
}
221257

222-
options := defaultOptions(m.serviceCertValidityDuration)
258+
func (m *Manager) issueCertificate(prefix string, ct CertType, opts ...IssueOption) (*Certificate, error) {
259+
var rotate bool
260+
cert := m.getFromCache(prefix) // Don't call this while holding the lock
261+
if cert != nil {
262+
// check if cert needs to be rotated
263+
rotate = m.shouldRotate(cert)
264+
if !rotate {
265+
return cert, nil
266+
}
267+
}
223268

269+
options := &issueOptions{}
224270
for _, o := range opts {
225271
o(options)
226272
}
@@ -231,27 +277,40 @@ func (m *Manager) IssueCertificate(prefix string, opts ...IssueOption) (*Certifi
231277
m.mu.Unlock()
232278

233279
start := time.Now()
234-
if cert == nil || cert.signingIssuerID != signingIssuer.ID || cert.validatingIssuerID != validatingIssuer.ID {
235-
cert, err = signingIssuer.IssueCertificate(options.formatCN(prefix, signingIssuer.TrustDomain), options.validityPeriod)
236-
if err != nil {
237-
return nil, err
238-
}
239280

240-
// if we have different signing and validating issuers,
241-
// create the cert's trust context
242-
if validatingIssuer.ID != signingIssuer.ID {
243-
cert = cert.newMergedWithRoot(validatingIssuer.CertificateAuthority)
244-
}
281+
validityDuration := m.getValidityDurationForCertType(ct)
282+
newCert, err := signingIssuer.IssueCertificate(options.formatCN(prefix, signingIssuer.TrustDomain), validityDuration)
283+
if err != nil {
284+
return nil, err
285+
}
245286

246-
cert.signingIssuerID = signingIssuer.ID
247-
cert.validatingIssuerID = validatingIssuer.ID
287+
// if we have different signing and validating issuers,
288+
// create the cert's trust context
289+
if validatingIssuer.ID != signingIssuer.ID {
290+
newCert = newCert.newMergedWithRoot(validatingIssuer.CertificateAuthority)
248291
}
249292

250-
m.cache.Store(prefix, cert)
293+
newCert.signingIssuerID = signingIssuer.ID
294+
newCert.validatingIssuerID = validatingIssuer.ID
251295

252-
log.Trace().Msgf("It took %s to issue certificate with SerialNumber=%s", time.Since(start), cert.GetSerialNumber())
296+
newCert.certType = ct
297+
298+
m.cache.Store(prefix, newCert)
299+
300+
log.Trace().Msgf("It took %s to issue certificate with SerialNumber=%s", time.Since(start), newCert.GetSerialNumber())
301+
302+
if rotate {
303+
// Certificate was rotated
304+
m.msgBroker.GetCertPubSub().Pub(events.PubSubMessage{
305+
Kind: announcements.CertificateRotated,
306+
NewObj: newCert,
307+
OldObj: cert,
308+
}, announcements.CertificateRotated.String())
309+
310+
log.Debug().Msgf("Rotated certificate (old SerialNumber=%s) with new SerialNumber=%s", cert.SerialNumber, newCert.SerialNumber)
311+
}
253312

254-
return cert, nil
313+
return newCert, nil
255314
}
256315

257316
// ReleaseCertificate is called when a cert will no longer be needed and should be removed from the system.

0 commit comments

Comments
 (0)