Skip to content

MWI: Wait for bot identity before starting services #55610

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions lib/tbot/loop.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ type runOnIntervalConfig struct {
retryLimit int
exitOnRetryExhausted bool
waitBeforeFirstRun bool
// identityReadyCh allows the service to wait until the internal bot identity
// renewal has completed before running, to avoid spamming the logs if the
// service doesn't support gracefully degrading when there is no API client
// available.
identityReadyCh <-chan struct{}
}

// runOnInterval runs a function on a given interval, with retries and jitter.
Expand All @@ -97,6 +102,19 @@ func runOnInterval(ctx context.Context, cfg runOnIntervalConfig) error {

log := cfg.log.With("task", cfg.name)

if cfg.identityReadyCh != nil {
select {
case <-cfg.identityReadyCh:
default:
log.InfoContext(ctx, "Waiting for internal bot identity to be renewed before running")
select {
case <-cfg.identityReadyCh:
case <-ctx.Done():
return nil
}
}
}

if cfg.clock == nil {
cfg.clock = clockwork.NewRealClock()
}
Expand Down
30 changes: 16 additions & 14 deletions lib/tbot/service_application_output.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,14 @@ import (
// ApplicationOutputService generates the artifacts necessary to connect to an
// HTTP or TCP application using Teleport.
type ApplicationOutputService struct {
botAuthClient *apiclient.Client
botCfg *config.BotConfig
cfg *config.ApplicationOutput
getBotIdentity getBotIdentityFn
log *slog.Logger
reloadBroadcaster *channelBroadcaster
resolver reversetunnelclient.Resolver
botAuthClient *apiclient.Client
botIdentityReadyCh <-chan struct{}
botCfg *config.BotConfig
cfg *config.ApplicationOutput
getBotIdentity getBotIdentityFn
log *slog.Logger
reloadBroadcaster *channelBroadcaster
resolver reversetunnelclient.Resolver
}

func (s *ApplicationOutputService) String() string {
Expand All @@ -60,13 +61,14 @@ func (s *ApplicationOutputService) Run(ctx context.Context) error {
defer unsubscribe()

err := runOnInterval(ctx, runOnIntervalConfig{
service: s.String(),
name: "output-renewal",
f: s.generate,
interval: cmp.Or(s.cfg.CredentialLifetime, s.botCfg.CredentialLifetime).RenewalInterval,
retryLimit: renewalRetryLimit,
log: s.log,
reloadCh: reloadCh,
service: s.String(),
name: "output-renewal",
f: s.generate,
interval: cmp.Or(s.cfg.CredentialLifetime, s.botCfg.CredentialLifetime).RenewalInterval,
retryLimit: renewalRetryLimit,
log: s.log,
reloadCh: reloadCh,
identityReadyCh: s.botIdentityReadyCh,
})
return trace.Wrap(err)
}
Expand Down
28 changes: 21 additions & 7 deletions lib/tbot/service_application_tunnel.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,14 @@ import (
// an authenticating tunnel and will automatically issue and renew certificates
// as needed.
type ApplicationTunnelService struct {
botCfg *config.BotConfig
cfg *config.ApplicationTunnelService
proxyPingCache *proxyPingCache
log *slog.Logger
resolver reversetunnelclient.Resolver
botClient *apiclient.Client
getBotIdentity getBotIdentityFn
botCfg *config.BotConfig
cfg *config.ApplicationTunnelService
proxyPingCache *proxyPingCache
log *slog.Logger
resolver reversetunnelclient.Resolver
botClient *apiclient.Client
getBotIdentity getBotIdentityFn
botIdentityReadyCh <-chan struct{}
}

func (s *ApplicationTunnelService) Run(ctx context.Context) error {
Expand Down Expand Up @@ -118,6 +119,19 @@ func (s *ApplicationTunnelService) buildLocalProxyConfig(ctx context.Context) (l
ctx, span := tracer.Start(ctx, "ApplicationTunnelService/buildLocalProxyConfig")
defer span.End()

if s.botIdentityReadyCh != nil {
select {
case <-s.botIdentityReadyCh:
default:
s.log.InfoContext(ctx, "Waiting for internal bot identity to be renewed before running")
select {
case <-s.botIdentityReadyCh:
case <-ctx.Done():
return alpnproxy.LocalProxyConfig{}, ctx.Err()
}
}
}

// Determine the roles to use for the impersonated app access user. We fall
// back to all the roles the bot has if none are configured.
roles := s.cfg.Roles
Expand Down
22 changes: 18 additions & 4 deletions lib/tbot/service_ca_rotation.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,11 @@ const caRotationRetryBackoff = time.Second * 2
// certificates issued by the old CA, and stop trusting the new CA.
// - Update Servers -> Standby: So we can stop trusting the old CA.
type caRotationService struct {
log *slog.Logger
reloadBroadcaster *channelBroadcaster
botClient *apiclient.Client
getBotIdentity getBotIdentityFn
log *slog.Logger
reloadBroadcaster *channelBroadcaster
botClient *apiclient.Client
getBotIdentity getBotIdentityFn
botIdentityReadyCh <-chan struct{}
}

func (s *caRotationService) String() string {
Expand All @@ -149,6 +150,19 @@ func (s *caRotationService) Run(ctx context.Context) error {
}
jitter := retryutils.DefaultJitter

if s.botIdentityReadyCh != nil {
select {
case <-s.botIdentityReadyCh:
default:
s.log.InfoContext(ctx, "Waiting for internal bot identity to be renewed before running")
select {
case <-s.botIdentityReadyCh:
case <-ctx.Done():
return nil
}
}
}

for {
err := s.watchCARotations(ctx, rd.attempt)
if ctx.Err() != nil {
Expand Down
28 changes: 15 additions & 13 deletions lib/tbot/service_client_credential.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,13 @@ type ClientCredentialOutputService struct {
// botAuthClient should be an auth client using the bot's internal identity.
// This will not have any roles impersonated and should only be used to
// fetch CAs.
botAuthClient *apiclient.Client
botCfg *config.BotConfig
cfg *config.UnstableClientCredentialOutput
getBotIdentity getBotIdentityFn
log *slog.Logger
reloadBroadcaster *channelBroadcaster
botAuthClient *apiclient.Client
botIdentityReadyCh <-chan struct{}
botCfg *config.BotConfig
cfg *config.UnstableClientCredentialOutput
getBotIdentity getBotIdentityFn
log *slog.Logger
reloadBroadcaster *channelBroadcaster
}

func (s *ClientCredentialOutputService) String() string {
Expand All @@ -55,13 +56,14 @@ func (s *ClientCredentialOutputService) Run(ctx context.Context) error {
defer unsubscribe()

err := runOnInterval(ctx, runOnIntervalConfig{
service: s.String(),
name: "output-renewal",
f: s.generate,
interval: s.botCfg.CredentialLifetime.RenewalInterval,
retryLimit: renewalRetryLimit,
log: s.log,
reloadCh: reloadCh,
service: s.String(),
name: "output-renewal",
f: s.generate,
interval: s.botCfg.CredentialLifetime.RenewalInterval,
retryLimit: renewalRetryLimit,
log: s.log,
reloadCh: reloadCh,
identityReadyCh: s.botIdentityReadyCh,
})
return trace.Wrap(err)
}
Expand Down
30 changes: 16 additions & 14 deletions lib/tbot/service_database_output.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,14 @@ import (
// DatabaseOutputService generates the artifacts necessary to connect to a
// database using Teleport.
type DatabaseOutputService struct {
botAuthClient *apiclient.Client
botCfg *config.BotConfig
cfg *config.DatabaseOutput
getBotIdentity getBotIdentityFn
log *slog.Logger
reloadBroadcaster *channelBroadcaster
resolver reversetunnelclient.Resolver
botAuthClient *apiclient.Client
botIdentityReadyCh <-chan struct{}
botCfg *config.BotConfig
cfg *config.DatabaseOutput
getBotIdentity getBotIdentityFn
log *slog.Logger
reloadBroadcaster *channelBroadcaster
resolver reversetunnelclient.Resolver
}

func (s *DatabaseOutputService) String() string {
Expand All @@ -61,13 +62,14 @@ func (s *DatabaseOutputService) Run(ctx context.Context) error {
defer unsubscribe()

err := runOnInterval(ctx, runOnIntervalConfig{
service: s.String(),
name: "output-renewal",
f: s.generate,
interval: cmp.Or(s.cfg.CredentialLifetime, s.botCfg.CredentialLifetime).RenewalInterval,
retryLimit: renewalRetryLimit,
log: s.log,
reloadCh: reloadCh,
service: s.String(),
name: "output-renewal",
f: s.generate,
interval: cmp.Or(s.cfg.CredentialLifetime, s.botCfg.CredentialLifetime).RenewalInterval,
retryLimit: renewalRetryLimit,
log: s.log,
reloadCh: reloadCh,
identityReadyCh: s.botIdentityReadyCh,
})
return trace.Wrap(err)
}
Expand Down
28 changes: 21 additions & 7 deletions lib/tbot/service_database_tunnel.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,14 @@ func (a alpnProxyMiddleware) OnStart(ctx context.Context, lp *alpnproxy.LocalPro
// connections to a remote database service. It is an authenticating tunnel and
// will automatically issue and renew certificates as needed.
type DatabaseTunnelService struct {
botCfg *config.BotConfig
cfg *config.DatabaseTunnelService
proxyPingCache *proxyPingCache
log *slog.Logger
resolver reversetunnelclient.Resolver
botClient *apiclient.Client
getBotIdentity getBotIdentityFn
botCfg *config.BotConfig
cfg *config.DatabaseTunnelService
proxyPingCache *proxyPingCache
log *slog.Logger
resolver reversetunnelclient.Resolver
botClient *apiclient.Client
getBotIdentity getBotIdentityFn
botIdentityReadyCh <-chan struct{}
}

// buildLocalProxyConfig initializes the service, fetching any initial information and setting
Expand All @@ -78,6 +79,19 @@ func (s *DatabaseTunnelService) buildLocalProxyConfig(ctx context.Context) (lpCf
ctx, span := tracer.Start(ctx, "DatabaseTunnelService/buildLocalProxyConfig")
defer span.End()

if s.botIdentityReadyCh != nil {
select {
case <-s.botIdentityReadyCh:
default:
s.log.InfoContext(ctx, "Waiting for internal bot identity to be renewed before running")
select {
case <-s.botIdentityReadyCh:
case <-ctx.Done():
return alpnproxy.LocalProxyConfig{}, ctx.Err()
}
}
}

// Determine the roles to use for the impersonated db access user. We fall
// back to all the roles the bot has if none are configured.
roles := s.cfg.Roles
Expand Down
2 changes: 2 additions & 0 deletions lib/tbot/service_heartbeat.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ type heartbeatService struct {
botCfg *config.BotConfig
startedAt time.Time
heartbeatSubmitter heartbeatSubmitter
botIdentityReadyCh <-chan struct{}
interval time.Duration
retryLimit int
}
Expand Down Expand Up @@ -117,6 +118,7 @@ func (s *heartbeatService) Run(ctx context.Context) error {
isStartup = false
return nil
},
identityReadyCh: s.botIdentityReadyCh,
})
return trace.Wrap(err)
}
Expand Down
32 changes: 17 additions & 15 deletions lib/tbot/service_identity_output.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,15 @@ type IdentityOutputService struct {
// botAuthClient should be an auth client using the bot's internal identity.
// This will not have any roles impersonated and should only be used to
// fetch CAs.
botAuthClient *apiclient.Client
botCfg *config.BotConfig
cfg *config.IdentityOutput
getBotIdentity getBotIdentityFn
log *slog.Logger
proxyPingCache *proxyPingCache
reloadBroadcaster *channelBroadcaster
resolver reversetunnelclient.Resolver
botAuthClient *apiclient.Client
botIdentityReadyCh <-chan struct{}
botCfg *config.BotConfig
cfg *config.IdentityOutput
getBotIdentity getBotIdentityFn
log *slog.Logger
proxyPingCache *proxyPingCache
reloadBroadcaster *channelBroadcaster
resolver reversetunnelclient.Resolver
// executablePath is called to get the path to the tbot executable.
// Usually this is os.Executable
executablePath func() (string, error)
Expand All @@ -75,13 +76,14 @@ func (s *IdentityOutputService) Run(ctx context.Context) error {
defer unsubscribe()

err := runOnInterval(ctx, runOnIntervalConfig{
service: s.String(),
name: "output-renewal",
f: s.generate,
interval: cmp.Or(s.cfg.CredentialLifetime, s.botCfg.CredentialLifetime).RenewalInterval,
retryLimit: renewalRetryLimit,
log: s.log,
reloadCh: reloadCh,
service: s.String(),
name: "output-renewal",
f: s.generate,
interval: cmp.Or(s.cfg.CredentialLifetime, s.botCfg.CredentialLifetime).RenewalInterval,
retryLimit: renewalRetryLimit,
log: s.log,
reloadCh: reloadCh,
identityReadyCh: s.botIdentityReadyCh,
})
return trace.Wrap(err)
}
Expand Down
32 changes: 17 additions & 15 deletions lib/tbot/service_kubernetes_output.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,15 @@ type KubernetesOutputService struct {
// botAuthClient should be an auth client using the bot's internal identity.
// This will not have any roles impersonated and should only be used to
// fetch CAs.
botAuthClient *apiclient.Client
botCfg *config.BotConfig
cfg *config.KubernetesOutput
getBotIdentity getBotIdentityFn
log *slog.Logger
proxyPingCache *proxyPingCache
reloadBroadcaster *channelBroadcaster
resolver reversetunnelclient.Resolver
botAuthClient *apiclient.Client
botIdentityReadyCh <-chan struct{}
botCfg *config.BotConfig
cfg *config.KubernetesOutput
getBotIdentity getBotIdentityFn
log *slog.Logger
proxyPingCache *proxyPingCache
reloadBroadcaster *channelBroadcaster
resolver reversetunnelclient.Resolver
// executablePath is called to get the path to the tbot executable.
// Usually this is os.Executable
executablePath func() (string, error)
Expand All @@ -79,13 +80,14 @@ func (s *KubernetesOutputService) Run(ctx context.Context) error {
defer unsubscribe()

err := runOnInterval(ctx, runOnIntervalConfig{
service: s.String(),
name: "output-renewal",
f: s.generate,
interval: cmp.Or(s.cfg.CredentialLifetime, s.botCfg.CredentialLifetime).RenewalInterval,
retryLimit: renewalRetryLimit,
log: s.log,
reloadCh: reloadCh,
service: s.String(),
name: "output-renewal",
f: s.generate,
interval: cmp.Or(s.cfg.CredentialLifetime, s.botCfg.CredentialLifetime).RenewalInterval,
retryLimit: renewalRetryLimit,
log: s.log,
reloadCh: reloadCh,
identityReadyCh: s.botIdentityReadyCh,
})
return trace.Wrap(err)
}
Expand Down
Loading
Loading