Skip to content

Commit aa5bc33

Browse files
authored
🌱 Hot Reload for secrets (#49)
1 parent 62cb91e commit aa5bc33

File tree

16 files changed

+624
-81
lines changed

16 files changed

+624
-81
lines changed

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ permissions:
1717
jobs:
1818
manager-image:
1919
name: Build and push manager image
20-
runs-on: ubuntu-latest
20+
runs-on: ubuntu-24.04
2121
steps:
2222
- name: Checkout code
2323
uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1

.github/workflows/release.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ permissions:
1616
jobs:
1717
manager-image:
1818
name: Build and push manager image
19-
runs-on: ubuntu-latest
19+
runs-on: ubuntu-24.04
2020
steps:
2121
- name: Checkout code
2222
uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
@@ -123,7 +123,7 @@ jobs:
123123
124124
release:
125125
name: Create draft release
126-
runs-on: ubuntu-latest
126+
runs-on: ubuntu-24.04
127127
needs:
128128
- manager-image
129129
steps:

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,4 @@ deploy/gen/
99
hack/.*
1010
/*.kubeconfig
1111
/etc
12+
/*.yaml

README.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ Additional PRs we should create in upstream, so that we can use upstream instead
3333

3434
PRs which are **not** needed in upstream, because upstream has this feature:
3535

36+
* [PR hot-reload credentials when the mounted secret changes](https://github.com/syself/hetzner-cloud-controller-manager/pull/49)
3637
* [PR getInstanceTypeOfRobotServer: convert invalid characters to dashes](https://github.com/syself/hetzner-cloud-controller-manager/pull/40)
3738
* [Make robot client optional for lb client](https://github.com/syself/hetzner-cloud-controller-manager/pull/37): upstream uses ROBOT_ENABLED. We need to set that env var.
3839
* [Fix InstanceExists for baremetal servers, check node name](https://github.com/syself/hetzner-cloud-controller-manager/pull/32)
@@ -70,6 +71,30 @@ helm upgrade --install ccm syself/ccm-hetzner --version X.Y.Z \
7071

7172
See [CAPH docs](https://syself.com/docs/caph/topics/baremetal/creating-workload-cluster#deploying-the-hetzner-cloud-controller-manager) for more details.
7273

74+
## Usage
75+
76+
We recommend mounting the secret `hetzner` as a volume and making it available to the container at `/etc/hetzner-secret`.
77+
The credentials are then reloaded automatically when the secret changes.
78+
You can see an example in the [ccm helm chart](https://github.com/syself/charts/tree/main/charts/ccm-hetzner).
79+
80+
## Env Variables
81+
82+
ROBOT_DEBUG: When set to `true`, API calls to the Hetzner Robot API are logged.
83+
84+
CACHE_TIMEOUT: Timeout of the Robot API Cache. See [ParseDuration](https://pkg.go.dev/time#ParseDuration) for supported syntax.
85+
86+
HCLOUD_ENDPOINT: Defaults to `https://api.hetzner.cloud/v1`
87+
88+
Additional Env Variables are defined at the top of [cloud.go](https://github.com/syself/hetzner-cloud-controller-manager/blob/master/hcloud/cloud.go)
89+
90+
Deprecated (use mounted secret instead):
91+
92+
```
93+
HCLOUD_TOKEN
94+
ROBOT_USER_NAME
95+
ROBOT_PASSWORD
96+
```
97+
7398
---
7499

75100
End of "About the fork"

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ module github.com/syself/hetzner-cloud-controller-manager
33
go 1.23.0
44

55
require (
6+
github.com/fsnotify/fsnotify v1.8.0
67
github.com/hetznercloud/hcloud-go/v2 v2.17.0
78
github.com/prometheus/client_golang v1.20.5
89
github.com/spf13/pflag v1.0.5
@@ -32,7 +33,6 @@ require (
3233
github.com/emicklei/go-restful/v3 v3.12.1 // indirect
3334
github.com/evanphx/json-patch v4.12.0+incompatible // indirect
3435
github.com/felixge/httpsnoop v1.0.4 // indirect
35-
github.com/fsnotify/fsnotify v1.8.0 // indirect
3636
github.com/go-logr/logr v1.4.2 // indirect
3737
github.com/go-logr/stdr v1.2.2 // indirect
3838
github.com/go-openapi/jsonpointer v0.21.0 // indirect

hcloud/cloud.go

Lines changed: 57 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,14 @@ import (
2727
"runtime/debug"
2828
"strconv"
2929
"strings"
30-
"time"
3130

3231
"github.com/hetznercloud/hcloud-go/v2/hcloud"
3332
"github.com/hetznercloud/hcloud-go/v2/hcloud/metadata"
33+
"github.com/syself/hetzner-cloud-controller-manager/internal/credentials"
3434
"github.com/syself/hetzner-cloud-controller-manager/internal/hcops"
3535
"github.com/syself/hetzner-cloud-controller-manager/internal/metrics"
3636
robotclient "github.com/syself/hetzner-cloud-controller-manager/internal/robot/client"
3737
"github.com/syself/hetzner-cloud-controller-manager/internal/robot/client/cache"
38-
"github.com/syself/hetzner-cloud-controller-manager/internal/util"
39-
hrobot "github.com/syself/hrobot-go"
4038
corev1 "k8s.io/api/core/v1"
4139
"k8s.io/client-go/tools/record"
4240
cloudprovider "k8s.io/cloud-provider"
@@ -51,16 +49,10 @@ const (
5149
hcloudDebugENVVar = "HCLOUD_DEBUG"
5250
robotDebugENVVar = "ROBOT_DEBUG"
5351

54-
robotUserNameENVVar = "ROBOT_USER_NAME"
55-
robotPasswordENVVar = "ROBOT_PASSWORD"
56-
5752
// Only as reference - is used in hcops package.
5853
// Default is 5 minutes.
5954
RateLimitWaitTimeRobot = "RATE_LIMIT_WAIT_TIME_ROBOT"
6055

61-
// default is 5 minutes.
62-
CacheTimeout = "CACHE_TIMEOUT"
63-
6456
// Disable the "master/server is attached to the network" check against the metadata service.
6557
hcloudNetworkDisableAttachedCheckENVVar = "HCLOUD_NETWORK_DISABLE_ATTACHED_CHECK"
6658
hcloudNetworkRoutesEnabledENVVar = "HCLOUD_NETWORK_ROUTES_ENABLED"
@@ -73,7 +65,6 @@ const (
7365
hcloudLoadBalancersDisableIPv6 = "HCLOUD_LOAD_BALANCERS_DISABLE_IPV6"
7466
hcloudMetricsEnabledENVVar = "HCLOUD_METRICS_ENABLED"
7567
hcloudMetricsAddress = ":8233"
76-
nodeNameENVVar = "NODE_NAME"
7768
providerName = "hcloud"
7869
hostNamePrefixRobot = "bm-"
7970
)
@@ -84,7 +75,7 @@ var errMissingRobotCredentials = errors.New("missing robot credentials - cannot
8475
var providerVersion = "unknown"
8576

8677
type cloud struct {
87-
client *hcloud.Client
78+
hcloudClient *hcloud.Client
8879
robotClient robotclient.Client
8980
instances *instances
9081
routes *routes
@@ -111,22 +102,21 @@ func (lt *LoggingTransport) RoundTrip(req *http.Request) (resp *http.Response, e
111102
return resp, nil
112103
}
113104

114-
func newCloud(_ io.Reader) (cloudprovider.Interface, error) {
115-
const op = "hcloud/newCloud"
116-
metrics.OperationCalled.WithLabelValues(op).Inc()
117-
118-
token := os.Getenv(hcloudTokenENVVar)
119-
if token == "" {
120-
return nil, fmt.Errorf("environment variable %q is required", hcloudTokenENVVar)
105+
func newHcloudClient(rootDir string) (*hcloud.Client, error) {
106+
credentialsDir := credentials.GetDirectory(rootDir)
107+
token, err := credentials.GetInitialHcloudCredentialsFromDirectory(credentialsDir)
108+
if err != nil {
109+
klog.V(1).Infof("reading Hetzner Cloud token from directory failed. Will try env var: %s", err.Error())
110+
token = os.Getenv(hcloudTokenENVVar)
111+
if token == "" {
112+
return nil, fmt.Errorf("Either token from directory %q or environment variable %q is required", credentialsDir, hcloudTokenENVVar)
113+
}
114+
} else {
115+
klog.V(1).Infof("reading Hetzner Cloud token from %q. The controller will reload the credentials, when the file changes", credentialsDir)
121116
}
122117
if len(token) != 64 {
123118
return nil, fmt.Errorf("entered token is invalid (must be exactly 64 characters long)")
124119
}
125-
nodeName := os.Getenv(nodeNameENVVar)
126-
if nodeName == "" {
127-
return nil, fmt.Errorf("environment variable %q is required", nodeNameENVVar)
128-
}
129-
130120
opts := []hcloud.ClientOption{
131121
hcloud.WithToken(token),
132122
hcloud.WithApplication("hetzner-cloud-controller", providerVersion),
@@ -146,43 +136,43 @@ func newCloud(_ io.Reader) (cloudprovider.Interface, error) {
146136
opts = append(opts, hcloud.WithEndpoint(endpoint))
147137
}
148138
client := hcloud.NewClient(opts...)
149-
metadataClient := metadata.NewClient()
139+
return client, nil
140+
}
150141

151-
robotUserName := os.Getenv(robotUserNameENVVar)
152-
robotPassword := os.Getenv(robotPasswordENVVar)
142+
func newCloud(_ io.Reader) (cloudprovider.Interface, error) {
143+
const op = "hcloud/newCloud"
144+
metrics.OperationCalled.WithLabelValues(op).Inc()
153145

154-
cacheTimeout, err := util.GetEnvDuration(CacheTimeout)
146+
rootDir, err := os.Getwd()
155147
if err != nil {
156148
return nil, fmt.Errorf("%s: %w", op, err)
157149
}
158-
159-
if cacheTimeout == 0 {
160-
cacheTimeout = 5 * time.Minute
150+
hcloudClient, err := newHcloudClient(rootDir)
151+
if err != nil {
152+
return nil, fmt.Errorf("%s: %w", op, err)
161153
}
154+
metadataClient := metadata.NewClient()
162155

163-
var robotClient robotclient.Client
164-
if robotUserName != "" && robotPassword != "" {
165-
var c hrobot.RobotClient
166-
if os.Getenv(robotDebugENVVar) == "true" {
167-
client := &http.Client{
168-
Transport: &LoggingTransport{
169-
roundTripper: http.DefaultTransport,
170-
},
171-
}
172-
c = hrobot.NewBasicAuthClientWithCustomHttpClient(robotUserName, robotPassword, client)
173-
klog.Info("Enabled robot API debugging")
174-
} else {
175-
c = hrobot.NewBasicAuthClient(robotUserName, robotPassword)
176-
klog.Infof("Not enabling robot API debugging. Set env var %s=true to enable it.", robotDebugENVVar)
156+
var httpClient *http.Client
157+
if os.Getenv(robotDebugENVVar) == "true" {
158+
httpClient = &http.Client{
159+
Transport: &LoggingTransport{
160+
roundTripper: http.DefaultTransport,
161+
},
177162
}
178-
robotClient = cache.NewClient(c, cacheTimeout)
179-
} else {
180-
klog.Infof("Hetzner robot is not support because of insufficient credentials. Robot user name specified: %v. Robot password specified: %v", robotUserName != "", robotPassword != "")
163+
}
164+
robotClient, err := cache.NewCachedRobotClient(rootDir, httpClient, "")
165+
if err != nil {
166+
return nil, fmt.Errorf("%s: %w", op, err)
167+
}
168+
169+
if robotClient == nil {
170+
klog.Info("Robot client is nil, will not be able to manage bare metal servers.")
181171
}
182172

183173
var networkID int64
184174
if v, ok := os.LookupEnv(hcloudNetworkENVVar); ok {
185-
n, _, err := client.Network.Get(context.Background(), v)
175+
n, _, err := hcloudClient.Network.Get(context.Background(), v)
186176
if err != nil {
187177
return nil, fmt.Errorf("%s: %w", op, err)
188178
}
@@ -210,7 +200,7 @@ func newCloud(_ io.Reader) (cloudprovider.Interface, error) {
210200
}
211201

212202
// Validate that the provided token works, and we have network connectivity to the Hetzner Cloud API
213-
_, _, err = client.Server.List(context.Background(), hcloud.ServerListOpts{})
203+
_, _, err = hcloudClient.Server.List(context.Background(), hcloud.ServerListOpts{})
214204
if err != nil {
215205
return nil, fmt.Errorf("%s: %w", op, err)
216206
}
@@ -228,30 +218,39 @@ func newCloud(_ io.Reader) (cloudprovider.Interface, error) {
228218
lbRecorder := eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: "hetzner-ccm-loadbalancer"})
229219

230220
lbOps := &hcops.LoadBalancerOps{
231-
LBClient: &client.LoadBalancer,
232-
CertOps: &hcops.CertificateOps{CertClient: &client.Certificate},
233-
ActionClient: &client.Action,
234-
NetworkClient: &client.Network,
221+
LBClient: &hcloudClient.LoadBalancer,
222+
CertOps: &hcops.CertificateOps{CertClient: &hcloudClient.Certificate},
223+
ActionClient: &hcloudClient.Action,
224+
NetworkClient: &hcloudClient.Network,
235225
RobotClient: robotClient,
236226
NetworkID: networkID,
237227
Recorder: lbRecorder,
238228
Defaults: lbOpsDefaults,
239229
}
240230

241-
loadBalancers := newLoadBalancers(lbOps, &client.Action, lbDisablePrivateIngress, lbDisableIPv6)
231+
loadBalancers := newLoadBalancers(lbOps, &hcloudClient.Action, lbDisablePrivateIngress, lbDisableIPv6)
242232
if os.Getenv(hcloudLoadBalancersEnabledENVVar) == "false" {
243233
loadBalancers = nil
244234
}
245-
246235
instancesAddressFamily, err := addressFamilyFromEnv()
247236
if err != nil {
248237
return nil, fmt.Errorf("%s: %w", op, err)
249238
}
250239

240+
credentialsDir := credentials.GetDirectory(rootDir)
241+
_, err = os.Stat(credentialsDir)
242+
if err == nil {
243+
// Watch for changes in the secrets directory
244+
err := credentials.Watch(credentialsDir, hcloudClient, robotClient)
245+
if err != nil {
246+
return nil, fmt.Errorf("%s: %w", op, err)
247+
}
248+
}
249+
251250
return &cloud{
252-
client: client,
251+
hcloudClient: hcloudClient,
253252
robotClient: robotClient,
254-
instances: newInstances(client, robotClient, instancesAddressFamily, networkID),
253+
instances: newInstances(hcloudClient, robotClient, instancesAddressFamily, networkID),
255254
loadBalancer: loadBalancers,
256255
routes: nil,
257256
networkID: networkID,
@@ -288,7 +287,7 @@ func (c *cloud) Clusters() (cloudprovider.Clusters, bool) {
288287

289288
func (c *cloud) Routes() (cloudprovider.Routes, bool) {
290289
if c.networkID > 0 && os.Getenv(hcloudNetworkRoutesEnabledENVVar) != "false" {
291-
r, err := newRoutes(c.client, c.networkID)
290+
r, err := newRoutes(c.hcloudClient, c.networkID)
292291
if err != nil {
293292
klog.ErrorS(err, "create routes provider", "networkID", c.networkID)
294293
return nil, false

0 commit comments

Comments
 (0)