Skip to content

Commit 696f462

Browse files
Merge pull request #38 from segmentio/yolken-improve-locks-opa
Improve locking and OPA evaluation
2 parents bd5e6c9 + 96b68a2 commit 696f462

File tree

12 files changed

+400
-104
lines changed

12 files changed

+400
-104
lines changed

README.md

+19-3
Original file line numberDiff line numberDiff line change
@@ -222,9 +222,7 @@ other source types use custom code in the `kubeapply` binary.
222222
This validates all of the expanded configs for the cluster using the
223223
[`kubeconform`](https://github.com/yannh/kubeconform) library. It also, optionally, supports
224224
validating configs using one or more [OPA](https://www.openpolicyagent.org/) policies in
225-
rego format. The latter allows checking that configs satisfy organization-specific standards,
226-
e.g. that resource labels are in the correct format, that images are only pulled from the
227-
expected registries, etc.
225+
rego format; see the "Experimental features" section below for more details.
228226

229227
#### Diff
230228

@@ -338,6 +336,24 @@ where the `url`s are in the same format as those for Helm chart locations,
338336
e.g. `file://path/to/my/file`. The outputs of each profile will be expanded into
339337
`[expanded dir]/[profile name]/...`.
340338

339+
### OPA policy checks
340+
341+
The `kubeapply validate` subcommand now supports checking expanded configs against policies in
342+
[Open Policy Agent (OPA)](https://www.openpolicyagent.org/) format. This can be helpful for
343+
enforcing organization-specific standards, e.g. that images need to be pulled from a particular
344+
private registry, that all labels are in a consistent format, etc.
345+
346+
To use this, write up your policies as `.rego` files as described in the OPA documentation and run
347+
the former subcommand with one or more `--policy=[path to policy]` arguments. By default, policies
348+
should be in the `com.segment.kubeapply` package. Denial reasons, if any, are returned by
349+
setting a `deny` variable with a set of denial reason strings. If this set is empty,
350+
`kubeapply` will assume that the config has passed all checks in the policy file.
351+
352+
If a denial reason begins with the string `warn:`, then that denial will be treated as a
353+
non-blocking warning as opposed to an error that causes validation to fail.
354+
355+
See [this unit test](/pkg/validation/policy_test.go) for some examples.
356+
341357
## Testing
342358

343359
### Unit tests

cmd/kubeapply/subcmd/validate.go

+18-59
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package subcmd
22

33
import (
44
"context"
5+
"errors"
56
"fmt"
67
"path/filepath"
78

@@ -144,69 +145,27 @@ func execValidation(ctx context.Context, clusterConfig *config.ClusterConfig) er
144145
return err
145146
}
146147

147-
numInvalidResourceChecks := 0
148-
numValidResourceChecks := 0
149-
numSkippedResourceChecks := 0
150-
151-
for _, result := range results {
152-
for _, checkResult := range result.CheckResults {
153-
switch checkResult.Status {
154-
case validation.StatusValid:
155-
numValidResourceChecks++
156-
log.Debugf(
157-
"Resource %s in file %s OK according to check %s",
158-
result.Resource.PrettyName(),
159-
result.Resource.Path,
160-
checkResult.CheckName,
161-
)
162-
case validation.StatusSkipped:
163-
numSkippedResourceChecks++
164-
log.Debugf(
165-
"Resource %s in file %s was skipped by check %s",
166-
result.Resource.PrettyName(),
167-
result.Resource.Path,
168-
checkResult.CheckName,
169-
)
170-
case validation.StatusError:
171-
numInvalidResourceChecks++
172-
log.Errorf(
173-
"Resource %s in file %s could not be processed by check %s: %s",
174-
result.Resource.PrettyName(),
175-
result.Resource.Path,
176-
checkResult.CheckName,
177-
checkResult.Message,
178-
)
179-
case validation.StatusInvalid:
180-
numInvalidResourceChecks++
181-
log.Errorf(
182-
"Resource %s in file %s is invalid according to check %s: %s",
183-
result.Resource.PrettyName(),
184-
result.Resource.Path,
185-
checkResult.CheckName,
186-
checkResult.Message,
187-
)
188-
case validation.StatusEmpty:
189-
default:
190-
log.Infof("Unrecognized result type: %+v", result)
191-
}
148+
counts := validation.CountsByStatus(results)
149+
resultsWithIssues := validation.ResultsWithIssues(results)
150+
151+
if len(resultsWithIssues) > 0 {
152+
log.Warnf("Found %d resources with potential issues", len(resultsWithIssues))
153+
for _, result := range resultsWithIssues {
154+
fmt.Println(
155+
validation.ResultTable(
156+
result,
157+
clusterConfig.DescriptiveName(),
158+
clusterConfig.ExpandedPath,
159+
debug,
160+
),
161+
)
192162
}
193163
}
194164

195-
if numInvalidResourceChecks > 0 {
196-
return fmt.Errorf(
197-
"Validation failed for %d resources in cluster %s (%d checks valid, %d skipped)",
198-
numInvalidResourceChecks,
199-
clusterConfig.DescriptiveName(),
200-
numValidResourceChecks,
201-
numSkippedResourceChecks,
202-
)
165+
if counts[validation.StatusError]+counts[validation.StatusInvalid] > 0 {
166+
return errors.New("Validation failed")
203167
}
204168

205-
log.Infof(
206-
"Validation of cluster %s passed (%d checks valid, %d skipped)",
207-
clusterConfig.DescriptiveName(),
208-
numValidResourceChecks,
209-
numSkippedResourceChecks,
210-
)
169+
log.Infof("Validation passed")
211170
return nil
212171
}

data/data.go

+8-8
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/store/leaderelection/leaderelection.go

+16-4
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,8 @@ import (
6565
)
6666

6767
const (
68-
JitterFactor = 1.2
68+
JitterFactor = 1.2
69+
releaseTimeout = 10 * time.Second
6970
)
7071

7172
// NewLeaderElector creates a LeaderElector from a LeaderElectionConfig
@@ -240,7 +241,8 @@ func (le *LeaderElector) acquire(ctx context.Context) bool {
240241
return succeeded
241242
}
242243

243-
// renew loops calling tryAcquireOrRenew and returns immediately when tryAcquireOrRenew fails or ctx signals done.
244+
// renew loops calling tryAcquireOrRenew and returns immediately when tryAcquireOrRenew fails or
245+
// ctx signals done.
244246
func (le *LeaderElector) renew(ctx context.Context) {
245247
ctx, cancel := context.WithCancel(ctx)
246248
defer cancel()
@@ -264,7 +266,14 @@ func (le *LeaderElector) renew(ctx context.Context) {
264266

265267
// if we hold the lease, give it up
266268
if le.config.ReleaseOnCancel {
267-
le.release(ctx)
269+
// Use the background context, not the one that was passed in originally. If
270+
// the latter was cancelled, then we can't actually do the release.
271+
releaseCtx, releaseCancel := context.WithTimeout(
272+
context.Background(),
273+
releaseTimeout,
274+
)
275+
defer releaseCancel()
276+
le.release(releaseCtx)
268277
}
269278
}
270279

@@ -332,7 +341,10 @@ func (le *LeaderElector) tryAcquireOrRenew(ctx context.Context) bool {
332341
le.observedTime.Add(le.config.LeaseDuration).After(now.Time) &&
333342
oldLeaderElectionRecord.RenewTime.Time.After(thresholdTime) &&
334343
!le.IsLeader() {
335-
log.Infof("Lock is held by %v and has not yet expired", oldLeaderElectionRecord.HolderIdentity)
344+
log.Infof(
345+
"Lock is held by %v and has not yet expired",
346+
oldLeaderElectionRecord.HolderIdentity,
347+
)
336348
return false
337349
}
338350

0 commit comments

Comments
 (0)