Skip to content

Commit 87326f5

Browse files
authored
Experimental implementation of Policy v2 (#2214)
* utility iterator for ipset Signed-off-by: Kristoffer Dalby <[email protected]> * split policy -> policy and v1 This commit split out the common policy logic and policy implementation into separate packages. policy contains functions that are independent of the policy implementation, this typically means logic that works on tailcfg types and generic formats. In addition, it defines the PolicyManager interface which the v1 implements. v1 is a subpackage which implements the PolicyManager using the "original" policy implementation. Signed-off-by: Kristoffer Dalby <[email protected]> * use polivyv1 definitions in integration tests These can be marshalled back into JSON, which the new format might not be able to. Also, just dont change it all to JSON strings for now. Signed-off-by: Kristoffer Dalby <[email protected]> * formatter: breaks lines Signed-off-by: Kristoffer Dalby <[email protected]> * remove compareprefix, use tsaddr version Signed-off-by: Kristoffer Dalby <[email protected]> * remove getacl test, add back autoapprover Signed-off-by: Kristoffer Dalby <[email protected]> * use policy manager tag handling Signed-off-by: Kristoffer Dalby <[email protected]> * rename display helper for user Signed-off-by: Kristoffer Dalby <[email protected]> * introduce policy v2 package policy v2 is built from the ground up to be stricter and follow the same pattern for all types of resolvers. TODO introduce aliass resolver Signed-off-by: Kristoffer Dalby <[email protected]> * wire up policyv2 in integration testing Signed-off-by: Kristoffer Dalby <[email protected]> * split policy v2 tests into seperate workflow to work around github limit Signed-off-by: Kristoffer Dalby <[email protected]> * add policy manager output to /debug Signed-off-by: Kristoffer Dalby <[email protected]> * update changelog Signed-off-by: Kristoffer Dalby <[email protected]> --------- Signed-off-by: Kristoffer Dalby <[email protected]>
1 parent b6fbd37 commit 87326f5

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+5986
-2221
lines changed

.github/workflows/gh-action-integration-generator.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,13 @@ func findTests() []string {
3838
return tests
3939
}
4040

41-
func updateYAML(tests []string) {
41+
func updateYAML(tests []string, testPath string) {
4242
testsForYq := fmt.Sprintf("[%s]", strings.Join(tests, ", "))
4343

4444
yqCommand := fmt.Sprintf(
45-
"yq eval '.jobs.integration-test.strategy.matrix.test = %s' ./test-integration.yaml -i",
45+
"yq eval '.jobs.integration-test.strategy.matrix.test = %s' %s -i",
4646
testsForYq,
47+
testPath,
4748
)
4849
cmd := exec.Command("bash", "-c", yqCommand)
4950

@@ -58,7 +59,7 @@ func updateYAML(tests []string) {
5859
log.Fatalf("failed to run yq command: %s", err)
5960
}
6061

61-
fmt.Println("YAML file updated successfully")
62+
fmt.Printf("YAML file (%s) updated successfully\n", testPath)
6263
}
6364

6465
func main() {
@@ -69,5 +70,6 @@ func main() {
6970
quotedTests[i] = fmt.Sprintf("\"%s\"", test)
7071
}
7172

72-
updateYAML(quotedTests)
73+
updateYAML(quotedTests, "./test-integration.yaml")
74+
updateYAML(quotedTests, "./test-integration-policyv2.yaml")
7375
}
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
name: Integration Tests (policy v2)
2+
# To debug locally on a branch, and when needing secrets
3+
# change this to include `push` so the build is ran on
4+
# the main repository.
5+
on: [pull_request]
6+
concurrency:
7+
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
8+
cancel-in-progress: true
9+
jobs:
10+
integration-test:
11+
runs-on: ubuntu-latest
12+
strategy:
13+
fail-fast: false
14+
matrix:
15+
test:
16+
- TestACLHostsInNetMapTable
17+
- TestACLAllowUser80Dst
18+
- TestACLDenyAllPort80
19+
- TestACLAllowUserDst
20+
- TestACLAllowStarDst
21+
- TestACLNamedHostsCanReachBySubnet
22+
- TestACLNamedHostsCanReach
23+
- TestACLDevice1CanAccessDevice2
24+
- TestPolicyUpdateWhileRunningWithCLIInDatabase
25+
- TestAuthKeyLogoutAndReloginSameUser
26+
- TestAuthKeyLogoutAndReloginNewUser
27+
- TestAuthKeyLogoutAndReloginSameUserExpiredKey
28+
- TestOIDCAuthenticationPingAll
29+
- TestOIDCExpireNodesBasedOnTokenExpiry
30+
- TestOIDC024UserCreation
31+
- TestOIDCAuthenticationWithPKCE
32+
- TestOIDCReloginSameNodeNewUser
33+
- TestAuthWebFlowAuthenticationPingAll
34+
- TestAuthWebFlowLogoutAndRelogin
35+
- TestUserCommand
36+
- TestPreAuthKeyCommand
37+
- TestPreAuthKeyCommandWithoutExpiry
38+
- TestPreAuthKeyCommandReusableEphemeral
39+
- TestPreAuthKeyCorrectUserLoggedInCommand
40+
- TestApiKeyCommand
41+
- TestNodeTagCommand
42+
- TestNodeAdvertiseTagCommand
43+
- TestNodeCommand
44+
- TestNodeExpireCommand
45+
- TestNodeRenameCommand
46+
- TestNodeMoveCommand
47+
- TestPolicyCommand
48+
- TestPolicyBrokenConfigCommand
49+
- TestDERPVerifyEndpoint
50+
- TestResolveMagicDNS
51+
- TestResolveMagicDNSExtraRecordsPath
52+
- TestValidateResolvConf
53+
- TestDERPServerScenario
54+
- TestDERPServerWebsocketScenario
55+
- TestPingAllByIP
56+
- TestPingAllByIPPublicDERP
57+
- TestEphemeral
58+
- TestEphemeralInAlternateTimezone
59+
- TestEphemeral2006DeletedTooQuickly
60+
- TestPingAllByHostname
61+
- TestTaildrop
62+
- TestUpdateHostnameFromClient
63+
- TestExpireNode
64+
- TestNodeOnlineStatus
65+
- TestPingAllByIPManyUpDown
66+
- Test2118DeletingOnlineNodePanics
67+
- TestEnablingRoutes
68+
- TestHASubnetRouterFailover
69+
- TestEnableDisableAutoApprovedRoute
70+
- TestAutoApprovedSubRoute2068
71+
- TestSubnetRouteACL
72+
- TestEnablingExitRoutes
73+
- TestHeadscale
74+
- TestCreateTailscale
75+
- TestTailscaleNodesJoiningHeadcale
76+
- TestSSHOneUserToAll
77+
- TestSSHMultipleUsersAllToAll
78+
- TestSSHNoSSHConfigured
79+
- TestSSHIsBlockedInACL
80+
- TestSSHUserOnlyIsolation
81+
database: [postgres, sqlite]
82+
env:
83+
# Github does not allow us to access secrets in pull requests,
84+
# so this env var is used to check if we have the secret or not.
85+
# If we have the secrets, meaning we are running on push in a fork,
86+
# there might be secrets available for more debugging.
87+
# If TS_OAUTH_CLIENT_ID and TS_OAUTH_SECRET is set, then the job
88+
# will join a debug tailscale network, set up SSH and a tmux session.
89+
# The SSH will be configured to use the SSH key of the Github user
90+
# that triggered the build.
91+
HAS_TAILSCALE_SECRET: ${{ secrets.TS_OAUTH_CLIENT_ID }}
92+
steps:
93+
- uses: actions/checkout@v4
94+
with:
95+
fetch-depth: 2
96+
- name: Get changed files
97+
id: changed-files
98+
uses: dorny/paths-filter@v3
99+
with:
100+
filters: |
101+
files:
102+
- '*.nix'
103+
- 'go.*'
104+
- '**/*.go'
105+
- 'integration_test/'
106+
- 'config-example.yaml'
107+
- name: Tailscale
108+
if: ${{ env.HAS_TAILSCALE_SECRET }}
109+
uses: tailscale/github-action@v2
110+
with:
111+
oauth-client-id: ${{ secrets.TS_OAUTH_CLIENT_ID }}
112+
oauth-secret: ${{ secrets.TS_OAUTH_SECRET }}
113+
tags: tag:gh
114+
- name: Setup SSH server for Actor
115+
if: ${{ env.HAS_TAILSCALE_SECRET }}
116+
uses: alexellis/setup-sshd-actor@master
117+
- uses: DeterminateSystems/nix-installer-action@main
118+
if: steps.changed-files.outputs.files == 'true'
119+
- uses: DeterminateSystems/magic-nix-cache-action@main
120+
if: steps.changed-files.outputs.files == 'true'
121+
- uses: satackey/action-docker-layer-caching@main
122+
if: steps.changed-files.outputs.files == 'true'
123+
continue-on-error: true
124+
- name: Run Integration Test
125+
uses: Wandalen/wretry.action@master
126+
if: steps.changed-files.outputs.files == 'true'
127+
env:
128+
USE_POSTGRES: ${{ matrix.database == 'postgres' && '1' || '0' }}
129+
with:
130+
attempt_limit: 5
131+
command: |
132+
nix develop --command -- docker run \
133+
--tty --rm \
134+
--volume ~/.cache/hs-integration-go:/go \
135+
--name headscale-test-suite \
136+
--volume $PWD:$PWD -w $PWD/integration \
137+
--volume /var/run/docker.sock:/var/run/docker.sock \
138+
--volume $PWD/control_logs:/tmp/control \
139+
--env HEADSCALE_INTEGRATION_POSTGRES=${{env.USE_POSTGRES}} \
140+
--env HEADSCALE_EXPERIMENTAL_POLICY_V2=1 \
141+
golang:1 \
142+
go run gotest.tools/gotestsum@latest -- ./... \
143+
-failfast \
144+
-timeout 120m \
145+
-parallel 1 \
146+
-run "^${{ matrix.test }}$"
147+
- uses: actions/upload-artifact@v4
148+
if: always() && steps.changed-files.outputs.files == 'true'
149+
with:
150+
name: ${{ matrix.test }}-${{matrix.database}}-${{matrix.policy}}-logs
151+
path: "control_logs/*.log"
152+
- uses: actions/upload-artifact@v4
153+
if: always() && steps.changed-files.outputs.files == 'true'
154+
with:
155+
name: ${{ matrix.test }}-${{matrix.database}}-${{matrix.policy}}-pprof
156+
path: "control_logs/*.pprof.tar"
157+
- name: Setup a blocking tmux session
158+
if: ${{ env.HAS_TAILSCALE_SECRET }}
159+
uses: alexellis/block-with-tmux-action@master

.github/workflows/test-integration.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ jobs:
137137
--volume /var/run/docker.sock:/var/run/docker.sock \
138138
--volume $PWD/control_logs:/tmp/control \
139139
--env HEADSCALE_INTEGRATION_POSTGRES=${{env.USE_POSTGRES}} \
140+
--env HEADSCALE_EXPERIMENTAL_POLICY_V2=0 \
140141
golang:1 \
141142
go run gotest.tools/gotestsum@latest -- ./... \
142143
-failfast \
@@ -146,12 +147,12 @@ jobs:
146147
- uses: actions/upload-artifact@v4
147148
if: always() && steps.changed-files.outputs.files == 'true'
148149
with:
149-
name: ${{ matrix.test }}-${{matrix.database}}-logs
150+
name: ${{ matrix.test }}-${{matrix.database}}-${{matrix.policy}}-logs
150151
path: "control_logs/*.log"
151152
- uses: actions/upload-artifact@v4
152153
if: always() && steps.changed-files.outputs.files == 'true'
153154
with:
154-
name: ${{ matrix.test }}-${{matrix.database}}-pprof
155+
name: ${{ matrix.test }}-${{matrix.database}}-${{matrix.policy}}-pprof
155156
path: "control_logs/*.pprof.tar"
156157
- name: Setup a blocking tmux session
157158
if: ${{ env.HAS_TAILSCALE_SECRET }}

CHANGELOG.md

Lines changed: 52 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@
44

55
### BREAKING
66

7-
Route internals have been rewritten, removing the dedicated route table in the database.
8-
This was done to simplify the codebase, which had grown unnecessarily complex after
9-
the routes were split into separate tables. The overhead of having to go via the database
10-
and keeping the state in sync made the code very hard to reason about and prone to errors.
11-
The majority of the route state is only relevant when headscale is running, and is now only
12-
kept in memory.
13-
As part of this, the CLI and API has been simplified to reflect the changes;
7+
Route internals have been rewritten, removing the dedicated route table in the
8+
database. This was done to simplify the codebase, which had grown unnecessarily
9+
complex after the routes were split into separate tables. The overhead of having
10+
to go via the database and keeping the state in sync made the code very hard to
11+
reason about and prone to errors. The majority of the route state is only
12+
relevant when headscale is running, and is now only kept in memory. As part of
13+
this, the CLI and API has been simplified to reflect the changes;
1414

1515
```console
1616
$ headscale nodes list-routes
@@ -27,15 +27,55 @@ ID | Hostname | Approved | Available | Serving
2727
2 | ts-unstable-fq7ob4 | | 0.0.0.0/0, ::/0 |
2828
```
2929

30-
Note that if an exit route is approved (0.0.0.0/0 or ::/0), both IPv4 and IPv6 will be approved.
30+
Note that if an exit route is approved (0.0.0.0/0 or ::/0), both IPv4 and IPv6
31+
will be approved.
3132

32-
- Route API and CLI has been removed [#2422](https://github.com/juanfont/headscale/pull/2422)
33-
- Routes are now managed via the Node API [#2422](https://github.com/juanfont/headscale/pull/2422)
33+
- Route API and CLI has been removed
34+
[#2422](https://github.com/juanfont/headscale/pull/2422)
35+
- Routes are now managed via the Node API
36+
[#2422](https://github.com/juanfont/headscale/pull/2422)
37+
38+
### Experimental Policy v2
39+
40+
This release introduces a new experimental version of Headscales policy
41+
implementation. In this context, experimental means that the feature is not yet
42+
fully tested and may contain bugs or unexpected behavior and that we are still
43+
experimenting with how the final interface/behavior will be.
44+
45+
#### Breaking changes
46+
47+
- The policy is validated and "resolved" when loading, providing errors for
48+
invalid rules and conditions.
49+
- Previously this was done as a mix between load and runtime (when it was
50+
applied to a node).
51+
- This means that when you convert the first time, what was previously a
52+
policy that loaded, but failed at runtime, will now fail at load time.
53+
- Error messages should be more descriptive and informative.
54+
- There is still work to be here, but it is already improved with "typing"
55+
(e.g. only Users can be put in Groups)
56+
- All users must contain an `@` character.
57+
- If your user naturally contains and `@`, like an email, this will just work.
58+
- If its based on usernames, or other identifiers not containing an `@`, an
59+
`@` should be appended at the end. For example, if your user is `john`, it
60+
must be written as `john@` in the policy.
61+
62+
#### Current state
63+
64+
The new policy is passing all tests, both integration and unit tests. This does
65+
not mean it is perfect, but it is a good start. Corner cases that is currently
66+
working in v1 and not tested might be broken in v2 (and vice versa).
67+
68+
**We do need help testing this code**, and we think that most of the user facing
69+
API will not really change. We are not sure yet when this code will replace v1,
70+
but we are confident that it will, and all new changes and fixes will be made
71+
towards this code.
72+
73+
The new policy can be used by setting the environment variable
74+
`HEADSCALE_EXPERIMENTAL_POLICY_V2` to `1`.
3475

3576
### Changes
3677

37-
- Use Go 1.24
38-
[#2427](https://github.com/juanfont/headscale/pull/2427)
78+
- Use Go 1.24 [#2427](https://github.com/juanfont/headscale/pull/2427)
3979
- `oidc.map_legacy_users` and `oidc.strip_email_domain` has been removed
4080
[#2411](https://github.com/juanfont/headscale/pull/2411)
4181
- Add more information to `/debug` endpoint

hscontrol/app.go

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -194,10 +194,14 @@ func NewHeadscale(cfg *types.Config) (*Headscale, error) {
194194

195195
var magicDNSDomains []dnsname.FQDN
196196
if cfg.PrefixV4 != nil {
197-
magicDNSDomains = append(magicDNSDomains, util.GenerateIPv4DNSRootDomain(*cfg.PrefixV4)...)
197+
magicDNSDomains = append(
198+
magicDNSDomains,
199+
util.GenerateIPv4DNSRootDomain(*cfg.PrefixV4)...)
198200
}
199201
if cfg.PrefixV6 != nil {
200-
magicDNSDomains = append(magicDNSDomains, util.GenerateIPv6DNSRootDomain(*cfg.PrefixV6)...)
202+
magicDNSDomains = append(
203+
magicDNSDomains,
204+
util.GenerateIPv6DNSRootDomain(*cfg.PrefixV6)...)
201205
}
202206

203207
// we might have routes already from Split DNS
@@ -459,11 +463,13 @@ func (h *Headscale) createRouter(grpcMux *grpcRuntime.ServeMux) *mux.Router {
459463
router := mux.NewRouter()
460464
router.Use(prometheusMiddleware)
461465

462-
router.HandleFunc(ts2021UpgradePath, h.NoiseUpgradeHandler).Methods(http.MethodPost, http.MethodGet)
466+
router.HandleFunc(ts2021UpgradePath, h.NoiseUpgradeHandler).
467+
Methods(http.MethodPost, http.MethodGet)
463468

464469
router.HandleFunc("/health", h.HealthHandler).Methods(http.MethodGet)
465470
router.HandleFunc("/key", h.KeyHandler).Methods(http.MethodGet)
466-
router.HandleFunc("/register/{registration_id}", h.authProvider.RegisterHandler).Methods(http.MethodGet)
471+
router.HandleFunc("/register/{registration_id}", h.authProvider.RegisterHandler).
472+
Methods(http.MethodGet)
467473

468474
if provider, ok := h.authProvider.(*AuthProviderOIDC); ok {
469475
router.HandleFunc("/oidc/callback", provider.OIDCCallbackHandler).Methods(http.MethodGet)
@@ -523,7 +529,11 @@ func usersChangedHook(db *db.HSDatabase, polMan policy.PolicyManager, notif *not
523529
// Maybe we should attempt a new in memory state and not go via the DB?
524530
// Maybe this should be implemented as an event bus?
525531
// A bool is returned indicating if a full update was sent to all nodes
526-
func nodesChangedHook(db *db.HSDatabase, polMan policy.PolicyManager, notif *notifier.Notifier) (bool, error) {
532+
func nodesChangedHook(
533+
db *db.HSDatabase,
534+
polMan policy.PolicyManager,
535+
notif *notifier.Notifier,
536+
) (bool, error) {
527537
nodes, err := db.ListNodes()
528538
if err != nil {
529539
return false, err
@@ -1143,6 +1153,7 @@ func (h *Headscale) loadPolicyManager() error {
11431153
errOut = fmt.Errorf("creating policy manager: %w", err)
11441154
return
11451155
}
1156+
log.Info().Msgf("Using policy manager version: %d", h.polMan.Version())
11461157

11471158
if len(nodes) > 0 {
11481159
_, err = h.polMan.SSHPolicy(nodes[0])

hscontrol/db/db.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"gorm.io/gorm"
2323
"gorm.io/gorm/logger"
2424
"gorm.io/gorm/schema"
25+
"tailscale.com/net/tsaddr"
2526
"tailscale.com/util/set"
2627
"zgo.at/zcache/v2"
2728
)
@@ -655,7 +656,7 @@ AND auth_key_id NOT IN (
655656
}
656657

657658
for nodeID, routes := range nodeRoutes {
658-
slices.SortFunc(routes, util.ComparePrefix)
659+
tsaddr.SortPrefixes(routes)
659660
slices.Compact(routes)
660661

661662
data, err := json.Marshal(routes)

0 commit comments

Comments
 (0)