Feat/nat router #1681

Open · wants to merge 9 commits into master
18 changes: 17 additions & 1 deletion README.md
@@ -996,7 +996,7 @@ enable_delete_protection = {
</details>
<details>

<summary>Use only private ips in your cluster</summary>
<summary>Use only private IPs in your cluster (Wireguard)</summary>

To use only private IPs in your cluster, you need the following in your project:
1. A network already configured.
@@ -1017,6 +1017,22 @@ If you follow these values, in your kube.tf, please set:
This setup is compatible with a load balancer for your control planes; however, you should consider setting
`control_plane_lb_enable_public_interface = false` to keep its IP private.
</details>
<details>

<summary>Use only private IPs in your cluster (NAT Router)</summary>

Set up a purely private cluster where public internet traffic is limited to the
following paths:
- egress: entirely through the NAT router, using a single IP for all egress traffic.
- ssh: entirely through the bastion host (currently the same node as the NAT router).
- control-plane (kubectl): through the control plane load balancer only.
- regular ingress: through the agents load balancer only.

Separating these roles reduces the attack surface somewhat.

If you need highly available egress (often unnecessary), this setup is not for you. It has no impact on the availability of ingress.
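
A minimal `kube.tf` sketch for this mode, based on the `nat_router` example in `kube.tf.example` (values are illustrative; these go inside your `module "kube-hetzner"` block):

```hcl
# Sketch only: pick a server_type/location that fits your egress needs.
nat_router = {
  server_type = "cax21"
  location    = "fsn1"
  enable_sudo = false # optional; SSH as root stays disabled either way
  labels      = {}    # optional labels for the NAT router server
}

# Required together with nat_router, otherwise kubectl has no path to the
# now fully private control plane nodes.
use_control_plane_lb = true
```

With node public IPs disabled, ad-hoc SSH to a node goes through the router acting as a bastion, e.g. `ssh -J nat-router@<router-public-ip> root@<node-private-ip>` (assuming the `nat-router` login user described in the kube.tf.example options; the Terraform provisioners in this PR route through the same bastion automatically).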

</details>


<details>
Expand Down
36 changes: 31 additions & 5 deletions agents.tf
Original file line number Diff line number Diff line change
@@ -31,17 +31,19 @@ module "agents" {
keep_disk_size = var.keep_disk_agents
disable_ipv4 = each.value.disable_ipv4
disable_ipv6 = each.value.disable_ipv6
network_id = length(var.existing_network_id) > 0 ? var.existing_network_id[0] : 0

private_ipv4 = cidrhost(hcloud_network_subnet.agent[[for i, v in var.agent_nodepools : i if v.name == each.value.nodepool_name][0]].ip_range, each.value.index + 101)
ssh_bastion = local.ssh_bastion
network_id = data.hcloud_network.k3s.id
private_ipv4 = cidrhost(hcloud_network_subnet.agent[[for i, v in var.agent_nodepools : i if v.name == each.value.nodepool_name][0]].ip_range, each.value.index + 101)

labels = merge(local.labels, local.labels_agent_node)

automatically_upgrade_os = var.automatically_upgrade_os

depends_on = [
hcloud_network_subnet.agent,
hcloud_placement_group.agent
hcloud_placement_group.agent,
hcloud_server.nat_router,
null_resource.nat_router_await_cloud_init,
]
}

@@ -92,6 +94,12 @@ resource "null_resource" "agent_config" {
agent_identity = local.ssh_agent_identity
host = local.agent_ips[each.key]
port = var.ssh_port

bastion_host = local.ssh_bastion.bastion_host
bastion_port = local.ssh_bastion.bastion_port
bastion_user = local.ssh_bastion.bastion_user
bastion_private_key = local.ssh_bastion.bastion_private_key

}

# Generating k3s agent config file
@@ -118,6 +126,12 @@ resource "null_resource" "agents" {
agent_identity = local.ssh_agent_identity
host = local.agent_ips[each.key]
port = var.ssh_port

bastion_host = local.ssh_bastion.bastion_host
bastion_port = local.ssh_bastion.bastion_port
bastion_user = local.ssh_bastion.bastion_user
bastion_private_key = local.ssh_bastion.bastion_private_key

}

# Install k3s agent
@@ -128,7 +142,7 @@ resource "null_resource" "agents" {
# Start the k3s agent and wait for it to have started
provisioner "remote-exec" {
inline = concat(var.enable_longhorn || var.enable_iscsid ? ["systemctl enable --now iscsid"] : [], [
"systemctl start k3s-agent 2> /dev/null",
"timeout 120 systemctl start k3s-agent 2> /dev/null",
<<-EOT
timeout 120 bash <<EOF
until systemctl status k3s-agent > /dev/null; do
@@ -187,6 +201,12 @@ resource "null_resource" "configure_longhorn_volume" {
agent_identity = local.ssh_agent_identity
host = local.agent_ips[each.key]
port = var.ssh_port

bastion_host = local.ssh_bastion.bastion_host
bastion_port = local.ssh_bastion.bastion_port
bastion_user = local.ssh_bastion.bastion_user
bastion_private_key = local.ssh_bastion.bastion_private_key

}

depends_on = [
@@ -269,6 +289,12 @@ resource "null_resource" "configure_floating_ip" {
agent_identity = local.ssh_agent_identity
host = local.agent_ips[each.key]
port = var.ssh_port

bastion_host = local.ssh_bastion.bastion_host
bastion_port = local.ssh_bastion.bastion_port
bastion_user = local.ssh_bastion.bastion_user
bastion_private_key = local.ssh_bastion.bastion_private_key

}

depends_on = [
14 changes: 14 additions & 0 deletions autoscaler-agents.tf
@@ -42,6 +42,8 @@ locals {
firewall_id = hcloud_firewall.k3s.id
cluster_name = local.cluster_prefix
node_pools = var.autoscaler_nodepools
enable_ipv4 = !(var.autoscaler_disable_ipv4 || local.use_nat_router)
enable_ipv6 = !(var.autoscaler_disable_ipv6 || local.use_nat_router)
})
# A concatenated list of all autoscaled nodes
autoscaled_nodes = length(var.autoscaler_nodepools) == 0 ? {} : {
@@ -66,6 +68,12 @@ resource "null_resource" "configure_autoscaler" {
agent_identity = local.ssh_agent_identity
host = local.first_control_plane_ip
port = var.ssh_port

bastion_host = local.ssh_bastion.bastion_host
bastion_port = local.ssh_bastion.bastion_port
bastion_user = local.ssh_bastion.bastion_user
bastion_private_key = local.ssh_bastion.bastion_private_key

}

# Upload the autoscaler resource definition
@@ -173,6 +181,12 @@ resource "null_resource" "autoscaled_nodes_registries" {
agent_identity = local.ssh_agent_identity
host = coalesce(each.value.ipv4_address, each.value.ipv6_address, try(one(each.value.network).ip, null))
port = var.ssh_port

bastion_host = local.ssh_bastion.bastion_host
bastion_port = local.ssh_bastion.bastion_port
bastion_user = local.ssh_bastion.bastion_user
bastion_private_key = local.ssh_bastion.bastion_private_key

}

provisioner "file" {
24 changes: 23 additions & 1 deletion control_planes.tf
@@ -31,7 +31,8 @@ module "control_planes" {
keep_disk_size = var.keep_disk_cp
disable_ipv4 = each.value.disable_ipv4
disable_ipv6 = each.value.disable_ipv6
network_id = length(var.existing_network_id) > 0 ? var.existing_network_id[0] : 0
ssh_bastion = local.ssh_bastion
network_id = data.hcloud_network.k3s.id

# We leave some room so that up to 100 Hetzner LBs can be created perfectly safely.
# It leaves the subnet with 254 x 254 - 100 = 64416 IPs to use, so probably enough.
@@ -44,6 +45,8 @@
depends_on = [
hcloud_network_subnet.control_plane,
hcloud_placement_group.control_plane,
hcloud_server.nat_router,
null_resource.nat_router_await_cloud_init,
]
}

@@ -63,6 +66,7 @@ resource "hcloud_load_balancer_network" "control_plane" {
load_balancer_id = hcloud_load_balancer.control_plane.*.id[0]
subnet_id = hcloud_network_subnet.control_plane.*.id[0]
enable_public_interface = var.control_plane_lb_enable_public_interface
ip = cidrhost(hcloud_network_subnet.control_plane.*.ip_range[0], 254)

# To ensure backwards compatibility, we ignore changes to the IP address
# as before it was set manually.
@@ -160,6 +164,12 @@ resource "null_resource" "control_plane_config" {
agent_identity = local.ssh_agent_identity
host = local.control_plane_ips[each.key]
port = var.ssh_port

bastion_host = local.ssh_bastion.bastion_host
bastion_port = local.ssh_bastion.bastion_port
bastion_user = local.ssh_bastion.bastion_user
bastion_private_key = local.ssh_bastion.bastion_private_key

}

# Generating k3s server config file
@@ -193,6 +203,12 @@ resource "null_resource" "authentication_config" {
agent_identity = local.ssh_agent_identity
host = local.control_plane_ips[each.key]
port = var.ssh_port

bastion_host = local.ssh_bastion.bastion_host
bastion_port = local.ssh_bastion.bastion_port
bastion_user = local.ssh_bastion.bastion_user
bastion_private_key = local.ssh_bastion.bastion_private_key

}

provisioner "file" {
@@ -223,6 +239,12 @@ resource "null_resource" "control_planes" {
agent_identity = local.ssh_agent_identity
host = local.control_plane_ips[each.key]
port = var.ssh_port

bastion_host = local.ssh_bastion.bastion_host
bastion_port = local.ssh_bastion.bastion_port
bastion_user = local.ssh_bastion.bastion_user
bastion_private_key = local.ssh_bastion.bastion_private_key

}

# Install k3s server
13 changes: 13 additions & 0 deletions init.tf
@@ -24,6 +24,7 @@ resource "hcloud_load_balancer_network" "cluster" {

load_balancer_id = hcloud_load_balancer.cluster.*.id[0]
subnet_id = hcloud_network_subnet.agent.*.id[0]
ip = cidrhost(hcloud_network_subnet.agent.*.ip_range[0], 254)
}

resource "hcloud_load_balancer_target" "cluster" {
@@ -51,6 +52,12 @@ resource "null_resource" "first_control_plane" {
agent_identity = local.ssh_agent_identity
host = local.first_control_plane_ip
port = var.ssh_port

bastion_host = local.ssh_bastion.bastion_host
bastion_port = local.ssh_bastion.bastion_port
bastion_user = local.ssh_bastion.bastion_user
bastion_private_key = local.ssh_bastion.bastion_private_key

}

# Generating k3s master config file
@@ -175,6 +182,12 @@ resource "null_resource" "kustomization" {
agent_identity = local.ssh_agent_identity
host = local.first_control_plane_ip
port = var.ssh_port

bastion_host = local.ssh_bastion.bastion_host
bastion_port = local.ssh_bastion.bastion_port
bastion_user = local.ssh_bastion.bastion_user
bastion_private_key = local.ssh_bastion.bastion_private_key

}

# Upload kustomization.yaml, containing Hetzner CSI & CSM, as well as kured.
17 changes: 17 additions & 0 deletions kube.tf.example
@@ -327,6 +327,23 @@ module "kube-hetzner" {
# Specifies the number of times a health check is retried before a target is marked as unhealthy. (default: 3)
# load_balancer_health_check_retries = 3


# Set up a NAT router and automatically disable public IPs on all control plane and agent nodes.
# To use this, you must also set use_control_plane_lb = true, otherwise kubectl can never
# reach the cluster. The NAT router also functions as a bastion host. This makes securing the
# cluster easier, as all public traffic passes through a single, strongly secured node. It does,
# however, introduce a single point of failure, so if you need high availability for your
# egress, you should consider other configurations.
#
#
# nat_router = {
# server_type = "cax21"
# location = "fsn1"
# enable_sudo = false # optional, defaults to false. Set to true to add the nat-router user to the sudoers. Note that SSH as root is disabled.
# labels = {} # optionally add labels.
# }
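#
# To keep kubectl access working once node public IPs are disabled, pair the NAT
# router with the control-plane load balancer (sketch; see the note above):
#
# use_control_plane_lb = true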


### The following values are entirely optional (and can be removed from this if unused)

# You can define a base domain name to be used in the form nodename.base_domain for setting the reverse DNS inside Hetzner
37 changes: 27 additions & 10 deletions kubeconfig.tf
@@ -1,12 +1,29 @@
data "remote_file" "kubeconfig" {
conn {
host = local.first_control_plane_ip
port = var.ssh_port
user = "root"
private_key = var.ssh_private_key
agent = var.ssh_private_key == null
}
path = "/etc/rancher/k3s/k3s.yaml"
resource "ssh_sensitive_resource" "kubeconfig" {
# Note: moved from remote_file to ssh_sensitive_resource because
# remote_file does not support bastion hosts and ssh_sensitive_resource does.
# The default behaviour is to run file blocks and commands at create time
# You can also specify 'destroy' to run the commands at destroy time
when = "create"

bastion_host = local.ssh_bastion.bastion_host
bastion_port = local.ssh_bastion.bastion_port
bastion_user = local.ssh_bastion.bastion_user
bastion_private_key = local.ssh_bastion.bastion_private_key

host = local.first_control_plane_ip
port = var.ssh_port
user = "root"
private_key = var.ssh_private_key
agent = var.ssh_private_key == null

# An ssh-agent with your SSH private keys should be running
# Use 'private_key' to set the SSH key otherwise

timeout = "15m"

commands = [
"cat /etc/rancher/k3s/k3s.yaml"
]

depends_on = [null_resource.control_planes[0]]
}
@@ -21,7 +38,7 @@ locals {
:
(can(local.first_control_plane_ip) ? local.first_control_plane_ip : "unknown")
)
kubeconfig_external = replace(replace(data.remote_file.kubeconfig.content, "127.0.0.1", local.kubeconfig_server_address), "default", var.cluster_name)
kubeconfig_external = replace(replace(ssh_sensitive_resource.kubeconfig.result, "127.0.0.1", local.kubeconfig_server_address), "default", var.cluster_name)
kubeconfig_parsed = yamldecode(local.kubeconfig_external)
kubeconfig_data = {
host = local.kubeconfig_parsed["clusters"][0]["cluster"]["server"]
12 changes: 12 additions & 0 deletions kustomization_user.tf
@@ -11,6 +11,12 @@ resource "null_resource" "kustomization_user" {
agent_identity = local.ssh_agent_identity
host = local.first_control_plane_ip
port = var.ssh_port

bastion_host = local.ssh_bastion.bastion_host
bastion_port = local.ssh_bastion.bastion_port
bastion_user = local.ssh_bastion.bastion_user
bastion_private_key = local.ssh_bastion.bastion_private_key

}

provisioner "remote-exec" {
@@ -42,6 +48,12 @@ resource "null_resource" "kustomization_user_deploy" {
agent_identity = local.ssh_agent_identity
host = local.first_control_plane_ip
port = var.ssh_port

bastion_host = local.ssh_bastion.bastion_host
bastion_port = local.ssh_bastion.bastion_port
bastion_user = local.ssh_bastion.bastion_user
bastion_private_key = local.ssh_bastion.bastion_private_key

}

# Remove templates after rendering, and apply changes.