Skip to content

Integration test cleanup: Security Groups for Pods #2547

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion test/framework/resources/aws/utils/nodegroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"os"
"strconv"
"strings"
"time"

"gopkg.in/yaml.v2"

Expand All @@ -26,6 +27,7 @@ import (

"github.com/aws/amazon-vpc-cni-k8s/pkg/vpc"
"github.com/aws/amazon-vpc-cni-k8s/test/framework"
k8sUtils "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/utils"
"github.com/aws/amazon-vpc-cni-k8s/test/framework/utils"
)

Expand Down Expand Up @@ -283,7 +285,7 @@ func GetClusterVPCConfig(f *framework.Framework) (*ClusterVPCConfig, error) {
for _, subnet := range clusterConfig.PublicSubnetList {
describeSubnet, err := f.CloudServices.EC2().DescribeSubnet(subnet)
if err != nil {
return nil, fmt.Errorf("failed to descrieb the subnet %s: %v", subnet, err)
return nil, fmt.Errorf("failed to describe the subnet %s: %v", subnet, err)
}
if ok := uniqueAZ[*describeSubnet.Subnets[0].AvailabilityZone]; !ok {
uniqueAZ[*describeSubnet.Subnets[0].AvailabilityZone] = true
Expand All @@ -294,3 +296,24 @@ func GetClusterVPCConfig(f *framework.Framework) (*ClusterVPCConfig, error) {

return clusterConfig, nil
}

// TerminateInstances terminates the EC2 instances backing the nodes returned
// by NodeManager().GetNodes(ngLabelKey, ngLabelVal) and then blocks for a
// fixed period so the instances can be replaced.
//
// It returns a wrapped error if listing the nodes or terminating the
// instances fails; otherwise it returns nil once the wait has elapsed.
func TerminateInstances(f *framework.Framework, ngLabelKey string, ngLabelVal string) error {
	// Fixed pause after termination; this function does not poll for the
	// replacement instances, it only sleeps.
	const replacementWait = 450 * time.Second

	nodeList, err := f.K8sResourceManagers.NodeManager().GetNodes(ngLabelKey, ngLabelVal)
	if err != nil {
		// %w keeps the underlying error available to errors.Is / errors.As.
		return fmt.Errorf("failed to get list of nodes created: %w", err)
	}

	// One instance ID per node, so the final length is known up front.
	instanceIDs := make([]string, 0, len(nodeList.Items))
	for _, node := range nodeList.Items {
		instanceIDs = append(instanceIDs, k8sUtils.GetInstanceIDFromNode(node))
	}

	if err := f.CloudServices.EC2().TerminateInstance(instanceIDs); err != nil {
		return fmt.Errorf("failed to terminate instances: %w", err)
	}

	// Wait for instances to be replaced
	time.Sleep(replacementWait)
	return nil
}
Original file line number Diff line number Diff line change
Expand Up @@ -205,22 +205,3 @@ var _ = AfterSuite(func() {
}
Expect(errs.MaybeUnwrap()).ToNot(HaveOccurred())
})

// TerminateInstances looks up the nodes matching the suite's node group
// label, terminates the instances behind them, and then sleeps so the nodes
// can be recycled. Any failure is reported through a Gomega assertion.
func TerminateInstances(f *framework.Framework) {
	By("getting the list of nodes created")
	nodes, err := f.K8sResourceManagers.NodeManager().GetNodes(
		nodeGroupProperties.NgLabelKey,
		nodeGroupProperties.NgLabelVal,
	)
	Expect(err).ToNot(HaveOccurred())

	// Collect one instance ID per node in the returned list.
	var ids []string
	for i := range nodes.Items {
		ids = append(ids, k8sUtils.GetInstanceIDFromNode(nodes.Items[i]))
	}

	By("terminating all the nodes")
	err = f.CloudServices.EC2().TerminateInstance(ids)
	Expect(err).ToNot(HaveOccurred())

	By("waiting for nodes to be recycled")
	time.Sleep(300 * time.Second)
}
10 changes: 8 additions & 2 deletions test/integration/custom-networking/custom_networking_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"net"
"strconv"

awsUtils "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/aws/utils"
"github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest"
"github.com/aws/amazon-vpc-cni-k8s/test/framework/utils"

Expand Down Expand Up @@ -140,7 +141,9 @@ var _ = Describe("Custom Networking Test", func() {
})

It("deployment should not become ready", func() {
TerminateInstances(f)
By("terminating instances")
err := awsUtils.TerminateInstances(f, nodeGroupProperties.NgLabelKey, nodeGroupProperties.NgLabelVal)
Expect(err).ToNot(HaveOccurred())

// Nodes should be stuck in NotReady state since no ENIs could be attached and no pod
// IP addresses are available.
Expand Down Expand Up @@ -181,7 +184,10 @@ var _ = Describe("Custom Networking Test", func() {
})

It("deployment should become ready", func() {
TerminateInstances(f)
By("terminating instances")
err := awsUtils.TerminateInstances(f, nodeGroupProperties.NgLabelKey, nodeGroupProperties.NgLabelVal)
Expect(err).ToNot(HaveOccurred())

deployment := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry).
Replicas(2).
NodeSelector(nodeGroupProperties.NgLabelKey, nodeGroupProperties.NgLabelVal).
Expand Down
123 changes: 36 additions & 87 deletions test/integration/pod-eni/security_group_per_pod_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@
package pod_eni

import (
"net/url"
"path"
"fmt"
"strings"
"testing"

Expand All @@ -24,7 +23,7 @@ import (
k8sUtils "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/utils"
"github.com/aws/amazon-vpc-cni-k8s/test/framework/utils"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/vpc"
v1 "k8s.io/api/core/v1"
corev1 "k8s.io/api/core/v1"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
Expand All @@ -35,26 +34,22 @@ const AmazonEKSVPCResourceControllerARN = "arn:aws:iam::aws:policy/AmazonEKSVPCR
var (
f *framework.Framework
err error
// Key pair used for creating new self managed node group
keyPairName = "pod-eni-test"
// Security Group that will be used to create Security Group Policy
securityGroupId string
// Ports that will be opened on the Security Group used for testing
openPort = 80
// Size of the Auto Scaling Group used for testing Security Group For Pods
asgSize = 3
// Nitro Based instance type only
instanceType = "c5.xlarge"
// Port that metrics server listens on
metricsPort = 8080
// Maximum number of Branch Interface created across all the self managed nodes
totalBranchInterface int
// Self managed node group
nodeGroupProperties awsUtils.NodeGroupProperties
// Cluster Role name derived from cluster Role ARN, used to attach VPC Controller Policy
clusterRoleName string
// NodeSecurityGroupId for Node-Node communication
nodeSecurityGroupID string
// Cluster security group ID for node to node communication
clusterSGID string

node v1.Node
targetNode corev1.Node
// Number of nodes in cluster
numNodes int
)

func TestSecurityGroupForPods(t *testing.T) {
Expand All @@ -65,25 +60,15 @@ func TestSecurityGroupForPods(t *testing.T) {
var _ = BeforeSuite(func() {
f = framework.New(framework.GlobalOptions)

By("creating ec2 key-pair for the new node group")
_, err := f.CloudServices.EC2().CreateKey(keyPairName)
Expect(err).ToNot(HaveOccurred())

By("creating a new security group used in Security Group Policy")
securityGroupOutput, err := f.CloudServices.EC2().CreateSecurityGroup("pod-eni-automation",
"test created by vpc cni automation test suite", f.Options.AWSVPCID)
Expect(err).ToNot(HaveOccurred())
securityGroupId = *securityGroupOutput.GroupId

By("authorizing egress and ingress on security group for client-server communication")
f.CloudServices.EC2().
AuthorizeSecurityGroupEgress(securityGroupId, "TCP", openPort, openPort, "0.0.0.0/0")
f.CloudServices.EC2().
AuthorizeSecurityGroupIngress(securityGroupId, "TCP", openPort, openPort, "0.0.0.0/0")

By("getting the cluster VPC Config")
clusterVPCConfig, err := awsUtils.GetClusterVPCConfig(f)
Expect(err).ToNot(HaveOccurred())
f.CloudServices.EC2().AuthorizeSecurityGroupEgress(securityGroupId, "TCP", openPort, openPort, "0.0.0.0/0")
f.CloudServices.EC2().AuthorizeSecurityGroupIngress(securityGroupId, "TCP", openPort, openPort, "0.0.0.0/0")

By("getting the cluster role name")
describeClusterOutput, err := f.CloudServices.EKS().DescribeCluster(f.Options.ClusterName)
Expand All @@ -95,69 +80,38 @@ var _ = BeforeSuite(func() {
AttachRolePolicy(AmazonEKSVPCResourceControllerARN, clusterRoleName)
Expect(err).ToNot(HaveOccurred())

nodeGroupProperties = awsUtils.NodeGroupProperties{
NgLabelKey: "node-type",
NgLabelVal: "pod-eni-node",
AsgSize: asgSize,
NodeGroupName: "pod-eni-node",
Subnet: clusterVPCConfig.PublicSubnetList,
InstanceType: instanceType,
KeyPairName: keyPairName,
ContainerRuntime: f.Options.ContainerRuntime,
}

if f.Options.InstanceType == "arm64" {
// override instanceType for arm64
instanceType = "m6g.large"
nodeGroupProperties.InstanceType = instanceType
nodeGroupProperties.NodeImageId = "ami-087fca294139386b6"
}

totalBranchInterface = vpc.Limits[instanceType].BranchInterface * asgSize

By("creating a new self managed node group")
err = awsUtils.CreateAndWaitTillSelfManagedNGReady(f, nodeGroupProperties)
By("getting branch ENI limits")
nodeList, err := f.K8sResourceManagers.NodeManager().GetNodes(f.Options.NgNameLabelKey, f.Options.NgNameLabelVal)
Expect(err).ToNot(HaveOccurred())
numNodes = len(nodeList.Items)
Expect(numNodes).Should(BeNumerically(">", 1))

By("Get Reference to any node from the self managed node group")
nodeList, err := f.K8sResourceManagers.NodeManager().GetNodes(nodeGroupProperties.NgLabelKey,
nodeGroupProperties.NgLabelVal)
Expect(err).ToNot(HaveOccurred())
Expect(len(nodeList.Items)).Should(BeNumerically(">", 0))

// Get ref to any node from newly created nodegroup
By("Getting providerID of the node")
node = nodeList.Items[0]
providerID := node.Spec.ProviderID
Expect(len(providerID)).To(BeNumerically(">", 0))

By("Get InstanceID from the node")
awsUrl, err := url.Parse(providerID)
Expect(err).NotTo(HaveOccurred())

instanceID := path.Base(awsUrl.Path)
Expect(len(instanceID)).To(BeNumerically(">", 0))
node := nodeList.Items[0]
instanceID := k8sUtils.GetInstanceIDFromNode(node)
nodeInstance, err := f.CloudServices.EC2().DescribeInstance(instanceID)
instanceType := *nodeInstance.InstanceType
totalBranchInterface = vpc.Limits[instanceType].BranchInterface * numNodes

By("Fetching Node Security GroupId")
instance, err := f.CloudServices.EC2().DescribeInstance(instanceID)
By("Getting Cluster Security Group ID")
clusterRes, err := f.CloudServices.EKS().DescribeCluster(f.Options.ClusterName)
Expect(err).NotTo(HaveOccurred())

networkInterface := instance.NetworkInterfaces[0]
securityGroups := networkInterface.Groups
nodeSecurityGroupPrefix := nodeGroupProperties.NgLabelVal + "-NodeSecurityGroup"
for _, group := range securityGroups {
if strings.HasPrefix(*group.GroupName, nodeSecurityGroupPrefix) {
nodeSecurityGroupID = *group.GroupId
break
}
}
Expect(len(nodeSecurityGroupID)).To(BeNumerically(">", 0))
clusterSGID = *(clusterRes.Cluster.ResourcesVpcConfig.ClusterSecurityGroupId)
fmt.Fprintf(GinkgoWriter, "cluster security group is %s\n", clusterSGID)

By("enabling pod eni on aws-node DaemonSet")
k8sUtils.AddEnvVarToDaemonSetAndWaitTillUpdated(f, utils.AwsNodeName,
utils.AwsNodeNamespace, utils.AwsNodeName, map[string]string{
"ENABLE_POD_ENI": "true",
})

By("terminating instances")
err = awsUtils.TerminateInstances(f, f.Options.NgNameLabelKey, f.Options.NgNameLabelVal)
Expect(err).ToNot(HaveOccurred())

By("getting target node")
nodeList, err = f.K8sResourceManagers.NodeManager().GetNodes(f.Options.NgNameLabelKey, f.Options.NgNameLabelVal)
Expect(err).ToNot(HaveOccurred())
targetNode = nodeList.Items[0]
})

var _ = AfterSuite(func() {
Expand All @@ -167,20 +121,15 @@ var _ = AfterSuite(func() {
"ENABLE_POD_ENI": {},
})

By("deleting the key-pair used to create nodegroup")
err = f.CloudServices.EC2().DeleteKey(keyPairName)
Expect(err).ToNot(HaveOccurred())

By("deleting the self managed node group")
err = awsUtils.DeleteAndWaitTillSelfManagedNGStackDeleted(f, nodeGroupProperties)
By("terminating instances")
err := awsUtils.TerminateInstances(f, f.Options.NgNameLabelKey, f.Options.NgNameLabelVal)
Expect(err).ToNot(HaveOccurred())

By("deleting the security group")
err = f.CloudServices.EC2().DeleteSecurityGroup(securityGroupId)
Expect(err).ToNot(HaveOccurred())

By("detaching the AmazonEKSVPCResourceController policy from the cluster role")
err = f.CloudServices.IAM().
DetachRolePolicy(AmazonEKSVPCResourceControllerARN, clusterRoleName)
err = f.CloudServices.IAM().DetachRolePolicy(AmazonEKSVPCResourceControllerARN, clusterRoleName)
Expect(err).ToNot(HaveOccurred())
})
29 changes: 15 additions & 14 deletions test/integration/pod-eni/security_group_per_pod_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,7 @@ var _ = Describe("Security Group for Pods Test", func() {
CreateNamespace(utils.DefaultTestNamespace)

serverDeploymentBuilder = manifest.NewDefaultDeploymentBuilder().
Name("traffic-server").
NodeSelector(nodeGroupProperties.NgLabelKey, nodeGroupProperties.NgLabelVal)
Name("traffic-server")

securityGroupPolicy, err = vpcControllerFW.NewSGPBuilder().
Namespace(utils.DefaultTestNamespace).
Expand All @@ -73,8 +72,7 @@ var _ = Describe("Security Group for Pods Test", func() {
Expect(err).ToNot(HaveOccurred())

By("creating the Security Group Policy")
err = f.K8sResourceManagers.
CustomResourceManager().CreateResource(securityGroupPolicy)
err = f.K8sResourceManagers.CustomResourceManager().CreateResource(securityGroupPolicy)
Expect(err).ToNot(HaveOccurred())
})

Expand Down Expand Up @@ -122,10 +120,11 @@ var _ = Describe("Security Group for Pods Test", func() {
// Both the Server and Client Pods will get Branch ENI
branchPodLabelVal = []string{serverPodLabelVal, clientPodLabelVal}

// Allow Ingress on NodeSecurityGroup so that client-pods can communicate with metric pod
// Allow Ingress on cluster security group so client pods can communicate with metric pod
// 8080: metric-pod listener port
By("Adding an additional Ingress Rule on NodeSecurityGroupID to allow client-to-metric traffic")
f.CloudServices.EC2().AuthorizeSecurityGroupIngress(nodeSecurityGroupID, "tcp", openPort, 8080, "0.0.0.0/0")
err := f.CloudServices.EC2().AuthorizeSecurityGroupIngress(clusterSGID, "TCP", metricsPort, metricsPort, "0.0.0.0/0")
Expect(err).ToNot(HaveOccurred())
})

It("should have 99%+ success rate", func() {
Expand All @@ -152,11 +151,12 @@ var _ = Describe("Security Group for Pods Test", func() {
AfterEach(func() {
// Revoke the Ingress rule for traffic from client pods added to the cluster security group
By("Revoking the additional Ingress rule added to allow client-to-metric traffic")
f.CloudServices.EC2().RevokeSecurityGroupIngress(nodeSecurityGroupID, "tcp", openPort, 8080, "0.0.0.0/0")
err := f.CloudServices.EC2().RevokeSecurityGroupIngress(clusterSGID, "TCP", metricsPort, metricsPort, "0.0.0.0/0")
Expect(err).ToNot(HaveOccurred())
})
})

Context("when testing traffic to a port on Branch ENI that's not open", func() {
Context("when testing traffic to a port on Branch ENI that is not open", func() {
BeforeEach(func() {
// Only the Server Pods will get Branch ENI
branchPodLabelVal = []string{serverPodLabelVal}
Expand Down Expand Up @@ -226,19 +226,18 @@ var _ = Describe("Security Group for Pods Test", func() {
Name("liveliness-pod").
Container(container).
PodLabel(labelKey, serverPodLabelVal).
NodeSelector(nodeGroupProperties.NgLabelKey, nodeGroupProperties.NgLabelVal).
RestartPolicy(v1.RestartPolicyAlways).
Build()

By("creating branch ENI pod with liveliness probe")
By("creating branch ENI pod with liveness probe")
pod, err := f.K8sResourceManagers.PodManager().CreateAndWaitTillRunning(pod)
Expect(err).ToNot(HaveOccurred())

ValidatePodsHaveBranchENI(v1.PodList{Items: []v1.Pod{*pod}})

timeAfterLivelinessProbeFails := initialDelay + (periodSecond * failureCount) + 10

By("waiting for the liveliness probe to succeed/fail")
By("waiting for the liveness probe to succeed/fail")
time.Sleep(time.Second * time.Duration(timeAfterLivelinessProbeFails))

By("getting the updated branch ENI pod")
Expand Down Expand Up @@ -279,10 +278,11 @@ var _ = Describe("Security Group for Pods Test", func() {
branchPodLabelVal = []string{busyboxPodLabelVal}
})
It("Deploy BusyBox Pods with branch ENI and verify HostNetworking", func() {
// Pin deployment to primary node
deployment := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry).
Replicas(totalBranchInterface/asgSize).
Replicas(totalBranchInterface/numNodes).
PodLabel(labelKey, busyboxPodLabelVal).
NodeName(node.Name).
NodeName(targetNode.Name).
Build()

By("creating a deployment to launch pod using Branch ENI")
Expand Down Expand Up @@ -350,9 +350,10 @@ func ValidateHostNetworking(testType TestType, podValidationInputString string)
Args(testerArgs).
Build()

// Pin pod to primary node
testPod := manifest.NewDefaultPodBuilder().
Container(testContainer).
NodeName(node.Name).
NodeName(targetNode.Name).
HostNetwork(true).
Build()

Expand Down
Loading