Skip to main content

EKS Production Deployment Guide

This guide covers deploying a production-grade EKS cluster using DevOpsGenie's Terraform modules.

Cluster Configuration

Terraform Module Reference

terraform/environments/production/main.tf
module "devopsgenie_cluster" {
source = "devopsgenie/eks/aws"
version = "~> 2.4"

cluster_name = "my-platform-production"
cluster_version = "1.29"
region = "us-east-1"

vpc_id = module.vpc.vpc_id
subnet_ids = module.vpc.private_subnets

# Control plane logging
cluster_enabled_log_types = [
"api", "audit", "authenticator", "controllerManager", "scheduler"
]

# Cluster endpoint access
cluster_endpoint_private_access = true
cluster_endpoint_public_access = true
cluster_endpoint_public_access_cidrs = [
"10.0.0.0/8", # internal networks
]

# Managed node groups
eks_managed_node_groups = {
system = {
name = "system-nodes"
instance_types = ["m6i.xlarge"]
min_size = 2
max_size = 5
desired_size = 3

labels = {
role = "system"
}

taints = [{
key = "CriticalAddonsOnly"
value = "true"
effect = "NO_SCHEDULE"
}]

update_config = {
max_unavailable_percentage = 33
}
}

workloads = {
name = "workload-nodes"
instance_types = ["m6i.2xlarge", "m6i.4xlarge", "m6a.2xlarge"]
min_size = 3
max_size = 20
desired_size = 5

labels = {
role = "workloads"
}

block_device_mappings = {
xvda = {
device_name = "/dev/xvda"
ebs = {
volume_size = 100
volume_type = "gp3"
iops = 3000
throughput = 125
delete_on_termination = true
encrypted = true
}
}
}
}
}

# EKS add-ons
cluster_addons = {
coredns = {
most_recent = true
configuration_values = jsonencode({
replicaCount = 2
resources = {
limits = { cpu = "200m", memory = "200Mi" }
requests = { cpu = "100m", memory = "100Mi" }
}
})
}

kube-proxy = {
most_recent = true
}

vpc-cni = {
most_recent = true
configuration_values = jsonencode({
env = {
ENABLE_PREFIX_DELEGATION = "true"
WARM_PREFIX_TARGET = "1"
}
})
}

aws-ebs-csi-driver = {
most_recent = true
service_account_role_arn = module.ebs_csi_irsa_role.iam_role_arn
}
}

tags = {
Environment = "production"
ManagedBy = "devopsgenie"
Team = "platform"
}
}

VPC Configuration

terraform/environments/production/vpc.tf
module "vpc" {
source = "terraform-aws-modules/vpc/aws"
version = "~> 5.0"

name = "my-platform-production"
cidr = "10.100.0.0/16"

azs = ["us-east-1a", "us-east-1b", "us-east-1c"]
private_subnets = ["10.100.0.0/20", "10.100.16.0/20", "10.100.32.0/20"]
public_subnets = ["10.100.48.0/24", "10.100.49.0/24", "10.100.50.0/24"]

enable_nat_gateway = true
single_nat_gateway = false # one NAT per AZ for HA
enable_dns_hostnames = true
enable_dns_support = true

# Tags required for EKS subnet discovery
private_subnet_tags = {
"kubernetes.io/role/internal-elb" = "1"
"kubernetes.io/cluster/my-platform-production" = "shared"
}

public_subnet_tags = {
"kubernetes.io/role/elb" = "1"
"kubernetes.io/cluster/my-platform-production" = "shared"
}
}

Karpenter Configuration

Karpenter provides highly efficient node lifecycle management and replaces Cluster Autoscaler for workload nodes.

kubernetes/karpenter/nodepool.yaml
# Karpenter NodePool: constrains what nodes may be provisioned and when
# they are consolidated or rotated.
# NOTE: indentation reconstructed — the published snippet had lost all
# YAML indentation and was not a valid manifest.
apiVersion: karpenter.sh/v1beta1
kind: NodePool
metadata:
  name: default
spec:
  template:
    metadata:
      labels:
        role: workloads
    spec:
      nodeClassRef:
        name: default
      requirements:
        # Prefer cheap spot capacity with on-demand as fallback.
        - key: karpenter.sh/capacity-type
          operator: In
          values: ["on-demand", "spot"]
        - key: node.kubernetes.io/instance-type
          operator: In
          values:
            - m6i.xlarge
            - m6i.2xlarge
            - m6i.4xlarge
            - m6a.xlarge
            - m6a.2xlarge
            - m6a.4xlarge
        - key: topology.kubernetes.io/zone
          operator: In
          values: ["us-east-1a", "us-east-1b", "us-east-1c"]
  # Hard cap on the total capacity this pool may provision.
  limits:
    cpu: "200"
    memory: 400Gi
  disruption:
    consolidationPolicy: WhenUnderutilized
    consolidateAfter: 30s
    expireAfter: 720h # rotate nodes every 30 days

---
# EC2NodeClass: AWS-specific launch settings (AMI family, node IAM role,
# subnet/security-group discovery, disks) for Karpenter-provisioned nodes.
# NOTE: indentation reconstructed — the published snippet had lost all
# YAML indentation and was not a valid manifest.
apiVersion: karpenter.k8s.aws/v1beta1
kind: EC2NodeClass
metadata:
  name: default
spec:
  amiFamily: AL2
  # Instance-profile role assumed by launched nodes.
  role: "KarpenterNodeRole-my-platform-production"
  # Launch into the cluster's private subnets, matched by tag.
  subnetSelectorTerms:
    - tags:
        kubernetes.io/cluster/my-platform-production: shared
        kubernetes.io/role/internal-elb: "1"
  # Attach the cluster-owned node security group, matched by tag.
  securityGroupSelectorTerms:
    - tags:
        kubernetes.io/cluster/my-platform-production: owned
  blockDeviceMappings:
    - deviceName: /dev/xvda
      ebs:
        volumeSize: 100Gi
        volumeType: gp3
        iops: 3000
        encrypted: true

IRSA Configuration

All platform components use IAM Roles for Service Accounts (IRSA) for least-privilege AWS access:

terraform/environments/production/irsa.tf
module "ebs_csi_irsa_role" {
source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks"

role_name = "ebs-csi-driver-${local.cluster_name}"
attach_ebs_csi_policy = true

oidc_providers = {
ex = {
provider_arn = module.devopsgenie_cluster.oidc_provider_arn
namespace_service_accounts = ["kube-system:ebs-csi-controller-sa"]
}
}
}

Post-Deployment Verification

# Confirm all nodes are Ready
kubectl get nodes -o wide

# Verify all system pods are Running
kubectl get pods -n kube-system
kubectl get pods -n karpenter

# Test Karpenter by deploying an "inflate" workload: pause pods that each
# request 1 CPU, forcing Karpenter to provision additional nodes.
# NOTE: heredoc YAML indentation reconstructed — the published snippet had
# lost it and kubectl would reject the manifest.
kubectl apply -f - <<EOF
apiVersion: apps/v1
kind: Deployment
metadata:
  name: inflate
spec:
  replicas: 5
  selector:
    matchLabels:
      app: inflate
  template:
    metadata:
      labels:
        app: inflate
    spec:
      containers:
        - name: inflate
          image: public.ecr.aws/eks-distro/kubernetes/pause:3.7
          resources:
            requests:
              cpu: "1"
EOF

# Watch Karpenter provision new nodes
kubectl get nodes --watch
> **Tip:** Run `devopsgenie cluster health` for a complete health check across all cluster components.