Skip to main content

AKS Cluster Setup

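This guide provisions a production-grade AKS cluster using DevOpsGenie's Terraform configuration for the AzureRM provider. The snippets below declare AzureRM resources directly and use AzureRM 3.x argument names (for example, `enable_auto_scaling`).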

1. Resource Group & Networking

terraform/environments/production/azure-network.tf
# Resource group that contains the production resources declared in this guide.
resource "azurerm_resource_group" "main" {
  location = "East US"
  name     = "devopsgenie-production"

  # Tags used to identify the environment and the tool that owns the resource.
  tags = {
    managed_by  = "devopsgenie"
    environment = "production"
  }
}

# Virtual network for the production environment; subnets are carved out of
# the 10.100.0.0/16 range.
resource "azurerm_virtual_network" "main" {
  name          = "devopsgenie-vnet"
  address_space = ["10.100.0.0/16"]

  # Placed in the same resource group and region as everything else.
  location            = azurerm_resource_group.main.location
  resource_group_name = azurerm_resource_group.main.name
}

# Subnet that the AKS node pools attach to (first /20 of the virtual network).
resource "azurerm_subnet" "aks" {
  name             = "aks-subnet"
  address_prefixes = ["10.100.0.0/20"]

  virtual_network_name = azurerm_virtual_network.main.name
  resource_group_name  = azurerm_resource_group.main.name
}

2. AKS Cluster

terraform/environments/production/aks.tf
# Production AKS cluster: zone-redundant system node pool, Azure AD + Azure RBAC,
# Azure CNI with Calico network policy, workload identity, and Container Insights.
resource "azurerm_kubernetes_cluster" "main" {
  name                = "devopsgenie-production"
  location            = azurerm_resource_group.main.location
  resource_group_name = azurerm_resource_group.main.name
  dns_prefix          = "devopsgenie-prod"
  kubernetes_version  = "1.29"

  # The provider requires either an `identity` or a `service_principal` block.
  # A system-assigned managed identity also makes AKS create the kubelet
  # identity exposed through `kubelet_identity`.
  identity {
    type = "SystemAssigned"
  }

  # System node pool — runs platform add-ons
  default_node_pool {
    name = "system"

    # The "d" variant has a local temp disk, which an Ephemeral OS disk needs;
    # Standard_D4s_v5 has no local storage and is rejected with Ephemeral.
    vm_size         = "Standard_D4ds_v5"
    os_disk_size_gb = 128
    os_disk_type    = "Ephemeral"

    vnet_subnet_id = azurerm_subnet.aks.id
    zones          = ["1", "2", "3"]

    # node_count is only the initial size; the autoscaler then moves it
    # between min_count and max_count (see lifecycle block below).
    enable_auto_scaling = true
    node_count          = 3
    min_count           = 2
    max_count           = 5

    node_labels = {
      role = "system"
    }

    # Applies the CriticalAddonsOnly=true:NoSchedule taint. This replaces
    # `node_taints`, which is deprecated on default_node_pool in AzureRM 3.x
    # and removed in 4.0.
    only_critical_addons_enabled = true

    upgrade_settings {
      max_surge = "33%"
    }
  }

  # Workload identity + OIDC issuer (required for Azure Workload Identity)
  workload_identity_enabled = true
  oidc_issuer_enabled       = true

  # Azure AD integration
  azure_active_directory_role_based_access_control {
    managed            = true
    azure_rbac_enabled = true
  }

  # Network plugin
  network_profile {
    network_plugin    = "azure"
    network_policy    = "calico"
    load_balancer_sku = "standard"
    outbound_type     = "loadBalancer"
  }

  # Container insights
  monitor_metrics {}

  # NOTE: azurerm_log_analytics_workspace.main is not declared in the snippets
  # shown in this guide; it must exist elsewhere in this configuration.
  oms_agent {
    log_analytics_workspace_id = azurerm_log_analytics_workspace.main.id
  }

  tags = {
    environment = "production"
    managed_by  = "devopsgenie"
  }

  # The cluster autoscaler changes the live node count; without this, every
  # plan would try to reset the system pool back to node_count.
  lifecycle {
    ignore_changes = [
      default_node_pool[0].node_count,
    ]
  }
}

# Workload node pool — autoscaled, zone-redundant pool for application pods.
resource "azurerm_kubernetes_cluster_node_pool" "workloads" {
  name                  = "workloads"
  kubernetes_cluster_id = azurerm_kubernetes_cluster.main.id

  # The "d" variant has a local temp disk, which an Ephemeral OS disk needs;
  # Standard_D8s_v5 has no local storage and is rejected with Ephemeral.
  vm_size      = "Standard_D8ds_v5"
  os_disk_type = "Ephemeral"

  zones          = ["1", "2", "3"]
  vnet_subnet_id = azurerm_subnet.aks.id

  # The autoscaler keeps the pool between 3 and 20 nodes.
  enable_auto_scaling = true
  min_count           = 3
  max_count           = 20

  node_labels = { role = "workloads" }

  tags = { managed_by = "devopsgenie" }
}

3. Azure Container Registry

terraform/environments/production/acr.tf
# Premium container registry for production images. The admin user is
# disabled, and the registry is geo-replicated to West Europe.
resource "azurerm_container_registry" "main" {
  name          = "devopsgenieproduction"
  sku           = "Premium"
  admin_enabled = false

  location            = azurerm_resource_group.main.location
  resource_group_name = azurerm_resource_group.main.name

  # Secondary replica, itself zone-redundant within its region.
  georeplications {
    zone_redundancy_enabled = true
    location                = "West Europe"
  }
}

# Grant AKS kubelet identity pull access to ACR
resource "azurerm_role_assignment" "aks_acr_pull" {
  scope                = azurerm_container_registry.main.id
  role_definition_name = "AcrPull"
  principal_id         = azurerm_kubernetes_cluster.main.kubelet_identity[0].object_id

  # The kubelet identity is created in the same apply as the cluster; skipping
  # the Azure AD lookup avoids failures caused by directory replication lag.
  skip_service_principal_aad_check = true
}

4. Apply & Connect

cd terraform/environments/production

# Initialise providers, write the plan to a file, then apply exactly that plan.
terraform init
terraform plan -out=tfplan
terraform apply tfplan

# Update kubeconfig
az aks get-credentials \
  --resource-group devopsgenie-production \
  --name devopsgenie-production \
  --overwrite-existing

# The cluster has Azure AD integration with Azure RBAC enabled, so kubectl
# authenticates through kubelogin (install it with `az aks install-cli` if it
# is missing). Converting the kubeconfig reuses the current `az login` session
# instead of prompting for a device-code login.
# Your account also needs an AKS RBAC role on the cluster, for example
# "Azure Kubernetes Service RBAC Cluster Admin".
kubelogin convert-kubeconfig -l azurecli

kubectl get nodes

5. Install Platform Stack

# Install the DevOpsGenie platform stack on the production AKS cluster.
devopsgenie platform install \
  --provider azure \
  --cluster devopsgenie-production \
  --resource-group devopsgenie-production

Upgrade Strategy

# NOTE(review): aks.tf pins kubernetes_version = "1.29". After upgrading the
# cluster outside Terraform, update that value to match, otherwise the next
# `terraform plan` will likely report drift — confirm how devopsgenie
# reconciles this.

# Check available upgrade versions
az aks get-upgrades \
--resource-group devopsgenie-production \
--name devopsgenie-production \
--output table

# Preview upgrade
# (--dry-run is passed, so this invocation is meant to preview only.)
devopsgenie cluster upgrade \
--target-version 1.30 \
--provider azure \
--dry-run

# Execute upgrade (blue/green node pool strategy)
# Same command as the preview, without --dry-run.
devopsgenie cluster upgrade \
--target-version 1.30 \
--provider azure