
I have a module definition as below:

===

providers.tf

provider "kubernetes" { #load_config_file = "false" host = azurerm_kubernetes_cluster.aks.kube_config.0.host username = azurerm_kubernetes_cluster.aks.kube_config.0.username password = azurerm_kubernetes_cluster.aks.kube_config.0.password client_certificate = base64decode(azurerm_kubernetes_cluster.aks.kube_config.0.client_certificate) client_key = base64decode(azurerm_kubernetes_cluster.aks.kube_config.0.client_key) cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.aks.kube_config.0.cluster_ca_certificate) } 

outputs.tf

output "node_resource_group" { value = azurerm_kubernetes_cluster.aks.node_resource_group description = "The name of resource group where the AKS Nodes are created" } output "kubeConfig" { value = azurerm_kubernetes_cluster.aks.kube_config_raw description = "Kubeconfig of AKS Cluster" } output "host" { value = azurerm_kubernetes_cluster.aks.kube_config.0.host } output "client_key" { value = azurerm_kubernetes_cluster.aks.kube_config.0.client_key } output "client_certificate" { value = azurerm_kubernetes_cluster.aks.kube_config.0.client_certificate } output "kube_config" { value = azurerm_kubernetes_cluster.aks.kube_config_raw } output "cluster_ca_certificate" { value = azurerm_kubernetes_cluster.aks.kube_config.0.cluster_ca_certificate } 

main.tf

resource "azurerm_log_analytics_workspace" "law" { name = "${var.tla}-la-${local.lookup_result}-${var.identifier}" location = data.azurerm_resource_group.rg.location resource_group_name = data.azurerm_resource_group.rg.name sku = var.la_sku retention_in_days = 30 } resource "azurerm_kubernetes_cluster" "aks" { name = "${var.tla}-aks-${local.lookup_result}-${var.identifier}" location = data.azurerm_resource_group.rg.location resource_group_name = data.azurerm_resource_group.rg.name dns_prefix = var.dns_prefix kubernetes_version = var.kubernetes_version sku_tier = var.sku_tier private_cluster_enabled = var.enable_private_cluster #api_server_authorized_ip_ranges = "" default_node_pool { name = "syspool001" orchestrator_version = var.orchestrator_version availability_zones = var.agents_availability_zones enable_auto_scaling = true node_count = var.default_pool_node_count max_count = var.default_pool_max_node_count min_count = var.default_pool_min_node_count max_pods = var.default_pool_max_pod_count vm_size = var.agents_size enable_node_public_ip = false os_disk_size_gb = var.default_pool_os_disk_size_gb type = "VirtualMachineScaleSets" vnet_subnet_id = var.vnet_subnet_id node_labels = var.agents_labels tags = merge(local.tags, var.agents_tags) } network_profile { network_plugin = var.network_plugin network_policy = var.network_policy dns_service_ip = var.net_profile_dns_service_ip docker_bridge_cidr = var.net_profile_docker_bridge_cidr service_cidr = var.net_profile_service_cidr } role_based_access_control { enabled = true azure_active_directory { managed = true admin_group_object_ids = var.rbac_aad_admin_group_object_ids } } identity { type = "SystemAssigned" } addon_profile { azure_policy { enabled = true } http_application_routing { enabled = false } oms_agent { enabled = true log_analytics_workspace_id = data.azurerm_log_analytics_workspace.log_analytics.id } } tags = local.tags lifecycle { ignore_changes = [ default_node_pool ] } } resource "azurerm_kubernetes_cluster_node_pool" "aksnp" { lifecycle { ignore_changes = [ node_count ] } for_each = var.additional_node_pools kubernetes_cluster_id = azurerm_kubernetes_cluster.aks.id name = each.value.node_os == "Windows" ? substr(each.key, 0, 6) : substr(each.key, 0, 12) node_count = each.value.node_count vm_size = each.value.vm_size availability_zones = each.value.zones max_pods = each.value.max_pods os_disk_size_gb = each.value.os_disk_size_gb os_type = each.value.node_os vnet_subnet_id = var.vnet_subnet_id node_taints = each.value.taints enable_auto_scaling = each.value.cluster_auto_scaling min_count = each.value.cluster_auto_scaling_min_count max_count = each.value.cluster_auto_scaling_max_count } resource "kubernetes_namespace" "aks-namespace" { metadata { name = var.namespace } } 

data.tf

data "azurerm_resource_group" "rg" { name = var.resource_group_name } 

lookups.tf

locals {
  environment_lookup = {
    dev  = "d"
    test = "t"
    int  = "i"
    prod = "p"
    prd  = "p"
    uat  = "a"
    poc  = "d"
    dr   = "r"
    lab  = "l"
  }

  lookup_result = lookup(local.environment_lookup, var.environment)

  tags = merge(
    data.azurerm_resource_group.rg.tags,
    {
      Directory      = "tectcompany.com",
      PrivateDNSZone = var.private_dns_zone,
      Immutable      = "False",
      ManagedOS      = "True",
    }
  )
}

data "azurerm_log_analytics_workspace" "log_analytics" {
  name                = "abc-az-lad2"
  resource_group_name = "abc-dev-aae"
}

variables.tf

variable "secondary_region" { description = "Is this resource being deployed into the secondary (pair) region?" default = false type = bool } variable "override_log_analytics_workspace" { description = "Override the vm log analytics workspace" type = string default = null } variable "override_log_analytics_resource_group_name" { description = "Overrides the log analytics resource group name" type = string default = null } variable "environment" { description = "The name of environment for the AKS Cluster" type = string default = "dev" } variable "identifier" { description = "The identifier for the AKS Cluster" type = number default = "001" } variable "kubernetes_version" { description = "Specify which Kubernetes release to use. The default used is the latest Kubernetes version available in the region" type = string default = "1.19.9" } variable "dns_prefix" { description = "The dns prefix for the AKS Cluster" type = string default = "odessa-sandpit" } variable "orchestrator_version" { description = "Specify which Kubernetes release to use for the orchestration layer. The default used is the latest Kubernetes version available in the region" type = string default = null } variable "agents_availability_zones" { description = "(Optional) A list of Availability Zones across which the Node Pool should be spread. Changing this forces a new resource to be created." type = list(string) default = null } variable "agents_size" { default = "Standard_D4s_v3" description = "The default virtual machine size for the Kubernetes agents" type = string } variable "vnet_subnet_id" { description = "(Optional) The ID of a Subnet where the Kubernetes Node Pool should exist. Changing this forces a new resource to be created." type = string default = null } variable "agents_labels" { description = "(Optional) A map of Kubernetes labels which should be applied to nodes in the Default Node Pool. Changing this forces a new resource to be created." type = map(string) default = {} } variable "agents_tags" { description = "(Optional) A mapping of tags to assign to the Node Pool." type = map(string) default = {} } variable "net_profile_dns_service_ip" { description = "(Optional) IP address within the Kubernetes service address range that will be used by cluster service discovery (kube-dns). Changing this forces a new resource to be created." type = string default = null } variable "net_profile_docker_bridge_cidr" { description = "(Optional) IP address (in CIDR notation) used as the Docker bridge IP address on nodes. Changing this forces a new resource to be created." type = string default = null } variable "net_profile_service_cidr" { description = "(Optional) The Network Range used by the Kubernetes service. Changing this forces a new resource to be created." type = string default = null } variable "rbac_aad_admin_group_object_ids" { description = "Object ID of groups with admin access." type = list(string) default = null } variable "network_policy" { description = "(Optional) The Network Policy to be used by the network profile of Azure Kubernetes Cluster." type = string default = "azure" } variable "network_plugin" { description = "(Optional) The Network Plugin to be used by the network profile of Azure Kubernetes Cluster." type = string default = "azure" } variable "enable_private_cluster" { description = "(Optional) Set this variable to true if you want Azure Kubernetes Cluster to be private." 
default = true } variable "default_pool_node_count" { description = "(Optional) The initial node count for the default pool of AKS Cluster" type = number default = 3 } variable "default_pool_max_node_count" { description = "(Optional) The max node count for the default pool of AKS Cluster" type = number default = 6 } variable "default_pool_min_node_count" { description = "(Optional) The min node count for the default pool of AKS Cluster" type = number default = 3 } variable "default_pool_max_pod_count" { description = "(Optional) The max pod count for the default pool of AKS Cluster" type = number default = 13 } variable "default_pool_os_disk_size_gb" { description = "(Optional) The size of os disk in gb for the nodes from default pool of AKS Cluster" type = string default = "64" } variable "additional_node_pools" { type = map(object({ node_count = number max_pods = number os_disk_size_gb = number vm_size = string zones = list(string) node_os = string taints = list(string) cluster_auto_scaling = bool cluster_auto_scaling_min_count = number cluster_auto_scaling_max_count = number })) } variable "sku_tier" { description = "(Optional)The SKU Tier that should be used for this Kubernetes Cluster, possible values Free or Paid" type = string default = "Paid" validation { condition = contains(["Free", "Paid"], var.sku_tier) error_message = "SKU_TIER can only be either Paid or Free." } } variable "la_sku" { description = "(Optional)The SKU Tier that should be used for Log Analytics. Multiple values are possible." type = string default = "PerGB2018" validation { condition = contains(["Free", "PerNode", "Premium", "Standard", "Standalone", "Unlimited", "CapacityReservation", "PerGB2018"], var.la_sku) error_message = "SKU_TIER for Log Analytics can be can only be either of Free, PerNode, Premium, Standard, Standalone, Unlimited, CapacityReservation and PerGB2018(Default Value)." } } variable "resource_group_name" { description = "Resource Group for deploying AKS Cluster" type = string } variable "private_dns_zone" { description = "DNS prefix for AKS Cluster" type = string default = "testcluster" } variable "tla" { description = "Three Level acronym - three letter abbreviation for application" type = string default = "" validation { condition = length(var.tla) == 3 error_message = "The TLA should be precisely three characters." } } variable "namespace"{ description = "AKS Namespace" type = string } 

Finally, I am calling my module below to create the AKS cluster, LA, and Namespace for the AKS Cluster:

provider "azurerm" { features {} #version = "~> 2.53.0" } module "aks-cluster1" { source = "../../" resource_group_name = "pst-aks-sandpit-dev-1" tla = "pqr" additional_node_pools = { pool1 = { node_count = "1" max_pods = "110" os_disk_size_gb = "30" vm_size = "Standard_D8s_v3" zones = ["1","2","3"] node_os = "Linux" taints = ["kubernetes.io/os=windows:NoSchedule"] cluster_auto_scaling = true cluster_auto_scaling_min_count = "2" cluster_auto_scaling_max_count = "4" } } namespace = "sample-ns" } 

Problem: I get a "no such host" error when Terraform attempts to create the cluster.

I think it is not able to connect to the cluster, but I could be wrong; I do not know how it handles this internally.

Error: Post "https://testdns-05885a32.145f13c0-25ce-43e4-ae46-8cbef448ecf3.privatelink.australiaeast.azmk8s.io:443/api/v1/namespaces": dial tcp: lookup testdns-05885a32.145f13c0-25ce-43e4-ae46-8cbef448ecf3.privatelink.australiaeast.azmk8s.io: no such host 

4 Answers


I'm one of the maintainers of the Terraform Kubernetes provider, and I see this particular issue pretty often. As a former devops person myself, I empathize with the struggle I keep seeing in this area. It's something I would really love to fix in the provider, if it were possible.

The issue you're facing is a limitation in Terraform core when passing an unknown value to a provider configuration block. To quote their docs:

You can use expressions in the values of these configuration arguments, but can only reference values that are known before the configuration is applied. 

When you make a change to the underlying infrastructure, such as the AKS cluster in this case, you're passing an unknown value into the Kubernetes provider configuration block, since the full scope of the cluster infrastructure is not known until after the change has been applied to the AKS cluster.

Although I did write the initial guide to show that it can be possible to work around some of these issues, as you've found from experience, there are many edge cases that make getting the Kubernetes provider working alongside the underlying infrastructure an unreliable and unintuitive process. This is due to a long-standing limitation in Terraform that can't be fixed in any provider, but we do have plans to smooth out the bumps a little by adding better error messages upfront, which would have saved you some headache in this case.

To solve this particular type of problem, the cluster infrastructure needs to be kept in a state separate from the Kubernetes and Helm provider resources. I have an example here which builds an AKS cluster in one apply and then manages the Kubernetes/Helm resources in a second apply. You can use this approach to build the most robust configuration for your particular use case:

https://github.com/hashicorp/terraform-provider-kubernetes/tree/e058e225e621f06e393bcb6407e7737fd43817bd/_examples/aks

I know this two-apply approach is inconvenient, which is why we continue to try and accommodate users in single-apply scenarios, and scenarios which contain the Kubernetes and cluster resources in the same Terraform state. However, until upstream Terraform can add support for this, the single-apply workflow will remain buggy and less reliable than separating cluster infrastructure from Kubernetes resources.

Most cases can be worked around using depends_on (to ensure the cluster is created before the Kubernetes resource), or by moving the cluster infrastructure into a separate module and running terraform state rm module.kubernetes-config or terraform apply -target=module.aks-cluster. But I think encouraging this kind of work-around will cause more headaches in the long run, as it puts the user in charge of figuring out when to use special one-off apply commands, rather than setting up Terraform to behave reliably and predictably from the start. Plus it can have unintended side-effects, like orphaning cloud resources.
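To make the shape of that more concrete, here is a rough sketch (not the linked example verbatim) of what the second, Kubernetes-only root configuration could look like if the kubernetes_namespace resource is moved out of the AKS module. The cluster name and resource group below are placeholders, and this configuration is only applied after the first apply has created the cluster:

# kubernetes-config/main.tf -- applied only after the AKS cluster exists
provider "azurerm" {
  features {}
}

# Read the already-created cluster; name and resource group are placeholders.
data "azurerm_kubernetes_cluster" "aks" {
  name                = "pqr-aks-d-1"
  resource_group_name = "pst-aks-sandpit-dev-1"
}

# Every value here is known at plan time, so the provider configuration is reliable.
provider "kubernetes" {
  host                   = data.azurerm_kubernetes_cluster.aks.kube_config.0.host
  client_certificate     = base64decode(data.azurerm_kubernetes_cluster.aks.kube_config.0.client_certificate)
  client_key             = base64decode(data.azurerm_kubernetes_cluster.aks.kube_config.0.client_key)
  cluster_ca_certificate = base64decode(data.azurerm_kubernetes_cluster.aks.kube_config.0.cluster_ca_certificate)
}

resource "kubernetes_namespace" "aks-namespace" {
  metadata {
    name = "sample-ns"
  }
}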




Thanks for the additional detail. I see a few problems here. The first one is at the heart of your immediate problem:

variable "enable_private_cluster" { description = "(Optional) Set this variable to true if you want Azure Kubernetes Cluster to be private." default = true } 

Your cluster deployment is taking the default here, so your API endpoint is a private DNS entry in the zone privatelink.australiaeast.azmk8s.io:

Post "https://testdns-05885a32.145f13c0-25ce-43e4-ae46-8cbef448ecf3.privatelink.australiaeast.azmk8s.io:443/api/v1/namespaces" 

The terraform kubernetes provider must be able to reach the API endpoint in order to deploy the namespace. However, it is unable to resolve the domain. For this to work, you will need to ensure that:

  1. The private DNS zone exists in Azure
  2. The private DNS zone is linked to the relevant virtual networks, including the host where you're running Terraform
  3. The DNS resolver on the Terraform host can resolve the privatelink domain through the endpoint defined at https://learn.microsoft.com/en-us/azure/virtual-network/what-is-ip-address-168-63-129-16 - note that this may require forwarding the private domain if your network uses on-premises internal DNS.
  4. The Terraform host can reach the privatelink endpoint deployed by the cluster on TCP port 443

Azure privatelink and private DNS can be non-trivial to configure correctly, especially in a complex networking environment. So, you may encounter additional hurdles that I haven't covered here.
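For point 2 above, a minimal sketch of what such a link can look like with the azurerm provider, placed alongside the cluster resource. Every name here is a placeholder: the system-managed zone AKS creates usually lives in the node resource group and is typically named something like "<guid>.privatelink.<region>.azmk8s.io", so check what was actually created before wiring this up:

# Placeholder names throughout; adjust to your environment.
data "azurerm_virtual_network" "terraform_runner" {
  name                = "terraform-runner-vnet"
  resource_group_name = "terraform-runner-rg"
}

resource "azurerm_private_dns_zone_virtual_network_link" "terraform_runner" {
  name                  = "terraform-runner-link"
  resource_group_name   = azurerm_kubernetes_cluster.aks.node_resource_group
  private_dns_zone_name = "145f13c0-25ce-43e4-ae46-8cbef448ecf3.privatelink.australiaeast.azmk8s.io" # the zone AKS created
  virtual_network_id    = data.azurerm_virtual_network.terraform_runner.id
}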

Alternatively, you may wish to deploy this cluster without using privatelink by setting this module option to false. This may be undesirable for security and compliance reasons, so be sure you understand what you're doing here:

 enable_private_cluster = false 

The next issue I encountered is:

 Error: creating Managed Kubernetes Cluster "pqr-aks-d-1" (Resource Group "pst-aks-sandpit-dev-1"): containerservice.ManagedClustersClient#CreateOrUpdate: Failure sending request: StatusCode=0 -- Original Error: Code="InsufficientAgentPoolMaxPodsPerAgentPool" Message="The AgentPoolProfile 'syspool001' has an invalid total maxPods(maxPods per node * node count), the total maxPods(13 * 824668498368) should be larger than 30. Please refer to aka.ms/aks-min-max-pod for more detail." Target="agentPoolProfile.kubernetesConfig.kubeletConfig.maxPods" 

I overcame that by setting:

 default_pool_max_pod_count = 30 
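In terms of the module call from the question, that just means passing the extra argument through; everything else is unchanged (a sketch, reusing the values from the question):

module "aks-cluster1" {
  source                     = "../../"
  resource_group_name        = "pst-aks-sandpit-dev-1"
  tla                        = "pqr"
  namespace                  = "sample-ns"
  default_pool_max_pod_count = 30 # clears the "total maxPods should be larger than 30" check quoted above

  additional_node_pools = {
    pool1 = {
      node_count                     = "1"
      max_pods                       = "110"
      os_disk_size_gb                = "30"
      vm_size                        = "Standard_D8s_v3"
      zones                          = ["1", "2", "3"]
      node_os                        = "Linux"
      taints                         = ["kubernetes.io/os=windows:NoSchedule"]
      cluster_auto_scaling           = true
      cluster_auto_scaling_min_count = "2"
      cluster_auto_scaling_max_count = "4"
    }
  }
}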

The last issue is that you need to configure the kubernetes provider to have sufficient privileges to deploy the namespace:

│ Error: Unauthorized
│
│   with module.aks-cluster1.kubernetes_namespace.aks-namespace,
│   on ../../main.tf line 103, in resource "kubernetes_namespace" "aks-namespace":
│  103: resource "kubernetes_namespace" "aks-namespace" {

One way to accomplish that is to use kube_admin_config instead of kube_config:

provider "kubernetes" { #load_config_file = "false" host = azurerm_kubernetes_cluster.aks.kube_admin_config.0.host username = azurerm_kubernetes_cluster.aks.kube_admin_config.0.username password = azurerm_kubernetes_cluster.aks.kube_admin_config.0.password client_certificate = base64decode(azurerm_kubernetes_cluster.aks.kube_admin_config.0.client_certificate) client_key = base64decode(azurerm_kubernetes_cluster.aks.kube_admin_config.0.client_key) cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.aks.kube_admin_config.0.cluster_ca_certificate) } 



Get the cluster details with a data source and use them to configure the provider; for a non-private cluster the Kubernetes API can be reached directly.

Step 1: data source

 data "azurerm_kubernetes_cluster" "example" { name = var.cluster_name resource_group_name = azurerm_resource_group.rg.name } 

Step 2: provider

 provider "kubernetes" { host = data.azurerm_kubernetes_cluster.example.kube_config.0.host username = data.azurerm_kubernetes_cluster.example.kube_config.0.username password = data.azurerm_kubernetes_cluster.example.kube_config.0.password client_certificate = base64decode(data.azurerm_kubernetes_cluster.example.kube_config.0.client_certificate) client_key = base64decode(data.azurerm_kubernetes_cluster.example.kube_config.0.client_key) cluster_ca_certificate = base64decode(data.azurerm_kubernetes_cluster.example.kube_config.0.cluster_ca_certificate) } 



Difficult to say what the issue is since the code you posted is incomplete. For starters, you shouldn't be doing this:

provider "kubernetes" { config_path = "~/.kube/config" } 

The AKS URL you posted doesn't exist, so I think that's pulling an old cluster default from your kube config.

1 Comment

This suggestion unfortunately does not work. I will add the other .tf files (lookups, data, etc.) in a couple of hours when I am back home.
