Today I found some error events in my Kubernetes (v1.28.3) cluster. They look like this:
(combined from similar events): Failed to create pod sandbox: rpc error: code = DeadlineExceeded desc = failed to get sandbox image "registry.k8s.io/pause:3.6": failed to pull image "registry.k8s.io/pause:3.6": failed to pull and unpack image "registry.k8s.io/pause:3.6": failed to resolve reference "registry.k8s.io/pause:3.6": failed to do request: Head "https://us-west2-docker.pkg.dev/v2/k8s-artifacts-prod/images/pause/manifests/3.6": dial tcp 74.125.204.82:443: i/o timeout

I know this is probably a network issue when downloading the registry.k8s.io/pause:3.6 image, but I could not find any registry.k8s.io/pause:3.6 configuration in the DaemonSet. This is its YAML definition:
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: calico-node
  namespace: calico-system
  annotations:
    deprecated.daemonset.template.generation: '1'
status:
  currentNumberScheduled: 2
  numberMisscheduled: 0
  desiredNumberScheduled: 2
  numberReady: 1
  observedGeneration: 1
  updatedNumberScheduled: 2
  numberAvailable: 1
  numberUnavailable: 1
spec:
  selector:
    matchLabels:
      k8s-app: calico-node
  template:
    metadata:
      creationTimestamp: null
      labels:
        app.kubernetes.io/name: calico-node
        k8s-app: calico-node
      annotations:
        hash.operator.tigera.io/cni-config: 9f0a12e03c58671de56ed3876cb88f1c43cef5dc
        hash.operator.tigera.io/system: bb4746872201725da2dea19756c475aa67d9c1e9
        hash.operator.tigera.io/tigera-ca-private: 8766b9b7cef0ba4722da2eb519044c7c3b68df33
    spec:
      volumes:
        - name: lib-modules
          hostPath:
            path: /lib/modules
            type: ''
        - name: xtables-lock
          hostPath:
            path: /run/xtables.lock
            type: FileOrCreate
        - name: policysync
          hostPath:
            path: /var/run/nodeagent
            type: DirectoryOrCreate
        - name: tigera-ca-bundle
          configMap:
            name: tigera-ca-bundle
            defaultMode: 420
        - name: node-certs
          secret:
            secretName: node-certs
            defaultMode: 420
        - name: var-run-calico
          hostPath:
            path: /var/run/calico
            type: ''
        - name: var-lib-calico
          hostPath:
            path: /var/lib/calico
            type: ''
        - name: cni-bin-dir
          hostPath:
            path: /opt/cni/bin
            type: ''
        - name: cni-net-dir
          hostPath:
            path: /etc/cni/net.d
            type: ''
        - name: cni-log-dir
          hostPath:
            path: /var/log/calico/cni
            type: ''
        - name: flexvol-driver-host
          hostPath:
            path: /usr/libexec/kubernetes/kubelet-plugins/volume/exec/nodeagent~uds
            type: DirectoryOrCreate
      initContainers:
        - name: flexvol-driver
          image: docker.io/calico/pod2daemon-flexvol:v3.26.1
          resources: {}
          volumeMounts:
            - name: flexvol-driver-host
              mountPath: /host/driver
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
          imagePullPolicy: IfNotPresent
          securityContext:
            capabilities:
              drop:
                - ALL
            privileged: true
            runAsUser: 0
            runAsGroup: 0
            runAsNonRoot: false
            allowPrivilegeEscalation: true
            seccompProfile:
              type: RuntimeDefault
        - name: install-cni
          image: docker.io/calico/cni:v3.26.1
          command:
            - /opt/cni/bin/install
          env:
            - name: CNI_CONF_NAME
              value: 10-calico.conflist
            - name: SLEEP
              value: 'false'
            - name: CNI_NET_DIR
              value: /etc/cni/net.d
            - name: CNI_NETWORK_CONFIG
              valueFrom:
                configMapKeyRef:
                  name: cni-config
                  key: config
            - name: KUBERNETES_SERVICE_HOST
              value: 10.96.0.1
            - name: KUBERNETES_SERVICE_PORT
              value: '443'
          resources: {}
          volumeMounts:
            - name: cni-bin-dir
              mountPath: /host/opt/cni/bin
            - name: cni-net-dir
              mountPath: /host/etc/cni/net.d
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
          imagePullPolicy: IfNotPresent
          securityContext:
            capabilities:
              drop:
                - ALL
            privileged: true
            runAsUser: 0
            runAsGroup: 0
            runAsNonRoot: false
            allowPrivilegeEscalation: true
            seccompProfile:
              type: RuntimeDefault
      containers:
        - name: calico-node
          image: docker.io/calico/node:v3.26.1
          env:
            - name: DATASTORE_TYPE
              value: kubernetes
            - name: WAIT_FOR_DATASTORE
              value: 'true'
            - name: CLUSTER_TYPE
              value: k8s,operator,bgp
            - name: CALICO_DISABLE_FILE_LOGGING
              value: 'false'
            - name: FELIX_DEFAULTENDPOINTTOHOSTACTION
              value: ACCEPT
            - name: FELIX_HEALTHENABLED
              value: 'true'
            - name: FELIX_HEALTHPORT
              value: '9099'
            - name: NODENAME
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: spec.nodeName
            - name: NAMESPACE
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: metadata.namespace
            - name: FELIX_TYPHAK8SNAMESPACE
              value: calico-system
            - name: FELIX_TYPHAK8SSERVICENAME
              value: calico-typha
            - name: FELIX_TYPHACAFILE
              value: /etc/pki/tls/certs/tigera-ca-bundle.crt
            - name: FELIX_TYPHACERTFILE
              value: /node-certs/tls.crt
            - name: FELIX_TYPHAKEYFILE
              value: /node-certs/tls.key
            - name: FIPS_MODE_ENABLED
              value: 'false'
            - name: FELIX_TYPHACN
              value: typha-server
            - name: CALICO_MANAGE_CNI
              value: 'true'
            - name: CALICO_IPV4POOL_CIDR
              value: 10.96.0.0/12
            - name: CALICO_IPV4POOL_VXLAN
              value: CrossSubnet
            - name: CALICO_IPV4POOL_BLOCK_SIZE
              value: '26'
            - name: CALICO_IPV4POOL_NODE_SELECTOR
              value: all()
            - name: CALICO_IPV4POOL_DISABLE_BGP_EXPORT
              value: 'false'
            - name: CALICO_NETWORKING_BACKEND
              value: bird
            - name: IP
              value: autodetect
            - name: IP_AUTODETECTION_METHOD
              value: first-found
            - name: IP6
              value: none
            - name: FELIX_IPV6SUPPORT
              value: 'false'
            - name: KUBERNETES_SERVICE_HOST
              value: 10.96.0.1
            - name: KUBERNETES_SERVICE_PORT
              value: '443'
          resources: {}
          volumeMounts:
            - name: tigera-ca-bundle
              readOnly: true
              mountPath: /etc/pki/tls/certs
            - name: tigera-ca-bundle
              readOnly: true
              mountPath: /etc/pki/tls/cert.pem
              subPath: ca-bundle.crt
            - name: lib-modules
              readOnly: true
              mountPath: /lib/modules
            - name: xtables-lock
              mountPath: /run/xtables.lock
            - name: policysync
              mountPath: /var/run/nodeagent
            - name: node-certs
              readOnly: true
              mountPath: /node-certs
            - name: var-run-calico
              mountPath: /var/run/calico
            - name: var-lib-calico
              mountPath: /var/lib/calico
            - name: cni-log-dir
              mountPath: /var/log/calico/cni
            - name: cni-net-dir
              mountPath: /host/etc/cni/net.d
          livenessProbe:
            httpGet:
              path: /liveness
              port: 9099
              host: localhost
              scheme: HTTP
            timeoutSeconds: 10
            periodSeconds: 10
            successThreshold: 1
            failureThreshold: 3
          readinessProbe:
            exec:
              command:
                - /bin/calico-node
                - '-bird-ready'
                - '-felix-ready'
            timeoutSeconds: 5
            periodSeconds: 10
            successThreshold: 1
            failureThreshold: 3
          lifecycle:
            preStop:
              exec:
                command:
                  - /bin/calico-node
                  - '-shutdown'
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
          imagePullPolicy: IfNotPresent
          securityContext:
            capabilities:
              drop:
                - ALL
            privileged: true
            runAsUser: 0
            runAsGroup: 0
            runAsNonRoot: false
            allowPrivilegeEscalation: true
            seccompProfile:
              type: RuntimeDefault
      restartPolicy: Always
      terminationGracePeriodSeconds: 5
      dnsPolicy: ClusterFirst
      nodeSelector:
        kubernetes.io/os: linux
      serviceAccountName: calico-node
      serviceAccount: calico-node
      hostNetwork: true
      securityContext: {}
      schedulerName: default-scheduler
      tolerations:
        - key: CriticalAddonsOnly
          operator: Exists
        - operator: Exists
          effect: NoSchedule
        - operator: Exists
          effect: NoExecute
      priorityClassName: system-node-critical
  updateStrategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 1
      maxSurge: 0
  revisionHistoryLimit: 10

Where is the pause image configured, and what should I do to find out? I have tried to change the kubelet configuration and add a mirror address, like this:
[root@k8sslave01 kubelet]# systemctl status kubelet --full
● kubelet.service - kubelet: The Kubernetes Node Agent
   Loaded: loaded (/usr/lib/systemd/system/kubelet.service; enabled; vendor preset: disabled)
  Drop-In: /usr/lib/systemd/system/kubelet.service.d
           └─10-kubeadm.conf
   Active: active (running) since Mon 2024-02-19 12:26:30 CST; 33min ago
     Docs: https://kubernetes.io/docs/
 Main PID: 30876 (kubelet)
    Tasks: 11
   Memory: 84.6M
   CGroup: /system.slice/kubelet.service
           └─30876 /usr/bin/kubelet --bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf --config=/var/lib/kubelet/config.yaml --container-runtime-endpoint=unix:///var/run/containerd/containerd.sock --pod-infra-container-image=registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.6

Here registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.6 is the new mirror address, but it does not seem to have any effect (I list what I plan to check next at the end of this question). I have already verified that the image itself can be pulled from the node:
[root@k8sslave01 kubelet]# crictl pull registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.6
Image is up to date for sha256:6270bb605e12e581514ada5fd5b3216f727db55dc87d5889c790e4c760683fee
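What I plan to check next, assuming the sandbox image is chosen by containerd's CRI plugin rather than by the kubelet flag above (the key names sandbox_image / sandboxImage below are my assumption for my containerd version):

# Look for the sandbox image in the effective containerd configuration;
# I expect a sandbox_image key under the CRI plugin section.
containerd config dump | grep sandbox_image

# Cross-check the value that containerd reports through the CRI API.
crictl info | grep -i sandboximage

If that value is still registry.k8s.io/pause:3.6, I assume I would have to change it in /etc/containerd/config.toml and restart containerd, but I would like to confirm whether this is really where the pause image comes from, since changing the kubelet flag alone did not help.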