# k8s日常运维常用命令

# List pods in every namespace whose listing line contains "http".
# (grep -E can be used instead when an extended regex is needed; the hint
# must stay in a comment, otherwise grep treats it as a file name.)

kubectl get pod -o wide -A | grep http

# Print namespace, name and READY for every Deployment whose READY column
# is not "0/0". The header row also passes the filter and is printed.

kubectl get deployments --all-namespaces | awk '$3 != "0/0" { print $1, $2, $3 }'

# List Deployments that are not scaled to "0/0", EXCLUDING those whose
# namespace contains kube-system or monitoring; prints the first three
# columns (namespace, name, READY).
# The exclusion is tested against the namespace column only, so a Deployment
# in another namespace whose name merely contains "monitoring" is still
# listed.

kubectl get deployments -A | awk '$3 != "0/0" && $1 !~ /kube-system|monitoring/ { print $1, $2, $3 }'

# Count Deployments that are not scaled to "0/0", excluding namespaces that
# contain kube-system, monitoring, jenkins or devops.
# --no-headers keeps the column-title row out of the count.
# NOTE(review): the exclusion is now tested against the namespace column
# only (previously anywhere in the line) - confirm these four words are
# namespace names.

kubectl get deployments -A --no-headers | awk '$3 != "0/0" && $1 !~ /kube-system|monitoring|jenkins|devops/' | wc -l

# Open the "prometheus" resource named "k8s" in namespace monitoring in an
# editor (this is not a pod); the change is applied once it is saved.

kubectl edit prometheus k8s --namespace=monitoring

# Stream the logs of pod rdfa-extend-gateway-555d9db985-z5gjc in namespace
# infra-middle-domain; --follow keeps the stream open for new lines.

kubectl --namespace infra-middle-domain logs --follow rdfa-extend-gateway-555d9db985-z5gjc

# List the ingresses of every namespace in wide output format.

kubectl get ingress --all-namespaces --output=wide

# List all pods in the "monitor" namespace.
# NOTE(review): the neighbouring commands target "monitoring"; confirm that
# "monitor" is a real namespace and not a typo.

kubectl get pods --namespace monitor

# Check pod status again, this time in the "monitoring" namespace.

kubectl get pods --namespace monitoring

# Scale the alertmanager Deployment in namespace monitoring to 0 replicas.

kubectl scale deployment/alertmanager --replicas=0 --namespace=monitoring

# Edit the Alertmanager custom resource named "main" in namespace monitoring.

kubectl edit alertmanagers.monitoring.coreos.com main --namespace=monitoring

# Find kube-system pods whose listing line contains "0/1"
# (single-container pods with no ready container).

kubectl get pods --namespace kube-system | grep '0/1'

# Same check for two-container pods: lines containing "0/2".

kubectl get pods --namespace kube-system | grep '0/2'

# Find pods in every namespace whose listing contains
# "open-platform-frontend" (wide output).
# grep -F is used because the pattern is a fixed string and egrep is
# obsolescent in current GNU grep.

kubectl get pod -o wide -A | grep -F open-platform-frontend

# Find services in every namespace whose listing contains
# "open-platform-frontend".

kubectl get svc -o wide -A | grep -F open-platform-frontend

# Find Deployments in every namespace whose listing contains
# "open-platform-frontend".

kubectl get deployments -o wide -A | grep -F open-platform-frontend

# Find Deployments in every namespace whose listing contains
# "ennew-portal-platform". (The description must be a comment; without the
# leading '#' the shell would try to execute it.)

kubectl get deployments -o wide -A | grep -F ennew-portal-platform

# List every pod in every namespace, wide output.

kubectl get pods --all-namespaces --output=wide

# List every Deployment in every namespace, wide output.

kubectl get deployments --all-namespaces --output=wide

# List every Deployment in every namespace, default columns.

kubectl get deployments --all-namespaces

# Find pods whose wide listing mentions the IP address 10.39.41.157.
# -F treats the dots literally (as a regex each '.' matches any character)
# and -w stops the address from matching inside a longer number.

kubectl get pod -o wide -A | grep -Fw 10.39.41.157

# Find services whose wide listing contains "30700" (e.g. as a port).

kubectl get services --all-namespaces --output=wide | grep 30700

# List every service in every namespace, wide output.

kubectl get services --all-namespaces --output=wide

# Find pods whose wide listing contains "http".

kubectl get pods --all-namespaces --output=wide | grep http

# Run netstat inside pod http-executor-66864d9767-sw9b4 (namespace bigdata)
# and keep only ESTABLISHED connections that mention 3306 (the MySQL port).

kubectl --namespace bigdata exec --stdin --tty http-executor-66864d9767-sw9b4 -- /bin/bash -c "netstat -an | grep ESTABLISH | grep 3306"

# Stream the logs of pod data-monitor-api-6fbf6bdc49-r5kgh in namespace bigdata.

kubectl --namespace bigdata logs --follow data-monitor-api-6fbf6bdc49-r5kgh

# Count the Deployments in the cluster whose READY column is not "0/0".
# --no-headers is required: the title row's third column ("READY") also
# differs from "0/0" and would otherwise inflate the count by one.

kubectl get deployments -A --no-headers | awk '$3 != "0/0"' | wc -l

# Find Deployments (wide output) whose listing contains
# "fnw-operation-center-job".

kubectl get deployments -o wide -A | grep -F fnw-operation-center-job

# Find pods in every namespace whose listing contains "data-monitor-api".
# grep -F replaces the obsolescent egrep; the pattern is a fixed string.

kubectl get pod -o wide -A | grep -F data-monitor-api

# Show every Deployment line that does not contain "0/0", keeping all
# columns (the header row is printed as well).

kubectl get deployments --all-namespaces | grep -v '0/0'

# Find NodePort services whose wide listing contains "indice-dispatcher".

kubectl get svc -o wide -A | grep -F indice-dispatcher | grep NodePort

# Scale the indice-dispatcher Deployment in namespace bigdata to 0 replicas.
# (The description must be a comment; without the leading '#' the shell
# would try to execute it.)

kubectl scale deployment/indice-dispatcher --replicas=0 -n bigdata

# Count Deployments with READY other than "0/0" outside kube-system.
# --no-headers keeps the title row out of the count, and the exclusion is
# tested against the namespace column so that a Deployment name containing
# "kube-system" in another namespace is not dropped.

kubectl get deployments -A --no-headers | awk '$3 != "0/0" && $1 !~ /kube-system/' | wc -l

# Append the list of business Deployments (namespace and name, READY not
# "0/0") to k8s_qingyun_pro.txt.
# - --no-headers keeps the "NAMESPACE NAME" title row out of the file.
# - kube-system / monitoring are excluded by namespace column only, so a
#   business Deployment whose name contains "monitoring" is kept.
# - Lines mentioning p-qvmjv, p-lhsg9 or p-gkwr6 are dropped as before.
# - '>>' appends: remove or truncate the file first for a fresh snapshot.

kubectl get deployments -A --no-headers \
  | awk '$3 != "0/0" && $1 !~ /kube-system|monitoring/ { print $1, $2 }' \
  | grep -Ev 'p-qvmjv|p-lhsg9|p-gkwr6' >> k8s_qingyun_pro.txt

# Print the name of every node (first column, header row skipped).

kubectl get nodes | awk 'NR > 1 { print $1 }'

# Count the nodes in the cluster.

kubectl get nodes | awk 'NR > 1 { print $1 }' | wc -l

# Show per-node resource usage (CPU and memory).

kubectl top node

# Find pod lines containing "161", meant to catch pods on nodes whose IP
# contains 161. The pattern is matched against the whole line, so any other
# column containing "161" matches too.

kubectl get pods --all-namespaces --output=wide | grep 161

# Drain node 10.39.41.157 ahead of maintenance: evict its pods, skipping
# DaemonSet-managed pods and allowing pods that use emptyDir volumes to be
# deleted.
# --delete-emptydir-data replaces the deprecated --delete-local-data
# (renamed in kubectl 1.20; older clients only know the old name).

kubectl drain 10.39.41.157 --delete-emptydir-data --ignore-daemonsets

# List all pods scheduled on node 10.39.41.156.
# A field selector filters on the pod's node name, so unrelated lines that
# merely contain similar text (e.g. a pod IP) are not matched.
# Assumes the node is registered under this IP as its name, as the
# drain/cordon commands in these notes do.

kubectl get pods -o wide -A --field-selector spec.nodeName=10.39.41.156

# Find pods in every namespace whose listing line contains "0/2"
# (two-container pods with no ready container).

kubectl get pods --all-namespaces --output=wide | grep '0/2'

# Mark node 10.39.41.96 as unschedulable.

kubectl cordon 10.39.41.96

# Drain node 10.39.41.157: evict its pods, skipping DaemonSet-managed pods
# and allowing pods that use emptyDir volumes to be deleted.
# --delete-emptydir-data replaces the deprecated --delete-local-data
# (renamed in kubectl 1.20; older clients only know the old name).

kubectl drain 10.39.41.157 --delete-emptydir-data --ignore-daemonsets

# Count nodes whose listing contains "SchedulingDisabled".
# The previous pattern "gDisabled" is only a fragment of that word and is
# not a label: plain "kubectl get nodes" does not print labels at all.

kubectl get nodes | grep -c SchedulingDisabled

# List the nodes whose listing contains "SchedulingDisabled".

kubectl get nodes | grep SchedulingDisabled

# List the pods of every namespace ordered by creation timestamp.

kubectl get pods --all-namespaces --sort-by=.metadata.creationTimestamp

# List all nodes together with their labels.

kubectl get node --show-labels

# Remove node 10.39.41.29 from the cluster.

kubectl delete node/10.39.41.29

# Show status and placement of the CoreDNS pods (lines containing "coredns").

kubectl get pods --all-namespaces --output=wide | grep coredns

# Delete the CoreDNS pod coredns-6998d84bf5-5j7w4 in kube-system.

kubectl --namespace kube-system delete pod coredns-6998d84bf5-5j7w4

# Add the NoSchedule taint coredns-failure=image-pull-fail to node 10.39.45.145.

kubectl taint node 10.39.45.145 coredns-failure=image-pull-fail:NoSchedule

# Edit the coredns Deployment in namespace kube-system.

kubectl --namespace kube-system edit deployment coredns

# Count the nodes marked unschedulable (listing contains "SchedulingDisabled").
# grep -c counts matching lines directly and replaces the obsolescent
# "egrep ... | wc -l" pair.

kubectl get nodes | grep -c SchedulingDisabled

# List the events of namespace kube-system ordered by their lastTimestamp field.

kubectl --namespace kube-system get events --sort-by=.lastTimestamp

# List all pods scheduled on node 10.39.41.133.
# A field selector filters on the pod's node name instead of regex-matching
# the IP against every column of the listing (the dots were unescaped).
# Assumes the node is registered under this IP as its name, as the
# drain/cordon commands in these notes do.

kubectl get pods -o wide -A --field-selector spec.nodeName=10.39.41.133

# Show basic information about the cluster.

kubectl cluster-info

# Show the Kubernetes version of the client and of the server.

kubectl version

# Open an interactive bash shell in pod invitation-h5-7f7cb78f75-gm8nl
# (namespace cloud-common).
# The "--" separates kubectl's own flags from the command run in the
# container; the form without it is deprecated.

kubectl exec -it invitation-h5-7f7cb78f75-gm8nl -n cloud-common -- /bin/bash

# Follow the logs of pod iot-standalone-chrome-67b6ddfdbf-lbfj4 in
# namespace iot-service-public in real time.

kubectl --namespace iot-service-public logs --follow iot-standalone-chrome-67b6ddfdbf-lbfj4

# List the events of every namespace.

kubectl get events --all-namespaces

# Set the iot-web-scada-uac Deployment (namespace iot-service-web) to 1 replica.

kubectl --namespace iot-service-web scale deployment iot-web-scada-uac --replicas=1

# Find Deployments whose listing contains "iot-monitoring-operation-platform".

kubectl get deployments --all-namespaces | grep iot-monitoring-operation-platform

# Show the details of pod iot-monitoring-operation-platform-659c7bf8d6-jnjv4
# in namespace cloud-common.

kubectl --namespace cloud-common describe pod iot-monitoring-operation-platform-659c7bf8d6-jnjv4