Featured image of post 全手动部署kubernetes

全手动部署kubernetes

手动搭建k8s

本文所有的shell命令都在开头省略了$,方便直接复制使用。

规划配置

本次部署默认是在master节点下载生成配置,然后将配置分发到2台worker节点。

机器配置

机器名 机器角色 机器ip 机器配置 OS版本 部署软件
m01 主节点 172.15.110.188 4C-8G-50G RockyLinux 9.1 apiserver、controller-manager、scheduler、etcd、kubectl、kubelet、kube-proxy、containerd
w01 从节点 172.15.110.189 4C-8G-50G RockyLinux 9.1 kubelet、kube-proxy、containerd
w02 从节点 172.15.110.191 4C-8G- RockyLinux 9.1 kubelet、kube-proxy、containerd

网络环境

网段名称 网段CIDR
节点网段 172.15.110.0/24
pod网段 172.16.0.0/16
service网段 10.96.0.0/16

程序版本

kubernetes程序 程序版本
kubernetes 1.26.2
etcd 3.5.7
cfssl 1.6.3
coredns 1.9.3
metrics-server 0.6.1
基础软件 软件版本
containerd 1.6.19
runc 1.1.4
crictl 1.24.1

操作系统部署和设置

部署操作系统

从RockyLinux官网下载新版安装镜像ISO(传送门),具体安装过程省略。9.1的root账号默认锁定,并且不允许root用户使用密码进行SSH登录,因此在安装过程中需要:

  • 取消锁定root账号;
  • 允许root用户使用密码进行ssh登录;

安装完以后顺便升级内核并重启:

dnf update -y
reboot

swap设置

由于k8s不支持swap,我们需要关闭swap:

swapoff -a
sed -ri 's/.*swap.*/#&/' /etc/fstab
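
关闭后可以确认swap已经全部释放:

free -h | grep -i swap
# Swap一行应全部为0(示例输出)
swapon --show
# 没有任何输出说明已无启用的swap设备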

主机安全设置

# disable防火墙
systemctl stop firewalld
systemctl disable firewalld

# disable selinux
sed -i 's/enforcing/disabled/' /etc/selinux/config
setenforce 0

设置主机名称

# master执行
hostnamectl set-hostname m01

# node1执行
hostnamectl set-hostname w01

# node2执行
hostnamectl set-hostname w02

设置hosts解析

cat >> /etc/hosts <<EOF
172.15.110.188 m01
172.15.110.189 w01
172.15.110.191 w02
EOF

export NODE_M_IP=$(cat /etc/hosts | grep m01 | awk '{print $1}')
export NODE_W1_IP=$(cat /etc/hosts | grep w01 | awk '{print $1}')
export NODE_W2_IP=$(cat /etc/hosts | grep w02 | awk '{print $1}')

安装需要的软件和工具

dnf install -y wget tree curl bash-completion jq vim net-tools telnet git lrzsz epel-release tar

设置ntp服务

RockyLinux 9.1默认已经安装chrony服务并且服务已经启动,如果需要修改NTP服务器可以修改配置文件/etc/chrony.conf。
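
例如想改用国内的NTP服务器,可以参考下面的改法(这里的ntp.aliyun.com只是示例地址,按需替换):

# 注释掉默认的pool行,追加自定义NTP服务器
sed -i 's/^pool /#pool /' /etc/chrony.conf
echo "server ntp.aliyun.com iburst" >> /etc/chrony.conf
systemctl restart chronyd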

# 使用客户端进行验证
chronyc sources
# MS Name/IP address         Stratum Poll Reach LastRx Last sample
# ===============================================================================
# ^- ntp8.flashdance.cx            2   9   375   997   -480us[ -509us] +/-  106ms
# ^* 202.118.1.81                  1  10   367   428  -1151us[-1183us] +/-   16ms
# ^- ntp.wdc2.us.leaseweb.net      2  10   377   152  -1599us[-1599us] +/-  215ms
# ^- time.cloudflare.com           3  10   276   38m  -7174us[-7299us] +/-   92ms

配置ulimit

# 文件句柄
ulimit -SHn 65535 && \
cat > /etc/security/limits.conf <<EOF
* soft nofile 655360
* hard nofile 655360
* soft nproc 655350
* hard nproc 655350
* soft memlock unlimited
* hard memlock unlimited
EOF
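
limits.conf对新登录的会话生效,重新登录后可以确认(预期值与上面写入的nofile一致):

ulimit -Sn
# 655360
ulimit -Hn
# 655360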

启用ipvs / 安装ipvsadm

dnf install -y ipvsadm ipset sysstat conntrack libseccomp

cat > /etc/modules-load.d/ipvs.conf <<EOF
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
nf_conntrack
ip_tables
ip_set
xt_set
ipt_set
ipt_rpfilter
ipt_REJECT
ipip
EOF

systemctl restart systemd-modules-load.service

lsmod | grep ip_vs
# ip_vs_sh               16384  0
# ip_vs_wrr              16384  0
# ip_vs_rr               16384  0
# ip_vs                 188416  6 ip_vs_rr,ip_vs_sh,ip_vs_wrr
# nf_conntrack          176128  3 nf_nat,nft_ct,ip_vs
# nf_defrag_ipv6         24576  2 nf_conntrack,ip_vs
# libcrc32c              16384  5 nf_conntrack,nf_nat,nf_tables,xfs,ip_vs

启用overlay和br_netfilter

cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF

systemctl restart systemd-modules-load.service

lsmod | grep br_netfilter
# br_netfilter           36864  0
# bridge                405504  1 br_netfilter
lsmod | grep overlay
# overlay               200704  0

修改内核参数

cat > /etc/sysctl.d/95-k8s-sysctl.conf <<EOF 
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-arptables = 1

fs.may_detach_mounts = 1
vm.swappiness = 0
vm.overcommit_memory=1
vm.panic_on_oom=0
vm.max_map_count=655360
fs.inotify.max_user_watches=89100
fs.file-max=52706963
fs.nr_open=52706963
net.netfilter.nf_conntrack_max=2310720

net.ipv4.tcp_keepalive_time = 600
net.ipv4.tcp_keepalive_probes = 3
net.ipv4.tcp_keepalive_intvl =15
net.ipv4.tcp_max_tw_buckets = 36000
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_max_orphans = 327680
net.ipv4.tcp_orphan_retries = 3
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_max_syn_backlog = 16384
net.ipv4.ip_conntrack_max = 65536
net.ipv4.tcp_max_syn_backlog = 16384
net.ipv4.tcp_timestamps = 0
net.core.somaxconn = 16384
EOF

sysctl --system
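
可以抽查几个关键参数确认已经生效:

sysctl net.ipv4.ip_forward net.bridge.bridge-nf-call-iptables vm.swappiness
# net.ipv4.ip_forward = 1
# net.bridge.bridge-nf-call-iptables = 1
# vm.swappiness = 0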

配置免密登录

# 进入root家目录
cd ~

dnf install -y sshpass
ssh-keygen -f /root/.ssh/id_rsa -P ''
export HOST="m01 w01 w02 $NODE_M_IP $NODE_W1_IP $NODE_W2_IP" 
export SSHPASS=<ssh登录密码>
for H in $HOST; do \
sshpass -e ssh-copy-id -o StrictHostKeyChecking=no $H; \
done

ssh w01
# Last login: Thu Mar  2 16:53:06 2023 from 172.15.110.188

安装PKI管理工具-cfssl

检查下载新版本的工具,传送门

# 下载cfssl二进制程序
wget https://github.com/cloudflare/cfssl/releases/download/v1.6.3/cfssl_1.6.3_linux_amd64 -O /usr/local/bin/cfssl
wget https://github.com/cloudflare/cfssl/releases/download/v1.6.3/cfssljson_1.6.3_linux_amd64 -O /usr/local/bin/cfssljson

# 添加执行权限
chmod +x /usr/local/bin/cfssl*
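
验证安装:

cfssl version
# 输出中的Version应为1.6.3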

安装containerd

下载二进制文件

检查下载软件的版本,传送门,下载以cri-containerd-cni开头的文件,这个tar包里面包含了containerd、crictl管理工具以及cni网络插件,下载后可以使用tar -tf <包名>查看tar包的内容。

# 创建配置生成目录
mkdir -p /root/containerd/{app,bin,cnibin,config,service}
cd /root/containerd

# 下载二进制文件
wget https://github.com/containerd/containerd/releases/download/v1.6.19/cri-containerd-cni-1.6.19-linux-amd64.tar.gz -O app/containerd.tar.gz

# 解压
tar -xf app/containerd.tar.gz --strip-components=3 -C bin usr/local/bin/{containerd*,crictl,ctr} 
tar -xf app/containerd.tar.gz --strip-components=3 -C cnibin opt/cni/bin/*

# 下载runc
wget https://github.com/opencontainers/runc/releases/download/v1.1.4/runc.amd64 -O bin/runc

# 添加执行权限
chmod +x bin/runc

创建containerd服务和配置文件

生成脚本containerd_config.sh

cat <<'EOF'> containerd_config.sh 

# 创建service文件
cat > service/containerd.service <<EOF1
[Unit]
Description=containerd container runtime
Documentation=https://containerd.io
After=network.target local-fs.target

[Service]
ExecStartPre=-/sbin/modprobe overlay
ExecStart=/usr/local/bin/containerd

Type=notify
Delegate=yes
KillMode=process
Restart=always
RestartSec=5
# Having non-zero Limit*s causes performance problems due to accounting overhead
# in the kernel. We recommend using cgroups to do container-local accounting.
LimitNPROC=infinity
LimitCORE=infinity
LimitNOFILE=infinity
# Comment TasksMax if your systemd version does not supports it.
# Only systemd 226 and above support this version.
TasksMax=infinity
OOMScoreAdjust=-999

[Install]
WantedBy=multi-user.target
EOF1

cat > config/containerd.conf <<EOF2
overlay
br_netfilter
EOF2
EOF

执行

bash -x containerd_config.sh

# 在service目录下生成
# ├── containerd.service
# 在config目录下生成
# ├── containerd.conf

生成配置文件,并按需修改

# 创建配置文件
./bin/containerd config default > config/config.toml

# 由于k8s.gcr.io和registry.k8s.io网站打不开,需要修改sandbox_image参数,如果这里不修改,也可以手动下载pause镜像,
# 然后修改镜像名称
# ctr -n k8s.io i tag  registry.cn-hangzhou.aliyuncs.com/kubernetes-kubespray/pause:3.6 k8s.gcr.io/pause:3.6
# root:容器存储路径,修改成磁盘空间充足的路径,
# bin_dir:containerd二进制文件
# conf_dir: cni 插件存储路径
# sandbox_image:pause镜像名称以及镜像tag,

sed -i 's#root = \"/var/lib/containerd\"#root = \"/data/containerd\"#' config/config.toml
sed -i 's#sandbox_image = \"k8s.gcr.io/pause:#sandbox_image = \"registry.aliyuncs.com/google_containers/pause:#' config/config.toml
sed -i 's#sandbox_image = \"registry.k8s.io/pause:#sandbox_image = \"registry.aliyuncs.com/google_containers/pause:#' config/config.toml
sed -i 's#SystemdCgroup = false#SystemdCgroup = true#' config/config.toml

分发二进制文件、配置及创建相关路径

for i in m01 w01 w02; do \
ssh $i "mkdir -p /etc/containerd"; \
ssh $i "mkdir -p /opt/cni/bin"; \
ssh $i "mkdir -p /opt/containerd"; \
ssh $i "mkdir -p /etc/cni/net.d"; \
scp bin/* $i:/usr/local/bin/; \
scp cnibin/* $i:/opt/cni/bin/; \
scp service/containerd.service $i:/usr/lib/systemd/system/; \
scp config/config.toml $i:/etc/containerd/; \
scp config/containerd.conf $i:/etc/modules-load.d/; \
done

启动containerd服务

for i in m01 w01 w02; do \
ssh $i "systemctl restart systemd-modules-load.service"; \
ssh $i "systemctl daemon-reload"; \
ssh $i "systemctl enable containerd"; \
ssh $i "systemctl restart containerd --no-block"; \
ssh $i "systemctl is-active containerd"; \
done

测试containerd

Containerd有namespaces的概念,不同namespaces之间进行隔离,镜像和容器不可见

# 列出namespace
ctr ns list

# 创建ns
ctr ns create test
# 这里会比上面多一个叫test的命名空间

# 测试拉取busybox镜像,使用ctr拉取,镜像的路径要写全,没有指明ns,默认保存在default命名空间
ctr images pull docker.io/library/busybox:latest

# 查看镜像,test命名空间没有镜像
ctr -n test images list
# 这里可以看到拉取到的busybox镜像

配置crictl

使用containerd命令管理镜像较底层,对人类不友好,k8s内部提供了crictl来管理镜像,相当于docker命令行工具。

生成脚本crictl_config.sh

cat <<'EOF'> crictl_config.sh
# crictl是遵循CRI接口规范的一个命令行工具,通常用它来检查和管理kubelet节点上的容器运行时和镜像
# 使用cricti工具之前,需要先创建crictl的配置文件
# 注意runtime-endpoint和image-endpoint必须与/etc/containerd/config.toml中配置保持一致。
cat > config/crictl.yaml <<EOF1
runtime-endpoint: unix:///run/containerd/containerd.sock
image-endpoint: unix:///run/containerd/containerd.sock
timeout: 10
debug: false
EOF1
EOF

执行

bash -x crictl_config.sh
# ├── crictl.yaml

分发crictl.yaml

for i in m01 w01 w02; do \
scp config/crictl.yaml $i:/etc/
done

测试crictl

crictl的使用方法基本和docker用法相同,crictl所有的操作都在containerd的k8s.io这个namespace。

# 拉取镜像
crictl pull busybox
#Image is up to date for sha256:62aedd01bd8520c43d06b09f7a0f67ba9720bdc04631a8242c65ea995f3ecac8


# 列出所有cri容器镜像
crictl images
# IMAGE                       TAG                 IMAGE ID            SIZE
# docker.io/library/busybox   latest              2fb6fc2d97e10       777kB
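
可以用ctr确认这个镜像确实保存在containerd的k8s.io命名空间下:

ctr -n k8s.io images list | grep busybox
# 能看到docker.io/library/busybox:latest的记录,说明crictl的操作都落在k8s.io这个namespace里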

部署etcd

下载etcd

# 创建保存配置的文件夹
mkdir -p /root/etcd/{bin,config,service,ssl,app}
cd /root/etcd

# 下载etcd二进制文件
# github二进制包下载地址:https://github.com/etcd-io/etcd/releases
wget https://github.com/etcd-io/etcd/releases/download/v3.5.7/etcd-v3.5.7-linux-amd64.tar.gz -O app/etcd.tar.gz

tar -xf app/etcd.tar.gz --strip-components=1 -C bin/ etcd-v3.5.7-linux-amd64/etcd{,ctl}

生成etcd使用的证书

生成脚本gen_etcd_cert.sh

cat <<'EOF'> gen_etcd_cert.sh
# example: ./etcd-cert.sh 127.0.0.1,master01,master02,master03,$NODE_M_IP,$NODE_W1_IP,$NODE_W2_IP

HOSTNAME=$1

# etcd ca的配置文件
cat > ca-config.json <<EOF1
{
  "signing": {
    "default": {
      "expiry": "876000h"
    },
    "profiles": {
      "peer": {
        "usages": [
            "signing",
            "key encipherment",
            "server auth",
            "client auth"
        ],
        "expiry": "876000h"
      }
    }
  }
}
EOF1

# etcd的ca证书签名请求文件
cat > etcd-ca-csr.json <<EOF2
{
  "CN": "etcd",
  "key": {
    "algo": "rsa",
    "size": 2048
  },
  "names": [
    {
      "C": "CN",
      "ST": "Hubei",
      "L": "Wuhan",
      "O": "etcd",
      "OU": "Etcd Security"
    }
  ],
  "ca": {
    "expiry": "876000h"
  }
}
EOF2

# 生成etcd集群使用的ca根证书
cfssl gencert \
	-initca etcd-ca-csr.json | cfssljson -bare ssl/etcd-ca

# 生成etcd集群使用的证书申请签名文件
cat > etcd-csr.json <<EOF3
{
  "CN": "etcd",
  "key": {
    "algo": "rsa",
    "size": 2048
  },
  "names": [
    {
      "C": "CN",
      "ST": "Hubei",
      "L": "Wuhan",
      "O": "etcd",
      "OU": "Etcd Security"
    }
  ]
}
EOF3

# 生产部署etcd集群可以使用3张证书用于不同认证。
# 1.etcd server持有的服务端证书
# 2.peer集群中节点互相通信使用的客户端证书
# 3.配置在kube-apiserver中用来与etcd-server做双向认证的客户端证书
# 学习环境使用一张peer类型的证书进行认证
cfssl gencert \
	-ca=ssl/etcd-ca.pem \
	-ca-key=ssl/etcd-ca-key.pem \
	-config=ca-config.json \
	-hostname=${HOSTNAME} \
	-profile=peer etcd-csr.json | cfssljson -bare ssl/etcd
EOF

执行

# example:bash gen_etcd_cert.sh <etcd相关主机ip及主机名>
bash -x gen_etcd_cert.sh 127.0.0.1,m01,$NODE_M_IP

## 在ssl目录下生成
# ├── etcd-ca.csr
# ├── etcd-ca-key.pem
# ├── etcd-ca.pem
# ├── etcd.csr
# ├── etcd-key.pem
# ├── etcd.pem

生成参数文件及启动service文件

生成脚本etcd_config.sh

cat <<'EOF'> etcd_config.sh
# example: ./etcd_config.sh master01 $NODE_M_IP master02=https://$NODE_W1_IP:2380,master03=https://$NODE_W2_IP:2380

ETCD_NAME=$1
ETCD_IP=$2
ETCD_CLUSTER=$3

WORK_DIR=/opt/etcd
ETCD_CONF_DIR=/opt/etcd/config
ETCD_CA_CERT=etcd-ca.pem
ETCD_SERVER_CERT_PREFIX=etcd

cat > config/etcd.config.yaml.$1 <<EOF1
name: '${ETCD_NAME}'
data-dir: ${WORK_DIR}/data
wal-dir: ${WORK_DIR}/data/wal
snapshot-count: 5000
heartbeat-interval: 100
election-timeout: 1000
quota-backend-bytes: 0
listen-peer-urls: 'https://${ETCD_IP}:2380'
listen-client-urls: 'https://${ETCD_IP}:2379,http://127.0.0.1:2379'
max-snapshots: 3
max-wals: 5
cors:
initial-advertise-peer-urls: 'https://${ETCD_IP}:2380'
advertise-client-urls: 'https://${ETCD_IP}:2379'
discovery:
discovery-fallback: 'proxy'
discovery-proxy:
discovery-srv:
initial-cluster: '${ETCD_NAME}=https://${ETCD_IP}:2380,${ETCD_CLUSTER}'
initial-cluster-token: 'etcd-cluster'
initial-cluster-state: 'new'
strict-reconfig-check: false
enable-v2: true
enable-pprof: true
proxy: 'off'
proxy-failure-wait: 5000
proxy-refresh-interval: 30000
proxy-dial-timeout: 1000
proxy-write-timeout: 5000
proxy-read-timeout: 0
client-transport-security:
  cert-file: '${WORK_DIR}/ssl/${ETCD_SERVER_CERT_PREFIX}.pem'
  key-file: '${WORK_DIR}/ssl/${ETCD_SERVER_CERT_PREFIX}-key.pem'
  client-cert-auth: true
  trusted-ca-file: '${WORK_DIR}/ssl/${ETCD_CA_CERT}'
  auto-tls: true
peer-transport-security:
  cert-file: '${WORK_DIR}/ssl/${ETCD_SERVER_CERT_PREFIX}.pem'
  key-file: '${WORK_DIR}/ssl/${ETCD_SERVER_CERT_PREFIX}-key.pem'
  peer-client-cert-auth: true
  trusted-ca-file: '${WORK_DIR}/ssl/${ETCD_CA_CERT}'
  auto-tls: true
debug: false
log-package-levels:
log-outputs: [default]
force-new-cluster: false
EOF1

cat > service/etcd.service <<EOF2

[Unit]
Description=Etcd Service
Documentation=https://coreos.com/etcd/docs/latest/
After=network.target

[Service]
Type=notify
ExecStart=/usr/local/bin/etcd \\
--config-file=${ETCD_CONF_DIR}/etcd.config.yaml
Restart=on-failure
RestartSec=10
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target
Alias=etcd3.service
EOF2
EOF

执行

# example:./etcd_config.sh <ETCD主机名> <ETCD_IP> <ETCD集群其他的信息>
bash -x etcd_config.sh m01 $NODE_M_IP

# 在config目录下生成
# ├── etcd.config.yaml.m01

# 在service目录下生成
# ├── etcd.service

分发etcd二进制文件、证书、配置及服务文件

for i in m01; do \
ssh $i "mkdir -p /opt/etcd/{config,data,ssl}"; \
scp bin/etcd* $i:/usr/local/bin; \
scp ssl/etcd{,-key,-ca}.pem $i:/opt/etcd/ssl/; \
scp config/etcd.config.yaml.$i $i:/opt/etcd/config/etcd.config.yaml; \
scp service/etcd.service $i:/usr/lib/systemd/system/; \
done

启动etcd服务

for i in m01; do \
ssh $i "systemctl daemon-reload"; \
ssh $i "systemctl enable etcd"; \
ssh $i "systemctl restart etcd --no-block"; \
ssh $i "systemctl is-active etcd"; \
done

验证集群

# 查看集群
export ETCDCTL_API=3
export ENDPOINTS=$NODE_M_IP:2379
etcdctl \
    --endpoints="$ENDPOINTS" \
    --cacert=/opt/etcd/ssl/etcd-ca.pem \
    --cert=/opt/etcd/ssl/etcd.pem \
    --key=/opt/etcd/ssl/etcd-key.pem endpoint status \
    --write-out=table

etcdctl \
    --endpoints="$ENDPOINTS" \
    --cacert=/opt/etcd/ssl/etcd-ca.pem \
    --cert=/opt/etcd/ssl/etcd.pem \
    --key=/opt/etcd/ssl/etcd-key.pem member list \
    --write-out=table  

etcdctl \
    --endpoints="$ENDPOINTS" \
    --cacert=/opt/etcd/ssl/etcd-ca.pem \
    --cert=/opt/etcd/ssl/etcd.pem \
    --key=/opt/etcd/ssl/etcd-key.pem endpoint health \
    --write-out=table
# +-------------------+--------+-------------+-------+
# |     ENDPOINT      | HEALTH |    TOOK     | ERROR |
# +-------------------+--------+-------------+-------+
# |172.15.110.188:2379|   true | 20.158058ms |       |
# +-------------------+--------+-------------+-------+

部署k8s组件

在master节点部署3个服务

  • kube-apiserver
  • kube-controller-manager
  • kube-scheduler

下载二进制文件,传送门

# 创建配置目录
mkdir -p /root/k8s/{app,ssl,config,service,bin,kubeconfig}
cd /root/k8s

# 下载kubernets二进制包,按版本直接修改v1.26.2 -> v1.xx.x
wget https://dl.k8s.io/v1.26.2/kubernetes-server-linux-amd64.tar.gz -O app/kubernetes-server.tar.gz

# 解压
tar -xf app/kubernetes-server.tar.gz  --strip-components=3 -C \
	bin \
        kubernetes/server/bin/kube{let,ctl,-apiserver,-controller-manager,-scheduler,-proxy}

生成k8s使用的ca证书

生成脚本 gen_ca_cert.sh

cat <<'EOF'> gen_ca_cert.sh

cat > ca-config.json <<EOF1
{
    "signing": {
        "default": {
            "expiry": "87600h"
        },
        "profiles": {
            "peer": {
                "expiry": "87600h",
                "usages": [
                    "signing",
                    "key encipherment",
                    "server auth",
                    "client auth"
                ]
            }
        }
    }
}
EOF1

# 生成CA证书签名请求的配置文件
cat > ca-csr.json <<EOF2
{
    "CN": "kubernetes",
    "key": {
        "algo": "rsa",
        "size": 2048
    },
    "names": [
        {
            "C": "CN",
            "ST": "Hubei",
            "L": "Wuhan",
            "O": "Kubernetes",
            "OU": "System"
        }
    ],
  "ca": {
    "expiry": "876000h"
  }    
}
EOF2

# 生成ca证书和ca的私钥
cfssl gencert -initca ca-csr.json | cfssljson -bare ssl/ca
EOF

执行

bash -x gen_ca_cert.sh

# 在ssl目录下生成
# ├── ca-key.pem
# ├── ca.pem

部署apiserver

生成apiserver所需证书

cat <<'EOF'> gen_apiserver_cert.sh
# 生成apiserver的证书和私钥(apiserver和其它k8s组件通信使用)
APISERVER_NAME=$1

cat > kube-apiserver-csr.json <<EOF1
{
    "CN": "kube-apiserver",
    "key": {
        "algo": "rsa",
        "size": 2048
    },
    "names": [
        {
            "C": "CN",
            "ST": "Hubei",
            "L": "Wuhan",
            "O": "Kubernetes",
            "OU": "System"
        }
    ]
}
EOF1

cfssl gencert -ca=ssl/ca.pem -ca-key=ssl/ca-key.pem -config=ca-config.json \
	-hostname=${APISERVER_NAME} \
	-profile=peer kube-apiserver-csr.json | cfssljson -bare ssl/kube-apiserver

# apiserver聚合证书
# 访问kube-apiserver的另一种方式就是使用kube-proxy来代理访问, 而该证书就是用来支持SSL代理访问的. 在该种访问模
# 式下,我们是以http的方式发起请求到代理服务的, 此时, 代理服务会将该请求发送给kube-apiserver, 在此之前, 代理会
# 将发送给kube-apiserver的请求头里加入证书信息。

# 客户端 -- 发起请求 ---> 代理 --> Add Header信息:发起请求 --> kube-apiserver
# 如果apiserver所在的主机上没有运行kube-proxy,既无法通过服务的ClusterIP进行访问,需要
# --enable-aggregator-routing=true

# 生成ca签名请求文件
cat > front-proxy-ca-csr.json <<EOF2
{
  "CN": "kubernetes",
  "key": {
     "algo": "rsa",
     "size": 2048
  },
  "ca": {
    "expiry": "876000h"
  }
}
EOF2

# 此根证书用在requestheader-client-ca-file配置选项中, kube-apiserver使用该证书来验证客户端证书是否为自己所签发
cfssl gencert -initca front-proxy-ca-csr.json | cfssljson -bare ssl/front-proxy-ca

# 生成front-proxy-client证书请求文件
# 这里的CN名称要和apiserver启动参数--requestheader-allowed-names=front-proxy-client相同
cat > front-proxy-client-csr.json <<EOF3
{
  "CN": "front-proxy-client",
  "key": {
     "algo": "rsa",
     "size": 2048
  }
}
EOF3

# 生成代理层证书,代理端使用此证书,用来代用户向kube-apiserver认证
cfssl gencert -ca=ssl/front-proxy-ca.pem -ca-key=ssl/front-proxy-ca-key.pem -config=ca-config.json \
	-profile=peer front-proxy-client-csr.json | cfssljson -bare ssl/front-proxy-client

# 创建ServiceAccount Key —— secret
# serviceaccount账号的一种认证方式,创建serviceaccount的时候会创建一个与之绑定的secret,这个secret会生成
# token,这组的密钥对仅提供给controller-manager使用,controller-manager通过sa.key对token进行签名, master
# 节点通过公钥sa.pub进行签名的验证
openssl genrsa -out ssl/sa.key 2048
openssl rsa -in ssl/sa.key -pubout -out ssl/sa.pub
EOF

执行

# 10.96.0.1是service网段(service-cluster-ip-range,10.96.0.0/16)的第一个ip地址
bash -x gen_apiserver_cert.sh 127.0.0.1,kubernetes,kubernetes.default,kubernetes.default.svc,kubernetes.default.svc.cluster,kubernetes.default.svc.cluster.local,10.96.0.1,$NODE_M_IP

生成配置文件及启动service文件

生成脚本apiserver_config.sh

# --service-cluster-ip-range,该网段不能和宿主机的网段、pod网段重复
cat <<'EOF'> apiserver_config.sh
#!/bin/bash

# 创建 kube-apiserver 启动参数配置文件
MASTER_ADDRESS=$1
ETCD_SERVERS=$2
ETCD_CERT_DIR=/opt/etcd/ssl
K8S_CERT_DIR=/opt/k8s/ssl
K8S_CONF_DIR=/opt/k8s/config
API_CERT_PRIFIX=kube-apiserver

cat > service/kube-apiserver.service.${MASTER_ADDRESS} <<EOF1
[Unit]
Description=Kubernetes API Server
Documentation=https://github.com/kubernetes/kubernetes
After=network.target

[Service]
ExecStart=/usr/local/bin/kube-apiserver \\
--v=2 \\
--allow-privileged=true \\
--bind-address=${MASTER_ADDRESS} \\
--advertise-address=${MASTER_ADDRESS} \\
--secure-port=6443 \\
--service-cluster-ip-range=10.96.0.0/16 \\
--service-node-port-range=30000-32767 \\
--etcd-servers=${ETCD_SERVERS} \\
--etcd-cafile=${ETCD_CERT_DIR}/etcd-ca.pem \\
--etcd-certfile=${ETCD_CERT_DIR}/etcd.pem \\
--etcd-keyfile=${ETCD_CERT_DIR}/etcd-key.pem \\
--client-ca-file=${K8S_CERT_DIR}/ca.pem \\
--tls-cert-file=${K8S_CERT_DIR}/${API_CERT_PRIFIX}.pem \\
--tls-private-key-file=${K8S_CERT_DIR}/${API_CERT_PRIFIX}-key.pem \\
--kubelet-client-certificate=${K8S_CERT_DIR}/${API_CERT_PRIFIX}.pem \\
--kubelet-client-key=${K8S_CERT_DIR}/${API_CERT_PRIFIX}-key.pem \\
--service-account-key-file=${K8S_CERT_DIR}/sa.pub \\
--service-account-signing-key-file=${K8S_CERT_DIR}/sa.key \\
--service-account-issuer=https://kubernetes.default.svc.cluster.local \\
--kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname \\
--enable-admission-plugins=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,NodeRestriction,ResourceQuota \\
--authorization-mode=Node,RBAC \\
--enable-bootstrap-token-auth=true \\
--enable-aggregator-routing=true \\
--proxy-client-cert-file=${K8S_CERT_DIR}/front-proxy-client.pem \\
--proxy-client-key-file=${K8S_CERT_DIR}/front-proxy-client-key.pem \\
--requestheader-client-ca-file=${K8S_CERT_DIR}/front-proxy-ca.pem \\
--requestheader-allowed-names=front-proxy-client \\
--requestheader-group-headers=X-Remote-Group \\
--requestheader-extra-headers-prefix=X-Remote-Extra- \\
--requestheader-username-headers=X-Remote-User
#--token-auth-file=\${K8S_CONF_DIR}/token.csv 这里禁用token文件进行认证
    
Restart=on-failure
RestartSec=10s
LimitNOFILE=65535

[Install]
WantedBy=multi-user.target
EOF1
EOF

执行

# bash apiserver_config.sh <master_IP> <etcd_cluster>
bash -x apiserver_config.sh $NODE_M_IP https://$NODE_M_IP:2379

# 在service目录下生成
# ├── kube-apiserver.service.172.15.110.188

分发二进制文件、证书及service文件

for i in $NODE_M_IP; do \
ssh $i "mkdir -p /opt/k8s/{ssl,config,log}"; \
scp bin/kube-apiserver  $i:/usr/local/bin/ ;
scp ssl/{kube*.pem,ca{,-key}.pem,front-proxy-client*.pem,front-proxy-ca.pem,sa.*} $i:/opt/k8s/ssl/; \
scp service/kube-apiserver.service.$i $i:/usr/lib/systemd/system/kube-apiserver.service; \
done

启动kube-apiserver服务

for i in m01; do \
ssh $i "systemctl daemon-reload"; \
ssh $i "systemctl enable kube-apiserver"; \
ssh $i "systemctl restart kube-apiserver --no-block"; \
ssh $i "systemctl is-active kube-apiserver"; \
done

部署kubectl

先部署kubectl客户端工具,部署后可以使用命令kubectl查看集群的信息

生成kubectl所需证书

生成脚本gen_kubectl_cert.sh

cat <<EOF> gen_kubectl_cert.sh
# 生成kubectl的证书和私钥
# k8s安装时会创建一个集群角色(clusterrole),名字为cluster-admin,对集群具有最高管理权限同时会创建一个集群角色
# 绑定(clusterrolebingding),名字也叫做cluster-admin,这个绑定将集群角色(cluster-admin)和用户组
# (system:masters)关联起来,于是属于system:masters组内的用户,都会有这个集群角色赋予的权限
# 生成证书时会定义用户clusteradmin,所属组为system:masters,所以clusteradmin拥有集群角色(cluster-admin)赋
# 予的权限。
cat > kubectl-csr.json <<EOF1
{
  "CN": "clusteradmin",
  "key": {
    "algo": "rsa",
    "size": 2048
  },
  "names": [
    {
      "C": "CN",
      "ST": "Hubei",
      "L": "Wuhan",
      "O": "system:masters",
      "OU": "Kubernetes-manual"
    }
  ]
}
EOF1

cfssl gencert -ca=ssl/ca.pem -ca-key=ssl/ca-key.pem -config=ca-config.json -profile=peer kubectl-csr.json | cfssljson -bare ssl/kubectl
EOF

执行

bash -x gen_kubectl_cert.sh

# 在ssl目录下生成
# ├── kubectl-key.pem
# ├── kubectl.pem

生成kubeconfig文件

生成脚本kubeconfig_kubectl_config.sh

# 参数根据自己部署需要进行修改
# USERNAME必须和证书申请的CN名字相同,这里的用户名是clusteradmin
cat <<'EOF' > kubeconfig_kubectl_config.sh
APISERVER_IP=$1
K8S_CERT_DIR=$2
PORT=6443
KUBE_APISERVER=https://${APISERVER_IP}:${PORT}
CLUSTER_NAME=kubernetes
USERNAME=clusteradmin
KUBECONFIG_FILE=kubeconfig/kubectl.kubeconfig
CONTEXT_NAME=${USERNAME}@${CLUSTER_NAME}
CERT_PRFIX=kubectl

# 设置集群参数
./bin/kubectl config set-cluster ${CLUSTER_NAME} \
    --certificate-authority=${K8S_CERT_DIR}/ca.pem \
    --embed-certs=true \
    --server=${KUBE_APISERVER} \
    --kubeconfig=${KUBECONFIG_FILE}

# 设置客户端认证参数
./bin/kubectl config set-credentials ${USERNAME} \
	--client-certificate=${K8S_CERT_DIR}/${CERT_PRFIX}.pem \
	--client-key=${K8S_CERT_DIR}/${CERT_PRFIX}-key.pem \
	--embed-certs=true \
	--kubeconfig=${KUBECONFIG_FILE}

# 设置context---将用户和集群关联起来
./bin/kubectl config set-context ${CONTEXT_NAME} \
    --cluster=${CLUSTER_NAME} \
    --user=${USERNAME} \
    --kubeconfig=${KUBECONFIG_FILE}

# 设置默认contexts
./bin/kubectl config use-context ${CONTEXT_NAME} \
    --kubeconfig=${KUBECONFIG_FILE}
EOF

执行

bash -x kubeconfig_kubectl_config.sh $NODE_M_IP ssl

# 在kubeconfig目录下生成
# ├── kubectl.kubeconfig

分发kubeconfig文件

# 分发kubeconfig证书
for i in m01; do \
ssh $i "mkdir -p $HOME/.kube/"; \
scp bin/kubectl $i:/usr/local/bin/; \
scp kubeconfig/kubectl.kubeconfig $i:$HOME/.kube/config; \
done

kubectl命令补全功能

# bash配置
source <(kubectl completion bash)
echo "source <(kubectl completion bash)" >> ~/.bashrc

查看集群状态

kubectl cluster-info
# Kubernetes control plane is running at https://172.15.110.188:6443
#
# To further debug and diagnose cluster problems, use 'kubectl cluster-info dump'.

kubectl get componentstatus
# Warning: v1 ComponentStatus is deprecated in v1.19+
# NAME                 STATUS      MESSAGE                                                                                          ERROR
# scheduler            Unhealthy   Get "https://127.0.0.1:10259/healthz": dial tcp 127.0.0.1:10259: connect: connection refused
# controller-manager   Unhealthy   Get "https://127.0.0.1:10257/healthz": dial tcp 127.0.0.1:10257: connect: connection refused
# etcd-0               Healthy     {"health":"true","reason":""}

kubectl get all -A
# NAMESPACE   NAME                 TYPE        CLUSTER-IP   EXTERNAL-IP   PORT(S)   AGE
# default     service/kubernetes   ClusterIP   10.96.0.1    <none>        443/TCP   17m

部署controller-manager

生成controller-manager所需证书

创建生成证书脚本 gen_controller_cert.sh

cat <<'EOF'> gen_controller_cert.sh
#!/bin/bash

CONTROLLER_IP=$1

# 生成 controller-manager证书签名请求
cat > kube-controller-manager-csr.json <<EOF1
{
  "CN": "system:kube-controller-manager",
  "key": {
    "algo": "rsa",
    "size": 2048
  },
  "names": [
    {
      "C": "CN",
      "ST": "Hubei",
      "L": "Wuhan",
      "O": "system:kube-controller-manager",
      "OU": "Kubernetes-manual"
    }
  ]
}
EOF1

cfssl gencert -ca=ssl/ca.pem -ca-key=ssl/ca-key.pem \
-config=ca-config.json \
-hostname=${CONTROLLER_IP} \
-profile=peer kube-controller-manager-csr.json | cfssljson -bare ssl/kube-controller-manager
EOF

执行

bash -x gen_controller_cert.sh 127.0.0.1,$NODE_M_IP

# 在ssl目录下生成
# ├── kube-controller-manager-key.pem
# ├── kube-controller-manager.pem

生成kubeconfig文件

cat <<'EOF'> kubeconfig_kube-controller-manager.sh
#!/bin/bash
APISERVER_IP=$1
K8S_CERT_DIR=$2
PORT=6443
KUBE_APISERVER=https://${APISERVER_IP}:${PORT}
KUBECONFIG_FILE=kubeconfig/kube-controller-manager.kubeconfig
CLUSTER_NAME=kubernetes
USERNAME=system:kube-controller-manager
CONTEXT_NAME=${USERNAME}@${CLUSTER_NAME}
CERT_PRFIX=kube-controller-manager

# 设置集群参数
./bin/kubectl config set-cluster ${CLUSTER_NAME} \
    --certificate-authority=${K8S_CERT_DIR}/ca.pem \
    --embed-certs=true \
    --server=${KUBE_APISERVER} \
    --kubeconfig=${KUBECONFIG_FILE}

# 设置用户认证参数
./bin/kubectl config set-credentials ${USERNAME} \
	--client-certificate=${K8S_CERT_DIR}/${CERT_PRFIX}.pem \
	--client-key=${K8S_CERT_DIR}/${CERT_PRFIX}-key.pem \
	--embed-certs=true \
	--kubeconfig=${KUBECONFIG_FILE}

# 设置context---将用户和集群关联起来
./bin/kubectl config set-context ${CONTEXT_NAME} \
    --cluster=${CLUSTER_NAME} \
    --user=${USERNAME} \
    --kubeconfig=${KUBECONFIG_FILE}

# 设置默认context
./bin/kubectl config use-context ${CONTEXT_NAME} \
    --kubeconfig=${KUBECONFIG_FILE}
EOF

执行

# example:./kube-controller-manager_config.sh <MASTER_IPADDR> <证书目录>
bash -x kubeconfig_kube-controller-manager.sh $NODE_M_IP ssl

# 在kubeconfig目录下生成
# ├── kube-controller-manager.kubeconfig

生成kube-controller-manager的service文件

生成脚本controller_manager_config.sh

# --cluster-cidr为pod网段,不能和宿主机网段,service网段重复
cat <<'EOF'> kube-controller-manager.sh
#!/bin/bash
K8S_CERT_DIR=/opt/k8s/ssl
K8S_CONF_DIR=/opt/k8s/config

cat > service/kube-controller-manager.service <<EOF1
[Unit]
Description=Kubernetes Controller Manager
Documentation=https://github.com/kubernetes/kubernetes
After=network.target

[Service]
ExecStart=/usr/local/bin/kube-controller-manager \\
--v=2 \\
--bind-address=127.0.0.1 \\
--root-ca-file=${K8S_CERT_DIR}/ca.pem \\
--cluster-signing-cert-file=${K8S_CERT_DIR}/ca.pem \\
--cluster-signing-key-file=${K8S_CERT_DIR}/ca-key.pem \\
--service-account-private-key-file=${K8S_CERT_DIR}/sa.key \\
--tls-cert-file=${K8S_CERT_DIR}/kube-controller-manager.pem \\
--tls-private-key-file=${K8S_CERT_DIR}/kube-controller-manager-key.pem \\
--kubeconfig=${K8S_CONF_DIR}/kube-controller-manager.kubeconfig \\
--leader-elect=true \\
--use-service-account-credentials=true \\
--node-monitor-grace-period=40s \\
--node-monitor-period=5s \\
--pod-eviction-timeout=2m0s \\
--controllers=*,bootstrapsigner,tokencleaner \\
--allocate-node-cidrs=true \\
--cluster-cidr=172.16.0.0/16 \\
--requestheader-client-ca-file=${K8S_CERT_DIR}/front-proxy-ca.pem \\
--node-cidr-mask-size=24
      
Restart=always
RestartSec=10s

[Install]
WantedBy=multi-user.target
EOF1
EOF

执行

bash -x kube-controller-manager.sh

# 在service目录下生成以下文件
# ├── kube-controller-manager.service

分发二进制文件、证书、kubeconfig文件及service文件

for i in m01; do \
ssh $i "mkdir -p /opt/k8s/{ssl,config}"; \
scp bin/kube-controller-manager  $i:/usr/local/bin/;
scp ssl/kube-controller*.pem $i:/opt/k8s/ssl/; \
scp service/kube-controller-manager.service $i:/usr/lib/systemd/system/; \
scp kubeconfig/kube-controller-manager.kubeconfig $i:/opt/k8s/config/; \
done

启动kube-controller-manager服务

for i in m01; do \
ssh $i "systemctl daemon-reload"; \
ssh $i "systemctl enable kube-controller-manager"; \
ssh $i "systemctl restart kube-controller-manager --no-block"; \
ssh $i "systemctl is-active kube-controller-manager"; \
done

验证

ss -tlp | grep kube-controller
# LISTEN 0      16384       127.0.0.1:10257             0.0.0.0:*    users:(("kube-controller",pid=2864,fd=7))

部署kube-scheduler组件

生成kube-scheduler所需证书

生成脚本gen_schduler_cert.sh

cat <<'EOF'> gen_schduler_cert.sh
# 生成 kube-scheduler 的证书和私钥
SCHEDULER_IP=$1
CSR_NAME_PREFIX=kube-scheduler

cat > ${CSR_NAME_PREFIX}-csr.json <<EOF1
{
  "CN": "system:kube-scheduler",
  "key": {
    "algo": "rsa",
    "size": 2048
  },
  "names": [
    {
      "C": "CN",
      "ST": "Hubei",
      "L": "Wuhan",
      "O": "system:kube-scheduler",
      "OU": "Kubernetes-manual"
    }
  ]
}
EOF1

cfssl gencert -ca=ssl/ca.pem -ca-key=ssl/ca-key.pem \
-config=ca-config.json \
-hostname=${SCHEDULER_IP} \
-profile=peer ${CSR_NAME_PREFIX}-csr.json | cfssljson -bare ssl/${CSR_NAME_PREFIX}
EOF

执行

bash -x gen_schduler_cert.sh 127.0.0.1,$NODE_M_IP

# 在ssl目录下生成
# ├── kube-scheduler-key.pem
# ├── kube-scheduler.pem

生成kubeconfig文件

生成脚本kubeconfig_kube-scheduler.sh

cat <<'EOF' > kubeconfig_kube-scheduler.sh 

APISERVER_IP=$1
K8S_CERT_DIR=$2
PORT=6443
KUBE_APISERVER=https://${APISERVER_IP}:${PORT}
KUBECONFIG_FILE=kubeconfig/kube-scheduler.kubeconfig
CLUSTER_NAME=kubernetes
USERNAME=system:kube-scheduler
CONTEXT_NAME=${USERNAME}@${CLUSTER_NAME}
CERT_PRFIX=kube-scheduler

# 设置集群参数
./bin/kubectl config set-cluster ${CLUSTER_NAME} \
    --certificate-authority=${K8S_CERT_DIR}/ca.pem \
    --embed-certs=true \
    --server=${KUBE_APISERVER} \
    --kubeconfig=${KUBECONFIG_FILE}

# 设置用户认证参数
./bin/kubectl config set-credentials ${USERNAME} \
	--client-certificate=${K8S_CERT_DIR}/${CERT_PRFIX}.pem \
	--client-key=${K8S_CERT_DIR}/${CERT_PRFIX}-key.pem \
	--embed-certs=true \
	--kubeconfig=${KUBECONFIG_FILE}

# 设置context---将用户和集群关联起来
./bin/kubectl config set-context ${CONTEXT_NAME} \
    --cluster=${CLUSTER_NAME} \
    --user=${USERNAME} \
    --kubeconfig=${KUBECONFIG_FILE}

# 设置默认context
./bin/kubectl config use-context ${CONTEXT_NAME} \
    --kubeconfig=${KUBECONFIG_FILE}
EOF

添加可执行权限并运行

bash -x kubeconfig_kube-scheduler.sh $NODE_M_IP ssl

## 在kubeconfig目录下生成
# ├── kube-scheduler.kubeconfig

生成kube-scheduler的service文件

生成脚本kube-scheduler.sh

cat <<'EOF'> kube-scheduler.sh

K8S_CONF_DIR=/opt/k8s/config

cat > service/kube-scheduler.service <<EOF1
[Unit]
Description=Kubernetes Scheduler
Documentation=https://github.com/kubernetes/kubernetes
After=network.target

[Service]
ExecStart=/usr/local/bin/kube-scheduler \\
--v=2 \\
--bind-address=127.0.0.1 \\
--leader-elect=true \\
--kubeconfig=${K8S_CONF_DIR}/kube-scheduler.kubeconfig

Restart=always
RestartSec=10s

[Install]
WantedBy=multi-user.target
EOF1
EOF

执行

bash -x kube-scheduler.sh

# 在service目录下生成
# ├── kube-scheduler.service

分发二进制文件、证书、kubeconfig文件及service文件

for i in m01; do \
ssh $i "mkdir -p /opt/k8s/{ssl,config}"; \
scp bin/kube-scheduler $i:/usr/local/bin/ ;
scp ssl/kube-scheduler*.pem $i:/opt/k8s/ssl/; \
scp service/kube-scheduler.service $i:/usr/lib/systemd/system/; \
scp kubeconfig/kube-scheduler.kubeconfig $i:/opt/k8s/config/; \
done

启动kube-scheduler服务

for i in m01; do \
ssh $i "systemctl daemon-reload"; \
ssh $i "systemctl enable kube-scheduler"; \
ssh $i "systemctl restart kube-scheduler --no-block"; \
ssh $i "systemctl is-active kube-scheduler"; \
done

验证集群

kubectl get cs
# Warning: v1 ComponentStatus is deprecated in v1.19+
# NAME                 STATUS    MESSAGE                         ERROR
# scheduler            Healthy   ok
# controller-manager   Healthy   ok
# etcd-0               Healthy   {"health":"true","reason":""}

至此master节点三大组件部署完毕

部署kubelet

配置TLS Bootstrap(手动)

为什么这个证书不是手动管理?因为k8s的master节点可能是固定的,创建好之后一直就是那几台,但worker节点变化比较多,添加、删除、故障维护时手动签发证书会比较麻烦;证书和主机名是绑定的,而各节点的主机名又不一样,所以需要有一种机制来自动处理证书的申请和签发。

每个节点的kubelet组件都要使用由apiserver 使用的CA签发的有效证书才能与apiserver通讯;此时如果节点多起来,为每个节点单独签署证书将是一件非常繁琐的事情;TLS bootstrapping功能就是让kubelet先使用一个预定的低权限用户连接到 apiserver,然后向apiserver申请证书,kubelet的证书由apiserver动态签署。

生成kubeconfig文件

bootstrap.kubeconfig文件是一个用来向apiserver申请证书的文件

生成脚本kubeconfig_bootstrap_config.sh

cat <<'EOF'> kubeconfig_bootstrap_config.sh
#!/bin/bash

APISERVER_IP=$1
K8S_CERT_DIR=$2
K8S_CONF_DIR=/opt/k8s/config
PORT=6443
KUBE_APISERVER=https://${APISERVER_IP}:${PORT}
KUBECONFIG_FILE=kubeconfig/bootstrap.kubeconfig
CLUSTER_NAME=kubernetes

# 生成bootstrap的token
TOKEN_ID=$(openssl rand -hex 3)
TOKEN_SECRET=$(openssl rand -hex 8)
BOOTSTRAP_TOKEN=${TOKEN_ID}.${TOKEN_SECRET}

USERNAME=system:bootstrap:${TOKEN_ID}
CONTEXT_NAME=${USERNAME}@${CLUSTER_NAME}

# 创建bootstrap.kubeconfig
# 设置集群参数
./bin/kubectl config set-cluster ${CLUSTER_NAME} \
--certificate-authority=${K8S_CERT_DIR}/ca.pem \
--embed-certs=true \
--server=${KUBE_APISERVER} \
--kubeconfig=${KUBECONFIG_FILE}

# 设置客户端认证参数,kubelet 使用bootstrap token认证
./bin/kubectl config set-credentials ${USERNAME} \
--token=${BOOTSTRAP_TOKEN} \
--kubeconfig=${KUBECONFIG_FILE}

# 设置上下文参数
./bin/kubectl config set-context ${CONTEXT_NAME}  \
--cluster=kubernetes \
--user=${USERNAME} \
--kubeconfig=${KUBECONFIG_FILE}

# 使用上下文参数生成 bootstrap.kubeconfig 文件
./bin/kubectl config use-context ${CONTEXT_NAME} --kubeconfig=${KUBECONFIG_FILE}

# 创建boostrap token secret
cat > config/bootstrap-token-secret.yaml <<EOF1
apiVersion: v1
kind: Secret
metadata:
  name: bootstrap-token-${TOKEN_ID}
  namespace: kube-system
type: bootstrap.kubernetes.io/token
stringData:
  token-id: ${TOKEN_ID}
  token-secret: ${TOKEN_SECRET}
  usage-bootstrap-authentication: "true"
  usage-bootstrap-signing: "true"
  auth-extra-groups: system:bootstrappers:default-node-token,system:bootstrappers:worker,system:bootstrappers:ingress
EOF1
EOF

执行

bash -x kubeconfig_bootstrap_config.sh $NODE_M_IP ssl

# 在config目录下生成
# ├── bootstrap-token-secret.yaml

# 在kubeconfig目录下生成
# ├── bootstrap.kubeconfig
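
可以先确认写入kubeconfig的token(格式为<token-id>.<token-secret>,与bootstrap-token-secret.yaml中的内容对应):

kubectl config view --kubeconfig=kubeconfig/bootstrap.kubeconfig --raw -o jsonpath='{.users[0].user.token}'
# 输出形如 a0da46.xxxxxxxxxxxxxxxx,实际值由脚本随机生成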

导入bootstrap-token-secret

# 创建secret
kubectl apply -f config/bootstrap-token-secret.yaml
# secret/bootstrap-token-a0da46 created

查看bootstrap-token

kubectl get secret -nkube-system
# NAME                     TYPE                            DATA   AGE
# bootstrap-token-a0da46   bootstrap.kubernetes.io/token   5      14m

生成kubelet配置文件

生成脚本kubelet.sh

cat <<'EOF'> kubelet_config.sh
#!/bin/bash
K8S_CONF_DIR=/opt/k8s/config
K8S_CERT_DIR=/opt/k8s/ssl
CLUSTER_DNS=10.96.0.10

# 生成kubelet参数文件
cat > config/kubelet.conf <<EOF1
KUBELET_OPTS="--v=4 \\
--container-runtime-endpoint=unix:///run/containerd/containerd.sock \\
--runtime-cgroups=/systemd/system.slice \\
--kubeconfig=${K8S_CONF_DIR}/kubelet.kubeconfig \\
--bootstrap-kubeconfig=${K8S_CONF_DIR}/bootstrap.kubeconfig \\
--config=${K8S_CONF_DIR}/kubelet.yaml \\
--cert-dir=${K8S_CERT_DIR} \\
--node-labels=node.kubernetes.io/node="
EOF1

# 生成kubelet配置yaml文件
cat > config/kubelet.yaml <<EOF2
kind: KubeletConfiguration
apiVersion: kubelet.config.k8s.io/v1beta1
address: 0.0.0.0
port: 10250
readOnlyPort: 10255
authentication:
  anonymous:
    enabled: false
  webhook:
    cacheTTL: 2m0s
    enabled: true
  x509:
    clientCAFile: ${K8S_CERT_DIR}/ca.pem
authorization:
  mode: Webhook
  webhook:
    cacheAuthorizedTTL: 5m0s
    cacheUnauthorizedTTL: 30s
runtimeRequestTimeout: 15m
cgroupDriver: systemd
cgroupsPerQOS: true
clusterDNS:
- ${CLUSTER_DNS}
clusterDomain: cluster.local
EOF2

# 生成kubelet.service服务启动文件
cat > service/kubelet.service <<EOF3
[Unit]
Description=Kubernetes Kubelet
After=containerd.service
Requires=containerd.service

[Service]
ExecStartPre=-/bin/mkdir -p /sys/fs/cgroup/hugetlb/systemd/system.slice
ExecStartPre=-/bin/mkdir -p /sys/fs/cgroup/blkio/systemd/system.slice
ExecStartPre=-/bin/mkdir -p /sys/fs/cgroup/cpuset/systemd/system.slice
ExecStartPre=-/bin/mkdir -p /sys/fs/cgroup/devices/systemd/system.slice
ExecStartPre=-/bin/mkdir -p /sys/fs/cgroup/net_cls,net_prio/systemd/system.slice
ExecStartPre=-/bin/mkdir -p /sys/fs/cgroup/perf_event/systemd/system.slice
ExecStartPre=-/bin/mkdir -p /sys/fs/cgroup/cpu,cpuacct/systemd/system.slice
ExecStartPre=-/bin/mkdir -p /sys/fs/cgroup/freezer/systemd/system.slice
ExecStartPre=-/bin/mkdir -p /sys/fs/cgroup/memory/systemd/system.slice
ExecStartPre=-/bin/mkdir -p /sys/fs/cgroup/pids/systemd/system.slice
ExecStartPre=-/bin/mkdir -p /sys/fs/cgroup/systemd/systemd/system.slice
LimitNOFILE=655350
LimitNPROC=655350
LimitCORE=infinity
LimitMEMLOCK=infinity
# 在centos系统上需要配置CPUAccounting和MemoryAccounting
CPUAccounting=true
MemoryAccounting=true
EnvironmentFile=${K8S_CONF_DIR}/kubelet.conf
ExecStart=/usr/local/bin/kubelet \$KUBELET_OPTS

Restart=on-failure
KillMode=process

[Install]
WantedBy=multi-user.target
EOF3
EOF

执行

bash -x kubelet_config.sh

# 在service目录下
# ├── kubelet.service

# 在config目录下
# ├── kubelet.conf 
# ├── kubelet.yaml

分发二进制文件、配置文件、证书、kubeconfig文件及service文件

# 分发给m01
for i in m01; do \
ssh $i "mkdir -p /opt/k8s/{config,ssl,manifests}"; \
scp bin/kubelet $i:/usr/local/bin/; \
scp config/kubelet.{conf,yaml} $i:/opt/k8s/config/; \
scp kubeconfig/bootstrap.kubeconfig $i:/opt/k8s/config/; \
scp ssl/ca.pem $i:/opt/k8s/ssl; \
scp service/kubelet.service $i:/usr/lib/systemd/system/; \
done

授权

使用systemctl status kubelet会发现kubelet启动失败,进一步使用journalctl -xe -u kubelet.service --no-pager | less会发现如下错误,提示User "system:bootstrap:a0da46"不能创建资源certificatesigningrequests

m01 kubelet[1452]: Error: failed to run Kubelet: cannot create certificate signing request: certificatesigningrequests.certificates.k8s.io is forbidden: User “system:bootstrap:a0da46” cannot create resource “certificatesigningrequests” in API group “certificates.k8s.io” at the cluster scope

在默认情况下,kubelet通过bootstrap.kubeconfig中的预设Token声明自己的身份,然后创建CSR请求;但是这个身份默认没有任何权限,包括创建CSR请求;所以需要用如下命令创建一个ClusterRoleBinding,将该预设身份(这里是其所属的组)与内置的ClusterRole system:node-bootstrapper绑定到一起,使其能够发起CSR请求。

在使用 Bootstrap Token 进行引导时,Kubelet 组件使用 Token 发起的请求其用户名为system:bootstrap:<tokenid>,所属组为system:bootstrappers,然后创建CSR请求,但是此用户没有任何权限;在k8s中已经创建了一个clusterrole(system:node-bootstrapper),此集群角色具有发起CSR请求的权限,我们需要创建一个clusterrolebinding将clusterrole和此token的用户名或者所属组进行关联,然后system:bootstrap:<tokenid>拥有了system:node-bootstrapper的权限,这样任何用户拿着这个token连接apiserver都具有system:node-bootstrapper的权限

这里创建一个clusterrolebinding(create-csrs-for-bootstrapping)将clusterrole和group进行绑定

kubectl create clusterrolebinding create-csrs-for-bootstrapping \
        --clusterrole=system:node-bootstrapper \
        --group=system:bootstrappers:default-node-token 

kubectl get clusterrolebinding create-csrs-for-bootstrapping
# NAME                            ROLE                                   AGE
# create-csrs-for-bootstrapping   ClusterRole/system:node-bootstrapper   22s

手动签发kubelet证书并查看集群

任何人使用该token进行认证,通过后进入授权阶段:apiserver从token中取出token-id,把这次请求的用户名设置为system:bootstrap:<token-id>,并将其划入secret里auth-extra-groups指定的组(例如system:bootstrappers:default-node-token);由于上面的clusterrolebinding已经把该组与system:node-bootstrapper绑定,所以任何拿着这个token连接apiserver的客户端都具有发起CSR请求的权限。

# 重新启动kubelet服务
systemctl restart kubelet.service

# 查看节点kubelet启动证书请求状态,这时已经是Pending状态
kubectl get csr
# NAME                                                   AGE   SIGNERNAME                                    REQUESTOR                 REQUESTEDDURATION   CONDITION
# node-csr-Rd0Mxw-GPHt5p66qLCnLWUxj0g7uWUDtiNS0UjVafb8   4s    kubernetes.io/kube-apiserver-client-kubelet   system:bootstrap:a0da46   <none>              Pending

# 手动签发kubelet的证书
kubectl certificate approve $(kubectl get csr | grep node | awk '{print $1}')
# certificatesigningrequest.certificates.k8s.io/node-csr-Rd0Mxw-GPHt5p66qLCnLWUxj0g7uWUDtiNS0UjVafb8 approved

# 再次查看证书请求状态,已经变成了Approved,Issued
kubectl get csr
# NAME                                                   AGE     SIGNERNAME                                    REQUESTOR                 REQUESTEDDURATION   CONDITION
# node-csr-Rd0Mxw-GPHt5p66qLCnLWUxj0g7uWUDtiNS0UjVafb8   2m48s   kubernetes.io/kube-apiserver-client-kubelet   system:bootstrap:a0da46   <none>              Approved,Issued

# 查看node,由于网络插件还未安装,状态显示为NotReady
kubectl get nodes
# NAME   STATUS     ROLES    AGE   VERSION
# m01    NotReady   <none>   79s   v1.26.2

自动批准,自动续期,自动颁发

要是有很多worker节点要安装kubelet,手工去approve证书请求会很繁琐,增加工作量,就有了自动批准,自动续期,自动颁发的方法。

kubelet所发起的CSR请求是由controller manager签署的;如果想要是实现自动续期,就需要让controller manager能够在 kubelet发起证书请求的时候自动帮助其签署证书;那么controller manager不可能对所有的CSR证书申请都自动签署,这时候就需要配置RBAC规则,保证controller manager只对kubelet发起的特定CSR请求自动批准即可;在TLS bootstrapping官方文档中,CSR有三种请求类型:

  • nodeclient: kubelet以O=system:nodes和CN=system:node:(node name)形式发起的CSR请求
  • selfnodeclient: kubelet client renew自己的证书发起的CSR请求(与上一个证书就有相同的O和CN)
  • selfnodeserver: kubelet server renew 自己的证书发起的CSR请求

通俗点讲就是: nodeclient类型的CSR仅在第一次启动时会产生,selfnodeclient类型的CSR请求实际上就是kubelet renew自己作为client跟apiserver通讯时使用的证书产生的,selfnodeserver类型的CSR请求则是kubelet首次申请或后续renew自己的10250 api端口证书时产生的。下面针对这3种CSR请求分别创建对应的clusterrolebinding,绑定到k8s内置的自动批准clusterrole上。

创建3个clusterrolebinding

  • 自动批准kubelet首次用于与 apiserver 通讯证书的 CSR 请求(nodeclient)
  • 自动批准kubelet首次用于10250端口鉴权的CSR请求(实际上这个请求走的也是 selfnodeserver 类型 CSR)
  • 自动批准kubelet后续renew用于与apiserver通讯证书的 CSR 请求(selfnodeclient)
  • 自动批准kubelet后续renew用于10250端口鉴权的 CSR 请求(selfnodeserver)
# 自动批准 kubelet 的首次 CSR 请求(用于与 apiserver 通讯的证书)
kubectl create clusterrolebinding node-client-auto-approve-csr --clusterrole=system:certificates.k8s.io:certificatesigningrequests:nodeclient --group=system:bootstrappers

# 自动批准 kubelet 后续 renew 用于与 apiserver 通讯证书的 CSR 请求
kubectl create clusterrolebinding node-client-auto-renew-crt --clusterrole=system:certificates.k8s.io:certificatesigningrequests:selfnodeclient --group=system:nodes

# 自动批准 kubelet 发起的用于 10250 端口鉴权证书的 CSR 请求(包括后续 renew)
kubectl create clusterrolebinding node-server-auto-renew-crt --clusterrole=system:certificates.k8s.io:certificatesigningrequests:selfnodeserver --group=system:nodes

分发二进制文件、配置文件、证书、kubeconfig文件及service文件

# 分发到w01和w02
for i in w01 w02; do \
ssh $i "mkdir -p /opt/k8s/{config,ssl,manifests}"; \
scp bin/kubelet $i:/usr/local/bin/; \
scp config/kubelet.{conf,yaml} $i:/opt/k8s/config/; \
scp kubeconfig/bootstrap.kubeconfig $i:/opt/k8s/config/; \
scp ssl/ca.pem $i:/opt/k8s/ssl; \
scp service/kubelet.service $i:/usr/lib/systemd/system/; \
done

启动kubelet服务

for i in w01 w02; do \
ssh $i "systemctl daemon-reload"; \
ssh $i "systemctl enable kubelet"; \
ssh $i "systemctl restart kubelet --no-block"; \
ssh $i "systemctl is-active kubelet"; \
done

查看csr和集群信息

# 再次查看csr,w01和w02已经自己申请证书并自动批准,颁发
kubectl get csr
# NAME                                                   AGE   SIGNERNAME                                    REQUESTOR                 REQUESTEDDURATION   CONDITION
# node-csr-Rd0Mxw-GPHt5p66qLCnLWUxj0g7uWUDtiNS0UjVafb8   10m   kubernetes.io/kube-apiserver-client-kubelet   system:bootstrap:a0da46   <none>              Approved,Issued
# node-csr-ZOEo99khFzaWWKmYTHn8DctSmnvyngGG-DLr7q-wIZo   13s   kubernetes.io/kube-apiserver-client-kubelet   system:bootstrap:a0da46   <none>              Approved,Issued
# node-csr-fO2nU2tQC9RjwRyvZ6aA2338yWzgA43UZr0ka79rO8A   11s   kubernetes.io/kube-apiserver-client-kubelet   system:bootstrap:a0da46   <none>              Approved,Issued


kubectl get nodes
# NAME   STATUS     ROLES    AGE     VERSION
# m01    NotReady   <none>   9m29s   v1.26.2
# w01    NotReady   <none>   77s     v1.26.2
# w02    NotReady   <none>   75s     v1.26.2


# 修改node的role标签
kubectl label nodes m01 node-role.kubernetes.io/master=
# node/m01 labeled

kubectl label nodes w01 node-role.kubernetes.io/worker=
# node/w01 labeled

kubectl label nodes w02 node-role.kubernetes.io/worker=
# node/w02 labeled

kubectl get nodes
# NAME   STATUS     ROLES    AGE    VERSION
# m01    NotReady   master   10m    v1.26.2
# w01    NotReady   worker   2m1s   v1.26.2
# w02    NotReady   worker   119s   v1.26.2

部署kube-proxy

kube-proxy运行在所有worker节点上,它监听apiserver中service和endpoint的变化情况,创建路由规则提供服务IP和负载均衡功能。

生成证书

kube-proxy提取证书中的CN作为客户端的用户名,即system:kube-proxy。 kube-apiserver预定义的 RBAC使用的ClusterRoleBindings system:node-proxier将用户system:kube-proxy与ClusterRole system:node-proxier绑定,该Role授予节点调用kube-apiserver proxy相关api的权限;

kubectl describe clusterrolebinding/system:node-proxier
# Name:         system:node-proxier
# Labels:       kubernetes.io/bootstrapping=rbac-defaults
# Annotations:  rbac.authorization.kubernetes.io/autoupdate: true
# Role:
#  Kind:  ClusterRole
#  Name:  system:node-proxier
# Subjects:
#   Kind  Name               Namespace
# ----  ----               ---------
#   User  system:kube-proxy  

生成脚本gen_kube_proxy_cert.sh

cat <<EOF> gen_kube_proxy_cert.sh
#!/bin/bash

# 生成 kube-proxy 的证书和私钥,
cat > kube-proxy-csr.json <<EOF1
{
  "CN": "system:kube-proxy",
  "key": {
    "algo": "rsa",
    "size": 2048
  },
  "names": [
    {
      "C": "CN",
      "ST": "Hubei",
      "L": "Wuhan",
      "O": "system:kube-proxy",
      "OU": "Kubernetes-manual"
    }
  ]
}
EOF1

cfssl gencert -ca=ssl/ca.pem -ca-key=ssl/ca-key.pem -config=ca-config.json -profile=peer kube-proxy-csr.json | cfssljson -bare ssl/kube-proxy
EOF

执行

bash -x gen_kube_proxy_cert.sh

# 在ssl目录下生成
# ├── kube-proxy-key.pem
# ├── kube-proxy.pem

生成kubeconfig文件

认证方式有2种

  • 证书认证
  • token认证

本次部署采用的是证书认证

生成脚本kube-proxy_kubeconfig.sh

cat <<'EOF'> kube-proxy_kubeconfig.sh
#!/bin/bash

K8S_CONF_DIR=/opt/k8s/config

APISERVER_IP=$1
K8S_CERT_DIR=$2
PORT=6443
CLUSTER_NAME=kubernetes
KUBE_APISERVER=https://${APISERVER_IP}:${PORT}
KUBECONFIG_FILE=kubeconfig/kube-proxy.kubeconfig
USERNAME=system:kube-proxy
CONTEXT_NAME=${USERNAME}@${CLUSTER_NAME}
CERT_PRFIX=kube-proxy

./bin/kubectl config set-cluster ${CLUSTER_NAME} \
    --certificate-authority=${K8S_CERT_DIR}/ca.pem \
    --embed-certs=true \
    --server=${KUBE_APISERVER} \
    --kubeconfig=${KUBECONFIG_FILE}

./bin/kubectl config set-credentials ${USERNAME} \
    --client-certificate=${K8S_CERT_DIR}/${CERT_PRFIX}.pem \
    --client-key=${K8S_CERT_DIR}/${CERT_PRFIX}-key.pem \
    --embed-certs=true \
    --kubeconfig=${KUBECONFIG_FILE}

./bin/kubectl config set-context ${CONTEXT_NAME} \
    --cluster=${CLUSTER_NAME} \
    --user=${USERNAME} \
    --kubeconfig=${KUBECONFIG_FILE}

./bin/kubectl config use-context ${CONTEXT_NAME} \
    --kubeconfig=${KUBECONFIG_FILE}
EOF

执行

bash -x kube-proxy_kubeconfig.sh $NODE_M_IP ssl

# 在kubeconfig目录下生成
# ├── kube-proxy.kubeconfig

生成配置文件和service文件

生成脚本kube-proxy_config.sh

# clusterCIDR: 172.16.0.0/16	这个是pod网段
cat <<'EOF'> kube-proxy_config.sh

K8S_CONF_DIR=/opt/k8s/config
CLUSER_CIDR=172.16.0.0/16

## 创建 kube-proxy 启动参数配置文件
cat > config/kube-proxy.yaml <<EOF1
apiVersion: kubeproxy.config.k8s.io/v1alpha1
bindAddress: 0.0.0.0
clientConnection:
  acceptContentTypes: ""
  burst: 10
  contentType: application/vnd.kubernetes.protobuf
  kubeconfig: ${K8S_CONF_DIR}/kube-proxy.kubeconfig
  qps: 5
clusterCIDR: ${CLUSER_CIDR} 
configSyncPeriod: 15m0s
conntrack:
  max: null
  maxPerCore: 32768
  min: 131072
  tcpCloseWaitTimeout: 1h0m0s
  tcpEstablishedTimeout: 24h0m0s
enableProfiling: false
healthzBindAddress: 0.0.0.0:10256
hostnameOverride: ""
iptables:
  masqueradeAll: false
  masqueradeBit: 14
  minSyncPeriod: 0s
  syncPeriod: 30s
ipvs:
  masqueradeAll: true
  minSyncPeriod: 5s
  scheduler: "rr"
  syncPeriod: 30s
kind: KubeProxyConfiguration
metricsBindAddress: 127.0.0.1:10249
mode: "ipvs"
nodePortAddresses: null
oomScoreAdj: -999
portRange: ""
udpIdleTimeout: 250ms
EOF1

#----------------------
# 创建 kube-proxy.service 服务管理文件
cat > service/kube-proxy.service <<EOF2
[Unit]
Description=Kubernetes Proxy
After=network.target

[Service]
ExecStart=/usr/local/bin/kube-proxy \\
    --config=${K8S_CONF_DIR}/kube-proxy.yaml \\
    --v=2
    
Restart=always
RestartSec=10s

[Install]
WantedBy=multi-user.target
EOF2
EOF

执行

bash -x kube-proxy_config.sh

# 在config目录下生成
# ├── kube-proxy.yaml

# 在service目录下生成
# ├── kube-proxy.service

分发二进制文件、配置文件、kubeconfig文件及service文件

for i in m01 w01 w02; do \
scp bin/kube-proxy $i:/usr/local/bin/; \
scp config/kube-proxy.yaml $i:/opt/k8s/config/; \
scp kubeconfig/kube-proxy.kubeconfig $i:/opt/k8s/config/; \
scp service/kube-proxy.service $i:/usr/lib/systemd/system/; \
scp ssl/front-proxy-ca.pem $i:/opt/k8s/ssl/; \
done

启动kube-proxy服务

for i in m01 w01 w02; do \
ssh $i "systemctl daemon-reload"; \
ssh $i "systemctl enable kube-proxy"; \
ssh $i "systemctl restart kube-proxy --no-block"; \
ssh $i "systemctl is-active kube-proxy"; \
done

查看服务状态

ss -tnlp | grep kube-proxy
# LISTEN 0      16384      127.0.0.1:10249      0.0.0.0:*    users:(("kube-proxy",pid=2272,fd=14))
# LISTEN 0      16384              *:10256            *:*    users:(("kube-proxy",pid=2272,fd=12))

ipvsadm -Ln
# IP Virtual Server version 1.2.1 (size=4096)
# Prot LocalAddress:Port Scheduler Flags
#   -> RemoteAddress:Port           Forward Weight ActiveConn InActConn
# TCP  10.96.0.1:443 rr
#   -> 192.168.2.10:6443            Masq    1      0          0    

部署calico

calico中,pod网卡的另一端并不是接入虚拟网桥,而是直接接入内核、靠路由转发,所以在宿主机上看不到类似网桥端口的另外一端
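
部署完成后,在节点上可以直观地看到这一点(示例命令,calixxx接口名由calico自动生成):

ip link | grep cali
# 形如 calia1b2c3d4e5f@if3 的veth接口,另一端直接位于pod的网络命名空间内
ip route | grep bird
# 本节点pod的/32路由以及其他节点pod网段的路由,proto为bird,由calico维护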

组件

  • Felix:运行于各个节点的守护进程,主要完成接口管理、路由规划、ACL规划、路由和ACL的报文状态

  • BIRD:vRouter的实现,默认是bgp客户端,在运行Felix的节点必须要运行BIRD;同时又是路由反射器

    BGP协议的守护进程:既可以是路由反射器又是bgp客户端

下载calico.yaml并修改配置

mkdir -p /root/addons
cd /root/addons/

# calico的详细部署说明参见
# https://projectcalico.docs.tigera.io/getting-started/kubernetes/self-managed-onprem/onpremises

# 下载calico部署文件,
curl https://raw.githubusercontent.com/projectcalico/calico/v3.25.0/manifests/calico.yaml -o calico.yaml

# 最简单的配置,就是修改配置中CALICO_IPV4POOL_CIDR参数,这个参数表示为pod分配的网络的网段,默认是
# 192.168.0.0/16,我们可以自定义网络地址

### - name: CALICO_IPV4POOL_CIDR
###   value: "172.16.0.0/16"


# 以下都是可选配置 
# ------------------------------------------------------------
# yaml配置文件
# cni_network_config:配置calico如何通过CNI与K8s对接

# 工作模型,默认配置使用的是IPIP模型,支持3种可用值:
# Always(全局流量)、Cross-SubNet(跨子网流量)和Never
# Enable IPIP

### - name: CALICO_IPV4POOL_IPIP
###   value: "Always"
# Enable or Disable VXLAN on the default IP pool.
### - name: CALICO_IPV4POOL_VXLAN
###   value: "Never"

 
# 要是想启用BGP模型,需要将IPIP和VXLAN修改成Never并添加支持BGP的配置
# 混合模式-主机跨子网,有2种组合,①IPIP + BGP,② VXLAN + BGP
# ①IPIP + BGP

### - name: CALICO_IPV4POOL_IPIP
###   value: "Cross-SubNet"
### - name: CALICO_IPV4POOL_VXLAN
###   value: "Never"  
  
  
# ②VXLAN + BGP

### - name: CALICO_IPV4POOL_IPIP
###   value: "Never"
### - name: CALICO_IPV4POOL_VXLAN
###   value: "Cross-SubNet"
  
  
# 如果给定一个B类地址,calico默认使用26位掩码进行子网分配,这样会占用C类子网的主机位,子网数量增
# 加,可以支持更多的node数量,但是每个node上可用的pod数量减少,一个主机只能运行62个pod(使用kubeadm
# 部署时,node节点默认最大支持110个pod,最大pod数量可以修改)
# 如果为了我们习惯和方便分清是哪个子网的pod,可以调整配置,让calico使用24位掩码进行子网切分地址池,
# 并将各子网配置给集群中的节点,需要配置如下参数

### - name: CALICO_IPV4POOL_BLOCK_SIZE
###   value: "24"


# controller manager用配置中参数--allocate-node-cidrs=true,来为节点分配一个pod cidr,
# 这个cidr是什么由另外一个参数--cluster-cidr=xxx决定,但是Calico默认并不会从用这个cidr给
# pod分配地址。我们可以使用以下配置,让calico使用controller manager为node分配的pod的cidr
# 为pod分配地址
# 设置为“true”并结合host-local这一IPAM插件以强制从PodCIDR中分配地址

### - name: USE_POD_CIDR
###   value: "true"


# 在地址分配方面,Calico在JSON格式的CNI插件配置文件中使用专有的calico-ipam插件,该插件并不会使用Node.Spec.PodCIDR中定义的子网作为节点本地用于为Pod分配地址的地址池,而是根据Calico插件为各节点的配置的地址池进行地址分配。若期望为节点真正使用地址池吻合PodCIDR的定义,则需要在部署清单中DaemonSet/calico-node资源的Pod模板中的calico-node容器之上将USE_POD_CIDR环境变量的值设置为true,并修改ConfigMap/calico-config资源中cni_network_config键中的plugins.ipam.type的值为host-local,且使用podCIDR为子网,具体配置如下所示。

###  cni_network_config: 
###     {
###       "plugins": [
###         {
###          "ipam": {
###               "type": "host-local",
###               "subnet": "usePodCidr"
###           },   
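
For reference, the CIDR change can also be scripted instead of edited by hand. A hedged sketch, assuming the commented-out default of 192.168.0.0/16 exactly as it appears in the v3.25.0 calico.yaml manifest:

# uncomment CALICO_IPV4POOL_CIDR and point it at our pod network
sed -i 's|# - name: CALICO_IPV4POOL_CIDR|- name: CALICO_IPV4POOL_CIDR|' calico.yaml
sed -i 's|#   value: "192.168.0.0/16"|  value: "172.16.0.0/16"|' calico.yaml

# verify the result
grep -A1 'CALICO_IPV4POOL_CIDR' calico.yaml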

Apply the resource file

kubectl apply -f calico.yaml

Check

kubectl get pods -A
# NAMESPACE     NAME                                       READY   STATUS    RESTARTS   AGE
# kube-system   calico-kube-controllers-555bc4b957-26fw8   1/1     Running   0          6m35s
# kube-system   calico-node-9xhq4                          1/1     Running   0          6m35s
# kube-system   calico-node-gtl2c                          1/1     Running   0          6m35s
# kube-system   calico-node-hvcnw                          1/1     Running   0          6m36s

# If these pods never reach Running, try pulling the images (the v3.25.0 tags referenced by the
# manifest) manually on every node
for i in m01 w01 w02; do \
ssh $i "crictl pull docker.io/calico/cni:v3.25.0"; \
ssh $i "crictl pull docker.io/calico/node:v3.25.0"; \
ssh $i "crictl pull docker.io/calico/kube-controllers:v3.25.0"; \
done

kubectl get nodes 
# NAME   STATUS   ROLES    AGE   VERSION
# m01    Ready    master   22m   v1.26.2
# w01    Ready    worker   14m   v1.26.2
# w02    Ready    worker   14m   v1.26.2
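
To confirm that the pool settings took effect, inspect the IPPool object the manifest install creates. A sketch, assuming the default pool name default-ipv4-ippool:

kubectl get ippools.crd.projectcalico.org default-ipv4-ippool -o yaml | grep -E 'cidr|blockSize|ipipMode|vxlanMode'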

Deploy addons

Deploy coredns

CoreDNS provides DNS resolution for services inside the cluster: it lets a pod resolve a service name to the service's ClusterIP and then connect to the backing application through that IP. Check the CoreDNS releases page for the latest version; at the time of writing it is 1.10.0 (this guide deploys 1.9.3, matching the version table above).

Once coredns is deployed, every pod that starts gets the cluster DNS settings injected into its /etc/resolv.conf:

cat /etc/resolv.conf   # run inside a pod (e.g. via kubectl exec)
# search default.svc.cluster.local svc.cluster.local cluster.local
# nameserver 10.96.0.10
# options ndots:5

Generate the yaml resource file

Create the script file gen_coredns_config.sh

cat <<'EOF'> gen_coredns_config.sh

DNS_DOMAIN=cluster.local
IMAGE_REGISTRY=registry.cn-hangzhou.aliyuncs.com/google_containers/coredns:v1.9.3
DNS_MEMORY_LIMIT=170Mi
DNS_SERVER_IP=10.96.0.10

cat > coredns.yaml <<EOF1
apiVersion: v1
kind: ServiceAccount
metadata:
  name: coredns
  namespace: kube-system
  labels:
      kubernetes.io/cluster-service: "true"
      addonmanager.kubernetes.io/mode: Reconcile
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    kubernetes.io/bootstrapping: rbac-defaults
    addonmanager.kubernetes.io/mode: Reconcile
  name: system:coredns
rules:
- apiGroups:
  - ""
  resources:
  - endpoints
  - services
  - pods
  - namespaces
  verbs:
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - nodes
  verbs:
  - get
- apiGroups:
  - discovery.k8s.io
  resources:
  - endpointslices
  verbs:
  - list
  - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  annotations:
    rbac.authorization.kubernetes.io/autoupdate: "true"
  labels:
    kubernetes.io/bootstrapping: rbac-defaults
    addonmanager.kubernetes.io/mode: EnsureExists
  name: system:coredns
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:coredns
subjects:
- kind: ServiceAccount
  name: coredns
  namespace: kube-system
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: coredns
  namespace: kube-system
  labels:
      addonmanager.kubernetes.io/mode: EnsureExists
data:
  Corefile: |
    .:53 {
        errors
        health {
            lameduck 5s
        }
        ready
        kubernetes ${DNS_DOMAIN} in-addr.arpa ip6.arpa {
            pods insecure
            fallthrough in-addr.arpa ip6.arpa
            ttl 30
        }
        prometheus :9153
        forward . /etc/resolv.conf {
            max_concurrent 1000
        }
        cache 30
        loop
        reload
        loadbalance
    }
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: coredns
  namespace: kube-system
  labels:
    k8s-app: kube-dns
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
    kubernetes.io/name: "CoreDNS"
spec:
  # replicas: not specified here:
  # 1. In order to make Addon Manager do not reconcile this replicas parameter.
  # 2. Default is 1.
  # 3. Will be tuned in real time if DNS horizontal auto-scaling is turned on.
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 1
  selector:
    matchLabels:
      k8s-app: kube-dns
  template:
    metadata:
      labels:
        k8s-app: kube-dns
    spec:
      securityContext:
        seccompProfile:
          type: RuntimeDefault
      priorityClassName: system-cluster-critical
      serviceAccountName: coredns
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 100
            podAffinityTerm:
              labelSelector:
                matchExpressions:
                  - key: k8s-app
                    operator: In
                    values: ["kube-dns"]
              topologyKey: kubernetes.io/hostname
      tolerations:
        - key: "CriticalAddonsOnly"
          operator: "Exists"
      nodeSelector:
        kubernetes.io/os: linux
      containers:
      - name: coredns
        image: ${IMAGE_REGISTRY}
        imagePullPolicy: IfNotPresent
        resources:
          limits:
            memory: ${DNS_MEMORY_LIMIT}
          requests:
            cpu: 100m
            memory: 70Mi
        args: [ "-conf", "/etc/coredns/Corefile" ]
        volumeMounts:
        - name: config-volume
          mountPath: /etc/coredns
          readOnly: true
        ports:
        - containerPort: 53
          name: dns
          protocol: UDP
        - containerPort: 53
          name: dns-tcp
          protocol: TCP
        - containerPort: 9153
          name: metrics
          protocol: TCP
        livenessProbe:
          httpGet:
            path: /health
            port: 8080
            scheme: HTTP
          initialDelaySeconds: 60
          timeoutSeconds: 5
          successThreshold: 1
          failureThreshold: 5
        readinessProbe:
          httpGet:
            path: /ready
            port: 8181
            scheme: HTTP
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            add:
            - NET_BIND_SERVICE
            drop:
            - all
          readOnlyRootFilesystem: true
      dnsPolicy: Default
      volumes:
        - name: config-volume
          configMap:
            name: coredns
            items:
            - key: Corefile
              path: Corefile
---
apiVersion: v1
kind: Service
metadata:
  name: kube-dns
  namespace: kube-system
  annotations:
    prometheus.io/port: "9153"
    prometheus.io/scrape: "true"
  labels:
    k8s-app: kube-dns
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
    kubernetes.io/name: "CoreDNS"
spec:
  selector:
    k8s-app: kube-dns
  clusterIP: ${DNS_SERVER_IP}
  ports:
  - name: dns
    port: 53
    protocol: UDP
  - name: dns-tcp
    port: 53
    protocol: TCP
  - name: metrics
    port: 9153
    protocol: TCP
EOF1
EOF

Run the script

bash -x gen_coredns_config.sh

# The following file is generated in the current directory
# └── coredns.yaml

Apply the yaml resource file and check the result

kubectl apply -f coredns.yaml

kubectl get pod -A
# NAMESPACE     NAME                                       READY   STATUS    RESTARTS       AGE
# kube-system   coredns-7f8b8f7b8-rhnnj                    1/1     Running   0          20s
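
A simple end-to-end check is to resolve a service name from a throwaway pod. A sketch, assuming the nodes can pull busybox from Docker Hub (busybox 1.28 is chosen because its nslookup behaves well):

kubectl run dns-test --image=busybox:1.28 --restart=Never --rm -it -- \
  nslookup kubernetes.default.svc.cluster.local
# should resolve via 10.96.0.10 to 10.96.0.1, the kubernetes service ClusterIP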

Deploy metrics-server

Kubernetes collects system resource metrics through metrics-server, which gathers CPU, memory, disk and network usage for nodes and pods. The upstream yaml can be downloaded from the metrics-server project page, but a few parameters need to be changed:

IMAGE_REGISTRY: check Alibaba Cloud's container registry for the matching metrics-server image version

Generate the yaml resource file

Create the script file gen_metrics-server_config.sh

cat <<'EOF'> gen_metrics-server_config.sh

CERT_PATH=/opt/k8s/ssl
CLIENT_CA_FILE=${CERT_PATH}/front-proxy-ca.pem
IMAGE_REGISTRY=registry.cn-hangzhou.aliyuncs.com/google_containers/metrics-server:v0.6.1

cat > metrics-server.yaml <<EOF1
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    k8s-app: metrics-server
    rbac.authorization.k8s.io/aggregate-to-admin: "true"
    rbac.authorization.k8s.io/aggregate-to-edit: "true"
    rbac.authorization.k8s.io/aggregate-to-view: "true"
  name: system:aggregated-metrics-reader
rules:
- apiGroups:
  - metrics.k8s.io
  resources:
  - pods
  - nodes
  verbs:
  - get
  - list
  - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    k8s-app: metrics-server
  name: system:metrics-server
rules:
- apiGroups:
  - ""
  resources:
  - nodes/metrics
  verbs:
  - get
- apiGroups:
  - ""
  resources:
  - pods
  - nodes
  verbs:
  - get
  - list
  - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server-auth-reader
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: extension-apiserver-authentication-reader
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server:system:auth-delegator
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:auth-delegator
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: system:metrics-server
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:metrics-server
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: v1
kind: Service
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
spec:
  ports:
  - name: https
    port: 443
    protocol: TCP
    targetPort: https
  selector:
    k8s-app: metrics-server
---
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
spec:
  selector:
    matchLabels:
      k8s-app: metrics-server
  strategy:
    rollingUpdate:
      maxUnavailable: 0
  template:
    metadata:
      labels:
        k8s-app: metrics-server
    spec:
      containers:
      - args:
        - --cert-dir=/tmp
        - --secure-port=4443
        - --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
        - --kubelet-use-node-status-port
        - --metric-resolution=15s
        - --kubelet-insecure-tls   # skip kubelet serving-cert verification (needed here for kubectl top nodes)
        - --requestheader-client-ca-file=${CLIENT_CA_FILE}   # aggregation layer CA (front-proxy-ca.pem)
        - --requestheader-username-headers=X-Remote-User
        - --requestheader-group-headers=X-Remote-Group
        - --requestheader-extra-headers-prefix=X-Remote-Extra-
        image: ${IMAGE_REGISTRY}
        imagePullPolicy: IfNotPresent
        livenessProbe:
          failureThreshold: 3
          httpGet:
            path: /livez
            port: https
            scheme: HTTPS
          periodSeconds: 10
        name: metrics-server
        ports:
        - containerPort: 4443
          name: https
          protocol: TCP
        readinessProbe:
          failureThreshold: 3
          httpGet:
            path: /readyz
            port: https
            scheme: HTTPS
          initialDelaySeconds: 20
          periodSeconds: 10
        resources:
          requests:
            cpu: 100m
            memory: 200Mi
        securityContext:
          allowPrivilegeEscalation: false
          readOnlyRootFilesystem: true
          runAsNonRoot: true
          runAsUser: 1000
        volumeMounts:
        - mountPath: /tmp
          name: tmp-dir
        - name: ca-ssl
          mountPath: ${CERT_PATH}  
      nodeSelector:
        kubernetes.io/os: linux
      priorityClassName: system-cluster-critical
      serviceAccountName: metrics-server
      hostNetwork: true   # run on the host network (needed in this setup for kubectl top pods)
      volumes:
      - emptyDir: {}
        name: tmp-dir
      - name: ca-ssl
        hostPath:
          path: ${CERT_PATH}
---
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
  labels:
    k8s-app: metrics-server
  name: v1beta1.metrics.k8s.io
spec:
  group: metrics.k8s.io
  groupPriorityMinimum: 100
  insecureSkipTLSVerify: true
  service:
    name: metrics-server
    namespace: kube-system
  version: v1beta1
  versionPriority: 100
EOF1
EOF

Run the script

bash -x gen_metrics-server_config.sh

# Generated in the current directory
# └── metrics-server.yaml

Apply the yaml resource file and check the result

kubectl apply -f metrics-server.yaml

kubectl get pods -n kube-system
# NAME                              READY   STATUS    RESTARTS   AGE
# metrics-server-668477f7f9-xz8l5   1/1     Running   0          36s

# If metrics-server fails to start, check its logs to see where the problem is
kubectl logs -f metrics-server-668477f7f9-xz8l5 -n kube-system
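
Before relying on kubectl top, it is worth confirming that the aggregated metrics API itself is registered and serving. A short check (assuming jq is available):

# the APIService must report Available=True
kubectl get apiservices v1beta1.metrics.k8s.io

# query the metrics API directly through the apiserver
kubectl get --raw /apis/metrics.k8s.io/v1beta1/nodes | jq .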

Check resource consumption

kubectl top nodes
# NAME   CPU(cores)   CPU%   MEMORY(bytes)   MEMORY%
# m01    320m         16%    1835Mi          23%
# w01    155m         7%     739Mi           88%
# w02    130m         6%     696Mi           83%

kubectl top pods -A
# NAMESPACE     NAME                                       CPU(cores)   MEMORY(bytes)
# kube-system   calico-kube-controllers-555bc4b957-x8fh9   4m           20Mi
# kube-system   calico-node-jxp96                          85m          142Mi
# kube-system   calico-node-lfnr5                          81m          160Mi
# kube-system   calico-node-mj2tj                          94m          153Mi
# kube-system   coredns-7f8b8f7b8-dfn7s                    2m           20Mi
# kube-system   metrics-server-668477f7f9-p9z85            5m           16Mi

Unset environment variables that are no longer needed

unset NODE_M_IP
unset NODE_W1_IP
unset NODE_W2_IP
unset ETCDCTL_API
unset ENDPOINTS
unset HOST
unset SSHPASS

Verify the cluster