Featured image of post docker swarm Ingress网络VIP耗尽

docker swarm Ingress网络VIP耗尽

docker swarm

遇到了这样一个问题:程序以service的方式运行在docker swarm里面,因为默认的ingress网络是/24的,service数量到125个以后VIP就耗尽了,没办法再创建新的service(就算给service指定其他的overlay网络也没有用,因为service固定需要在ingress网络上分配一个VIP)。

level=error msg="Could not parse VIP address while releasing"

level=error msg="error deallocating vip" error="invalid CIDR address: " vip.addr= vip.network=

目前引入CNI存在一些困难,又不想自己实现DNSRR策略(官方的建议是用dnsrr,但是那要引入很多东西,还要修改现有代码)。

所以想着把ingress网络的网段改大,为此写了个工具,具体代码如下:

相关类型定义:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
// ServiceConfig is the per-service snapshot that recordSvc serializes to
// services.json and that rebuildSvc reads back to recreate the service.
// Only the fields listed here survive the delete/recreate cycle.
//
// NOTE(review): the JSON key casing is inconsistent ("Name", "targetPort",
// "ENV"); the keys define the services.json format, so they are left as is.
type ServiceConfig struct {
    // Name is the swarm service name.
    Name        string            `json:"Name"`
    // Image is the container image reference of the service's task template.
    Image       string            `json:"Image"`
    // Labels are the service-level labels.
    Labels      map[string]string `json:"Labels"`
    // TargetPort is the container-side port of the recorded endpoint port.
    TargetPort  uint32            `json:"targetPort"`
    // PublishPort is the published port of the recorded endpoint port.
    PublishPort uint32            `json:"publishPort"`
    // Env is the container environment of the service's task template.
    Env         []string          `json:"ENV"`
}

// Config mirrors the YAML configuration file that main loads from
// ./config/config.yml.
type Config struct {
    // Host lists the machines on which reloadDocker restarts the Docker
    // service over SSH, authenticating with Username and Password.
    Host []struct {
        IP       string `yaml:"ip"`
        Port     int    `yaml:"port"`
        Username string `yaml:"username"`
        Password string `yaml:"password"`
    } `yaml:"host"`
    // Ingress holds the subnet and gateway that rebuildIngress passes to the
    // IPAM configuration of the recreated ingress network.
    Ingress struct {
        Subnet  string `yaml:"subnet"`
        Gateway string `yaml:"gateway"`
    } `yaml:"ingress"`
}

// sugarLogger is the package-wide logger; it is assigned in init.
var sugarLogger *zap.SugaredLogger

程序入口:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
// main widens the swarm ingress network in five steps: snapshot the running
// services, delete them, remove the ingress network, recreate it with the
// subnet and gateway from ./config/config.yml, and finally recreate the
// services from the snapshot. Any failure aborts the run with a panic.
func main() {
    ctx := context.Background()

    // NOTE(review): WithVersion pins the API version, which presumably makes
    // WithAPIVersionNegotiation ineffective — confirm against the Docker
    // client documentation.
    dockerClient, err := client.NewClientWithOpts(
        client.WithAPIVersionNegotiation(),
        client.WithVersion("1.41"),
    )
    if err != nil {
        panic(err)
    }

    // Load and parse the configuration before any destructive step runs.
    raw, err := os.ReadFile("./config/config.yml")
    if err != nil {
        panic(err)
    }
    var config Config
    if err := yaml.Unmarshal(raw, &config); err != nil {
        panic(err)
    }

    // Snapshot the services, then tear everything down.
    recordSvc(ctx, dockerClient)
    delService(ctx, dockerClient)
    delIngress(ctx, dockerClient, config)

    // Bring the ingress network and the services back.
    rebuildIngress(ctx, dockerClient, config)
    if err := rebuildSvc(ctx, dockerClient); err != nil {
        panic(err)
    }
}

// init builds the package-wide development logger and stores it in
// sugarLogger.
//
// The encoder prints a colored, capitalized level, ISO 8601 timestamps, the
// full caller path and uses " | " as the console field separator. It panics
// if the logger cannot be built.
func init() {
    cfg := zap.NewDevelopmentConfig()

    enc := &cfg.EncoderConfig
    enc.MessageKey = "msg"
    enc.TimeKey = "ts"
    enc.LineEnding = zapcore.DefaultLineEnding
    enc.ConsoleSeparator = " | "
    enc.EncodeLevel = zapcore.CapitalColorLevelEncoder
    enc.EncodeTime = zapcore.ISO8601TimeEncoder
    enc.EncodeDuration = zapcore.StringDurationEncoder
    // EncodeCaller used to be assigned twice (ShortCallerEncoder, then
    // FullCallerEncoder); only the last assignment ever took effect, so the
    // dead one is dropped and the effective encoder is kept.
    enc.EncodeCaller = zapcore.FullCallerEncoder

    logger, err := cfg.Build()
    if err != nil {
        panic(err)
    }

    sugarLogger = logger.Sugar()
}

ingress.go

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
// delIngress removes the network named "ingress" if it shows up in the
// network list, then restarts Docker on every configured host through
// reloadDocker. The restart happens whether or not a network was removed.
// It panics if listing or removing the network fails.
func delIngress(ctx context.Context, dockerClient *client.Client, config Config) {
    const ingressName = "ingress"

    existing, err := dockerClient.NetworkList(ctx, types.NetworkListOptions{})
    if err != nil {
        panic(err)
    }

    for i := range existing {
        if existing[i].Name != ingressName {
            continue
        }
        sugarLogger.Infoln("ingress 网络存在 ")
        if err := dockerClient.NetworkRemove(ctx, ingressName); err != nil {
            panic(err)
        }
        sugarLogger.Infoln("ingress 已经被移除 ")
    }

    sugarLogger.Infoln("正在重启dockerd, 请等待...")
    reloadDocker(config)
}

// rebuildIngress creates a swarm-scoped overlay network named "ingress",
// flagged as the ingress network, using the subnet and gateway from
// config.Ingress, and then restarts Docker on every configured host through
// reloadDocker. It panics if the network cannot be created.
func rebuildIngress(ctx context.Context, dockerClient *client.Client, config Config) {
    ipam := &network.IPAM{
        Driver: "default",
        Config: []network.IPAMConfig{{
            Subnet:  config.Ingress.Subnet,
            Gateway: config.Ingress.Gateway,
        }},
    }

    // Driver options: a fixed VXLAN ID and a reduced MTU.
    driverOpts := map[string]string{
        "com.docker.network.driver.overlay.vxlanid_list": "4098",
        "com.docker.network.mtu":                         "1400",
    }

    // EnableIPv6, Internal, Attachable and ConfigOnly are left at their false
    // zero values.
    spec := types.NetworkCreate{
        Driver:     "overlay",
        Scope:      "swarm",
        Ingress:    true,
        IPAM:       ipam,
        ConfigFrom: &network.ConfigReference{Network: ""},
        Options:    driverOpts,
        Labels:     map[string]string{},
    }

    created, err := dockerClient.NetworkCreate(ctx, "ingress", spec)
    if err != nil {
        panic(err)
    }
    sugarLogger.Infof("Ingress网络%s创建成功 ", created.ID)

    sugarLogger.Infoln("正在重启dockerd, 请等待...")
    reloadDocker(config)
}

// reloadDocker restarts the Docker service on every host in config.Host, one
// host after another, by running "systemctl restart docker" over SSH with
// password authentication.
//
// Any failure is reported through sugarLogger.Fatalf, which logs the message
// and terminates the process.
//
// NOTE(review): host keys are not verified (ssh.InsecureIgnoreHostKey), so
// the connection is open to man-in-the-middle attacks; acceptable only on a
// trusted network.
func reloadDocker(config Config) {
    for _, host := range config.Host {
        addr := host.IP + ":" + strconv.Itoa(host.Port)
        restartDockerOverSSH(addr, host.Username, host.Password)
        sugarLogger.Infoln(host.IP, "docker已重启 ")
    }
}

// restartDockerOverSSH opens one SSH connection to addr, runs the restart
// command in a single session and closes both before returning. Keeping this
// in its own function makes the deferred Close calls run per host instead of
// piling up until the whole host loop has finished.
func restartDockerOverSSH(addr, username, password string) {
    sshConfig := &ssh.ClientConfig{
        User: username,
        Auth: []ssh.AuthMethod{
            ssh.Password(password),
        },
        HostKeyCallback: ssh.InsecureIgnoreHostKey(),
    }

    conn, err := ssh.Dial("tcp", addr, sshConfig)
    if err != nil {
        sugarLogger.Fatalf("SSH远程连接失败 : %s", err)
    }
    defer conn.Close()

    session, err := conn.NewSession()
    if err != nil {
        sugarLogger.Fatalf("创建 session 失败: %s", err)
    }
    defer session.Close()

    // Run blocks until the remote command has finished.
    cmd := "systemctl restart docker"
    if err := session.Run(cmd); err != nil {
        sugarLogger.Fatalf("执行CMD命令失败: %s", err)
    }
}

service.go

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
// recordSvc snapshots every swarm service into services.json so that
// rebuildSvc can recreate them later.
//
// For each service it stores the name, labels, image, environment and the
// first endpoint port (target and published). Additional ports and any other
// part of the service spec are not recorded.
//
// A service that has no container spec or no endpoint port cannot be
// represented in the snapshot; the run is aborted with an explicit message,
// before anything has been deleted, instead of crashing on a nil dereference
// or an index-out-of-range. API, encoding and file errors panic.
func recordSvc(ctx context.Context, dockerClient *client.Client) {
    serviceList, err := dockerClient.ServiceList(ctx, types.ServiceListOptions{})
    if err != nil {
        panic(err)
    }

    // Deliberately a nil slice: with no services the file contains "null",
    // which rebuildSvc treats as "nothing to restore".
    var svcStructs []ServiceConfig
    for _, service := range serviceList {
        containerSpec := service.Spec.TaskTemplate.ContainerSpec
        if containerSpec == nil {
            sugarLogger.Fatalf("服务 %s 没有容器配置, 无法记录, 程序终止", service.Spec.Name)
        }
        ports := service.Endpoint.Ports
        if len(ports) == 0 {
            sugarLogger.Fatalf("服务 %s 没有发布端口, 无法记录, 程序终止", service.Spec.Name)
        }

        svcStructs = append(svcStructs, ServiceConfig{
            Name:        service.Spec.Name,
            Image:       containerSpec.Image,
            Labels:      service.Spec.Labels,
            TargetPort:  ports[0].TargetPort,
            PublishPort: ports[0].PublishedPort,
            Env:         containerSpec.Env,
        })
    }

    jsonBytes, err := json.MarshalIndent(svcStructs, "", "  ")
    if err != nil {
        panic(err)
    }
    // os.WriteFile creates or truncates the file and also reports an error
    // from closing it, which a deferred Close would have discarded.
    if err := os.WriteFile("services.json", jsonBytes, 0o666); err != nil {
        panic(err)
    }
    sugarLogger.Infoln("服务创建信息已被保存到services.json ")
}

// delService removes every service returned by the swarm service list,
// logging each removal by service name. It panics if listing fails or if any
// single removal fails.
func delService(ctx context.Context, dockerClient *client.Client) {
    all, err := dockerClient.ServiceList(ctx, types.ServiceListOptions{})
    if err != nil {
        panic(err)
    }

    for i := range all {
        svc := &all[i]
        if err := dockerClient.ServiceRemove(ctx, svc.ID); err != nil {
            panic(err)
        }
        sugarLogger.Infoln("服务", svc.Spec.Name, "已被删除 ")
    }

    sugarLogger.Infoln("所有服务已被移除 ")
}

// rebuildSvc recreates the services described in services.json.
//
// Each entry becomes a service with the recorded name, labels, image and
// environment, publishing the recorded port pair over TCP in ingress mode.
// The protocol is hard-coded, so only a TCP port is restored.
//
// It returns an error when the file cannot be read or parsed. When the file
// holds no services it logs a warning and returns nil. A failure to create
// one service is logged and the remaining services are still attempted,
// because by this point the originals have already been deleted; the first
// creation error is returned once the loop has finished.
func rebuildSvc(ctx context.Context, dockerClient *client.Client) error {
    data, err := os.ReadFile("services.json")
    if err != nil {
        return err
    }

    var services []ServiceConfig
    if err := json.Unmarshal(data, &services); err != nil {
        return err
    }

    // Covers both "null" and "[]" in the file. Returning instead of calling
    // os.Exit leaves the decision to stop with the caller.
    if len(services) == 0 {
        sugarLogger.Warnln("未在services.json中解析到服务配置, 程序退出 ")
        return nil
    }

    var firstErr error
    for _, service := range services {
        svcContainerspec := &swarm.ContainerSpec{
            Image: service.Image,
            Env:   service.Env,
        }
        svcEndpoint := &swarm.EndpointSpec{
            Ports: []swarm.PortConfig{
                {
                    Protocol:      "tcp",
                    TargetPort:    service.TargetPort,
                    PublishedPort: service.PublishPort,
                    PublishMode:   "ingress",
                },
            },
        }

        serviceSpec := swarm.ServiceSpec{}
        serviceSpec.Name = service.Name
        serviceSpec.Labels = service.Labels
        serviceSpec.TaskTemplate.ContainerSpec = svcContainerspec
        serviceSpec.EndpointSpec = svcEndpoint

        resp, err := dockerClient.ServiceCreate(ctx, serviceSpec, types.ServiceCreateOptions{})
        if err != nil {
            sugarLogger.Errorf("服务 %s 创建失败: %s", service.Name, err)
            if firstErr == nil {
                firstErr = err
            }
            continue
        }

        sugarLogger.Infoln("服务被创建:", resp.ID)
        if len(resp.Warnings) > 0 {
            sugarLogger.Warnln(resp.Warnings)
        }
    }

    return firstErr
}

配置文件:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
# SSH connection details for every swarm node (manager nodes included)
host:
  - ip: 192.168.1.1
    port: 22
    username: root
    password: mypass@123
  - ip: 192.168.1.2
    port: 22
    username: root
    password: mypass@123
  - ip: 192.168.1.3
    port: 22
    username: root
    password: mypass@123
# Subnet (as a network address in CIDR form) and gateway of the new ingress network
ingress:
  subnet: 10.0.0.0/20
  gateway: 10.0.0.254

执行日志示例:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
2023-05-28T14:15:00.463+0800 | INFO | expandIngress/service.go:47 | 服务创建信息已被保存到services.json
2023-05-28T14:15:00.468+0800 | INFO | expandIngress/service.go:62 | 服务 test2 已被删除
2023-05-28T14:15:00.471+0800 | INFO | expandIngress/service.go:62 | 服务 test1 已被删除
2023-05-28T14:15:00.473+0800 | INFO | expandIngress/service.go:62 | 服务 test3 已被删除
2023-05-28T14:15:00.473+0800 | INFO | expandIngress/service.go:64 | 所有服务已被移除
2023-05-28T14:15:00.475+0800 | INFO | expandIngress/network.go:20 | ingress 网络存在
2023-05-28T14:15:00.477+0800 | INFO | expandIngress/network.go:25 | ingress 已经被移除
2023-05-28T14:15:00.477+0800 | INFO | expandIngress/network.go:28 | 正在重启dockerd, 请等待...
2023-05-28T14:15:30.385+0800 | INFO | expandIngress/network.go:101 | 192.168.1.1 docker已重启
2023-05-28T14:15:48.740+0800 | INFO | expandIngress/network.go:101 | 192.168.1.2 docker已重启
2023-05-28T14:16:19.057+0800 | INFO | expandIngress/network.go:101 | 192.168.1.3 docker已重启
2023-05-28T14:16:19.060+0800 | INFO | expandIngress/network.go:66 | Ingress网络97yi8vnml8z17yu6g3rqlztoa创建成功
2023-05-28T14:16:19.060+0800 | INFO | expandIngress/network.go:67 | 正在重启dockerd, 请等待...
2023-05-28T14:16:48.984+0800 | INFO | expandIngress/network.go:101 | 192.168.1.1 docker已重启
2023-05-28T14:17:07.311+0800 | INFO | expandIngress/network.go:101 | 192.168.1.2 docker已重启
2023-05-28T14:17:37.497+0800 | INFO | expandIngress/network.go:101 | 192.168.1.3 docker已重启
2023-05-28T14:17:37.501+0800 | INFO | expandIngress/service.go:115 | 服务被创建: y0hnsrbm2k43bclo9lsrrm2hc
2023-05-28T14:17:37.504+0800 | INFO | expandIngress/service.go:115 | 服务被创建: 75t458afpwwiwn98qibet61xb
2023-05-28T14:17:37.509+0800 | INFO | expandIngress/service.go:115 | 服务被创建: oh5rnpy55hmqtqtqbvs0ict3o

本文扩展:

上面的工具针对的是已经存在service的情况;如果还没有创建任何service,只需要依次执行下面这几个命令就可以了:

1
2
# 主节点
$ docker network rm ingress
1
2
# 各个节点(含主节点)
$ systemctl restart docker
1
2
# 主节点
$ docker network create --driver overlay --ingress --subnet=10.0.0.0/20 --gateway=10.0.0.254 --opt com.docker.network.mtu=1400 ingress
1
2
# 各个节点(含主节点)
$ systemctl restart docker