我的项目使用 NATS Streaming 集群。我想用 Docker Swarm 在三台服务器上把它启动起来,但只有 2 个节点启动成功。未正常工作的节点输出如下日志:
docker-stack.yml:
# Docker Swarm stack describing a three-node NATS Streaming cluster.
# Each service is pinned to one host by a placement constraint and stores
# its message store (/data/msg) and RAFT log (/data/log) in its own volume.
version: "3.8"

networks:
  # Overlay network the three services attach to.
  network:
    driver: overlay
    attachable: true

services:
  # Node 1: the only service started with --cluster_bootstrap.
  # NOTE(review): this node has no --routes entry and nodes 2/3 only list
  # node 1 as a route -- consider listing every peer on every node; confirm
  # against the NATS clustering documentation before changing.
  nats-streaming-1:
    image: "nats-streaming:0.25.5"
    command:
      - "-sc"
      - "/etc/stan.conf"
      - "--cluster"
      - "nats://0.0.0.0:6222"
      - "--cluster_id"
      - "$NATS_CLUSTER_NAME"
      - "--clustered"
      - "--cluster_bootstrap"
      - "--cluster_log_path"
      - "/data/log"
      - "--cluster_node_id"
      - "nats-streaming-1"
      - "--cluster_raft_logging"
      - "--debug"
      - "--dir"
      - "/data/msg"
      - "--http_port"
      - "8222"
      - "--port"
      - "4222"
      - "--store"
      - "file"
      - "--stan_debug"
      - "--hb_interval"
      - "$NATS_HB_INTERVAL"
      - "--hb_fail_count"
      - "$NATS_HB_FAIL_COUNT"
      - "--hb_timeout"
      - "$NATS_HB_TIMEOUT"
      - "-mc"
      - "$NATS_STAN_MAX_CHNS"
      - "-mm"
      - "$NATS_STAN_MAX_MSGS"
      - "-mb"
      - "$NATS_MAX_BYTES"
    networks:
      - network
    ports:
      - "$NATS_PORT1:4222"
      - "$NATS_HTTP_PORT1:8222"
    volumes:
      - "nats-streaming-1:/data"
      # NOTE(review): relative host path here, while the other two services
      # mount /home/master/stan.conf -- confirm this resolves to an existing
      # file on alex-swarm-3a.
      - "./stan.conf:/etc/stan.conf"
    logging:
      options:
        max-size: "100m"
    deploy:
      placement:
        max_replicas_per_node: 1
        constraints:
          - "node.hostname == alex-swarm-3a"

  # Node 2: no bootstrap flag; solicits a route to nats-streaming-1.
  nats-streaming-2:
    image: "nats-streaming:0.25.5"
    command:
      - "-sc"
      - "/etc/stan.conf"
      - "--cluster"
      - "nats://0.0.0.0:6222"
      - "--cluster_id"
      - "$NATS_CLUSTER_NAME"
      - "--clustered"
      - "--cluster_log_path"
      - "/data/log"
      - "--cluster_node_id"
      - "nats-streaming-2"
      - "--cluster_raft_logging"
      - "--debug"
      - "--dir"
      - "/data/msg"
      - "--http_port"
      - "8222"
      - "--port"
      - "4222"
      - "--store"
      - "file"
      - "--stan_debug"
      - "--routes"
      - "nats://nats-streaming-1:6222"
      - "--hb_interval"
      - "$NATS_HB_INTERVAL"
      - "--hb_fail_count"
      - "$NATS_HB_FAIL_COUNT"
      - "--hb_timeout"
      - "$NATS_HB_TIMEOUT"
      - "-mc"
      - "$NATS_STAN_MAX_CHNS"
      - "-mm"
      - "$NATS_STAN_MAX_MSGS"
      - "-mb"
      - "$NATS_MAX_BYTES"
    networks:
      - network
    ports:
      - "$NATS_PORT2:4222"
      - "$NATS_HTTP_PORT2:8222"
    volumes:
      - "nats-streaming-2:/data"
      - "/home/master/stan.conf:/etc/stan.conf"
    logging:
      options:
        max-size: "100m"
    deploy:
      placement:
        max_replicas_per_node: 1
        constraints:
          - "node.hostname == alex-swarm-3b"

  # Node 3: no bootstrap flag; solicits a route to nats-streaming-1.
  nats-streaming-3:
    image: "nats-streaming:0.25.5"
    command:
      - "-sc"
      - "/etc/stan.conf"
      - "--cluster"
      - "nats://0.0.0.0:6222"
      - "--cluster_id"
      - "$NATS_CLUSTER_NAME"
      - "--clustered"
      - "--cluster_log_path"
      - "/data/log"
      - "--cluster_node_id"
      - "nats-streaming-3"
      - "--cluster_raft_logging"
      - "--debug"
      - "--dir"
      - "/data/msg"
      - "--http_port"
      - "8222"
      - "--port"
      - "4222"
      - "--store"
      - "file"
      - "--stan_debug"
      - "--routes"
      - "nats://nats-streaming-1:6222"
      - "--hb_interval"
      - "$NATS_HB_INTERVAL"
      - "--hb_fail_count"
      - "$NATS_HB_FAIL_COUNT"
      - "--hb_timeout"
      - "$NATS_HB_TIMEOUT"
      - "-mc"
      - "$NATS_STAN_MAX_CHNS"
      - "-mm"
      - "$NATS_STAN_MAX_MSGS"
      - "-mb"
      - "$NATS_MAX_BYTES"
    networks:
      - network
    ports:
      - "$NATS_PORT3:4222"
      - "$NATS_HTTP_PORT3:8222"
    volumes:
      - "nats-streaming-3:/data"
      - "/home/master/stan.conf:/etc/stan.conf"
    logging:
      options:
        max-size: "100m"
    deploy:
      placement:
        max_replicas_per_node: 1
        constraints:
          - "node.hostname == alex-swarm-3c"

# One named volume per node, mounted at /data by the matching service.
volumes:
  nats-streaming-1: {}
  nats-streaming-2: {}
  nats-streaming-3: {}
文档中的说明如下:“这是因为服务器恢复了 Streaming 状态(由 -dir 指向,位于已挂载的卷中),但没有恢复 RAFT 专用状态;该状态默认存储在以集群 ID(cluster id)命名的目录中,该目录相对于启动可执行文件时的当前目录。在容器环境中,这些数据会在容器停止后丢失。”我已经为数据目录挂载了卷,并将 --cluster_log_path 设置为“/data/log”,但问题依旧。
问题在于:与 NATS 一同部署的许多其他服务,在 NATS 的所有副本启动完成之前就已经启动,并开始向其写入数据。
请注意,NATS Streaming 已终止维护(EOL)。NATS 现已内置名为 JetStream 的持久化层:https://docs.nats.io/nats-concepts/jetstream 。这里有一个讨论迁移的网络研讨会:https://youtu.be/yKI9YmLx_8A ,这里还有一个迁移示例:https://natsbyexample.com/examples/operations/stan2js/cli