I'm trying to set up Stolon on Docker Swarm. For now, to keep things simple, I'm running all of the services on the manager node of a single host. For the life of me, I can't get rid of the following error messages from the keeper:
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1 | Starting Stolon as a keeper...
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1 | Waiting for Consul to be ready at consul:8500...
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1 | Waiting for Consul to start...
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1 | Waiting for Consul to start...
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1 | Consul is ready.
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1 | 2024-04-22T13:18:57.328Z INFO cmd/keeper.go:2091 exclusive lock on data dir taken
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1 | 2024-04-22T13:18:57.332Z INFO cmd/keeper.go:569 keeper uid {"uid": "postgres_dsyf1a7juv4u1iwyjj6434ldx"}
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1 | 2024-04-22T13:18:57.337Z INFO cmd/keeper.go:1048 no cluster data available, waiting for it to appear
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1 | 2024-04-22T13:19:02.345Z INFO cmd/keeper.go:1080 our keeper data is not available, waiting for it to appear
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1 | 2024-04-22T13:19:07.347Z INFO cmd/keeper.go:1080 our keeper data is not available, waiting for it to appear
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1 | 2024-04-22T13:19:12.349Z INFO cmd/keeper.go:1080 our keeper data is not available, waiting for it to appear
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1 | 2024-04-22T13:19:17.352Z INFO cmd/keeper.go:1141 current db UID different than cluster data db UID {"db": "", "cdDB": "8198992d"}
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1 | 2024-04-22T13:19:17.352Z INFO cmd/keeper.go:1148 initializing the database cluster
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1 | 2024-04-22T13:19:17.384Z ERROR cmd/keeper.go:1174 failed to stop pg instance {"error": "cannot get instance state: exit status 1"}
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1 | 2024-04-22T13:19:22.387Z ERROR cmd/keeper.go:1110 db failed to initialize or resync
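If I read the stolon source right, "cannot get instance state" means the keeper's internal pg_ctl status probe failed. A minimal sketch of reproducing that probe by hand, assuming stolon keeps its PostgreSQL data under the default $STKEEPER_DATA_DIR/postgres and that the name filter matches your stack (both are assumptions, adjust to taste):

# Hypothetical container lookup; adjust the name filter to your stack.
KEEPER_CID=$(docker ps -qf name=app_stack_stolon-keeper)

# Run roughly the same status probe the keeper runs internally. A non-zero
# exit here usually points at a permissions problem (pg_ctl refuses to run
# as root, for instance) or a missing/unreadable data directory.
docker exec -it "$KEEPER_CID" \
    /usr/lib/postgresql/16/bin/pg_ctl status -D /stolon/data/postgres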
My docker-compose.yml:

version: '3.8'

services:
  consul:
    image: dockerhub-user/app-consul:latest
    volumes:
      - consul_data:/consul/data
    ports:
      - "8500:8500" # Expose the Consul UI and API port
      - "8400:8400"
      - "8301-8302:8301-8302"
      - "8301-8302:8301-8302/udp"
      - "8600:8600"
      - "8600:8600/udp"
    networks:
      - shared_swarm_network
    deploy:
      placement:
        constraints: [node.role == manager] # change to worker later if needed
      restart_policy:
        condition: on-failure
    environment:
      CONSUL_BIND_INTERFACE: 'eth0'
      CONSUL_CLIENT_INTERFACE: 'eth0'
    command: 'agent -server -ui -bootstrap -client=0.0.0.0 -bind={{ GetInterfaceIP "eth0" }} -data-dir=/consul/data'

  # Manages the Stolon cluster, providing operational control.
  stolon-ctl:
    image: dockerhub-user/app-stolon-ctl:latest
    depends_on:
      - consul
    networks:
      - shared_swarm_network
    deploy:
      placement:
        constraints: [node.role == manager]

  # Runs the Stolon keeper, managing PostgreSQL data persistence and replication.
  stolon-keeper:
    image: dockerhub-user/app-stolon:latest
    depends_on:
      - stolon-ctl
      - consul
    environment:
      - ROLE=keeper
      - STKEEPER_UID=postgres_{{.Task.ID}}
      - PG_REPL_USERNAME=repluser
      - PG_REPL_PASSWORD=replpass
      - PG_SU_USERNAME=postgres
      - PG_SU_PASSWORD=postgres
      - PG_APP_USER=app_user
      - PG_APP_PASSWORD=mysecurepassword
      - PG_APP_DB=app_db
    volumes:
      - stolon_data:/stolon/data
      - pg_data:/var/lib/postgresql/data
      - pg_log:/var/log/postgresql
    networks:
      - shared_swarm_network
    deploy:
      placement:
        constraints: [node.role == manager]

  # Runs the Stolon sentinel, monitoring and orchestrating cluster failover.
  stolon-sentinel:
    image: dockerhub-user/app-stolon:latest
    environment:
      - ROLE=sentinel
    networks:
      - shared_swarm_network
    deploy:
      placement:
        constraints: [node.role == manager]
    depends_on:
      - stolon-keeper
      - consul

volumes:
  stolon_data:
  consul_data:
  pg_data:
  pg_log:

networks:
  shared_swarm_network:
    external: true
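While the keeper sits in its waiting loops, I find it useful to look at what has actually been written to the store. A rough sketch, run from any container attached to shared_swarm_network (the consul hostname only resolves there); the flags mirror the ones the entrypoint script further down passes:

# List the keys stolon has written for this cluster in Consul's KV store.
curl -s 'http://consul:8500/v1/kv/stolon/cluster/stolon-cluster/?keys'

# Ask stolonctl for its view of the cluster.
stolonctl --cluster-name stolon-cluster \
    --store-backend consul \
    --store-endpoints http://consul:8500 \
    status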
The Dockerfile for the Stolon image:

# Use the official PostgreSQL image as a base
FROM postgres:16.2

# Define the version of Stolon being used
ENV STOLON_VERSION=v0.17.0

# Install necessary packages
RUN apt-get update && \
    apt-get install -y curl unzip && \
    rm -rf /var/lib/apt/lists/*

# Download and extract Stolon
RUN curl -L https://github.com/sorintlab/stolon/releases/download/${STOLON_VERSION}/stolon-${STOLON_VERSION}-linux-amd64.tar.gz -o stolon.tar.gz && \
    mkdir -p /stolon-installation && \
    tar -xzf stolon.tar.gz -C /stolon-installation && \
    ls /stolon-installation && \
    mv /stolon-installation/*/bin/* /usr/local/bin/

# Clean up installation files
RUN rm -rf /stolon-installation stolon.tar.gz && \
    apt-get purge -y --auto-remove unzip

# Verify binaries are in the expected location
RUN ls /usr/local/bin/stolon-*

# Set up environment variables
ENV STOLONCTL_CLUSTER_NAME=stolon-cluster \
    STOLONCTL_STORE_BACKEND=consul \
    STOLONCTL_STORE_URL=http://consul:8500 \
    CONSUL_PORT=8500 \
    STKEEPER_DATA_DIR=/stolon/data \
    PG_DATA_DIR=/var/lib/postgresql/data \
    PG_BIN_PATH=/usr/lib/postgresql/16/bin \
    PG_PORT=5432

# Expose PostgreSQL and Stolon proxy ports
EXPOSE 5432 5433

# Copy the entrypoint script into the container
COPY script/entrypoint.sh /entrypoint.sh

# Make the entrypoint script executable
RUN chmod +x /entrypoint.sh

# Set the entrypoint script as the entrypoint for the container
ENTRYPOINT ["/entrypoint.sh"]
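After building, I sanity-check that the binaries landed where the entrypoint expects them (the image tag is assumed to match the compose file):

docker build -t dockerhub-user/app-stolon:latest .
docker run --rm --entrypoint /bin/bash dockerhub-user/app-stolon:latest \
    -c 'ls -l /usr/local/bin/stolon-*'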
And script/entrypoint.sh:

#!/bin/bash

# Fetch the IP address of the container
IP_ADDRESS=$(hostname -I | awk '{print $1}')

if [ "$ROLE" = "sentinel" ]; then
    # Wait until a keeper has registered itself in the Consul KV store.
    # NB: this URL only works because the store backend name ("consul")
    # happens to match the Consul service's hostname. KEEPER_ID is not set
    # anywhere in this script or the compose file, so with an empty value
    # the grep matches any non-empty response.
    while ! curl -s "http://$STOLONCTL_STORE_BACKEND:$CONSUL_PORT/v1/kv/stolon/cluster/$STOLONCTL_CLUSTER_NAME/keepers/info?keys" | grep -q "$KEEPER_ID"; do
        echo "Keeper not registered in Consul, waiting..."
        sleep 1
    done
    echo "Keeper is registered in Consul."
fi

case "$ROLE" in
    "keeper")
        exec stolon-keeper \
            --data-dir "$STKEEPER_DATA_DIR" \
            --cluster-name "$STOLONCTL_CLUSTER_NAME" \
            --store-backend "$STOLONCTL_STORE_BACKEND" \
            --store-endpoints "$STOLONCTL_STORE_URL" \
            --pg-listen-address "$IP_ADDRESS" \
            --pg-repl-username "$PG_REPL_USERNAME" \
            --pg-repl-password "$PG_REPL_PASSWORD" \
            --pg-su-username "$PG_SU_USERNAME" \
            --pg-su-password "$PG_SU_PASSWORD" \
            --uid "$STKEEPER_UID" \
            --pg-bin-path "$PG_BIN_PATH" \
            --pg-port "$PG_PORT"
        ;;
    "sentinel")
        exec stolon-sentinel \
            --cluster-name "$STOLONCTL_CLUSTER_NAME" \
            --store-backend "$STOLONCTL_STORE_BACKEND" \
            --store-endpoints "$STOLONCTL_STORE_URL"
        ;;
    "proxy")
        exec stolon-proxy \
            --cluster-name "$STOLONCTL_CLUSTER_NAME" \
            --store-backend "$STOLONCTL_STORE_BACKEND" \
            --store-endpoints "$STOLONCTL_STORE_URL" \
            --listen-address 0.0.0.0
        ;;
    *)
        echo "Unknown role: $ROLE"
        exit 1
        ;;
esac
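For context: the one-time cluster initialization is supposed to happen in the stolon-ctl service (its script isn't shown here), and the "cdDB": "8198992d" entry in the logs above suggests cluster data was in fact created. A rough sketch of what that init amounts to, reusing the same variables as the script above:

# One-time cluster initialization against the Consul store.
stolonctl --cluster-name "$STOLONCTL_CLUSTER_NAME" \
    --store-backend "$STOLONCTL_STORE_BACKEND" \
    --store-endpoints "$STOLONCTL_STORE_URL" \
    init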
I've checked the network connectivity: Consul is up and running fine, and the sentinel and proxy also behave as expected, apart from waiting for the database to become ready.
Can you confirm whether you started the cluster with an authenticated user?