Stolon keeper 一直无法使用 Consul 后端初始化 pg 数据库

问题描述 投票:0回答:1

尝试在 docker swarm 上设置 Stolon,现在为了简化,我让所有服务在同一主机上的管理器节点上运行。

无论如何,我似乎都无法消除来自 keeper 的如下错误日志消息:

Keeper 日志

app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1    | Starting Stolon as a keeper...
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1    | Waiting for Consul to be ready at consul:8500...
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1    | Waiting for Consul to start...
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1    | Waiting for Consul to start...
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1    | Consul is ready.
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1    | 2024-04-22T13:18:57.328Z   INFO    cmd/keeper.go:2091   exclusive lock on data dir taken
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1    | 2024-04-22T13:18:57.332Z   INFO    cmd/keeper.go:569    keeper uid       {"uid": "postgres_dsyf1a7juv4u1iwyjj6434ldx"}
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1    | 2024-04-22T13:18:57.337Z   INFO    cmd/keeper.go:1048   no cluster data available, waiting for it to appear
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1    | 2024-04-22T13:19:02.345Z   INFO    cmd/keeper.go:1080   our keeper data is not available, waiting for it to appear
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1    | 2024-04-22T13:19:07.347Z   INFO    cmd/keeper.go:1080   our keeper data is not available, waiting for it to appear
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1    | 2024-04-22T13:19:12.349Z   INFO    cmd/keeper.go:1080   our keeper data is not available, waiting for it to appear
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1    | 2024-04-22T13:19:17.352Z   INFO    cmd/keeper.go:1141   current db UID different than cluster data db UID        {"db": "", "cdDB": "8198992d"}
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1    | 2024-04-22T13:19:17.352Z   INFO    cmd/keeper.go:1148   initializing the database cluster
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1    | 2024-04-22T13:19:17.384Z   ERROR   cmd/keeper.go:1174   failed to stop pg instance       {"error": "cannot get instance state: exit status 1"}
app_stack_stolon-keeper.1.dsyf1a7juv4u@manager1    | 2024-04-22T13:19:22.387Z   ERROR   cmd/keeper.go:1110   db failed to initialize or resync

Docker Compose 文件

version: '3.8'

services:
  consul:
    image: dockerhub-user/app-consul:latest
    volumes:
      - console_data:/consul/data
    ports:
      # All mappings quoted consistently: an unquoted HOST:CONTAINER pair can
      # be read as a base-60 integer by YAML 1.1 parsers.
      - '8500:8500'                # Consul UI and HTTP API
      - '8400:8400'                # legacy CLI RPC
      - '8301-8302:8301-8302'      # serf LAN/WAN gossip (tcp)
      - '8301-8302:8301-8302/udp'  # serf LAN/WAN gossip (udp)
      - '8600:8600'                # DNS interface (tcp)
      - '8600:8600/udp'            # DNS interface (udp)
    networks:
      - shared_swarm_network
    deploy:
      placement:
        constraints: [node.role == manager]  # change to worker later if needed
      restart_policy:
        condition: on-failure
    environment:
      CONSUL_BIND_INTERFACE: 'eth0'
      CONSUL_CLIENT_INTERFACE: 'eth0'
    # Single-node server with the UI enabled. `-bootstrap` expects exactly one
    # server, so do not scale this service beyond 1 replica without switching
    # to -bootstrap-expect.
    command: "agent -server -ui -bootstrap -client=0.0.0.0 -bind={{ GetInterfaceIP 'eth0' }} -data-dir=/consul/data"

  # Managing Stolon clusters, providing operational control.
  stolon-ctl:
    image: dockerhub-user/app-stolon-ctl:latest
    # NOTE(review): `depends_on` is ignored by `docker stack deploy` (swarm);
    # start ordering must be handled inside the images themselves, e.g. by
    # polling Consul in the entrypoint.
    depends_on:
      - consul
    networks:
      - shared_swarm_network
    deploy:
      placement:
        constraints: [node.role == manager]

  # Runs Stolon Keeper managing PostgreSQL data persistence and replication.
  stolon-keeper:
    image: dockerhub-user/app-stolon:latest
    depends_on:
      - stolon-ctl
      - consul
    environment:
      - ROLE=keeper
      - STKEEPER_UID=postgres_{{.Task.ID}}
      - PG_REPL_USERNAME=repluser
      - PG_REPL_PASSWORD=replpass
      - PG_SU_USERNAME=postgres
      - PG_SU_PASSWORD=postgres
      - PG_APP_USER=app_user
      - PG_APP_PASSWORD=mysecurepassword
      - PG_APP_DB=app_db
    volumes:
      - stolon_data:/stolon/data
      # NOTE(review): the keeper initializes PostgreSQL underneath its own
      # --data-dir (/stolon/data), so this pg_data mount is probably unused
      # by stolon — confirm before relying on it.
      - pg_data:/var/lib/postgresql/data
      - pg_log:/var/log/postgresql
    networks:
      - shared_swarm_network
    deploy:
      placement:
        constraints: [node.role == manager]

  # Deploys Stolon Sentinel for monitoring and orchestrating cluster failovers.
  stolon-sentinel:
    image: dockerhub-user/app-stolon:latest
    environment:
      - ROLE=sentinel
    networks:
      - shared_swarm_network
    deploy:
      placement:
        constraints: [node.role == manager]
    depends_on:
      - stolon-keeper
      - consul

volumes:
  stolon_data:
  console_data:
  pg_data:
  pg_log:

networks:
  shared_swarm_network:
    external: true

Dockerfile

# Use the official PostgreSQL image as a base
FROM postgres:16.2

# Define the version of Stolon being used (key=value form; the legacy
# space-separated ENV syntax is deprecated).
ENV STOLON_VERSION=v0.17.0

# Install, download, extract and clean up in a SINGLE layer: deleting files in
# a later RUN does not shrink earlier layers.  `unzip` was dropped — the
# release is a .tar.gz and unzip was never used.  `curl -f` makes the build
# fail on an HTTP error instead of saving an error page as the tarball.
RUN apt-get update && \
    apt-get install -y --no-install-recommends curl ca-certificates && \
    curl -fL "https://github.com/sorintlab/stolon/releases/download/${STOLON_VERSION}/stolon-${STOLON_VERSION}-linux-amd64.tar.gz" -o stolon.tar.gz && \
    mkdir -p /stolon-installation && \
    tar -xzf stolon.tar.gz -C /stolon-installation && \
    mv /stolon-installation/*/bin/* /usr/local/bin/ && \
    rm -rf /stolon-installation stolon.tar.gz /var/lib/apt/lists/*

# Verify binaries are in the expected location (fails the build if missing)
RUN ls /usr/local/bin/stolon-*

# Set up environment variables consumed by entrypoint.sh
ENV STOLONCTL_CLUSTER_NAME=stolon-cluster \
    STOLONCTL_STORE_BACKEND=consul \
    STOLONCTL_STORE_URL=http://consul:8500 \
    CONSUL_PORT=8500 \
    STKEEPER_DATA_DIR=/stolon/data \
    PG_DATA_DIR=/var/lib/postgresql/data \
    PG_BIN_PATH=/usr/lib/postgresql/16/bin \
    PG_PORT=5432

# Expose PostgreSQL and Stolon proxy ports
EXPOSE 5432 5433

# Copy the entrypoint script into the container and make it executable
COPY script/entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

# Set the entrypoint script as the entrypoint for the container
ENTRYPOINT ["/entrypoint.sh"]

entrypoint.sh

#!/bin/bash
# Entrypoint dispatching on $ROLE: keeper | sentinel | proxy.
set -u

# First IP address reported by the container; used as the keeper's
# PostgreSQL listen address.
IP_ADDRESS=$(hostname -I | awk '{print $1}')

# Host to poll for Consul.  The original reused $STOLONCTL_STORE_BACKEND
# (the literal backend name "consul") as a hostname, which only worked
# because the backend name happens to match the compose service name.
CONSUL_HOST="${CONSUL_HOST:-consul}"

if [ "$ROLE" = "sentinel" ]; then
    # Wait until the keeper has registered itself in the Consul KV store.
    # BUG fixed: $KEEPER_ID was never set anywhere, so `grep -q ""` matched
    # any non-empty response and the wait was effectively a no-op.
    if [ -n "${KEEPER_ID:-}" ]; then
        until curl -s "http://${CONSUL_HOST}:${CONSUL_PORT}/v1/kv/stolon/cluster/${STOLONCTL_CLUSTER_NAME}/keepers/info?keys" | grep -q "$KEEPER_ID"; do
            echo "Keeper not registered in Consul, waiting..."
            sleep 1
        done
        echo "Keeper is registered in Consul."
    else
        echo "KEEPER_ID not set; skipping keeper-registration wait."
    fi
fi

case "$ROLE" in
  "keeper")
    # NOTE(review): initdb/pg_ctl refuse to run as root.  If this container
    # starts as root, the keeper's "cannot get instance state: exit status 1"
    # failure reported in the logs is the expected symptom — run the keeper
    # as the postgres user (e.g. `exec gosu postgres stolon-keeper ...`) and
    # make sure the data dirs are owned by that user.  Confirm against the
    # image's USER setting.
    exec stolon-keeper \
      --data-dir "$STKEEPER_DATA_DIR" \
      --cluster-name "$STOLONCTL_CLUSTER_NAME" \
      --store-backend "$STOLONCTL_STORE_BACKEND" \
      --store-endpoints "$STOLONCTL_STORE_URL" \
      --pg-listen-address "$IP_ADDRESS" \
      --pg-repl-username "$PG_REPL_USERNAME" \
      --pg-repl-password "$PG_REPL_PASSWORD" \
      --pg-su-username "$PG_SU_USERNAME" \
      --pg-su-password "$PG_SU_PASSWORD" \
      --uid "$STKEEPER_UID" \
      --pg-bin-path "$PG_BIN_PATH" \
      --pg-port "$PG_PORT"
    ;;
  "sentinel")
    exec stolon-sentinel \
      --cluster-name "$STOLONCTL_CLUSTER_NAME" \
      --store-backend "$STOLONCTL_STORE_BACKEND" \
      --store-endpoints "$STOLONCTL_STORE_URL"
    ;;
  "proxy")
    exec stolon-proxy \
      --cluster-name "$STOLONCTL_CLUSTER_NAME" \
      --store-backend "$STOLONCTL_STORE_BACKEND" \
      --store-endpoints "$STOLONCTL_STORE_URL" \
      --listen-address 0.0.0.0
    ;;
  *)
    echo "Unknown role: ${ROLE:-<unset>}" >&2
    exit 1
    ;;
esac

检查了网络连接,consul 已启动并运行良好,sentinel 和 proxy 也按预期工作,尽管有待数据库准备就绪。

postgresql devops consul stolon
1个回答
0
投票

请确认您是否已使用经过身份验证的用户启动集群?

© www.soinside.com 2019 - 2024. All rights reserved.