| 角色 | 说明 | 数量建议 |
|---|---|---|
| Server | 存储数据,参与 Raft 共识 | 3 或 5 |
| Client | 转发请求,本地缓存 | 每台应用服务器 |
| Leader | 处理写请求 | 1(自动选举) |
| Follower | 复制数据,处理读请求 | N-1 |
| 节点 | IP | 角色 | 配置 |
|---|---|---|---|
| consul-1 | 192.168.1.10 | Server | 2C4G |
| consul-2 | 192.168.1.11 | Server | 2C4G |
| consul-3 | 192.168.1.12 | Server | 2C4G |
# Firewall configuration: permanently open each port used by the Consul
# agents, then reload firewalld so the permanent rules take effect.
consul_ports=(
  8300/tcp # Server RPC
  8301/tcp # LAN Serf
  8301/udp
  8302/tcp # WAN Serf
  8302/udp
  8500/tcp # HTTP API
  8600/tcp # DNS
  8600/udp
)
for port_spec in "${consul_ports[@]}"; do
  firewall-cmd --permanent --add-port="${port_spec}"
done
firewall-cmd --reload
// /etc/consul.d/server.json
{
"datacenter": "dc1",
"node_name": "consul-1",
"data_dir": "/opt/consul/data",
"log_level": "INFO",
"server": true,
"bootstrap_expect": 3,
"bind_addr": "192.168.1.10",
"client_addr": "0.0.0.0",
"retry_join": ["192.168.1.11", "192.168.1.12"],
"ui_config": {
"enabled": true
},
"connect": {
"enabled": true
},
"performance": {
"raft_multiplier": 1
}
}
{
"datacenter": "dc1",
"node_name": "consul-2",
"data_dir": "/opt/consul/data",
"log_level": "INFO",
"server": true,
"bootstrap_expect": 3,
"bind_addr": "192.168.1.11",
"client_addr": "0.0.0.0",
"retry_join": ["192.168.1.10", "192.168.1.12"],
"ui_config": {
"enabled": true
}
}
{
"datacenter": "dc1",
"node_name": "consul-3",
"data_dir": "/opt/consul/data",
"log_level": "INFO",
"server": true,
"bootstrap_expect": 3,
"bind_addr": "192.168.1.12",
"client_addr": "0.0.0.0",
"retry_join": ["192.168.1.10", "192.168.1.11"],
"ui_config": {
"enabled": true
}
}
# /etc/systemd/system/consul.service
# systemd unit that runs the Consul agent using the configuration files
# found in /etc/consul.d/.
# Comments are kept on their own lines; NOTE(review): systemd unit files are
# not expected to support trailing comments after a value — confirm before
# adding any inline.
[Unit]
Description=Consul Service Discovery Agent
Documentation=https://www.consul.io/
# Start after, and pull in, network-online.target.
After=network-online.target
Wants=network-online.target
[Service]
# NOTE(review): Type=notify expects the agent to signal readiness to
# systemd — confirm this works with the installed Consul version.
Type=notify
# The agent runs as the consul user and group; presumably this account must
# exist and be able to write the configured data_dir — verify on the host.
User=consul
Group=consul
ExecStart=/usr/local/bin/consul agent -config-dir=/etc/consul.d/
# Reload is implemented by sending SIGHUP to the main agent process.
ExecReload=/bin/kill -HUP $MAINPID
# On stop, SIGTERM is sent and only the main process is targeted.
KillMode=process
KillSignal=SIGTERM
# Restart the agent after a failure, waiting 5 seconds between attempts.
Restart=on-failure
RestartSec=5
# Open-file limit for the agent process.
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
# Run on every node: reload unit files, enable the service, and start it
systemctl daemon-reload
systemctl enable consul
systemctl start consul
# Check the service status
systemctl status consul
# List cluster members
consul members
# Sample output
Node Address Status Type Build Protocol DC Partition Segment
consul-1 192.168.1.10:8300 alive server 1.15.0 2 dc1 default <all>
consul-2 192.168.1.11:8301 alive server 1.15.0 2 dc1 default <all>
consul-3 192.168.1.12:8301 alive server 1.15.0 2 dc1 default <all>
# Show the Raft peers to find the current leader
consul operator raft list-peers
// /etc/consul.d/client.json
{
"datacenter": "dc1",
"node_name": "app-server-1",
"data_dir": "/opt/consul/data",
"log_level": "INFO",
"server": false,
"bind_addr": "192.168.1.100",
"client_addr": "127.0.0.1",
"retry_join": ["192.168.1.10", "192.168.1.11", "192.168.1.12"]
}
# docker-compose.yml
# Runs a Consul client agent in a container that joins the server cluster.
version: '3'
services:
  consul-client:
    # Use the HashiCorp-published image and pin a version: the legacy
    # Docker Hub `consul` repository is deprecated and no longer receives
    # new releases. Keep the tag in line with the version the servers run.
    image: hashicorp/consul:1.15
    command: agent -retry-join=192.168.1.10 -retry-join=192.168.1.11
    environment:
      # The agent binds to the address of this interface; with host
      # networking this must be the name of an interface on the host.
      - CONSUL_BIND_INTERFACE=eth0
    network_mode: host
{
"performance": {
"raft_multiplier": 1,
"rpc_hold_timeout": "7s"
},
"raft_protocol": 3,
"raft_snapshot_threshold": 8192,
"raft_snapshot_interval": "30s"
}
{
"autopilot": {
"cleanup_dead_servers": true,
"last_contact_threshold": "200ms",
"max_trailing_logs": 250,
"server_stabilization_time": "10s"
}
}
# Create the CA
consul tls ca create
# Create a server certificate for datacenter dc1
consul tls cert create -server -dc dc1
# Create a client certificate for datacenter dc1
consul tls cert create -client -dc dc1
{
"tls": {
"defaults": {
"ca_file": "/etc/consul.d/certs/consul-agent-ca.pem",
"cert_file": "/etc/consul.d/certs/dc1-server-consul-0.pem",
"key_file": "/etc/consul.d/certs/dc1-server-consul-0-key.pem",
"verify_incoming": true,
"verify_outgoing": true
},
"internal_rpc": {
"verify_server_hostname": true
}
}
}
# Create a snapshot, saved as backup.snap
consul snapshot save backup.snap
# Scheduled backup script: saves a timestamped Consul snapshot, then prunes
# snapshot files older than 7 days.
#!/bin/bash
# Abort on the first failing command, unset variable, or failed pipeline
# stage, so that old backups are never pruned when no new snapshot was saved.
set -euo pipefail

BACKUP_DIR=/backup
DATE=$(date +%Y%m%d_%H%M%S)

consul snapshot save "${BACKUP_DIR}/consul_${DATE}.snap"

# Only reached after a successful save. Restrict the deletion to regular
# files matching the snapshot naming pattern.
find "${BACKUP_DIR}" -type f -name "consul_*.snap" -mtime +7 -delete
# Restore from a snapshot
consul snapshot restore backup.snap
# crontab entry: save a timestamped snapshot at minute 0 of every 6th hour.
# Each '%' is written as '\%' because cron treats an unescaped '%' specially.
0 */6 * * * /usr/local/bin/consul snapshot save /backup/consul_$(date +\%Y\%m\%d_\%H\%M\%S).snap
# 1. Configure the new node
# 2. Start the new node (it joins the cluster automatically)
consul agent -config-dir=/etc/consul.d/
# 3. Verify membership and Raft peers
consul members
consul operator raft list-peers
# 1. Graceful leave
consul leave
# 2. Forced removal (when the node has failed)
consul operator raft remove-peer -address="192.168.1.12:8300"
# 1. Remove the failed node (replace the placeholder in the address with the
#    failed node's IP)
consul operator raft remove-peer -address="故障节点IP:8300"
# 2. Add the new node
# Configure the new node and start it
#!/bin/bash
# check_consul.sh
# Health probe for a Consul server cluster.
# Exit codes: 0 = OK, 1 = WARNING (too few alive servers),
#             2 = CRITICAL (leader count is not exactly one).
# Environment:
#   EXPECTED_SERVERS  minimum number of alive servers (default: 3)

EXPECTED_SERVERS=${EXPECTED_SERVERS:-3}

# Check the leader.
# Match the State column (4th field) exactly instead of grepping the whole
# line, so a node whose name contains "leader" is not miscounted. awk always
# prints a number, even when the consul command fails or prints nothing.
LEADER=$(consul operator raft list-peers \
  | awk '$4 == "leader" { n++ } END { print n + 0 }')
if [[ "$LEADER" -ne 1 ]]; then
  echo "CRITICAL: No leader elected"
  exit 2
fi

# Check the number of servers.
# Match the Status (3rd) and Type (4th) columns exactly: grepping for
# "server" anywhere on the line would also count client agents whose node
# name contains it (for example "app-server-1").
SERVERS=$(consul members \
  | awk '$3 == "alive" && $4 == "server" { n++ } END { print n + 0 }')
if [[ "$SERVERS" -lt "$EXPECTED_SERVERS" ]]; then
  echo "WARNING: Only $SERVERS servers alive"
  exit 1
fi

echo "OK: Consul cluster healthy"
exit 0
# prometheus.yml
# Scrapes the metrics endpoint of each Consul server's HTTP API.
# NOTE(review): the agents are expected to need
# telemetry.prometheus_retention_time set to a non-zero value before this
# endpoint serves Prometheus-format metrics — confirm in the agent config.
scrape_configs:
  - job_name: 'consul'
    metrics_path: '/v1/agent/metrics'
    params:
      format: ['prometheus']
    static_configs:
      - targets: ['192.168.1.10:8500', '192.168.1.11:8500', '192.168.1.12:8500']