基于DNS(Consul)高可用

DNS

推荐从Bind-DLZ入手,资料多
可控制度更好(查询DNS记录SQL可定制)
据说性能差
Bind-DLZ
https://www.cnblogs.com/saneri/p/8178065.html
  
PowerDNS
SQL schema设置规范
性能比Bind-DLZ好
 
coredns 和k8s结合比较多

nacos 阿里开源,含DNS和服务发现
 

监控程序:
主从结构,支持GTID
 
监控逻辑:
按分组取出来机器节点
master:
尝试连接成功 ok 保持
失败  进行从库选举
slave:
检查是不是在线online
在线的:
连接成功,复制是不是正常,不正常下线,检验延迟
下线更新cmdb,dns records
下线的:
连接成功,复制正常,不延迟 上线
上线更新cmdb,dns records
从库选举:
获取从库列表
 
故障切换:
确认所有从节点的复制都已中断(连不上原master),并等待各从节点已接收的relay log全部回放完成
对比,所有节点是不是复制到一个位置
通过获取的GTID对比,是不是所有节点同步到一个位置,如果不是,选举出来最靠前的做master
如果同步位置都一样,根据cmdb中定义的level选择,最大的那个
新的主节点选举成功后,其他节点change过来
更新cmdb中的角色,oldmaster->slave,选举出来的master,更改新主节点的read_only
更新dns_records
 
在线切换:
在oldmaster上设置super_read_only=ON和read_only=ON,并kill掉现有的业务连接
获取oldmaster中的show master status信息
获取从节点中的show slave status对比,确认都同步完成
按cmdb中的level或是指定的节点为新master
更新重做master/slave架构
更新cmdb
更新dns
记录log
 
一定要提高英文阅读能力

[root@mydb1 ~]# wget https://releases.hashicorp.com/consul/1.4.0/consul_1.4.0_linux_amd64.zip
[root@mydb1 ~]# mkdir -p /opt/consul /opt/consul/conf /data/consul /data/consul/shell/
[root@mydb2 ~]# mkdir -p /opt/consul /opt/consul/conf /data/consul /data/consul/shell/
[root@mydb3 ~]# mkdir -p /opt/consul /opt/consul/conf /data/consul /data/consul/shell/
[root@mydb1 ~]# unzip consul_1.4.0_linux_amd64.zip
将解压得到的consul可执行文件拷贝至/opt/consul目录(mydb2、mydb3上同样需要拷贝consul和下面的server.json)
[root@mydb1 ~]# cat /opt/consul/conf/server.json
{
  "data_dir": "/data/consul",
  "enable_script_checks": true,
  "datacenter": "dc1",
  "log_level": "INFO",          
  "server": true,              
  "bootstrap_expect": 3,          
  "ui":true
}
[root@mydb1 consul]# ./consul agent -config-dir=/opt/consul/conf > /data/consul/consul.log &
[root@mydb2 consul]# ./consul agent -config-dir=/opt/consul/conf > /data/consul/consul.log &
[root@mydb3 consul]# ./consul agent -config-dir=/opt/consul/conf > /data/consul/consul.log &
[root@mydb2 consul]# ./consul join 192.168.1.101
[root@mydb3 consul]# ./consul join 192.168.1.101
[root@mydb1 consul]# ./consul members
Node   Address             Status  Type    Build  Protocol  DC   Segment
mydb1  192.168.1.101:8301  alive   server  1.4.0  2         dc1  <all>
mydb2  192.168.1.102:8301  alive   server  1.4.0  2         dc1  <all>
mydb3  192.168.1.103:8301  alive   server  1.4.0  2         dc1  <all>
[root@mydb1 consul]# ./consul catalog nodes
Node   ID        Address        DC
mydb1  52514e74  192.168.1.101  dc1
mydb2  aebbf0b2  192.168.1.102  dc1
mydb3  0e179069  192.168.1.103  dc1

# dig @127.0.0.1 -p 8600 mydb1.node.consul
# dig @127.0.0.1 -p 8600 mydb2.node.consul
# dig @127.0.0.1 -p 8600 mydb3.node.consul

[root@mydb1 consul]# ./consul operator raft list-peers
Node   ID                                    Address             State     Voter  RaftProtocol
mydb1  52514e74-d063-cfe3-1d58-55fda9fc2451  192.168.1.101:8300  leader    true   3
mydb2  aebbf0b2-09ad-f396-4c21-3f9ee40a16da  192.168.1.102:8300  follower  true   3
mydb3  0e179069-7360-3866-d9a6-7ea60c540c04  192.168.1.103:8300  follower  true   3

[root@mydb1 consul]# ./consul kv put id 11
Success! Data written to: id
[root@mydb1 consul]# ./consul kv get id
11
[root@mydb2 consul]# ./consul kv get id
11
[root@mydb3 consul]# ./consul kv get id
11

consul是用Raft来实现分布式一致性的
 
 
[root@mydb1 ~]# cat /opt/consul/conf/r-test-mgr-ser.json
{
  "service": {
    "name": "r-test-3306-mydb-ser",
    "tags": ["测试-3306"],
    "address": "192.168.1.101",
    "meta": {
      "meta": "for my service"
    },
    "port": 3306,
    "enable_tag_override": false,
    "checks": [
      {
        "args": ["/data/consul/shell/check_mysql_mgr_slave.sh"],
        "interval": "1s"
      }
    ]
  }
}
[root@mydb1 ~]# cat /opt/consul/conf/w-test-mgr-ser.json
{
  "service": {
    "name": "w-test-3306-mydb-ser",
    "tags": ["测试-3306"],
    "address": "192.168.1.101",
    "meta": {
      "meta": "for my service"
    },
    "port": 3306,
    "enable_tag_override": false,
    "checks": [
      {
        "args": ["/data/consul/shell/check_mysql_mgr_master.sh"],
        "interval": "10s"
      }
    ]
  }
}
注意:在mydb2、mydb3上需将这两个JSON中的"address"改为本机IP(192.168.1.102 / 192.168.1.103)
检测脚本如下
[root@mydb1 ~]# cat /data/consul/shell/check_mysql_mgr_master.sh
#!/bin/bash
# Consul script check for the write service of a MySQL Group Replication node.
#
# Exit status:
#   0 - MySQL answers, this member is ONLINE, and it is the group primary
#   2 - MySQL is unreachable, the member is not ONLINE, or it is a secondary
# Consul treats exit 0 as passing and any status other than 0 or 1 as critical.
#
# NOTE(review): the password is hard-coded and passed on the command line,
# where it is visible in the process list; consider a defaults file instead.
host="192.168.1.101"
port=3306
user="dba_user"
password="msds007"
# An array keeps every option a single word, whatever characters it contains.
comm=(/usr/local/mysql/bin/mysql "-u$user" "-h$host" -P "$port" "-p$password")

# Check that MySQL is alive first, so a dead server costs one connection
# attempt instead of several.
value=$("${comm[@]}" -Nse "select 1")
if [[ -z "$value" ]]; then
  echo "mysql $port is down....."
  exit 2
fi

primary_member=$("${comm[@]}" -Nse "select variable_value from performance_schema.global_status WHERE VARIABLE_NAME= 'group_replication_primary_member'")
server_uuid=$("${comm[@]}" -Nse "select variable_value from performance_schema.global_variables where VARIABLE_NAME='server_uuid';")

# Check this member's state in the group. The comparison is quoted so that an
# empty result (query failed, or no row for this member) counts as "not
# ONLINE" instead of raising a test syntax error and falling through.
node_state=$("${comm[@]}" -Nse "select MEMBER_STATE from performance_schema.replication_group_members where MEMBER_ID='$server_uuid'")
if [[ "$node_state" != "ONLINE" ]]; then
  echo "MySQL $port state is not online...."
  exit 2
fi

# Check whether this member is the primary. The right-hand side is quoted to
# force a literal string comparison rather than a pattern match.
if [[ "$server_uuid" == "$primary_member" ]]; then
  echo "MySQL $port Instance is master ........"
  exit 0
else
  echo "MySQL $port Instance is slave ........"
  exit 2
fi
[root@mydb1 ~]# cat /data/consul/shell/check_mysql_mgr_slave.sh
#!/bin/bash
# Consul script check for the read service of a MySQL Group Replication node.
#
# Exit status:
#   0 - MySQL answers, this member is ONLINE, and it is either a secondary or
#       the primary of a group that has only one member
#   2 - MySQL is unreachable, the member is not ONLINE, or it is the primary
#       of a group that has other members
# Consul treats exit 0 as passing and any status other than 0 or 1 as critical.
#
# NOTE(review): the password is hard-coded and passed on the command line,
# where it is visible in the process list; consider a defaults file instead.
host="192.168.1.101"
port=3306
user="dba_user"
password="msds007"
# An array keeps every option a single word, whatever characters it contains.
comm=(/usr/local/mysql/bin/mysql "-u$user" "-h$host" -P "$port" "-p$password")

# Check that MySQL is alive first, so a dead server costs one connection
# attempt instead of several.
value=$("${comm[@]}" -Nse "select 1")
if [[ -z "$value" ]]; then
  echo "mysql $port is down....."
  exit 2
fi

primary_member=$("${comm[@]}" -Nse "select variable_value from performance_schema.global_status WHERE VARIABLE_NAME= 'group_replication_primary_member'")
server_uuid=$("${comm[@]}" -Nse "select variable_value from performance_schema.global_variables where VARIABLE_NAME='server_uuid';")

# Check this member's state in the group. The comparison is quoted so that an
# empty result (query failed, or no row for this member) counts as "not
# ONLINE". Unquoted, the test raised a syntax error and the script went on to
# report a healthy slave.
node_state=$("${comm[@]}" -Nse "select MEMBER_STATE from performance_schema.replication_group_members where MEMBER_ID='$server_uuid'")
if [[ "$node_state" != "ONLINE" ]]; then
  echo "MySQL $port state is not online...."
  exit 2
fi

# Check whether this member is the primary.
if [[ "$server_uuid" != "$primary_member" ]]; then
  echo "MySQL $port Instance is slave ........"
  exit 0
fi

# This member is the primary. If it is the only member of the group, register
# it under the slave role as well so the read service still has an instance.
node_num=$("${comm[@]}" -Nse "select count(*) from performance_schema.replication_group_members")
if [[ "$node_num" == "1" ]]; then
  echo "MySQL $port Instance is slave ........"
  exit 0
else
  echo "MySQL $port Instance is master ........"
  exit 2
fi
注意:在mydb2、mydb3上需将两个检测脚本中的host改为本机IP(192.168.1.102 / 192.168.1.103)
 

[root@mydb1 consul]# ./consul agent -config-dir=/opt/consul/conf > /data/consul/consul.log &
[root@mydb2 consul]# ./consul agent -config-dir=/opt/consul/conf > /data/consul/consul.log &
[root@mydb3 consul]# ./consul agent -config-dir=/opt/consul/conf > /data/consul/consul.log &
[root@mydb2 consul]# ./consul join 192.168.1.101
[root@mydb3 consul]# ./consul join 192.168.1.101
[root@mydb1 consul]# ./consul members
 
# dig @127.0.0.1 -p 8600 w-test-3306-mydb-ser.service.consul
# dig @127.0.0.1 -p 8600 r-test-3306-mydb-ser.service.consul
 
Consul使用手册
http://www.liangxiansen.cn/2017/04/06/consul/
 
 
 
原文地址:https://www.cnblogs.com/allenhu320/p/11362937.html