配置 MHA
安装相关包
[root@mha-manager ~]# yum -y install mha4mysql-manager-0.58-0.el7.centos.noarch.rpm
[root@mha-manager ~]# yum -y install mha4mysql-node-0.58-0.el7.centos.noarch.rpm
所有被管理的服务器都要安装 mha4mysql-node-0.58-0.el7.centos.noarch.rpm
管理端服务器要安装 mha4mysql-manager-0.58-0.el7.centos.noarch.rpm
实现所有节点的 ssh 验证
配置邮件服务器
1. 安装 postfix 服务
[root@localhost ~]# yum install postfix-* mailx -y
2. 配置 mail.rc 配置文件
set from=543989246@qq.com
set smtp=smtp.qq.com
set smtp-auth-user=543989246@qq.com
set smtp-auth-password=fzwzpjcekuizbbfi
邮件测试
[root@localhost ~]# cat sendmail.sh
#!/bin/bash
echo "MySQL is down" | mail -s "MHA Warning" 543989246@qq.com
配置 manager 配置文件
[root@localhost ~]# mkdir /etc/mastermha
[root@localhost ~]# vim /etc/mastermha/app1.cnf
[server default]
user=mhauser
password=123.com
manager_workdir=/data/mastermha/app1/
manager_log=/data/mastermha/app1/manager.log
remote_workdir=/data/mastermha/app1/
ssh_user=root
repl_user=repluser
repl_password=123.com
ping_interval=1
master_ip_failover_script=/usr/local/bin/master_ip_failover
report_script=/usr/local/bin/sendmail.sh
check_repl_delay=0
master_binlog_dir=/data/mysql/
[server1]
hostname=10.0.0.51
candidate_master=1
[server2]
hostname=10.0.0.52
candidate_master=1
创建 MHA 用户
此用户在 app1.cnf 文件中已经定义了
root@localhost (none) >create user mhauser@'10.0.0.%' identified by "123.com";
Query OK, 0 rows affected (0.01 sec)
root@localhost (none) >grant all on *.* to mhauser@'10.0.0.%';
Query OK, 0 rows affected (0.00 sec)
创建 perl 脚本
参数 master_ip_failover_script= 中定义的内容
root@mha-m:~$cat /usr/local/bin/master_ip_failover
#!/usr/bin/env perl
use strict;
use warnings FATAL => 'all';
use Getopt::Long;
my ($command, $ssh_user, $orig_master_host, $orig_master_ip,
    $orig_master_port, $new_master_host, $new_master_ip, $new_master_port);
my $vip = '10.0.0.200/24'; # 改成我们需要的vip
my $key = '1';
my $ssh_start_vip = "/sbin/ifconfig eth0:$key $vip"; # 指定网卡名称
my $ssh_stop_vip = "/sbin/ifconfig eth0:$key down"; # 指定网卡名称
GetOptions(
    'command=s' => \$command,
    'ssh_user=s' => \$ssh_user,
    'orig_master_host=s' => \$orig_master_host,
    'orig_master_ip=s' => \$orig_master_ip,
    'orig_master_port=i' => \$orig_master_port,
    'new_master_host=s' => \$new_master_host,
    'new_master_ip=s' => \$new_master_ip,
    'new_master_port=i' => \$new_master_port,
);
exit &main();
sub main {
    print "\n\nIN SCRIPT TEST====$ssh_stop_vip==$ssh_start_vip===\n\n";
    if ( $command eq "stop" || $command eq "stopssh" ) {
        my $exit_code = 1;
        eval {
            print "Disabling the VIP on old master: $orig_master_host \n";
            &stop_vip();
            $exit_code = 0;
        };
        if ($@) {
            warn "Got Error: $@\n";
            exit $exit_code;
        }
        exit $exit_code;
    }
    elsif ( $command eq "start" ) {
        my $exit_code = 10;
        eval {
            print "Enabling the VIP - $vip on the new master - $new_master_host \n";
            &start_vip();
            $exit_code = 0;
        };
        if ($@) {
            warn $@;
            exit $exit_code;
        }
        exit $exit_code;
    }
    elsif ( $command eq "status" ) {
        print "Checking the Status of the script.. OK \n";
        exit 0;
    }
    else {
        &usage();
        exit 1;
    }
}
sub start_vip() {
    `ssh $ssh_user\@$new_master_host \" $ssh_start_vip \"`;
}
sub stop_vip() {
    return 0 unless ($ssh_user);
    `ssh $ssh_user\@$orig_master_host \" $ssh_stop_vip \"`;
}
sub usage {
    print
"Usage: master_ip_failover --command=start|stop|stopssh|status --orig_master_host=host --orig_master_ip=ip --orig_master_port=port --new_master_host=host --new_master_ip=ip --new_master_port=port\n";
}
root@mha-m:/data$chmod o+x /usr/local/bin/master_ip_failover
root@mha-m:/data$chmod o+x /usr/local/bin/sendmail.sh
启动 MHA
检查启动环境
[root@localhost ~]# masterha_check_ssh --conf=/etc/mastermha/app1.cnf
Wed Feb 24 11:05:51 2021 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Wed Feb 24 11:05:51 2021 - [info] Reading application default configuration from /etc/mastermha/app1.cnf..
Wed Feb 24 11:05:51 2021 - [info] Reading server configuration from /etc/mastermha/app1.cnf..
Wed Feb 24 11:05:51 2021 - [info] Starting SSH connection tests..
Wed Feb 24 11:05:52 2021 - [debug]
Wed Feb 24 11:05:51 2021 - [debug] Connecting via SSH from root@10.0.0.52(10.0.0.52:22) to root@10.0.0.51(10.0.0.51:22)..
Wed Feb 24 11:05:52 2021 - [debug] ok.
Wed Feb 24 11:05:52 2021 - [debug] Connecting via SSH from root@10.0.0.52(10.0.0.52:22) to root@10.0.0.53(10.0.0.53:22)..
Wed Feb 24 11:05:52 2021 - [debug] ok.
Wed Feb 24 11:05:52 2021 - [debug]
Wed Feb 24 11:05:51 2021 - [debug] Connecting via SSH from root@10.0.0.51(10.0.0.51:22) to root@10.0.0.52(10.0.0.52:22)..
Wed Feb 24 11:05:51 2021 - [debug] ok.
Wed Feb 24 11:05:51 2021 - [debug] Connecting via SSH from root@10.0.0.51(10.0.0.51:22) to root@10.0.0.53(10.0.0.53:22)..
Wed Feb 24 11:05:52 2021 - [debug] ok.
Wed Feb 24 11:05:53 2021 - [debug]
Wed Feb 24 11:05:52 2021 - [debug] Connecting via SSH from root@10.0.0.53(10.0.0.53:22) to root@10.0.0.51(10.0.0.51:22)..
Wed Feb 24 11:05:52 2021 - [debug] ok.
Wed Feb 24 11:05:52 2021 - [debug] Connecting via SSH from root@10.0.0.53(10.0.0.53:22) to root@10.0.0.52(10.0.0.52:22)..
Wed Feb 24 11:05:52 2021 - [debug] ok.
Wed Feb 24 11:05:53 2021 - [info] All SSH connection tests passed successfully.
[root@localhost ~]# masterha_check_repl --conf=/etc/mastermha/app1.cnf
Wed Feb 24 11:10:06 2021 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Wed Feb 24 11:10:06 2021 - [info] Reading application default configuration from /etc/mastermha/app1.cnf..
Wed Feb 24 11:10:06 2021 - [info] Reading server configuration from /etc/mastermha/app1.cnf..
Wed Feb 24 11:10:06 2021 - [info] MHA::MasterMonitor version 0.58.
Wed Feb 24 11:10:08 2021 - [info] GTID failover mode = 0
Wed Feb 24 11:10:08 2021 - [info] Dead Servers:
Wed Feb 24 11:10:08 2021 - [info] Alive Servers:
Wed Feb 24 11:10:08 2021 - [info] 10.0.0.51(10.0.0.51:3306)
Wed Feb 24 11:10:08 2021 - [info] 10.0.0.52(10.0.0.52:3306)
Wed Feb 24 11:10:08 2021 - [info] 10.0.0.53(10.0.0.53:3306)
Wed Feb 24 11:10:08 2021 - [info] Alive Slaves:
Wed Feb 24 11:10:08 2021 - [info] 10.0.0.52(10.0.0.52:3306) Version=5.7.33-log (oldest major version between slaves) log-bin:enabled
Wed Feb 24 11:10:08 2021 - [info] Replicating from 10.0.0.51(10.0.0.51:3306)
Wed Feb 24 11:10:08 2021 - [info] Primary candidate for the new Master (candidate_master is set)
Wed Feb 24 11:10:08 2021 - [info] 10.0.0.53(10.0.0.53:3306) Version=5.7.33-log (oldest major version between slaves) log-bin:enabled
Wed Feb 24 11:10:08 2021 - [info] Replicating from 10.0.0.51(10.0.0.51:3306)
Wed Feb 24 11:10:08 2021 - [info] Current Alive Master: 10.0.0.51(10.0.0.51:3306)
Wed Feb 24 11:10:08 2021 - [info] Checking slave configurations..
Wed Feb 24 11:10:08 2021 - [info] Checking replication filtering settings..
Wed Feb 24 11:10:08 2021 - [info] binlog_do_db= , binlog_ignore_db=
Wed Feb 24 11:10:08 2021 - [info] Replication filtering check ok.
Wed Feb 24 11:10:08 2021 - [info] GTID (with auto-pos) is not supported
Wed Feb 24 11:10:08 2021 - [info] Starting SSH connection tests..
Wed Feb 24 11:10:10 2021 - [info] All SSH connection tests passed successfully.
Wed Feb 24 11:10:10 2021 - [info] Checking MHA Node version..
Wed Feb 24 11:10:10 2021 - [info] Version check ok.
Wed Feb 24 11:10:10 2021 - [info] Checking SSH publickey authentication settings on the current master..
Wed Feb 24 11:10:11 2021 - [info] HealthCheck: SSH to 10.0.0.51 is reachable.
Wed Feb 24 11:10:11 2021 - [info] Master MHA Node version is 0.58.
Wed Feb 24 11:10:11 2021 - [info] Checking recovery script configurations on 10.0.0.51(10.0.0.51:3306)..
Wed Feb 24 11:10:11 2021 - [info] Executing command: save_binary_logs --command=test --start_pos=4 --binlog_dir=/data/binlog/ --output_file=/data/mastermha/app1//save_binary_logs_test --manager_version=0.58 --start_file=mysql-bin.000009
Wed Feb 24 11:10:11 2021 - [info] Connecting to root@10.0.0.51(10.0.0.51:22)..
Creating /data/mastermha/app1 if not exists.. ok.
Checking output directory is accessible or not..
ok.
Binlog found at /data/binlog/, up to mysql-bin.000009
Wed Feb 24 11:10:11 2021 - [info] Binlog setting check done.
Wed Feb 24 11:10:11 2021 - [info] Checking SSH publickey authentication and checking recovery script configurations on all alive slave servers..
Wed Feb 24 11:10:11 2021 - [info] Executing command : apply_diff_relay_logs --command=test --slave_user='mhauser' --slave_host=10.0.0.52 --slave_ip=10.0.0.52 --slave_port=3306 --workdir=/data/mastermha/app1/ --target_version=5.7.33-log --manager_version=0.58 --relay_log_info=/data/mysql/relay-log.info --relay_dir=/data/mysql/ --slave_pass=xxx
Wed Feb 24 11:10:11 2021 - [info] Connecting to root@10.0.0.52(10.0.0.52:22)..
Checking slave recovery environment settings..
Opening /data/mysql/relay-log.info ... ok.
Relay log found at /data/mysql, up to mysql-relay-bin.000002
Temporary relay log file is /data/mysql/mysql-relay-bin.000002
Checking if super_read_only is defined and turned on.. not present or turned off, ignoring.
Testing mysql connection and privileges..
mysql: [Warning] Using a password on the command line interface can be insecure.
done.
Testing mysqlbinlog output.. done.
Cleaning up test file(s).. done.
Wed Feb 24 11:10:11 2021 - [info] Executing command : apply_diff_relay_logs --command=test --slave_user='mhauser' --slave_host=10.0.0.53 --slave_ip=10.0.0.53 --slave_port=3306 --workdir=/data/mastermha/app1/ --target_version=5.7.33-log --manager_version=0.58 --relay_log_info=/data/mysql/relay-log.info --relay_dir=/data/mysql/ --slave_pass=xxx
Wed Feb 24 11:10:11 2021 - [info] Connecting to root@10.0.0.53(10.0.0.53:22)..
Checking slave recovery environment settings..
Opening /data/mysql/relay-log.info ... ok.
Relay log found at /data/mysql, up to mysql-relay-bin.000002
Temporary relay log file is /data/mysql/mysql-relay-bin.000002
Checking if super_read_only is defined and turned on.. not present or turned off, ignoring.
Testing mysql connection and privileges..
mysql: [Warning] Using a password on the command line interface can be insecure.
done.
Testing mysqlbinlog output.. done.
Cleaning up test file(s).. done.
Wed Feb 24 11:10:12 2021 - [info] Slaves settings check done.
Wed Feb 24 11:10:12 2021 - [info]
10.0.0.51(10.0.0.51:3306) (current master)
+--10.0.0.52(10.0.0.52:3306)
+--10.0.0.53(10.0.0.53:3306)
Wed Feb 24 11:10:12 2021 - [info] Checking replication health on 10.0.0.52..
Wed Feb 24 11:10:12 2021 - [info] ok.
Wed Feb 24 11:10:12 2021 - [info] Checking replication health on 10.0.0.53..
Wed Feb 24 11:10:12 2021 - [info] ok.
Wed Feb 24 11:10:12 2021 - [info] Checking master_ip_failover_script status:
Wed Feb 24 11:10:12 2021 - [info] /usr/local/bin/master_ip_failover --command=status --ssh_user=root --orig_master_host=10.0.0.51 --orig_master_ip=10.0.0.51 --orig_master_port=3306
IN SCRIPT TEST====/sbin/ifconfig eth0:1 down==/sbin/ifconfig eth0:1 10.0.0.200/24===
Checking the Status of the script.. OK
Wed Feb 24 11:10:12 2021 - [info] OK.
Wed Feb 24 11:10:12 2021 - [warning] shutdown_script is not defined.
Wed Feb 24 11:10:12 2021 - [info] Got exit code 0 (Not master dead).
MySQL Replication Health is OK.
检测成功后开启 MHA
nohup masterha_manager --conf=/etc/mastermha/app1.cnf &> /dev/null &
查看MHA状态
[root@localhost ~]# masterha_check_status --conf=/etc/mastermha/app1.cnf
app1 (pid:13297) is running(0:PING_OK), master:10.0.0.51
查看MHA日志
[root@localhost ~]# tail -f /data/mastermha/app1/manager.log
IN SCRIPT TEST====/sbin/ifconfig eth0:1 down==/sbin/ifconfig eth0:1 10.0.0.200/24===
Checking the Status of the script.. OK
Wed Feb 24 11:15:18 2021 - [info] OK.
Wed Feb 24 11:15:18 2021 - [warning] shutdown_script is not defined.
Wed Feb 24 11:15:18 2021 - [info] Set master ping interval 1 seconds.
Wed Feb 24 11:15:18 2021 - [warning] secondary_check_script is not defined. It is highly recommended setting it to check master reachability from two or more routes.
Wed Feb 24 11:15:18 2021 - [info] Starting ping health check on 10.0.0.51(10.0.0.51:3306)..
Wed Feb 24 11:15:18 2021 - [info] Ping(SELECT) succeeded, waiting until MySQL doesn't respond..
查看健康检查
[root@localhost ~]# tail -f /data/mastermha/app1/manager.log
IN SCRIPT TEST====/sbin/ifconfig eth0:1 down==/sbin/ifconfig eth0:1 10.0.0.200/24===
Checking the Status of the script.. OK
Wed Feb 24 11:15:18 2021 - [info] OK.
Wed Feb 24 11:15:18 2021 - [warning] shutdown_script is not defined.
Wed Feb 24 11:15:18 2021 - [info] Set master ping interval 1 seconds.
Wed Feb 24 11:15:18 2021 - [warning] secondary_check_script is not defined. It is highly recommended setting it to check master reachability from two or more routes.
Wed Feb 24 11:15:18 2021 - [info] Starting ping health check on 10.0.0.51(10.0.0.51:3306)..
Wed Feb 24 11:15:18 2021 - [info] Ping(SELECT) succeeded, waiting until MySQL doesn't respond..
-----------------------------------------------------------------------------------------------
#健康检查
[root@mysql ~]# tail -f /data/mysql/mysql.log
2021-02-24T03:17:42.821567Z 110 Query SELECT 1 As Value
2021-02-24T03:17:43.824434Z 110 Query SELECT 1 As Value
2021-02-24T03:17:44.823319Z 110 Query SELECT 1 As Value
2021-02-24T03:17:45.823814Z 110 Query SELECT 1 As Value
2021-02-24T03:17:46.825207Z 110 Query SELECT 1 As Value
2021-02-24T03:17:47.826499Z 110 Query SELECT 1 As Value
2021-02-24T03:17:48.826082Z 110 Query SELECT 1 As Value
2021-02-24T03:17:49.826376Z 110 Query SELECT 1 As Value
2021-02-24T03:17:50.827108Z 110 Query SELECT 1 As Value
2021-02-24T03:17:51.831014Z 110 Query SELECT 1 As Value
2021-02-24T03:17:52.831806Z 110 Query SELECT 1 As Value
2021-02-24T03:17:53.829655Z 110 Query SELECT 1 As Value
模拟故障
当 MySQL 主库宕机后,manager.log 中会输出如下故障切换日志:
Wed Feb 24 11:24:13 2021 - [warning] Got error on MySQL select ping: 2006 (MySQL server has gone away)
Wed Feb 24 11:24:13 2021 - [info] Executing SSH check script: save_binary_logs --command=test --start_pos=4 --binlog_dir=/data/binlog/ --output_file=/data/mastermha/app1//save_binary_logs_test --manager_version=0.58 --binlog_prefix=mysql-bin
Wed Feb 24 11:24:14 2021 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '10.0.0.51' (111))
Wed Feb 24 11:24:14 2021 - [warning] Connection failed 2 time(s)..
Wed Feb 24 11:24:14 2021 - [info] HealthCheck: SSH to 10.0.0.51 is reachable.
Wed Feb 24 11:24:15 2021 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '10.0.0.51' (111))
Wed Feb 24 11:24:15 2021 - [warning] Connection failed 3 time(s)..
Wed Feb 24 11:24:16 2021 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '10.0.0.51' (111))
Wed Feb 24 11:24:16 2021 - [warning] Connection failed 4 time(s)..
Wed Feb 24 11:24:16 2021 - [warning] Master is not reachable from health checker!
Wed Feb 24 11:24:16 2021 - [warning] Master 10.0.0.51(10.0.0.51:3306) is not reachable!
Wed Feb 24 11:24:16 2021 - [warning] SSH is reachable.
Wed Feb 24 11:24:16 2021 - [info] Connecting to a master server failed. Reading configuration file /etc/masterha_default.cnf and /etc/mastermha/app1.cnf again, and trying to connect to all servers to check server status..
Wed Feb 24 11:24:16 2021 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Wed Feb 24 11:24:16 2021 - [info] Reading application default configuration from /etc/mastermha/app1.cnf..
Wed Feb 24 11:24:16 2021 - [info] Reading server configuration from /etc/mastermha/app1.cnf..
Wed Feb 24 11:24:17 2021 - [info] GTID failover mode = 0
Wed Feb 24 11:24:17 2021 - [info] Dead Servers:
Wed Feb 24 11:24:17 2021 - [info] 10.0.0.51(10.0.0.51:3306)
Wed Feb 24 11:24:17 2021 - [info] Alive Servers:
Wed Feb 24 11:24:17 2021 - [info] 10.0.0.52(10.0.0.52:3306)
Wed Feb 24 11:24:17 2021 - [info] 10.0.0.53(10.0.0.53:3306)
Wed Feb 24 11:24:17 2021 - [info] Alive Slaves:
Wed Feb 24 11:24:17 2021 - [info] 10.0.0.52(10.0.0.52:3306) Version=5.7.33-log (oldest major version between slaves) log-bin:enabled
Wed Feb 24 11:24:17 2021 - [info] Replicating from 10.0.0.51(10.0.0.51:3306)
Wed Feb 24 11:24:17 2021 - [info] Primary candidate for the new Master (candidate_master is set)
Wed Feb 24 11:24:17 2021 - [info] 10.0.0.53(10.0.0.53:3306) Version=5.7.33-log (oldest major version between slaves) log-bin:enabled
Wed Feb 24 11:24:17 2021 - [info] Replicating from 10.0.0.51(10.0.0.51:3306)
Wed Feb 24 11:24:17 2021 - [info] Checking slave configurations..
Wed Feb 24 11:24:17 2021 - [info] Checking replication filtering settings..
Wed Feb 24 11:24:17 2021 - [info] Replication filtering check ok.
Wed Feb 24 11:24:17 2021 - [info] Master is down!
Wed Feb 24 11:24:17 2021 - [info] Terminating monitoring script.
Wed Feb 24 11:24:17 2021 - [info] Got exit code 20 (Master dead).
Wed Feb 24 11:24:17 2021 - [info] MHA::MasterFailover version 0.58.
Wed Feb 24 11:24:17 2021 - [info] Starting master failover.
Wed Feb 24 11:24:17 2021 - [info]
Wed Feb 24 11:24:17 2021 - [info] * Phase 1: Configuration Check Phase..
Wed Feb 24 11:24:17 2021 - [info]
Wed Feb 24 11:24:18 2021 - [info] GTID failover mode = 0
Wed Feb 24 11:24:18 2021 - [info] Dead Servers:
Wed Feb 24 11:24:18 2021 - [info] 10.0.0.51(10.0.0.51:3306)
Wed Feb 24 11:24:18 2021 - [info] Checking master reachability via MySQL(double check)...
Wed Feb 24 11:24:18 2021 - [info] ok.
Wed Feb 24 11:24:18 2021 - [info] Alive Servers:
Wed Feb 24 11:24:18 2021 - [info] 10.0.0.52(10.0.0.52:3306)
Wed Feb 24 11:24:18 2021 - [info] 10.0.0.53(10.0.0.53:3306)
Wed Feb 24 11:24:18 2021 - [info] Alive Slaves:
Wed Feb 24 11:24:18 2021 - [info] 10.0.0.52(10.0.0.52:3306) Version=5.7.33-log (oldest major version between slaves) log-bin:enabled
Wed Feb 24 11:24:18 2021 - [info] Replicating from 10.0.0.51(10.0.0.51:3306)
Wed Feb 24 11:24:18 2021 - [info] Primary candidate for the new Master (candidate_master is set)
Wed Feb 24 11:24:18 2021 - [info] 10.0.0.53(10.0.0.53:3306) Version=5.7.33-log (oldest major version between slaves) log-bin:enabled
Wed Feb 24 11:24:18 2021 - [info] Replicating from 10.0.0.51(10.0.0.51:3306)
Wed Feb 24 11:24:18 2021 - [info] Starting Non-GTID based failover.
Wed Feb 24 11:24:18 2021 - [info]
Wed Feb 24 11:24:18 2021 - [info] ** Phase 1: Configuration Check Phase completed.
Wed Feb 24 11:24:18 2021 - [info]
Wed Feb 24 11:24:18 2021 - [info] * Phase 2: Dead Master Shutdown Phase..
Wed Feb 24 11:24:18 2021 - [info]
Wed Feb 24 11:24:18 2021 - [info] Forcing shutdown so that applications never connect to the current master..
Wed Feb 24 11:24:18 2021 - [info] Executing master IP deactivation script:
Wed Feb 24 11:24:18 2021 - [info] /usr/local/bin/master_ip_failover --orig_master_host=10.0.0.51 --orig_master_ip=10.0.0.51 --orig_master_port=3306 --command=stopssh --ssh_user=root
IN SCRIPT TEST====/sbin/ifconfig eth0:1 down==/sbin/ifconfig eth0:1 10.0.0.200/24===
Disabling the VIP on old master: 10.0.0.51
Wed Feb 24 11:24:18 2021 - [info] done.
Wed Feb 24 11:24:18 2021 - [warning] shutdown_script is not set. Skipping explicit shutting down of the dead master.
Wed Feb 24 11:24:18 2021 - [info] * Phase 2: Dead Master Shutdown Phase completed.
Wed Feb 24 11:24:18 2021 - [info]
Wed Feb 24 11:24:18 2021 - [info] * Phase 3: Master Recovery Phase..
Wed Feb 24 11:24:18 2021 - [info]
Wed Feb 24 11:24:18 2021 - [info] * Phase 3.1: Getting Latest Slaves Phase..
Wed Feb 24 11:24:18 2021 - [info]
Wed Feb 24 11:24:18 2021 - [info] The latest binary log file/position on all slaves is mysql-bin.000009:154
Wed Feb 24 11:24:18 2021 - [info] Latest slaves (Slaves that received relay log files to the latest):
Wed Feb 24 11:24:18 2021 - [info] 10.0.0.52(10.0.0.52:3306) Version=5.7.33-log (oldest major version between slaves) log-bin:enabled
Wed Feb 24 11:24:18 2021 - [info] Replicating from 10.0.0.51(10.0.0.51:3306)
Wed Feb 24 11:24:18 2021 - [info] Primary candidate for the new Master (candidate_master is set)
Wed Feb 24 11:24:18 2021 - [info] 10.0.0.53(10.0.0.53:3306) Version=5.7.33-log (oldest major version between slaves) log-bin:enabled
Wed Feb 24 11:24:18 2021 - [info] Replicating from 10.0.0.51(10.0.0.51:3306)
Wed Feb 24 11:24:18 2021 - [info] The oldest binary log file/position on all slaves is mysql-bin.000009:154
Wed Feb 24 11:24:18 2021 - [info] Oldest slaves:
Wed Feb 24 11:24:18 2021 - [info] 10.0.0.52(10.0.0.52:3306) Version=5.7.33-log (oldest major version between slaves) log-bin:enabled
Wed Feb 24 11:24:18 2021 - [info] Replicating from 10.0.0.51(10.0.0.51:3306)
Wed Feb 24 11:24:18 2021 - [info] Primary candidate for the new Master (candidate_master is set)
Wed Feb 24 11:24:18 2021 - [info] 10.0.0.53(10.0.0.53:3306) Version=5.7.33-log (oldest major version between slaves) log-bin:enabled
Wed Feb 24 11:24:18 2021 - [info] Replicating from 10.0.0.51(10.0.0.51:3306)
Wed Feb 24 11:24:18 2021 - [info]
Wed Feb 24 11:24:18 2021 - [info] * Phase 3.2: Saving Dead Master's Binlog Phase..
Wed Feb 24 11:24:18 2021 - [info]
Wed Feb 24 11:24:19 2021 - [info] Fetching dead master's binary logs..
Wed Feb 24 11:24:19 2021 - [info] Executing command on the dead master 10.0.0.51(10.0.0.51:3306): save_binary_logs --command=save --start_file=mysql-bin.000009 --start_pos=154 --binlog_dir=/data/binlog/ --output_file=/data/mastermha/app1//saved_master_binlog_from_10.0.0.51_3306_20210224112417.binlog --handle_raw_binlog=1 --disable_log_bin=0 --manager_version=0.58
Creating /data/mastermha/app1 if not exists.. ok.
Concat binary/relay logs from mysql-bin.000009 pos 154 to mysql-bin.000009 EOF into /data/mastermha/app1//saved_master_binlog_from_10.0.0.51_3306_20210224112417.binlog ..
Binlog Checksum enabled
Dumping binlog format description event, from position 0 to 154.. ok.
No need to dump effective binlog data from /data/binlog//mysql-bin.000009 (pos starts 154, filesize 154). Skipping.
Binlog Checksum enabled
/data/mastermha/app1//saved_master_binlog_from_10.0.0.51_3306_20210224112417.binlog has no effective data events.
Event not exists.
Wed Feb 24 11:24:19 2021 - [info] Additional events were not found from the orig master. No need to save.
Wed Feb 24 11:24:19 2021 - [info]
Wed Feb 24 11:24:19 2021 - [info] * Phase 3.3: Determining New Master Phase..
Wed Feb 24 11:24:19 2021 - [info]
Wed Feb 24 11:24:19 2021 - [info] Finding the latest slave that has all relay logs for recovering other slaves..
Wed Feb 24 11:24:19 2021 - [info] All slaves received relay logs to the same position. No need to resync each other.
Wed Feb 24 11:24:19 2021 - [info] Searching new master from slaves..
Wed Feb 24 11:24:19 2021 - [info] Candidate masters from the configuration file:
Wed Feb 24 11:24:19 2021 - [info] 10.0.0.52(10.0.0.52:3306) Version=5.7.33-log (oldest major version between slaves) log-bin:enabled
Wed Feb 24 11:24:19 2021 - [info] Replicating from 10.0.0.51(10.0.0.51:3306)
Wed Feb 24 11:24:19 2021 - [info] Primary candidate for the new Master (candidate_master is set)
Wed Feb 24 11:24:19 2021 - [info] Non-candidate masters:
Wed Feb 24 11:24:19 2021 - [info] Searching from candidate_master slaves which have received the latest relay log events..
Wed Feb 24 11:24:19 2021 - [info] New master is 10.0.0.52(10.0.0.52:3306)
Wed Feb 24 11:24:19 2021 - [info] Starting master failover..
Wed Feb 24 11:24:19 2021 - [info]
From:
10.0.0.51(10.0.0.51:3306) (current master)
+--10.0.0.52(10.0.0.52:3306)
+--10.0.0.53(10.0.0.53:3306)
To:
10.0.0.52(10.0.0.52:3306) (new master)
+--10.0.0.53(10.0.0.53:3306)
Wed Feb 24 11:24:19 2021 - [info]
Wed Feb 24 11:24:19 2021 - [info] * Phase 3.4: New Master Diff Log Generation Phase..
Wed Feb 24 11:24:19 2021 - [info]
Wed Feb 24 11:24:19 2021 - [info] This server has all relay logs. No need to generate diff files from the latest slave.
Wed Feb 24 11:24:19 2021 - [info]
Wed Feb 24 11:24:19 2021 - [info] * Phase 3.5: Master Log Apply Phase..
Wed Feb 24 11:24:19 2021 - [info]
Wed Feb 24 11:24:19 2021 - [info] *NOTICE: If any error happens from this phase, manual recovery is needed.
Wed Feb 24 11:24:19 2021 - [info] Starting recovery on 10.0.0.52(10.0.0.52:3306)..
Wed Feb 24 11:24:19 2021 - [info] This server has all relay logs. Waiting all logs to be applied..
Wed Feb 24 11:24:19 2021 - [info] done.
Wed Feb 24 11:24:19 2021 - [info] All relay logs were successfully applied.
Wed Feb 24 11:24:19 2021 - [info] Getting new master's binlog name and position..
Wed Feb 24 11:24:19 2021 - [info] mysql-bin.000012:154
Wed Feb 24 11:24:19 2021 - [info] All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='10.0.0.52', MASTER_PORT=3306, MASTER_LOG_FILE='mysql-bin.000012', MASTER_LOG_POS=154, MASTER_USER='repluser', MASTER_PASSWORD='xxx';
Wed Feb 24 11:24:19 2021 - [info] Executing master IP activate script:
Wed Feb 24 11:24:19 2021 - [info] /usr/local/bin/master_ip_failover --command=start --ssh_user=root --orig_master_host=10.0.0.51 --orig_master_ip=10.0.0.51 --orig_master_port=3306 --new_master_host=10.0.0.52 --new_master_ip=10.0.0.52 --new_master_port=3306 --new_master_user='mhauser' --new_master_password=xxx
Unknown option: new_master_user
Unknown option: new_master_password
IN SCRIPT TEST====/sbin/ifconfig eth0:1 down==/sbin/ifconfig eth0:1 10.0.0.200/24===
Enabling the VIP - 10.0.0.200/24 on the new master - 10.0.0.52
Wed Feb 24 11:24:19 2021 - [info] OK.
Wed Feb 24 11:24:19 2021 - [info] Setting read_only=0 on 10.0.0.52(10.0.0.52:3306)..
Wed Feb 24 11:24:19 2021 - [info] ok.
Wed Feb 24 11:24:19 2021 - [info] ** Finished master recovery successfully.
Wed Feb 24 11:24:19 2021 - [info] * Phase 3: Master Recovery Phase completed.
Wed Feb 24 11:24:19 2021 - [info]
Wed Feb 24 11:24:19 2021 - [info] * Phase 4: Slaves Recovery Phase..
Wed Feb 24 11:24:19 2021 - [info]
Wed Feb 24 11:24:19 2021 - [info] * Phase 4.1: Starting Parallel Slave Diff Log Generation Phase..
Wed Feb 24 11:24:19 2021 - [info]
Wed Feb 24 11:24:19 2021 - [info] -- Slave diff file generation on host 10.0.0.53(10.0.0.53:3306) started, pid: 13904. Check tmp log /data/mastermha/app1//10.0.0.53_3306_20210224112417.log if it takes time..
Wed Feb 24 11:24:20 2021 - [info]
Wed Feb 24 11:24:20 2021 - [info] Log messages from 10.0.0.53 ...
Wed Feb 24 11:24:20 2021 - [info]
Wed Feb 24 11:24:19 2021 - [info] This server has all relay logs. No need to generate diff files from the latest slave.
Wed Feb 24 11:24:20 2021 - [info] End of log messages from 10.0.0.53.
Wed Feb 24 11:24:20 2021 - [info] -- 10.0.0.53(10.0.0.53:3306) has the latest relay log events.
Wed Feb 24 11:24:20 2021 - [info] Generating relay diff files from the latest slave succeeded.
Wed Feb 24 11:24:20 2021 - [info]
Wed Feb 24 11:24:20 2021 - [info] * Phase 4.2: Starting Parallel Slave Log Apply Phase..
Wed Feb 24 11:24:20 2021 - [info]
Wed Feb 24 11:24:20 2021 - [info] -- Slave recovery on host 10.0.0.53(10.0.0.53:3306) started, pid: 13906. Check tmp log /data/mastermha/app1//10.0.0.53_3306_20210224112417.log if it takes time..
Wed Feb 24 11:24:21 2021 - [info]
Wed Feb 24 11:24:21 2021 - [info] Log messages from 10.0.0.53 ...
Wed Feb 24 11:24:21 2021 - [info]
Wed Feb 24 11:24:20 2021 - [info] Starting recovery on 10.0.0.53(10.0.0.53:3306)..
Wed Feb 24 11:24:20 2021 - [info] This server has all relay logs. Waiting all logs to be applied..
Wed Feb 24 11:24:20 2021 - [info] done.
Wed Feb 24 11:24:20 2021 - [info] All relay logs were successfully applied.
Wed Feb 24 11:24:20 2021 - [info] Resetting slave 10.0.0.53(10.0.0.53:3306) and starting replication from the new master 10.0.0.52(10.0.0.52:3306)..
Wed Feb 24 11:24:20 2021 - [info] Executed CHANGE MASTER.
Wed Feb 24 11:24:20 2021 - [info] Slave started.
Wed Feb 24 11:24:21 2021 - [info] End of log messages from 10.0.0.53.
Wed Feb 24 11:24:21 2021 - [info] -- Slave recovery on host 10.0.0.53(10.0.0.53:3306) succeeded.
Wed Feb 24 11:24:21 2021 - [info] All new slave servers recovered successfully.
Wed Feb 24 11:24:21 2021 - [info]
Wed Feb 24 11:24:21 2021 - [info] * Phase 5: New master cleanup phase..
Wed Feb 24 11:24:21 2021 - [info]
Wed Feb 24 11:24:21 2021 - [info] Resetting slave info on the new master..
Wed Feb 24 11:24:21 2021 - [info] 10.0.0.52: Resetting slave info succeeded.
Wed Feb 24 11:24:21 2021 - [info] Master failover to 10.0.0.52(10.0.0.52:3306) completed successfully.
Wed Feb 24 11:24:21 2021 - [info]
----- Failover Report -----
app1: MySQL Master failover 10.0.0.51(10.0.0.51:3306) to 10.0.0.52(10.0.0.52:3306) succeeded
Master 10.0.0.51(10.0.0.51:3306) is down!
Check MHA Manager logs at localhost.localdomain:/data/mastermha/app1/manager.log for details.
Started automated(non-interactive) failover.
Invalidated master IP address on 10.0.0.51(10.0.0.51:3306)
The latest slave 10.0.0.52(10.0.0.52:3306) has all relay logs for recovery.
Selected 10.0.0.52(10.0.0.52:3306) as a new master.
10.0.0.52(10.0.0.52:3306): OK: Applying all logs succeeded.
10.0.0.52(10.0.0.52:3306): OK: Activated master IP address.
10.0.0.53(10.0.0.53:3306): This host has the latest relay log events.
Generating relay diff files from the latest slave succeeded.
10.0.0.53(10.0.0.53:3306): OK: Applying all logs succeeded. Slave started, replicating from 10.0.0.52(10.0.0.52:3306)
10.0.0.52(10.0.0.52:3306): Resetting slave info succeeded.
Master failover to 10.0.0.52(10.0.0.52:3306) completed successfully.
Wed Feb 24 11:24:21 2021 - [info] Sending mail..
验证VIP漂移到新的Master上
[root@mysql ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
link/ether 00:0c:29:c1:d6:19 brd ff:ff:ff:ff:ff:ff
inet 10.0.0.52/24 brd 10.0.0.255 scope global noprefixroute eth0
valid_lft forever preferred_lft forever
inet 10.0.0.200/24 brd 10.0.0.255 scope global secondary eth0:1
valid_lft forever preferred_lft forever
inet6 fe80::20c:29ff:fec1:d619/64 scope link noprefixroute
valid_lft forever preferred_lft forever
收到报警邮件
如果再运行MHA,需删除下面的文件
[root@localhost ~]# ls /data/mastermha/app1/app1.failover.complete
/data/mastermha/app1/app1.failover.complete
[root@localhost ~]# rm -rf /data/mastermha/app1/app1.failover.complete
