redis cluster安装部署(测试环境)

redis 应用于web前端，做缓存和数据存取的速度是挺可观的，最近看了一些资料，手痒了，就弄了一个测试环境，两台方案，试用一下。

##Redis 集群部署##

一，方案调研:

参考博客:

http://jolestar.com/redis-ha/

http://www.luocs.com/archives/tag/redis

https://github.com/wandoulabs/codis/blob/master/doc/tutorial_zh.md

https://github.com/twitter/twemproxy

二，部署架构

三,方案简介：

需求:

    缓存:定时任务临时写入和读取的数据量大概在10G左右
    存储:日常存储量大概在40G左右
    需要7＊24小时不间断提供数据服务，所以要求redis集群要稳定，在有故障时可自动切换和数据即时恢复，并在业务增长或下降时，集群可伸缩或扩展（手动），并不影响在线业务，故调研测试这两种方案，以codis 为代理的codis server redis的集群，以twemproxy为代理的redis集群
codis 集群技术原理参考  https://github.com/wandoulabs/codis/blob/master/doc/tutorial_zh.md
twemproxy 集群技术原理参考:
https://github.com/twitter/twemproxy
http://blog.jpush.cn/redis-twemproxy-benchmark/

四,部署架构规划

服务器操作系统： centos 6.x  注意:centos 5版本的内核支持不了codis 安装编译
服务器部署规划:
192.168.5.14        keepalived ,codis相关服务
192.168.5.15        keepalived,codis相关服务
192.168.5.16        twemproxy,redis,haproxy
192.168.5.44        twemproxy,redis,haproxy

五,部署安装

1,codis 方案安装

1.1安装go 环境:
首先安装golang，下载地址：https://golang.org/dl/，最新的1.4.2版本。 
如果被墙使用golang中国下载http://golangtc.com/download。 
cd /usr/local/
wget https://storage.googleapis.com/golang/go1.4.2.linux-amd64.tar.gz
tar -zxvf go1.4.2.linux-amd64.tar.gz


vim /etc/profile
最后追加
#set go & codis environment
GOROOT=/usr/local/go
PATH=$PATH:$GOROOT/bin
GOPATH=/usr/local/codis
export GOROOT  PATH  GOPATH

source /etc/profile

1.2 安装配置codis 相关配置

cd /usr/local
git clone https://github.com/wandoulabs/codis.git
#将codis的pkg包拷贝到$GOPATH目录。
mkdir -p /usr/local/codis/src/github.com/wandoulabs/codis

cp /usr/local/codis/pkg/ /usr/local/codis/cmd/  /usr/local/codis/src/github.com/wandoulabs/codis -R

cd /usr/local/codis/ ; sh bootstrap.sh   #安装编译需要些时间，过程略过...... 大概3分钟左右，我的是这样，你的环境，哈哈，你懂的，

 1.3配置脚本:

github上原码中有示例，可以参考试用  https://github.com/wandoulabs/codis
在$path/sample 目录下:

cat startall.sh
# Bring up the whole sample cluster by running the helper scripts in order.
./start_dashboard.sh

# Pause between launching the dashboard and the remaining steps.
sleep 3

./start_redis.sh

./add_group.sh

./initslot.sh

./start_proxy.sh

./set_proxy_online.sh

cat  start_dashboard.sh


#!/bin/sh
# Launch the codis dashboard in the background, listening on :18087.
# Uses the POSIX ">/dev/null 2>&1" form: "&>" is a bash extension and is parsed
# differently by a strict POSIX /bin/sh.

    nohup ../bin/codis-config -c config.ini -L ./log/dashboard.log dashboard --addr=:18087 --http-log=./log/requests.log >/dev/null 2>&1 &



启动前注意一下redis的配置文件./redis_conf/*.conf
cat  start_redis.sh



#!/bin/sh
# Start two codis-server (redis) instances from their per-port config files and
# show the tail of each log.  Redirections use the POSIX "> file 2>&1" form
# because "&>" is a bash extension.

    nohup ../bin/codis-server ./redis_conf/6381.conf > ./log/redis_6381.log 2>&1 &

    nohup ../bin/codis-server ./redis_conf/6382.conf > ./log/redis_6382.log 2>&1 &

    echo "sleep 3s"

    # Give the servers a moment to write their startup messages.
    sleep 3

    tail -n 30 ./log/redis_6381.log

    tail -n 30 ./log/redis_6382.log


        cat   add_group.sh


    #!/bin/sh
    # Register one master per server group with codis-config.
    # The messages now name the group id and address that the command really uses.

    echo "add group 5 with a master(192.168.5.44:6381), Notice: do not use localhost when in production"

    ../bin/codis-config -c config.ini -L ./log/cconfig.log server add 5 192.168.5.44:6381 master



    echo "add group 6 with a master(192.168.5.44:6382), Notice: do not use localhost when in production"

    ../bin/codis-config -c config.ini -L ./log/cconfig.log server add 6 192.168.5.44:6382 master

        cat  ./initslot.sh

        #!/bin/sh
        # Assign the 1024 slots to server groups 5 and 6 and mark them online.

    echo "slots initializing..."

    # NOTE(review): the "slot init -f" call below is commented out, so this
    # step currently only prints the two messages around it.
    #../bin/codis-config -c config.ini slot init -f

    echo "done"



    echo "set slot ranges to server groups..."

    # Slots 0-511 go to group 5.
    ../bin/codis-config -c  config.ini slot range-set 0 511 5 online

    # Slots 512-1023 go to group 6.
    ../bin/codis-config -c  config.ini slot range-set 512 1023 6 online

    echo "done"


        cat  ./start_proxy.sh
        #!/bin/sh
        # Take the proxy entry proxy_g5_g6 offline, then launch a new
        # codis-proxy process in the background and show the tail of its log.

    echo "shut down proxy_g5_g6.."

    ../bin/codis-config -c config.ini proxy offline proxy_g5_g6

    echo "done"



    echo "start new proxy..."

    # The proxy listens on 19000; its HTTP address is 11000.
    nohup ../bin/codis-proxy --log-level info -c config.ini -L ./log/proxy.log  --cpu=8 --addr=0.0.0.0:19000 --http-addr=0.0.0.0:11000 &

    echo "done"



    echo "sleep 3s"

    sleep 3

    tail -n 30 ./log/proxy.log



        cat  ./set_proxy_online.sh

        #!/bin/sh
        # Mark the proxy entry proxy_g5_g6 as online.

        echo "set proxy_g5_g6 online"

        ../bin/codis-config -c config.ini proxy online proxy_g5_g6

        echo "done"

1.4 另有自己编写的一个 python 脚本，功能类似上面所有脚本（仍在测试中）：

#!/usr/bin/env python
# coding:utf8
#author:shantuwqk@163.com


import os,sys,commands,time
from subprocess import Popen,PIPE
from mako.template import Template


codis_config = "config.ini"
codis_root = "/data/setup/codis/data"

def codis_dashboard(opt):
    """Start or stop the codis dashboard process.

    opt -- "start" launches ``codis-config ... dashboard`` in the background
           from ``codis_root``; "stop" looks up the dashboard pid(s) with
           ``ps`` and sends them ``kill -9``.  Any other value does nothing.
    """
    if opt == "start":
        exec_cmd = "cd %s; nohup ../bin/codis-config -c %s -L ./log/dashboard.log dashboard --addr=:18087 --http-log=./log/http.log &>/dev/null &" % (codis_root, codis_config)
        # The command is backgrounded, so the status reflects the launching
        # shell, not the dashboard itself.
        s = os.system(exec_cmd)
        if s == 0:
            print("\033[32;1m codis dashboard start .... OK\033[0m")
        else:
            print("\033[31;1m codis dashboard start .... Error\033[0m")
    elif opt == "stop":
        # "grep -v grep" keeps the lookup pipeline (whose own command line
        # contains both search words) out of the result, as codis_proxy does.
        find_cmd = "ps aux |grep \"codis-config\"| grep \"dashboard\"| grep -v grep|awk '{print $2}'"
        dashboard_id = " ".join(commands.getstatusoutput(find_cmd)[1].split())
        # With no pid found this runs a bare "kill -9", which fails and is
        # reported through the Error branch.
        s, v = commands.getstatusoutput("kill -9 %s" % dashboard_id)
        if s == 0:
            print("\033[32;1m KILL codis dashboard id:[%s] OK\033[0m" % dashboard_id)
        else:
            print("\033[31;1m KILL codis dashboard id:[%s] Error\033[0m" % dashboard_id)


def codis_redis(opt, port):
    """Start or stop the codis-server (redis) instance for one port.

    opt  -- "start" launches codis-server with ``./conf/<port>.conf`` in the
            background from ``codis_root``; "stop" kills the matching
            process with ``kill -9``.  Any other value does nothing.
    port -- port number (string or int) identifying the instance.
    """
    if opt == "start":
        exec_cmd = "cd %s; nohup ../bin/codis-server ./conf/%s.conf &> ./log/%s.log &" % (codis_root, port, port)
        print(exec_cmd)
        # Run the command exactly once; the original listing executed it a
        # second time through a stray debug print.
        s = os.system(exec_cmd)
        if s == 0:
            print("\033[32;1m start redis %s server is OK \033[0m" % (port))
        else:
            print("\033[31;1m start redis %s server is Error\033[0m" % (port))
    elif opt == "stop":
        # "grep -v grep" keeps the lookup pipeline itself out of the pid list.
        find_cmd = "ps aux | grep codis-server| grep %s| grep -v grep|awk '{print $2}'" % port
        redis_id = " ".join(commands.getstatusoutput(find_cmd)[1].split())
        s, v = commands.getstatusoutput("kill -9 %s" % redis_id)
        if s == 0:
            print("\033[32;1m KILL redis port:[%s] pid:[%s] is OK\033[0m" % (port, redis_id))
        else:
            print("\033[31;1m KILL redis port:[%s] pid:[%s] is Error\033[0m" % (port, redis_id))
        print(v)
def codis_group(gid, addr, gtag):
    """Add a server to a codis server group via ``codis-config server add``.

    gid  -- server group id.
    addr -- "host:port" of the redis instance; the port part also names the
            log file, so an address without ":" raises IndexError.
    gtag -- role passed to codis-config (the caller in this file uses "master").
    """
    exec_cmd = "cd %s; ../bin/codis-config -c %s -L ./log/%s_addgroup.log server add %s %s %s" % (codis_root, codis_config, addr.split(':')[1], gid, addr, gtag)
    print("start add codis cluster group info..... \n %s" % exec_cmd)
    s, v = commands.getstatusoutput(exec_cmd)
    print("%s %s" % (s, v))
    if s == 0:
        print("\033[32;1m add group:[%s] with a gtag:[%s] addr:(%s) OK\033[0m" % (gid, gtag, addr))
    else:
        print("\033[31;1m add group:[%s] with a gtag:[%s] addr:(%s) Error\033[0m" % (gid, gtag, addr))

def remove_fenc():
    remove_fenc = "cd %s;../bin/codis-config -c %s action remove-fence"%(codis_root,codis_config)
    s,v = commands.getstatusoutput(remove_fenc)
    print "remove fenc proxy info",v,

def slot_init():
    """Run ``codis-config slot init -f`` and print the command output."""
    init_cmd = "cd %s; ../bin/codis-config -c %s slot init -f" % (codis_root, codis_config)
    # The exit status is ignored; only the output is shown.
    print("INIT SLOT ........ \n %s" % commands.getstatusoutput(init_cmd)[1])



def codis_initslot(gid, slot_range):
    """Assign a slot range to a server group and mark it online.

    gid        -- server group id that receives the slots.
    slot_range -- sequence whose first two items are the first and last
                  slot number passed to ``slot range-set``.
    """
    exec_cmd = "cd %s; ../bin/codis-config -c %s slot range-set %s %s %s online" % (codis_root, codis_config, slot_range[0], slot_range[1], gid)
    print(exec_cmd)
    s, v = commands.getstatusoutput(exec_cmd)
    print(v)
    if s == 0:
        print("\033[32;1m slot init:[%s],gid:[%s] OK\033[0m" % (slot_range, gid))
    else:
        print("\033[31;1m slot init:[%s],gid:[%s] Error\033[0m" % (slot_range, gid))
def offline_proxy():
    """Set the proxy named by ``proxy_id`` in the codis config to offline."""
    # Grep the proxy_id line from the config file and keep the text after '='.
    grep_result = commands.getstatusoutput("cat %s|grep proxy_id" % codis_config)
    tag = grep_result[1].split('=')[1]
    offline_cmd = "cd %s; ../bin/codis-config -c %s proxy offline %s" % (codis_root, codis_config, tag)
    print("Shutdown %s offline....." % tag)
    print(offline_cmd)
    # The result of the offline command is not inspected.
    commands.getstatusoutput(offline_cmd)


def codis_proxy(opt):
    """Start or stop the codis proxy named by ``proxy_id`` in the config.

    opt -- "start" launches codis-proxy in the background, waits five
           seconds and then marks the proxy online; "stop" marks it offline
           and kills every codis-proxy process.  Any other value does nothing.

    Raises IndexError when the config has no ``proxy_id=...`` line.
    """
    # Text after '=' on the proxy_id line; stripped so stray whitespace does
    # not end up in log file names or messages.
    proxy_tag = commands.getstatusoutput("cat %s|grep proxy_id" % codis_config)[1].split('=')[1].strip()
    if opt == "start":
        exec_new_cmd = "cd %s; nohup ../bin/codis-proxy --log-level info -c %s -L ./log/%s.log --cpu=8 --addr=0.0.0.0:19000 --http-addr=0.0.0.0:11000 &" % (codis_root, codis_config, proxy_tag)
        print(exec_new_cmd)
        s = os.system(exec_new_cmd)
        if s == 0:
            print("\033[32;1m codis proxy tag:[%s]start OK\033[0m" % (proxy_tag))
        else:
            print("\033[31;1m codis proxy tag:[%s]start Error\033[0m" % (proxy_tag))
        # Wait before switching the freshly launched proxy online.
        time.sleep(5)
        exec_online_cmd = "cd %s;../bin/codis-config -c %s proxy online %s" % (codis_root, codis_config, proxy_tag)
        print("Set %s online .....!!" % (proxy_tag))
        print(exec_online_cmd)
        print(commands.getstatusoutput(exec_online_cmd)[1])
    elif opt == "stop":
        print("Shutdown %s offline....." % proxy_tag)
        exec_down_cmd = "cd %s; ../bin/codis-config -c %s proxy offline %s" % (codis_root, codis_config, proxy_tag)
        print(commands.getstatusoutput(exec_down_cmd)[1])

        exec_proxy_id = "ps aux |grep codis-proxy| grep -v grep|awk '{print $2}'"
        # Prints the (status, output) pair of the kill command.
        print(commands.getstatusoutput("kill -9 `%s`" % exec_proxy_id))
def get_client_ip():
    """Return the address reported for an ``em*`` network interface.

    Runs an ``ifconfig | awk`` pipeline and returns the second
    whitespace-separated field of its output.  Prints an error and returns
    None when the pipeline fails or yields fewer than two fields.
    """
    # Triple-quoted raw string: the awk program contains both quote styles.
    # NOTE(review): backslashes were lost elsewhere in this listing; the
    # pattern /[0-9]./ may originally have been /[0-9]\./ -- confirm.
    probe_cmd = r"""ifconfig -a| awk '/^em/ {;a=$1;FS=":"; nextline=NR+1; next}{ if (NR==nextline) { split($2,b," ")}{ if ($2 ~ /[0-9]./) {print a,b[1]}; FS=" "}}'|uniq -c|awk '{print $2,$3}'"""
    eth_inter = commands.getstatusoutput(probe_cmd)
    fields = eth_inter[1].split()
    if eth_inter[0] == 0 and len(fields) > 1:
        return fields[1]
    print("get client ip error")
    return None


def slot_range(n, group, ports=None):
    """Split slots ``0 .. n-1`` into ``group`` contiguous ranges.

    n     -- total number of slots.
    group -- number of server groups to spread the slots over.
    ports -- optional mapping of group id -> redis port; defaults to the
             module-level ``redis_instance_port``.

    Returns a dict keyed by group id (starting at 1).  Each value is a list
    holding the inclusive ``(first, last)`` slot tuple, followed by the port
    for that group when ``ports`` has an entry for it.  Every group gets
    ``n // group`` slots, and the last group also takes the remainder.

    Raises ValueError unless ``1 <= group <= n``.
    """
    if group < 1 or n < group:
        raise ValueError("need 1 <= group <= n, got n=%r group=%r" % (n, group))
    if ports is None:
        # Looked up at call time so existing two-argument callers keep working.
        ports = redis_instance_port

    # Floor division keeps the result an int on every Python version.
    per = n // group
    slot_dict = {}
    for gid in range(1, group + 1):
        first = (gid - 1) * per
        # The final group absorbs whatever is left after the even split.
        last = n - 1 if gid == group else gid * per - 1
        slot_dict[gid] = [(first, last)]

    for gid, port in ports.items():
        if gid in slot_dict:
            slot_dict[gid].append(port)
    return slot_dict




def help_prompt():
    """Print the command-line usage text for this script."""
    usage = """
Manage a codis test cluster (dashboard, codis-server instances, proxy).
Options include:
--version : Prints the version number
--help    : Helpful tips
--task    : To operate on missions:
            [init_codis_cluster]
            [start_dashborad|stop_dashboard|start_redis <port>|stop_redis <port>|start_proxy|stop_proxy|stopall]
sample    : python %(prog)s --task init_codis_cluster
          : python %(prog)s --task start_redis 6381
"""
    # Show the name the script was actually invoked with.
    print(usage % {"prog": sys.argv[0]})




if __name__ == "__main__":
    # Command-line entry point.
    #   --version                      print the version
    #   --help                         print usage
    #   --task <name> [port]           run one management task
    # Anything unrecognised (including a missing task name or port) prints
    # the usage text instead of raising IndexError.

    if len(sys.argv) < 2:
        print("no argument")
        sys.exit()

    option = sys.argv[1][2:] if sys.argv[1].startswith('--') else None
    task = sys.argv[2] if len(sys.argv) > 2 else None
    task_arg = sys.argv[3] if len(sys.argv) > 3 else None

    if option == 'version':
        print('Version 0.1')
    elif option == 'help':
        help_prompt()
    elif option == "task" and task == "init_codis_cluster":
        codis_dashboard('start')
        # Group id -> port of the redis instance to start.  Kept at module
        # level because slot_range() reads this name as a global.
        redis_instance_port = {1: '6381', 2: "6382"}
        for k, p in redis_instance_port.items():
            # Render the redis config for this port from the template.
            template_file = Template(filename="./conf/redis.master.conf.template", module_directory='tmp/test').render(port="%s" % p, memsize=4)
            with open('./conf/%s.conf' % p, 'w') as conf:
                conf.write(template_file)
            # Start the redis master instance.
            codis_redis('start', p)
        # Computed once and reused for both the group and the slot steps.
        slots = slot_range(1024, len(redis_instance_port.keys()))
        print(slots)
        time.sleep(5)
        remove_fenc()
        slot_init()
        client_ip = get_client_ip()
        for k, v in slots.items():
            # v is [(first_slot, last_slot), port]
            codis_group(k, '%s:%s' % (client_ip, v[1]), 'master')
        for k, v in slots.items():
            codis_initslot(k, v[0])
        offline_proxy()
        codis_proxy('start')
    elif option == "task" and task in ("start_dashborad", "start_dashboard"):
        # The misspelt task name is kept so existing invocations still work.
        codis_dashboard('start')
    elif option == "task" and task == "stop_dashboard":
        codis_dashboard("stop")
    elif option == "task" and task == "start_redis" and task_arg is not None:
        codis_redis('start', task_arg)
    elif option == "task" and task == "stop_redis" and task_arg is not None:
        codis_redis('stop', task_arg)
    elif option == "task" and task == "start_proxy":
        codis_proxy("start")
    elif option == "task" and task == "stop_proxy":
        codis_proxy("stop")
    elif option == "task" and task == "stopall":
        codis_proxy("stop")
        commands.getstatusoutput("killall codis-server")
        codis_dashboard("stop")
    else:
        help_prompt()

#一般情况下codis集群在安装部署初始化的时候，需要先期规划，已经做好addgrop和initslot工作，后续看需求再补充

2,twemproxy方案安装:

安装过程参考:

https://github.com/twitter/twemproxy

2.1注意，configure 编译之前需要系统安装 autoconf（提供 autoreconf 命令）
    当前用的 autoconf-2.64.tar.gz 这个包，
    tar zxvf autoconf-2.64.tar.gz ; cd autoconf-2.64 ; ./configure && make && make install 即可
2.2 下载安装 twemproxy
    git clone git@github.com:twitter/twemproxy.git
    cd twemproxy
    autoreconf -fvi
    mkdir /usr/local/twemproxy
    ./configure --prefix=/usr/local/twemproxy --enable-debug=full
    make
    make install 
    src/nutcracker -h

2.3 配置 twemproxy 代码 cat /usr/local/twemproxy/etc/544cluster.yml

    # twemproxy server pool "alpha": every setting must be nested under the
    # pool name, otherwise the pool is empty and the keys are read as pools.
    alpha:
      listen: 0.0.0.0:12000
      hash: fnv1a_64
      distribution: ketama
      auto_eject_hosts: true
      redis: true
      server_retry_timeout: 2000
      server_failure_limit: 1
      # Each entry is host:port:weight.
      servers:
        - 192.168.5.44:6479:1
        - 192.168.5.16:6479:1

配置详解参考如下:

     http://cpjsjxy.iteye.com/blog/2090333

3.haproxy 安装部署：

3.1 下载安装 haproxy-1.5.10.tar.gz

    tar zxvf haproxy-1.5.10.tar.gz ; cd haproxy-1.5.10; make TARGET=linux26 ARCH=x86_64 && make install

3.2 配置文件：

global

    daemon

    nbproc 6

    pidfile /var/run/haproxy.pid

    ulimit-n 65535



defaults

    mode tcp                        #mode { tcp|http|health }，tcp 表示4层，http表示7层，health仅作为健康检查使用

    retries 2                       #尝试2次失败则从集群摘除

    option redispatch               #如果失效则强制转换其他服务器

    option abortonclose             #连接数过大自动关闭

    maxconn 1024                    #最大连接数

    timeout connect 1d              #连接超时时间，重要，hive查询数据能返回结果的保证

    timeout client 1d               #同上

    timeout server 1d               #同上

    timeout check 2000              #健康检查时间

    log 127.0.0.1 local0 err #[err warning info debug]



listen  admin_stats                     #定义管理界面

    bind 0.0.0.0:8888               #管理界面访问IP和端口

    mode http                       #管理界面所使用的协议

    maxconn 10          #最大连接数

    stats refresh 30s               #30秒自动刷新

    stats uri /                     #访问url

    stats realm Hive Haproxy       #验证窗口提示

    stats auth admin:123456         #401验证用户名密码



listen codis-proxy-ha-20000        #codis-proxy

    bind 0.0.0.0:20000             #ha作为proxy所绑定的IP和端口

    mode tcp                        #以4层方式代理，重要

    balance leastconn               #调度算法 'leastconn' 最少连接数分配，或者 'roundrobin'，轮询分配

    maxconn 1024                    #最大连接数

    server codis-proxy2-master 192.168.5.15:19000  check inter 5000 rise 1 fall 2

    server codis-proxy1-master 192.168.5.14:19000  check inter 5000 rise 1 fall 2

    #server codis-server-5156381-master 192.168.5.15:6381  check inter 5000 rise 1 fall 2

    #server codis-server-5156382-master 192.168.5.15:6382  check inter 5000 rise 1 fall 2





listen twemproxy-ha-21000             

    bind 0.0.0.0:21000              

    mode tcp                        

    balance leastconn              

    maxconn 1024 

    server redis-544-master 192.168.5.44:6479  check inter 5000 rise 1 fall 2

    server redis-516-master 192.168.5.16:6479  check inter 5000 rise 1 fall 2



3.3 haproxy 启动关闭：

  /usr/local/sbin/haproxy -f /data/setup/haproxy-1.5.10/conf/haproxy.cfg

4.keepalived安装部署: yum install keepalived

  主从配置是一样的，特别注意的配置项：
   state BACKUP
 为了避免网络不稳定的情况下，主从抢占，配为BACKUP
   interface em2
 VIP 绑定的网卡接口指定

cat /etc/keepalived/keepalived.conf

! Configuration File for keepalived

global_defs {
notification_email {
wangqiankun@lashou-inc.com
}
notification_email_from nagios@lashou.com
smtp_server 127.0.0.1
smtp_connect_timeout 30
router_id ha_01
}

vrrp_instance HA_01 {
state BACKUP
nopreempt
interface em2
virtual_router_id 15
priority 99
advert_int 1
authentication {
    auth_type PASS
    auth_pass 2222
}
virtual_ipaddress {
    192.168.5.90
    192.168.5.91
}
}

virtual_server 192.168.5.90 20000 {
delay_loop 6
lb_algo lc
lb_kind DR
nat_mask 255.255.255.0
#  persistence_timeout 10
protocol TCP

real_server 192.168.5.44 20000 {
    weight 1
    TCP_CHECK {
    connect_timeout 5
    nb_get_retry 3
    delay_before_retry 3
    connect_port 20000
    }
}

real_server 192.168.5.16 20000 {
    weight 1
    TCP_CHECK {
    connect_timeout 5
    nb_get_retry 3
    delay_before_retry 3
    connect_port 20000
    }
}

}
virtual_server 192.168.5.91 21000 {
delay_loop 6
lb_algo lc
lb_kind DR
nat_mask 255.255.255.0
#   persistence_timeout 10
protocol TCP


real_server 192.168.5.44 21000 {
    weight 1
    TCP_CHECK {
    connect_timeout 5
    nb_get_retry 3
    delay_before_retry 3
    connect_port 21000
    }
}

real_server 192.168.5.16 21000 {
    weight 1
    TCP_CHECK {
    connect_timeout 5
    nb_get_retry 3
    delay_before_retry 3
    connect_port 21000
    }
}

}

proxy 做为keepalived + lvs 的后端真实机需要安装lvs 并加如下脚本

    cat lvs_re.sh 
    #!/bin/bash
    # LVS-DR real-server helper.
    #   start  - bind both VIPs to loopback aliases, add host routes and set
    #            the arp_ignore/arp_announce kernel parameters
    #   stop   - remove the aliases and routes and reset the ARP parameters
    #   status - report whether both aliases and both routes are present
    WEB_VIP1=192.168.5.90
    WEB_VIP2=192.168.5.91

    . /etc/rc.d/init.d/functions

    case "$1" in
    start)
    ifconfig lo:0 $WEB_VIP1 netmask 255.255.255.255 broadcast $WEB_VIP1
    ifconfig lo:1 $WEB_VIP2 netmask 255.255.255.255 broadcast $WEB_VIP2
    /sbin/route add -host $WEB_VIP1 dev lo:0
    /sbin/route add -host $WEB_VIP2 dev lo:1
    echo "1" >/proc/sys/net/ipv4/conf/lo/arp_ignore
    echo "2" >/proc/sys/net/ipv4/conf/lo/arp_announce
    echo "1" >/proc/sys/net/ipv4/conf/all/arp_ignore
    echo "2" >/proc/sys/net/ipv4/conf/all/arp_announce
    sysctl -p >/dev/null 2>&1
    echo "RealServer Start OK"

    ;;
    stop)
    ifconfig lo:0 down
    ifconfig lo:1 down
    route del $WEB_VIP1 >/dev/null 2>&1
    route del $WEB_VIP2 >/dev/null 2>&1
    echo "0" >/proc/sys/net/ipv4/conf/lo/arp_ignore
    echo "0" >/proc/sys/net/ipv4/conf/lo/arp_announce
    echo "0" >/proc/sys/net/ipv4/conf/all/arp_ignore
    echo "0" >/proc/sys/net/ipv4/conf/all/arp_announce
    echo "RealServer Stoped"
    ;;

    status)
        # Status of LVS-DR real server: each VIP gets its own variables so
        # one check cannot overwrite the other.
        islothere1=`/sbin/ifconfig lo:0 | grep "$WEB_VIP1"`
        islothere2=`/sbin/ifconfig lo:1 | grep "$WEB_VIP2"`
        # Match "lo" so the route is found whether the interface column
        # shows the alias name (lo:0) or the base device (lo).
        isrothere1=`netstat -rn | grep "lo" | grep "$WEB_VIP1"`
        isrothere2=`netstat -rn | grep "lo" | grep "$WEB_VIP2"`
        if [ -z "$islothere1" ] || [ -z "$islothere2" ] || [ -z "$isrothere1" ] || [ -z "$isrothere2" ]; then
        # A loopback alias or a host route is missing for at least one VIP.
        echo "LVS-DR real server Stopped."
        else
        echo "LVS-DR Running."
        fi
    ;;
    *)
        # Invalid entry.
        echo "$0: Usage: $0 {start|status|stop}"
        exit 1
    ;;
    esac
    exit 0

4.2.启动keepalived 和 lvs 服务加载配置

/etc/init.d/keepalived start
./lvs_re.sh start

六：日常维护记录：后续维护过程中待记录