cloudera5.11.2安装配置手册

1 概述

本文档用于搭建impala开发测试环境,配置yarn-ha和hdfs-ha

2 服务器规划配置

具体请参考excel文档的部署规划sheet

东方通impala环境.png

3 部署软件版本和下载地址

mysql5.7.24:
cdh相关软件下载地址:
http://archive.cloudera.com/cm5/cm/5/cloudera-manager-centos7-cm5.11.2_x86_64.tar.gz
http://archive.cloudera.com/cdh5/parcels/5.11.2.4/CDH-5.11.2-1.cdh5.11.2.p0.4-el7.parcel
http://archive.cloudera.com/cdh5/parcels/5.11.2.4/CDH-5.11.2-1.cdh5.11.2.p0.4-el7.parcel.sha1  注:修改文件名称把sha1中的1去掉
http://archive.cloudera.com/cdh5/parcels/5.11.2.4/manifest.json

jdk-8u144-linux-x64
nginx1.14
ansible
sshpass用法:
sshpass -p '1w98.77B32' ssh -o StrictHostKeyChecking=no root@168.1.2.180 "hostnamectl set-hostname gxjh01"

4 配置

4.1 安装配置管理工具ansible

此工具用于批量配置管理服务器

4.1.1 安装

yum install ansible -y

4.1.2 修改配置文件

修改/etc/ansible/ansible.cfg,取消以下配置项的注释

host_key_checking = False

4.1.3 配置ansible组

编辑/etc/ansible/hosts

[cdh]
168.1.2.180    ansible_ssh_user=root ansible_ssh_pass=1w98.77B32
168.1.2.181    ansible_ssh_user=root ansible_ssh_pass=1w98.77B32
168.1.2.182    ansible_ssh_user=root ansible_ssh_pass=1w98.77B32
168.1.2.183    ansible_ssh_user=root ansible_ssh_pass=1w98.77B32

4.2 初始化服务器环境

4.2.1 执行脚本

关闭selinux,配置防火墙,配置ntp,添加sudo用户weihu,修改文件描述符,配置主机/etc/hosts文件。 在ansible中执行

ansible cdh -m script -a '/root/centos7_init.sh' -f 5

centos7_init.sh

#!/bin/bash
# Filename:    centos7_init.sh
# Revision:    1.0
# Date:        2016/12/15
# Author: 
# Email:  
# Website:     no
# Description: centos7系统初始化

#1. Write the MySQL 5.7 yum repository definition.
#   Note: if the host has no internet access, set up a local yum mirror and
#   point baseurl at it instead.
#   This function is named "yum" and therefore shadows the yum binary inside
#   this script; the package manager itself must be called as /usr/bin/yum.
function yum(){
    local repo_file=/etc/yum.repos.d/mysql5.7.repo

    touch "$repo_file"
    # Overwrite (not append) so the repo file always holds exactly this stanza.
    printf '%s\n' \
        '[mysql5.7]' \
        'name=mysql5.7' \
        'baseurl=https://mirrors.tuna.tsinghua.edu.cn/mysql/yum/mysql57-community-el7/' \
        'enabled=1' \
        'gpgcheck=0' > "$repo_file"
}

#2. Install ntp, write its configuration, do a one-off clock sync and switch
#   the host from chronyd to ntpd.
#   Both configuration files are overwritten (not appended), so running the
#   script more than once does not pile up duplicate settings.
function ntp(){
    /usr/bin/yum -y install ntp

    # Replace the ntp configuration wholesale. The delimiter is quoted so the
    # content is written literally.
    cat > /etc/ntp.conf << 'EOF'
driftfile /var/lib/ntp/drift
restrict default nomodify notrap nopeer noquery
restrict 127.0.0.1
restrict ::1
server ntp1.aliyun.com
server ntp2.aliyun.com
server ntp3.aliyun.com
server ntp4.aliyun.com
server ntp5.aliyun.com
includefile /etc/ntp/crypto/pw
keys /etc/ntp/keys
disable monitor
EOF

    # ntpd command-line options plus the hardware-clock sync flag.
    cat > /etc/sysconfig/ntpd << 'EOF'
#Command line options for ntpd
SYNC_HWCLOCK=yes
OPTIONS="-g"
EOF

    # Step the system clock once; only write it to the hardware clock when the
    # sync succeeded.
    /usr/sbin/ntpdate ntp2.aliyun.com && /usr/sbin/hwclock -w

    # chronyd is stopped and disabled before ntpd is started and enabled.
    # Output is discarded on purpose: chronyd may not be installed at all.
    /usr/bin/systemctl stop chronyd &> /dev/null
    /usr/bin/systemctl disable chronyd &> /dev/null
    /usr/bin/systemctl start ntpd &> /dev/null
    /usr/bin/systemctl enable ntpd &> /dev/null
}

#3. Stop firewalld and keep it from starting at boot.
#   If the data center runs vulnerability scans it is advisable to keep the
#   firewall on; in that case do not run this function.
function close_firewalld(){
    local action
    # All output is discarded, so a host without firewalld stays silent.
    for action in stop disable; do
        /usr/bin/systemctl "$action" firewalld.service &> /dev/null
    done
}
#4. Turn SELinux enforcement off now and disable it in the persistent config.
function close_selinux(){
    local selinux_cfg=/etc/selinux/config

    # Runtime switch (takes effect immediately).
    setenforce 0
    # Persistent switch: only an "enforcing" setting is rewritten.
    sed -i 's/SELINUX=enforcing/SELINUX=disabled/g' "$selinux_cfg"
}

#5. Raise the open-file and process limits for all users to 65536
#   by appending soft/hard entries to limits.conf.
function optimization(){
    local resource level

    # Emits, in order: soft nofile, hard nofile, soft nproc, hard nproc.
    for resource in nofile nproc; do
        for level in soft hard; do
            echo "* ${level} ${resource} 65536" >> /etc/security/limits.conf
        done
    done
}
#6. Append the four cluster nodes (gxjh01..gxjh04) to /etc/hosts.
function host(){
    printf '%s\n' \
        '168.1.2.180    gxjh01' \
        '168.1.2.181    gxjh02' \
        '168.1.2.182    gxjh03' \
        '168.1.2.183    gxjh04' >> /etc/hosts
}
#7. Create the maintenance account "weihu" and grant it passwordless sudo.
#   The sudo rule is written to its own drop-in file instead of being spliced
#   into /etc/sudoers at a fixed line number, which silently does nothing (or
#   lands in the wrong place) when the file layout differs and duplicates the
#   rule on every re-run.
#   NOTE(review): relies on the stock "#includedir /etc/sudoers.d" directive
#   being present in /etc/sudoers - confirm on customised images.
#   NOTE(review): the password is hard-coded here; change it after setup.
function add_user(){
    local sudo_dropin=/etc/sudoers.d/weihu

    # Only create the account when it does not exist yet, so re-runs are clean.
    id weihu &> /dev/null || useradd weihu
    echo "weihu@123!"|passwd --stdin weihu
    history -c

    echo 'weihu  ALL=(ALL)      NOPASSWD:ALL' > "$sudo_dropin"
    chmod 0440 "$sudo_dropin"
    # Never leave a syntactically broken sudoers fragment behind.
    if ! visudo -cf "$sudo_dropin" > /dev/null; then
        echo "add_user: invalid sudoers entry, removing ${sudo_dropin}" >&2
        rm -f "$sudo_dropin"
        return 1
    fi
}
#8. Append the JDK environment variables to the login profile.
#   Note: this only writes the variables; the JDK archive itself is unpacked
#   into /usr/local/java in a separate step of this manual (section 4.2.3).
# Arguments: $1 - profile file to append to (optional, default /etc/profile)
function jdk(){
    local profile_file=${1:-/etc/profile}

    # The heredoc delimiter is quoted so that $JAVA_HOME and $PATH are written
    # literally and resolved at login time. With an unquoted delimiter they
    # would be expanded while this script runs, producing entries such as
    # "/lib/dt.jar" and a PATH frozen to the installing shell's value.
    cat >> "$profile_file" << 'EOF'
export JAVA_HOME=/usr/local/java/jdk1.8.0_144
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$JAVA_HOME/bin:$PATH
EOF
}
#9. Kernel tuning for CDH: switch transparent hugepages to "never" for the
#   running system and lower vm.swappiness to 10.
function cdh(){
    local thp_dir=/sys/kernel/mm/transparent_hugepage

    echo never > "${thp_dir}/defrag"
    echo never > "${thp_dir}/enabled"

    # Persist the swappiness setting, then reload sysctl.conf.
    echo "vm.swappiness = 10" >> /etc/sysctl.conf
    /usr/sbin/sysctl -p
}
#10. Register boot-time actions in rc.local: re-apply the transparent
#    hugepage settings and restart the Cloudera Manager agent. The server
#    restart line is written commented out.
function rc(){
    local rc_local=/etc/rc.d/rc.local

    # rc.local must be executable for its content to run at boot.
    chmod +x "$rc_local"
    cat >> "$rc_local" << 'EOF'
echo never > /sys/kernel/mm/transparent_hugepage/defrag
echo never > /sys/kernel/mm/transparent_hugepage/enabled
#/opt/cm-5.11.2/etc/init.d/cloudera-scm-server restart
/opt/cm-5.11.2/etc/init.d/cloudera-scm-agent restart
EOF
}
#11. sshd tuning: activate the PermitRootLogin directive and turn off DNS
#    lookups (reverse lookups can make ssh/ftp logins very slow).
#    This function is named "ssh" and shadows the ssh client inside this
#    script; it is defined here but not called from init.
#    NOTE(review): the PermitRootLogin line is only uncommented, so whatever
#    value the shipped config carries becomes active - confirm this matches
#    the intended login policy.
# Arguments: $1 - sshd config file to edit (optional, default /etc/ssh/sshd_config)
function ssh(){
    local sshd_conf=${1:-/etc/ssh/sshd_config}

    sed -i 's/#PermitRootLogin/PermitRootLogin/g' "$sshd_conf"
    # Rewrite any UseDNS line, commented or not and whatever its value, so a
    # shipped "#UseDNS yes" or an active "UseDNS yes" is switched off too.
    sed -i -E 's/^#?[[:space:]]*UseDNS[[:space:]]+.*/UseDNS no/' "$sshd_conf"
    systemctl restart sshd
}
#Entry point: run the initialisation steps one after another, in this order.
#The ssh step is defined in this script but is not part of the sequence.
function init(){
    local step

    for step in yum ntp close_firewalld close_selinux optimization \
                host add_user jdk cdh rc; do
        "$step"
    done
}
init

4.2.2 配置主机互信

-- 各服务器执行ssh-keygen
ansible cdh -m shell -a "ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa" -f 10
-- 生成authorized_keys文件
ansible cdh -m shell -a "cat /root/.ssh/id_rsa.pub" -f 10 > /tmp/authorized_keys
-- 去除多余的行
sed -i '/SUCCESS/d' /tmp/authorized_keys
-- 分发authorized_keys至每台服务器
ansible cdh -m copy -a "src=/tmp/authorized_keys dest=/root/.ssh/ owner=root group=root mode=0644" -f 10

4.2.3 配置jdk

由于4.2.1 中脚本已包含配置环境变量,因此直接执行以下操作即可

ansible cdh -m copy  -a "src=/home/data/software/jdk-8u144-linux-x64.tar.gz dest=/root/jdk-8u144-linux-x64.tar.gz owner=root group=root mode=0644" -f 10
ansible cdh -m shell -a "mkdir -p /usr/local/java/ " -f 5
ansible cdh -m shell -a "tar -zxf /root/jdk-8u144-linux-x64.tar.gz -C /usr/local/java/ " -f 5

ansible cdh -m shell -a "source /etc/profile&& java -version " -f 5

4.3 配置mysql

mysql安装在168.1.2.180 服务器中

4.3.1 配置mysql5.7.repo

文件路径:/etc/yum.repos.d/

[mysql5.7]
name=mysql5.7
baseurl=https://mirrors.tuna.tsinghua.edu.cn/mysql/yum/mysql57-community-el7/
enabled=1
gpgcheck=0

4.3.1 安装

注:这些服务器安装了多余的包需先卸载掉:yum -y remove mariadb-libs

yum -y install mysql-community-server
mkdir -p /home/data/mysql/data
mkdir -p /home/data/mysql/logs
chown -R mysql:mysql /home/data/mysql

4.3.2 mysql配置文件

/etc/my.cnf

# mysql配置文件
[mysqld]
socket = /var/lib/mysql/mysql.sock
#禁止域名解析
skip-name-resolve
#数据目录
datadir=/home/data/mysql/data
#mysql的server-id
server-id=249
#超时时间
wait_timeout=100
interactive_timeout=100
### binlog设置 ###
# binlog日志设置
# 不强制限制存储函数创建,这个变量也适用于触发器创建
log_bin_trust_function_creators = 1
# binlog存储地址
log_bin = /home/data/mysql/logs/mysql-bin
# binlog格式
binlog_format = row
# binlog过期时间
expire-logs-days = 7
# binlog缓存日志
binlog_cache_size = 2M
# 自增初始值
auto_increment_offset = 1
# 自增间隔
auto_increment_increment = 2 
# 错误日志配置
log_error = /home/data/mysql/logs/mysql-error.log
# 慢日志配置
slow_query_log = 1
slow_query_log_file = /home/data/mysql/logs/mysql-slow.log
long_query_time = 5
### innodb ###
innodb_write_io_threads = 32
innodb_read_io_threads = 32
innodb_buffer_pool_size = 1G
innodb_file_per_table = 1
innodb_log_file_size = 50M
innodb_log_buffer_size = 64M
### 优化配置 ###
max_connections = 1024
max_connect_errors = 1000
lower_case_table_names = 1
key_buffer_size = 64M
table_open_cache = 6144
table_definition_cache = 4096
sort_buffer_size = 512K
read_buffer_size = 512K
join_buffer_size = 512K
tmp_table_size = 64M
max_heap_table_size = 64M
# 接收的数据包大小
max_allowed_packet = 1024M
#mysql5.6和mysql5.7默认的sql_mode不一样
#sql_mode = STRICT_TRANS_TABLES,NO_ENGINE_SUBSTITUTION
# timestamp列不再使用非标准的隐式默认值(此项与查询缓存无关)
explicit_defaults_for_timestamp=true

4.3.3 初始化mysql库

初始化mysql库

mysqld --initialize-insecure --user=mysql 

4.3.4 启动mysql

systemctl start mysqld
systemctl enable mysqld

4.3.5 执行mysql_secure_installation

[root@localhost ~]# mysql_secure_installation

Securing the MySQL server deployment.

Connecting to MySQL using a blank password.

VALIDATE PASSWORD PLUGIN can be used to test passwords
and improve security. It checks the strength of password
and allows the users to set only those passwords which are
secure enough. Would you like to setup VALIDATE PASSWORD plugin?

Press y|Y for Yes, any other key for No: n
Please set the password for root here.

New password: 

Re-enter new password: 
By default, a MySQL installation has an anonymous user,
allowing anyone to log into MySQL without having to have
a user account created for them. This is intended only for
testing, and to make the installation go a bit smoother.
You should remove them before moving into a production
environment.

Remove anonymous users? (Press y|Y for Yes, any other key for No) : Y
Success.


Normally, root should only be allowed to connect from
'localhost'. This ensures that someone cannot guess at
the root password from the network.

Disallow root login remotely? (Press y|Y for Yes, any other key for No) : y
Success.

By default, MySQL comes with a database named 'test' that
anyone can access. This is also intended only for testing,
and should be removed before moving into a production
environment.


Remove test database and access to it? (Press y|Y for Yes, any other key for No) : y
 - Dropping test database...
Success.

 - Removing privileges on test database...
Success.

Reloading the privilege tables will ensure that all changes
made so far will take effect immediately.

Reload privilege tables now? (Press y|Y for Yes, any other key for No) : y
Success.

All done! 
[root@localhost ~]# 

4.3.3 创建hive库

CREATE DATABASE `hive` DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci;

4.4 配置CDH

CDH包含server和agent两个,server用于管理,agent用于承载各种角色服务,本次规划使用在gxjh01上配置server+agent,另外三台运行agent

4.4.1 配置cm-server

在gxjh01服务器上执行。由于服务器的大部分存储空间放在/home/目录中,而cdh默认安装在/opt目录下,因此使用软链接将/opt/cloudera和/opt/cm-5.11.2目录链接到/home/data/cloudera和/home/data/cm-5.11.2下

#Unpack the Cloudera Manager tarball onto the large /home/data volume
tar zxvf cloudera-manager-centos7-cm5.11.2_x86_64.tar.gz -C /home/data/
#Create the parcel repository on the /home/data side. Creating it as
#/opt/cloudera/parcel-repo first would make /opt/cloudera a real directory,
#and the ln -s below would then place the link inside it as
#/opt/cloudera/cloudera instead of replacing it.
mkdir -p /home/data/cloudera/parcel-repo/
#Add the symlinks; the repo is then reachable as /opt/cloudera/parcel-repo
ln -s /home/data/cloudera /opt/cloudera
ln -s /home/data/cm-5.11.2 /opt/cm-5.11.2
chmod 777 /home/data/software/mysql-connector-java-5.1.39-bin.jar

cp /home/data/software/mysql-connector-java-5.1.39-bin.jar /opt/cm-5.11.2/share/cmf/lib/mysql-connector-java.jar
mv CDH-5.11.2-1.cdh5.11.2.p0.4-el7.parcel.sha1 CDH-5.11.2-1.cdh5.11.2.p0.4-el7.parcel.sha
mv CDH-5.11.2-1.cdh5.11.2.p0.4-el7.parcel* /opt/cloudera/parcel-repo/
mv manifest.json /opt/cloudera/parcel-repo/

#添加用户
useradd --system --home=/opt/cm-5.11.2/run/cloudera-scm-server/ --no-create-home --shell=/bin/false --comment "Cloudera SCM User" cloudera-scm
#初始化cm库
update user set Grant_priv='Y' where user='weihu' and Host='%';
/opt/cm-5.11.2/share/cmf/schema/scm_prepare_database.sh mysql cm -hlocalhost -uweihu -pweihu123 --scm-host % scm scm scm

mkdir -p /var/lib/cloudera-scm-server
chown cloudera-scm:cloudera-scm /var/lib/cloudera-scm-server
#修改agent链接的配置文件/opt/cm-5.11.2/etc/cloudera-scm-agent/config.ini
server_host=gxjh01
#启动:
/opt/cm-5.11.2/etc/init.d/cloudera-scm-server restart

4.4.2 配置cm-agent配置

将ansible配置组中gxjh01服务器注释掉

配置用户

拷贝安装文件

ansible cdh -m copy -a "src=/opt/cm-5.11.2.tar.gz dest=/root/ owner=root group=root mode=0644" -f 4
#创建文件夹
ansible cdh -m shell -a "mkdir -p /home/data" -f 4
ansible cdh -m shell -a "mkdir -p /home/data/cloudera" -f 4
#解压文件
ansible cdh -m shell -a "tar -xf /root/cm-5.11.2.tar.gz -C /home/data" -f 4
#做软连接
ansible cdh -m shell -a "ln -s /home/data/cloudera /opt/cloudera" -f 4
ansible cdh -m shell -a "ln -s /home/data/cm-5.11.2 /opt/cm-5.11.2" -f 4
#配置用户
ansible cdh -m shell -a 'useradd --system --home=/opt/cm-5.11.2/run/cloudera-scm-server/ --no-create-home --shell=/bin/false --comment "Cloudera SCM User" cloudera-scm' -f 10

4.5 启动agent和开机自启动

#启动:
/opt/cm-5.11.2/etc/init.d/cloudera-scm-agent start
#开机自启动:
chmod +x /etc/rc.d/rc.local
#添加如下:
/opt/cm-5.11.2/etc/init.d/cloudera-scm-agent restart

4.6 登陆cm-server控制台,配置集群

4.6.1 登陆控制台

http://168.1.2.180:7180 用户名:admin 密码:admin

01.png

4.6.2 接受许可协议

接受协议

02.png

4.6.3 选择所要安装的版本

选择免费版本

03.png

4.6.4 弹出下图

04.png

4.6.5 选择待安装的主机

根据实际需求选择相应主机

05.png

4.6.6 弹出下图点击继续

06.png

4.6.7 弹出下图点击继续

07.png

4.6.8 弹出下图点击继续

08.png

4.6.9 弹出下图点击继续

09.png

4.6.10 选择自定义角色安装

hdfs+yarn+zookeeper+hive+impala

10.png

4.6.11 为每个角色选择主机

原则上namenode和datanode不得在同一台,resourcemanager角色单独在一台机器上,由于此次只有四台服务器,因此每台机器都放了多个角色。
为Hive Metastore Server角色服务器添加mysql-connector-java.jar

cp /opt/cm-5.11.2/share/cmf/lib/mysql-connector-java.jar /opt/cloudera/parcels/CDH-5.11.2-1.cdh5.11.2.p0.4/lib/hive/lib/
11.png

4.6.12 配置hive连接的mysql源

cp /opt/cm-5.11.2/share/cmf/lib/mysql-connector-java.jar /opt/cloudera/parcels/CDH-5.11.2-1.cdh5.11.2.p0.4/lib/hive/lib/

12.png

4.6.13 弹出下图,根据实际需求更改相应目录,点击继续,安装完成后集群配置完毕

13.png

4.6.14 配置yarn ha

启用高可用,选择备用主机

1401.png
14.png

4.6.16 启用hdfs ha

15.png

4.6.17 配置namenode server名称

16.png

4.6.18 选择备用hdfs主机

17.png

4.6.18 设置目录

18.png

4.6.19 点击继续

19.png

4.6.20 至此,hdfs高可用完毕

20.png

4.7 配置nginx负载均衡impalad

4.7.1 安装

#安装依赖
yum install pcre-devel zlib-devel openssl-devel -y
#编译
./configure --prefix=/home/data/nginx --with-http_ssl_module --with-http_flv_module --with-http_stub_status_module --with-http_gzip_static_module  --with-stream
#安装
make && make install
#编辑配置文件 cat /home/data/nginx/conf/nginx.conf
worker_processes  8;
worker_rlimit_nofile 65535;

error_log  logs/error.log;
pid        logs/nginx.pid;

events {
    use  epoll;
    worker_connections  65535;
}
stream {
    server {
        listen 31050;
        proxy_pass impala;
    }

    upstream impala {
	server 168.1.2.180:21050;
	server 168.1.2.181:21050;
	server 168.1.2.182:21050;
	server 168.1.2.183:21050;   
	}
}

4.7.2 启动&&停止

启动:/home/data/nginx/sbin/nginx
停止:pkill -9 nginx

4.7.3 连接地址

168.1.2.180:31050

4.8 java程序连接集群的配置文件

配置文件包含两个:hdfs-site.xml 和core-site.xml

/etc/hadoop/conf.cloudera.hdfs/core-site.xml
/etc/hadoop/conf.cloudera.hdfs/hdfs-site.xml

5 集群维护

本章用来介绍如何操作cdh集群的启停

5.1 启动

5.1.1 启动mysql

mysql安装地址为gxjh01上

systemctl start mysqld

5.1.2 启动cm-server

cm-server部署在gxjh01上

/opt/cm-5.11.2/etc/init.d/cloudera-scm-server start

5.1.3 启动cm-agent

cm-agent部署在gxjh[01:04]上

/opt/cm-5.11.2/etc/init.d/cloudera-scm-agent start

5.1.4 启动nginx

nginx安装地址为gxjh01上

/home/data/nginx/sbin/nginx

5.1.5 登陆cm-server web界面管理集群

通过界面启动集群

21.png

5.2 关闭

5.2.1 登陆cm-server web界面管理集群

通过界面关闭集群

21.png

5.2.2 关闭nginx

nginx安装地址为gxjh01上

pkill -9 nginx

5.2.3 关闭cm-agent

cm-agent部署在gxjh[01:04]上

/opt/cm-5.11.2/etc/init.d/cloudera-scm-agent stop

5.2.4 关闭cm-server

cm-server部署在gxjh01上

/opt/cm-5.11.2/etc/init.d/cloudera-scm-server stop

5.2.5 关闭mysql

mysql安装地址为gxjh01上

systemctl stop mysqld
原文地址:https://www.cnblogs.com/sdhzdtwhm/p/9983397.html