DolphinScheduler Hands-On


DS Installation

Reference (official docs): https://dolphinscheduler.apache.org/en-us/docs/latest/user_doc/cluster-deployment.html

  1. Grant the deployment user sudo privileges;

  2. Configure hosts, set up passwordless SSH, and change the owner of the deployment directory to the deployment user;

  3. Initialize the database: edit conf/datasource.properties and run script/create-dolphinscheduler.sh (note: afterwards, manually apply the SQL under sql/xdf);

  4. Edit the environment variables: dolphinscheduler_env.sh;

  5. Edit the configuration: conf/config/install_config.conf;

  6. Store resources on HDFS: put core-site.xml and hdfs-site.xml into the conf directory;

  7. Automated deployment: sh install.sh;

  8. Verify by logging in: http://xx.xx.xx.xx:12345/dolphinscheduler admin/dolphinscheduler123

  9. First, download the source code

  10. Change the Hadoop and Hive versions

<!--<hadoop.version>2.7.3</hadoop.version>-->
<hadoop.version>2.7.7</hadoop.version>
<!--<hive.jdbc.version>2.1.0</hive.jdbc.version>-->
<hive.jdbc.version>2.3.6</hive.jdbc.version>
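
Before compiling, it can help to confirm where these version properties live; a minimal sketch using grep from the root of the downloaded source tree:

# from the root of the source tree, locate the version properties before editing
grep -n -E "hadoop.version|hive.jdbc.version" pom.xml
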
  1. Compile
    mvn -U clean package -Prelease -Dmaven.test.skip=true

  2. Generated binary package
    Located in the $DOLPHIN_SCHEDULER_HOME/dolphinscheduler-dist/target directory
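
A minimal sketch of unpacking that package to the install location used later in this guide; the exact tarball and directory names are assumptions for 1.3.4 and should be checked against what actually appears under target/:

# unpack the generated package (file/directory names assumed for 1.3.4)
tar -zxvf $DOLPHIN_SCHEDULER_HOME/dolphinscheduler-dist/target/apache-dolphinscheduler-incubating-1.3.4-dolphinscheduler-bin.tar.gz -C /opt/apps/
mv /opt/apps/apache-dolphinscheduler-incubating-1.3.4-dolphinscheduler-bin /opt/apps/dolphinscheduler-1.3.4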

  3. Initialize the database

drop database if exists dolphinscheduler;
CREATE DATABASE dolphinscheduler DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
create user 'ds' IDENTIFIED BY 'QWer_1234';
GRANT ALL PRIVILEGES ON dolphinscheduler.* TO 'ds'@'%' IDENTIFIED BY 'QWer_1234';
GRANT ALL PRIVILEGES ON dolphinscheduler.* TO 'ds'@'localhost' IDENTIFIED BY 'QWer_1234';
flush privileges;
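
These statements can be run from any host that can reach the MySQL server; a minimal sketch, assuming they are saved to a local file (file name is just an example) and executed as the MySQL root user:

# apply the initialization statements above as the MySQL root user (host and file name are assumptions)
mysql -h bigdata02 -uroot -p < init_dolphinscheduler_db.sql
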
  4. Prepare the MySQL metadata database
    vim conf/datasource.properties

MySQL

spring.datasource.driver-class-name=com.mysql.jdbc.Driver
spring.datasource.url=jdbc:mysql://bigdata02:3306/dolphinscheduler?useUnicode=true&characterEncoding=UTF-8&allowMultiQueries=true
spring.datasource.username=ds
spring.datasource.password=QWer_1234
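
Before running the schema script, it is worth confirming that the account configured above can actually reach the metadata database; a minimal sketch with the mysql client, using the host, port and credentials from the settings above:

# verify connectivity with the configured account
mysql -h bigdata02 -P 3306 -u ds -pQWer_1234 -e "USE dolphinscheduler; SELECT 1;"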

  5. Copy the MySQL driver jar into the lib directory
    cp ~/soft/mysql-connector-java-5.1.47.jar /opt/apps/dolphinscheduler-1.3.4/lib/

  6. Initialize the database schema
    sh script/create-dolphinscheduler.sh

  7. Edit dolphinscheduler_env.sh
    vim conf/env/dolphinscheduler_env.sh

export HADOOP_HOME=/neworiental/tools/hadoop-3.2.1
export HADOOP_CONF_DIR=/neworiental/tools/hadoop-3.2.1/etc/hadoop
export SPARK_HOME1=/neworiental/tools/spark-3.0.2-bin-hadoop3.2
export SPARK_HOME2=/neworiental/tools/spark-3.0.2-bin-hadoop3.2
export PYTHON_HOME=/usr/bin
export JAVA_HOME=/neworiental/tools/jdk1.8.0_161
export HIVE_HOME=/neworiental/tools/apache-hive-3.1.2-bin
export FLINK_HOME=/neworiental/tools/flink-1.11.1
export DATAX_HOME=/opt/soft/datax
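
A quick sanity check that the paths configured above actually exist on this node can save debugging later; a minimal sketch run from the DolphinScheduler install directory:

# source the file and report any configured home directory missing on this node
source conf/env/dolphinscheduler_env.sh
for d in "$JAVA_HOME" "$HADOOP_HOME" "$HADOOP_CONF_DIR" "$SPARK_HOME1" "$SPARK_HOME2" "$HIVE_HOME" "$FLINK_HOME"; do
  [ -d "$d" ] || echo "missing: $d"
done
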
  8. Edit the configuration in install_config.conf
    vim conf/config/install_config.conf

  9. Copy the Hadoop cluster configuration files
    cp ~/apps/hadoop-2.7.7/etc/hadoop/core-site.xml /opt/apps/dolphinscheduler-1.3.4/conf/
    cp ~/apps/hadoop-2.7.7/etc/hadoop/hdfs-site.xml /opt/apps/dolphinscheduler-1.3.4/conf/

  10. Edit the configuration in install_config.conf
    vim conf/config/install_config.conf

# postgresql or mysql
dbtype="mysql"
# db config
# db address and port
dbhost="bigdata02:3306"
# db username
username="ds"

# database name
dbname="dolphinscheduler"
# db password
# NOTICE: if there are special characters, please use the \ to escape, for example, `[` escape to `\[`
password="xxxxxx"
# zk cluster
zkQuorum="bigdata04:2181,bigdata03:2181,bigdata02:2181"
# Note: the target installation path for dolphinscheduler, please do not config as the same as the current path (pwd)
installPath="/opt/apps/dolphinscheduler"
# deployment user
# Note: the deployment user needs to have sudo privileges and permissions to operate hdfs. If hdfs is enabled, the root directory needs to be created by itself
deployUser="bigdata"

# alert config
# mail server host
mailServerHost="smtp.exmail.qq.com"
# mail server port
# note: Different protocols and encryption methods correspond to different ports, when SSL/TLS is enabled, make sure the port is correct.
mailServerPort="25"
# sender
mailSender="xxx@qq.com"
# user
mailUser="aaa@qq.com"
# sender password
# note: The mail.passwd is email service authorization code, not the email login password.
mailPassword="xxxxxxxxxx"
# TLS mail protocol support
starttlsEnable="true"
# SSL mail protocol support
# only one of TLS and SSL can be in the true state.
sslEnable="false"
#note: sslTrust is the same as mailServerHost
sslTrust="smtp.exmail.qq.com"

# resource storage type:HDFS,S3,NONE
resourceStorageType="HDFS"

defaultFS="hdfs://hadoop277ha:8020/"
# if resourceStorageType is S3, the following three configuration is required, otherwise please ignore
s3Endpoint="http://192.168.xx.xx:9010"
s3AccessKey="xxxxxxxxxx"
s3SecretKey="xxxxxxxxxx"
# if resourcemanager HA enable, please type the HA ips; if resourcemanager is single, make this value empty
yarnHaIps="bigdata04,bigdata05"
# if resourcemanager HA enable or not use resourcemanager, please skip this value setting; if resourcemanager is single, you only need to replace yarnIp1 to actual resourcemanager hostname.
singleYarnIp="yarnIp1"
# resource store on HDFS/S3 path, resource file will store to this hadoop hdfs path, self configuration, please make sure the directory exists on hdfs and have read write permissions. /dolphinscheduler is recommended
resourceUploadPath="/dolphinscheduler"
# who have permissions to create directory under HDFS/S3 root path
# Note: if kerberos is enabled, please config hdfsRootUser=
hdfsRootUser="bigdata"

# kerberos config
# whether kerberos starts, if kerberos starts, following four items need to config, otherwise please ignore
kerberosStartUp="false"
# kdc krb5 config file path
krb5ConfPath="$installPath/conf/krb5.conf"
# keytab username
keytabUserName="hdfs-mycluster@ESZ.COM"
# username keytab path
keytabPath="$installPath/conf/hdfs.headless.keytab"
# api server port
apiServerPort="12345"
# install hosts
# Note: install the scheduled hostname list. If it is pseudo-distributed, just write a pseudo-distributed hostname
ips="localhost"
# ssh port, default 22
# Note: if ssh port is not default, modify here
sshPort="22"
# run master machine
# Note: list of hosts hostname for deploying master
masters="localhost"

# run worker machine
# note: need to write the worker group name of each worker, the default value is "default"
workers="localhost:default"
# run alert machine
# note: list of machine hostnames for deploying alert server
alertServer="localhost"
# run api machine
# note: list of machine hostnames for deploying api server
apiServers="localhost"

For cluster mode, configure as follows:

# install hosts
# Note: install the scheduled hostname list. If it is pseudo-distributed, just write a pseudo-distributed hostname
ips="bigdata02,bigdata03,bigdata04,bigdata05"
# ssh port, default 22
# Note: if ssh port is not default, modify here
sshPort="22"
# run master machine
# Note: list of hosts hostname for deploying master
masters="bigdata04,bigdata05"
# run worker machine
# note: need to write the worker group name of each worker, the default value is "default"
workers="bigdata02:default,bigdata03:default,bigdata04:default,bigdata05:default"
# run alert machine
# note: list of machine hostnames for deploying alert server
alertServer="bigdata03"
# run api machine
# note: list of machine hostnames for deploying api server
apiServers="bigdata04"

  11. Start the underlying services
    Start ZooKeeper
    Start Hadoop
    Stop all DolphinScheduler services across the cluster with one command:
    sh /opt/apps/dolphinscheduler-1.3.4/bin/stop-all.sh
    Start all DolphinScheduler services across the cluster with one command:
    sh /opt/apps/dolphinscheduler-1.3.4/bin/start-all.sh
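
After start-all.sh completes, each node should be running the DolphinScheduler processes assigned to it; a minimal check with jps (process names as observed on the 1.3.x release):

# on each node, confirm the expected DS processes are up
jps | grep -E "MasterServer|WorkerServer|LoggerServer|ApiApplicationServer|AlertServer"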

URL: http://172.24.28.65:12345/dolphinscheduler/ui/#/security/tenant
The default username and password are admin/dolphinscheduler123

  1. Create a queue
  2. Create a tenant
    A tenant corresponds to a Linux user, which is the user the worker uses to submit jobs. If the user does not exist on Linux, the worker creates it when executing the script (a pre-creation sketch follows this list).
  3. Create ordinary users
    These are the regular developer accounts.
  4. Create an alert group
    The alert group is set as a parameter when a workflow is started; after the workflow finishes, its status and other information are sent to the alert group by email.
  5. Create worker groups
    Specify the machines on which the workers run.
  6. Create data sources
    Workflows in DS process data from various data sources, so these need to be defined in advance.
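
For the tenant item above, the Linux user can also be created ahead of time on each worker node instead of letting the worker create it; a minimal sketch (ds_tenant is only an example name):

# pre-create the tenant's Linux user on a worker node
sudo useradd -m ds_tenant
id ds_tenant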

First, create a project
Then define workflows within the project

Resource Management

Resource management holds runnable programs such as Python files and jars, as well as scripts and configuration files
UDF management is where UDF functions are uploaded

Original article: https://www.cnblogs.com/yyystar/p/15535132.html