Oozie案例——自定义MapReduce workflow

相关运行命令

运行一个应用：
bin/oozie job -oozie http://hadoop-1:11000/oozie -config examples/apps/map-reduce/job.properties -run

杀掉一个job：
bin/oozie job -oozie http://hadoop-1:11000/oozie -kill 0000001-160702224410648-oozie-beif-W

查看job的日志信息：
bin/oozie job -oozie http://hadoop-1:11000/oozie -log 0000001-160702224410648-oozie-beif-W

查看job的信息：
bin/oozie job -oozie http://hadoop-1:11000/oozie -info 0000001-160702224410648-oozie-beif-W

1. 定义job.properties

# Cluster endpoints substituted into <name-node> and <job-tracker> in workflow.xml.
nameNode=hdfs://hadoop-1:9000
# NOTE(review): 8032 is the default YARN ResourceManager port - confirm for this cluster.
jobTracker=hadoop-1:8032

# Scheduler queue the MapReduce job is submitted to.
queueName=default

# Application directory name under /user/<user>/ on HDFS.
examplesRoot=mr-wordcount
# Output directory name, resolved by workflow.xml relative to the application directory.
outputDir=output-data

# HDFS path of the workflow definition that Oozie should run.
oozie.wf.application.path=${nameNode}/user/${user.name}/${examplesRoot}/workflow.xml

2. 定义workflow.xml

<workflow-app xmlns="uri:oozie:workflow:0.2" name="map-reduce-wf">
    <!--
        Single-action workflow: start, then mr-node, then end on success
        or the "fail" kill node on error.
        ${jobTracker}, ${nameNode}, ${queueName}, ${examplesRoot} and ${outputDir}
        are expected to be supplied by job.properties.
    -->
    <start to="mr-node"/>

    <action name="mr-node">
        <map-reduce>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>

            <!-- Remove any previous output directory so the job can be re-run. -->
            <prepare>
                <delete path="${nameNode}/user/${wf:user()}/${examplesRoot}/${outputDir}"/>
            </prepare>

            <configuration>
                <!-- Scheduling: target queue and requested number of map tasks. -->
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
                <property>
                    <name>mapred.map.tasks</name>
                    <value>1</value>
                </property>

                <!-- Enable the new MapReduce API for both the mapper and the reducer. -->
                <property>
                    <name>mapred.mapper.new-api</name>
                    <value>true</value>
                </property>
                <property>
                    <name>mapred.reducer.new-api</name>
                    <value>true</value>
                </property>

                <!-- Mapper class and the key/value types it emits. -->
                <property>
                    <name>mapreduce.job.map.class</name>
                    <value>org.gh.hadoop.mapreduce.WordCount$WCMapper</value>
                </property>
                <property>
                    <name>mapreduce.map.output.key.class</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.map.output.value.class</name>
                    <value>org.apache.hadoop.io.IntWritable</value>
                </property>

                <!-- Reducer class and the key/value types of the final job output. -->
                <property>
                    <name>mapreduce.job.reduce.class</name>
                    <value>org.gh.hadoop.mapreduce.WordCount$WCReducer</value>
                </property>
                <property>
                    <name>mapreduce.job.output.key.class</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapreduce.job.output.value.class</name>
                    <value>org.apache.hadoop.io.IntWritable</value>
                </property>

                <!-- Input and output locations under the submitting user's home directory. -->
                <property>
                    <name>mapred.input.dir</name>
                    <value>/user/${wf:user()}/${examplesRoot}/input-data</value>
                </property>
                <property>
                    <name>mapred.output.dir</name>
                    <value>/user/${wf:user()}/${examplesRoot}/${outputDir}</value>
                </property>
            </configuration>
        </map-reduce>

        <ok to="end"/>
        <error to="fail"/>
    </action>

    <!-- Terminal failure node: reports the error message of the last failed node. -->
    <kill name="fail">
        <message>Map/Reduce failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <end name="end"/>
</workflow-app>