1)在 elk-03 的 /bd/flume-1.7/conf 目录下创建 kafka-flume-hdfs.conf 文件

[hadoop@elk-03 conf]$ vim kafka-flume-hdfs.conf

2)在文件中配置如下内容

## 组件定义
a1.sources=r1 r2
a1.channels=c1 c2
a1.sinks=k1 k2

## source1
## kafka start 主题源数据
a1.sources.r1.type = org.apache.flume.source.kafka.KafkaSource
a1.sources.r1.batchSize = 5000
a1.sources.r1.batchDurationMillis = 2000
a1.sources.r1.kafka.bootstrap.servers = elk-01:9092,elk-02:9092,elk-03:9092
a1.sources.r1.kafka.zookeeperConnect = elk-01:2181,elk-02:2181,elk-03:2181
a1.sources.r1.kafka.topics=topic_start

## source2
## kafka event 主题源数据
a1.sources.r2.type = org.apache.flume.source.kafka.KafkaSource
a1.sources.r2.batchSize = 5000
a1.sources.r2.batchDurationMillis = 2000
a1.sources.r2.kafka.bootstrap.servers = elk-01:9092,elk-02:9092,elk-03:9092
a1.sources.r2.kafka.zookeeperConnect = elk-01:2181,elk-02:2181,elk-03:2181
a1.sources.r2.kafka.topics=topic_event

## channel1
a1.channels.c1.type=memory
a1.channels.c1.capacity=100000
a1.channels.c1.transactionCapacity=10000

## channel2
a1.channels.c2.type=memory
a1.channels.c2.capacity=100000
a1.channels.c2.transactionCapacity=10000

## sink1
## 主题 start 数据输出到 hdfs 的路径
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = /origin_data/gmall/log/topic_start/%Y-%m-%d
a1.sinks.k1.hdfs.filePrefix = logstart-
a1.sinks.k1.hdfs.round = true
a1.sinks.k1.hdfs.roundValue = 30
a1.sinks.k1.hdfs.roundUnit = second

## sink2
## 主题 event 数据输出到 hdfs 的路径
a1.sinks.k2.type = hdfs
a1.sinks.k2.hdfs.path = /origin_data/gmall/log/topic_event/%Y-%m-%d
a1.sinks.k2.hdfs.filePrefix = logevent-
a1.sinks.k2.hdfs.round = true
a1.sinks.k2.hdfs.roundValue = 30
a1.sinks.k2.hdfs.roundUnit = second

## (生成文件大小设定)不要产生大量小文件
a1.sinks.k1.hdfs.rollInterval = 30
a1.sinks.k1.hdfs.rollSize = 0
a1.sinks.k1.hdfs.rollCount = 0
a1.sinks.k2.hdfs.rollInterval = 30
a1.sinks.k2.hdfs.rollSize = 0
a1.sinks.k2.hdfs.rollCount = 0

## 控制输出文件是原生文件。
a1.sinks.k1.hdfs.fileType = CompressedStream
a1.sinks.k2.hdfs.fileType = CompressedStream

## 支持 lzo 压缩
a1.sinks.k1.hdfs.codeC = lzop
a1.sinks.k2.hdfs.codeC = lzop

## 拼装
a1.sources.r1.channels = c1
a1.sinks.k1.channel= c1
a1.sources.r2.channels = c2
a1.sinks.k2.channel= c2