flume 配置实例.md

从文件到 Kafka

# flume-directory-kafka.conf: single-node Flume agent "a3"
# Pipeline: exec source (tail of the nginx access log) -> memory channel -> Kafka sink

# Components owned by this agent
a3.sources = r3
a3.channels = c3
a3.sinks = k3

# Channel c3: buffers events in memory between the source and the sink
a3.channels.c3.type = memory
a3.channels.c3.capacity = 100000
a3.channels.c3.transactionCapacity = 1000

# Source r3: runs the command below and writes its output as events into c3
a3.sources.r3.type = exec
a3.sources.r3.command = tail -F /usr/local/nginx/logs/access.log
a3.sources.r3.channels = c3

# Sink k3: takes events from c3 and publishes them to the Kafka topic on the listed brokers
# NOTE(review): topic/brokerList/requiredAcks are the older Kafka sink property names;
# confirm they match the Flume version this agent runs on.
a3.sinks.k3.type = org.apache.flume.sink.kafka.KafkaSink
a3.sinks.k3.topic = countly_small_sys
a3.sinks.k3.brokerList = node6.hadoop.fzzqxf.com:9092,node5.hadoop.fzzqxf.com:9092,node4.hadoop.fzzqxf.com:9092,node3.hadoop.fzzqxf.com:9092,node2.hadoop.fzzqxf.com:9092
a3.sinks.k3.requiredAcks = 1
a3.sinks.k3.batchSize = 20
a3.sinks.k3.channel = c3

从 Kafka 到 HDFS

# Flume agent "agent_collector"
# Pipeline: Kafka source -> memory channel -> HDFS sink

# Sources, channels and sinks owned by this agent
agent_collector.sources = kafka-src-1
agent_collector.channels = mem-channel-1
agent_collector.sinks = hdfs-sink-1

# Source kafka-src-1: consumes the topic below as consumer group flumeCountlyHdfs
# and writes the events into mem-channel-1
agent_collector.sources.kafka-src-1.type = org.apache.flume.source.kafka.KafkaSource
agent_collector.sources.kafka-src-1.channels = mem-channel-1
agent_collector.sources.kafka-src-1.zookeeperConnect = master.hadoop.fzzqxf.com:2181,node1.hadoop.fzzqxf.com:2181,node4.hadoop.fzzqxf.com:2181,node5.hadoop.fzzqxf.com:2181,node6.hadoop.fzzqxf.com:2181
agent_collector.sources.kafka-src-1.topic = siege-countly_log-2
agent_collector.sources.kafka-src-1.groupId = flumeCountlyHdfs
agent_collector.sources.kafka-src-1.batchSize = 200
agent_collector.sources.kafka-src-1.kafka.consumer.timeout.ms = 100

# Channel mem-channel-1: in-memory buffer between the Kafka source and the HDFS sink
agent_collector.channels.mem-channel-1.type = memory
agent_collector.channels.mem-channel-1.capacity = 10000
agent_collector.channels.mem-channel-1.transactionCapacity = 10000
agent_collector.channels.mem-channel-1.byteCapacityBufferPercentage = 20
# 10485760 bytes = 10 MiB
agent_collector.channels.mem-channel-1.byteCapacity = 10485760

# Sink hdfs-sink-1: drains mem-channel-1 into date-partitioned HDFS directories
agent_collector.sinks.hdfs-sink-1.type = hdfs
agent_collector.sinks.hdfs-sink-1.channel = mem-channel-1
# NOTE(review): the %Y%m%d escapes need a timestamp header on each event (or
# hdfs.useLocalTimeStamp = true) -- confirm the Kafka source provides one.
agent_collector.sinks.hdfs-sink-1.hdfs.path = /data/logs/flume/realtime/countly/%Y%m%d
agent_collector.sinks.hdfs-sink-1.hdfs.filePrefix = collector_1
agent_collector.sinks.hdfs-sink-1.hdfs.fileType = DataStream
agent_collector.sinks.hdfs-sink-1.hdfs.minBlockReplicas = 1
# Time-based rolling only: 21600 s = 6 hours
agent_collector.sinks.hdfs-sink-1.hdfs.rollInterval = 21600
# rollSize is in bytes; 0 disables size-based rolling.
# (For reference, 100 * 128 MiB = 13421772800 bytes.)
agent_collector.sinks.hdfs-sink-1.hdfs.rollSize = 0
# 0 disables rolling by event count
agent_collector.sinks.hdfs-sink-1.hdfs.rollCount = 0
agent_collector.sinks.hdfs-sink-1.hdfs.batchSize = 1000
# Timeout for HDFS operations, in milliseconds (60 s)
agent_collector.sinks.hdfs-sink-1.hdfs.callTimeout = 60000
原文地址:https://www.cnblogs.com/wang3680/p/12330288.html