[Spark Notes] Starter Code for Connecting Spark Streaming to Kafka

A short starter example, written in Scala, that connects Spark Streaming to Kafka through the direct-stream API. The snippet assumes an existing SparkContext named sc, as provided by spark-shell.
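The code uses the Kafka 0.8 connector (org.apache.spark.streaming.kafka.KafkaUtils), which ships as a separate artifact. As a hedged sketch, an sbt dependency along these lines pulls it in; the versions here are assumptions and must match your Spark build (on Spark 2.x the artifact is named spark-streaming-kafka-0-8):

// build.sbt (assumed versions; adjust to your cluster)
libraryDependencies += "org.apache.spark" %% "spark-streaming" % "1.6.3" % "provided"
libraryDependencies += "org.apache.spark" %% "spark-streaming-kafka" % "1.6.3"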

import kafka.serializer.StringDecoder
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.log4j.{Level, Logger}

// Silence Spark's and Akka's verbose INFO logging so batch output stays readable
Logger.getLogger("org").setLevel(Level.OFF)
Logger.getLogger("akka").setLevel(Level.OFF)

// sc is the SparkContext provided by spark-shell; batches fire every 2 seconds
val ssc = new StreamingContext(sc, Seconds(2))

val kafkaTopic = "test"
val kafkaParams = Map[String, String](
  "bootstrap.servers" -> "localhost:9092",       // Kafka broker(s), not 0.0.0.0
  "zookeeper.connect" -> "localhost:2181",       // ignored by the direct API, kept for reference
  "group.id"          -> "spark-streaming-test"  // consumer group id
)
val topics: Set[String] = kafkaTopic.split(",").map(_.trim).toSet
val stream = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topics)
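The direct stream yields (key, value) pairs, so a common next step is to project out the message payloads before any further processing. The lines below are an illustrative sketch that is not part of the original post; the names lines and wordCounts are hypothetical:

// Illustrative sketch (not in the original post): the stream carries
// (key, value) tuples, so map(_._2) keeps just the message payload
val lines: DStream[String] = stream.map(_._2)
// A classic per-batch word count over those payloads
val wordCounts = lines.flatMap(_.split(" ")).map(word => (word, 1)).reduceByKey(_ + _)
wordCounts.print()  // prints the first few counts of every batch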

stream.foreachRDD { rdd =>
	if (!rdd.isEmpty()) {
		val count = rdd.count()
		println(s"Batch received $count records:")
		rdd.collect().foreach(println)  // collect() is fine for a demo; avoid it on large batches
	} else {
		println("No data captured")
	}
}
ssc.start()
ssc.awaitTermination()  // blocks until the context is stopped or an error occurs
ssc.stop(stopSparkContext = true, stopGracefully = true)
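To see data flow through, publish a few messages to the test topic. Assuming a local Kafka installation from the 0.8/0.9 era (matching the connector above), the stock console producer works; the script path varies by install:

bin/kafka-console-producer.sh --broker-list localhost:9092 --topic test

Each line typed into the producer should then appear in the streaming job's batch output.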

Original article: https://www.cnblogs.com/lestatzhang/p/10611336.html