Spark Streaming 整合 Kafka


在 IntelliJ 中用代码实现 Spark Streaming 与 Kafka 结合的消息接收及处理:StreamKafkaProducer1

StreamKafkaProducer1

package org.hnsw

import java.util

import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

object StreamKafkaProducer1 {

  /**
   * Reads text lines from a socket source, computes a windowed word count,
   * and publishes each result to a Kafka topic as "word count" strings.
   *
   * Pipeline: 5-second micro-batches; counts aggregated over a 30-second
   * window that slides every 10 seconds (both multiples of the batch
   * interval, as Spark Streaming requires).
   *
   * Blocks forever in `awaitTermination()`; stop externally or via Ctrl-C.
   */
  def streamingkafka() = {
    val sparkConf = new SparkConf().setAppName("DirectKafkaWordCount").setMaster("local[4]")
//      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    val ssc = new StreamingContext(sparkConf, Seconds(5))
    // Source: raw text lines from a socket server
    val streamRdd = ssc.socketTextStream("192.168.3.66", 8888)
    // Split each line on spaces and pair every word with an initial count of 1
    val words = streamRdd.flatMap(_.split(" ")).map((_, 1))
    // Step 1: sum counts per word over a 30s window sliding every 10s
    val wordscount = words.reduceByKeyAndWindow((v1: Int, v2: Int) => v1 + v2, Seconds(30), Seconds(10))
    wordscount.print()
    // Step 2: publish the windowed counts to Kafka
    wordscount.foreachRDD { rdd =>
      rdd.foreachPartition { partRdd =>
        // One producer per partition: KafkaProducer is not serializable, so it
        // must be created inside the executor, never on the driver.
        val props = new util.HashMap[String, Object]()
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.3.66:9092")
        // Key/value serializers for the String payloads
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
        val producer = new KafkaProducer[String, String](props)
        val topic = "kafka-lt" // destination topic name
        try {
          partRdd.foreach { line =>
            // Message payload format: "word count"
            val str = line._1 + " " + line._2
            // null key => round-robin partition assignment by Kafka
            producer.send(new ProducerRecord[String, String](topic, null, str))
          }
        } finally {
          // BUG FIX: the original never closed the producer, leaking sockets and
          // native buffers every micro-batch and risking loss of batched-but-
          // unflushed records. close() flushes pending sends before releasing.
          producer.close()
        }
      }
    }
    ssc.start()
    ssc.awaitTermination()
  }

  def main(args: Array[String]): Unit = {
    streamingkafka()
  }
}

以上为相关示例代码。