Approach
- Use foreachRDD to get hold of the RDD behind each batch of the DStream
- Convert that RDD to a DataFrame so the batch can be queried with Spark SQL
Code
import org.apache.spark.SparkConf
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object DStreamToDataFrame {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setAppName(this.getClass.getSimpleName).setMaster("local[*]")
    // StreamingContext with a 3-second batch interval
    val ssc = new StreamingContext(conf, Seconds(3))
    // Read lines of text from a socket source
    val dataDStream: ReceiverInputDStream[String] = ssc.socketTextStream("hadoop01", 8888)
    // Classic word count within each batch
    val resDStream: DStream[(String, Int)] = dataDStream.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _)
    // Build the SparkSession from the same SparkConf so Spark SQL can be used
    val spark: SparkSession = SparkSession.builder().config(conf).getOrCreate()
    import spark.implicits._
    resDStream.foreachRDD { rdd =>
      // Convert the batch's RDD to a DataFrame with named columns
      val df: DataFrame = rdd.toDF("word", "count")
      // Register a temp view and query it with SQL
      df.createOrReplaceTempView("t1")
      spark.sql("select * from t1").show()
    }
    ssc.start()
    ssc.awaitTermination()
  }
}
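One detail worth flagging: the code above builds the SparkSession eagerly on the driver and captures it in the closure. The Spark Streaming programming guide instead shows fetching a lazily created singleton SparkSession inside foreachRDD, built from the RDD's own SparkConf, which holds up better when the driver restarts from a checkpoint. A minimal sketch of that variant, reusing resDStream from above:

// Variant: lazily fetch a singleton SparkSession per batch,
// following the pattern in the Spark Streaming programming guide
resDStream.foreachRDD { rdd =>
  // getOrCreate returns the existing session if one is already active
  val spark = SparkSession.builder().config(rdd.sparkContext.getConf).getOrCreate()
  import spark.implicits._
  val df = rdd.toDF("word", "count")
  df.createOrReplaceTempView("t1")
  spark.sql("select * from t1").show()
}

To try either version, a quick way to feed the socket is to run nc -lk 8888 on the hadoop01 host and type lines of space-separated words; each 3-second batch then prints as a small word/count table.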