// Level 1: Analysis of education level and average income (第1关:学历等级与平均收入情况分析)
import org.apache.spark.sql.{SaveMode, SparkSession}
import org.apache.spark.sql.functions._
/**
 * Batch job relating customer education level to average income and spending.
 *
 * Reads table `info` from MySQL database `mydb`, aggregates it per `Education`
 * value, and overwrites table `avgsalary` in database `customer` with the result.
 */
object test1 {
  /**
   * Runs the job against the MySQL server at 127.0.0.1:3306.
   *
   * The Spark session is always stopped before this method returns, including
   * when the JDBC read or write throws.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    /********** Begin **********/
    // Local-mode session; "local[*]" asks Spark for one worker thread per core.
    val spark = SparkSession.builder().master("local[*]").appName("test1").getOrCreate()
    try {
      // Connection settings shared by the read and the write.
      val driver = "com.mysql.jdbc.Driver"
      val user = "root"
      val password = "123123"

      // Load table `info` from database `mydb`.
      val info = spark.read
        .format("jdbc")
        .option("driver", driver)
        .option("url", "jdbc:mysql://127.0.0.1:3306/mydb?useUnicode=true&characterEncoding=utf-8")
        .option("dbtable", "info")
        .option("user", user)
        .option("password", password)
        .load()

      // Mean of a column over its non-null values, rounded to two decimals.
      def roundedMean(column: String) = round(sum(col(column)) / count(col(column)), 2)

      // Per-education average income and spending, sorted by ascending AvgIncome.
      val summary = info
        .groupBy("Education")
        .agg(
          roundedMean("Income").as("AvgIncome"),
          roundedMean("Spending").as("AvgSpending")
        )
        .orderBy("AvgIncome")

      // Persist the result, replacing any existing `avgsalary` table.
      // NOTE(review): repartition(1) shuffles the rows, so the ordering applied
      // above is presumably not guaranteed in the stored table -- confirm whether
      // row order matters to consumers.
      summary
        .repartition(1)
        .write
        .format("jdbc")
        .option("driver", driver)
        .option("url", "jdbc:mysql://127.0.0.1:3306/customer?useUnicode=true&characterEncoding=utf-8")
        .option("dbtable", "avgsalary")
        .option("user", user)
        .option("password", password)
        .mode(SaveMode.Overwrite)
        .save()
      /********** End **********/
      println("完成!")
    } finally {
      // Release the session even if the load or save failed.
      spark.stop()
    }
  }
}