sparksql hive作为数据源

根据官方文档的说法，需要把 hive-site.xml、core-site.xml、hdfs-site.xml 拷贝到 Spark 的 conf 目录下，并保证 MySQL（Hive 元数据库所在的数据库）已经启动。

java

 1 public class Demo {
 2     private static SparkSession session = SparkSession.builder().appName("demo").enableHiveSupport()
 3             .config("spark.sql.warehouse.dir", "/user/hive/warehouse").getOrCreate();
 4 
 5     public static void main(String[] args) {
 6         session.sql("drop table if exists students_info");
 7         session.sql("create table if not exists students_info(name string,age int) "
 8                 + "row format delimited fields terminated by '	' 
");
 9 
10         // 将数据导入学生信息表
11         session.sql(
12                 "load data local inpath '/opt/module/spark-test/data/student_infos.txt' into table default.students_info");
13 
14         session.sql("drop table if exists students_score");
15         session.sql("create table if not exists students_score(name string,score int)  
"
16                 + "row format delimited fields terminated by '	' 
");
17 
18         // 将数据导入学生成绩表
19         session.sql(
20                 "load data local inpath '/opt/module/spark-test/data/student_scores.txt' into table default.students_score");
21 
22         // 查询
23         Dataset<Row> dataset = session.sql(
24                 "select s1.name,s1.age,s2.score from students_info s1 join students_score s2 on s1.name=s2.name where s2.score>80");
25 
26         // 将dataset中的数据保存到hive中
27         session.sql("drop table if exists students_result");
28         dataset.write().saveAsTable("students_result");
29 
30         // 将hive中的表转成dataset,查看数据是否成功保存
31         Dataset<Row> table = session.table("students_result");
32         table.show();
33 
34         session.stop();
35 
36     }
37 }

scala

 1 object Demo {
 2   def main(args: Array[String]): Unit = {
 3     val session = SparkSession.builder().appName("demo").enableHiveSupport().config("spark.sql.warehouse.dir", "/user/hive/warehouse").getOrCreate()
 4 
 5     session.sql("drop table if exists students_info")
 6     session.sql("create table if not exists students_info(name string,age int) 
 row format delimited fields terminated by '	'")
 7 
 8     session.sql("load data local inpath '/opt/module/spark-test/data/student_infos.txt' into table default.students_info")
 9 
10     session.sql("drop table if exists students_score")
11     session.sql("create table if not exists students_score(name string,score int) 
 row format delimited fields terminated by '	'")
12 
13     session.sql("load data local inpath '/opt/module/spark-test/data/student_scores.txt' into table default.students_score")
14 
15     //保存到hive中
16     session.sql("drop table if exists students_result")
17     session.sql("select s1.name,s1.age,s2.score from students_info s1 join students_score s2 on s1.name=s2.name where s2.score >90").write.saveAsTable("students_result")
18 
19     //检查数据是否保存
20     val df = session.table("students_result")
21     df.show()
22 
23     session.stop()
24   }
25 }
原文地址:https://www.cnblogs.com/tele-share/p/10397525.html