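Spark SQL: aggregating with collect_set through HiveContext (spark-shell session)

Note: HiveContext is deprecated since Spark 2.0, which is why the shell prints a deprecation warning below; in new code, use SparkSession.builder.enableHiveSupport() instead.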

scala> import org.apache.spark.sql.hive.HiveContext

import org.apache.spark.sql.hive.HiveContext

scala> val hivecon=new HiveContext(sc)

warning: there was one deprecation warning; re-run with -deprecation for details

hivecon: org.apache.spark.sql.hive.HiveContext = org.apache.spark.sql.hive.HiveContext@56db9f2d

scala> hivecon.sql("use gamedw")

res5: org.apache.spark.sql.DataFrame = []

scala> hivecon.sql("select collect_set(custname),sex from cust group by sex")

res6: org.apache.spark.sql.DataFrame = [collect_set(custname): array<string>, sex: int]

scala> hivecon.sql("select collect_set(custname),sex from cust group by sex").show
+---------------------+---+
|collect_set(custname)|sex|
+---------------------+---+
| [mahuateng, liuya...|  1|
| [liuqin, hello, w...|  0|
+---------------------+---+

原文地址:https://www.cnblogs.com/playforever/p/9618748.html