Spark SQL Dataset

Java

 1 /** 
 2  *2.0之后使用sparksession即可,不需要再去创建sqlcontext
 3  *@author Tele
 4  *
 5  */
 6 public class Demo {
 7     private static SparkConf conf = new SparkConf().setAppName("dataframedemo").setMaster("local");
 8     private static JavaSparkContext jsc = new JavaSparkContext(conf);
 9     
10     private static SparkSession session = new SparkSession(jsc.sc());
11     
12     //创建sparksession(可以不创建jsc)
13 //    private static SparkSession session = SparkSession.builder().appName("asd").master("local").getOrCreate();
14     
15     
16 //    private static SQLContext sqlContext = new SQLContext(session);
17     
18     
19     public static void main(String[] args) {
20     
21         //在java中返回是dataset,在scala中返回的是dataframe
22     //    sqlContext.read().json("xx")
23         
24         Dataset<Row> dataset = session.read().json("./src/main/java/cn/tele/spark_sql/dataframe/students.json");
25         //输出全部数据
26         dataset.show();
27         
28         
29         //输出某一列
30         dataset.select("id").show();
31         dataset.select(dataset.col("id")).show();
32         
33         
34         //判断id>2
35         dataset.select(dataset.col("id").gt(2)).show();
36         
37         //判断id>=2
38         dataset.select(dataset.col("id").geq(2)).show();
39         
40         //将age += 100
41         dataset.select(dataset.col("age").plus(100)).show();
42         
43         //输出元数据
44         dataset.printSchema();
45         
46         
47         jsc.close();
48         
49         
50     }
51 }

Scala

 /**
  * Walks through basic DataFrame operations on a JSON file: projecting columns,
  * evaluating column expressions and printing the inferred schema.
  *
  * Uses `SparkSession` as the entry point; constructing a `SQLContext` directly
  * has been deprecated since Spark 2.0.
  */
 object Demo {
   def main(args: Array[String]): Unit = {
     // Imported locally so this listing does not depend on a top-level import block.
     import org.apache.spark.sql.SparkSession

     val spark = SparkSession.builder()
       .appName("demo")
       .master("local")
       .getOrCreate()

     try {
       val students = spark.read.json("./src/main/scala/cn/tele/spark_sql/dataframe/students.json")
       val id = students.col("id")

       // Project a single column by Column reference.
       students.select(id).show()

       // id + 1, written with the named method and with the operator form.
       students.select(id.plus(1)).show()
       students.select(id + 1).show()

       // Project several columns by name.
       students.select("id", "name").show()

       // Evaluate id >= 2 for every row. This yields a boolean column, not a filter.
       // The literal is numeric so the comparison does not rely on string coercion.
       students.select(id.geq(2)).show()

       // Print the inferred schema.
       students.printSchema()
     } finally {
       // Stopping the session also stops the underlying SparkContext.
       spark.stop()
     }
   }
 }
原文地址:https://www.cnblogs.com/tele-share/p/10366749.html