// SparkSQL — basic DataFrame API operations

package com.spark

import org.apache.spark.sql.SparkSession

/**
  * DataFrame API基本操作
  */
/**
  * Basic DataFrame API operations.
  *
  * Loads a JSON file into a DataFrame and demonstrates schema inspection,
  * column selection, column expressions, filtering and aggregation —
  * each operation annotated with its SQL equivalent.
  */
object DataFrameAPP1 {
  def main(args: Array[String]): Unit = {

    // Backslashes must be escaped in a plain string literal: "E:\data\infos.txt"
    // contains the invalid escapes \d and \i. Use \\ (or a raw string) for
    // Windows paths.
    val path = "E:\\data\\infos.txt"

    // local[2]: run locally with 2 worker threads.
    val spark = SparkSession.builder()
      .appName("DataFrameApp")
      .master("local[2]")
      .getOrCreate()

    // Spark infers the schema from the JSON records.
    val peopleDF = spark.read.format("json").load(path)

    peopleDF.printSchema()

    // show() prints the first 20 rows by default.
    peopleDF.show()

    // SQL: select name from table
    peopleDF.select("name").show()

    // SQL: select name, age + 10 as age2 from table
    peopleDF.select(peopleDF.col("name"), (peopleDF.col("age") + 10).as("age2")).show()

    // SQL: select * from table where age > 19
    peopleDF.filter(peopleDF.col("age") > 19).show()

    // SQL: select age, count(1) from table group by age
    peopleDF.groupBy("age").count().show()

    spark.stop()
  }
}
// Original source: https://www.cnblogs.com/aishanyishi/p/10318175.html