// spark sql01

package sql;



import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SQLContext;

/**
 *
 */
/**
 * Demonstrates basic DataFrame operations (Spark 1.4 API) by loading a JSON
 * file into a {@code DataFrame} and running show/select/filter/groupBy
 * queries against it, printing each result to stdout.
 */
public class DataFrameReadJsonOps2 {

    /**
     * Entry point: builds a local Spark context, reads {@code people.json}
     * into a DataFrame, and prints the results of several SQL-style
     * operations.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // SparkConf reads system configuration and names this application.
        SparkConf conf = new SparkConf().setAppName("DataFrameOps").setMaster("local");
        // JavaSparkContext is the core entry point of the Driver program.
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            // Reduce console noise from Spark's default INFO logging.
            sc.setLogLevel("WARN");
            // SQLContext is the entry point for SQL-style analysis.
            SQLContext sqlContext = new SQLContext(sc);
            // A DataFrame can be thought of as a table.
            DataFrame df = sqlContext.read().json("c:/resources/people.json");
            // select * from table
            df.show();
            // desc table
            df.printSchema();
            // select name from table
            df.select(df.col("name")).show();
            // select name, age+10 from table
            df.select(df.col("name"), df.col("age").plus(10)).show();
            // select * from table where age > 21
            df.filter(df.col("age").gt(21)).show();
            // select age, count(1) from table group by age
            df.groupBy("age").count().show(); // equivalent: df.groupBy(df.col("age")).count().show();
        } finally {
            // Fix: the original never stopped the SparkContext, leaking the
            // driver's resources (UI server, block manager, temp dirs).
            // Always stop it, even if a query above throws.
            sc.stop();
        }
    }

}
//
//SLF4J: Class path contains multiple SLF4J bindings.
//SLF4J: Found binding in [jar:file:/E:/bigdata/spark-1.4.0-bin-hadoop2.6/lib/spark-assembly-1.4.0-hadoop2.6.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]
//SLF4J: Found binding in [jar:file:/E:/bigdata/spark-1.4.0-bin-hadoop2.6/lib/spark-examples-1.4.0-hadoop2.6.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]
//SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
//SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
//Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
//17/12/29 14:15:10 INFO SparkContext: Running Spark version 1.4.0
//17/12/29 14:15:24 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
//17/12/29 14:15:28 INFO SecurityManager: Changing view acls to: alamps
//17/12/29 14:15:28 INFO SecurityManager: Changing modify acls to: alamps
//17/12/29 14:15:28 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(alamps); users with modify permissions: Set(alamps)
//17/12/29 14:15:37 INFO Slf4jLogger: Slf4jLogger started
//17/12/29 14:15:39 INFO Remoting: Starting remoting
//17/12/29 14:15:44 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkDriver@172.18.3.7:55458]
//17/12/29 14:15:44 INFO Utils: Successfully started service 'sparkDriver' on port 55458.
//17/12/29 14:15:45 INFO SparkEnv: Registering MapOutputTracker
//17/12/29 14:15:46 INFO SparkEnv: Registering BlockManagerMaster
//17/12/29 14:15:46 INFO DiskBlockManager: Created local directory at C:UsersalampsAppDataLocalTempspark-cd3ecbc3-41b5-4d8b-8e78-8c2c368ce80blockmgr-660894dd-39d3-4c8a-bf25-ae1d3850953d
//17/12/29 14:15:46 INFO MemoryStore: MemoryStore started with capacity 467.6 MB
//17/12/29 14:15:47 INFO HttpFileServer: HTTP File server directory is C:UsersalampsAppDataLocalTempspark-cd3ecbc3-41b5-4d8b-8e78-8c2c368ce80bhttpd-106ce90e-d496-4e96-a383-b471aeb5a224
//17/12/29 14:15:47 INFO HttpServer: Starting HTTP Server
//17/12/29 14:15:48 INFO Utils: Successfully started service 'HTTP file server' on port 55464.
//17/12/29 14:15:48 INFO SparkEnv: Registering OutputCommitCoordinator
//17/12/29 14:15:49 INFO Utils: Successfully started service 'SparkUI' on port 4040.
//17/12/29 14:15:49 INFO SparkUI: Started SparkUI at http://172.18.3.7:4040
//17/12/29 14:15:49 INFO Executor: Starting executor ID driver on host localhost
//17/12/29 14:15:50 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 55483.
//17/12/29 14:15:50 INFO NettyBlockTransferService: Server created on 55483
//17/12/29 14:15:50 INFO BlockManagerMaster: Trying to register BlockManager
//17/12/29 14:15:50 INFO BlockManagerMasterEndpoint: Registering block manager localhost:55483 with 467.6 MB RAM, BlockManagerId(driver, localhost, 55483)
//17/12/29 14:15:50 INFO BlockManagerMaster: Registered BlockManager
//+----+-------+
//| age|   name|
//+----+-------+
//|null|Michael|
//|  30|   Andy|
//|  19| Justin|
//+----+-------+
//
//root
// |-- age: long (nullable = true)
// |-- name: string (nullable = true)
//
//+-------+
//|   name|
//+-------+
//|Michael|
//|   Andy|
//| Justin|
//+-------+
//
//+-------+----------+
//|   name|(age + 10)|
//+-------+----------+
//|Michael|      null|
//|   Andy|        40|
//| Justin|        29|
//+-------+----------+
//
//+---+----+
//|age|name|
//+---+----+
//| 30|Andy|
//+---+----+
//
//+----+-----+
//| age|count|
//+----+-----+
//|null|    1|
//|  19|    1|
//|  30|    1|
//+----+-----+
// Original source: https://www.cnblogs.com/alamps/p/8144298.html