Spark SQL Table Join(Python)

示例
 
Spark SQL注册“临时表”执行“Join”(Inner Join、Left Outer Join、Right Outer Join、Full Outer Join)
 
代码
 
from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext, Row
 
conf = SparkConf().setAppName("spark_sql_table_join")
 
sc = SparkContext(conf=conf)
 
sqlCtx = SQLContext(sc)
 
line1 = sc.parallelize(["name1 a", "name3 c", "name4 d"])
 
line2 = sc.parallelize(["name1 1", "name2 2", "name3 3"])
 
word1 = line1.map(lambda line: line.split(" "))
 
word2 = line2.map(lambda line: line.split(" "))
 
table1 = word1.map(lambda words: Row(name=words[0], title=words[1]))
 
table2 = word2.map(lambda words: Row(name=words[0], fraction=words[1]))
 
tableSchema1 = sqlCtx.inferSchema(table1)
 
tableSchema2 = sqlCtx.inferSchema(table2)
 
tableSchema1.registerTempTable("table1")
 
tableSchema2.registerTempTable("table2")
 
 
def printRows(rows):
    if rows:
        for row in rows:
            print row
 
# inner join
rows = sqlCtx.sql(
    "select table1.name, table1.title, table2.fraction from table1 join table2 on table1.name = table2.name").collect()
 
printRows(rows)
 
print "============================================="
 
# left outer join
rows = sqlCtx.sql(
    "select table1.name, table1.title, table2.fraction from table1 left outer join table2 on table1.name = table2.name").collect()
 
printRows(rows)
 
# right outer join
rows = sqlCtx.sql(
    "select table1.name, table1.title, table2.fraction from table1 right outer join table2 on table1.name = table2.name").collect()
 
print "============================================="
 
printRows(rows)
 
# full outer join
rows = sqlCtx.sql(
    "select table1.name, table1.title, table2.fraction from table1 full outer join table2 on table1.name = table2.name").collect()
 
print "============================================="
 
printRows(rows)
 
"""
Row(name=u'name1', title=u'a', fraction=u'1')                                   
Row(name=u'name3', title=u'c', fraction=u'3')
=============================================
Row(name=u'name1', title=u'a', fraction=u'1')                                   
Row(name=u'name3', title=u'c', fraction=u'3')
Row(name=u'name4', title=u'd', fraction=None)
=============================================
Row(name=u'name1', title=u'a', fraction=u'1')
Row(name=None, title=None, fraction=u'2')
Row(name=u'name3', title=u'c', fraction=u'3')
=============================================
Row(name=u'name1', title=u'a', fraction=u'1')
Row(name=None, title=None, fraction=u'2')
Row(name=u'name3', title=u'c', fraction=u'3')
Row(name=u'name4', title=u'd', fraction=None)
"""
 
sc.stop()
 
原文地址:https://www.cnblogs.com/yurunmiao/p/4892247.html