# 广播变量和累加器 (Broadcast variables and accumulators)

# 累加器 (Accumulators)

from pyspark import SparkContext
# Accumulator demo: fold every element of an RDD into a shared accumulator
# that starts at 10, then read the total back on the driver.
sc = SparkContext("local", "Accumulator app")
try:
    num = sc.accumulator(10)

    def f(x):
        """Add one RDD element to the shared accumulator ``num``."""
        # Accumulator.add updates in place, so no ``global`` rebinding is needed.
        num.add(x)

    rdd = sc.parallelize([20, 30, 40, 50])
    # foreach is an action: f has been applied to every element once it returns.
    rdd.foreach(f)
    final = num.value  # 10 + 20 + 30 + 40 + 50 = 150
    print(final)
finally:
    # Always release the context; a second SparkContext cannot be created in
    # the same process while this one is still active.
    sc.stop()

# 广播变量 (Broadcast variables)

from pyspark import SparkContext
# Broadcast demo: pair each element of rdd1 with 1 if it occurs in the
# broadcast list and with 0 otherwise.
sc = SparkContext("local", "Broadcast app")
try:
    list_1 = [1, 2, 3, 4]
    rdd1 = sc.parallelize([1, 1, 2, 3, 5, 6, 7])
    rdd2 = sc.parallelize([1, 1, 2, 8])  # NOTE(review): never used below
    # Broadcast the lookup list; tasks read it through bc.value.
    bc = sc.broadcast(list_1)
    res = rdd1.map(lambda x: (x, 1) if x in bc.value else (x, 0))
    # Expected: [(1, 1), (1, 1), (2, 1), (3, 1), (5, 0), (6, 0), (7, 0)]
    print(res.collect())
finally:
    # Always release the context so another SparkContext can be created later.
    sc.stop()
# 原文地址 (original source): https://www.cnblogs.com/muyue123/p/13362682.html