java spark算子repartition,coalesce

按照我的理解

coalesce(numPartitions, shuffle = true) 就等于repartition(numPartitions)

区别

当coalesce(numPartitions, shuffle = false)时候,不会产生shuffle,只是单纯地将多个分区合并成更少的分区(注意:不shuffle的时候不能增加分区,只能减少分区;如果指定的分区数大于当前分区数,分区数保持不变) 

当coalesce(numPartitions, shuffle = true)时候,是会有shuffle操作的,数据会被重新分发到各个新分区中,此时既可以减少分区,也可以增加分区。

/**
 * # _*_ coding:utf-8 _*_
 * # Author:xiaoshubiao
 * # Time : 2020/5/13 14:13
 * Java Spark repartition / coalesce demo
 **/
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;


/**
 * Small local-mode Spark demo that compares {@code repartition(2)} with
 * {@code coalesce(2, false)} on the same 3-partition RDD.
 *
 * <p>Each pipeline first prints every element together with the index of the
 * partition it currently lives in and appends that index to the element, then
 * changes the number of partitions, then prints the elements again with their
 * new partition index. Comparing the two printouts shows how elements were
 * moved between partitions.
 */
public class spark_function {
    /**
     * Runs both demo pipelines on a local Spark context.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("spark_java_function");
        // JavaSparkContext is Closeable; try-with-resources guarantees the context is
        // stopped even if one of the jobs below throws.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            List<String> list = Arrays.asList("a","b","c","d");
            JavaRDD<String> parallelize = sc.parallelize(list,3);

            //repartition
            parallelize
                    .mapPartitionsWithIndex(tagWithPartitionId(), false)
                    .repartition(2)
                    .mapPartitionsWithIndex(logAfterRepartition(), false)
                    .collect(); // collect() is only here to trigger the job; the result is discarded

            //coalesce (shuffle disabled)
            parallelize
                    .mapPartitionsWithIndex(tagWithPartitionId(), false)
                    .coalesce(2,false)
                    .mapPartitionsWithIndex(logAfterRepartition(), false)
                    .collect(); // collect() is only here to trigger the job; the result is discarded
        }
    }

    /**
     * Builds the first-stage function: prints each element with its partition index
     * and returns the elements with that index appended (e.g. {@code "a"} in
     * partition 0 becomes {@code "a0"}), so the original partition stays visible
     * after the partition count is changed.
     *
     * @return a function suitable for {@code mapPartitionsWithIndex}
     */
    private static Function2<Integer, Iterator<String>, Iterator<String>> tagWithPartitionId() {
        return new Function2<Integer, Iterator<String>, Iterator<String>>() {
            @Override
            public Iterator<String> call(Integer partitionId, Iterator<String> elements) throws Exception {
                List<String> tagged = new ArrayList<>();
                while (elements.hasNext()) {
                    String value = elements.next();
                    System.out.println("分区id:"+partitionId+"--值:"+value);
                    tagged.add(value+partitionId);
                }
                return tagged.iterator();
            }
        };
    }

    /**
     * Builds the second-stage function: prints each element with the index of the
     * partition it ended up in and passes the elements through unchanged.
     *
     * @return a function suitable for {@code mapPartitionsWithIndex}
     */
    private static Function2<Integer, Iterator<String>, Iterator<String>> logAfterRepartition() {
        return new Function2<Integer, Iterator<String>, Iterator<String>>() {
            @Override
            public Iterator<String> call(Integer partitionId, Iterator<String> elements) throws Exception {
                List<String> passedThrough = new ArrayList<>();
                while (elements.hasNext()) {
                    String value = elements.next();
                    System.out.println("重分区后,分区id:"+partitionId+"---值"+value);
                    passedThrough.add(value);
                }
                return passedThrough.iterator();
            }
        };
    }
}
原文地址:https://www.cnblogs.com/7749ha/p/12882544.html