Flink状态之OperatorState

知识点:

Flink 为算子状态(operator state)提供三种基本数据结构:  
    列表状态(List state): 将状态表示为一组数据的列表。  
    联合列表状态(Union list state): 也将状态表示为数据的列表。它与常规列表状态的区别在于,在发生故障时,或者从保存点(savepoint)启动应用程序时如何恢复。  
    广播状态(Broadcast state): 如果一个算子有多项任务,而它的每项任务状态又都相同,那么这种特殊情况最适合应用广播状态。

1、主类

package com.example.demo.flink;

import com.example.demo.flink.impl.CustomSink;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;


/**
 * @program: demo
 * @description: operator state demo - feeds a small bounded stream into a buffering sink
 * @author: yang
 * @create: 2020-12-28 15:46
 */
public class TestOperatorStateMain {
    /** Default parallelism configured on the execution environment. */
    private static final int ENV_PARALLELISM = 16;

    /** Parallelism of the sink operator. */
    private static final int SINK_PARALLELISM = 1;

    /** Threshold handed to the CustomSink constructor. */
    private static final int SINK_THRESHOLD = 3;

    /**
     * Builds the job (four in-memory tuples into a {@code CustomSink}) and runs it.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the job execution fails
     */
    public static void main(String[] args) throws Exception {
        // Local environment with the web UI enabled.
        // Alternative (not used here): StreamExecutionEnvironment.getExecutionEnvironment()
        final StreamExecutionEnvironment env =
                StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
        env.setParallelism(ENV_PARALLELISM);

        // Bounded in-memory source of (name, value) pairs.
        final DataStreamSource<Tuple2<String, Integer>> wordCounts = env.fromElements(
                Tuple2.of("Spark", 3),
                Tuple2.of("Flink", 5),
                Tuple2.of("Hadoop", 7),
                Tuple2.of("Spark", 4));

        // The sink buffers incoming elements and prints them once the threshold is reached;
        // with four inputs and a threshold of three, one batch of three is printed and the
        // last element stays in the buffer.
        final CustomSink bufferingSink = new CustomSink(SINK_THRESHOLD);
        wordCounts.addSink(bufferingSink).setParallelism(SINK_PARALLELISM);

        env.execute("TestStatefulApi");
    }

}

2、处理实现类

package com.example.demo.flink.impl;

import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.runtime.state.FunctionInitializationContext;
import org.apache.flink.runtime.state.FunctionSnapshotContext;
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;

import java.util.ArrayList;
import java.util.List;

/**
 * Buffering sink that demonstrates Flink operator (list) state.
 *
 * <p>Incoming tuples are collected in an in-memory buffer. Once the buffer holds at least
 * {@code threshold} elements, the whole batch is printed to standard output and the buffer
 * is cleared. When a snapshot is taken, the buffer contents are copied into operator list
 * state; when the function is restored, that state is loaded back into the buffer.
 *
 * @program: demo
 * @author: yang
 * @create: 2020-12-29 11:36
 */
public class CustomSink implements SinkFunction<Tuple2<String,Integer>>, CheckpointedFunction {
    /** Number of buffered elements at which the buffer is flushed. */
    private final int threshold;

    /** Elements received (or restored) since the last flush. */
    private final List<Tuple2<String,Integer>> bufferElements;

    /** Operator list state handle; assigned in {@link #initializeState}. */
    private transient ListState<Tuple2<String,Integer>> checkpointState;

    /**
     * Creates a sink that flushes once the buffer reaches the given size.
     *
     * @param i flush threshold (number of buffered elements)
     */
    public CustomSink(int i) {
        this.threshold = i;
        this.bufferElements = new ArrayList<>();
    }

    /**
     * Replaces the contents of the operator state with the current buffer.
     *
     * @param context snapshot context (unused)
     * @throws Exception if the state cannot be updated
     */
    @Override
    public void snapshotState(FunctionSnapshotContext context) throws Exception {
        // Drop the previous snapshot contents so the state mirrors the buffer exactly.
        checkpointState.clear();
        for (Tuple2<String,Integer> ele : bufferElements) {
            checkpointState.add(ele);
        }
    }


    /**
     * Registers the list state and, when restoring, loads its elements into the buffer.
     *
     * @param context gives access to the operator state store and the restore flag
     * @throws Exception if the state cannot be obtained or read
     */
    @Override
    public void initializeState(FunctionInitializationContext context) throws Exception {
        // Fully parameterised descriptor (no raw type) so getListState is type-checked.
        ListStateDescriptor<Tuple2<String,Integer>> descriptor = new ListStateDescriptor<>(
                "Operator",
                TypeInformation.of(new TypeHint<Tuple2<String, Integer>>() {
                }));
        checkpointState = context.getOperatorStateStore().getListState(descriptor);
        if (context.isRestored()) {
            // Load the checkpointed elements back into memory.
            for (Tuple2<String,Integer> ele : checkpointState.get()) {
                bufferElements.add(ele);
            }
        }
    }

    /**
     * Buffers the element and flushes the buffer once the threshold is reached.
     *
     * @param value   the incoming element
     * @param context sink context (unused)
     * @throws Exception declared by the interface; not thrown by this implementation
     */
    @Override
    public void invoke(Tuple2<String, Integer> value, Context context) throws Exception {
        bufferElements.add(value);
        // ">=" rather than "==": a restore may have put more than `threshold` elements into
        // the buffer, in which case an equality check would never fire again.
        if (bufferElements.size() >= threshold) {
            System.out.println("自定义格式:" + bufferElements);
            bufferElements.clear();
        }
    }
}
原文地址:https://www.cnblogs.com/ywjfx/p/14228605.html