知识点:
https://github.com/ververica/flink-cdc-connectors //官网地址
1、依赖
<!-- Flink DataStream API; ${flink.version} is resolved from a Maven property defined elsewhere in the POM -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-java_2.11</artifactId>
    <version>${flink.version}</version>
</dependency>

<!-- Flink client classes, needed to run the job from the IDE / a local environment -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.11</artifactId>
    <version>${flink.version}</version>
</dependency>

<!-- MySQL JDBC driver -->
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>8.0.16</version>
</dependency>

<!-- Flink CDC source connector -->
<dependency>
    <groupId>com.alibaba.ververica</groupId>
    <!-- add the dependency matching your database -->
    <artifactId>flink-connector-mysql-cdc</artifactId>
    <version>1.0.0</version>
</dependency>

<!-- Logging: SLF4J API plus the log4j 1.2 binding -->
<dependency>
    <groupId>org.slf4j</groupId>
    <artifactId>slf4j-api</artifactId>
    <version>1.7.25</version>
</dependency>
<dependency>
    <groupId>org.slf4j</groupId>
    <artifactId>slf4j-log4j12</artifactId>
    <version>1.7.25</version>
</dependency>
2、处理类
import org.apache.flink.configuration.Configuration; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.streaming.api.functions.source.SourceFunction; import com.alibaba.ververica.cdc.debezium.StringDebeziumDeserializationSchema; import com.alibaba.ververica.cdc.connectors.mysql.MySQLSource; /** * @program: Flink1.11 * @description: * @author: yang * @create: 2021-01-11 17:41 */ public class MySqlBinlogSourceExample { public static void main(String[] args) throws Exception { SourceFunction<String> sourceFunction = MySQLSource.<String>builder() .hostname("localhost") .port(3306) .databaseList("test") // monitor all tables under inventory database .username("root") .password("root") .deserializer(new StringDebeziumDeserializationSchema()) // converts SourceRecord to String .build(); StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration()); env.addSource(sourceFunction).print().setParallelism(1); // use parallelism 1 for sink to keep message ordering env.execute("test"); } }
3、binlog结果
修改:before and after
SourceRecord{ sourcePartition={server=mysql-binlog-source}, sourceOffset={ts_sec=1610362335, file=mysql-bin.000004, pos=233445691, row=1, server_id=1, event=2} } ConnectRecord {topic='mysql-binlog-source.test.weblog', kafkaPartition=null, key=Struct{id=5}, keySchema=Schema{mysql_binlog_source.test.weblog.Key:STRUCT}, value=Struct{before=Struct{id=5,url=5,method=5,ip=5,args=5,create_time=1610390670000},after=Struct{id=5,url=5555,method=5555,ip=5555,args=5555,create_time=1610390670000},source=Struct{version=1.2.0.Final,connector=mysql,name=mysql-binlog-source,ts_ms=1610362335000,db=test,table=weblog,server_id=1,file=mysql-bin.000004,pos=233445826,row=0,thread=944986},op=u,ts_ms=1610362335615}, valueSchema=Schema{mysql_binlog_source.test.weblog.Envelope:STRUCT}, timestamp=null, headers=ConnectHeaders(headers=) }
增加:只有after
SourceRecord{sourcePartition={server=mysql-binlog-source}, sourceOffset={file=mysql-bin.000004, pos=233455303}} ConnectRecord {topic='mysql-binlog-source.test.weblog', kafkaPartition=null, key=Struct{id=7}, keySchema=Schema{mysql_binlog_source.test.weblog.Key:STRUCT}, value=Struct{after=Struct{id=7,url=7,method=7,ip=7,args=7,create_time=1610391478000},source=Struct{version=1.2.0.Final,connector=mysql,name=mysql-binlog-source,ts_ms=0,snapshot=last,db=test,table=weblog,server_id=0,file=mysql-bin.000004,pos=233455303,row=0},op=c,ts_ms=1610362692061}, valueSchema=Schema{mysql_binlog_source.test.weblog.Envelope:STRUCT}, timestamp=null, headers=ConnectHeaders(headers=)}
删除:只有before
SourceRecord{sourcePartition={server=mysql-binlog-source}, sourceOffset={ts_sec=1610362743, file=mysql-bin.000004, pos=233456891, row=1, server_id=1, event=2}} ConnectRecord{topic='mysql-binlog-source.test.weblog', kafkaPartition=null, key=Struct{id=1}, keySchema=Schema{mysql_binlog_source.test.weblog.Key:STRUCT}, value=Struct{before=Struct{id=1,url=1,method=1,ip=1,args=1,create_time=1603115590000},source=Struct{version=1.2.0.Final,connector=mysql,name=mysql-binlog-source,ts_ms=1610362743000,db=test,table=weblog,server_id=1,file=mysql-bin.000004,pos=233457026,row=0,thread=944986},op=d,ts_ms=1610362744527}, valueSchema=Schema{mysql_binlog_source.test.weblog.Envelope:STRUCT}, timestamp=null, headers=ConnectHeaders(headers=)}
4、如果需要将数据进行etl,解析数据,然后自定义实现sink
#####################################测试代码不能用,由于开源不完善,所以无法完成....##################################################
5、测试代码
import com.alibaba.fastjson.JSON; import com.alibaba.ververica.cdc.debezium.DebeziumDeserializationSchema; import com.alibaba.ververica.cdc.debezium.StringDebeziumDeserializationSchema; import org.apache.flink.api.common.functions.MapFunction; import org.apache.flink.api.common.typeinfo.BasicTypeInfo; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.configuration.Configuration; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.streaming.api.functions.source.SourceFunction; import com.alibaba.ververica.cdc.connectors.mysql.MySQLSource; import org.apache.flink.util.Collector; import org.apache.kafka.connect.data.Struct; import org.apache.kafka.connect.source.SourceRecord; import java.lang.reflect.Field; /** * @program: Flink1.11 * @description: * @author: yang * @create: 2021-01-11 17:41 */ public class MySqlBinlogSourceExample { public static void main(String[] args) throws Exception { SourceFunction<String> sourceFunction = MySQLSource.<String>builder() .hostname("123.207.27.238") .port(3306) .databaseList("test","spark_job") .tableList("test.weblog","spark_test") .username("root") .password("hushuo") .deserializer(new StringDebeziumDeserializationSchema()) // .deserializer(new MyDebeziumDeserializationSchema()) .build(); StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration()); env.addSource(sourceFunction).print().setParallelism(1); // env.addSource(sourceFunction).map(new MyMapHandler()).print().setParallelism(1); env.execute("test"); } } class MyMapHandler implements MapFunction<String, Object>{ public Object map(String s) throws Exception { SourceRecord sourceRecord = JSON.toJavaObject(JSON.parseObject(s),SourceRecord.class); Struct struct = (Struct)sourceRecord.value(); System.out.println("struct:"+struct); return s; } } class MyDebeziumDeserializationSchema implements DebeziumDeserializationSchema{ public static 
final String CREATE = "c"; public static final String DELETE = "d"; public static final String UPDATE = "u"; public void deserialize(SourceRecord sourceRecord, Collector collector) throws Exception { Struct value = (Struct)sourceRecord.value(); String op = value.getString("op"); Struct data = null; if(CREATE.equals(op)){ //增加 data = this.createData(value); }else if(DELETE.equals(op)){ //删除 data = this.deleteData(value); }else if(UPDATE.equals(op)){ //修改 data = this.updateData(value); }else { throw new RuntimeException("data is error......"); } collector.collect(JSON.toJSONString(data)); } public TypeInformation getProducedType() { return BasicTypeInfo.STRING_TYPE_INFO; } private Struct updateData( Struct value){ System.out.println("修改"); Struct beforeData = (Struct)value.get("before"); System.out.println("修改之前数据before:"+beforeData.toString()); Struct afterData = (Struct)value.get("after"); System.out.println("修改之后数据afterData:"+afterData.toString()); return afterData; } private Struct deleteData( Struct value){ System.out.println("删除"); Struct beforeData = (Struct)value.get("before"); System.out.println("before:"+beforeData.toString()); return beforeData; } private Struct createData( Struct value){ System.out.println("增加"); Struct afterData = (Struct)value.get("after"); System.out.println("afterData:"+afterData.toString()); return afterData; } }