FlinkCDC从Mysql数据写入Kafka
环境安装:
  1.jdk
  2.Zookeeper
  3.Kafka
  4.maven
  5.
一、binlog监控Mysql的库
二、编写FlinkCDC程序
1.添加pom文件
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.lxz</groupId>
<artifactId>gmall-logger</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>gmall-20210909</name>
<description>Demo project for Spring Boot</description>
<properties>
<java.version>1.8</java.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<spring-boot.version>2.4.1</spring-boot.version>
<maven.compiler.source>${java.version}</maven.compiler.source>
<maven.compiler.target>${java.version}</maven.compiler.target>
<flink.version>1.12.0</flink.version>
<scala.version>2.12</scala.version>
<hadoop.version>3.1.3</hadoop.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-cep_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-json</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.68</version>
</dependency>
<dependency>
<groupId>com.alibaba.ververica</groupId>
<artifactId>flink-connector-mysql-cdc</artifactId>
<version>1.2.0</version>
</dependency>
<!--如果保存检查点到hdfs上,需要引入此依赖-->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<!--Flink默认使用的是slf4j记录日志,相当于一个日志的接口,我们这里使用log4j作为具体的日志实现-->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.25</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.25</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-to-slf4j</artifactId>
<version>2.14.0</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.kafka</groupId>
<artifactId>spring-kafka</artifactId>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
</dependencies>
<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-dependencies</artifactId>
<version>${spring-boot.version}</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
<encoding>UTF-8</encoding>
</configuration>
</plugin>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<version>2.3.0.RELEASE</version>
<executions>
<execution>
<goals>
<goal>repackage</goal>
</goals>
</execution>
</executions>
<configuration>
<classifier>boot</classifier>
<mainClass>com.lxz.gamll20210909.Gamll20210909Application</mainClass>
</configuration>
</plugin>
</plugins>
</build>
</project>
2.MyKafkaUtil工具类
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import java.util.Properties;
public class MyKafkaUtil {
private static String KAFKA_SERVER = "hadoop201:9092,hadoop202:9092,hadoop203:9092";
private static Properties properties =  new Properties();
static {
properties.setProperty("bootstrap.servers",KAFKA_SERVER);
}
public static FlinkKafkaProducer<String> getKafkaSink(String topic){
return new FlinkKafkaProducer<String>(topic,new SimpleStringSchema(),properties);
}
}
3.FlinkCDC主程序
import com.alibaba.fastjson.JSONObject;
import com.alibaba.ververica.cdc.connectors.mysql.MySQLSource;
import com.alibaba.ververica.cdc.connectors.mysql.table.StartupOptions;
import com.alibaba.ververica.cdc.debezium.DebeziumDeserializationSchema;
import com.alibaba.ververica.cdc.debezium.DebeziumSourceFunction;
import com.lxz.gamll20210909.util.MyKafkaUtil;
import io.debezium.data.Envelope;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.source.SourceRecord;
public class Flink_CDCWithCustomerSchema {
public static void main(String[] args) throws Exception {
//1.创建执⾏环境
StreamExecutionEnvironment env = ExecutionEnvironment();
env.setParallelism(1);
log4j2不打印日志//2.创建Flink-MySQL-CDC的Source
DebeziumSourceFunction<String> mysqlSource = MySQLSource.<String>builder()
.hostname("hadoop201")
.port(3306)
.username("root")
.
password("000000")
.databaseList("gmall-20210712")
.startupOptions(StartupOptions.latest())
//                .startupOptions(KafkaOptions.StartupOptions.class)
.deserializer(new DebeziumDeserializationSchema<String>() {
//⾃定义数据解析器
@Override
public void deserialize(SourceRecord sourceRecord, Collector<String> collector) throws Exception {
//获取主题信息,包含着数据库和表名  mysql_all-flink.z_user_info
String topic = pic();
String[] arr = topic.split("\\.");
String db = arr[1];
String tableName = arr[2];
//获取操作类型 READ DELETE UPDATE CREATE
Envelope.Operation operation = Envelope.operationFor(sourceRecord);
//获取值信息并转换为Struct类型
Struct value = (Struct) sourceRecord.value();
//获取变化后的数据
Struct after = Struct("after");
//创建JSON对象⽤于存储数据信息
JSONObject data = new JSONObject();
if (after != null) {
Schema schema = after.schema();
for (Field field : schema.fields()) {
data.put(field.name(), (field.name()));
}
}
//创建JSON对象⽤于封装最终返回值数据信息
JSONObject result = new JSONObject();
result.put("operation", String().toLowerCase());
result.put("data", data);
result.put("database", db);
result.put("table", tableName);
/
/发送数据⾄下游
}
@Override
public TypeInformation<String> getProducedType() {
return TypeInformation.of(String.class);
}
})
.build();
//3.使⽤CDC Source从MySQL读取数据
DataStreamSource<String> mysqlDS = env.addSource(mysqlSource);
/
/4.打印数据
mysqlDS.KafkaSink("ods_base_db"));
//5.执⾏任务
}
}
三、结果
1.启动FlinkCDC主程序
2.在服务器上开一个kafka的消费者
bin/kafka-console-consumer.sh --bootstrap-server hadoop201:9092 --topic ods_base_db
3.在Mysql中插入数据看Kafka会不会消费
  Mysql端
  Kafka端
成功消费。

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。