MySQL CDC: synchronizing data with Flink (Java) operators

The job below reads change events from a MySQL table through the Flink CDC connector, parses the Debezium JSON into tuples, and applies the resulting inserts, updates, and deletes to a target MySQL table through a JDBC sink.

package com.yofoto.myfunction;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.ververica.cdc.connectors.mysql.source.MySqlSource;
import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;

public class StreamJoinExample3 {

    // Static and final DateTimeFormatter to avoid serialization issues
    private static final DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

    public static void main(String[] args) throws Exception {
        // Create execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
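        // Optional (not part of the original job): enabling checkpointing lets the CDC source commit
        // binlog offsets and recover consistently after a failure; the interval below is an assumption.
        // env.enableCheckpointing(30_000L);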

        final ObjectMapper objectMapper = new ObjectMapper();  // Jackson ObjectMapper to parse JSON

        // MySQL CDC configuration for the 'test' table
        MySqlSource<String> mySqlSource1 = MySqlSource.<String>builder()
                .hostname("localhost")
                .port(3306)
                .databaseList("test")
                .tableList("test.test")
                .username("root")
                .password("123456")
                .deserializer(new JsonDebeziumDeserializationSchema())
                .build();
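        // By default the source takes an initial full snapshot and then reads the binlog, which is why
        // "r" (snapshot read) events appear alongside "c"/"u"/"d" below. The builder also accepts options
        // such as .serverTimeZone(...) and .startupOptions(...) when the defaults do not fit.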

        // Read data from MySQL CDC stream with WatermarkStrategy
        DataStream<String> mySQLStream1 = env.fromSource(mySqlSource1, WatermarkStrategy.noWatermarks(), "MySQL CDC Source");
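        // Each element emitted by JsonDebeziumDeserializationSchema is a Debezium change event, roughly
        // (field values here are illustrative):
        //   {"before": {...}, "after": {"id": 1, "name": "a", "dt": "2024-01-01T10:30:00Z"},
        //    "source": {...}, "op": "c", "ts_ms": 1700000000000}
        // where "op" is "c" (insert), "u" (update), "d" (delete) or "r" (snapshot read).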

        // Transform and process the stream to handle different types of operations
        DataStream<Tuple4<String, Integer, String, String>> transformedStream1 = mySQLStream1
                .map(new MapFunction<String, Tuple4<String, Integer, String, String>>() {
                    @Override
                    public Tuple4<String, Integer, String, String> map(String value) throws Exception {
                        JsonNode jsonNode = objectMapper.readTree(value);
                        String operation = jsonNode.path("op").asText();
                        JsonNode beforeNode = jsonNode.path("before");
                        JsonNode afterNode = jsonNode.path("after");
                        System.out.println(operation);
                        // Check if it's a delete event, in which case there might not be an "after" node
                        if ("d".equals(operation)) {
                            return new Tuple4<>(operation, beforeNode.path("id").asInt(), null, null);
                        }

                        int id = afterNode.path("id").asInt();
                        String name = afterNode.path("name").asText();
                        String dt = afterNode.path("dt").asText();
                        String dtMySql = convertIsoDateTimeToMySql(dt);

                        return new Tuple4<>(operation, id, name, dtMySql);
                    }
                });


        // Print the transformed streams to console for debugging/inspection
        transformedStream1.print("Transformed Stream 1");

        // Write the transformed stream to MySQL through the JDBC sink
        transformedStream1.addSink(new MySQLJDBCSink());

        // Execute the job
        env.execute("Stream Join Example 2");

    }

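    /**
     * Converts the ISO-8601 style timestamp strings found in the Debezium JSON into MySQL DATETIME
     * format, e.g. (illustrative inputs) "2024-01-01T10:30:00Z" -> "2024-01-01 10:30:00" and
     * "2024-01-01T10:30" -> "2024-01-01 10:30:00". Returns null when the string cannot be parsed.
     */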
    public static String convertIsoDateTimeToMySql(String isoDateTime) {
        System.out.println(isoDateTime);
        // Define format patterns
        DateTimeFormatter isoFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'"); // For full format with 'Z'
        DateTimeFormatter partialFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm"); // For partial format (missing seconds)
        DateTimeFormatter mysqlFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); // MySQL format

        try {
            // Check if the date string ends with 'Z' (indicating UTC time)
            if (isoDateTime.endsWith("Z")) {
                // Parse as full format (with 'Z')
                LocalDateTime localDateTime = LocalDateTime.parse(isoDateTime, isoFormatter);
                return localDateTime.format(mysqlFormatter);
            } else {
                // Try parsing the partial format (missing seconds)
                LocalDateTime localDateTime = LocalDateTime.parse(isoDateTime, partialFormatter);
                return localDateTime.format(mysqlFormatter);
            }
        } catch (DateTimeParseException e) {
            e.printStackTrace();
            return null;
        }
    }
    public static class MySQLJDBCSink extends RichSinkFunction<Tuple4<String, Integer, String, String>> {
        private static final String MYSQL_URL = "jdbc:mysql://localhost:3306/test";
        private static final String MYSQL_USER = "root";
        private static final String MYSQL_PASSWORD = "123456";
        private static final String INSERT_SQL = "INSERT INTO test.test_target (id, name, dt) VALUES (?, ?, ?)";
        private static final String UPDATE_SQL = "UPDATE test.test_target SET name = ?, dt = ? WHERE id = ?";
        private static final String DELETE_SQL = "DELETE FROM test.test_target WHERE id = ?";

        private transient Connection connection;
        private transient PreparedStatement insertStatement;
        private transient PreparedStatement updateStatement;
        private transient PreparedStatement deleteStatement;

        @Override
        public void open(org.apache.flink.configuration.Configuration parameters) throws Exception {
            super.open(parameters);
            connection = DriverManager.getConnection(MYSQL_URL, MYSQL_USER, MYSQL_PASSWORD);
            insertStatement = connection.prepareStatement(INSERT_SQL);
            updateStatement = connection.prepareStatement(UPDATE_SQL);
            deleteStatement = connection.prepareStatement(DELETE_SQL);
        }

        @Override
        public void invoke(Tuple4<String, Integer, String, String> value, Context context) throws Exception {
            String operation = value.f0;
            int id = value.f1;
            String name = value.f2;
            String dt = value.f3;

            switch (operation) {
                case "r": // Insert
                case "c": // Insert
                case "u": // Update
                    try {
                        if ("u".equals(operation)) {
                            updateStatement.setString(1, name);
                            updateStatement.setString(2, dt);
                            updateStatement.setInt(3, id);
                            updateStatement.executeUpdate();
                        } else {
                            insertStatement.setInt(1, id);
                            insertStatement.setString(2, name);
                            insertStatement.setString(3, dt);
                            insertStatement.executeUpdate();
                        }
                    } catch (SQLException e) {
                        // Log the error or handle it appropriately
                        throw e;
                    }
                    break;
                case "d": // Delete
                    try {
                        deleteStatement.setInt(1, id);
                        deleteStatement.executeUpdate();
                    } catch (SQLException e) {
                        // Log the error or handle it appropriately
                        throw e;
                    }
                    break;
                default:
                    // Handle unknown operation or log it
                    System.out.println("Unknown operation: " + operation);
                    break;
            }
        }

        @Override
        public void close() throws Exception {
            super.close();
            if (insertStatement != null) {
                insertStatement.close();
            }
            if (updateStatement != null) {
                updateStatement.close();
            }
            if (deleteStatement != null) {
                deleteStatement.close();
            }
            if (connection != null) {
                connection.close();
            }
        }
    }
}
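
A possible hardening, not part of the original job: because the sink above issues a plain INSERT for both "r" (snapshot) and "c" (create) events, a restart that replays the snapshot will hit duplicate-key errors once rows already exist in the target. The sketch below folds insert and update into a single MySQL upsert statement; it assumes that `id` is the primary key of `test.test_target` and reuses the connection settings from the example.

```java
package com.yofoto.myfunction;

import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

public class MySQLUpsertSink extends RichSinkFunction<Tuple4<String, Integer, String, String>> {

    // "r", "c" and "u" events all go through one idempotent statement; replays simply overwrite the row.
    private static final String UPSERT_SQL =
            "INSERT INTO test.test_target (id, name, dt) VALUES (?, ?, ?) "
                    + "ON DUPLICATE KEY UPDATE name = VALUES(name), dt = VALUES(dt)";
    private static final String DELETE_SQL = "DELETE FROM test.test_target WHERE id = ?";

    private transient Connection connection;
    private transient PreparedStatement upsertStatement;
    private transient PreparedStatement deleteStatement;

    @Override
    public void open(org.apache.flink.configuration.Configuration parameters) throws Exception {
        super.open(parameters);
        connection = DriverManager.getConnection("jdbc:mysql://localhost:3306/test", "root", "123456");
        upsertStatement = connection.prepareStatement(UPSERT_SQL);
        deleteStatement = connection.prepareStatement(DELETE_SQL);
    }

    @Override
    public void invoke(Tuple4<String, Integer, String, String> value, Context context) throws Exception {
        if ("d".equals(value.f0)) {
            // Delete event: only the id survives the map() step above.
            deleteStatement.setInt(1, value.f1);
            deleteStatement.executeUpdate();
        } else {
            // "r", "c", "u": insert the row or overwrite it if it already exists.
            upsertStatement.setInt(1, value.f1);
            upsertStatement.setString(2, value.f2);
            upsertStatement.setString(3, value.f3);
            upsertStatement.executeUpdate();
        }
    }

    @Override
    public void close() throws Exception {
        super.close();
        if (upsertStatement != null) upsertStatement.close();
        if (deleteStatement != null) deleteStatement.close();
        if (connection != null) connection.close();
    }
}
```

Swapping it in changes one line in `main`: `transformedStream1.addSink(new MySQLUpsertSink());`. As a further step, the `flink-connector-jdbc` dependency already declared in the pom provides `JdbcSink.sink(...)`, which adds batching and retry handling on top of the same idea.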

Maven dependencies:

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <flink.version>1.14.0</flink.version>
    <scala.binary.version>2.11</scala.binary.version>
  </properties>

  <dependencies>
    <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-jdbc -->
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-connector-jdbc_2.11</artifactId>
      <version>1.14.0</version>
    </dependency>
<!--    <dependency>-->
<!--      <groupId>com.fasterxml.jackson.core</groupId>-->
<!--      <artifactId>jackson-databind</artifactId>-->
<!--      <version>2.13.0</version>-->
<!--    </dependency>-->
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-streaming-java_2.11</artifactId>
      <version>${flink.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-runtime-web_2.11</artifactId>
      <version>${flink.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-streaming-scala_2.11</artifactId>
      <version>${flink.version}</version>
    </dependency>
    <dependency>
      <groupId>com.fasterxml.jackson.core</groupId>
      <artifactId>jackson-databind</artifactId>
      <version>2.12.3</version> <!-- use the latest version -->
    </dependency>
    <!-- Flink Kafka connector -->
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-connector-kafka_2.11</artifactId>
      <version>${flink.version}</version>
    </dependency>

    <!-- Flink CDC connector -->
    <dependency>
      <groupId>com.ververica</groupId>
      <artifactId>flink-connector-mysql-cdc</artifactId>
      <version>2.3.0</version> <!-- adjust to match your Flink CDC version -->
    </dependency>

    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-clients_2.11</artifactId>
      <version>${flink.version}</version>
    </dependency>

    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-table-api-java-bridge_2.11</artifactId>
      <version>${flink.version}</version> <!-- adjust to match your Flink version -->
    </dependency>
    <!-- Flink Table runtime -->
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-table-runtime_2.11</artifactId>
      <version>${flink.version}</version> <!-- adjust to match your Flink version -->
    </dependency>

    <dependency>
      <groupId>org.apache.doris</groupId>
      <artifactId>flink-doris-connector-1.14_2.11</artifactId>
      <version>1.1.1</version>
    </dependency>

    <dependency>
      <groupId>mysql</groupId>
      <artifactId>mysql-connector-java</artifactId>
      <version>8.0.33</version>
    </dependency>


    <dependency>
      <groupId>com.alibaba</groupId>
      <artifactId>fastjson</artifactId>
      <version>1.2.83</version>
    </dependency>

    <dependency>
      <groupId>cn.hutool</groupId>
      <artifactId>hutool-all</artifactId>
      <version>5.8.28</version>
    </dependency>

    <!-- SLF4J API -->
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-api</artifactId>
      <version>1.7.36</version>
    </dependency>

    <!-- SLF4J logging implementation, e.g. Logback -->
    <dependency>
      <groupId>ch.qos.logback</groupId>
      <artifactId>logback-classic</artifactId>
      <version>1.2.10</version>
    </dependency>

    <dependency>
      <groupId>ch.qos.logback</groupId>
      <artifactId>logback-core</artifactId>
      <version>1.2.10</version>
    </dependency>

    <dependency>
      <groupId>commons-configuration</groupId>
      <artifactId>commons-configuration</artifactId>
      <version>1.6</version>
    </dependency>

    <dependency>
      <groupId>org.projectlombok</groupId>
      <artifactId>lombok</artifactId>
      <version>1.18.26</version>
      <scope>compile</scope>
    </dependency>

  </dependencies>

  <build>
    <plugins>

      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-assembly-plugin</artifactId>
        <version>2.5.5</version>
        <configuration>
          <archive>
            <manifest>
              <mainClass>com.yofoto.myfunction.StreamJoinExample3</mainClass>
            </manifest>
          </archive>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
        </configuration>
        <executions>
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>

    </plugins>
  </build>

### Flink CDC usage and configuration overview

#### 1. Environment preparation and installation
The Flink CDC connector is added either as a Maven dependency or by copying the connector JAR into Flink's `lib` directory and restarting the cluster. Maven dependency (MySQL as an example):

```xml
<dependency>
  <groupId>com.ververica</groupId>
  <artifactId>flink-connector-mysql-cdc</artifactId>
  <version>2.3.0</version>
</dependency>
```

#### 2. Basic configuration steps
A synchronization task can also be described in a YAML file; the following example syncs MySQL to Kafka:

```yaml
source:
  type: mysql
  host: localhost
  port: 3306
  username: root
  password: 123456
  database: test_db
  table: orders
  server-id: 5400-5404   # must be unique to avoid conflicts

sink:
  type: kafka
  brokers: "kafka-host:9092"
  topic: orders_cdc
  format: json           # Avro, JSON and other formats are supported
```

#### 3. Core features and configuration parameters
- **Full + incremental synchronization**: the full snapshot is enabled by default (`scan.incremental.snapshot.enabled=true`), after which the source switches automatically to incremental log capture.
- **Sharded-table merging**: configure a `shard-key` to merge sharded tables (e.g. sharded database/table setups).
- **Schema evolution**: enable `schema.evolution.enabled=true` to adapt automatically to table structure changes.
- **Data transformation**: combine with Flink SQL or custom operators for field filtering, format conversion, and so on.

#### 4. Application scenarios
- **Real-time data warehouse**: sync MySQL order-table changes to Kafka in real time for downstream jobs such as ad click-rate computation.
- **Data migration**: migrate a full PostgreSQL dataset to TiDB while continuously syncing incremental changes.
- **ETL**: clean CDC data in a Flink job and then write it to HBase.

#### 5. Notes
- **Monitoring**: watch `source.numRecordsIn` (input rate) and `sink.numRecordsOut` (output rate).
- **Fault tolerance**: enable checkpointing (30-60 s interval) to guarantee exactly-once semantics.
- **Performance tuning**:
  - Adjust `scan.incremental.snapshot.chunk.size` (chunk size) to speed up the full-snapshot phase.
  - Set the parallelism to match the number of source-table partitions.