flinksql--windowtop-N <flink 1.12或者1.14>

本文介绍如何使用Apache Flink实现窗口内的Top-N排名功能,包括通过SQL进行数据处理的具体步骤,以及不同时间类型下的Watermark设置方法。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

  1. 窗口 top-N flink 1.12
package com.cn.sql;

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

/*
* 开窗 分组-top-N sql Demo
* 示例:
2020-04-15 08:05:00,4.00,A,supplier1
2020-04-15 08:06:00,4.00,C,supplier2
2020-04-15 08:07:00,2.00,G,supplier1
2020-04-15 08:08:00,2.00,B,supplier3
2020-04-15 08:09:00,5.00,D,supplier4
2020-04-15 08:11:00,2.00,B,supplier3
2020-04-15 08:13:00,1.00,E,supplier1
2020-04-15 08:15:00,3.00,H,supplier2
2020-04-15 08:17:00,6.00,F,supplier5
2020-04-15 08:25:00,6.00,F,supplier5
2020-04-15 08:30:00,6.00,F,supplier5
* */
public class SqlDemo {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment sTenv = StreamTableEnvironment.create(env);
        //todo 创建kafka表
        String sql =
                "CREATE TABLE TEST (" +
                        "`bidtime` TIMESTAMP(3),\n" +
                        "`price` FLOAT,\n" +
                        "`item` VARCHAR(3),\n" +
                        "`supplier_id` STRING,\n" +
                        " WATERMARK FOR bidtime as bidtime - INTERVAL '5' SECOND)\n" +
                        "WITH(" +
                        "'connector'='kafka',\n" +
                        "'properties.bootstrap.servers'='localhost:9092',\n" +
                        "'topic'='kafkaS',\n" +
                        "'properties.group.id'='a'," +
                        "'format'='csv')";
        sTenv.executeSql(sql);
        //todo 创建输出表
        sTenv.executeSql("CREATE TABLE TESTS (" +
                "`window_start`  TIMESTAMP(3) NOT NULL,\n" +
                "`window_end`  TIMESTAMP(3) NOT NULL,\n" +
                "`supplier_id` STRING ,\n" +
                "`price` FLOAT,\n" +
                "`rownum` BIGINT NOT NULL,\n" +
                "`cnt` BIGINT NOT NULL\n)" +
                "WITH(" +
                "'connector'='print')");
        //todo 执行逻辑
        sTenv.executeSql("INSERT INTO TESTS SELECT *\n" +
                "  FROM (\n" +
                "    SELECT *, ROW_NUMBER() OVER (PARTITION BY window_start, window_end ORDER BY price DESC) as rownum\n" +
                "    FROM (\n" +
                "    SELECT  \n" +
                "    TUMBLE_START(bidtime, INTERVAL '10' MINUTES) as  window_start,  \n" +
                "    TUMBLE_END(bidtime, INTERVAL '10' MINUTES) as  window_end, \n " +
                "    supplier_id, \n " +
                "    SUM(price) as price,\n" +
                "    COUNT(*) as cnt  FROM TEST \n" +
                "    GROUP BY TUMBLE(bidtime, INTERVAL '10' MINUTES),supplier_id )\n" +
                "    ) \n" +
                "   WHERE rownum <= 3 ");
    }
}
//也可以 使用CTE 公共表表达式增加可读性
   //todo 执行逻辑
        sTenv.executeSql("INSERT INTO TESTS \n" +
                "WITH \n" +
                "orders_with_total AS (\n" +
                "    SELECT  \n" +
                "    TUMBLE_START(bidtime, INTERVAL '10' MINUTES) as  window_start,  \n" +
                "    TUMBLE_END(bidtime, INTERVAL '10' MINUTES) as  window_end, \n " +
                "    supplier_id, \n " +
                "    SUM(price) as price,\n" +
                "    COUNT(*) as cnt  FROM TEST \n" +
                "    GROUP BY TUMBLE(bidtime, INTERVAL '10' MINUTES),supplier_id )," +
                "orders_with_totasl AS ( \n" +
                "    SELECT *, ROW_NUMBER() OVER (PARTITION BY window_start, window_end ORDER BY price DESC) as rownum\n" +
                "    FROM orders_with_total \n" +
                ")  \n" +
                " SELECT *\n" +
                "  FROM orders_with_totasl\n" +
                "  WHERE rownum <= 3 ");
  1. 窗口 top-N flink 1.14
    (14 版本的 明显简化不少)
SELECT *
  FROM (
    SELECT *, ROW_NUMBER() OVER (PARTITION BY window_start, window_end ORDER BY price DESC) as rownum
    FROM (
      SELECT window_start, window_end, supplier_id, SUM(price) as price, COUNT(*) as cnt
      FROM TABLE(
        TUMBLE(TABLE Bid, DESCRIPTOR(bidtime), INTERVAL '10' MINUTES))
      GROUP BY window_start, window_end, supplier_id
    )
  ) WHERE rownum <= 3

3. 时间类型的使用 确定watermark

(默认TIMPSTAMP 类型 是 yyyy-MM-dd HH:mm:ss.SSSSSSSSS)

1 当类型不是默认类型 例如 yyyy-MM-dd HH:mm
CREATE TABLE TEST (" +
                        "`bidtime` VARCHAR(20),\n" +
                        "`price` FLOAT,\n" +
                        "`item` VARCHAR(3),\n" +
                        "`supplier_id` STRING,\n" +
                        "`ts` AS TO_TIMESTAMP(bidtime,'yyyy-MM-dd HH:mm'),\n" +
                        " WATERMARK FOR ts as ts - INTERVAL '5' SECOND)\n" +
                        "WITH(" +
                        "'connector'='kafka',\n" +
                        "'properties.bootstrap.servers'='localhost:9092',\n" +
                        "'topic'='kafkaS',\n" +
                        "'properties.group.id'='a'," +
                        "'format'='csv');

2 时间戳 (长整型的数字)
 "CREATE TABLE TEST (" +
                        "`bidtime` BIGINT,\n" +
                        "`price` FLOAT,\n" +
                        "`item` VARCHAR(3),\n" +
                        "`supplier_id` STRING,\n" +
                        "`ts` AS TO_TIMESTAMP(FROM_UNIXTIME(bidtime,'yyyy-MM-dd HH:mm')),\n" +
                        " WATERMARK FOR ts as ts - INTERVAL '5' SECOND)\n" +
                        "WITH(" +
                        "'connector'='kafka',\n" +
                        "'properties.bootstrap.servers'='localhost:9092',\n" +
                        "'topic'='kafkaS',\n" +
                        "'properties.group.id'='a'," +
                        "'format'='csv')"

注意:这里是 时间 TIMPSTAP 类型默认为yyyy-MM-dd HH:mm:ss 如果 输入的格式不一致 或造成格式化异常 需要 上面的转化 就"ts AS TO_TIMESTAMP(bidtime,‘yyyy-MM-dd HH:mm’)
如果时间是以时间戳的形式出现那么类型BIGINT 需要使用 flink 1.14 版本需要 TO_TIMESTAMP_LTZ()转化一下 如果 是flink 1.12 版本 "ts AS TO_TIMESTAMP(FROM_UNIXTIME(bidtime,‘yyyy-MM-dd HH:mm’)) 就是先要 转化为本地时间 然你在转化为时间戳 所以 在埋点数据或者其他数据 最好是用 时间格式 而不是时间错

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值