Storm示例程序

Apache Storm 数据处理实战:NumberCount与WordCount案例解析

2. Storm的NumberCount案例

  • 首先声明NumberSpout用于模拟产生数据

    package com.gcc.numberCount;
    
    import org.apache.storm.spout.SpoutOutputCollector;
    import org.apache.storm.task.TopologyContext;
    import org.apache.storm.topology.OutputFieldsDeclarer;
    import org.apache.storm.topology.base.BaseRichSpout;
    import org.apache.storm.tuple.Fields;
    import org.apache.storm.tuple.Values;
    
    import java.util.Map;
    
    /**
     * Spout that simulates a data source: every call to {@link #nextTuple()} emits the
     * current counter value on the single output field {@code "num"} and then pauses.
     */
    public class NumberSpout extends BaseRichSpout {
        /** Pause after each emitted tuple, in milliseconds, to throttle the stream. */
        private static final long EMIT_INTERVAL_MILLIS = 1000L;

        /** Collector received in {@link #open}; used to send tuples downstream. */
        private SpoutOutputCollector collector;
        /** Counter whose current value is emitted and then incremented. */
        private int number;

        /**
         * Stores the collector so that {@link #nextTuple()} can emit through it.
         *
         * @param conf      topology configuration (unused here)
         * @param context   topology context (unused here)
         * @param collector collector used to emit tuples
         */
        @Override
        public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
            this.collector = collector;
        }

        /**
         * Emits the next counter value to the downstream bolts, then sleeps for
         * {@link #EMIT_INTERVAL_MILLIS} to limit the emission rate.
         */
        @Override
        public void nextTuple() {
            // Send the current value downstream and advance the counter.
            this.collector.emit(new Values(number++));
            try {
                Thread.sleep(EMIT_INTERVAL_MILLIS);
            } catch (InterruptedException e) {
                // Restore the interrupt status instead of swallowing it, so the
                // thread running this spout can still observe the interruption.
                Thread.currentThread().interrupt();
            }
        }

        /**
         * Declares the single output field, {@code "num"}.
         *
         * @param declarer declarer used to register the output schema
         */
        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("num"));
        }
    }
    
    
  • NumberBolt:处理NumberSpout上游传递过来的数据

    package com.gcc.numberCount;
    
    import org.apache.storm.topology.BasicOutputCollector;
    import org.apache.storm.topology.OutputFieldsDeclarer;
    import org.apache.storm.topology.base.BaseBasicBolt;
    import org.apache.storm.tuple.Fields;
    import org.apache.storm.tuple.Tuple;
    import org.apache.storm.tuple.Values;
    
    /**
     * Bolt that keeps a running sum of the integers it receives and emits the
     * updated sum on the single output field {@code "count"}.
     */
    public class NumberBolt extends BaseBasicBolt {
        /**
         * Running sum of all values seen by this bolt instance. Kept as an instance
         * field (not static) so that several bolt instances living in the same JVM
         * do not share unsynchronized mutable state.
         */
        private int count;

        /**
         * Adds the first value of the incoming tuple to the running sum and emits
         * the new sum downstream.
         *
         * @param input     tuple from the upstream component; its first value is read as an Integer
         * @param collector collector used to emit the updated sum
         */
        @Override
        public void execute(Tuple input, BasicOutputCollector collector) {
            // Read the upstream value once and reuse it for logging and summing.
            Integer value = input.getInteger(0);
            System.out.println("NumberBolt.execute=="+value+"--");
            count += value;

            // Pass the updated sum on to the next bolt.
            collector.emit(new Values(count));
        }

        /**
         * Declares the single output field, {@code "count"}.
         *
         * @param declarer declarer used to register the output schema
         */
        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("count"));
        }
    }
    
    
  • NumberCountBolt:处理由NumberBolt传递过来的数据

    package com.gcc.numberCount;
    
    import org.apache.storm.topology.BasicOutputCollector;
    import org.apache.storm.topology.OutputFieldsDeclarer;
    import org.apache.storm.topology.base.BaseBasicBolt;
    import org.apache.storm.tuple.Tuple;
    
    /**
     * Terminal bolt that prints the value it receives, read once by position and
     * once by the field name {@code "count"}.
     */
    public class NumberCountBolt extends BaseBasicBolt {
        /**
         * Prints the incoming tuple's first value and its {@code "count"} field.
         *
         * @param input     tuple received from the upstream bolt
         * @param collector unused; this bolt emits nothing
         */
        @Override
        public void execute(Tuple input, BasicOutputCollector collector) {
            Integer byPosition = input.getInteger(0);
            Integer byFieldName = input.getIntegerByField("count");
            System.out.println("NumberCountBolt.excute--" + byPosition + "--" + byFieldName);
        }

        /**
         * Declares no output fields.
         *
         * @param declarer unused
         */
        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            // Intentionally empty: nothing is emitted downstream.
        }
    }
    
  • 另一分支Bolt,也是处理NumberSpout的数据

    package com.gcc.numberCount;
    
    import org.apache.storm.topology.BasicOutputCollector;
    import org.apache.storm.topology.OutputFieldsDeclarer;
    import org.apache.storm.topology.base.BaseBasicBolt;
    import org.apache.storm.tuple.Tuple;
    
    /**
     * Terminal bolt on a second branch: prints the {@code "num"} field of every
     * tuple it receives.
     */
    public class NumberAgainBolt extends BaseBasicBolt {
        /**
         * Prints the {@code "num"} field of the incoming tuple.
         *
         * @param input     tuple received from the upstream component
         * @param collector unused; this bolt emits nothing
         */
        @Override
        public void execute(Tuple input, BasicOutputCollector collector) {
            Integer num = input.getIntegerByField("num");
            System.out.println("NumberAgainBolt.execute===" + num);
        }

        /**
         * Declares no output fields.
         *
         * @param declarer unused
         */
        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            // Intentionally empty: nothing is emitted downstream.
        }
    }
    

由此可得,其拓扑结构为:

image-20220718144244935
  • Topology:构建拓扑结构
package com.gcc.numberCount;

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.topology.TopologyBuilder;

public class NumberTopology {
    public static void main(String[] args) {
        //1.创建任务的拓扑图
        //创建topology的构建起
        TopologyBuilder topologyBuilder = new TopologyBuilder();
        //设置拓扑关系(Spout)
        topologyBuilder.setSpout("numberSpout",new NumberSpout());
        //设置拓扑关系(Bolt)
        topologyBuilder.setBolt("numberBolt",new NumberBolt()).shuffleGrouping("numberSpout");
        //设置拓扑关系(Bolt)
        topologyBuilder.setBolt("numberCountBolt",new NumberCountBolt()).shuffleGrouping("numberBolt");
        //设置拓扑关系(Bolt)
        topologyBuilder.setBolt("numberAgainBolt",new NumberAgainBolt()).shuffleGrouping("numberSpout");
        //设置拓扑关系(Bolt)
//        topologyBuilder.setBolt("number2",new NumberBolt()).shuffleGrouping("number1");

        //2.配置启动
        Config config = new Config();
       //创建Topology
        StormTopology topology = topologyBuilder.createTopology();

        if(args.length>0){
            try {
                StormSubmitter.submitTopology(args[0],config,topology);
            } catch (AlreadyAliveException e) {
                e.printStackTrace();
            } catch (InvalidTopologyException e) {
                e.printStackTrace();
            } catch (AuthorizationException e) {
                e.printStackTrace();
            }
        }else{
            //本地模式启动集群
            LocalCluster localCluster = new LocalCluster();
            localCluster.submitTopology("numberTopology",config,topology);
        }



    }
}

可将IDEA的程序打包到服务器运行:

打包时可能出现程序类或者包不存在的错误,但实际上依赖已经加入。

解决方案:可以选择rebuild项目之后,重新install和package

将打包后的jar包上传服务器后

使用命令:

$ storm jar [jar包路径] com.gcc.numberCount.NumberTopology [拓扑名称]

可以在服务器上运行

3. Storm的WordCount案例

  • WordCountSpout:模拟单词数据,利用数组存放需要发送的数据

    package com.gcc.wordcount;
    
    import org.apache.storm.spout.SpoutOutputCollector;
    import org.apache.storm.task.TopologyContext;
    import org.apache.storm.topology.OutputFieldsDeclarer;
    import org.apache.storm.topology.base.BaseRichSpout;
    import org.apache.storm.tuple.Fields;
    import org.apache.storm.tuple.Values;
    
    import java.util.Map;
    import java.util.Random;
    
    /**
     * Spout that simulates a stream of text lines: every call to {@link #nextTuple()}
     * picks one of a fixed set of sentences at random, emits it on the single output
     * field {@code "line"}, and then pauses.
     */
    public class WordCountSpout extends BaseRichSpout {
        /** Pause after each emitted tuple, in milliseconds, to throttle the stream. */
        private static final long EMIT_INTERVAL_MILLIS = 1000L;

        /** Collector received in {@link #open}; used to send tuples downstream. */
        private SpoutOutputCollector collector;
        /** Candidate lines; one of them is chosen for each emitted tuple. */
        private String[] array={"hello world","hello gcc","hello hi welcome to world"};
        /** Source of randomness for choosing the next line. */
        private Random random=new Random();

        /**
         * Stores the collector so that {@link #nextTuple()} can emit through it.
         *
         * @param conf      topology configuration (unused here)
         * @param context   topology context (unused here)
         * @param collector collector used to emit tuples
         */
        @Override
        public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
            this.collector = collector;
        }

        /**
         * Picks a random line, logs it, emits it downstream, then sleeps for
         * {@link #EMIT_INTERVAL_MILLIS} to limit the emission rate.
         */
        @Override
        public void nextTuple() {
            // Choose the line to send this time.
            String line = array[random.nextInt(array.length)];
            System.out.println("WordCountSpout.nextTuple---"+line);
            this.collector.emit(new Values(line));
            try {
                Thread.sleep(EMIT_INTERVAL_MILLIS);
            } catch (InterruptedException e) {
                // Restore the interrupt status instead of swallowing it, so the
                // thread running this spout can still observe the interruption.
                Thread.currentThread().interrupt();
            }
        }

        /**
         * Declares the single output field, {@code "line"}.
         *
         * @param declarer declarer used to register the output schema
         */
        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("line"));
        }
    }
    
    
  • LineSplitBolt:处理Spout上游数据,切分单词

    package com.gcc.wordcount;
    
    import org.apache.storm.topology.BasicOutputCollector;
    import org.apache.storm.topology.OutputFieldsDeclarer;
    import org.apache.storm.topology.base.BaseBasicBolt;
    import org.apache.storm.tuple.Fields;
    import org.apache.storm.tuple.Tuple;
    import org.apache.storm.tuple.Values;
    
    /**
     * Bolt that splits each incoming line into words and emits every word as its
     * own tuple on the single output field {@code "word"}.
     */
    public class LineSplitBolt extends BaseBasicBolt {

        /**
         * Splits the {@code "line"} field of the incoming tuple on whitespace and
         * emits one tuple per word. Null, empty, and whitespace-only lines emit nothing.
         *
         * @param input     tuple carrying the text in its {@code "line"} field
         * @param collector collector used to emit each word
         */
        @Override
        public void execute(Tuple input, BasicOutputCollector collector) {
            String line = input.getStringByField("line");
            System.out.println("LineSplitBolt.execute----"+line);
            if (line == null) {
                return;
            }
            // Trim first so leading whitespace cannot produce an empty first token.
            String trimmed = line.trim();
            if (trimmed.isEmpty()) {
                return;
            }
            // Split on runs of whitespace so repeated spaces or tabs between words
            // do not produce empty-string "words" downstream.
            for (String word : trimmed.split("\\s+")) {
                collector.emit(new Values(word));
            }
        }

        /**
         * Declares the single output field, {@code "word"}.
         *
         * @param declarer declarer used to register the output schema
         */
        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("word"));
        }
    }
    
    
  • WordCountBolt:对LineSplitBolt的数据进行单词统计,采用FieldsGrouping的数据分发策略

    package com.gcc.wordcount;
    
    import org.apache.storm.topology.BasicOutputCollector;
    import org.apache.storm.topology.OutputFieldsDeclarer;
    import org.apache.storm.topology.base.BaseBasicBolt;
    import org.apache.storm.tuple.Fields;
    import org.apache.storm.tuple.Tuple;
    import org.apache.storm.tuple.Values;
    
    import java.util.HashMap;
    import java.util.Map;
    
    /**
     * Terminal bolt that counts how many times each word has been received by this
     * instance and prints the updated count after every tuple.
     */
    public class WordCountBolt extends BaseBasicBolt {
        /** Per-instance tally: word mapped to the number of times it has been seen. */
        private Map<String,Integer> map=new HashMap<>();

        /**
         * Increments the tally for the incoming tuple's {@code "word"} field and
         * prints the new total.
         *
         * @param input     tuple carrying the word in its {@code "word"} field
         * @param collector unused; this bolt emits nothing
         */
        @Override
        public void execute(Tuple input, BasicOutputCollector collector) {
            String word = input.getStringByField("word");
            // A missing entry means this is the first occurrence of the word.
            Integer seenSoFar = map.get(word);
            int updated = (seenSoFar == null) ? 1 : seenSoFar + 1;
            map.put(word, updated);
            System.out.println("WordCountBolt本次执行完:"+this+"单词【"+word+"】数量【"+updated+"]");
        }

        /**
         * Declares no output fields.
         *
         * @param declarer unused
         */
        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            // Intentionally empty: nothing is emitted downstream.
        }
    }
    
    
  • Topology:设置拓扑结构

    package com.gcc.wordcount;
    
    import com.gcc.numberCount.NumberAgainBolt;
    import com.gcc.numberCount.NumberBolt;
    import com.gcc.numberCount.NumberCountBolt;
    import com.gcc.numberCount.NumberSpout;
    import org.apache.storm.Config;
    import org.apache.storm.LocalCluster;
    import org.apache.storm.generated.StormTopology;
    import org.apache.storm.topology.TopologyBuilder;
    import org.apache.storm.tuple.Fields;
    
    /**
     * Wires the word-count example together:
     * WordCountSpout feeds LineSplitBolt (2 executors, shuffle grouping), which feeds
     * WordCountBolt (4 executors, fields grouping on "word").
     */
    public class WordCountTopology {
        /**
         * Builds the word-count topology and runs it in a {@link LocalCluster}.
         *
         * @param args unused
         */
        public static void main(String[] args) {
            // 1. Describe the topology graph.
            TopologyBuilder topologyBuilder = new TopologyBuilder();
            topologyBuilder.setSpout("WordCountSpout", new WordCountSpout());

            // Split lines into words.
            topologyBuilder.setBolt("LineSplitBolt", new LineSplitBolt(), 2)
                    .shuffleGrouping("WordCountSpout");
            // Count words; grouping on the "word" field keeps all tuples for one
            // word on the same WordCountBolt task.
            topologyBuilder.setBolt("WordCountBolt", new WordCountBolt(), 4)
                    .fieldsGrouping("LineSplitBolt", new Fields("word"));

            // 2. Configuration and the built topology.
            Config config = new Config();
            StormTopology topology = topologyBuilder.createTopology();

            // Start in local mode. The name identifies this topology, so it is named
            // after the word-count example rather than the number example.
            LocalCluster localCluster = new LocalCluster();
            localCluster.submitTopology("wordCountTopology", config, topology);
        }
    }
    
    
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值