2. Storm的NumberCount案例
-
首先声明NumberSpout用于模拟产生数据
package com.gcc.numberCount;

import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

import java.util.Map;

/**
 * Spout that simulates a data source by emitting an increasing integer
 * sequence on the single output field {@code "num"}.
 */
public class NumberSpout extends BaseRichSpout {

    /** Collector used to send tuples downstream; assigned in {@link #open}. */
    private SpoutOutputCollector collector;

    /** Next value to emit; starts at the field default of 0 and grows by one per emit. */
    private int number;

    /**
     * Stores the collector so that {@link #nextTuple()} can emit through it.
     *
     * @param conf      topology configuration (unused here)
     * @param context   topology context (unused here)
     * @param collector collector used to emit tuples
     */
    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
    }

    /**
     * Emits the current counter value as a one-field tuple, increments the
     * counter, then sleeps for one second to throttle the emit rate.
     */
    @Override
    public void nextTuple() {
        // Alternative source kept from the original notes: a random number in [0, 100].
        // int number = (int) (Math.random() * 101);

        this.collector.emit(new Values(number++));

        // Throttle the emit rate.
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the calling thread can still observe
            // the interruption instead of having it silently swallowed here.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
    }

    /**
     * Declares the single output field {@code "num"}.
     *
     * @param declarer declarer used to register the output fields
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("num"));
    }
}
-
NumberBolt:处理NumberSpout上游传递过来的数据
package com.gcc.numberCount; import org.apache.storm.topology.BasicOutputCollector; import org.apache.storm.topology.OutputFieldsDeclarer; import org.apache.storm.topology.base.BaseBasicBolt; import org.apache.storm.tuple.Fields; import org.apache.storm.tuple.Tuple; import org.apache.storm.tuple.Values; public class NumberBolt extends BaseBasicBolt { //声明一个统计器 private static int count; /** * 处理数据的业务逻辑 * @param input * @param collector */ @Override public void execute(Tuple input, BasicOutputCollector collector) { // System.out.println("从上游获取的数据:"+input); // 显示上游tuple数据信息 System.out.println("NumberBolt.execute=="+input.getInteger(0)+"--"); //开始统计 count+=input.getInteger(0); //继续向后传递信息 collector.emit(new Values(count)); // System.out.println("截至到本次,其获取的数据和为:"+count); } /** * 如果需要向下传递数据,需要提前定义数据格式 * @param declarer */ @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("count")); } }
-
NumberCountBolt:处理由NumberBolt传递过来的数据
package com.gcc.numberCount;

import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Tuple;

/**
 * Terminal bolt that prints the value of each incoming tuple, reading it once
 * by position and once by the field name {@code "count"}.
 */
public class NumberCountBolt extends BaseBasicBolt {

    /**
     * Prints the tuple's first field and its {@code "count"} field.
     *
     * @param input     incoming tuple
     * @param collector unused; this bolt emits nothing
     */
    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        Integer byPosition = input.getInteger(0);
        Integer byFieldName = input.getIntegerByField("count");

        StringBuilder message = new StringBuilder("NumberCountBolt.excute--");
        message.append(byPosition).append("--").append(byFieldName);
        System.out.println(message.toString());
    }

    /**
     * Declares no output fields.
     *
     * @param declarer unused
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Nothing to declare: no tuples are emitted.
    }
}
-
另一分支Bolt,也是处理NumberSpout的数据
package com.gcc.numberCount;

import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Tuple;

/**
 * Terminal bolt that prints the {@code "num"} field of each incoming tuple.
 */
public class NumberAgainBolt extends BaseBasicBolt {

    /**
     * Prints the {@code "num"} field of the incoming tuple.
     *
     * @param input     incoming tuple carrying a {@code "num"} field
     * @param collector unused; this bolt emits nothing
     */
    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        Integer received = input.getIntegerByField("num");
        System.out.println("NumberAgainBolt.execute===" + received);
    }

    /**
     * Declares no output fields.
     *
     * @param declarer unused
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Nothing to declare: no tuples are emitted.
    }
}
由此可得,其拓扑结构为(numberSpout 的数据同时流向两个分支):numberSpout → numberBolt → numberCountBolt;numberSpout → numberAgainBolt。

- Topology:构建拓扑结构
package com.gcc.numberCount;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.topology.TopologyBuilder;
public class NumberTopology {
public static void main(String[] args) {
//1.创建任务的拓扑图
//创建topology的构建起
TopologyBuilder topologyBuilder = new TopologyBuilder();
//设置拓扑关系(Spout)
topologyBuilder.setSpout("numberSpout",new NumberSpout());
//设置拓扑关系(Bolt)
topologyBuilder.setBolt("numberBolt",new NumberBolt()).shuffleGrouping("numberSpout");
//设置拓扑关系(Bolt)
topologyBuilder.setBolt("numberCountBolt",new NumberCountBolt()).shuffleGrouping("numberBolt");
//设置拓扑关系(Bolt)
topologyBuilder.setBolt("numberAgainBolt",new NumberAgainBolt()).shuffleGrouping("numberSpout");
//设置拓扑关系(Bolt)
// topologyBuilder.setBolt("number2",new NumberBolt()).shuffleGrouping("number1");
//2.配置启动
Config config = new Config();
//创建Topology
StormTopology topology = topologyBuilder.createTopology();
if(args.length>0){
try {
StormSubmitter.submitTopology(args[0],config,topology);
} catch (AlreadyAliveException e) {
e.printStackTrace();
} catch (InvalidTopologyException e) {
e.printStackTrace();
} catch (AuthorizationException e) {
e.printStackTrace();
}
}else{
//本地模式启动集群
LocalCluster localCluster = new LocalCluster();
localCluster.submitTopology("numberTopology",config,topology);
}
}
}
可将IDEA的程序打包到服务器运行:
打包时若出现程序类或者包不存在的错误,而实际依赖已经加入:
解决方案:可以选择rebuild项目之后,重新install和package
将打包后的jar包上传服务器后
使用命令:
$ storm jar [jar包路径] com.gcc.numberCount.NumberTopology [拓扑名称]
可以在服务器上运行(第一个参数会作为拓扑名称提交到集群;不带参数时程序以本地模式运行)
3. Storm的WordCount案例
-
WordCountSpout:模拟单词数据,利用数组存放需要发送的数据
package com.gcc.wordcount;

import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

import java.util.Map;
import java.util.Random;

/**
 * Spout that simulates a text source by emitting a randomly chosen sentence
 * from a fixed array on the single output field {@code "line"}.
 */
public class WordCountSpout extends BaseRichSpout {

    /** Collector used to send tuples downstream; assigned in {@link #open}. */
    private SpoutOutputCollector collector;

    /** Fixed pool of sentences to pick from. */
    private String[] array = {"hello world", "hello gcc", "hello hi welcome to world"};

    /** Source of randomness used to pick a sentence. */
    private Random random = new Random();

    /**
     * Stores the collector so that {@link #nextTuple()} can emit through it.
     *
     * @param conf      topology configuration (unused here)
     * @param context   topology context (unused here)
     * @param collector collector used to emit tuples
     */
    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
    }

    /**
     * Picks one sentence at random, prints it, emits it as a one-field tuple,
     * then sleeps for one second to throttle the emit rate.
     */
    @Override
    public void nextTuple() {
        String line = array[random.nextInt(array.length)];
        System.out.println("WordCountSpout.nextTuple---" + line);
        this.collector.emit(new Values(line));

        // Throttle the emit rate.
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the calling thread can still observe
            // the interruption instead of having it silently swallowed here.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
    }

    /**
     * Declares the single output field {@code "line"}.
     *
     * @param declarer declarer used to register the output fields
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("line"));
    }
}
-
LineSplitBolt:处理Spout上游数据,切分单词
package com.gcc.wordcount; import org.apache.storm.topology.BasicOutputCollector; import org.apache.storm.topology.OutputFieldsDeclarer; import org.apache.storm.topology.base.BaseBasicBolt; import org.apache.storm.tuple.Fields; import org.apache.storm.tuple.Tuple; import org.apache.storm.tuple.Values; public class LineSplitBolt extends BaseBasicBolt { /** * 处理数据的业务逻辑 * 切分每行的数据 * @param input * @param collector */ @Override public void execute(Tuple input, BasicOutputCollector collector) { //首先获取到一行数据 String line =input.getStringByField("line"); System.out.println("LineSplitBolt.execute----"+line); //对line进行切分 if(line!=null&&line.length()>0){ //开始切分 String []words=line.split(" "); //将切分之后的单词发送到下一个Bolt for (String word : words) { //继续向后传递信息 collector.emit(new Values(word)); } } } /** * 如果需要向下传递数据,需要提前定义数据格式 * @param declarer */ @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("word")); } }
-
WordCountBolt:对LineSplitBolt的数据进行单词统计,采用的FieldsGrouping的数据分发策略
package com.gcc.wordcount;

import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

import java.util.HashMap;
import java.util.Map;

/**
 * Terminal bolt that counts how many times each word has been received by
 * this instance and prints the updated count after every tuple.
 */
public class WordCountBolt extends BaseBasicBolt {

    /** Occurrence count per word, accumulated over all tuples seen so far. */
    private Map<String, Integer> map = new HashMap<>();

    /**
     * Increments the count of the incoming word (starting at 1 for a word not
     * seen before) and prints the new count.
     *
     * @param input     incoming tuple carrying a {@code "word"} field
     * @param collector unused; this bolt emits nothing
     */
    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        String word = input.getStringByField("word");

        Integer seenSoFar = map.get(word);
        int updated = (seenSoFar == null) ? 1 : seenSoFar + 1;
        map.put(word, updated);

        System.out.println("WordCountBolt本次执行完:"+this+"单词【"+word+"】数量【"+map.get(word)+"]");
    }

    /**
     * Declares no output fields.
     *
     * @param declarer unused
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Nothing to declare: no tuples are emitted.
    }
}
-
Topology:设置拓扑结构
package com.gcc.wordcount; import com.gcc.numberCount.NumberAgainBolt; import com.gcc.numberCount.NumberBolt; import com.gcc.numberCount.NumberCountBolt; import com.gcc.numberCount.NumberSpout; import org.apache.storm.Config; import org.apache.storm.LocalCluster; import org.apache.storm.generated.StormTopology; import org.apache.storm.topology.TopologyBuilder; import org.apache.storm.tuple.Fields; public class WordCountTopology { public static void main(String[] args) { //1.创建任务的拓扑图 //创建topology的构建起 TopologyBuilder topologyBuilder = new TopologyBuilder(); //设置拓扑关系(Spout) topologyBuilder.setSpout("WordCountSpout",new WordCountSpout()); //设置拓扑关系Bolt 切分行 topologyBuilder.setBolt("LineSplitBolt",new LineSplitBolt(),2).shuffleGrouping("WordCountSpout"); //设置拓扑关系Bolt 对单词的数量进行统计 topologyBuilder.setBolt("WordCountBolt",new WordCountBolt(),4).fieldsGrouping("LineSplitBolt",new Fields("word")); //2.配置启动 Config config = new Config(); //创建Topology StormTopology topology = topologyBuilder.createTopology(); //本地模式启动集群 LocalCluster localCluster = new LocalCluster(); localCluster.submitTopology("numberTopology",config,topology); } }