// NOTE: notes pending update (笔记待更新)
// Input record format: orderId,userId,productName,price,quantity (数据为:订单号,用户id,产品名,价格,数量)
package mapreduce.ordertopn;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * 统计出每个订单中金额前n大的产品
 * (Computes, for each order, the top-n products by total amount.)
 * @author THY
 */
public class OrderTopN {
/**
 * Mapper: parses each CSV input line into an OrderBean so records can be
 * grouped by order downstream.
 * @author THY
 */
public static class OrderTopNMapper extends Mapper<LongWritable, Text, OrderBean, NullWritable>{
//每一行数据都要调用map一次,如果数据量很大的话需要很多次创建OrderBean,Text所以在全局创建一个对象,
OrderBean ob = new OrderBean();
Text k=new Text();
@Override
protected void map(LongWritable key, Text values,Context context)
throws IOException, InterruptedException {
String[] split = values.toString().split(",");
ob.set(split[0], split[1], split[2], Float.parseFloat(split[3]), Integer.parseInt(split[4]));
k.set(split[0]);
//交给maptask的kv对象会被maptask序列化后存储,不会覆盖
context.write(ob, NullWritable.get());