MapReduce案例11——影评分析6/7(特定类型电影topN)

本文介绍了一个基于Hadoop的大数据处理案例,通过MapReduce框架分析电影评分数据,包括找出1997年上映的Comedy类型电影中评分最高的10部电影,以及各类电影中评分最高的5部。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

题目:

现有如此三份数据:
1、users.dat    数据格式为:  2::M::56::16::70072
对应字段为:UserID BigInt, Gender String, Age Int, Occupation String, Zipcode String
对应字段中文解释:用户id,性别,年龄,职业,邮政编码

2、movies.dat		数据格式为: 2::Jumanji (1995)::Adventure|Children's|Fantasy
对应字段为:MovieID BigInt, Title String, Genres String
对应字段中文解释:电影ID,电影名字,电影类型

3、ratings.dat		数据格式为:  1::1193::5::978300760
对应字段为:UserID BigInt, MovieID BigInt, Rating Double, Timestamped String
对应字段中文解释:用户ID,电影ID,评分,评分时间戳

下面两个案例的输入是由以上三份数据关联得到的汇总文件 totalFilmInfos.txt,每行字段以“::”分隔,依次为:
用户ID,电影ID,评分,评分时间戳,性别,年龄,职业,邮政编码,电影名字,电影类型
userid, movieId, rate, ts, gender, age, occupation, zipcode, movieName, movieType
(6)求1997年上映的电影中,评分最高的10部Comedy类电影
(7)该影评库中各种类型电影中评价最高的5部电影(类型,电影名,平均影评分)

两个题目思路类似:都用两个串联的MapReduce作业完成——第一个作业求出每部电影的平均评分(案例6先按年份和类型过滤,案例7把类型和电影名一起作为key),第二个作业再对这些平均评分分组排序,取topN即可。

案例6主体代码:

/**
 * @author: lpj   
 * @date: 2018年3月16日 下午7:16:47
 * @Description:
 */
package lpj.filmCritic;

import java.io.IOException;
import java.text.DecimalFormat;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import lpj.filmBean.GoodMoiveGroup3;
import lpj.filmBean.GoodMovieBean3;
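/**
 * Question (6): finds the ten best-rated Comedy movies released in 1997 using
 * two chained MapReduce jobs (per-movie average rating, then top 10).
 */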
public class Comedytop10MR {
	
	/**
	 * Runs the two chained jobs that answer question (6): the ten best-rated
	 * Comedy movies released in 1997.
	 * <p>
	 * Job 1 reads {@code d:/a/totalFilmInfos.txt} and writes one average rating
	 * per movie to {@code d:/a/homework11_6_1}; job 2 reads that directory and
	 * writes its result to {@code d:/a/homework11_6_2}. Existing output
	 * directories are deleted before the jobs are submitted.
	 * <p>
	 * If any job ends up in JobControl's failed list, its name and message are
	 * printed to standard error and the JVM exits with status 1.
	 *
	 * @param args ignored; all paths are hard-coded
	 * @throws Exception if job setup or file-system access fails, or if the
	 *                   waiting thread is interrupted
	 */
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
//		conf.addResource("hdfs-site.xml");// use the cluster configuration file
//		System.setProperty("HADOOP_USER_NAME", "hadoop");// run against the cluster
		// One FileSystem handle is enough: both jobs are built from the same conf.
		FileSystem fs = FileSystem.get(conf);

		// ---- job 1: average rating per 1997 Comedy movie ----
		Job job = Job.getInstance(conf);
		job.setJarByClass(Comedytop10MR.class);
		job.setMapperClass(Comedytop10MR_Mapper.class);
		job.setReducerClass(Comedytop10MR_Reducer.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		Path inputPath = new Path("d:/a/totalFilmInfos.txt");
		Path outputPath = new Path("d:/a/homework11_6_1");
		if (fs.exists(outputPath)) {
			fs.delete(outputPath, true);
		}
		FileInputFormat.setInputPaths(job, inputPath);
		FileOutputFormat.setOutputPath(job, outputPath);

		// ---- job 2: top 10 of job 1's output ----
		Job job2 = Job.getInstance(conf);
		job2.setJarByClass(Comedytop10MR.class);
		job2.setMapperClass(Comedytop10MR2_Mapper.class);
		job2.setReducerClass(Comedytop10MR2_Reducer.class);
		job2.setOutputKeyClass(GoodMovieBean3.class);
		job2.setOutputValueClass(NullWritable.class);
		job2.setGroupingComparatorClass(GoodMoiveGroup3.class);
		Path inputPath2 = new Path("d:/a/homework11_6_1");
		Path outputPath2 = new Path("d:/a/homework11_6_2");
		if (fs.exists(outputPath2)) {
			fs.delete(outputPath2, true);
		}
		FileInputFormat.setInputPaths(job2, inputPath2);
		FileOutputFormat.setOutputPath(job2, outputPath2);

		// ---- chain the jobs: job 2 starts only after job 1 ----
		ControlledJob aJob = new ControlledJob(job.getConfiguration());
		ControlledJob bJob = new ControlledJob(job2.getConfiguration());
		aJob.setJob(job);
		bJob.setJob(job2);
		JobControl jc = new JobControl("jc");
		jc.addJob(aJob);
		jc.addJob(bJob);
		bJob.addDependingJob(aJob);
		Thread thread = new Thread(jc);
		thread.start();
		while (!jc.allFinished()) {
			// sleep is static: it pauses this (main) thread, not the JobControl thread
			Thread.sleep(1000);
		}

		// allFinished() is also true when jobs failed, so inspect the failed list explicitly.
		boolean anyFailed = false;
		for (ControlledJob failedJob : jc.getFailedJobList()) {
			anyFailed = true;
			System.err.println("Job failed: " + failedJob.getJobName() + " - " + failedJob.getMessage());
		}
		// Stop the JobControl thread before exiting so it does not keep the JVM alive.
		jc.stop();
		if (anyFailed) {
			System.exit(1);
		}
	}
	
	/**
	 * First-stage mapper for question (6): keeps the ratings of Comedy movies
	 * released in 1997 and emits {@code (movie title, rating)}.
	 * <p>
	 * Each input line is split on {@code "::"}: field 2 is the rating, field 8
	 * the movie title and field 9 the genre field. The release year is taken
	 * from the four characters before the title's last character (titles look
	 * like {@code "Jumanji (1995)"}).
	 * <p>
	 * The genre field may list several genres separated by {@code '|'}; a movie
	 * counts as a Comedy when any of them equals "Comedy" (case-insensitive),
	 * not only when "Comedy" is its sole genre.
	 * <p>
	 * Lines with fewer than ten fields, a title too short to carry a year, or a
	 * non-integer rating are skipped and counted under the
	 * {@code Comedytop10MR / MALFORMED_LINES} counter instead of failing the task.
	 */
	public static class Comedytop10MR_Mapper extends Mapper<LongWritable, Text, Text, IntWritable>{
		private static final String COUNTER_GROUP = "Comedytop10MR";
		private static final String MALFORMED_COUNTER = "MALFORMED_LINES";

		// Output writables are reused for every record; context.write serializes them immediately.
		private final Text titleOut = new Text();
		private final IntWritable rateOut = new IntWritable();

		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			// userid, movieId, rate, ts, gender, age, occupation, zipcode, movieName, movieType
			String[] reads = value.toString().trim().split("::");
			if (reads.length < 10) {
				context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
				return;
			}
			String moivename = reads[8];
			if (moivename.length() < 5) {
				// too short for the substring below
				context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
				return;
			}
			String year = moivename.substring(moivename.length() - 5, moivename.length() - 1);
			if (!year.equals("1997") || !isComedy(reads[9])) {
				return;
			}
			int rate;
			try {
				rate = Integer.parseInt(reads[2]);
			} catch (NumberFormatException ignored) {
				// a bad rating makes the line unusable; count it and move on
				context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
				return;
			}
			titleOut.set(moivename);
			rateOut.set(rate);
			context.write(titleOut, rateOut);
		}

		/** Returns true when the pipe-separated genre field contains "Comedy" (any letter case). */
		private static boolean isComedy(String genres) {
			for (String genre : genres.split("\\|")) {
				if (genre.trim().equalsIgnoreCase("Comedy")) {
					return true;
				}
			}
			return false;
		}
	}
	/**
	 * First-stage reducer: averages all ratings received for one movie title and
	 * emits {@code (title, average)} with the average formatted by the pattern
	 * {@code "#.#"}.
	 * <p>
	 * The formatter is pinned to {@code Locale.ROOT} so the decimal separator is
	 * always {@code '.'}. The second-stage mapper parses this text with
	 * {@code Double.parseDouble}, which would fail on a default locale whose
	 * decimal separator is a comma.
	 */
	public static class Comedytop10MR_Reducer extends Reducer<Text, IntWritable, Text, Text>{
		// Built once per reducer instance instead of once per key.
		private final DecimalFormat averageFormat = new DecimalFormat("#.#",
				java.text.DecimalFormatSymbols.getInstance(java.util.Locale.ROOT));
		private final Text averageOut = new Text();

		@Override
		protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
			long num = 0;
			long total = 0; // long so that very large groups cannot overflow the sum
			for (IntWritable in : values) {
				num++;
				total += in.get();
			}
			double avg = (double) total / num;
			averageOut.set(averageFormat.format(avg));
			context.write(key, averageOut);
		}
	}
	//-------------------------求top10-------------------
	/**
	 * Second-stage mapper: turns one tab-separated line of the first job's output
	 * ({@code title<TAB>average}) into a {@link GoodMovieBean3} key with a
	 * {@link NullWritable} value.
	 * <p>
	 * The title is stored through {@code setYear} and the average through
	 * {@code setNum}. NOTE(review): how the bean orders and prints these values
	 * is defined in GoodMovieBean3, which is not visible here.
	 */
	public static class Comedytop10MR2_Mapper extends Mapper<LongWritable, Text, GoodMovieBean3, NullWritable>{
		// kout and valueout are not used by map(); kept so the class's members stay unchanged.
		Text kout = new Text();
		Text valueout = new Text();
		// Single bean instance refilled for every input line.
		GoodMovieBean3 gm = new GoodMovieBean3();

		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			final String line = value.toString().trim();
			final String[] columns = line.split("\t");
			// Read and parse both columns before touching the bean.
			final String title = columns[0];
			final double average = Double.parseDouble(columns[1]);
			fillBean(title, average);
			context.write(gm, NullWritable.get());
		}

		/** Copies the parsed title and average into the reusable bean. */
		private void fillBean(String title, double average) {
			gm.setYear(title);
			gm.setNum(average);
		}
	}
	/**
	 * Second-stage reducer: for each key group, writes the key once per value for
	 * at most the first ten values and ignores the rest of the group.
	 * <p>
	 * NOTE(review): which records share a group, and in which order they arrive,
	 * is decided by GoodMovieBean3 / GoodMoiveGroup3, which are not visible here.
	 */
	public static class Comedytop10MR2_Reducer extends Reducer<GoodMovieBean3, NullWritable, GoodMovieBean3, NullWritable>{
		// Not used by reduce(); kept so the class's members stay unchanged.
		Text kout = new Text();
		Text valueout = new Text();

		@Override
		protected void reduce(GoodMovieBean3 key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
			int emitted = 0;
			for (NullWritable ignored : values) {
				if (emitted >= 10) {
					// limit reached: stop at the eleventh value
					break;
				}
				context.write(key, NullWritable.get());
				emitted++;
			}
		}
	}
}

最终结果:

Castle, The (1997)	3.9
Full Monty, The (1997)	3.9
Austin Powers: International Man of Mystery (1997)	3.7
Santitos (1997)	3.7
Liar Liar (1997)	3.5
Broadway Damage (1997)	3.5
Six Ways to Sunday (1997)	3.5
Clockwatchers (1997)	3.4
In & Out (1997)	3.3
Fierce Creatures (1997)	3.3

案例7主体代码:

/**
 * @author: lpj   
 * @date: 2018年3月16日 下午7:16:47
 * @Description:
 */
package lpj.filmCritic;

import java.io.IOException;
import java.text.DecimalFormat;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import lpj.filmBean.GoodMoiveGroup3;
import lpj.filmBean.GoodMoiveGroup4;
import lpj.filmBean.GoodMovieBean3;
import lpj.filmBean.GoodMovieBean4;
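/**
 * Question (7): for every movie type, finds the five best-rated movies using
 * two chained MapReduce jobs (average rating per type and movie, then top 5 per type).
 */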
public class Kindtop5MR {
	
	/**
	 * Runs the two chained jobs that answer question (7): the five best-rated
	 * movies of every movie type.
	 * <p>
	 * Job 1 reads {@code d:/a/totalFilmInfos.txt} and writes one average rating
	 * per (type, movie) pair to {@code d:/a/homework11_7_1}; job 2 reads that
	 * directory and writes its result to {@code d:/a/homework11_7_2}. Existing
	 * output directories are deleted before the jobs are submitted.
	 * <p>
	 * If any job ends up in JobControl's failed list, its name and message are
	 * printed to standard error and the JVM exits with status 1.
	 *
	 * @param args ignored; all paths are hard-coded
	 * @throws Exception if job setup or file-system access fails, or if the
	 *                   waiting thread is interrupted
	 */
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
//		conf.addResource("hdfs-site.xml");// use the cluster configuration file
//		System.setProperty("HADOOP_USER_NAME", "hadoop");// run against the cluster
		// One FileSystem handle is enough: both jobs are built from the same conf.
		FileSystem fs = FileSystem.get(conf);

		// ---- job 1: average rating per (type, movie) ----
		Job job = Job.getInstance(conf);
		job.setJarByClass(Kindtop5MR.class);
		job.setMapperClass(Comedytop10MR_Mapper.class);
		job.setReducerClass(Comedytop10MR_Reducer.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		Path inputPath = new Path("d:/a/totalFilmInfos.txt");
		Path outputPath = new Path("d:/a/homework11_7_1");
		if (fs.exists(outputPath)) {
			fs.delete(outputPath, true);
		}
		FileInputFormat.setInputPaths(job, inputPath);
		FileOutputFormat.setOutputPath(job, outputPath);

		// ---- job 2: top 5 per type from job 1's output ----
		Job job2 = Job.getInstance(conf);
		job2.setJarByClass(Kindtop5MR.class);
		job2.setMapperClass(Comedytop10MR2_Mapper.class);
		job2.setReducerClass(Comedytop10MR2_Reducer.class);
		job2.setOutputKeyClass(GoodMovieBean4.class);
		job2.setOutputValueClass(NullWritable.class);
		job2.setGroupingComparatorClass(GoodMoiveGroup4.class);
		Path inputPath2 = new Path("d:/a/homework11_7_1");
		Path outputPath2 = new Path("d:/a/homework11_7_2");
		if (fs.exists(outputPath2)) {
			fs.delete(outputPath2, true);
		}
		FileInputFormat.setInputPaths(job2, inputPath2);
		FileOutputFormat.setOutputPath(job2, outputPath2);

		// ---- chain the jobs: job 2 starts only after job 1 ----
		ControlledJob aJob = new ControlledJob(job.getConfiguration());
		ControlledJob bJob = new ControlledJob(job2.getConfiguration());
		aJob.setJob(job);
		bJob.setJob(job2);
		JobControl jc = new JobControl("jc");
		jc.addJob(aJob);
		jc.addJob(bJob);
		bJob.addDependingJob(aJob);
		Thread thread = new Thread(jc);
		thread.start();
		while (!jc.allFinished()) {
			// sleep is static: it pauses this (main) thread, not the JobControl thread
			Thread.sleep(1000);
		}

		// allFinished() is also true when jobs failed, so inspect the failed list explicitly.
		boolean anyFailed = false;
		for (ControlledJob failedJob : jc.getFailedJobList()) {
			anyFailed = true;
			System.err.println("Job failed: " + failedJob.getJobName() + " - " + failedJob.getMessage());
		}
		// Stop the JobControl thread before exiting so it does not keep the JVM alive.
		jc.stop();
		if (anyFailed) {
			System.exit(1);
		}
	}
	
	/**
	 * First-stage mapper for question (7): emits
	 * {@code (type + TAB + movie title, rating)} for every rating line.
	 * <p>
	 * Each input line is split on {@code "::"}: field 2 is the rating, field 8
	 * the movie title and field 9 the movie type. The type field is used
	 * verbatim as the grouping type, i.e. a pipe-separated combination such as
	 * {@code "Drama|Romance"} is treated as one type.
	 * <p>
	 * Lines with fewer than ten fields or a non-integer rating are skipped and
	 * counted under the {@code Kindtop5MR / MALFORMED_LINES} counter instead of
	 * failing the task.
	 */
	public static class Comedytop10MR_Mapper extends Mapper<LongWritable, Text, Text, IntWritable>{
		private static final String COUNTER_GROUP = "Kindtop5MR";
		private static final String MALFORMED_COUNTER = "MALFORMED_LINES";

		// Output writables are reused for every record; context.write serializes them immediately.
		private final Text typeAndTitleOut = new Text();
		private final IntWritable rateOut = new IntWritable();

		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			// userid, movieId, rate, ts, gender, age, occupation, zipcode, movieName, movieType
			String[] reads = value.toString().trim().split("::");
			if (reads.length < 10) {
				context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
				return;
			}
			String moivename = reads[8];
			String type = reads[9];
			int rate;
			try {
				rate = Integer.parseInt(reads[2]);
			} catch (NumberFormatException ignored) {
				// a bad rating makes the line unusable; count it and move on
				context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
				return;
			}
			// The tab keeps type and title as separate columns in the job's text output.
			typeAndTitleOut.set(type + "\t" + moivename);
			rateOut.set(rate);
			context.write(typeAndTitleOut, rateOut);
		}
	}
	/**
	 * First-stage reducer: averages all ratings received for one
	 * {@code type<TAB>title} key and emits {@code (key, average)} with the
	 * average formatted by the pattern {@code "#.#"}.
	 * <p>
	 * The formatter is pinned to {@code Locale.ROOT} so the decimal separator is
	 * always {@code '.'}. The second-stage mapper parses this text with
	 * {@code Double.parseDouble}, which would fail on a default locale whose
	 * decimal separator is a comma.
	 */
	public static class Comedytop10MR_Reducer extends Reducer<Text, IntWritable, Text, Text>{
		// Built once per reducer instance instead of once per key.
		private final DecimalFormat averageFormat = new DecimalFormat("#.#",
				java.text.DecimalFormatSymbols.getInstance(java.util.Locale.ROOT));
		private final Text averageOut = new Text();

		@Override
		protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
			long num = 0;
			long total = 0; // long so that very large groups cannot overflow the sum
			for (IntWritable in : values) {
				num++;
				total += in.get();
			}
			double avg = (double) total / num;
			averageOut.set(averageFormat.format(avg));
			context.write(key, averageOut);
		}
	}
	//-------------------------求top5-------------------
	//War	Underground (1995)	3.7
	/**
	 * Second-stage mapper: turns one tab-separated line of the first job's output
	 * ({@code type<TAB>title<TAB>average}, e.g. {@code War<TAB>Underground (1995)<TAB>3.7})
	 * into a {@link GoodMovieBean4} key with a {@link NullWritable} value.
	 * <p>
	 * NOTE(review): how the bean orders and prints these values is defined in
	 * GoodMovieBean4, which is not visible here.
	 */
	public static class Comedytop10MR2_Mapper extends Mapper<LongWritable, Text, GoodMovieBean4, NullWritable>{
		// kout and valueout are not used by map(); kept so the class's members stay unchanged.
		Text kout = new Text();
		Text valueout = new Text();
		// Single bean instance refilled for every input line.
		GoodMovieBean4 gm = new GoodMovieBean4();

		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			final String line = value.toString().trim();
			final String[] columns = line.split("\t");
			// Read and parse all three columns before touching the bean.
			final String movieType = columns[0];
			final String title = columns[1];
			final double average = Double.parseDouble(columns[2]);
			fillBean(movieType, title, average);
			context.write(gm, NullWritable.get());
		}

		/** Copies the parsed type, title and average into the reusable bean. */
		private void fillBean(String movieType, String title, double average) {
			gm.setType(movieType);
			gm.setName(title);
			gm.setNum(average);
		}
	}
	/**
	 * Second-stage reducer: for each key group, writes the key once per value for
	 * at most the first five values and ignores the rest of the group.
	 * <p>
	 * NOTE(review): which records share a group, and in which order they arrive,
	 * is decided by GoodMovieBean4 / GoodMoiveGroup4, which are not visible here.
	 */
	public static class Comedytop10MR2_Reducer extends Reducer<GoodMovieBean4, NullWritable, GoodMovieBean4, NullWritable>{
		// Not used by reduce(); kept so the class's members stay unchanged.
		Text kout = new Text();
		Text valueout = new Text();

		@Override
		protected void reduce(GoodMovieBean4 key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
			int emitted = 0;
			for (NullWritable ignored : values) {
				if (emitted >= 5) {
					// limit reached: stop at the sixth value
					break;
				}
				context.write(key, NullWritable.get());
				emitted++;
			}
		}
	}
}

最终结果(截取部分,各列依次为:电影名、类型、平均评分):

Skipped Parts (2000)	Drama|Romance	4.5
Graduate, The (1967)	Drama|Romance	4.2
Children of Paradise (Les enfants du paradis) (1945)	Drama|Romance	4.2
Beautiful Thing (1996)	Drama|Romance	4.1
Brief Encounter (1946)	Drama|Romance	4.1
Talented Mr. Ripley, The (1999)	Drama|Mystery|Thriller	3.5
Client, The (1994)	Drama|Mystery|Thriller	3.4
2001: A Space Odyssey (1968)	Drama|Mystery|Sci-Fi|Thriller	4.1
Chungking Express (1994)	Drama|Mystery|Romance	3.9
Sommersby (1993)	Drama|Mystery|Romance	3.3
Flesh and Bone (1993)	Drama|Mystery|Romance	3.2
Lulu on the Bridge (1998)	Drama|Mystery|Romance	2.7

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值