题目:
现有如下三份数据:
1、users.dat 数据格式为: 2::M::56::16::70072
对应字段为:UserID BigInt, Gender String, Age Int, Occupation String, Zipcode String
对应字段中文解释:用户id,性别,年龄,职业,邮政编码
2、movies.dat 数据格式为: 2::Jumanji (1995)::Adventure|Children's|Fantasy
对应字段为:MovieID BigInt, Title String, Genres String
对应字段中文解释:电影ID,电影名字,电影类型
3、ratings.dat 数据格式为: 1::1193::5::978300760
对应字段为:UserID BigInt, MovieID BigInt, Rating Double, Timestamped String
对应字段中文解释:用户ID,电影ID,评分,评分时间戳
将三份数据关联(join)后得到一份完整数据(即下文代码读取的 totalFilmInfos.txt),每条记录同样以 :: 分隔,字段依次为:
用户ID,电影ID,评分,评分时间戳,性别,年龄,职业,邮政编码,电影名字,电影类型
userid, movieId, rate, ts, gender, age, occupation, zipcode, movieName, movieType
(6)求1997年上映的电影中,评分最高的10部Comedy类电影
(7)该影评库中各种类型电影中评价最高的5部电影(类型,电影名,平均影评分)
两个题目思路类似:先用第一个 MapReduce 作业求出每部电影的平均评分,再用第二个作业排序、分组后取 topN 即可
案例6主体代码:
/**
* @author: lpj
* @date: 2018年3月16日 下午7:16:47
* @Description:
*/
package lpj.filmCritic;
import java.io.IOException;
import java.text.DecimalFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import lpj.filmBean.GoodMoiveGroup3;
import lpj.filmBean.GoodMovieBean3;
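/**
 * Case 6 driver: among the movies released in 1997, list the 10 best-rated Comedy movies.
 * Two MapReduce jobs are chained: the first averages the ratings per movie title, the
 * second keeps the first 10 records of each reduce group (record ordering and grouping
 * come from GoodMovieBean3 / GoodMoiveGroup3, which are defined elsewhere).
 */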
public class Comedytop10MR {
/**
 * Configures and runs the two chained MapReduce jobs for case 6.
 *
 * <p>Job 1 reads {@code d:/a/totalFilmInfos.txt} and writes to {@code d:/a/homework11_6_1};
 * job 2 reads that directory and writes to {@code d:/a/homework11_6_2}. An output directory
 * that already exists is deleted before the jobs are submitted. Both jobs are handed to a
 * {@link JobControl}, with job 2 depending on job 1, and this method polls once per second
 * until the controller reports that everything has finished.
 *
 * <p>If any job ends up in the controller's failed list, it is reported on standard error and
 * the JVM exits with status 1 instead of returning as if the run had succeeded.
 *
 * @param args ignored; all paths are hard-coded
 * @throws Exception if job configuration, file-system access or the polling sleep fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // conf.addResource("hdfs-site.xml"); // use a configuration file
    // System.setProperty("HADOOP_USER_NAME", "hadoop"); // run against the cluster
    FileSystem fs = FileSystem.get(conf); // local file system by default

    // ---------------- job 1: average rating per movie ----------------
    Job job = Job.getInstance(conf);
    job.setJarByClass(Comedytop10MR.class);
    job.setMapperClass(Comedytop10MR_Mapper.class);
    job.setReducerClass(Comedytop10MR_Reducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    Path inputPath = new Path("d:/a/totalFilmInfos.txt");
    Path outputPath = new Path("d:/a/homework11_6_1");
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    // ---------------- job 2: top 10 ----------------
    Job job2 = Job.getInstance(conf);
    job2.setJarByClass(Comedytop10MR.class);
    job2.setMapperClass(Comedytop10MR2_Mapper.class);
    job2.setReducerClass(Comedytop10MR2_Reducer.class);
    job2.setOutputKeyClass(GoodMovieBean3.class);
    job2.setOutputValueClass(NullWritable.class);
    job2.setGroupingComparatorClass(GoodMoiveGroup3.class);
    // Job 2 consumes exactly what job 1 produced, so reuse job 1's output path as its input.
    Path outputPath2 = new Path("d:/a/homework11_6_2");
    if (fs.exists(outputPath2)) {
        fs.delete(outputPath2, true);
    }
    FileInputFormat.setInputPaths(job2, outputPath);
    FileOutputFormat.setOutputPath(job2, outputPath2);

    // ---------------- chain the jobs: job 2 runs only after job 1 ----------------
    ControlledJob aJob = new ControlledJob(job.getConfiguration());
    ControlledJob bJob = new ControlledJob(job2.getConfiguration());
    aJob.setJob(job);
    bJob.setJob(job2);
    JobControl jc = new JobControl("jc");
    jc.addJob(aJob);
    jc.addJob(bJob);
    bJob.addDependingJob(aJob);

    Thread controller = new Thread(jc);
    controller.start();
    while (!jc.allFinished()) {
        Thread.sleep(1000); // sleep is static: it pauses this (main) thread, not the controller
    }

    // "All finished" also covers jobs that failed, so check the failed list explicitly.
    boolean anyFailed = false;
    for (ControlledJob failedJob : jc.getFailedJobList()) {
        anyFailed = true;
        System.err.println("Job failed: " + failedJob.getJobName() + " - " + failedJob.getMessage());
    }
    jc.stop();
    if (anyFailed) {
        System.exit(1);
    }
}
/**
 * Job-1 mapper for case 6 (top-rated 1997 Comedy movies).
 *
 * <p>Each input line is split on {@code "::"}. Field 2 is parsed as the integer rating, field 8
 * is the movie title and field 9 is the type string. The release year is taken from the four
 * characters that precede the title's last character (the sample titles look like
 * {@code "Jumanji (1995)"}). A {@code (title, rating)} pair is emitted only when that year is
 * {@code 1997} and the type string is exactly {@code "Comedy"} or {@code "comedy"}.
 *
 * <p>Lines with fewer than 10 fields, with a title too short to hold a year, or with a
 * non-integer rating are counted under the {@code MALFORMED_RECORDS} counter and skipped, so a
 * single bad line no longer fails the whole task.
 *
 * <p>NOTE(review): because the type comparison is an exact match, multi-genre strings such as
 * {@code "Comedy|Drama"} are not selected — confirm that this is the intended reading of
 * "Comedy movies".
 */
public static class Comedytop10MR_Mapper extends Mapper<LongWritable, Text, Text, IntWritable>{
    /** Minimum number of "::"-separated fields needed to read the type at index 9. */
    private static final int MIN_FIELDS = 10;
    /** Shortest title from which the 4-character year can be cut without going out of range. */
    private static final int MIN_TITLE_LENGTH = 5;

    Text kout = new Text();
    Text valueout = new Text(); // not used by this mapper; left in place as originally declared
    private final IntWritable rateOut = new IntWritable();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Field order: userId, movieId, rate, ts, gender, age, occupation, zipcode, movieName, movieType
        String[] fields = value.toString().trim().split("::");
        if (fields.length < MIN_FIELDS) {
            skipMalformed(context);
            return;
        }
        String movieName = fields[8];
        if (movieName.length() < MIN_TITLE_LENGTH) {
            skipMalformed(context);
            return;
        }
        String year = movieName.substring(movieName.length() - 5, movieName.length() - 1);
        String type = fields[9];
        if (!year.equals("1997") || !(type.equals("Comedy") || type.equals("comedy"))) {
            return; // not a 1997 Comedy record
        }
        int rate;
        try {
            rate = Integer.parseInt(fields[2]);
        } catch (NumberFormatException e) {
            skipMalformed(context);
            return;
        }
        // The framework serializes the pair during write(), so the holders can be reused.
        kout.set(movieName);
        rateOut.set(rate);
        context.write(kout, rateOut);
    }

    /** Records one skipped input line in the job counters. */
    private static void skipMalformed(Context context) {
        context.getCounter("Comedytop10MR", "MALFORMED_RECORDS").increment(1);
    }
}
/**
 * Job-1 reducer for case 6: averages all ratings received for one movie title and writes
 * {@code (title, average)} with the average formatted by the pattern {@code "#.#"}.
 *
 * <p>The formatter is pinned to {@code Locale.ROOT} symbols. With the default-locale formatter a
 * JVM running under a comma-decimal locale would write e.g. {@code "3,9"}, which the second
 * job cannot read back with {@code Double.parseDouble}.
 */
public static class Comedytop10MR_Reducer extends Reducer<Text, IntWritable, Text, Text>{
    Text kout = new Text(); // not used by this reducer; left in place as originally declared
    Text valueout = new Text();
    /** Built once per reducer instance instead of once per key; always uses '.' as separator. */
    private final DecimalFormat averageFormat =
            new DecimalFormat("#.#", java.text.DecimalFormatSymbols.getInstance(java.util.Locale.ROOT));

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        long count = 0;
        long total = 0; // long so that a very large number of ratings cannot overflow the sum
        for (IntWritable rating : values) {
            count++;
            total += rating.get();
        }
        double average = (double) total / count;
        valueout.set(averageFormat.format(average));
        context.write(key, valueout);
    }
}
//------------------------- job 2: take the top 10 -------------------
/**
 * Job-2 mapper for case 6. Reads one tab-separated line of job-1 output, copies column 0
 * (the movie title) and column 1 (the average rating, parsed as a double) into the reusable
 * {@link GoodMovieBean3} and emits the bean as the key with a {@link NullWritable} value.
 */
public static class Comedytop10MR2_Mapper extends Mapper<LongWritable, Text, GoodMovieBean3, NullWritable>{
    Text kout = new Text();
    Text valueout = new Text();
    GoodMovieBean3 gm = new GoodMovieBean3();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        final String line = value.toString().trim();
        final String[] columns = line.split("\t");
        final String title = columns[0];
        final double average = Double.parseDouble(columns[1]);

        // The bean's "year" property is the slot that carries the movie title here.
        gm.setYear(title);
        gm.setNum(average);
        context.write(gm, NullWritable.get());
    }
}
/**
 * Job-2 reducer for case 6: writes the key once for each of the first 10 values of a reduce
 * group and stops consuming the group when an 11th value shows up.
 *
 * <p>NOTE(review): the write happens inside the loop on purpose — this presumably relies on the
 * framework refreshing {@code key} as the value iterator advances within a group formed by
 * GoodMoiveGroup3; confirm against that comparator.
 */
public static class Comedytop10MR2_Reducer extends Reducer<GoodMovieBean3, NullWritable, GoodMovieBean3, NullWritable>{
    Text kout = new Text();
    Text valueout = new Text();

    @Override
    protected void reduce(GoodMovieBean3 key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        int written = 0;
        for (NullWritable ignored : values) {
            if (written == 10) {
                return; // quota reached; the rest of the group is dropped
            }
            context.write(key, NullWritable.get());
            written++;
        }
    }
}
}
最终结果:
Castle, The (1997) 3.9
Full Monty, The (1997) 3.9
Austin Powers: International Man of Mystery (1997) 3.7
Santitos (1997) 3.7
Liar Liar (1997) 3.5
Broadway Damage (1997) 3.5
Six Ways to Sunday (1997) 3.5
Clockwatchers (1997) 3.4
In & Out (1997) 3.3
Fierce Creatures (1997) 3.3
案例7主体代码:
/**
* @author: lpj
* @date: 2018年3月16日 下午7:16:47
* @Description:
*/
package lpj.filmCritic;
import java.io.IOException;
import java.text.DecimalFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import lpj.filmBean.GoodMoiveGroup3;
import lpj.filmBean.GoodMoiveGroup4;
import lpj.filmBean.GoodMovieBean3;
import lpj.filmBean.GoodMovieBean4;
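/**
 * Case 7 driver: for every movie type, list the 5 best-rated movies (type, title, average rating).
 * Two MapReduce jobs are chained: the first averages the ratings per (type, title) pair, the
 * second keeps the first 5 records of each reduce group (record ordering and grouping come
 * from GoodMovieBean4 / GoodMoiveGroup4, which are defined elsewhere).
 */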
public class Kindtop5MR {
/**
 * Configures and runs the two chained MapReduce jobs for case 7.
 *
 * <p>Job 1 reads {@code d:/a/totalFilmInfos.txt} and writes to {@code d:/a/homework11_7_1};
 * job 2 reads that directory and writes to {@code d:/a/homework11_7_2}. An output directory
 * that already exists is deleted before the jobs are submitted. Both jobs are handed to a
 * {@link JobControl}, with job 2 depending on job 1, and this method polls once per second
 * until the controller reports that everything has finished.
 *
 * <p>If any job ends up in the controller's failed list, it is reported on standard error and
 * the JVM exits with status 1 instead of returning as if the run had succeeded.
 *
 * @param args ignored; all paths are hard-coded
 * @throws Exception if job configuration, file-system access or the polling sleep fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // conf.addResource("hdfs-site.xml"); // use a configuration file
    // System.setProperty("HADOOP_USER_NAME", "hadoop"); // run against the cluster
    FileSystem fs = FileSystem.get(conf); // local file system by default

    // ---------------- job 1: average rating per (type, movie) ----------------
    Job job = Job.getInstance(conf);
    job.setJarByClass(Kindtop5MR.class);
    job.setMapperClass(Comedytop10MR_Mapper.class);
    job.setReducerClass(Comedytop10MR_Reducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    Path inputPath = new Path("d:/a/totalFilmInfos.txt");
    Path outputPath = new Path("d:/a/homework11_7_1");
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    // ---------------- job 2: top 5 per type ----------------
    Job job2 = Job.getInstance(conf);
    job2.setJarByClass(Kindtop5MR.class);
    job2.setMapperClass(Comedytop10MR2_Mapper.class);
    job2.setReducerClass(Comedytop10MR2_Reducer.class);
    job2.setOutputKeyClass(GoodMovieBean4.class);
    job2.setOutputValueClass(NullWritable.class);
    job2.setGroupingComparatorClass(GoodMoiveGroup4.class);
    // Job 2 consumes exactly what job 1 produced, so reuse job 1's output path as its input.
    Path outputPath2 = new Path("d:/a/homework11_7_2");
    if (fs.exists(outputPath2)) {
        fs.delete(outputPath2, true);
    }
    FileInputFormat.setInputPaths(job2, outputPath);
    FileOutputFormat.setOutputPath(job2, outputPath2);

    // ---------------- chain the jobs: job 2 runs only after job 1 ----------------
    ControlledJob aJob = new ControlledJob(job.getConfiguration());
    ControlledJob bJob = new ControlledJob(job2.getConfiguration());
    aJob.setJob(job);
    bJob.setJob(job2);
    JobControl jc = new JobControl("jc");
    jc.addJob(aJob);
    jc.addJob(bJob);
    bJob.addDependingJob(aJob);

    Thread controller = new Thread(jc);
    controller.start();
    while (!jc.allFinished()) {
        Thread.sleep(1000); // sleep is static: it pauses this (main) thread, not the controller
    }

    // "All finished" also covers jobs that failed, so check the failed list explicitly.
    boolean anyFailed = false;
    for (ControlledJob failedJob : jc.getFailedJobList()) {
        anyFailed = true;
        System.err.println("Job failed: " + failedJob.getJobName() + " - " + failedJob.getMessage());
    }
    jc.stop();
    if (anyFailed) {
        System.exit(1);
    }
}
/**
 * Job-1 mapper for case 7 (top 5 movies per type).
 *
 * <p>Each input line is split on {@code "::"}. Field 2 is parsed as the integer rating, field 8
 * is the movie title and field 9 is the type string. For every well-formed line the mapper
 * emits the key {@code type + "\t" + title} with the rating as value, so the reducer sees all
 * ratings of one movie within one type string together.
 *
 * <p>Lines with fewer than 10 fields or with a non-integer rating are counted under the
 * {@code MALFORMED_RECORDS} counter and skipped, so a single bad line no longer fails the
 * whole task.
 */
public static class Comedytop10MR_Mapper extends Mapper<LongWritable, Text, Text, IntWritable>{
    /** Minimum number of "::"-separated fields needed to read the type at index 9. */
    private static final int MIN_FIELDS = 10;

    Text kout = new Text();
    Text valueout = new Text(); // not used by this mapper; left in place as originally declared
    private final IntWritable rateOut = new IntWritable();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Field order: userId, movieId, rate, ts, gender, age, occupation, zipcode, movieName, movieType
        String[] fields = value.toString().trim().split("::");
        if (fields.length < MIN_FIELDS) {
            skipMalformed(context);
            return;
        }
        int rate;
        try {
            rate = Integer.parseInt(fields[2]);
        } catch (NumberFormatException e) {
            skipMalformed(context);
            return;
        }
        String movieName = fields[8];
        String type = fields[9];
        // The framework serializes the pair during write(), so the holders can be reused.
        kout.set(type + "\t" + movieName);
        rateOut.set(rate);
        context.write(kout, rateOut);
    }

    /** Records one skipped input line in the job counters. */
    private static void skipMalformed(Context context) {
        context.getCounter("Kindtop5MR", "MALFORMED_RECORDS").increment(1);
    }
}
/**
 * Job-1 reducer for case 7: averages all ratings received for one {@code type<TAB>title} key
 * and writes {@code (key, average)} with the average formatted by the pattern {@code "#.#"}.
 *
 * <p>The formatter is pinned to {@code Locale.ROOT} symbols. With the default-locale formatter a
 * JVM running under a comma-decimal locale would write e.g. {@code "3,9"}, which the second
 * job cannot read back with {@code Double.parseDouble}.
 */
public static class Comedytop10MR_Reducer extends Reducer<Text, IntWritable, Text, Text>{
    Text kout = new Text(); // not used by this reducer; left in place as originally declared
    Text valueout = new Text();
    /** Built once per reducer instance instead of once per key; always uses '.' as separator. */
    private final DecimalFormat averageFormat =
            new DecimalFormat("#.#", java.text.DecimalFormatSymbols.getInstance(java.util.Locale.ROOT));

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        long count = 0;
        long total = 0; // long so that a very large number of ratings cannot overflow the sum
        for (IntWritable rating : values) {
            count++;
            total += rating.get();
        }
        double average = (double) total / count;
        valueout.set(averageFormat.format(average));
        context.write(key, valueout);
    }
}
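//------------------------- job 2: take the top 5 per type -------------------
// Sample job-1 output line (type, title and average are tab-separated): War<TAB>Underground (1995)<TAB>3.7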
/**
 * Job-2 mapper for case 7. Reads one tab-separated line of job-1 output — column 0 is the
 * type, column 1 the movie title, column 2 the average rating (parsed as a double) — copies
 * the three values into the reusable {@link GoodMovieBean4} and emits the bean as the key
 * with a {@link NullWritable} value.
 */
public static class Comedytop10MR2_Mapper extends Mapper<LongWritable, Text, GoodMovieBean4, NullWritable>{
    Text kout = new Text();
    Text valueout = new Text();
    GoodMovieBean4 gm = new GoodMovieBean4();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        final String line = value.toString().trim();
        final String[] columns = line.split("\t");
        final String genre = columns[0];
        final String title = columns[1];
        final double average = Double.parseDouble(columns[2]);

        gm.setType(genre);
        gm.setName(title);
        gm.setNum(average);
        context.write(gm, NullWritable.get());
    }
}
/**
 * Job-2 reducer for case 7: writes the key once for each of the first 5 values of a reduce
 * group and stops consuming the group when a 6th value shows up.
 *
 * <p>NOTE(review): the write happens inside the loop on purpose — this presumably relies on the
 * framework refreshing {@code key} as the value iterator advances within a group formed by
 * GoodMoiveGroup4; confirm against that comparator.
 */
public static class Comedytop10MR2_Reducer extends Reducer<GoodMovieBean4, NullWritable, GoodMovieBean4, NullWritable>{
    Text kout = new Text();
    Text valueout = new Text();

    @Override
    protected void reduce(GoodMovieBean4 key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        int written = 0;
        for (NullWritable ignored : values) {
            if (written == 5) {
                return; // quota reached; the rest of the group is dropped
            }
            context.write(key, NullWritable.get());
            written++;
        }
    }
}
}
最终结果(截取部分):
Skipped Parts (2000) Drama|Romance 4.5
Graduate, The (1967) Drama|Romance 4.2
Children of Paradise (Les enfants du paradis) (1945) Drama|Romance 4.2
Beautiful Thing (1996) Drama|Romance 4.1
Brief Encounter (1946) Drama|Romance 4.1
Talented Mr. Ripley, The (1999) Drama|Mystery|Thriller 3.5
Client, The (1994) Drama|Mystery|Thriller 3.4
2001: A Space Odyssey (1968) Drama|Mystery|Sci-Fi|Thriller 4.1
Chungking Express (1994) Drama|Mystery|Romance 3.9
Sommersby (1993) Drama|Mystery|Romance 3.3
Flesh and Bone (1993) Drama|Mystery|Romance 3.2
Lulu on the Bridge (1998) Drama|Mystery|Romance 2.7