MapReduce: Computing Data Averages and Ranking Values by Size (Max/Min, Top-N)

Part 1: Computing the Average

In the map phase, write the name out as the key and the score as the value.

/*
 * Part 1 mapper: average score.
 * Writes the name out as the key and the score as the value.
 * Expected input: one tab-separated record per line, "name<TAB>score".
 */
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class AverageMap extends Mapper<LongWritable, Text, Text, IntWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        if (line.trim().length() > 0) {           // skip blank lines
            String[] str = line.split("\t");
            if (str.length == 2) {                // str[0] = name, str[1] = score
                context.write(new Text(str[0]),
                        new IntWritable(Integer.parseInt(str[1])));
            }
        }
    }
}

/*
 * Part 1 reducer: sums every score that arrives for a name and divides by
 * the number of scores seen, writing the average as a double.
 */
import java.io.IOException;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class AverageRedu extends Reducer<Text, IntWritable, Text, DoubleWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        int count = 0;                        // count the scores rather than
        for (IntWritable value : values) {    // assuming a fixed number per name
            sum += value.get();
            count++;
        }
        context.write(key, new DoubleWritable((double) sum / count));
    }
}

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class AverageMain {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(AverageMain.class);

        job.setMapperClass(AverageMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setReducerClass(AverageRedu.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);
    }
}
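To make the data flow concrete, here is a hypothetical input file (tab-separated name and score; not from the original post) and the output this job would write for it:

Jerry	70
Jerry	80
Tom	88
Tom	92

produces (keys sorted by the shuffle):

Jerry	75.0
Tom	90.0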

Part 2: Finding the Maximum and Minimum

Finding the max and min (simple approach: emit every number under one constant key, so the whole data set arrives at a single reducer as one values list).

/*
 * Part 2 mapper: every number is written under the same constant key, which
 * forces all values into a single reduce group.
 * Expected input: one number per line.
 */
import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MaxminMap extends Mapper<LongWritable, Text, Text, LongWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        if (line.trim().length() > 0) {   // skip blank lines
            // constant key: every value lands in the same reduce group
            context.write(new Text("key:"),
                    new LongWritable(Long.parseLong(line.trim())));
        }
    }
}

/*
 * Part 2 reducer: scans the single values list once, tracking the largest
 * and smallest numbers seen, and writes both out.
 */
import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class MaxminRedu extends Reducer<Text, LongWritable, Text, LongWritable> {

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long max = Long.MIN_VALUE;
        long min = Long.MAX_VALUE;
        for (LongWritable val : values) {
            if (val.get() > max) {
                max = val.get();
            }
            if (val.get() < min) {
                min = val.get();
            }
        }
        context.write(new Text("Max"), new LongWritable(max));
        context.write(new Text("Min"), new LongWritable(min));
    }
}
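One cost of the single-key trick is that every input number travels through the shuffle to the lone reduce group. Since max and min are associative, a combiner can collapse each map task's output to just two values first. The sketch below is an assumption, not part of the original post (including the name MaxminCombiner); crucially, it keeps the constant key unchanged so MaxminRedu still sees a single group:

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Hypothetical combiner: runs on the map side and collapses each map task's
// values to its local max and min, re-emitted under the SAME constant key.
public class MaxminCombiner extends Reducer<Text, LongWritable, Text, LongWritable> {

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long max = Long.MIN_VALUE;
        long min = Long.MAX_VALUE;
        for (LongWritable val : values) {
            max = Math.max(max, val.get());
            min = Math.min(min, val.get());
        }
        context.write(key, new LongWritable(max)); // local max, key unchanged
        context.write(key, new LongWritable(min)); // local min, key unchanged
    }
}

It would be registered in the driver with job.setCombinerClass(MaxminCombiner.class). Note that MaxminRedu itself cannot double as the combiner here: it rewrites the key to "Max"/"Min", which would split the one reduce group in two and corrupt the result.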

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MaxminMain {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(MaxminMain.class);

        job.setMapperClass(MaxminMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        job.setReducerClass(MaxminRedu.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);
    }
}

Part 3: Top-N Ranking

setup() is called by the MapReduce framework exactly once per task, before any map() call, and is the place to centralize the initialization of variables and resources. If that initialization lived inside map(), the Mapper would redo it for every input line it parses, repeating the work and dragging down performance.

cleanup() is likewise called exactly once per task, after the last map() call, and is the place to release those variables and resources. If the release logic lived inside map(), the Mapper would free the resources after every line and have to re-initialize them before the next one, again repeating the work and dragging down performance. A minimal sketch of this lifecycle follows.
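As an illustration (the class LineCountMap is hypothetical, not one of this post's jobs): per-task state is created once in setup(), updated on every map() call, and flushed once in cleanup().

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class LineCountMap extends Mapper<LongWritable, Text, Text, LongWritable> {

    private long count;   // per-task state

    @Override
    protected void setup(Context context) {
        count = 0;        // runs once, before the first map() call
    }

    @Override
    protected void map(LongWritable key, Text value, Context context) {
        count++;          // runs once per input record
    }

    @Override
    protected void cleanup(Context context)
            throws IOException, InterruptedException {
        // runs once, after the last map() call: emit the aggregated result
        context.write(new Text("lines"), new LongWritable(count));
    }
}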

Note: with a large data set, have each map task first keep a running sorted array of its own top N, so that only N values per mapper ever reach the shuffle. This is exactly what setup() and cleanup() enable in the mapper below.

/*
 * Part 3 mapper: top-N ranking.
 *
 * Raw data (orderid,userid,payment,productid):
 * 1,9818,100,121
 * 2,8918,2999,22
 * 3,2322,1234,11
 * 4,343,2232,22
 * 5,232,434,1
 * 6,34,232,11
 * 7,2322,9000,54
 * 8,45,3454,34
 *
 * Expected result (top 5 by payment):
 * 1,9000
 * 2,3454
 * 3,2999
 * 4,2232
 * 5,1234
 *
 * The mapper keeps a sorted array of its local top N.
 */
import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class topNMap extends Mapper<LongWritable, Text, IntWritable, IntWritable> {

    int len;    // the N in top-N
    int[] top;  // sorted ascending; top[1..len] holds this task's current top N

    @Override
    protected void setup(Context context)
            throws IOException, InterruptedException {
        len = context.getConfiguration().getInt("N", 5); // default N = 5
        // one extra slot: top[0] is the scratch/eviction slot that each new
        // value is compared through, so the array needs (len + 1) elements
        top = new int[len + 1];
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString().trim();
        if (line.length() > 0) {
            String[] str = line.split(",");
            if (str.length == 4) {
                int payment = Integer.parseInt(str[2]);
                add(payment); // fold the payment into the top[] array
            }
        }
    }

    private void add(int payment) {
        // overwrite the smallest slot, then re-sort ascending: if payment
        // belongs in the top N it bubbles up into top[1..len]; otherwise it
        // stays in top[0] and is evicted on the next call
        top[0] = payment;
        Arrays.sort(top);
    }

    @Override
    protected void cleanup(Context context)
            throws IOException, InterruptedException {
        // emit only top[1..len], skipping the scratch slot
        for (int i = 1; i < len + 1; i++) {
            context.write(new IntWritable(top[i]), new IntWritable(top[i]));
        }
    }
}
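The post stops at the mapper, which leaves each map task's local top N unmerged. The reducer below is a sketch of that missing piece (the class name topNRedu and the rank formatting are assumptions, not from the original); with a single reduce task it folds every mapper's local top N into one global array using the same scratch-slot trick.

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class topNRedu extends Reducer<IntWritable, IntWritable, Text, IntWritable> {

    int len;
    int[] top;

    @Override
    protected void setup(Context context)
            throws IOException, InterruptedException {
        len = context.getConfiguration().getInt("N", 5); // same N as the mapper
        top = new int[len + 1];
    }

    @Override
    protected void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // fold every locally-top value into the global top[] array
        for (IntWritable v : values) {
            top[0] = v.get();
            Arrays.sort(top);
        }
    }

    @Override
    protected void cleanup(Context context)
            throws IOException, InterruptedException {
        // walk from top[len] down to top[1]: rank 1 is the largest payment
        for (int i = len; i >= 1; i--) {
            context.write(new Text(String.valueOf(len - i + 1)),
                    new IntWritable(top[i]));
        }
    }
}

The driver would mirror the earlier ones, plus job.setNumReduceTasks(1) so a single task sees every mapper's list, and optionally conf.setInt("N", 5) to set N.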
