HelloWorld（WordCount）

<span style="font-family: Arial, Helvetica, sans-serif;">package org.apache.hadoop.examples;</span>import java.io.IOException;import java.util.StringTokenizer;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.util.GenericOptionsParser;public class WordCount { public static class TokenizerMapper //继承Mapper类型，重写map方法extends Mapper<Object, Text, Text, IntWritable>{private final static IntWritable one = new IntWritable(1);private Text word = new Text();public void map(Object key, Text value, Context context) throws IOException, InterruptedException {StringTokenizer itr = new StringTokenizer(value.toString());while (itr.hasMoreTokens()) {//把每行拆分成一个个单词word.set(itr.nextToken());context.write(word, one);//key为该行首字母相对于文本文件首地址的偏移量}} }public static class IntSumReducer //继承Reducer类，重写reduce方法extends Reducer<Text,IntWritable,Text,IntWritable> {private IntWritable result = new IntWritable();public void reduce(Text key, Iterable<IntWritable> values,Context context) throws IOException, InterruptedException {int sum = 0;for (IntWritable val : values) {//values为对应单词的计数列表sum += val.get();//统计}result.set(sum);context.write(key, result);} } public static void main(String[] args) throws Exception {Configuration conf = new Configuration();String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();if (otherArgs.length != 2) {System.err.println("Usage: wordcount <in> <out>");System.exit(2);}Job job = new Job(conf, "word 
count");//Job对象负责管理和运行一个计算任务job.setJarByClass(WordCount.class);job.setMapperClass(TokenizerMapper.class);//TokenizerMapper完成Map过程的处理和使用job.setCombinerClass(IntSumReducer.class);//Combine过程job.setReducerClass(IntSumReducer.class);//IntSumReducer完成Reduce过程的处理和使用job.setOutputKeyClass(Text.class);//设置输出的key类型，，相当于java的string类型job.setOutputValueClass(IntWritable.class);//value类型为IntWritable，相当于intFileInputFormat.addInputPath(job, new Path(otherArgs[0]));//任务的输入路径FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));//任务输出路径System.exit(job.waitForCompletion(true) ? 0 : 1); }}

以上源码所在位置：

图解具体过程：（图片来自）

①由MapReduce完成拆分

key为每行行首相对于文件起始位置的字节偏移量（Linux和Windows的换行符长度不同，因此同一文本得到的偏移量会有区别）。

②用户定义map方法的执行过程

输入：上面过程得到的<key,value>

输出：新的<key,value>

// 传入map的key为该行行首相对于文本文件首地址的偏移量
while (itr.hasMoreTokens()) { // 把每行拆分成一个个单词
    word.set(itr.nextToken());
    context.write(word, one);
}

③Mapper将刚才得到的<key,value>，按key进行排序并将相同的key进行combine操作

④用户定义reduce方法的执行过程

int sum = 0;
for (IntWritable val : values) { // values为对应单词的计数列表（list）
    sum += val.get(); // 统计
}

真正的爱，应该超越生命的长度、心灵的宽度、灵魂的深度。

相关文章：

你感兴趣的文章：

标签云：