Hadoop的第一个程序 wordcount

Hadoop的第一个程序 wordcount

package com.songguoliang.hadoop.chapter05;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * Word-count MapReduce demo: counts occurrences of each whitespace-separated
 * token in the input files and writes {@code word<TAB>count} pairs to the output.
 *
 * @date 2015-05-01 12:51:29
 * @author sgl
 */
public class WordCount {

    /**
     * Mapper: splits each input line into tokens and emits {@code (token, 1)}
     * for every token found.
     *
     * @date 2015-05-01 12:53:03
     * @author sgl
     */
    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {

        // Constant value 1, emitted once per token occurrence.
        private static final IntWritable one = new IntWritable(1);
        // Reused Text holder for the current token (avoids per-token allocation).
        private final Text word = new Text();

        /**
         * Tokenizes {@code value} on whitespace and writes {@code (token, 1)}
         * for each token.
         *
         * @param key     input offset key (unused)
         * @param value   one line of input text
         * @param context MapReduce output collector
         */
        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            // hasMoreTokens() is the call that pairs with nextToken();
            // the original used hasMoreElements() (Enumeration view).
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    /**
     * Reducer (also used as combiner): sums all counts for a given word.
     *
     * @date 2015-05-03 19:05:21
     * @author sgl
     */
    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        // Reused holder for the summed count of the current key.
        private final IntWritable result = new IntWritable();

        /**
         * Sums every partial count for {@code key} and emits the total.
         *
         * @param key     the word
         * @param values  partial counts produced by mappers/combiners
         * @param context MapReduce output collector
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    /**
     * Job driver. Expects exactly two remaining arguments after generic
     * options are stripped: the input path and the output path.
     * Exits with 2 on bad usage, 0 on job success, 1 on job failure.
     */
    public static void main(String[] args)
            throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();
        // GenericOptionsParser consumes standard Hadoop flags (-D, -files, ...).
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage:wordcount <in> <out>");
            System.exit(2);
        }
        // Job.getInstance replaces the deprecated new Job(conf, name) constructor.
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        // Summing is associative, so the reducer doubles as a combiner.
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

版权声明:本文为博主原创文章,转载请注明本文链接。

人生至少要有两次冲动,一为奋不顾身的爱情,一为说走就走的旅行。

Hadoop的第一个程序 wordcount

相关文章:

你感兴趣的文章:

标签云: