A MapReduce word count example

I. Create the text files wordcount.txt and wordcount1.txt and upload them to the HDFS server
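Assuming both files are in the local HDFSdemo working directory, they can be uploaded like this (a minimal sketch; the /wc directory name matches the paths used below):

[hadoop@cloud01 HDFSdemo]$ hadoop fs -mkdir -p /wc
[hadoop@cloud01 HDFSdemo]$ hadoop fs -put wordcount.txt wordcount1.txt /wc

Verify the contents: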

[hadoop@cloud01 HDFSdemo]$ hadoop fs -cat /wc/wordcount.txt
hello world
hello China
hello wenjie
hello USA
hello China
hello China
hello Japan

[hadoop@cloud01 HDFSdemo]$ hadoop fs -cat /wc/wordcount1.txt
hello USA

Expected result:

<hello,8>, <world,1>, <China,3>, <wenjie,1>, <USA,2>, <Japan,1>

II. Count the words with an MR program

1. Write the map program, reduce program, and main driver in Eclipse

package mapreduce;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper: emits a <word, 1> pair for every word in each input line.
 *
 * @author shenfl
 */
public class WCMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    /**
     * @param key     byte offset of the line within the input file
     * @param value   one line of input text
     * @param context Hadoop job context
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Split the line on spaces and emit each word with a count of 1.
        String[] values = StringUtils.split(value.toString(), " ");
        for (String v : values) {
            context.write(new Text(v), new LongWritable(1));
        }
    }
}

package mapreduce;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reducer: sums the per-word counts emitted by the mapper.
 */
public class WCReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long count = 0;
        for (LongWritable v : values) {
            count += v.get();
        }
        context.write(key, new LongWritable(count));
    }
}
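Because the reduce function just sums counts, it is associative and commutative, so WCReducer can also serve as a combiner to pre-aggregate map output and shrink the shuffle. This is an optional addition, not part of the original job setup:

// Optional (assumption: not in the original driver below):
// pre-aggregate <word, 1> pairs on the map side.
job.setCombinerClass(WCReducer.class);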

2. The main driver that submits the job

package mapreduce;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

/**
 * <p>
 * Test program for Hadoop 2.4.1.
 * </p>
 *
 * @author shenfl
 */
public class WordCount {

    private static final String HDFS_PATH = "hdfs://cloud01:9000";

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        try {
            // conf.set("", "");
            Job job = Job.getInstance(conf);

            // Locate the job's jar by class; setJar then overrides it with an
            // explicit jar file, which is needed when submitting from Eclipse.
            job.setJarByClass(WordCount.class);
            job.setJar("wc.jar");

            job.setInputFormatClass(TextInputFormat.class);
            job.setOutputFormatClass(TextOutputFormat.class);

            job.setMapperClass(WCMapper.class);
            job.setReducerClass(WCReducer.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(LongWritable.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(LongWritable.class);

            Path inputPath = new Path(HDFS_PATH + "/wc");
            Path outputDir = new Path(HDFS_PATH + "/tmp");

            // Set the array of Paths as the list of inputs for the map-reduce job.
            FileInputFormat.setInputPaths(job, inputPath);
            // Set the Path of the output directory for the map-reduce job.
            FileOutputFormat.setOutputPath(job, outputDir);

            // The output directory must not exist, so delete any leftover
            // from a previous run.
            FileSystem fs = FileSystem.get(new URI(HDFS_PATH), conf);
            if (fs.exists(outputDir)) {
                fs.delete(outputDir, true);
            }

            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
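The driver hard-codes job.setJar("wc.jar"), so the compiled classes must be packaged under that exact name before the job runs. A plausible sequence, assuming Eclipse compiled the classes into bin/ (both the bin/ layout and the command-line submission are assumptions; the original runs the driver from Eclipse):

[hadoop@cloud01 HDFSdemo]$ jar -cf wc.jar -C bin/ mapreduce
[hadoop@cloud01 HDFSdemo]$ hadoop jar wc.jar mapreduce.WordCount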

3. After the MR job runs, view the result on HDFS
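If the job succeeds, TextOutputFormat writes one tab-separated <word, count> pair per line into the /tmp output directory, with keys in byte order (uppercase before lowercase). Given the expected counts above, the single reducer's output file should look like this:

[hadoop@cloud01 HDFSdemo]$ hadoop fs -cat /tmp/part-r-00000
China	3
Japan	1
USA	2
hello	8
wenjie	1
world	1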
