沉沙
2018-09-27
来源 :
阅读 829
评论 0
摘要:本篇教程介绍了大数据应用 Hadoop Demo:倒排索引详解,希望大家阅读本篇文章以后有所收获,加深对大数据、云计算及大数据应用的理解。
本篇教程介绍了大数据应用 Hadoop Demo:倒排索引详解,希望大家阅读本篇文章以后有所收获,加深对大数据、云计算及大数据应用的理解。
<
package com.asin.hdp.inverted;
import java.io.IOException;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class InvertedIndexCombine {
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(InvertedIndexCombine.class);
job.setMapperClass(invertedMapper.class);
job.setCombinerClass(invertedCombine.class);
job.setReducerClass(invertedReduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
FileInputFormat.addInputPath(job, new Path("e:/a.txt"));
FileInputFormat.addInputPath(job, new Path("e:/b.txt"));
FileInputFormat.addInputPath(job, new Path("e:/c.txt"));
FileOutputFormat.setOutputPath(job, new Path("e:/outputCombine"));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static class invertedMapper extends Mapper
@Override
protected void map(LongWritable key, Text value, Mapper
throws IOException, InterruptedException {
FileSplit split = (FileSplit) context.getInputSplit();
Path path = split.getPath();
String name = path.getName().replace("e:/", "");
StringTokenizer token = new StringTokenizer(value.toString(), " ");
while (token.hasMoreTokens()) {
context.write(new Text(name + "\t" + token.nextToken()), new Text("1"));
}
}
}
public static class invertedCombine extends Reducer
@Override
protected void reduce(Text key, Iterable
throws IOException, InterruptedException {
String line = key.toString();
String[] split = line.split("\t");
int sum = 0;
for (Text text : values) {
sum += Integer.parseInt(text.toString());
}
context.write(new Text(split[1]), new Text(split[0] + ":" + sum));
}
}
public static class invertedReduce extends Reducer
@Override
protected void reduce(Text key, Iterable
throws IOException, InterruptedException {
String val = "";
for (Text text : values) {
val += text + "\t";
}
context.write(new Text(key), new Text(val));
}
}
}
本文由职坐标整理并发布,希望对同学们有所帮助。了解更多详情请关注职坐标大数据云计算大数据应用频道!
喜欢 | 0
不喜欢 | 0
您输入的评论内容中包含违禁敏感词
我知道了

请输入正确的手机号码
请输入正确的验证码
您今天的短信下发次数太多了,明天再试试吧!
我们会在第一时间安排职业规划师联系您!
您也可以联系我们的职业规划师咨询:
版权所有 职坐标-一站式AI+学习就业服务平台 沪ICP备13042190号-4
上海海同信息科技有限公司 Copyright ©2015 www.zhizuobiao.com,All Rights Reserved.
沪公网安备 31011502005948号