- 主页 > 生活百科 > >
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map stage of the de-duplication job.
 *
 * <p>Each input record's value (one line of text) is emitted unchanged as the output key,
 * paired with a {@link NullWritable} placeholder value.
 *
 * <p>Example: input records {@code <0, "2018-3-3 c">} and {@code <11, "2018-3-4 d">} are
 * emitted as {@code <"2018-3-3 c", null>} and {@code <"2018-3-4 d", null>}.
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    /**
     * Emits the input line as the output key with an empty value.
     *
     * @param key     the input key; not used by this mapper
     * @param value   the line of text to emit as the output key
     * @param context the task context the record is written to
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Write the incoming Text directly: no shared static holder is needed, since the
        // original simply aliased it to `value` before writing.
        context.write(value, NullWritable.get());
    }
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
|KPL预报丨重庆QG首发全员换阵,成都AG、DYG谁能终结对方连胜?
-
-
一角车吧|四驱+302马力2.0T,档次感超越迈腾凯美瑞,从33万降至13万
-
-
小橘子游戏日记|学会赶紧和好友组队去,排位十连胜不是梦,当前版本几大毒瘤阵容
-
多彩优生活iPhone SE2十几天销量超华为小米,今年苹果能超华为成世界第二?
-
#我国#我国崛起最快的一座城,超越南京,将碾压武汉,比肩上海
-
-
山寨|“以假乱真”的山寨冰棒,图一考验眼力,看到最后一个笑喷了
-
霍华德|时隔十年再夺冠?湖人不仅有詹眉,还有曾经第一控卫第一中锋!
-
中甲@这边官宣新援,那边公布冠名,四川中甲德比已经开始暗中较劲
-
沫雪月莹随笔|一款华为麒麟980,三款骁龙855,3000左右的旗舰新机
-
-
-
-
「看遍市井繁华」幽默笑话:老总老婆说你除了没跟我一张床其余老公该干的你都干了
-
-
览富财经2019年度IPO中介机构市场占有率“大比武”,原创
-
树屋|我的世界:从零开始搭建强悍树屋,手残党的福利,学会秒变大神
-
申元浩作品?请回答1988和机智的医生生活是一个导演吗?