AJUG April 2011 Raw Hadoop example

Example code using the Hadoop APIs directly, from my April 2011 Atlanta Java Users Group presentation.
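
The job reads CSV lines of the form color,weight,width and writes one averaged line per color. As a hypothetical illustration (the jar name, paths, and data values below are made up, not from the presentation), input such as

    red,0.91,1.34
    blue,0.88,1.30
    red,0.93,1.36

could be run with the input path as the first argument and the output directory as the second:

    hadoop jar mnm.jar org.ajug.MnM input/mnms.csv output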

// MnM.java: the job driver that wires together the mapper, reducer, and I/O formats.
package org.ajug;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class MnM {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "ajug");

        // Ship the containing jar to the cluster so task JVMs can load these classes.
        job.setJarByClass(MnM.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Data.class);

        job.setMapperClass(MnMMapper.class);
        job.setReducerClass(MnMReducer.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);
    }
}

================================================

// Data.java: a custom Writable value type holding a (width, weight) pair of doubles.
package org.ajug;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class Data implements WritableComparable<Data> {
    private DoubleWritable width = new DoubleWritable();
    private DoubleWritable weight = new DoubleWritable();

    void set(double a_width, double a_weight) {
        width.set(a_width);
        weight.set(a_weight);
    }

    public double getWidth() {
        return width.get();
    }

    public double getWeight() {
        return weight.get();
    }

    // Serialize both fields in a fixed order...
    public void write(DataOutput out) throws IOException {
        width.write(out);
        weight.write(out);
    }

    // ...and deserialize them in the same order.
    public void readFields(DataInput in) throws IOException {
        width.readFields(in);
        weight.readFields(in);
    }

    public int hashCode() {
        return width.hashCode() * 163 + weight.hashCode();
    }

    public int compareTo(Data tp) {
        int cmp = width.compareTo(tp.width);
        if (cmp != 0) {
            return cmp;
        }
        return weight.compareTo(tp.weight);
    }

    // Tab-separated so TextOutputFormat produces clean columnar output.
    public String toString() {
        return width + "\t" + weight;
    }
}
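
As a quick sanity check on the Writable contract, Data can be round-tripped through Hadoop's in-memory serialization buffers. This sketch is not from the slides; DataRoundTrip is a hypothetical helper class (placed in org.ajug so it can call the package-private set method):

// DataRoundTrip.java: hypothetical sanity check, not part of the original example.
package org.ajug;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;

public class DataRoundTrip {
    public static void main(String[] args) throws Exception {
        Data before = new Data();
        before.set(1.25, 13.7); // width, weight

        // Serialize via write(), then deserialize via readFields().
        DataOutputBuffer out = new DataOutputBuffer();
        before.write(out);

        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());

        Data after = new Data();
        after.readFields(in);

        // Both lines should print the same tab-separated pair.
        System.out.println(before);
        System.out.println(after);
    }
}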

================================================

// MnMMapper.java: parses CSV lines of the form color,weight,width and emits (color, Data).
package org.ajug;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;

import java.io.IOException;

public class MnMMapper extends Mapper<LongWritable, Text, Text, Data> {
    private Text color = new Text();
    private Data data = new Data();

    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        String[] fields = line.split(",");
        // Skip lines that do not have all three fields.
        if (fields.length > 2) {
            color.set(fields[0]);
            double weight = Double.parseDouble(fields[1]);
            double width = Double.parseDouble(fields[2]);
            data.set(width, weight);
            context.write(color, data);
        }
    }
}
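
For the first sample line above, red,0.91,1.34, the mapper emits the key red with a Data value whose weight is 0.91 and whose width is 1.34; shorter or malformed lines are silently dropped by the fields.length check. Note the standard Hadoop idiom of reusing the color and data instances across map() calls: context.write serializes them immediately, so reuse is safe and cuts per-record allocation.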

================================================

// MnMReducer.java: averages the widths and weights of all values seen for each color.
package org.ajug;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;

import java.io.IOException;

public class MnMReducer extends Reducer<Text, Data, Text, Data> {
    private Data data = new Data();

    public void reduce(Text key, Iterable<Data> values, Context context)
            throws IOException, InterruptedException {
        double weights = 0;
        double widths = 0;
        int count = 0;
        for (Data value : values) {
            count++;
            weights += value.getWeight();
            widths += value.getWidth();
        }
        data.set(widths / count, weights / count);
        context.write(key, data);
    }
}
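
The loop copies only primitives out of each value before advancing; this matters because Hadoop reuses a single Data instance across iterations of the values iterable, so holding references to it would give wrong results. With the hypothetical input above, TextOutputFormat writes one tab-separated line per color (key, then Data.toString(), i.e. average width followed by average weight), roughly:

    blue    1.3     0.88
    red     1.35    0.92

Exact printed digits depend on floating-point rounding.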
