Successfully reported this slideshow.
We use your LinkedIn profile and activity data to personalize ads and to show you more relevant ads. You can change your ad preferences anytime.

AJUG April 2011 Raw Hadoop example


Published on

Example code using the Hadoop APIs directly from my April 2011 Atlanta Java Users Group presentation.

Published in: Technology
  • Be the first to comment

  • Be the first to like this

AJUG April 2011 Raw Hadoop example

  // =====================================================================
  // File: org/ajug/MnM.java
  // =====================================================================
  package org.ajug;

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapreduce.Job;
  import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

  /**
   * Driver for the "ajug" MapReduce job.
   *
   * <p>Wires {@link MnMMapper} and {@link MnMReducer} together, reading text input from
   * {@code args[0]} and writing text output to {@code args[1]}. Both the map and the reduce
   * phase emit {@link Text} keys and {@link Data} values.
   */
  public class MnM {

    /**
     * Configures and runs the job, then exits with the job's status.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
      if (args.length < 2) {
        System.err.println("Usage: MnM <input path> <output path>");
        System.exit(2);
      }

      Configuration conf = new Configuration();
      Job job = new Job(conf, "ajug");
      // Lets Hadoop locate the jar that contains the mapper/reducer classes when the job
      // is submitted to a cluster rather than run in-process.
      job.setJarByClass(MnM.class);

      job.setOutputKeyClass(Text.class);
      job.setOutputValueClass(Data.class);

      job.setMapperClass(MnMMapper.class);
      job.setReducerClass(MnMReducer.class);

      job.setInputFormatClass(TextInputFormat.class);
      job.setOutputFormatClass(TextOutputFormat.class);

      FileInputFormat.addInputPath(job, new Path(args[0]));
      FileOutputFormat.setOutputPath(job, new Path(args[1]));

      // Propagate job success/failure to the caller instead of always returning normally.
      System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
  }

  // =====================================================================
  // File: org/ajug/Data.java
  // =====================================================================
  package org.ajug;

  import java.io.DataInput;
  import java.io.DataOutput;
  import java.io.IOException;
  import org.apache.hadoop.io.DoubleWritable;
  import org.apache.hadoop.io.WritableComparable;

  /**
   * A (width, weight) pair of doubles that Hadoop can serialize and order.
   *
   * <p>Instances are mutable so that a single object can be reused across many
   * {@code context.write} calls. Ordering is by width first, then by weight.
   */
  public class Data implements WritableComparable<Data> {

    private final DoubleWritable width = new DoubleWritable();
    private final DoubleWritable weight = new DoubleWritable();

    /**
     * Overwrites both fields of this instance.
     *
     * @param a_width the new width
     * @param a_weight the new weight
     */
    void set(double a_width, double a_weight) {
      width.set(a_width);
      weight.set(a_weight);
    }

    /** Returns the current width. */
    public double getWidth() {
      return width.get();
    }

    /** Returns the current weight. */
    public double getWeight() {
      return weight.get();
    }

    /** Serializes width followed by weight; must mirror {@link #readFields(DataInput)}. */
    @Override
    public void write(DataOutput out) throws IOException {
      width.write(out);
      weight.write(out);
    }

    /** Deserializes width followed by weight; must mirror {@link #write(DataOutput)}. */
    @Override
    public void readFields(DataInput in) throws IOException {
      width.readFields(in);
      weight.readFields(in);
    }

    /** Two instances are equal when both width and weight are equal. */
    @Override
    public boolean equals(Object o) {
      if (this == o) {
        return true;
      }
      if (!(o instanceof Data)) {
        return false;
      }
      Data other = (Data) o;
      return width.equals(other.width) && weight.equals(other.weight);
    }

    @Override
    public int hashCode() {
      return width.hashCode() * 163 + weight.hashCode();
    }

    /** Orders by width, breaking ties by weight. */
    @Override
    public int compareTo(Data tp) {
      int cmp = width.compareTo(tp.width);
      if (cmp != 0) {
        return cmp;
      }
      return weight.compareTo(tp.weight);
    }

    /** Renders as {@code width<TAB>weight}. */
    @Override
    public String toString() {
      return "" + width + "\t" + weight;
    }
  }

  // =====================================================================
  // File: org/ajug/MnMMapper.java
  // =====================================================================
  package org.ajug;

  import java.io.IOException;
  import org.apache.hadoop.io.LongWritable;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapreduce.Mapper;

  /**
   * Parses comma-separated lines of the form {@code color,weight,width[,...]} and emits
   * {@code (color, Data(width, weight))}.
   *
   * <p>Lines with fewer than three fields, or whose weight/width are not parseable as
   * doubles, are skipped.
   */
  public class MnMMapper extends Mapper<LongWritable, Text, Text, Data> {

    private static final String COUNTER_GROUP = "MnM";
    private static final String MALFORMED_COUNTER = "MALFORMED_RECORDS";

    // Reused across calls to avoid allocating a key/value pair per input line.
    private final Text color = new Text();
    private final Data data = new Data();

    /**
     * Maps one input line to at most one output record.
     *
     * @param key the key supplied by the input format (not used here)
     * @param value the raw input line
     * @param context the task context used to emit output and update counters
     * @throws IOException if writing the output record fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      String line = value.toString();
      String[] fields = line.split(",");
      if (fields.length > 2) {
        double weight;
        double width;
        try {
          weight = Double.parseDouble(fields[1]);
          width = Double.parseDouble(fields[2]);
        } catch (NumberFormatException e) {
          // A header row or corrupt line should not fail the whole task; count it and
          // move on, the same way too-short lines are skipped.
          context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
          return;
        }
        color.set(fields[0]);
        data.set(width, weight);
        context.write(color, data);
      }
    }
  }

  // =====================================================================
  // File: org/ajug/MnMReducer.java
  // =====================================================================
  package org.ajug;

  import java.io.IOException;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapreduce.Reducer;

  /**
   * Averages the widths and weights of all {@link Data} values that share a key and emits
   * {@code (key, Data(meanWidth, meanWeight))}.
   */
  public class MnMReducer extends Reducer<Text, Data, Text, Data> {

    // Reused across calls to avoid allocating an output value per key.
    private final Data data = new Data();

    /**
     * Reduces all values for one key to a single record holding their arithmetic means.
     *
     * @param key the grouping key
     * @param values every {@link Data} emitted for {@code key}
     * @param context the task context used to emit output
     * @throws IOException if writing the output record fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    public void reduce(Text key, Iterable<Data> values, Context context)
        throws IOException, InterruptedException {
      double weights = 0;
      double widths = 0;
      long count = 0;
      for (Data value : values) {
        count++;
        weights += value.getWeight();
        widths += value.getWidth();
      }
      if (count == 0) {
        // Nothing to average; dividing by zero would emit NaN for both fields.
        return;
      }
      data.set(widths / count, weights / count);
      context.write(key, data);
    }
  }