import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MaxTempDriver {

    /**
     * Configures and submits the "Max Temperature" MapReduce job.
     *
     * @param args args[0] = HDFS input path, args[1] = HDFS output directory
     *             (the output directory must not already exist)
     * @throws Exception if job configuration or submission fails
     */
    public static void main(String[] args) throws Exception {
        // Fail fast with a usage hint instead of an ArrayIndexOutOfBoundsException.
        if (args.length != 2) {
            System.err.println("Usage: MaxTempDriver <input path> <output path>");
            System.exit(2);
        }

        // Job.getInstance() replaces the deprecated `new Job()` constructor.
        Job job = Job.getInstance();
        // setJarByClass lets Hadoop locate the jar containing this class
        // when shipping the job to the cluster.
        job.setJarByClass(MaxTempDriver.class);
        job.setJobName("Max Temperature");

        // Set input and output paths. The default input format is
        // TextInputFormat, so each record handed to the mapper is one line.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Mapper, combiner and reducer classes (Map and Reduce are defined
        // elsewhere in this project). Reusing the reducer as a combiner is
        // safe here because taking a maximum is commutative and associative.
        job.setMapperClass(Map.class);
        job.setCombinerClass(Reduce.class);
        job.setReducerClass(Reduce.class);

        // Types of the final (reducer) output key/value pairs.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Block until the job completes; exit 0 on success, 1 on failure.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
Driver Program
24. After saving all 3 programs, right-click on your project in the Package Explorer pane and click Export.
33. Check if the file is moved properly by displaying the file contents.
34. Now give the txt file stored in HDFS as input to the MapReduce program.
( /maxtemp_op is the directory in which the output will be stored.)
hadoop jar /home/cloudera/MaxTemp.jar MaxTempDriver /maxtemp_ip/MaxTemp_Data.txt /maxtemp_op
35.
36. Check the contents of the output directory. It will contain a file named “part-r-00000” which contains the
output of the program.
Display the contents of that file to check the output obtained.