SlideShare a Scribd company logo
1 of 3
Download to read offline
package org.ajug;

import   org.apache.hadoop.fs.Path;
import   org.apache.hadoop.conf.*;
import   org.apache.hadoop.io.*;
import   org.apache.hadoop.mapreduce.*;
import   org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import   org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import   org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import   org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;


/**
 * Driver that configures and launches the "ajug" MapReduce job.
 *
 * <p>The job reads text input, runs {@code MnMMapper} and {@code MnMReducer}, and writes
 * {@code Text} keys with {@code Data} values as text output.
 */
public class MnM {

    /**
     * Configures the job, submits it, and blocks until it finishes.
     *
     * @param args {@code args[0]} is the input path and {@code args[1]} is the output path
     * @throws IllegalArgumentException if fewer than two arguments are supplied
     * @throws Exception if the job cannot be configured, submitted, or monitored
     */
    public static void main(String[] args) throws Exception {
        // Fail with a usable message instead of an ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 2) {
            throw new IllegalArgumentException("Usage: MnM <input path> <output path>");
        }

        Configuration conf = new Configuration();
        Job job = new Job(conf, "ajug");

        // Tell Hadoop which jar to ship to the cluster; without this the task JVMs
        // may be unable to load the mapper and reducer classes.
        job.setJarByClass(MnM.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Data.class);

        job.setMapperClass(MnMMapper.class);
        job.setReducerClass(MnMReducer.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Surface job failure to the calling shell; a successful run returns normally.
        if (!job.waitForCompletion(true)) {
            System.exit(1);
        }
    }

}

================================================

package org.ajug;

import org.apache.hadoop.io.DoubleWritable;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class Data implements WritableComparable<Data> {
    private DoubleWritable width = new DoubleWritable();
    private DoubleWritable weight = new DoubleWritable();

    void set(double a_width, double a_weight) {
        width.set(a_width);
        weight.set(a_weight);
    }

    public double getWidth() {
return width.get();
}
     public double getWeight() {
         return weight.get();
     }

    public void write(DataOutput out) throws IOException {
      width. write(out);
      weight. write(out);
    }

    public void readFields(DataInput in) throws IOException {
      width. readFields(in);
      weight. readFields(in);
    }


    public int hashCode() {
      return width.hashCode() * 163 + weight.hashCode();
    }

    public int compareTo(Data tp) {
      int cmp = width.compareTo(tp.width);
      if (cmp != 0) {
        return cmp;
      }
      return weight.compareTo(tp.weight);
    }

     public String toString() {
         return "" + width + "t" + weight;
     }

}
========================================
package org.ajug;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;

import java.io.IOException;

public class MnMMapper extends
     Mapper<LongWritable, Text, Text, Data> {

     private Text color = new Text();
     private Data data = new Data();

    public void map(LongWritable key, Text value, Context context) throws
IOException, InterruptedException {
        String line = value.toString();
        String[] fields = line.split(",");
        if (fields.length > 2) {
           color.set(fields[0]);
            double weight = Double.parseDouble(fields[1]);
            double width = Double.parseDouble(fields[2]);
            data.set(width, weight);
            context.write(color, data);
        }
}
}
======================================
package org.ajug;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;

import java.io.IOException;
import java.util.Iterator;

public class MnMReducer extends   Reducer <Text, Data, Text, Data> {

    private Data data = new Data();


    public void reduce(Text key, Iterable<Data> values, Context context)
throws IOException, InterruptedException {
        double weights = 0;
        double widths=0;
        int count = 0;

        Iterator iter = values.iterator();
        while (iter.hasNext()) {
            Data value = (Data)iter.next();
            count++;
            weights += value.getWeight();
            widths += value.getWidth();
        }
        data.set(widths/count, weights/count);
        context.write(key, data);
    }
}

More Related Content

What's hot

Weather of the Century: Design and Performance
Weather of the Century: Design and PerformanceWeather of the Century: Design and Performance
Weather of the Century: Design and PerformanceMongoDB
 
The Weather of the Century
The Weather of the CenturyThe Weather of the Century
The Weather of the CenturyMongoDB
 
Image magick++
Image magick++Image magick++
Image magick++Yubin Lim
 
Logic Equations Resolver J Script
Logic Equations Resolver   J ScriptLogic Equations Resolver   J Script
Logic Equations Resolver J ScriptRoman Agaev
 
名古屋SGGAE/J勉強会 Grails、Gaelykでハンズオン
名古屋SGGAE/J勉強会 Grails、Gaelykでハンズオン名古屋SGGAE/J勉強会 Grails、Gaelykでハンズオン
名古屋SGGAE/J勉強会 Grails、GaelykでハンズオンTsuyoshi Yamamoto
 
Google App Engine Developer - Day3
Google App Engine Developer - Day3Google App Engine Developer - Day3
Google App Engine Developer - Day3Simon Su
 
ggplot2 extensions-ggtree.
ggplot2 extensions-ggtree.ggplot2 extensions-ggtree.
ggplot2 extensions-ggtree.Dr. Volkan OBAN
 
Programming with Python and PostgreSQL
Programming with Python and PostgreSQLProgramming with Python and PostgreSQL
Programming with Python and PostgreSQLPeter Eisentraut
 
Building Real Time Systems on MongoDB Using the Oplog at Stripe
Building Real Time Systems on MongoDB Using the Oplog at StripeBuilding Real Time Systems on MongoDB Using the Oplog at Stripe
Building Real Time Systems on MongoDB Using the Oplog at StripeMongoDB
 
Python in the database
Python in the databasePython in the database
Python in the databasepybcn
 
Some R Examples[R table and Graphics] -Advanced Data Visualization in R (Some...
Some R Examples[R table and Graphics] -Advanced Data Visualization in R (Some...Some R Examples[R table and Graphics] -Advanced Data Visualization in R (Some...
Some R Examples[R table and Graphics] -Advanced Data Visualization in R (Some...Dr. Volkan OBAN
 
Herding types with Scala macros
Herding types with Scala macrosHerding types with Scala macros
Herding types with Scala macrosMarina Sigaeva
 
Engineering a robust(ish) data pipeline with Luigi and AWS Elastic Map Reduce
Engineering a robust(ish) data pipeline with Luigi and AWS Elastic Map ReduceEngineering a robust(ish) data pipeline with Luigi and AWS Elastic Map Reduce
Engineering a robust(ish) data pipeline with Luigi and AWS Elastic Map ReduceAaron Knight
 
ClickHouse Features for Advanced Users, by Aleksei Milovidov
ClickHouse Features for Advanced Users, by Aleksei MilovidovClickHouse Features for Advanced Users, by Aleksei Milovidov
ClickHouse Features for Advanced Users, by Aleksei MilovidovAltinity Ltd
 
GPars For Beginners
GPars For BeginnersGPars For Beginners
GPars For BeginnersMatt Passell
 
Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-
Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-
Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-Tsuyoshi Yamamoto
 
2019-01-29 - Demystifying Kotlin Coroutines
2019-01-29 - Demystifying Kotlin Coroutines2019-01-29 - Demystifying Kotlin Coroutines
2019-01-29 - Demystifying Kotlin CoroutinesEamonn Boyle
 

What's hot (20)

Weather of the Century: Design and Performance
Weather of the Century: Design and PerformanceWeather of the Century: Design and Performance
Weather of the Century: Design and Performance
 
The Weather of the Century
The Weather of the CenturyThe Weather of the Century
The Weather of the Century
 
Image magick++
Image magick++Image magick++
Image magick++
 
Logic Equations Resolver J Script
Logic Equations Resolver   J ScriptLogic Equations Resolver   J Script
Logic Equations Resolver J Script
 
名古屋SGGAE/J勉強会 Grails、Gaelykでハンズオン
名古屋SGGAE/J勉強会 Grails、Gaelykでハンズオン名古屋SGGAE/J勉強会 Grails、Gaelykでハンズオン
名古屋SGGAE/J勉強会 Grails、Gaelykでハンズオン
 
Google App Engine Developer - Day3
Google App Engine Developer - Day3Google App Engine Developer - Day3
Google App Engine Developer - Day3
 
ggplot2 extensions-ggtree.
ggplot2 extensions-ggtree.ggplot2 extensions-ggtree.
ggplot2 extensions-ggtree.
 
Programming with Python and PostgreSQL
Programming with Python and PostgreSQLProgramming with Python and PostgreSQL
Programming with Python and PostgreSQL
 
Building Real Time Systems on MongoDB Using the Oplog at Stripe
Building Real Time Systems on MongoDB Using the Oplog at StripeBuilding Real Time Systems on MongoDB Using the Oplog at Stripe
Building Real Time Systems on MongoDB Using the Oplog at Stripe
 
Python in the database
Python in the databasePython in the database
Python in the database
 
Some R Examples[R table and Graphics] -Advanced Data Visualization in R (Some...
Some R Examples[R table and Graphics] -Advanced Data Visualization in R (Some...Some R Examples[R table and Graphics] -Advanced Data Visualization in R (Some...
Some R Examples[R table and Graphics] -Advanced Data Visualization in R (Some...
 
Herding types with Scala macros
Herding types with Scala macrosHerding types with Scala macros
Herding types with Scala macros
 
Typelevel summit
Typelevel summitTypelevel summit
Typelevel summit
 
Engineering a robust(ish) data pipeline with Luigi and AWS Elastic Map Reduce
Engineering a robust(ish) data pipeline with Luigi and AWS Elastic Map ReduceEngineering a robust(ish) data pipeline with Luigi and AWS Elastic Map Reduce
Engineering a robust(ish) data pipeline with Luigi and AWS Elastic Map Reduce
 
ClickHouse Features for Advanced Users, by Aleksei Milovidov
ClickHouse Features for Advanced Users, by Aleksei MilovidovClickHouse Features for Advanced Users, by Aleksei Milovidov
ClickHouse Features for Advanced Users, by Aleksei Milovidov
 
Clojure functions midje
Clojure functions midjeClojure functions midje
Clojure functions midje
 
GPars For Beginners
GPars For BeginnersGPars For Beginners
GPars For Beginners
 
Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-
Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-
Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-
 
Tricks
TricksTricks
Tricks
 
2019-01-29 - Demystifying Kotlin Coroutines
2019-01-29 - Demystifying Kotlin Coroutines2019-01-29 - Demystifying Kotlin Coroutines
2019-01-29 - Demystifying Kotlin Coroutines
 

Viewers also liked

Hadoop and Cascading At AJUG July 2009
Hadoop and Cascading At AJUG July 2009Hadoop and Cascading At AJUG July 2009
Hadoop and Cascading At AJUG July 2009Christopher Curtin
 
Jenny, Katerina And Arynda
Jenny, Katerina And AryndaJenny, Katerina And Arynda
Jenny, Katerina And Aryndakaterinawsy
 
2011 march cloud computing atlanta
2011 march cloud computing atlanta2011 march cloud computing atlanta
2011 march cloud computing atlantaChristopher Curtin
 
Atlanta hadoop users group july 2013
Atlanta hadoop users group july 2013Atlanta hadoop users group july 2013
Atlanta hadoop users group july 2013Christopher Curtin
 
AJUG April 2011 Cascading example
AJUG April 2011 Cascading exampleAJUG April 2011 Cascading example
AJUG April 2011 Cascading exampleChristopher Curtin
 
Redis and Bloom Filters - Atlanta Java Users Group 9/2014
Redis and Bloom Filters - Atlanta Java Users Group 9/2014Redis and Bloom Filters - Atlanta Java Users Group 9/2014
Redis and Bloom Filters - Atlanta Java Users Group 9/2014Christopher Curtin
 

Viewers also liked (9)

Hadoop and Cascading At AJUG July 2009
Hadoop and Cascading At AJUG July 2009Hadoop and Cascading At AJUG July 2009
Hadoop and Cascading At AJUG July 2009
 
Jenny, Katerina And Arynda
Jenny, Katerina And AryndaJenny, Katerina And Arynda
Jenny, Katerina And Arynda
 
2011 march cloud computing atlanta
2011 march cloud computing atlanta2011 march cloud computing atlanta
2011 march cloud computing atlanta
 
Ajug april 2011
Ajug april 2011Ajug april 2011
Ajug april 2011
 
Atlanta hadoop users group july 2013
Atlanta hadoop users group july 2013Atlanta hadoop users group july 2013
Atlanta hadoop users group july 2013
 
IASA Atlanta September 2009
IASA Atlanta September 2009IASA Atlanta September 2009
IASA Atlanta September 2009
 
Nosql East October 2009
Nosql East October 2009Nosql East October 2009
Nosql East October 2009
 
AJUG April 2011 Cascading example
AJUG April 2011 Cascading exampleAJUG April 2011 Cascading example
AJUG April 2011 Cascading example
 
Redis and Bloom Filters - Atlanta Java Users Group 9/2014
Redis and Bloom Filters - Atlanta Java Users Group 9/2014Redis and Bloom Filters - Atlanta Java Users Group 9/2014
Redis and Bloom Filters - Atlanta Java Users Group 9/2014
 

Similar to AJUG April 2011 Raw hadoop example

Hadoop Installation_13_09_2022(1).docx
Hadoop Installation_13_09_2022(1).docxHadoop Installation_13_09_2022(1).docx
Hadoop Installation_13_09_2022(1).docx1MS20CS406
 
Create & Execute First Hadoop MapReduce Project in.pptx
Create & Execute First Hadoop MapReduce Project in.pptxCreate & Execute First Hadoop MapReduce Project in.pptx
Create & Execute First Hadoop MapReduce Project in.pptxvishal choudhary
 
JRubyKaigi2010 Hadoop Papyrus
JRubyKaigi2010 Hadoop PapyrusJRubyKaigi2010 Hadoop Papyrus
JRubyKaigi2010 Hadoop PapyrusKoichi Fujikawa
 
Scalable and Flexible Machine Learning With Scala @ LinkedIn
Scalable and Flexible Machine Learning With Scala @ LinkedInScalable and Flexible Machine Learning With Scala @ LinkedIn
Scalable and Flexible Machine Learning With Scala @ LinkedInVitaly Gordon
 
The current program only run one iteration of the KMeans algorithm. .docx
The current program only run one iteration of the KMeans algorithm. .docxThe current program only run one iteration of the KMeans algorithm. .docx
The current program only run one iteration of the KMeans algorithm. .docxtodd241
 
05 pig user defined functions (udfs)
05 pig user defined functions (udfs)05 pig user defined functions (udfs)
05 pig user defined functions (udfs)Subhas Kumar Ghosh
 
Introduction to Scalding and Monoids
Introduction to Scalding and MonoidsIntroduction to Scalding and Monoids
Introduction to Scalding and MonoidsHugo Gävert
 
Codepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash course
Codepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash courseCodepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash course
Codepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash courseSages
 
Please fix the java code (using eclipse)package hw4p1;import jav.pdf
Please fix the java code (using eclipse)package hw4p1;import jav.pdfPlease fix the java code (using eclipse)package hw4p1;import jav.pdf
Please fix the java code (using eclipse)package hw4p1;import jav.pdfinfo961251
 
EuroPython 2015 - Big Data with Python and Hadoop
EuroPython 2015 - Big Data with Python and HadoopEuroPython 2015 - Big Data with Python and Hadoop
EuroPython 2015 - Big Data with Python and HadoopMax Tepkeev
 
Having a problem figuring out where my errors are- The code is not run.pdf
Having a problem figuring out where my errors are- The code is not run.pdfHaving a problem figuring out where my errors are- The code is not run.pdf
Having a problem figuring out where my errors are- The code is not run.pdfNicholasflqStewartl
 
Cascading Through Hadoop for the Boulder JUG
Cascading Through Hadoop for the Boulder JUGCascading Through Hadoop for the Boulder JUG
Cascading Through Hadoop for the Boulder JUGMatthew McCullough
 
Testing multi outputformat based mapreduce
Testing multi outputformat based mapreduceTesting multi outputformat based mapreduce
Testing multi outputformat based mapreduceAshok Agarwal
 
Writing Hadoop Jobs in Scala using Scalding
Writing Hadoop Jobs in Scala using ScaldingWriting Hadoop Jobs in Scala using Scalding
Writing Hadoop Jobs in Scala using ScaldingToni Cebrián
 
Open XKE - Big Data, Big Mess par Bertrand Dechoux
Open XKE - Big Data, Big Mess par Bertrand DechouxOpen XKE - Big Data, Big Mess par Bertrand Dechoux
Open XKE - Big Data, Big Mess par Bertrand DechouxPublicis Sapient Engineering
 
Spark Day 2017- Spark 의 과거, 현재, 미래
Spark Day 2017- Spark 의 과거, 현재, 미래Spark Day 2017- Spark 의 과거, 현재, 미래
Spark Day 2017- Spark 의 과거, 현재, 미래Moon Soo Lee
 
TypeScript Introduction
TypeScript IntroductionTypeScript Introduction
TypeScript IntroductionDmitry Sheiko
 

Similar to AJUG April 2011 Raw hadoop example (20)

Hadoop Installation_13_09_2022(1).docx
Hadoop Installation_13_09_2022(1).docxHadoop Installation_13_09_2022(1).docx
Hadoop Installation_13_09_2022(1).docx
 
Create & Execute First Hadoop MapReduce Project in.pptx
Create & Execute First Hadoop MapReduce Project in.pptxCreate & Execute First Hadoop MapReduce Project in.pptx
Create & Execute First Hadoop MapReduce Project in.pptx
 
JRubyKaigi2010 Hadoop Papyrus
JRubyKaigi2010 Hadoop PapyrusJRubyKaigi2010 Hadoop Papyrus
JRubyKaigi2010 Hadoop Papyrus
 
Scalable and Flexible Machine Learning With Scala @ LinkedIn
Scalable and Flexible Machine Learning With Scala @ LinkedInScalable and Flexible Machine Learning With Scala @ LinkedIn
Scalable and Flexible Machine Learning With Scala @ LinkedIn
 
The current program only run one iteration of the KMeans algorithm. .docx
The current program only run one iteration of the KMeans algorithm. .docxThe current program only run one iteration of the KMeans algorithm. .docx
The current program only run one iteration of the KMeans algorithm. .docx
 
05 pig user defined functions (udfs)
05 pig user defined functions (udfs)05 pig user defined functions (udfs)
05 pig user defined functions (udfs)
 
Introduction to Scalding and Monoids
Introduction to Scalding and MonoidsIntroduction to Scalding and Monoids
Introduction to Scalding and Monoids
 
Hadoop
HadoopHadoop
Hadoop
 
Codepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash course
Codepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash courseCodepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash course
Codepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash course
 
Please fix the java code (using eclipse)package hw4p1;import jav.pdf
Please fix the java code (using eclipse)package hw4p1;import jav.pdfPlease fix the java code (using eclipse)package hw4p1;import jav.pdf
Please fix the java code (using eclipse)package hw4p1;import jav.pdf
 
EuroPython 2015 - Big Data with Python and Hadoop
EuroPython 2015 - Big Data with Python and HadoopEuroPython 2015 - Big Data with Python and Hadoop
EuroPython 2015 - Big Data with Python and Hadoop
 
Having a problem figuring out where my errors are- The code is not run.pdf
Having a problem figuring out where my errors are- The code is not run.pdfHaving a problem figuring out where my errors are- The code is not run.pdf
Having a problem figuring out where my errors are- The code is not run.pdf
 
Cascading Through Hadoop for the Boulder JUG
Cascading Through Hadoop for the Boulder JUGCascading Through Hadoop for the Boulder JUG
Cascading Through Hadoop for the Boulder JUG
 
Testing multi outputformat based mapreduce
Testing multi outputformat based mapreduceTesting multi outputformat based mapreduce
Testing multi outputformat based mapreduce
 
Amazon elastic map reduce
Amazon elastic map reduceAmazon elastic map reduce
Amazon elastic map reduce
 
Writing Hadoop Jobs in Scala using Scalding
Writing Hadoop Jobs in Scala using ScaldingWriting Hadoop Jobs in Scala using Scalding
Writing Hadoop Jobs in Scala using Scalding
 
Open XKE - Big Data, Big Mess par Bertrand Dechoux
Open XKE - Big Data, Big Mess par Bertrand DechouxOpen XKE - Big Data, Big Mess par Bertrand Dechoux
Open XKE - Big Data, Big Mess par Bertrand Dechoux
 
Spark Day 2017- Spark 의 과거, 현재, 미래
Spark Day 2017- Spark 의 과거, 현재, 미래Spark Day 2017- Spark 의 과거, 현재, 미래
Spark Day 2017- Spark 의 과거, 현재, 미래
 
TypeScript Introduction
TypeScript IntroductionTypeScript Introduction
TypeScript Introduction
 
MaxTemp PPT.pptx
MaxTemp PPT.pptxMaxTemp PPT.pptx
MaxTemp PPT.pptx
 

Recently uploaded

Artificial intelligence in the post-deep learning era
Artificial intelligence in the post-deep learning eraArtificial intelligence in the post-deep learning era
Artificial intelligence in the post-deep learning eraDeakin University
 
APIForce Zurich 5 April Automation LPDG
APIForce Zurich 5 April  Automation LPDGAPIForce Zurich 5 April  Automation LPDG
APIForce Zurich 5 April Automation LPDGMarianaLemus7
 
Human Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsHuman Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsMark Billinghurst
 
Unblocking The Main Thread Solving ANRs and Frozen Frames
Unblocking The Main Thread Solving ANRs and Frozen FramesUnblocking The Main Thread Solving ANRs and Frozen Frames
Unblocking The Main Thread Solving ANRs and Frozen FramesSinan KOZAK
 
Making_way_through_DLL_hollowing_inspite_of_CFG_by_Debjeet Banerjee.pptx
Making_way_through_DLL_hollowing_inspite_of_CFG_by_Debjeet Banerjee.pptxMaking_way_through_DLL_hollowing_inspite_of_CFG_by_Debjeet Banerjee.pptx
Making_way_through_DLL_hollowing_inspite_of_CFG_by_Debjeet Banerjee.pptxnull - The Open Security Community
 
Maximizing Board Effectiveness 2024 Webinar.pptx
Maximizing Board Effectiveness 2024 Webinar.pptxMaximizing Board Effectiveness 2024 Webinar.pptx
Maximizing Board Effectiveness 2024 Webinar.pptxOnBoard
 
SIEMENS: RAPUNZEL – A Tale About Knowledge Graph
SIEMENS: RAPUNZEL – A Tale About Knowledge GraphSIEMENS: RAPUNZEL – A Tale About Knowledge Graph
SIEMENS: RAPUNZEL – A Tale About Knowledge GraphNeo4j
 
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks..."LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...Fwdays
 
SQL Database Design For Developers at php[tek] 2024
SQL Database Design For Developers at php[tek] 2024SQL Database Design For Developers at php[tek] 2024
SQL Database Design For Developers at php[tek] 2024Scott Keck-Warren
 
Tech-Forward - Achieving Business Readiness For Copilot in Microsoft 365
Tech-Forward - Achieving Business Readiness For Copilot in Microsoft 365Tech-Forward - Achieving Business Readiness For Copilot in Microsoft 365
Tech-Forward - Achieving Business Readiness For Copilot in Microsoft 3652toLead Limited
 
My Hashitalk Indonesia April 2024 Presentation
My Hashitalk Indonesia April 2024 PresentationMy Hashitalk Indonesia April 2024 Presentation
My Hashitalk Indonesia April 2024 PresentationRidwan Fadjar
 
Are Multi-Cloud and Serverless Good or Bad?
Are Multi-Cloud and Serverless Good or Bad?Are Multi-Cloud and Serverless Good or Bad?
Are Multi-Cloud and Serverless Good or Bad?Mattias Andersson
 
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...Integration and Automation in Practice: CI/CD in Mule Integration and Automat...
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...Patryk Bandurski
 
#StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024
#StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024#StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024
#StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024BookNet Canada
 
Automating Business Process via MuleSoft Composer | Bangalore MuleSoft Meetup...
Automating Business Process via MuleSoft Composer | Bangalore MuleSoft Meetup...Automating Business Process via MuleSoft Composer | Bangalore MuleSoft Meetup...
Automating Business Process via MuleSoft Composer | Bangalore MuleSoft Meetup...shyamraj55
 
Transcript: New from BookNet Canada for 2024: BNC BiblioShare - Tech Forum 2024
Transcript: New from BookNet Canada for 2024: BNC BiblioShare - Tech Forum 2024Transcript: New from BookNet Canada for 2024: BNC BiblioShare - Tech Forum 2024
Transcript: New from BookNet Canada for 2024: BNC BiblioShare - Tech Forum 2024BookNet Canada
 
Beyond Boundaries: Leveraging No-Code Solutions for Industry Innovation
Beyond Boundaries: Leveraging No-Code Solutions for Industry InnovationBeyond Boundaries: Leveraging No-Code Solutions for Industry Innovation
Beyond Boundaries: Leveraging No-Code Solutions for Industry InnovationSafe Software
 
Unlocking the Potential of the Cloud for IBM Power Systems
Unlocking the Potential of the Cloud for IBM Power SystemsUnlocking the Potential of the Cloud for IBM Power Systems
Unlocking the Potential of the Cloud for IBM Power SystemsPrecisely
 
AI as an Interface for Commercial Buildings
AI as an Interface for Commercial BuildingsAI as an Interface for Commercial Buildings
AI as an Interface for Commercial BuildingsMemoori
 

Recently uploaded (20)

Artificial intelligence in the post-deep learning era
Artificial intelligence in the post-deep learning eraArtificial intelligence in the post-deep learning era
Artificial intelligence in the post-deep learning era
 
APIForce Zurich 5 April Automation LPDG
APIForce Zurich 5 April  Automation LPDGAPIForce Zurich 5 April  Automation LPDG
APIForce Zurich 5 April Automation LPDG
 
Human Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsHuman Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR Systems
 
E-Vehicle_Hacking_by_Parul Sharma_null_owasp.pptx
E-Vehicle_Hacking_by_Parul Sharma_null_owasp.pptxE-Vehicle_Hacking_by_Parul Sharma_null_owasp.pptx
E-Vehicle_Hacking_by_Parul Sharma_null_owasp.pptx
 
Unblocking The Main Thread Solving ANRs and Frozen Frames
Unblocking The Main Thread Solving ANRs and Frozen FramesUnblocking The Main Thread Solving ANRs and Frozen Frames
Unblocking The Main Thread Solving ANRs and Frozen Frames
 
Making_way_through_DLL_hollowing_inspite_of_CFG_by_Debjeet Banerjee.pptx
Making_way_through_DLL_hollowing_inspite_of_CFG_by_Debjeet Banerjee.pptxMaking_way_through_DLL_hollowing_inspite_of_CFG_by_Debjeet Banerjee.pptx
Making_way_through_DLL_hollowing_inspite_of_CFG_by_Debjeet Banerjee.pptx
 
Maximizing Board Effectiveness 2024 Webinar.pptx
Maximizing Board Effectiveness 2024 Webinar.pptxMaximizing Board Effectiveness 2024 Webinar.pptx
Maximizing Board Effectiveness 2024 Webinar.pptx
 
SIEMENS: RAPUNZEL – A Tale About Knowledge Graph
SIEMENS: RAPUNZEL – A Tale About Knowledge GraphSIEMENS: RAPUNZEL – A Tale About Knowledge Graph
SIEMENS: RAPUNZEL – A Tale About Knowledge Graph
 
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks..."LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
 
SQL Database Design For Developers at php[tek] 2024
SQL Database Design For Developers at php[tek] 2024SQL Database Design For Developers at php[tek] 2024
SQL Database Design For Developers at php[tek] 2024
 
Tech-Forward - Achieving Business Readiness For Copilot in Microsoft 365
Tech-Forward - Achieving Business Readiness For Copilot in Microsoft 365Tech-Forward - Achieving Business Readiness For Copilot in Microsoft 365
Tech-Forward - Achieving Business Readiness For Copilot in Microsoft 365
 
My Hashitalk Indonesia April 2024 Presentation
My Hashitalk Indonesia April 2024 PresentationMy Hashitalk Indonesia April 2024 Presentation
My Hashitalk Indonesia April 2024 Presentation
 
Are Multi-Cloud and Serverless Good or Bad?
Are Multi-Cloud and Serverless Good or Bad?Are Multi-Cloud and Serverless Good or Bad?
Are Multi-Cloud and Serverless Good or Bad?
 
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...Integration and Automation in Practice: CI/CD in Mule Integration and Automat...
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...
 
#StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024
#StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024#StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024
#StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024
 
Automating Business Process via MuleSoft Composer | Bangalore MuleSoft Meetup...
Automating Business Process via MuleSoft Composer | Bangalore MuleSoft Meetup...Automating Business Process via MuleSoft Composer | Bangalore MuleSoft Meetup...
Automating Business Process via MuleSoft Composer | Bangalore MuleSoft Meetup...
 
Transcript: New from BookNet Canada for 2024: BNC BiblioShare - Tech Forum 2024
Transcript: New from BookNet Canada for 2024: BNC BiblioShare - Tech Forum 2024Transcript: New from BookNet Canada for 2024: BNC BiblioShare - Tech Forum 2024
Transcript: New from BookNet Canada for 2024: BNC BiblioShare - Tech Forum 2024
 
Beyond Boundaries: Leveraging No-Code Solutions for Industry Innovation
Beyond Boundaries: Leveraging No-Code Solutions for Industry InnovationBeyond Boundaries: Leveraging No-Code Solutions for Industry Innovation
Beyond Boundaries: Leveraging No-Code Solutions for Industry Innovation
 
Unlocking the Potential of the Cloud for IBM Power Systems
Unlocking the Potential of the Cloud for IBM Power SystemsUnlocking the Potential of the Cloud for IBM Power Systems
Unlocking the Potential of the Cloud for IBM Power Systems
 
AI as an Interface for Commercial Buildings
AI as an Interface for Commercial BuildingsAI as an Interface for Commercial Buildings
AI as an Interface for Commercial Buildings
 

AJUG April 2011 Raw hadoop example

  • 1. package org.ajug; import org.apache.hadoop.fs.Path; import org.apache.hadoop.conf.*; import org.apache.hadoop.io.*; import org.apache.hadoop.mapreduce.*; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; public class MnM { public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = new Job(conf, "ajug"); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Data.class); job.setMapperClass(MnMMapper.class); job.setReducerClass(MnMReducer.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true); } } ================================================ package org.ajug; import org.apache.hadoop.io.DoubleWritable; import org.apache.hadoop.io.WritableComparable; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; public class Data implements WritableComparable<Data> { private DoubleWritable width = new DoubleWritable(); private DoubleWritable weight = new DoubleWritable(); void set(double a_width, double a_weight) { width.set(a_width); weight.set(a_weight); } public double getWidth() {
  • 2. return width.get(); } public double getWeight() { return weight.get(); } public void write(DataOutput out) throws IOException { width. write(out); weight. write(out); } public void readFields(DataInput in) throws IOException { width. readFields(in); weight. readFields(in); } public int hashCode() { return width.hashCode() * 163 + weight.hashCode(); } public int compareTo(Data tp) { int cmp = width.compareTo(tp.width); if (cmp != 0) { return cmp; } return weight.compareTo(tp.weight); } public String toString() { return "" + width + "t" + weight; } } ======================================== package org.ajug; import org.apache.hadoop.io.*; import org.apache.hadoop.mapreduce.*; import java.io.IOException; public class MnMMapper extends Mapper<LongWritable, Text, Text, Data> { private Text color = new Text(); private Data data = new Data(); public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); String[] fields = line.split(","); if (fields.length > 2) { color.set(fields[0]); double weight = Double.parseDouble(fields[1]); double width = Double.parseDouble(fields[2]); data.set(width, weight); context.write(color, data); }
  • 3. } } ====================================== package org.ajug; import org.apache.hadoop.io.*; import org.apache.hadoop.mapreduce.*; import java.io.IOException; import java.util.Iterator; public class MnMReducer extends Reducer <Text, Data, Text, Data> { private Data data = new Data(); public void reduce(Text key, Iterable<Data> values, Context context) throws IOException, InterruptedException { double weights = 0; double widths=0; int count = 0; Iterator iter = values.iterator(); while (iter.hasNext()) { Data value = (Data)iter.next(); count++; weights += value.getWeight(); widths += value.getWidth(); } data.set(widths/count, weights/count); context.write(key, data); } }