SlideShare a Scribd company logo
1 of 51
Hadoop
19:00   20:00



          ※
Hadoop       /      MapReduce(43p)
     Hadoop                    38p
     Hadoop   I/O(46p)
     MapRedue                        (42p)
     MapReduce        (22p)
     MapReduce                    (38p)
     MapReduce        (36p)
     Hadoop               (30p)
10   Hadoop       (30p)
11   Pig(46p)
12   HBase(30p)
13   ZooKeeper(38p)
http://twitter.com/just_do_neet
         http://ameblo.jp/just-do-neet/




Hadoop




MapReduce
Hadoop
−−Grace Hopper
(p1)



           /

100   /             Facebook

2.5                 Ancestry.com)

           20                       The
Internet Archive)

          15
How Much Information?
http://hmi.ucsd.edu/pdf/HMI_2009_ConsumerReport_Dec9_2009.pdf
(p3)



HDD
(p3)




Hadoop(HDFS




                 (HDFS)




              (MapReduce)
(p4)


RDBMS   MapReduce
Hadoop                                              (p9)




         Doug Cutting(Douglas Read Cutting)



                 Lucene     Nutch

                 Google              MapReduce

                 Hadoop            Yahoo! Inc.
                     Cloudera
http://www.sdtimes.com/blog/post/2009/08/10/Hadoop-creator-goes-to-Cloudera.aspx
Hadoop   (p9)




Doug Cutting
Hadoop   (p13)
[           ]Eclipse




    http://d.hatena.ne.jp/torazuka/20100102/p1
[              ]GAE




http://d.hatena.ne.jp/torazuka/20100101/gaetan
http://d.hatena.ne.jp/torazuka/20091011/pic
Hadoop           (p13)
Core

            I/O

Avro

            RPC



MapReduce




HDFS
Hadoop                      (p13)

Pig/HIVE

             DSL

HBase

               (≒BigTable)

ZooKeeper

            (≒Chubby)

Chukwa
MapReduce
Hadoop                     (p19)



MapReduce   map   reduce
MapReduce
MapReduce
MapReduce
MapReduce
MapReduce
MapReduce
Java MapReduce                       (p20)



Java

  MapReduce

       MapReduceBase    extend

       Mapper/Reducer            implement

  MapReduce

       JobClient
< 0.20.0
public class WordCount {
    public static class Map extends MapReduceBase implements Mapper<LongWritable,
Text, Text, IntWritable> {
        //Map
    }

    public static class Reduce extends MapReduceBase implements Reducer<Text,
IntWritable, Text, IntWritable> {
       //Reduce
    }

    public static void main(String[] args) throws Exception {
      JobConf conf = new JobConf(WordCount.class);
      conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(Map.class);
        conf.setReducerClass(Reduce.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}
< 0.20.0                         (20)
public class WordCount {
    public static class Map extends MapReduceBase implements Mapper<LongWritable,
Text, Text, IntWritable> {
        //Map
    }

    public static class Reduce extends MapReduceBase implements Reducer<Text,
IntWritable, Text, IntWritable> {
       //Reduce
    }

    public static void main(String[] args) throws Exception {
      JobConf conf = new JobConf(WordCount.class);
      conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(Map.class);
        conf.setReducerClass(Reduce.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}
JobConf
Job

        Map/Reduce

                 /

                         /
        /HBase

  Map/Reduce
Map/Reduce
   Key-Value

Mapper<LongWritable, Text, Text,
IntWritable>

     LongWritable   Text   IntWritable
                      Hadoop

          Key=Long/Value=Text
            Key=Text/Value=Int
Writable
Hadoop

  Text,LongWritable,IntWritable,
  BytesWritable.....

  Java

         org.apache.hadoop.io.serializer.WritableSerial
         ization


            (ReflectionUtil) ※deserialize

            DataInputStream
                 (Writable#readField / Writable#write)
>= 0.20.0
public class WordCount {
    public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
        //Map
    }

    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable>
{
        //Reduce
    }

    public static void main(String[] args) throws Exception {
      Job job = new Job();
      job.setJarByClass(WordCount.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setInputFormat(TextInputFormat.class);
        job.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.addInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);
    }
}
>= 0.20.0                            (20)
public class WordCount {
    public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
        //Map
    }

    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable>
{
        //Reduce
    }

    public static void main(String[] args) throws Exception {
      Job job = new Job();
      job.setJarByClass(WordCount.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setInputFormat(TextInputFormat.class);
        job.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.addInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);
    }
}
API
JobConf            Configuration

JobClient        Job

Mapper/Reducer



OutputCollector,Reporter           Context

Key-Value              Map

  Map
(p29)




→



→              Map   Reduce




→Map/Reduce
         Map
(p29)




JobTracker
→

TaskTracker
→             JobTracker
Hadoop
Reduce   (p30)
Reduce             (p31)




※Reduce   shuffle
(p31)



Map→Reduce
   (max/min/average   )

  Combiner

             Reduce
  (Shuffle                Key-Value



  Map→Reduce
Hadoop Streaming                     (p34)



              Map/Reduce

  C/Python/Ruby/perl...           Map/Reduce



Hadoop     hadoop-streaming.jar
Hadoop Streaming
 Map:cat/Reduce:wc

 HDFS
$ hadoop dfs -copyFromLocal /usr/local/hadoop/*.txt /dfs/test/input/
$ hadoop jar /usr/local/hadoop/contrib/streaming/hadoop-0.20.1-streaming.jar -input /dfs/test/input/ -output /dfs/test/
output -mapper cat -reducer "wc -l"

09/09/26 17:00:29 INFO jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
09/09/26 17:00:30 WARN mapred.JobClient: No job jar file set. User classes may not be found. See
JobConf(Class) or JobConf#setJar(String).
09/09/26 17:00:30 INFO mapred.FileInputFormat: Total input paths to process : 4
09/09/26 17:00:31 INFO mapred.FileInputFormat: Total input paths to process : 4
09/09/26 17:00:33 INFO streaming.StreamJob: map 0% reduce 0%
09/09/26 17:00:42 INFO streaming.StreamJob: map 100% reduce 0%
09/09/26 17:00:44 INFO streaming.StreamJob: map 100% reduce 100%
09/09/26 17:00:44 INFO streaming.StreamJob: Output: /dfs/test/output

$ hadoop dfs -cat /dfs/test/output/*
   8842
Hadoop Streaming
 Python
 http://www.michael-noll.com/wiki/Writing_An_Hadoop_MapReduce_Program_In_Python


$ hadoop jar /usr/local/hadoop/contrib/streaming/hadoop-0.20.1-streaming.jar -input /dfs/test/input/ -output /dfs/test/
output -mapper "python map.py" -reducer "python reduce.py"

09/09/26 17:29:25 INFO jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
09/09/26 17:29:26 WARN mapred.JobClient: No job jar file set. User classes may not be found. See
JobConf(Class) or JobConf#setJar(String).
09/09/26 17:29:26 INFO mapred.FileInputFormat: Total input paths to process : 4
09/09/26 17:29:26 INFO mapred.FileInputFormat: Total input paths to process : 4
09/09/26 17:29:30 INFO streaming.StreamJob: map 0% reduce 0%
09/09/26 17:29:33 INFO streaming.StreamJob: map 100% reduce 0%
09/09/26 17:29:35 INFO streaming.StreamJob: map 100% reduce 100%
09/09/26 17:29:35 INFO streaming.StreamJob: Output: /dfs/test/output

$ hadoop dfs -cat /dfs/test/output/*
via 1942
the 1476
to    1394
in    819
a     816
cutting)   740
Hadoop Pipes     (p38)



Hadoop MapReduce   C++

TaskTracker Map/Reduce
        JNI
Hadoop本 輪読会 1章〜2章
Hadoop本 輪読会 1章〜2章

More Related Content

What's hot

The Pregel Programming Model with Spark GraphX
The Pregel Programming Model with Spark GraphXThe Pregel Programming Model with Spark GraphX
The Pregel Programming Model with Spark GraphXAndrea Iacono
 
Introduction to MapReduce and Hadoop
Introduction to MapReduce and HadoopIntroduction to MapReduce and Hadoop
Introduction to MapReduce and HadoopMohamed Elsaka
 
Processing massive amount of data with Map Reduce using Apache Hadoop - Indi...
Processing massive amount of data with Map Reduce using Apache Hadoop  - Indi...Processing massive amount of data with Map Reduce using Apache Hadoop  - Indi...
Processing massive amount of data with Map Reduce using Apache Hadoop - Indi...IndicThreads
 
Interactive big data analytics
Interactive big data analyticsInteractive big data analytics
Interactive big data analyticsViet-Trung TRAN
 
Introduction to Hadoop and MapReduce
Introduction to Hadoop and MapReduceIntroduction to Hadoop and MapReduce
Introduction to Hadoop and MapReduceDr Ganesh Iyer
 
Writing Hadoop Jobs in Scala using Scalding
Writing Hadoop Jobs in Scala using ScaldingWriting Hadoop Jobs in Scala using Scalding
Writing Hadoop Jobs in Scala using ScaldingToni Cebrián
 
Aerospike Nested CDTs - Meetup Dec 2019
Aerospike Nested CDTs - Meetup Dec 2019Aerospike Nested CDTs - Meetup Dec 2019
Aerospike Nested CDTs - Meetup Dec 2019Aerospike
 
Ruby on Big Data (Cassandra + Hadoop)
Ruby on Big Data (Cassandra + Hadoop)Ruby on Big Data (Cassandra + Hadoop)
Ruby on Big Data (Cassandra + Hadoop)Brian O'Neill
 
RR & Docker @ MuensteR Meetup (Sep 2017)
RR & Docker @ MuensteR Meetup (Sep 2017)RR & Docker @ MuensteR Meetup (Sep 2017)
RR & Docker @ MuensteR Meetup (Sep 2017)Daniel Nüst
 
Git for beginners
Git for beginnersGit for beginners
Git for beginnersVinh Nguyen
 
Writing MapReduce Programs using Java | Big Data Hadoop Spark Tutorial | Clou...
Writing MapReduce Programs using Java | Big Data Hadoop Spark Tutorial | Clou...Writing MapReduce Programs using Java | Big Data Hadoop Spark Tutorial | Clou...
Writing MapReduce Programs using Java | Big Data Hadoop Spark Tutorial | Clou...CloudxLab
 
R + 15 minutes = Hadoop cluster
R + 15 minutes = Hadoop clusterR + 15 minutes = Hadoop cluster
R + 15 minutes = Hadoop clusterJeffrey Breen
 
Scalding - the not-so-basics @ ScalaDays 2014
Scalding - the not-so-basics @ ScalaDays 2014Scalding - the not-so-basics @ ScalaDays 2014
Scalding - the not-so-basics @ ScalaDays 2014Konrad Malawski
 
Spark 4th Meetup Londond - Building a Product with Spark
Spark 4th Meetup Londond - Building a Product with SparkSpark 4th Meetup Londond - Building a Product with Spark
Spark 4th Meetup Londond - Building a Product with Sparksamthemonad
 
Hadoop & MapReduce
Hadoop & MapReduceHadoop & MapReduce
Hadoop & MapReduceNewvewm
 

What's hot (20)

The Pregel Programming Model with Spark GraphX
The Pregel Programming Model with Spark GraphXThe Pregel Programming Model with Spark GraphX
The Pregel Programming Model with Spark GraphX
 
RHadoop, R meets Hadoop
RHadoop, R meets HadoopRHadoop, R meets Hadoop
RHadoop, R meets Hadoop
 
Introduction to MapReduce and Hadoop
Introduction to MapReduce and HadoopIntroduction to MapReduce and Hadoop
Introduction to MapReduce and Hadoop
 
Processing massive amount of data with Map Reduce using Apache Hadoop - Indi...
Processing massive amount of data with Map Reduce using Apache Hadoop  - Indi...Processing massive amount of data with Map Reduce using Apache Hadoop  - Indi...
Processing massive amount of data with Map Reduce using Apache Hadoop - Indi...
 
Gur1009
Gur1009Gur1009
Gur1009
 
Interactive big data analytics
Interactive big data analyticsInteractive big data analytics
Interactive big data analytics
 
Introduction to Hadoop and MapReduce
Introduction to Hadoop and MapReduceIntroduction to Hadoop and MapReduce
Introduction to Hadoop and MapReduce
 
Writing Hadoop Jobs in Scala using Scalding
Writing Hadoop Jobs in Scala using ScaldingWriting Hadoop Jobs in Scala using Scalding
Writing Hadoop Jobs in Scala using Scalding
 
Aerospike Nested CDTs - Meetup Dec 2019
Aerospike Nested CDTs - Meetup Dec 2019Aerospike Nested CDTs - Meetup Dec 2019
Aerospike Nested CDTs - Meetup Dec 2019
 
Ruby on Big Data (Cassandra + Hadoop)
Ruby on Big Data (Cassandra + Hadoop)Ruby on Big Data (Cassandra + Hadoop)
Ruby on Big Data (Cassandra + Hadoop)
 
MapReduce
MapReduceMapReduce
MapReduce
 
RR & Docker @ MuensteR Meetup (Sep 2017)
RR & Docker @ MuensteR Meetup (Sep 2017)RR & Docker @ MuensteR Meetup (Sep 2017)
RR & Docker @ MuensteR Meetup (Sep 2017)
 
Pig workshop
Pig workshopPig workshop
Pig workshop
 
Git for beginners
Git for beginnersGit for beginners
Git for beginners
 
Writing MapReduce Programs using Java | Big Data Hadoop Spark Tutorial | Clou...
Writing MapReduce Programs using Java | Big Data Hadoop Spark Tutorial | Clou...Writing MapReduce Programs using Java | Big Data Hadoop Spark Tutorial | Clou...
Writing MapReduce Programs using Java | Big Data Hadoop Spark Tutorial | Clou...
 
R + 15 minutes = Hadoop cluster
R + 15 minutes = Hadoop clusterR + 15 minutes = Hadoop cluster
R + 15 minutes = Hadoop cluster
 
Scalding - the not-so-basics @ ScalaDays 2014
Scalding - the not-so-basics @ ScalaDays 2014Scalding - the not-so-basics @ ScalaDays 2014
Scalding - the not-so-basics @ ScalaDays 2014
 
Spark 4th Meetup Londond - Building a Product with Spark
Spark 4th Meetup Londond - Building a Product with SparkSpark 4th Meetup Londond - Building a Product with Spark
Spark 4th Meetup Londond - Building a Product with Spark
 
Hadoop & MapReduce
Hadoop & MapReduceHadoop & MapReduce
Hadoop & MapReduce
 
Polimorfismo cosa?
Polimorfismo cosa?Polimorfismo cosa?
Polimorfismo cosa?
 

Similar to Hadoop本 輪読会 1章〜2章

Mapreduce by examples
Mapreduce by examplesMapreduce by examples
Mapreduce by examplesAndrea Iacono
 
Hadoop trainingin bangalore
Hadoop trainingin bangaloreHadoop trainingin bangalore
Hadoop trainingin bangaloreappaji intelhunt
 
Advance Map reduce - Apache hadoop Bigdata training by Design Pathshala
Advance Map reduce - Apache hadoop Bigdata training by Design PathshalaAdvance Map reduce - Apache hadoop Bigdata training by Design Pathshala
Advance Map reduce - Apache hadoop Bigdata training by Design PathshalaDesing Pathshala
 
Full stack analytics with Hadoop 2
Full stack analytics with Hadoop 2Full stack analytics with Hadoop 2
Full stack analytics with Hadoop 2Gabriele Modena
 
MAP REDUCE IN DATA SCIENCE.pptx
MAP REDUCE IN DATA SCIENCE.pptxMAP REDUCE IN DATA SCIENCE.pptx
MAP REDUCE IN DATA SCIENCE.pptxHARIKRISHNANU13
 
JRubyKaigi2010 Hadoop Papyrus
JRubyKaigi2010 Hadoop PapyrusJRubyKaigi2010 Hadoop Papyrus
JRubyKaigi2010 Hadoop PapyrusKoichi Fujikawa
 
Hadoop interview question
Hadoop interview questionHadoop interview question
Hadoop interview questionpappupassindia
 
Introducción a hadoop
Introducción a hadoopIntroducción a hadoop
Introducción a hadoopdatasalt
 
Big-data-analysis-training-in-mumbai
Big-data-analysis-training-in-mumbaiBig-data-analysis-training-in-mumbai
Big-data-analysis-training-in-mumbaiUnmesh Baile
 
Hadoop - Lessons Learned
Hadoop - Lessons LearnedHadoop - Lessons Learned
Hadoop - Lessons Learnedtcurdt
 
Introduction to Scalding and Monoids
Introduction to Scalding and MonoidsIntroduction to Scalding and Monoids
Introduction to Scalding and MonoidsHugo Gävert
 
Map-Reduce and Apache Hadoop
Map-Reduce and Apache HadoopMap-Reduce and Apache Hadoop
Map-Reduce and Apache HadoopSvetlin Nakov
 
Behm Shah Pagerank
Behm Shah PagerankBehm Shah Pagerank
Behm Shah Pagerankgothicane
 

Similar to Hadoop本 輪読会 1章〜2章 (20)

Mapreduce by examples
Mapreduce by examplesMapreduce by examples
Mapreduce by examples
 
Hadoop trainingin bangalore
Hadoop trainingin bangaloreHadoop trainingin bangalore
Hadoop trainingin bangalore
 
Map Reduce
Map ReduceMap Reduce
Map Reduce
 
Advance Map reduce - Apache hadoop Bigdata training by Design Pathshala
Advance Map reduce - Apache hadoop Bigdata training by Design PathshalaAdvance Map reduce - Apache hadoop Bigdata training by Design Pathshala
Advance Map reduce - Apache hadoop Bigdata training by Design Pathshala
 
Hadoop
HadoopHadoop
Hadoop
 
Full stack analytics with Hadoop 2
Full stack analytics with Hadoop 2Full stack analytics with Hadoop 2
Full stack analytics with Hadoop 2
 
MAP REDUCE IN DATA SCIENCE.pptx
MAP REDUCE IN DATA SCIENCE.pptxMAP REDUCE IN DATA SCIENCE.pptx
MAP REDUCE IN DATA SCIENCE.pptx
 
JRubyKaigi2010 Hadoop Papyrus
JRubyKaigi2010 Hadoop PapyrusJRubyKaigi2010 Hadoop Papyrus
JRubyKaigi2010 Hadoop Papyrus
 
Hadoop interview question
Hadoop interview questionHadoop interview question
Hadoop interview question
 
Introducción a hadoop
Introducción a hadoopIntroducción a hadoop
Introducción a hadoop
 
Hadoop ecosystem
Hadoop ecosystemHadoop ecosystem
Hadoop ecosystem
 
Big-data-analysis-training-in-mumbai
Big-data-analysis-training-in-mumbaiBig-data-analysis-training-in-mumbai
Big-data-analysis-training-in-mumbai
 
Hadoop - Lessons Learned
Hadoop - Lessons LearnedHadoop - Lessons Learned
Hadoop - Lessons Learned
 
Hadoop
HadoopHadoop
Hadoop
 
Hadoop ecosystem
Hadoop ecosystemHadoop ecosystem
Hadoop ecosystem
 
Zenith it-hadoop-training
Zenith it-hadoop-trainingZenith it-hadoop-training
Zenith it-hadoop-training
 
hadoop.ppt
hadoop.ppthadoop.ppt
hadoop.ppt
 
Introduction to Scalding and Monoids
Introduction to Scalding and MonoidsIntroduction to Scalding and Monoids
Introduction to Scalding and Monoids
 
Map-Reduce and Apache Hadoop
Map-Reduce and Apache HadoopMap-Reduce and Apache Hadoop
Map-Reduce and Apache Hadoop
 
Behm Shah Pagerank
Behm Shah PagerankBehm Shah Pagerank
Behm Shah Pagerank
 

More from moai kids

中国最新ニュースアプリ事情
中国最新ニュースアプリ事情中国最新ニュースアプリ事情
中国最新ニュースアプリ事情moai kids
 
FluentdとRedshiftの素敵な関係
FluentdとRedshiftの素敵な関係FluentdとRedshiftの素敵な関係
FluentdとRedshiftの素敵な関係moai kids
 
Twitterのsnowflakeについて
TwitterのsnowflakeについてTwitterのsnowflakeについて
Twitterのsnowflakeについてmoai kids
 
Programming Hive Reading #4
Programming Hive Reading #4Programming Hive Reading #4
Programming Hive Reading #4moai kids
 
Programming Hive Reading #3
Programming Hive Reading #3Programming Hive Reading #3
Programming Hive Reading #3moai kids
 
"Programming Hive" Reading #1
"Programming Hive" Reading #1"Programming Hive" Reading #1
"Programming Hive" Reading #1moai kids
 
Casual Compression on MongoDB
Casual Compression on MongoDBCasual Compression on MongoDB
Casual Compression on MongoDBmoai kids
 
Introduction to MongoDB
Introduction to MongoDBIntroduction to MongoDB
Introduction to MongoDBmoai kids
 
Hadoop Conference Japan 2011 Fallに行ってきました
Hadoop Conference Japan 2011 Fallに行ってきましたHadoop Conference Japan 2011 Fallに行ってきました
Hadoop Conference Japan 2011 Fallに行ってきましたmoai kids
 
HBase本輪読会資料(11章)
HBase本輪読会資料(11章)HBase本輪読会資料(11章)
HBase本輪読会資料(11章)moai kids
 
snappyについて
snappyについてsnappyについて
snappyについてmoai kids
 
第四回月次セミナー(公開版)
第四回月次セミナー(公開版)第四回月次セミナー(公開版)
第四回月次セミナー(公開版)moai kids
 
第三回月次セミナー(公開版)
第三回月次セミナー(公開版)第三回月次セミナー(公開版)
第三回月次セミナー(公開版)moai kids
 
Pythonで自然言語処理
Pythonで自然言語処理Pythonで自然言語処理
Pythonで自然言語処理moai kids
 
HandlerSocket plugin Client for Javaとそれを用いたベンチマーク
HandlerSocket plugin Client for Javaとそれを用いたベンチマークHandlerSocket plugin Client for Javaとそれを用いたベンチマーク
HandlerSocket plugin Client for Javaとそれを用いたベンチマークmoai kids
 
Yammer試用レポート(公開版)
Yammer試用レポート(公開版)Yammer試用レポート(公開版)
Yammer試用レポート(公開版)moai kids
 
掲示板時間軸コーパスを用いたワードトレンド解析(公開版)
掲示板時間軸コーパスを用いたワードトレンド解析(公開版)掲示板時間軸コーパスを用いたワードトレンド解析(公開版)
掲示板時間軸コーパスを用いたワードトレンド解析(公開版)moai kids
 
中国と私(仮題)
中国と私(仮題)中国と私(仮題)
中国と私(仮題)moai kids
 
不自然言語処理コンテストLT資料
不自然言語処理コンテストLT資料不自然言語処理コンテストLT資料
不自然言語処理コンテストLT資料moai kids
 
n-gramコーパスを用いた類義語自動獲得手法について
n-gramコーパスを用いた類義語自動獲得手法についてn-gramコーパスを用いた類義語自動獲得手法について
n-gramコーパスを用いた類義語自動獲得手法についてmoai kids
 

More from moai kids (20)

中国最新ニュースアプリ事情
中国最新ニュースアプリ事情中国最新ニュースアプリ事情
中国最新ニュースアプリ事情
 
FluentdとRedshiftの素敵な関係
FluentdとRedshiftの素敵な関係FluentdとRedshiftの素敵な関係
FluentdとRedshiftの素敵な関係
 
Twitterのsnowflakeについて
TwitterのsnowflakeについてTwitterのsnowflakeについて
Twitterのsnowflakeについて
 
Programming Hive Reading #4
Programming Hive Reading #4Programming Hive Reading #4
Programming Hive Reading #4
 
Programming Hive Reading #3
Programming Hive Reading #3Programming Hive Reading #3
Programming Hive Reading #3
 
"Programming Hive" Reading #1
"Programming Hive" Reading #1"Programming Hive" Reading #1
"Programming Hive" Reading #1
 
Casual Compression on MongoDB
Casual Compression on MongoDBCasual Compression on MongoDB
Casual Compression on MongoDB
 
Introduction to MongoDB
Introduction to MongoDBIntroduction to MongoDB
Introduction to MongoDB
 
Hadoop Conference Japan 2011 Fallに行ってきました
Hadoop Conference Japan 2011 Fallに行ってきましたHadoop Conference Japan 2011 Fallに行ってきました
Hadoop Conference Japan 2011 Fallに行ってきました
 
HBase本輪読会資料(11章)
HBase本輪読会資料(11章)HBase本輪読会資料(11章)
HBase本輪読会資料(11章)
 
snappyについて
snappyについてsnappyについて
snappyについて
 
第四回月次セミナー(公開版)
第四回月次セミナー(公開版)第四回月次セミナー(公開版)
第四回月次セミナー(公開版)
 
第三回月次セミナー(公開版)
第三回月次セミナー(公開版)第三回月次セミナー(公開版)
第三回月次セミナー(公開版)
 
Pythonで自然言語処理
Pythonで自然言語処理Pythonで自然言語処理
Pythonで自然言語処理
 
HandlerSocket plugin Client for Javaとそれを用いたベンチマーク
HandlerSocket plugin Client for Javaとそれを用いたベンチマークHandlerSocket plugin Client for Javaとそれを用いたベンチマーク
HandlerSocket plugin Client for Javaとそれを用いたベンチマーク
 
Yammer試用レポート(公開版)
Yammer試用レポート(公開版)Yammer試用レポート(公開版)
Yammer試用レポート(公開版)
 
掲示板時間軸コーパスを用いたワードトレンド解析(公開版)
掲示板時間軸コーパスを用いたワードトレンド解析(公開版)掲示板時間軸コーパスを用いたワードトレンド解析(公開版)
掲示板時間軸コーパスを用いたワードトレンド解析(公開版)
 
中国と私(仮題)
中国と私(仮題)中国と私(仮題)
中国と私(仮題)
 
不自然言語処理コンテストLT資料
不自然言語処理コンテストLT資料不自然言語処理コンテストLT資料
不自然言語処理コンテストLT資料
 
n-gramコーパスを用いた類義語自動獲得手法について
n-gramコーパスを用いた類義語自動獲得手法についてn-gramコーパスを用いた類義語自動獲得手法について
n-gramコーパスを用いた類義語自動獲得手法について
 

Recently uploaded

Understanding the Laravel MVC Architecture
Understanding the Laravel MVC ArchitectureUnderstanding the Laravel MVC Architecture
Understanding the Laravel MVC ArchitecturePixlogix Infotech
 
Slack Application Development 101 Slides
Slack Application Development 101 SlidesSlack Application Development 101 Slides
Slack Application Development 101 Slidespraypatel2
 
SQL Database Design For Developers at php[tek] 2024
SQL Database Design For Developers at php[tek] 2024SQL Database Design For Developers at php[tek] 2024
SQL Database Design For Developers at php[tek] 2024Scott Keck-Warren
 
Human Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsHuman Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsMark Billinghurst
 
SIEMENS: RAPUNZEL – A Tale About Knowledge Graph
SIEMENS: RAPUNZEL – A Tale About Knowledge GraphSIEMENS: RAPUNZEL – A Tale About Knowledge Graph
SIEMENS: RAPUNZEL – A Tale About Knowledge GraphNeo4j
 
08448380779 Call Girls In Civil Lines Women Seeking Men
08448380779 Call Girls In Civil Lines Women Seeking Men08448380779 Call Girls In Civil Lines Women Seeking Men
08448380779 Call Girls In Civil Lines Women Seeking MenDelhi Call girls
 
Key Features Of Token Development (1).pptx
Key  Features Of Token  Development (1).pptxKey  Features Of Token  Development (1).pptx
Key Features Of Token Development (1).pptxLBM Solutions
 
Azure Monitor & Application Insight to monitor Infrastructure & Application
Azure Monitor & Application Insight to monitor Infrastructure & ApplicationAzure Monitor & Application Insight to monitor Infrastructure & Application
Azure Monitor & Application Insight to monitor Infrastructure & ApplicationAndikSusilo4
 
FULL ENJOY 🔝 8264348440 🔝 Call Girls in Diplomatic Enclave | Delhi
FULL ENJOY 🔝 8264348440 🔝 Call Girls in Diplomatic Enclave | DelhiFULL ENJOY 🔝 8264348440 🔝 Call Girls in Diplomatic Enclave | Delhi
FULL ENJOY 🔝 8264348440 🔝 Call Girls in Diplomatic Enclave | Delhisoniya singh
 
Artificial intelligence in the post-deep learning era
Artificial intelligence in the post-deep learning eraArtificial intelligence in the post-deep learning era
Artificial intelligence in the post-deep learning eraDeakin University
 
GenCyber Cyber Security Day Presentation
GenCyber Cyber Security Day PresentationGenCyber Cyber Security Day Presentation
GenCyber Cyber Security Day PresentationMichael W. Hawkins
 
Pigging Solutions in Pet Food Manufacturing
Pigging Solutions in Pet Food ManufacturingPigging Solutions in Pet Food Manufacturing
Pigging Solutions in Pet Food ManufacturingPigging Solutions
 
My Hashitalk Indonesia April 2024 Presentation
My Hashitalk Indonesia April 2024 PresentationMy Hashitalk Indonesia April 2024 Presentation
My Hashitalk Indonesia April 2024 PresentationRidwan Fadjar
 
Beyond Boundaries: Leveraging No-Code Solutions for Industry Innovation
Beyond Boundaries: Leveraging No-Code Solutions for Industry InnovationBeyond Boundaries: Leveraging No-Code Solutions for Industry Innovation
Beyond Boundaries: Leveraging No-Code Solutions for Industry InnovationSafe Software
 
Breaking the Kubernetes Kill Chain: Host Path Mount
Breaking the Kubernetes Kill Chain: Host Path MountBreaking the Kubernetes Kill Chain: Host Path Mount
Breaking the Kubernetes Kill Chain: Host Path MountPuma Security, LLC
 
Transcript: #StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024
Transcript: #StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024Transcript: #StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024
Transcript: #StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024BookNet Canada
 
Automating Business Process via MuleSoft Composer | Bangalore MuleSoft Meetup...
Automating Business Process via MuleSoft Composer | Bangalore MuleSoft Meetup...Automating Business Process via MuleSoft Composer | Bangalore MuleSoft Meetup...
Automating Business Process via MuleSoft Composer | Bangalore MuleSoft Meetup...shyamraj55
 
Unblocking The Main Thread Solving ANRs and Frozen Frames
Unblocking The Main Thread Solving ANRs and Frozen FramesUnblocking The Main Thread Solving ANRs and Frozen Frames
Unblocking The Main Thread Solving ANRs and Frozen FramesSinan KOZAK
 
Hyderabad Call Girls Khairatabad ✨ 7001305949 ✨ Cheap Price Your Budget
Hyderabad Call Girls Khairatabad ✨ 7001305949 ✨ Cheap Price Your BudgetHyderabad Call Girls Khairatabad ✨ 7001305949 ✨ Cheap Price Your Budget
Hyderabad Call Girls Khairatabad ✨ 7001305949 ✨ Cheap Price Your BudgetEnjoy Anytime
 
#StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024
#StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024#StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024
#StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024BookNet Canada
 

Recently uploaded (20)

Understanding the Laravel MVC Architecture
Understanding the Laravel MVC ArchitectureUnderstanding the Laravel MVC Architecture
Understanding the Laravel MVC Architecture
 
Slack Application Development 101 Slides
Slack Application Development 101 SlidesSlack Application Development 101 Slides
Slack Application Development 101 Slides
 
SQL Database Design For Developers at php[tek] 2024
SQL Database Design For Developers at php[tek] 2024SQL Database Design For Developers at php[tek] 2024
SQL Database Design For Developers at php[tek] 2024
 
Human Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsHuman Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR Systems
 
SIEMENS: RAPUNZEL – A Tale About Knowledge Graph
SIEMENS: RAPUNZEL – A Tale About Knowledge GraphSIEMENS: RAPUNZEL – A Tale About Knowledge Graph
SIEMENS: RAPUNZEL – A Tale About Knowledge Graph
 
08448380779 Call Girls In Civil Lines Women Seeking Men
08448380779 Call Girls In Civil Lines Women Seeking Men08448380779 Call Girls In Civil Lines Women Seeking Men
08448380779 Call Girls In Civil Lines Women Seeking Men
 
Key Features Of Token Development (1).pptx
Key  Features Of Token  Development (1).pptxKey  Features Of Token  Development (1).pptx
Key Features Of Token Development (1).pptx
 
Azure Monitor & Application Insight to monitor Infrastructure & Application
Azure Monitor & Application Insight to monitor Infrastructure & ApplicationAzure Monitor & Application Insight to monitor Infrastructure & Application
Azure Monitor & Application Insight to monitor Infrastructure & Application
 
FULL ENJOY 🔝 8264348440 🔝 Call Girls in Diplomatic Enclave | Delhi
FULL ENJOY 🔝 8264348440 🔝 Call Girls in Diplomatic Enclave | DelhiFULL ENJOY 🔝 8264348440 🔝 Call Girls in Diplomatic Enclave | Delhi
FULL ENJOY 🔝 8264348440 🔝 Call Girls in Diplomatic Enclave | Delhi
 
Artificial intelligence in the post-deep learning era
Artificial intelligence in the post-deep learning eraArtificial intelligence in the post-deep learning era
Artificial intelligence in the post-deep learning era
 
GenCyber Cyber Security Day Presentation
GenCyber Cyber Security Day PresentationGenCyber Cyber Security Day Presentation
GenCyber Cyber Security Day Presentation
 
Pigging Solutions in Pet Food Manufacturing
Pigging Solutions in Pet Food ManufacturingPigging Solutions in Pet Food Manufacturing
Pigging Solutions in Pet Food Manufacturing
 
My Hashitalk Indonesia April 2024 Presentation
My Hashitalk Indonesia April 2024 PresentationMy Hashitalk Indonesia April 2024 Presentation
My Hashitalk Indonesia April 2024 Presentation
 
Beyond Boundaries: Leveraging No-Code Solutions for Industry Innovation
Beyond Boundaries: Leveraging No-Code Solutions for Industry InnovationBeyond Boundaries: Leveraging No-Code Solutions for Industry Innovation
Beyond Boundaries: Leveraging No-Code Solutions for Industry Innovation
 
Breaking the Kubernetes Kill Chain: Host Path Mount
Breaking the Kubernetes Kill Chain: Host Path MountBreaking the Kubernetes Kill Chain: Host Path Mount
Breaking the Kubernetes Kill Chain: Host Path Mount
 
Transcript: #StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024
Transcript: #StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024Transcript: #StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024
Transcript: #StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024
 
Automating Business Process via MuleSoft Composer | Bangalore MuleSoft Meetup...
Automating Business Process via MuleSoft Composer | Bangalore MuleSoft Meetup...Automating Business Process via MuleSoft Composer | Bangalore MuleSoft Meetup...
Automating Business Process via MuleSoft Composer | Bangalore MuleSoft Meetup...
 
Unblocking The Main Thread Solving ANRs and Frozen Frames
Unblocking The Main Thread Solving ANRs and Frozen FramesUnblocking The Main Thread Solving ANRs and Frozen Frames
Unblocking The Main Thread Solving ANRs and Frozen Frames
 
Hyderabad Call Girls Khairatabad ✨ 7001305949 ✨ Cheap Price Your Budget
Hyderabad Call Girls Khairatabad ✨ 7001305949 ✨ Cheap Price Your BudgetHyderabad Call Girls Khairatabad ✨ 7001305949 ✨ Cheap Price Your Budget
Hyderabad Call Girls Khairatabad ✨ 7001305949 ✨ Cheap Price Your Budget
 
#StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024
#StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024#StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024
#StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024
 

Hadoop本 輪読会 1章〜2章

  • 2.
  • 3. 19:00 20:00 ※
  • 4. Hadoop / MapReduce(43p) Hadoop 38p Hadoop I/O(46p) MapRedue (42p) MapReduce (22p) MapReduce (38p) MapReduce (36p) Hadoop (30p) 10 Hadoop (30p) 11 Pig(46p) 12 HBase(30p) 13 ZooKeeper(38p)
  • 5.
  • 6. http://twitter.com/just_do_neet http://ameblo.jp/just-do-neet/ Hadoop MapReduce
  • 9. (p1) / 100 / Facebook 2.5 Ancestry.com) 20 The Internet Archive) 15
  • 12.
  • 13. (p3) Hadoop(HDFS (HDFS) (MapReduce)
  • 14. (p4) RDBMS MapReduce
  • 15. Hadoop (p9) Doug Cutting(Douglas Read Cutting) Lucene Nutch Google MapReduce Hadoop Yahoo! Inc. Cloudera http://www.sdtimes.com/blog/post/2009/08/10/Hadoop-creator-goes-to-Cloudera.aspx
  • 16. Hadoop (p9) Doug Cutting
  • 17. Hadoop (p13)
  • 18. [ ]Eclipse http://d.hatena.ne.jp/torazuka/20100102/p1
  • 19. [ ]GAE http://d.hatena.ne.jp/torazuka/20100101/gaetan
  • 21. Hadoop (p13) Core I/O Avro RPC MapReduce HDFS
  • 22. Hadoop (p13) Pig/HIVE DSL HBase (≒BigTable) ZooKeeper (≒Chubby) Chukwa
  • 24. Hadoop (p19) MapReduce map reduce
  • 31. Java MapReduce (p20) Java MapReduce MapReduceBase extend Mapper/Reducer implement MapReduce JobClient
  • 32. < 0.20.0 public class WordCount { public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> { //Map } public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> { //Reduce } public static void main(String[] args) throws Exception { JobConf conf = new JobConf(WordCount.class); conf.setJobName("wordcount"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(Map.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf); } }
  • 33. < 0.20.0 (20) public class WordCount { public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> { //Map } public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> { //Reduce } public static void main(String[] args) throws Exception { JobConf conf = new JobConf(WordCount.class); conf.setJobName("wordcount"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(Map.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf); } }
  • 34. JobConf Job Map/Reduce / / /HBase Map/Reduce
  • 35. Map/Reduce Key-Value Mapper<LongWritable, Text, Text, IntWritable> LongWritable Text IntWritable Hadoop Key=Long/Value=Text Key=Text/Value=Int
  • 36. Writable Hadoop Text,LongWritable,IntWritable, BytesWritable..... Java org.apache.hadoop.io.serializer.WritableSerial ization (ReflectionUtil) ※deserialize DataInputStream (Writable#readField / Writable#write)
  • 37. >= 0.20.0 public class WordCount { public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> { //Map } public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> { //Reduce } public static void main(String[] args) throws Exception { Job job = new Job(); job.setJarByClass(WordCount.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setInputFormat(TextInputFormat.class); job.setOutputFormat(TextOutputFormat.class); FileInputFormat.addInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true); } }
  • 38. >= 0.20.0 (20) public class WordCount { public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> { //Map } public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> { //Reduce } public static void main(String[] args) throws Exception { Job job = new Job(); job.setJarByClass(WordCount.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setInputFormat(TextInputFormat.class); job.setOutputFormat(TextOutputFormat.class); FileInputFormat.addInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true); } }
  • 39. API JobConf Configuration JobClient Job Mapper/Reducer OutputCollector,Reporter Context Key-Value Map Map
  • 40. (p29) → → Map Reduce →Map/Reduce Map
  • 43. Reduce (p30)
  • 44. Reduce (p31) ※Reduce shuffle
  • 45. (p31) Map→Reduce (max/min/average ) Combiner Reduce (Shuffle Key-Value Map→Reduce
  • 46. Hadoop Streaming (p34) Map/Reduce C/Python/Ruby/perl... Map/Reduce Hadoop hadoop-streaming.jar
  • 47. Hadoop Streaming Map:cat/Reduce:wc HDFS $ hadoop dfs -copyFromLocal /usr/local/hadoop/*.txt /dfs/test/input/ $ hadoop jar /usr/local/hadoop/contrib/streaming/hadoop-0.20.1-streaming.jar -input /dfs/test/input/ -output /dfs/test/ output -mapper cat -reducer "wc -l" 09/09/26 17:00:29 INFO jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId= 09/09/26 17:00:30 WARN mapred.JobClient: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String). 09/09/26 17:00:30 INFO mapred.FileInputFormat: Total input paths to process : 4 09/09/26 17:00:31 INFO mapred.FileInputFormat: Total input paths to process : 4 09/09/26 17:00:33 INFO streaming.StreamJob: map 0% reduce 0% 09/09/26 17:00:42 INFO streaming.StreamJob: map 100% reduce 0% 09/09/26 17:00:44 INFO streaming.StreamJob: map 100% reduce 100% 09/09/26 17:00:44 INFO streaming.StreamJob: Output: /dfs/test/output $ hadoop dfs -cat /dfs/test/output/* 8842
  • 48. Hadoop Streaming Python http://www.michael-noll.com/wiki/Writing_An_Hadoop_MapReduce_Program_In_Python $ hadoop jar /usr/local/hadoop/contrib/streaming/hadoop-0.20.1-streaming.jar -input /dfs/test/input/ -output /dfs/test/ output -mapper "python map.py" -reducer "python reduce.py" 09/09/26 17:29:25 INFO jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId= 09/09/26 17:29:26 WARN mapred.JobClient: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String). 09/09/26 17:29:26 INFO mapred.FileInputFormat: Total input paths to process : 4 09/09/26 17:29:26 INFO mapred.FileInputFormat: Total input paths to process : 4 09/09/26 17:29:30 INFO streaming.StreamJob: map 0% reduce 0% 09/09/26 17:29:33 INFO streaming.StreamJob: map 100% reduce 0% 09/09/26 17:29:35 INFO streaming.StreamJob: map 100% reduce 100% 09/09/26 17:29:35 INFO streaming.StreamJob: Output: /dfs/test/output $ hadoop dfs -cat /dfs/test/output/* via 1942 the 1476 to 1394 in 819 a 816 cutting) 740
  • 49. Hadoop Pipes (p38) Hadoop MapReduce C++ TaskTracker Map/Reduce JNI