SlideShare a Scribd company logo
Hadoop + Clojure
  Hadoop World NYC
Friday, October 2, 2009

Stuart Sierra, AltLaw.org
JVM Languages
                              Object
             Functional      Oriented

Native to
              Clojure           Groovy
the JVM                 Scala


Ported to                       JRuby
            Armed Bear CL
the JVM                         Jython
                Kawa
                                Rhino

   Java is dead, long live the JVM
Clojure

●   a new Lisp,
    neither Common Lisp nor Scheme
●   Dynamic, Functional
●   Immutability and concurrency
●   Hosted on the JVM
●   Open Source (Eclipse Public License)
Clojure Primitive Types
String       "Hello, World!n"
Integer      42
Double       2.0e64
BigInteger   9223372036854775808
BigDecimal   1.0M
Ratio        3/4
Boolean      true, false
Symbol       foo
Keyword      :foo
null          nil
Clojure Collections
List   (print :hello "NYC")

Vector [:eat "Pie" 3.14159]

Map    {:lisp 1   "The Rest" 0}

Set    #{2 1 3 5 "Eureka"}


          Homoiconicity
public void greet(String name) {
  System.out.println("Hi, " + name);
}

greet("New York");
Hi, New York


(defn greet [name]
  (println "Hello," name))

(greet "New York")
Hello, New York
public double average(double[] nums) {
  double total = 0;
  for (int i = 0; i < nums.length; i++) {
    total += nums[i];
  }
  return total / nums.length;
}


(defn average [& nums]
  (/ (reduce + nums) (count nums)))

(average 1 2 3 4)
5/2
Data Structures as Functions
(def m {:f "foo"     (def s #{1 5 3})
        :b "bar"})
                     (s 3)
(m :f)               true
"foo"
                     (s 7)
(:b m)               false
"bar"
(import '(com.example.package
            MyClass YourClass))

(. object method arguments)

(new MyClass arguments)


(.method object arguments)
                             Syntactic
(MyClass. arguments)          Sugar

(MyClass/staticMethod)
...open a stream...
try {
    ...do stuff with the stream...
} finally {
    stream.close();
}

(defmacro with-open [args & body]
  `(let ~args
    (try ~@body
     (finally (.close ~(first args))))))

(with-open [stream (...open a stream...)]
  ...do stuff with stream...)
synchronous   asynchronous


coordinated      ref

independent     atom         agent

unshared         var
(map function values)
         list of values
(reduce function values)
         single value


mapper(key, value)
         list of key-value pairs

reducer(key, values)
         list of key-value pairs
public static class MapClass extends MapReduceBase
  implements Mapper<LongWritable, Text, Text, IntWritable> {

    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    public void map(LongWritable key, Text value,
                    OutputCollector<Text, IntWritable> output,
                    Reporter reporter) throws IOException {
      String line = value.toString();
      StringTokenizer itr = new StringTokenizer(line);
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        output.collect(word, one);
      }
    }
}



public static class Reduce extends MapReduceBase
  implements Reducer<Text, IntWritable, Text, IntWritable> {

    public void reduce(Text key, Iterator<IntWritable> values,
                       OutputCollector<Text, IntWritable> output,
                       Reporter reporter) throws IOException {
      int sum = 0;
      while (values.hasNext()) {
        sum += values.next().get();
      }
      output.collect(key, new IntWritable(sum));
    }
}
(mapper key value)
        list of key-value pairs

(reducer key values)
        list of key-value pairs
Clojure-Hadoop 1
(defn mapper-map [this key val out reporter]
  (doseq [word (enumeration-seq
                (StringTokenizer. (str val)))]
    (.collect out (Text. word)
                  (IntWritable. 1))))

(defn reducer-reduce [this key vals out reporter]
  (let [sum (reduce +
             (map (fn [w] (.get w))
                  (iterator-seq values)))]
    (.collect output key (IntWritable. sum))))

(gen-job-classes)
Clojure-Hadoop 2
(defn my-map [key value]
   (map (fn [token] [token 1])
        (enumeration-seq (StringTokenizer. value))))

(def mapper-map
  (wrap-map my-map int-string-map-reader))

(defn my-reduce [key values]
   [[key (reduce + values)]])

(def reducer-reduce
  (wrap-reduce my-reduce))

(gen-job-classes)
Clojure print/read
        read



                STRING
DATA




        print
Clojure-Hadoop 3
(defn my-map [key val]
  (map (fn [token] [token 1])
       (enumeration-seq (StringTokenizer. val))))

(defn my-reduce [key values]
  [[key (reduce + values)]])

(defjob job
  :map my-map
  :map-reader int-string-map-reader
  :reduce my-reduce
  :inputformat :text)
public static class MapClass extends MapReduceBase
  implements Mapper<LongWritable, Text, Text, IntWritable> {

    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    public void map(LongWritable key, Text value,
                    OutputCollector<Text, IntWritable> output,
                    Reporter reporter) throws IOException {
      String line = value.toString();
      StringTokenizer itr = new StringTokenizer(line);
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        output.collect(word, one);
      }
    }
}



public static class Reduce extends MapReduceBase
  implements Reducer<Text, IntWritable, Text, IntWritable> {

    public void reduce(Text key, Iterator<IntWritable> values,
                       OutputCollector<Text, IntWritable> output,
                       Reporter reporter) throws IOException {
      int sum = 0;
      while (values.hasNext()) {
        sum += values.next().get();
      }
      output.collect(key, new IntWritable(sum));
    }
}
Clojure-Hadoop 3
(defn my-map [key val]
  (map (fn [token] [token 1])
       (enumeration-seq (StringTokenizer. val))))

(defn my-reduce [key values]
  [[key (reduce + values)]])

(defjob job
  :map my-map
  :map-reader int-string-map-reader
  :reduce my-reduce
  :inputformat :text)
More

●   http://clojure.org/
●   Google Groups: Clojure
●   #clojure on irc.freenode.net
●   http://richhickey.github.com/clojure-contrib
●   http://stuartsierra.com/
●   http://github.com/stuartsierra
●   http://www.altlaw.org/

More Related Content

What's hot

Clojure for Data Science
Clojure for Data ScienceClojure for Data Science
Clojure for Data Science
Mike Anderson
 
ClojureScript loves React, DomCode May 26 2015
ClojureScript loves React, DomCode May 26 2015ClojureScript loves React, DomCode May 26 2015
ClojureScript loves React, DomCode May 26 2015
Michiel Borkent
 
Idiomatic Kotlin
Idiomatic KotlinIdiomatic Kotlin
Idiomatic Kotlin
intelliyole
 
From Lisp to Clojure/Incanter and RAn Introduction
From Lisp to Clojure/Incanter and RAn IntroductionFrom Lisp to Clojure/Incanter and RAn Introduction
From Lisp to Clojure/Incanter and RAn Introductionelliando dias
 
Wprowadzenie do technologii Big Data / Intro to Big Data Ecosystem
Wprowadzenie do technologii Big Data / Intro to Big Data EcosystemWprowadzenie do technologii Big Data / Intro to Big Data Ecosystem
Wprowadzenie do technologii Big Data / Intro to Big Data Ecosystem
Sages
 
Futures e abstração - QCon São Paulo 2015
Futures e abstração - QCon São Paulo 2015Futures e abstração - QCon São Paulo 2015
Futures e abstração - QCon São Paulo 2015
Leonardo Borges
 
Refactoring to Macros with Clojure
Refactoring to Macros with ClojureRefactoring to Macros with Clojure
Refactoring to Macros with ClojureDmitry Buzdin
 
Herding types with Scala macros
Herding types with Scala macrosHerding types with Scala macros
Herding types with Scala macros
Marina Sigaeva
 
Rust concurrency tutorial 2015 12-02
Rust concurrency tutorial 2015 12-02Rust concurrency tutorial 2015 12-02
Rust concurrency tutorial 2015 12-02
nikomatsakis
 
Poor Man's Functional Programming
Poor Man's Functional ProgrammingPoor Man's Functional Programming
Poor Man's Functional ProgrammingDmitry Buzdin
 
Rust Mozlando Tutorial
Rust Mozlando TutorialRust Mozlando Tutorial
Rust Mozlando Tutorial
nikomatsakis
 
MiamiJS - The Future of JavaScript
MiamiJS - The Future of JavaScriptMiamiJS - The Future of JavaScript
MiamiJS - The Future of JavaScript
Caridy Patino
 
Typelevel summit
Typelevel summitTypelevel summit
Typelevel summit
Marina Sigaeva
 
Meetup slides
Meetup slidesMeetup slides
Meetup slides
suraj_atreya
 
Coffee Scriptでenchant.js
Coffee Scriptでenchant.jsCoffee Scriptでenchant.js
Coffee Scriptでenchant.js
Naoyuki Totani
 
Continuation Passing Style and Macros in Clojure - Jan 2012
Continuation Passing Style and Macros in Clojure - Jan 2012Continuation Passing Style and Macros in Clojure - Jan 2012
Continuation Passing Style and Macros in Clojure - Jan 2012
Leonardo Borges
 

What's hot (20)

Clojure for Data Science
Clojure for Data ScienceClojure for Data Science
Clojure for Data Science
 
ClojureScript loves React, DomCode May 26 2015
ClojureScript loves React, DomCode May 26 2015ClojureScript loves React, DomCode May 26 2015
ClojureScript loves React, DomCode May 26 2015
 
Clojure class
Clojure classClojure class
Clojure class
 
Idiomatic Kotlin
Idiomatic KotlinIdiomatic Kotlin
Idiomatic Kotlin
 
From Lisp to Clojure/Incanter and RAn Introduction
From Lisp to Clojure/Incanter and RAn IntroductionFrom Lisp to Clojure/Incanter and RAn Introduction
From Lisp to Clojure/Incanter and RAn Introduction
 
Wprowadzenie do technologii Big Data / Intro to Big Data Ecosystem
Wprowadzenie do technologii Big Data / Intro to Big Data EcosystemWprowadzenie do technologii Big Data / Intro to Big Data Ecosystem
Wprowadzenie do technologii Big Data / Intro to Big Data Ecosystem
 
Futures e abstração - QCon São Paulo 2015
Futures e abstração - QCon São Paulo 2015Futures e abstração - QCon São Paulo 2015
Futures e abstração - QCon São Paulo 2015
 
Refactoring to Macros with Clojure
Refactoring to Macros with ClojureRefactoring to Macros with Clojure
Refactoring to Macros with Clojure
 
Herding types with Scala macros
Herding types with Scala macrosHerding types with Scala macros
Herding types with Scala macros
 
Rust concurrency tutorial 2015 12-02
Rust concurrency tutorial 2015 12-02Rust concurrency tutorial 2015 12-02
Rust concurrency tutorial 2015 12-02
 
Poor Man's Functional Programming
Poor Man's Functional ProgrammingPoor Man's Functional Programming
Poor Man's Functional Programming
 
Rust Mozlando Tutorial
Rust Mozlando TutorialRust Mozlando Tutorial
Rust Mozlando Tutorial
 
MiamiJS - The Future of JavaScript
MiamiJS - The Future of JavaScriptMiamiJS - The Future of JavaScript
MiamiJS - The Future of JavaScript
 
Typelevel summit
Typelevel summitTypelevel summit
Typelevel summit
 
Clojure intro
Clojure introClojure intro
Clojure intro
 
TeraSort
TeraSortTeraSort
TeraSort
 
Scala
ScalaScala
Scala
 
Meetup slides
Meetup slidesMeetup slides
Meetup slides
 
Coffee Scriptでenchant.js
Coffee Scriptでenchant.jsCoffee Scriptでenchant.js
Coffee Scriptでenchant.js
 
Continuation Passing Style and Macros in Clojure - Jan 2012
Continuation Passing Style and Macros in Clojure - Jan 2012Continuation Passing Style and Macros in Clojure - Jan 2012
Continuation Passing Style and Macros in Clojure - Jan 2012
 

Similar to Hw09 Hadoop + Clojure

Introduction to Scalding and Monoids
Introduction to Scalding and MonoidsIntroduction to Scalding and Monoids
Introduction to Scalding and Monoids
Hugo Gävert
 
Clojure Intro
Clojure IntroClojure Intro
Clojure Intro
thnetos
 
Scoobi - Scala for Startups
Scoobi - Scala for StartupsScoobi - Scala for Startups
Scoobi - Scala for Startups
bmlever
 
(map Clojure everyday-tasks)
(map Clojure everyday-tasks)(map Clojure everyday-tasks)
(map Clojure everyday-tasks)
Jacek Laskowski
 
What can be done with Java, but should better be done with Erlang (@pavlobaron)
What can be done with Java, but should better be done with Erlang (@pavlobaron)What can be done with Java, but should better be done with Erlang (@pavlobaron)
What can be done with Java, but should better be done with Erlang (@pavlobaron)
Pavlo Baron
 
(How) can we benefit from adopting scala?
(How) can we benefit from adopting scala?(How) can we benefit from adopting scala?
(How) can we benefit from adopting scala?Tomasz Wrobel
 
Scala presentation by Aleksandar Prokopec
Scala presentation by Aleksandar ProkopecScala presentation by Aleksandar Prokopec
Scala presentation by Aleksandar ProkopecLoïc Descotte
 
From Java to Scala - advantages and possible risks
From Java to Scala - advantages and possible risksFrom Java to Scala - advantages and possible risks
From Java to Scala - advantages and possible risks
SeniorDevOnly
 
Scala @ TechMeetup Edinburgh
Scala @ TechMeetup EdinburghScala @ TechMeetup Edinburgh
Scala @ TechMeetup Edinburgh
Stuart Roebuck
 
(first '(Clojure.))
(first '(Clojure.))(first '(Clojure.))
(first '(Clojure.))
niklal
 
Scala - en bedre og mere effektiv Java?
Scala - en bedre og mere effektiv Java?Scala - en bedre og mere effektiv Java?
Scala - en bedre og mere effektiv Java?
Jesper Kamstrup Linnet
 
Scala coated JVM
Scala coated JVMScala coated JVM
Scala coated JVM
Stuart Roebuck
 
Столпы функционального программирования для адептов ООП, Николай Мозговой
Столпы функционального программирования для адептов ООП, Николай МозговойСтолпы функционального программирования для адептов ООП, Николай Мозговой
Столпы функционального программирования для адептов ООП, Николай Мозговой
Sigma Software
 
Introducción a hadoop
Introducción a hadoopIntroducción a hadoop
Introducción a hadoop
datasalt
 
A Sceptical Guide to Functional Programming
A Sceptical Guide to Functional ProgrammingA Sceptical Guide to Functional Programming
A Sceptical Guide to Functional ProgrammingGarth Gilmour
 
Clojure And Swing
Clojure And SwingClojure And Swing
Clojure And Swing
Skills Matter
 
Reactive Programming Patterns with RxSwift
Reactive Programming Patterns with RxSwiftReactive Programming Patterns with RxSwift
Reactive Programming Patterns with RxSwift
Florent Pillet
 
Underscore.js
Underscore.jsUnderscore.js
Underscore.jstimourian
 
Introduction to Scala
Introduction to ScalaIntroduction to Scala
Introduction to Scala
Aleksandar Prokopec
 

Similar to Hw09 Hadoop + Clojure (20)

Introduction to Scalding and Monoids
Introduction to Scalding and MonoidsIntroduction to Scalding and Monoids
Introduction to Scalding and Monoids
 
Clojure Intro
Clojure IntroClojure Intro
Clojure Intro
 
Scoobi - Scala for Startups
Scoobi - Scala for StartupsScoobi - Scala for Startups
Scoobi - Scala for Startups
 
(map Clojure everyday-tasks)
(map Clojure everyday-tasks)(map Clojure everyday-tasks)
(map Clojure everyday-tasks)
 
What can be done with Java, but should better be done with Erlang (@pavlobaron)
What can be done with Java, but should better be done with Erlang (@pavlobaron)What can be done with Java, but should better be done with Erlang (@pavlobaron)
What can be done with Java, but should better be done with Erlang (@pavlobaron)
 
(How) can we benefit from adopting scala?
(How) can we benefit from adopting scala?(How) can we benefit from adopting scala?
(How) can we benefit from adopting scala?
 
Scala presentation by Aleksandar Prokopec
Scala presentation by Aleksandar ProkopecScala presentation by Aleksandar Prokopec
Scala presentation by Aleksandar Prokopec
 
From Java to Scala - advantages and possible risks
From Java to Scala - advantages and possible risksFrom Java to Scala - advantages and possible risks
From Java to Scala - advantages and possible risks
 
Scala @ TechMeetup Edinburgh
Scala @ TechMeetup EdinburghScala @ TechMeetup Edinburgh
Scala @ TechMeetup Edinburgh
 
(first '(Clojure.))
(first '(Clojure.))(first '(Clojure.))
(first '(Clojure.))
 
Scala - en bedre og mere effektiv Java?
Scala - en bedre og mere effektiv Java?Scala - en bedre og mere effektiv Java?
Scala - en bedre og mere effektiv Java?
 
Scala coated JVM
Scala coated JVMScala coated JVM
Scala coated JVM
 
Столпы функционального программирования для адептов ООП, Николай Мозговой
Столпы функционального программирования для адептов ООП, Николай МозговойСтолпы функционального программирования для адептов ООП, Николай Мозговой
Столпы функционального программирования для адептов ООП, Николай Мозговой
 
Introducción a hadoop
Introducción a hadoopIntroducción a hadoop
Introducción a hadoop
 
A Sceptical Guide to Functional Programming
A Sceptical Guide to Functional ProgrammingA Sceptical Guide to Functional Programming
A Sceptical Guide to Functional Programming
 
Clojure And Swing
Clojure And SwingClojure And Swing
Clojure And Swing
 
An introduction to scala
An introduction to scalaAn introduction to scala
An introduction to scala
 
Reactive Programming Patterns with RxSwift
Reactive Programming Patterns with RxSwiftReactive Programming Patterns with RxSwift
Reactive Programming Patterns with RxSwift
 
Underscore.js
Underscore.jsUnderscore.js
Underscore.js
 
Introduction to Scala
Introduction to ScalaIntroduction to Scala
Introduction to Scala
 

More from Cloudera, Inc.

Partner Briefing_January 25 (FINAL).pptx
Partner Briefing_January 25 (FINAL).pptxPartner Briefing_January 25 (FINAL).pptx
Partner Briefing_January 25 (FINAL).pptx
Cloudera, Inc.
 
Cloudera Data Impact Awards 2021 - Finalists
Cloudera Data Impact Awards 2021 - Finalists Cloudera Data Impact Awards 2021 - Finalists
Cloudera Data Impact Awards 2021 - Finalists
Cloudera, Inc.
 
2020 Cloudera Data Impact Awards Finalists
2020 Cloudera Data Impact Awards Finalists2020 Cloudera Data Impact Awards Finalists
2020 Cloudera Data Impact Awards Finalists
Cloudera, Inc.
 
Edc event vienna presentation 1 oct 2019
Edc event vienna presentation 1 oct 2019Edc event vienna presentation 1 oct 2019
Edc event vienna presentation 1 oct 2019
Cloudera, Inc.
 
Machine Learning with Limited Labeled Data 4/3/19
Machine Learning with Limited Labeled Data 4/3/19Machine Learning with Limited Labeled Data 4/3/19
Machine Learning with Limited Labeled Data 4/3/19
Cloudera, Inc.
 
Data Driven With the Cloudera Modern Data Warehouse 3.19.19
Data Driven With the Cloudera Modern Data Warehouse 3.19.19Data Driven With the Cloudera Modern Data Warehouse 3.19.19
Data Driven With the Cloudera Modern Data Warehouse 3.19.19
Cloudera, Inc.
 
Introducing Cloudera DataFlow (CDF) 2.13.19
Introducing Cloudera DataFlow (CDF) 2.13.19Introducing Cloudera DataFlow (CDF) 2.13.19
Introducing Cloudera DataFlow (CDF) 2.13.19
Cloudera, Inc.
 
Introducing Cloudera Data Science Workbench for HDP 2.12.19
Introducing Cloudera Data Science Workbench for HDP 2.12.19Introducing Cloudera Data Science Workbench for HDP 2.12.19
Introducing Cloudera Data Science Workbench for HDP 2.12.19
Cloudera, Inc.
 
Shortening the Sales Cycle with a Modern Data Warehouse 1.30.19
Shortening the Sales Cycle with a Modern Data Warehouse 1.30.19Shortening the Sales Cycle with a Modern Data Warehouse 1.30.19
Shortening the Sales Cycle with a Modern Data Warehouse 1.30.19
Cloudera, Inc.
 
Leveraging the cloud for analytics and machine learning 1.29.19
Leveraging the cloud for analytics and machine learning 1.29.19Leveraging the cloud for analytics and machine learning 1.29.19
Leveraging the cloud for analytics and machine learning 1.29.19
Cloudera, Inc.
 
Modernizing the Legacy Data Warehouse – What, Why, and How 1.23.19
Modernizing the Legacy Data Warehouse – What, Why, and How 1.23.19Modernizing the Legacy Data Warehouse – What, Why, and How 1.23.19
Modernizing the Legacy Data Warehouse – What, Why, and How 1.23.19
Cloudera, Inc.
 
Leveraging the Cloud for Big Data Analytics 12.11.18
Leveraging the Cloud for Big Data Analytics 12.11.18Leveraging the Cloud for Big Data Analytics 12.11.18
Leveraging the Cloud for Big Data Analytics 12.11.18
Cloudera, Inc.
 
Modern Data Warehouse Fundamentals Part 3
Modern Data Warehouse Fundamentals Part 3Modern Data Warehouse Fundamentals Part 3
Modern Data Warehouse Fundamentals Part 3
Cloudera, Inc.
 
Modern Data Warehouse Fundamentals Part 2
Modern Data Warehouse Fundamentals Part 2Modern Data Warehouse Fundamentals Part 2
Modern Data Warehouse Fundamentals Part 2
Cloudera, Inc.
 
Modern Data Warehouse Fundamentals Part 1
Modern Data Warehouse Fundamentals Part 1Modern Data Warehouse Fundamentals Part 1
Modern Data Warehouse Fundamentals Part 1
Cloudera, Inc.
 
Extending Cloudera SDX beyond the Platform
Extending Cloudera SDX beyond the PlatformExtending Cloudera SDX beyond the Platform
Extending Cloudera SDX beyond the Platform
Cloudera, Inc.
 
Federated Learning: ML with Privacy on the Edge 11.15.18
Federated Learning: ML with Privacy on the Edge 11.15.18Federated Learning: ML with Privacy on the Edge 11.15.18
Federated Learning: ML with Privacy on the Edge 11.15.18
Cloudera, Inc.
 
Analyst Webinar: Doing a 180 on Customer 360
Analyst Webinar: Doing a 180 on Customer 360Analyst Webinar: Doing a 180 on Customer 360
Analyst Webinar: Doing a 180 on Customer 360
Cloudera, Inc.
 
Build a modern platform for anti-money laundering 9.19.18
Build a modern platform for anti-money laundering 9.19.18Build a modern platform for anti-money laundering 9.19.18
Build a modern platform for anti-money laundering 9.19.18
Cloudera, Inc.
 
Introducing the data science sandbox as a service 8.30.18
Introducing the data science sandbox as a service 8.30.18Introducing the data science sandbox as a service 8.30.18
Introducing the data science sandbox as a service 8.30.18
Cloudera, Inc.
 

More from Cloudera, Inc. (20)

Partner Briefing_January 25 (FINAL).pptx
Partner Briefing_January 25 (FINAL).pptxPartner Briefing_January 25 (FINAL).pptx
Partner Briefing_January 25 (FINAL).pptx
 
Cloudera Data Impact Awards 2021 - Finalists
Cloudera Data Impact Awards 2021 - Finalists Cloudera Data Impact Awards 2021 - Finalists
Cloudera Data Impact Awards 2021 - Finalists
 
2020 Cloudera Data Impact Awards Finalists
2020 Cloudera Data Impact Awards Finalists2020 Cloudera Data Impact Awards Finalists
2020 Cloudera Data Impact Awards Finalists
 
Edc event vienna presentation 1 oct 2019
Edc event vienna presentation 1 oct 2019Edc event vienna presentation 1 oct 2019
Edc event vienna presentation 1 oct 2019
 
Machine Learning with Limited Labeled Data 4/3/19
Machine Learning with Limited Labeled Data 4/3/19Machine Learning with Limited Labeled Data 4/3/19
Machine Learning with Limited Labeled Data 4/3/19
 
Data Driven With the Cloudera Modern Data Warehouse 3.19.19
Data Driven With the Cloudera Modern Data Warehouse 3.19.19Data Driven With the Cloudera Modern Data Warehouse 3.19.19
Data Driven With the Cloudera Modern Data Warehouse 3.19.19
 
Introducing Cloudera DataFlow (CDF) 2.13.19
Introducing Cloudera DataFlow (CDF) 2.13.19Introducing Cloudera DataFlow (CDF) 2.13.19
Introducing Cloudera DataFlow (CDF) 2.13.19
 
Introducing Cloudera Data Science Workbench for HDP 2.12.19
Introducing Cloudera Data Science Workbench for HDP 2.12.19Introducing Cloudera Data Science Workbench for HDP 2.12.19
Introducing Cloudera Data Science Workbench for HDP 2.12.19
 
Shortening the Sales Cycle with a Modern Data Warehouse 1.30.19
Shortening the Sales Cycle with a Modern Data Warehouse 1.30.19Shortening the Sales Cycle with a Modern Data Warehouse 1.30.19
Shortening the Sales Cycle with a Modern Data Warehouse 1.30.19
 
Leveraging the cloud for analytics and machine learning 1.29.19
Leveraging the cloud for analytics and machine learning 1.29.19Leveraging the cloud for analytics and machine learning 1.29.19
Leveraging the cloud for analytics and machine learning 1.29.19
 
Modernizing the Legacy Data Warehouse – What, Why, and How 1.23.19
Modernizing the Legacy Data Warehouse – What, Why, and How 1.23.19Modernizing the Legacy Data Warehouse – What, Why, and How 1.23.19
Modernizing the Legacy Data Warehouse – What, Why, and How 1.23.19
 
Leveraging the Cloud for Big Data Analytics 12.11.18
Leveraging the Cloud for Big Data Analytics 12.11.18Leveraging the Cloud for Big Data Analytics 12.11.18
Leveraging the Cloud for Big Data Analytics 12.11.18
 
Modern Data Warehouse Fundamentals Part 3
Modern Data Warehouse Fundamentals Part 3Modern Data Warehouse Fundamentals Part 3
Modern Data Warehouse Fundamentals Part 3
 
Modern Data Warehouse Fundamentals Part 2
Modern Data Warehouse Fundamentals Part 2Modern Data Warehouse Fundamentals Part 2
Modern Data Warehouse Fundamentals Part 2
 
Modern Data Warehouse Fundamentals Part 1
Modern Data Warehouse Fundamentals Part 1Modern Data Warehouse Fundamentals Part 1
Modern Data Warehouse Fundamentals Part 1
 
Extending Cloudera SDX beyond the Platform
Extending Cloudera SDX beyond the PlatformExtending Cloudera SDX beyond the Platform
Extending Cloudera SDX beyond the Platform
 
Federated Learning: ML with Privacy on the Edge 11.15.18
Federated Learning: ML with Privacy on the Edge 11.15.18Federated Learning: ML with Privacy on the Edge 11.15.18
Federated Learning: ML with Privacy on the Edge 11.15.18
 
Analyst Webinar: Doing a 180 on Customer 360
Analyst Webinar: Doing a 180 on Customer 360Analyst Webinar: Doing a 180 on Customer 360
Analyst Webinar: Doing a 180 on Customer 360
 
Build a modern platform for anti-money laundering 9.19.18
Build a modern platform for anti-money laundering 9.19.18Build a modern platform for anti-money laundering 9.19.18
Build a modern platform for anti-money laundering 9.19.18
 
Introducing the data science sandbox as a service 8.30.18
Introducing the data science sandbox as a service 8.30.18Introducing the data science sandbox as a service 8.30.18
Introducing the data science sandbox as a service 8.30.18
 

Recently uploaded

Smart TV Buyer Insights Survey 2024 by 91mobiles.pdf
Smart TV Buyer Insights Survey 2024 by 91mobiles.pdfSmart TV Buyer Insights Survey 2024 by 91mobiles.pdf
Smart TV Buyer Insights Survey 2024 by 91mobiles.pdf
91mobiles
 
Assuring Contact Center Experiences for Your Customers With ThousandEyes
Assuring Contact Center Experiences for Your Customers With ThousandEyesAssuring Contact Center Experiences for Your Customers With ThousandEyes
Assuring Contact Center Experiences for Your Customers With ThousandEyes
ThousandEyes
 
Generating a custom Ruby SDK for your web service or Rails API using Smithy
Generating a custom Ruby SDK for your web service or Rails API using SmithyGenerating a custom Ruby SDK for your web service or Rails API using Smithy
Generating a custom Ruby SDK for your web service or Rails API using Smithy
g2nightmarescribd
 
LF Energy Webinar: Electrical Grid Modelling and Simulation Through PowSyBl -...
LF Energy Webinar: Electrical Grid Modelling and Simulation Through PowSyBl -...LF Energy Webinar: Electrical Grid Modelling and Simulation Through PowSyBl -...
LF Energy Webinar: Electrical Grid Modelling and Simulation Through PowSyBl -...
DanBrown980551
 
FIDO Alliance Osaka Seminar: Overview.pdf
FIDO Alliance Osaka Seminar: Overview.pdfFIDO Alliance Osaka Seminar: Overview.pdf
FIDO Alliance Osaka Seminar: Overview.pdf
FIDO Alliance
 
PCI PIN Basics Webinar from the Controlcase Team
PCI PIN Basics Webinar from the Controlcase TeamPCI PIN Basics Webinar from the Controlcase Team
PCI PIN Basics Webinar from the Controlcase Team
ControlCase
 
When stars align: studies in data quality, knowledge graphs, and machine lear...
When stars align: studies in data quality, knowledge graphs, and machine lear...When stars align: studies in data quality, knowledge graphs, and machine lear...
When stars align: studies in data quality, knowledge graphs, and machine lear...
Elena Simperl
 
Designing Great Products: The Power of Design and Leadership by Chief Designe...
Designing Great Products: The Power of Design and Leadership by Chief Designe...Designing Great Products: The Power of Design and Leadership by Chief Designe...
Designing Great Products: The Power of Design and Leadership by Chief Designe...
Product School
 
Builder.ai Founder Sachin Dev Duggal's Strategic Approach to Create an Innova...
Builder.ai Founder Sachin Dev Duggal's Strategic Approach to Create an Innova...Builder.ai Founder Sachin Dev Duggal's Strategic Approach to Create an Innova...
Builder.ai Founder Sachin Dev Duggal's Strategic Approach to Create an Innova...
Ramesh Iyer
 
JMeter webinar - integration with InfluxDB and Grafana
JMeter webinar - integration with InfluxDB and GrafanaJMeter webinar - integration with InfluxDB and Grafana
JMeter webinar - integration with InfluxDB and Grafana
RTTS
 
Mission to Decommission: Importance of Decommissioning Products to Increase E...
Mission to Decommission: Importance of Decommissioning Products to Increase E...Mission to Decommission: Importance of Decommissioning Products to Increase E...
Mission to Decommission: Importance of Decommissioning Products to Increase E...
Product School
 
Slack (or Teams) Automation for Bonterra Impact Management (fka Social Soluti...
Slack (or Teams) Automation for Bonterra Impact Management (fka Social Soluti...Slack (or Teams) Automation for Bonterra Impact Management (fka Social Soluti...
Slack (or Teams) Automation for Bonterra Impact Management (fka Social Soluti...
Jeffrey Haguewood
 
FIDO Alliance Osaka Seminar: The WebAuthn API and Discoverable Credentials.pdf
FIDO Alliance Osaka Seminar: The WebAuthn API and Discoverable Credentials.pdfFIDO Alliance Osaka Seminar: The WebAuthn API and Discoverable Credentials.pdf
FIDO Alliance Osaka Seminar: The WebAuthn API and Discoverable Credentials.pdf
FIDO Alliance
 
Monitoring Java Application Security with JDK Tools and JFR Events
Monitoring Java Application Security with JDK Tools and JFR EventsMonitoring Java Application Security with JDK Tools and JFR Events
Monitoring Java Application Security with JDK Tools and JFR Events
Ana-Maria Mihalceanu
 
The Art of the Pitch: WordPress Relationships and Sales
The Art of the Pitch: WordPress Relationships and SalesThe Art of the Pitch: WordPress Relationships and Sales
The Art of the Pitch: WordPress Relationships and Sales
Laura Byrne
 
UiPath Test Automation using UiPath Test Suite series, part 3
UiPath Test Automation using UiPath Test Suite series, part 3UiPath Test Automation using UiPath Test Suite series, part 3
UiPath Test Automation using UiPath Test Suite series, part 3
DianaGray10
 
Leading Change strategies and insights for effective change management pdf 1.pdf
Leading Change strategies and insights for effective change management pdf 1.pdfLeading Change strategies and insights for effective change management pdf 1.pdf
Leading Change strategies and insights for effective change management pdf 1.pdf
OnBoard
 
UiPath Test Automation using UiPath Test Suite series, part 4
UiPath Test Automation using UiPath Test Suite series, part 4UiPath Test Automation using UiPath Test Suite series, part 4
UiPath Test Automation using UiPath Test Suite series, part 4
DianaGray10
 
FIDO Alliance Osaka Seminar: FIDO Security Aspects.pdf
FIDO Alliance Osaka Seminar: FIDO Security Aspects.pdfFIDO Alliance Osaka Seminar: FIDO Security Aspects.pdf
FIDO Alliance Osaka Seminar: FIDO Security Aspects.pdf
FIDO Alliance
 
The Future of Platform Engineering
The Future of Platform EngineeringThe Future of Platform Engineering
The Future of Platform Engineering
Jemma Hussein Allen
 

Recently uploaded (20)

Smart TV Buyer Insights Survey 2024 by 91mobiles.pdf
Smart TV Buyer Insights Survey 2024 by 91mobiles.pdfSmart TV Buyer Insights Survey 2024 by 91mobiles.pdf
Smart TV Buyer Insights Survey 2024 by 91mobiles.pdf
 
Assuring Contact Center Experiences for Your Customers With ThousandEyes
Assuring Contact Center Experiences for Your Customers With ThousandEyesAssuring Contact Center Experiences for Your Customers With ThousandEyes
Assuring Contact Center Experiences for Your Customers With ThousandEyes
 
Generating a custom Ruby SDK for your web service or Rails API using Smithy
Generating a custom Ruby SDK for your web service or Rails API using SmithyGenerating a custom Ruby SDK for your web service or Rails API using Smithy
Generating a custom Ruby SDK for your web service or Rails API using Smithy
 
LF Energy Webinar: Electrical Grid Modelling and Simulation Through PowSyBl -...
LF Energy Webinar: Electrical Grid Modelling and Simulation Through PowSyBl -...LF Energy Webinar: Electrical Grid Modelling and Simulation Through PowSyBl -...
LF Energy Webinar: Electrical Grid Modelling and Simulation Through PowSyBl -...
 
FIDO Alliance Osaka Seminar: Overview.pdf
FIDO Alliance Osaka Seminar: Overview.pdfFIDO Alliance Osaka Seminar: Overview.pdf
FIDO Alliance Osaka Seminar: Overview.pdf
 
PCI PIN Basics Webinar from the Controlcase Team
PCI PIN Basics Webinar from the Controlcase TeamPCI PIN Basics Webinar from the Controlcase Team
PCI PIN Basics Webinar from the Controlcase Team
 
When stars align: studies in data quality, knowledge graphs, and machine lear...
When stars align: studies in data quality, knowledge graphs, and machine lear...When stars align: studies in data quality, knowledge graphs, and machine lear...
When stars align: studies in data quality, knowledge graphs, and machine lear...
 
Designing Great Products: The Power of Design and Leadership by Chief Designe...
Designing Great Products: The Power of Design and Leadership by Chief Designe...Designing Great Products: The Power of Design and Leadership by Chief Designe...
Designing Great Products: The Power of Design and Leadership by Chief Designe...
 
Builder.ai Founder Sachin Dev Duggal's Strategic Approach to Create an Innova...
Builder.ai Founder Sachin Dev Duggal's Strategic Approach to Create an Innova...Builder.ai Founder Sachin Dev Duggal's Strategic Approach to Create an Innova...
Builder.ai Founder Sachin Dev Duggal's Strategic Approach to Create an Innova...
 
JMeter webinar - integration with InfluxDB and Grafana
JMeter webinar - integration with InfluxDB and GrafanaJMeter webinar - integration with InfluxDB and Grafana
JMeter webinar - integration with InfluxDB and Grafana
 
Mission to Decommission: Importance of Decommissioning Products to Increase E...
Mission to Decommission: Importance of Decommissioning Products to Increase E...Mission to Decommission: Importance of Decommissioning Products to Increase E...
Mission to Decommission: Importance of Decommissioning Products to Increase E...
 
Slack (or Teams) Automation for Bonterra Impact Management (fka Social Soluti...
Slack (or Teams) Automation for Bonterra Impact Management (fka Social Soluti...Slack (or Teams) Automation for Bonterra Impact Management (fka Social Soluti...
Slack (or Teams) Automation for Bonterra Impact Management (fka Social Soluti...
 
FIDO Alliance Osaka Seminar: The WebAuthn API and Discoverable Credentials.pdf
FIDO Alliance Osaka Seminar: The WebAuthn API and Discoverable Credentials.pdfFIDO Alliance Osaka Seminar: The WebAuthn API and Discoverable Credentials.pdf
FIDO Alliance Osaka Seminar: The WebAuthn API and Discoverable Credentials.pdf
 
Monitoring Java Application Security with JDK Tools and JFR Events
Monitoring Java Application Security with JDK Tools and JFR EventsMonitoring Java Application Security with JDK Tools and JFR Events
Monitoring Java Application Security with JDK Tools and JFR Events
 
The Art of the Pitch: WordPress Relationships and Sales
The Art of the Pitch: WordPress Relationships and SalesThe Art of the Pitch: WordPress Relationships and Sales
The Art of the Pitch: WordPress Relationships and Sales
 
UiPath Test Automation using UiPath Test Suite series, part 3
UiPath Test Automation using UiPath Test Suite series, part 3UiPath Test Automation using UiPath Test Suite series, part 3
UiPath Test Automation using UiPath Test Suite series, part 3
 
Leading Change strategies and insights for effective change management pdf 1.pdf
Leading Change strategies and insights for effective change management pdf 1.pdfLeading Change strategies and insights for effective change management pdf 1.pdf
Leading Change strategies and insights for effective change management pdf 1.pdf
 
UiPath Test Automation using UiPath Test Suite series, part 4
UiPath Test Automation using UiPath Test Suite series, part 4UiPath Test Automation using UiPath Test Suite series, part 4
UiPath Test Automation using UiPath Test Suite series, part 4
 
FIDO Alliance Osaka Seminar: FIDO Security Aspects.pdf
FIDO Alliance Osaka Seminar: FIDO Security Aspects.pdfFIDO Alliance Osaka Seminar: FIDO Security Aspects.pdf
FIDO Alliance Osaka Seminar: FIDO Security Aspects.pdf
 
The Future of Platform Engineering
The Future of Platform EngineeringThe Future of Platform Engineering
The Future of Platform Engineering
 

Hw09 Hadoop + Clojure

  • 1. Hadoop + Clojure Hadoop World NYC Friday, October 2, 2009 Stuart Sierra, AltLaw.org
  • 2. JVM Languages Object Functional Oriented Native to Clojure Groovy the JVM Scala Ported to JRuby Armed Bear CL the JVM Jython Kawa Rhino Java is dead, long live the JVM
  • 3. Clojure ● a new Lisp, neither Common Lisp nor Scheme ● Dynamic, Functional ● Immutability and concurrency ● Hosted on the JVM ● Open Source (Eclipse Public License)
  • 4. Clojure Primitive Types String "Hello, World!n" Integer 42 Double 2.0e64 BigInteger 9223372036854775808 BigDecimal 1.0M Ratio 3/4 Boolean true, false Symbol foo Keyword :foo null nil
  • 5. Clojure Collections List (print :hello "NYC") Vector [:eat "Pie" 3.14159] Map {:lisp 1 "The Rest" 0} Set #{2 1 3 5 "Eureka"} Homoiconicity
  • 6. public void greet(String name) { System.out.println("Hi, " + name); } greet("New York"); Hi, New York (defn greet [name] (println "Hello," name)) (greet "New York") Hello, New York
  • 7. public double average(double[] nums) { double total = 0; for (int i = 0; i < nums.length; i++) { total += nums[i]; } return total / nums.length; } (defn average [& nums] (/ (reduce + nums) (count nums))) (average 1 2 3 4) 5/2
  • 8. Data Structures as Functions (def m {:f "foo" (def s #{1 5 3}) :b "bar"}) (s 3) (m :f) true "foo" (s 7) (:b m) false "bar"
  • 9. (import '(com.example.package MyClass YourClass)) (. object method arguments) (new MyClass arguments) (.method object arguments) Syntactic (MyClass. arguments) Sugar (MyClass/staticMethod)
  • 10. ...open a stream... try { ...do stuff with the stream... } finally { stream.close(); } (defmacro with-open [args & body] `(let ~args (try ~@body (finally (.close ~(first args)))))) (with-open [stream (...open a stream...)] ...do stuff with stream...)
  • 11. synchronous asynchronous coordinated ref independent atom agent unshared var
  • 12. (map function values) list of values (reduce function values) single value mapper(key, value) list of key-value pairs reducer(key, values) list of key-value pairs
  • 13. public static class MapClass extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> { private final static IntWritable one = new IntWritable(1); private Text word = new Text(); public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException { String line = value.toString(); StringTokenizer itr = new StringTokenizer(line); while (itr.hasMoreTokens()) { word.set(itr.nextToken()); output.collect(word, one); } } } public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> { public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException { int sum = 0; while (values.hasNext()) { sum += values.next().get(); } output.collect(key, new IntWritable(sum)); } }
  • 14. (mapper key value) list of key-value pairs (reducer key values) list of key-value pairs
  • 15. Clojure-Hadoop 1 (defn mapper-map [this key val out reporter] (doseq [word (enumeration-seq (StringTokenizer. (str val)))] (.collect out (Text. word) (IntWritable. 1)))) (defn reducer-reduce [this key vals out reporter] (let [sum (reduce + (map (fn [w] (.get w)) (iterator-seq values)))] (.collect output key (IntWritable. sum)))) (gen-job-classes)
  • 16. Clojure-Hadoop 2 (defn my-map [key value] (map (fn [token] [token 1]) (enumeration-seq (StringTokenizer. value)))) (def mapper-map (wrap-map my-map int-string-map-reader)) (defn my-reduce [key values] [[key (reduce + values)]]) (def reducer-reduce (wrap-reduce my-reduce)) (gen-job-classes)
  • 17. Clojure print/read read STRING DATA print
  • 18. Clojure-Hadoop 3 (defn my-map [key val] (map (fn [token] [token 1]) (enumeration-seq (StringTokenizer. val)))) (defn my-reduce [key values] [[key (reduce + values)]]) (defjob job :map my-map :map-reader int-string-map-reader :reduce my-reduce :inputformat :text)
  • 19. public static class MapClass extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> { private final static IntWritable one = new IntWritable(1); private Text word = new Text(); public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException { String line = value.toString(); StringTokenizer itr = new StringTokenizer(line); while (itr.hasMoreTokens()) { word.set(itr.nextToken()); output.collect(word, one); } } } public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> { public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException { int sum = 0; while (values.hasNext()) { sum += values.next().get(); } output.collect(key, new IntWritable(sum)); } }
  • 20. Clojure-Hadoop 3 (defn my-map [key val] (map (fn [token] [token 1]) (enumeration-seq (StringTokenizer. val)))) (defn my-reduce [key values] [[key (reduce + values)]]) (defjob job :map my-map :map-reader int-string-map-reader :reduce my-reduce :inputformat :text)
  • 21. More ● http://clojure.org/ ● Google Groups: Clojure ● #clojure on irc.freenode.net ● http://richhickey.github.com/clojure-contrib ● http://stuartsierra.com/ ● http://github.com/stuartsierra ● http://www.altlaw.org/