Hadoop + Clojure
  Hadoop World NYC
Friday, October 2, 2009

Stuart Sierra, AltLaw.org
JVM Languages
                              Object
             Functional      Oriented

Native to
              Clojure           Groovy
the JVM                 Scala


Ported to                       JRuby
            Armed Bear CL
the JVM                         Jython
                Kawa
                                Rhino

   Java is dead, long live the JVM
Clojure

●   a new Lisp,
    neither Common Lisp nor Scheme
●   Dynamic, Functional
●   Immutability and concurrency
●   Hosted on the JVM
●   Open Source (Eclipse Public License)
Clojure Primitive Types
String       "Hello, World!\n"
Integer      42
Double       2.0e64
BigInteger   9223372036854775808
BigDecimal   1.0M
Ratio        3/4
Boolean      true, false
Symbol       foo
Keyword      :foo
null          nil
Clojure Collections
List   (print :hello "NYC")

Vector [:eat "Pie" 3.14159]

Map    {:lisp 1   "The Rest" 0}

Set    #{2 1 3 5 "Eureka"}


          Homoiconicity
public void greet(String name) {
  System.out.println("Hi, " + name);
}

greet("New York");
Hi, New York


(defn greet [name]
  (println "Hello," name))

(greet "New York")
Hello, New York
public double average(double[] nums) {
  double total = 0;
  for (int i = 0; i < nums.length; i++) {
    total += nums[i];
  }
  return total / nums.length;
}


(defn average [& nums]
  (/ (reduce + nums) (count nums)))

(average 1 2 3 4)
5/2
Data Structures as Functions
(def m {:f "foo"     (def s #{1 5 3})
        :b "bar"})
                     (s 3)
(m :f)               true
"foo"
                     (s 7)
(:b m)               false
"bar"
(import '(com.example.package
            MyClass YourClass))

(. object method arguments)

(new MyClass arguments)


(.method object arguments)
                             Syntactic
(MyClass. arguments)          Sugar

(MyClass/staticMethod)
...open a stream...
try {
    ...do stuff with the stream...
} finally {
    stream.close();
}

(defmacro with-open [args & body]
  `(let ~args
    (try ~@body
     (finally (.close ~(first args))))))

(with-open [stream (...open a stream...)]
  ...do stuff with stream...)
synchronous   asynchronous


coordinated      ref

independent     atom         agent

unshared         var
(map function values)
         list of values
(reduce function values)
         single value


mapper(key, value)
         list of key-value pairs

reducer(key, values)
         list of key-value pairs
public static class MapClass extends MapReduceBase
  implements Mapper<LongWritable, Text, Text, IntWritable> {

    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    public void map(LongWritable key, Text value,
                    OutputCollector<Text, IntWritable> output,
                    Reporter reporter) throws IOException {
      String line = value.toString();
      StringTokenizer itr = new StringTokenizer(line);
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        output.collect(word, one);
      }
    }
}



public static class Reduce extends MapReduceBase
  implements Reducer<Text, IntWritable, Text, IntWritable> {

    public void reduce(Text key, Iterator<IntWritable> values,
                       OutputCollector<Text, IntWritable> output,
                       Reporter reporter) throws IOException {
      int sum = 0;
      while (values.hasNext()) {
        sum += values.next().get();
      }
      output.collect(key, new IntWritable(sum));
    }
}
(mapper key value)
        list of key-value pairs

(reducer key values)
        list of key-value pairs
Clojure-Hadoop 1
(defn mapper-map [this key val out reporter]
  (doseq [word (enumeration-seq
                (StringTokenizer. (str val)))]
    (.collect out (Text. word)
                  (IntWritable. 1))))

(defn reducer-reduce [this key vals out reporter]
  (let [sum (reduce +
             (map (fn [w] (.get w))
                  (iterator-seq vals)))]
    (.collect out key (IntWritable. sum))))

(gen-job-classes)
Clojure-Hadoop 2
(defn my-map [key value]
   (map (fn [token] [token 1])
        (enumeration-seq (StringTokenizer. value))))

(def mapper-map
  (wrap-map my-map int-string-map-reader))

(defn my-reduce [key values]
   [[key (reduce + values)]])

(def reducer-reduce
  (wrap-reduce my-reduce))

(gen-job-classes)
Clojure print/read
        read



                STRING
DATA




        print
Clojure-Hadoop 3
(defn my-map [key val]
  (map (fn [token] [token 1])
       (enumeration-seq (StringTokenizer. val))))

(defn my-reduce [key values]
  [[key (reduce + values)]])

(defjob job
  :map my-map
  :map-reader int-string-map-reader
  :reduce my-reduce
  :inputformat :text)
public static class MapClass extends MapReduceBase
  implements Mapper<LongWritable, Text, Text, IntWritable> {

    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    public void map(LongWritable key, Text value,
                    OutputCollector<Text, IntWritable> output,
                    Reporter reporter) throws IOException {
      String line = value.toString();
      StringTokenizer itr = new StringTokenizer(line);
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        output.collect(word, one);
      }
    }
}



public static class Reduce extends MapReduceBase
  implements Reducer<Text, IntWritable, Text, IntWritable> {

    public void reduce(Text key, Iterator<IntWritable> values,
                       OutputCollector<Text, IntWritable> output,
                       Reporter reporter) throws IOException {
      int sum = 0;
      while (values.hasNext()) {
        sum += values.next().get();
      }
      output.collect(key, new IntWritable(sum));
    }
}
Clojure-Hadoop 3
(defn my-map [key val]
  (map (fn [token] [token 1])
       (enumeration-seq (StringTokenizer. val))))

(defn my-reduce [key values]
  [[key (reduce + values)]])

(defjob job
  :map my-map
  :map-reader int-string-map-reader
  :reduce my-reduce
  :inputformat :text)
More

●   http://clojure.org/
●   Google Groups: Clojure
●   #clojure on irc.freenode.net
●   http://richhickey.github.com/clojure-contrib
●   http://stuartsierra.com/
●   http://github.com/stuartsierra
●   http://www.altlaw.org/

More Related Content

PDF
Hadoop + Clojure
PDF
Pune Clojure Course Outline
PDF
Clojure: The Art of Abstraction
PPTX
Hello kotlin | An Event by DSC Unideb
PDF
Python tour
PDF
Coding in Style
PDF
Full Stack Clojure
PDF
ClojureScript for the web
Hadoop + Clojure
Pune Clojure Course Outline
Clojure: The Art of Abstraction
Hello kotlin | An Event by DSC Unideb
Python tour
Coding in Style
Full Stack Clojure
ClojureScript for the web

What's hot (20)

PPTX
Clojure for Data Science
PDF
ClojureScript loves React, DomCode May 26 2015
PDF
Clojure class
PDF
Idiomatic Kotlin
PDF
From Lisp to Clojure/Incanter and RAn Introduction
PDF
Wprowadzenie do technologii Big Data / Intro to Big Data Ecosystem
PDF
Futures e abstração - QCon São Paulo 2015
PDF
Refactoring to Macros with Clojure
PDF
Herding types with Scala macros
PDF
Rust concurrency tutorial 2015 12-02
PPTX
Poor Man's Functional Programming
PDF
Rust Mozlando Tutorial
PPTX
MiamiJS - The Future of JavaScript
PDF
Typelevel summit
PDF
Clojure intro
PDF
TeraSort
PPTX
ODP
Meetup slides
KEY
Coffee Scriptでenchant.js
PDF
Continuation Passing Style and Macros in Clojure - Jan 2012
Clojure for Data Science
ClojureScript loves React, DomCode May 26 2015
Clojure class
Idiomatic Kotlin
From Lisp to Clojure/Incanter and RAn Introduction
Wprowadzenie do technologii Big Data / Intro to Big Data Ecosystem
Futures e abstração - QCon São Paulo 2015
Refactoring to Macros with Clojure
Herding types with Scala macros
Rust concurrency tutorial 2015 12-02
Poor Man's Functional Programming
Rust Mozlando Tutorial
MiamiJS - The Future of JavaScript
Typelevel summit
Clojure intro
TeraSort
Meetup slides
Coffee Scriptでenchant.js
Continuation Passing Style and Macros in Clojure - Jan 2012

Similar to Hw09 Hadoop + Clojure (20)

KEY
Clojure Intro
PDF
Scalding - the not-so-basics @ ScalaDays 2014
PDF
(first '(Clojure.))
KEY
Ruby on Big Data (Cassandra + Hadoop)
KEY
Ruby on Big Data @ Philly Ruby Group
PDF
Distributed batch processing with Hadoop
PPTX
Scalable and Flexible Machine Learning With Scala @ LinkedIn
PDF
Brief introduction on Hadoop,Dremel, Pig, FlumeJava and Cassandra
PDF
Polyglot Programming in the JVM - Øredev
PPTX
Scoobi - Scala for Startups
PDF
KEY
Hadoop本 輪読会 1章〜2章
PDF
Clojure A Dynamic Programming Language for the JVM
PDF
Scala for Java Programmers
PDF
Hadoop pig
PDF
OSDC.fr 2012 :: Cascalog : progammation logique pour Hadoop
PDF
Mapreduce by examples
PDF
7li7w devcon5
PDF
What I learned from Seven Languages in Seven Weeks (IPRUG)
Clojure Intro
Scalding - the not-so-basics @ ScalaDays 2014
(first '(Clojure.))
Ruby on Big Data (Cassandra + Hadoop)
Ruby on Big Data @ Philly Ruby Group
Distributed batch processing with Hadoop
Scalable and Flexible Machine Learning With Scala @ LinkedIn
Brief introduction on Hadoop,Dremel, Pig, FlumeJava and Cassandra
Polyglot Programming in the JVM - Øredev
Scoobi - Scala for Startups
Hadoop本 輪読会 1章〜2章
Clojure A Dynamic Programming Language for the JVM
Scala for Java Programmers
Hadoop pig
OSDC.fr 2012 :: Cascalog : progammation logique pour Hadoop
Mapreduce by examples
7li7w devcon5
What I learned from Seven Languages in Seven Weeks (IPRUG)

More from Cloudera, Inc. (20)

PPTX
Partner Briefing_January 25 (FINAL).pptx
PPTX
Cloudera Data Impact Awards 2021 - Finalists
PPTX
2020 Cloudera Data Impact Awards Finalists
PPTX
Edc event vienna presentation 1 oct 2019
PPTX
Machine Learning with Limited Labeled Data 4/3/19
PPTX
Data Driven With the Cloudera Modern Data Warehouse 3.19.19
PPTX
Introducing Cloudera DataFlow (CDF) 2.13.19
PPTX
Introducing Cloudera Data Science Workbench for HDP 2.12.19
PPTX
Shortening the Sales Cycle with a Modern Data Warehouse 1.30.19
PPTX
Leveraging the cloud for analytics and machine learning 1.29.19
PPTX
Modernizing the Legacy Data Warehouse – What, Why, and How 1.23.19
PPTX
Leveraging the Cloud for Big Data Analytics 12.11.18
PPTX
Modern Data Warehouse Fundamentals Part 3
PPTX
Modern Data Warehouse Fundamentals Part 2
PPTX
Modern Data Warehouse Fundamentals Part 1
PPTX
Extending Cloudera SDX beyond the Platform
PPTX
Federated Learning: ML with Privacy on the Edge 11.15.18
PPTX
Analyst Webinar: Doing a 180 on Customer 360
PPTX
Build a modern platform for anti-money laundering 9.19.18
PPTX
Introducing the data science sandbox as a service 8.30.18
Partner Briefing_January 25 (FINAL).pptx
Cloudera Data Impact Awards 2021 - Finalists
2020 Cloudera Data Impact Awards Finalists
Edc event vienna presentation 1 oct 2019
Machine Learning with Limited Labeled Data 4/3/19
Data Driven With the Cloudera Modern Data Warehouse 3.19.19
Introducing Cloudera DataFlow (CDF) 2.13.19
Introducing Cloudera Data Science Workbench for HDP 2.12.19
Shortening the Sales Cycle with a Modern Data Warehouse 1.30.19
Leveraging the cloud for analytics and machine learning 1.29.19
Modernizing the Legacy Data Warehouse – What, Why, and How 1.23.19
Leveraging the Cloud for Big Data Analytics 12.11.18
Modern Data Warehouse Fundamentals Part 3
Modern Data Warehouse Fundamentals Part 2
Modern Data Warehouse Fundamentals Part 1
Extending Cloudera SDX beyond the Platform
Federated Learning: ML with Privacy on the Edge 11.15.18
Analyst Webinar: Doing a 180 on Customer 360
Build a modern platform for anti-money laundering 9.19.18
Introducing the data science sandbox as a service 8.30.18

Recently uploaded (20)

PDF
Addressing the challenges of harmonizing law and artificial intelligence tech...
PDF
Uncertainty-aware contextual multi-armed bandits for recommendations in e-com...
PPTX
From Curiosity to ROI — Cost-Benefit Analysis of Agentic Automation [3/6]
PDF
Be ready for tomorrow’s needs with a longer-lasting, higher-performing PC
PDF
Introduction to c language from lecture slides
PDF
EIS-Webinar-Regulated-Industries-2025-08.pdf
PDF
ELLIE29.pdfWETWETAWTAWETAETAETERTRTERTER
PDF
Slides World Game (s) Great Redesign Eco Economic Epochs.pdf
PPTX
maintenance powerrpoint for adaprive and preventive
PDF
Domain-specific knowledge and context in large language models: challenges, c...
PDF
TicketRoot: Event Tech Solutions Deck 2025
PPTX
Report in SIP_Distance_Learning_Technology_Impact.pptx
PDF
State of AI in Business 2025 - MIT NANDA
PDF
Ebook - The Future of AI A Comprehensive Guide.pdf
PDF
Streamline Vulnerability Management From Minimal Images to SBOMs
PPTX
Information-Technology-in-Human-Society.pptx
PDF
The Digital Engine Room: Unlocking APAC’s Economic and Digital Potential thro...
PDF
Technical Debt in the AI Coding Era - By Antonio Bianco
PPT
Overviiew on Intellectual property right
PPTX
Presentation - Principles of Instructional Design.pptx
Addressing the challenges of harmonizing law and artificial intelligence tech...
Uncertainty-aware contextual multi-armed bandits for recommendations in e-com...
From Curiosity to ROI — Cost-Benefit Analysis of Agentic Automation [3/6]
Be ready for tomorrow’s needs with a longer-lasting, higher-performing PC
Introduction to c language from lecture slides
EIS-Webinar-Regulated-Industries-2025-08.pdf
ELLIE29.pdfWETWETAWTAWETAETAETERTRTERTER
Slides World Game (s) Great Redesign Eco Economic Epochs.pdf
maintenance powerrpoint for adaprive and preventive
Domain-specific knowledge and context in large language models: challenges, c...
TicketRoot: Event Tech Solutions Deck 2025
Report in SIP_Distance_Learning_Technology_Impact.pptx
State of AI in Business 2025 - MIT NANDA
Ebook - The Future of AI A Comprehensive Guide.pdf
Streamline Vulnerability Management From Minimal Images to SBOMs
Information-Technology-in-Human-Society.pptx
The Digital Engine Room: Unlocking APAC’s Economic and Digital Potential thro...
Technical Debt in the AI Coding Era - By Antonio Bianco
Overviiew on Intellectual property right
Presentation - Principles of Instructional Design.pptx

Hw09 Hadoop + Clojure

  • 1. Hadoop + Clojure Hadoop World NYC Friday, October 2, 2009 Stuart Sierra, AltLaw.org
  • 2. JVM Languages Object Functional Oriented Native to Clojure Groovy the JVM Scala Ported to JRuby Armed Bear CL the JVM Jython Kawa Rhino Java is dead, long live the JVM
  • 3. Clojure ● a new Lisp, neither Common Lisp nor Scheme ● Dynamic, Functional ● Immutability and concurrency ● Hosted on the JVM ● Open Source (Eclipse Public License)
  • 4. Clojure Primitive Types String "Hello, World!\n" Integer 42 Double 2.0e64 BigInteger 9223372036854775808 BigDecimal 1.0M Ratio 3/4 Boolean true, false Symbol foo Keyword :foo null nil
  • 5. Clojure Collections List (print :hello "NYC") Vector [:eat "Pie" 3.14159] Map {:lisp 1 "The Rest" 0} Set #{2 1 3 5 "Eureka"} Homoiconicity
  • 6. public void greet(String name) { System.out.println("Hi, " + name); } greet("New York"); Hi, New York (defn greet [name] (println "Hello," name)) (greet "New York") Hello, New York
  • 7. public double average(double[] nums) { double total = 0; for (int i = 0; i < nums.length; i++) { total += nums[i]; } return total / nums.length; } (defn average [& nums] (/ (reduce + nums) (count nums))) (average 1 2 3 4) 5/2
  • 8. Data Structures as Functions (def m {:f "foo" (def s #{1 5 3}) :b "bar"}) (s 3) (m :f) true "foo" (s 7) (:b m) false "bar"
  • 9. (import '(com.example.package MyClass YourClass)) (. object method arguments) (new MyClass arguments) (.method object arguments) Syntactic (MyClass. arguments) Sugar (MyClass/staticMethod)
  • 10. ...open a stream... try { ...do stuff with the stream... } finally { stream.close(); } (defmacro with-open [args & body] `(let ~args (try ~@body (finally (.close ~(first args)))))) (with-open [stream (...open a stream...)] ...do stuff with stream...)
  • 11. synchronous asynchronous coordinated ref independent atom agent unshared var
  • 12. (map function values) list of values (reduce function values) single value mapper(key, value) list of key-value pairs reducer(key, values) list of key-value pairs
  • 13. public static class MapClass extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> { private final static IntWritable one = new IntWritable(1); private Text word = new Text(); public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException { String line = value.toString(); StringTokenizer itr = new StringTokenizer(line); while (itr.hasMoreTokens()) { word.set(itr.nextToken()); output.collect(word, one); } } } public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> { public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException { int sum = 0; while (values.hasNext()) { sum += values.next().get(); } output.collect(key, new IntWritable(sum)); } }
  • 14. (mapper key value) list of key-value pairs (reducer key values) list of key-value pairs
  • 15. Clojure-Hadoop 1 (defn mapper-map [this key val out reporter] (doseq [word (enumeration-seq (StringTokenizer. (str val)))] (.collect out (Text. word) (IntWritable. 1)))) (defn reducer-reduce [this key vals out reporter] (let [sum (reduce + (map (fn [w] (.get w)) (iterator-seq vals)))] (.collect out key (IntWritable. sum)))) (gen-job-classes)
  • 16. Clojure-Hadoop 2 (defn my-map [key value] (map (fn [token] [token 1]) (enumeration-seq (StringTokenizer. value)))) (def mapper-map (wrap-map my-map int-string-map-reader)) (defn my-reduce [key values] [[key (reduce + values)]]) (def reducer-reduce (wrap-reduce my-reduce)) (gen-job-classes)
  • 17. Clojure print/read read STRING DATA print
  • 18. Clojure-Hadoop 3 (defn my-map [key val] (map (fn [token] [token 1]) (enumeration-seq (StringTokenizer. val)))) (defn my-reduce [key values] [[key (reduce + values)]]) (defjob job :map my-map :map-reader int-string-map-reader :reduce my-reduce :inputformat :text)
  • 19. public static class MapClass extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> { private final static IntWritable one = new IntWritable(1); private Text word = new Text(); public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException { String line = value.toString(); StringTokenizer itr = new StringTokenizer(line); while (itr.hasMoreTokens()) { word.set(itr.nextToken()); output.collect(word, one); } } } public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> { public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException { int sum = 0; while (values.hasNext()) { sum += values.next().get(); } output.collect(key, new IntWritable(sum)); } }
  • 20. Clojure-Hadoop 3 (defn my-map [key val] (map (fn [token] [token 1]) (enumeration-seq (StringTokenizer. val)))) (defn my-reduce [key values] [[key (reduce + values)]]) (defjob job :map my-map :map-reader int-string-map-reader :reduce my-reduce :inputformat :text)
  • 21. More ● http://clojure.org/ ● Google Groups: Clojure ● #clojure on irc.freenode.net ● http://richhickey.github.com/clojure-contrib ● http://stuartsierra.com/ ● http://github.com/stuartsierra ● http://www.altlaw.org/