SlideShare a Scribd company logo
1 of 6
Download to read offline
package org.ajug;

import   cascading.cascade.Cascade;
import   cascading.cascade.CascadeConnector;
import   cascading.flow.Flow;
import   cascading.flow.FlowConnector;
import   cascading.pipe.Each;
import   cascading.pipe.Every;
import   cascading.pipe.GroupBy;
import   cascading.pipe.Pipe;
import   cascading.scheme.TextDelimited;
import   cascading.scheme.TextLine;
import   cascading.tap.Hfs;
import   cascading.tap.SinkMode;
import   cascading.tap.Tap;
import   cascading.tuple.Fields;

import java.util.Properties;

public class Main {

   public static void main(String[] args) {
       Properties properties = new Properties();
       FlowConnector.setApplicationJarClass(properties, Main.class);
       properties.put("mapred.reduce.tasks", 5);

        Pipe mainPipe = new Each("M&M", new Fields("line"), new Parser());
        mainPipe = new GroupBy(mainPipe, new Fields("COLOR"));
        mainPipe = new Every(mainPipe, Fields.ALL, new ColorAggregator(new
ColorData()));

         Tap sourceTap = new Hfs(new TextLine(), args[0]);

        TextDelimited scheme = new TextDelimited(new Fields("COLOR",
"AVG_WIDTH", "AVG_WEIGHT"), ",", """);
        scheme.setNumSinkParts(1); // make sure we only get one file


         Tap sinkTap = new Hfs(scheme, args[1], SinkMode.REPLACE);

         FlowConnector flowConnector = new FlowConnector(properties);
         CascadeConnector cascadeConnector = new CascadeConnector();

         Flow flow = flowConnector.connect(sourceTap, sinkTap, mainPipe);

         Cascade cascade = cascadeConnector.connect(flow);
         cascade.complete();     // Finally run everything

    }
}
================================================
package org.ajug;

import   cascading.cascade.Cascade;
import   cascading.cascade.CascadeConnector;
import   cascading.flow.Flow;
import   cascading.flow.FlowConnector;
import   cascading.pipe.*;
import   cascading.scheme.TextDelimited;
import   cascading.scheme.TextLine;
import   cascading.tap.Hfs;
import   cascading.tap.SinkMode;
import   cascading.tap.Tap;
import   cascading.tuple.Fields;

import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

public class MultiOutputMain {

   public static void main(String[] args) {
       Properties properties = new Properties();
       FlowConnector.setApplicationJarClass(properties, Main.class);
       properties.put("mapred.reduce.tasks", 5);

         Pipe sourcePipe = new Each("M&M", new Fields("line"), new Parser());

        Pipe totalPipe = new GroupBy("Total", sourcePipe, new Fields("ONE"));
        totalPipe = new Every(totalPipe, Fields.ALL, new TotalAggregator(new
TotalData()));

        Pipe mainPipe = new GroupBy("Color", sourcePipe, new Fields("COLOR"));
        mainPipe = new Every(mainPipe, Fields.ALL, new ColorAggregator(new
ColorData()));

         Tap sourceTap = new Hfs(new TextLine(), args[0]);

        TextDelimited scheme = new TextDelimited(new Fields("COLOR",
"AVG_WIDTH", "AVG_WEIGHT"), ",", """);
        scheme.setNumSinkParts(1); // make sure we only get one file
        Tap colorTap = new Hfs(scheme, args[1] + "/color", SinkMode.REPLACE);


        TextDelimited totalScheme = new TextDelimited(new
Fields("FINAL_WIDTH", "FINAL_WEIGHT"), ",", """);
        totalScheme.setNumSinkParts(1); // make sure we only get one file
        Tap totalTap = new Hfs(totalScheme, args[1] + "/total",
SinkMode.REPLACE);

         FlowConnector flowConnector = new FlowConnector(properties);
         CascadeConnector cascadeConnector = new CascadeConnector();

         Map<String, Tap> outputs = new HashMap<String, Tap>();
         outputs.put(totalPipe.getName(), totalTap);
         outputs.put(mainPipe.getName(), colorTap);

        Flow flow = flowConnector.connect(sourceTap, outputs, totalPipe,
mainPipe);

         Cascade cascade = cascadeConnector.connect(flow);
         cascade.complete();     // Finally run everything

    }
}
=======================================
package org.ajug;
import   cascading.flow.FlowProcess;
import   cascading.operation.Function;
import   cascading.operation.FunctionCall;
import   cascading.tuple.Fields;
import   cascading.tuple.Tuple;

import java.io.Serializable;


public class Parser extends cascading.operation.BaseOperation implements
Serializable, Function {

    public Parser() {
        super(new Fields("ONE","COLOR", "WIDTH", "WEIGHT"));   // should be
constants file ;)
    }


     public void operate(FlowProcess a_flow, FunctionCall a_call) {

         String sourceData = a_call.getArguments().getString(0);
         sourceData = sourceData.trim();
         if (sourceData == null || sourceData.length() == 0) {
             return;       // blank line read from the source file, so ignore
it
         }

         String values[] = sourceData.split(",");

         Tuple output = new Tuple();

         output.add("1");
         output.add(values[0]);
         output.add(values[1]);
         output.add(values[2]);
         a_call.getOutputCollector().add(output);
     }

}
==============================================
package org.ajug;


import cascading.tuple.Tuple;

import java.io.Serializable;

/**
 * Running totals of width and weight plus a sample count, used to produce
 * per-group averages.
 */
public class ColorData implements Serializable {

    private long m_num = 0;
    private double m_width = 0;
    private double m_weight = 0;

    /**
     * Records one sample.
     *
     * @param a_weight weight of the sample
     * @param a_width  width of the sample
     */
    public void addData(double a_weight, double a_width) {
        m_weight = m_weight + a_weight;
        m_width = m_width + a_width;
        m_num = m_num + 1;
    }

    /** Clears the count and both sums back to zero. */
    public void reset() {
        m_weight = 0;
        m_width = 0;
        m_num = 0;
    }

    /**
     * Builds the result tuple: average width first, then average weight.
     *
     * @return a two-element tuple, or {@code null} when no samples were added
     */
    public Tuple getTuple() {
        if (m_num != 0) {
            Tuple averages = new Tuple();
            averages.add(m_width / m_num);
            averages.add(m_weight / m_num);
            return averages;
        }
        return null;
    }
}
===============================================
package org.ajug;

import cascading.tuple.Tuple;

import java.io.Serializable;

/**
 * Accumulator for the overall width and weight sums and the number of samples
 * seen; yields the averages over everything added since the last reset.
 */
public class TotalData implements Serializable {

    private long m_num = 0;
    private double m_width = 0;
    private double m_weight = 0;

    /** Returns the accumulator to its initial, empty state. */
    public void reset() {
        m_num = 0;
        m_weight = 0;
        m_width = 0;
    }

    /**
     * Adds a single sample to the running sums and bumps the sample count.
     *
     * @param a_weight weight of the sample
     * @param a_width  width of the sample
     */
    public void addData(double a_weight, double a_width) {
        m_weight = m_weight + a_weight;
        m_width = m_width + a_width;
        ++m_num;
    }

    /**
     * Produces the averages as a tuple, width before weight.
     *
     * @return a two-element tuple, or {@code null} if nothing has been added
     */
    public Tuple getTuple() {
        if (m_num == 0) {
            return null;
        }

        double averageWidth = m_width / m_num;
        double averageWeight = m_weight / m_num;

        Tuple averages = new Tuple();
        averages.add(averageWidth);
        averages.add(averageWeight);
        return averages;
    }
}
==================================================
package org.ajug;
import   cascading.flow.FlowProcess;
import   cascading.operation.Aggregator;
import   cascading.operation.AggregatorCall;
import   cascading.operation.BaseOperation;
import   cascading.tuple.Fields;
import   cascading.tuple.Tuple;
import   cascading.tuple.TupleEntry;
import   org.apache.log4j.Logger;

/**
 * Aggregator that accumulates the {@code WEIGHT} and {@code WIDTH} arguments of
 * a group into a {@link TotalData} and emits its result tuple under the fields
 * {@code FINAL_WIDTH, FINAL_WEIGHT}.
 */
public class TotalAggregator extends BaseOperation<TotalData>
        implements Aggregator<TotalData> {

    static Logger m_logger = Logger.getLogger(TotalAggregator.class.getName());

    /** Accumulator handed in at construction; reset and reused as the context for each group. */
    private TotalData m_row;

    /**
     * @param a_row accumulator instance to use as the aggregation context
     */
    public TotalAggregator(TotalData a_row) {
        super(new Fields("FINAL_WIDTH", "FINAL_WEIGHT"));
        m_row = a_row;
    }

    /** Resets the accumulator and installs it as the context for the new group. */
    public void start(FlowProcess flowProcess,
                      AggregatorCall<TotalData> aggregatorCall) {
        m_row.reset();
        aggregatorCall.setContext(m_row);
    }

    /** Adds the current tuple's {@code WEIGHT} and {@code WIDTH} values to the context. */
    public void aggregate(FlowProcess flowProcess,
                          AggregatorCall<TotalData> aggregatorCall) {
        TupleEntry entry = aggregatorCall.getArguments();
        TotalData totals = aggregatorCall.getContext();

        // Arguments are evaluated left to right, so WEIGHT is read before WIDTH.
        totals.addData(entry.getDouble("WEIGHT"), entry.getDouble("WIDTH"));
    }

    /** Emits the context's result tuple, or nothing when the context has no data. */
    public void complete(FlowProcess flowProcess,
                         AggregatorCall<TotalData> aggregatorCall) {
        Tuple summary = aggregatorCall.getContext().getTuple();
        if (summary != null) {
            aggregatorCall.getOutputCollector().add(summary);
        }
    }
}

=========================================
package org.ajug;

import cascading.flow.FlowProcess;
import cascading.operation.Aggregator;
import cascading.operation.AggregatorCall;
import   cascading.operation.BaseOperation;
import   cascading.tuple.Fields;
import   cascading.tuple.Tuple;
import   cascading.tuple.TupleEntry;
import   org.apache.log4j.Logger;


/**
 * Aggregator that collects the {@code WEIGHT} and {@code WIDTH} arguments of a
 * group in a {@link ColorData} and emits its result tuple under the fields
 * {@code AVG_WIDTH, AVG_WEIGHT}.
 */
public class ColorAggregator extends BaseOperation<ColorData>
        implements Aggregator<ColorData> {

    static Logger m_logger = Logger.getLogger(ColorAggregator.class.getName());

    /** Accumulator supplied by the caller; cleared and reused as the context of every group. */
    private ColorData m_row;

    /**
     * @param a_row accumulator instance to use as the aggregation context
     */
    public ColorAggregator(ColorData a_row) {
        super(new Fields("AVG_WIDTH", "AVG_WEIGHT"));
        m_row = a_row;
    }

    /** Clears the accumulator, then registers it as the context for the group being started. */
    public void start(FlowProcess flowProcess,
                      AggregatorCall<ColorData> aggregatorCall) {
        m_row.reset();
        aggregatorCall.setContext(m_row);
    }

    /** Feeds the current tuple's {@code WEIGHT} and {@code WIDTH} values into the context. */
    public void aggregate(FlowProcess flowProcess,
                          AggregatorCall<ColorData> aggregatorCall) {
        TupleEntry entry = aggregatorCall.getArguments();
        ColorData perColor = aggregatorCall.getContext();

        // Arguments are evaluated left to right, so WEIGHT is read before WIDTH.
        perColor.addData(entry.getDouble("WEIGHT"), entry.getDouble("WIDTH"));
    }

    /** Writes the context's result tuple to the output; writes nothing if it is {@code null}. */
    public void complete(FlowProcess flowProcess,
                         AggregatorCall<ColorData> aggregatorCall) {
        Tuple averages = aggregatorCall.getContext().getTuple();
        if (averages != null) {
            aggregatorCall.getOutputCollector().add(averages);
        }
    }
}

More Related Content

What's hot

Apache Spark - Basics of RDD & RDD Operations | Big Data Hadoop Spark Tutoria...
Apache Spark - Basics of RDD & RDD Operations | Big Data Hadoop Spark Tutoria...Apache Spark - Basics of RDD & RDD Operations | Big Data Hadoop Spark Tutoria...
Apache Spark - Basics of RDD & RDD Operations | Big Data Hadoop Spark Tutoria...CloudxLab
 
Compact and safely: static DSL on Kotlin
Compact and safely: static DSL on KotlinCompact and safely: static DSL on Kotlin
Compact and safely: static DSL on KotlinDmitry Pranchuk
 
Greach, GroovyFx Workshop
Greach, GroovyFx WorkshopGreach, GroovyFx Workshop
Greach, GroovyFx WorkshopDierk König
 
Jggug 2010 330 Grails 1.3 観察
Jggug 2010 330 Grails 1.3 観察Jggug 2010 330 Grails 1.3 観察
Jggug 2010 330 Grails 1.3 観察Tsuyoshi Yamamoto
 
The Ring programming language version 1.9 book - Part 43 of 210
The Ring programming language version 1.9 book - Part 43 of 210The Ring programming language version 1.9 book - Part 43 of 210
The Ring programming language version 1.9 book - Part 43 of 210Mahmoud Samir Fayed
 
Michael Häusler – Everyday flink
Michael Häusler – Everyday flinkMichael Häusler – Everyday flink
Michael Häusler – Everyday flinkFlink Forward
 
Spring data ii
Spring data iiSpring data ii
Spring data ii명철 강
 
Reactive Programming for a demanding world: building event-driven and respons...
Reactive Programming for a demanding world: building event-driven and respons...Reactive Programming for a demanding world: building event-driven and respons...
Reactive Programming for a demanding world: building event-driven and respons...Mario Fusco
 
Scott Anderson [InfluxData] | InfluxDB Tasks – Beyond Downsampling | InfluxDa...
Scott Anderson [InfluxData] | InfluxDB Tasks – Beyond Downsampling | InfluxDa...Scott Anderson [InfluxData] | InfluxDB Tasks – Beyond Downsampling | InfluxDa...
Scott Anderson [InfluxData] | InfluxDB Tasks – Beyond Downsampling | InfluxDa...InfluxData
 
Столпы функционального программирования для адептов ООП, Николай Мозговой
Столпы функционального программирования для адептов ООП, Николай МозговойСтолпы функционального программирования для адептов ООП, Николай Мозговой
Столпы функционального программирования для адептов ООП, Николай МозговойSigma Software
 
Image magick++
Image magick++Image magick++
Image magick++Yubin Lim
 
Look Ma, “update DB to HTML5 using C++”, no hands! 
Look Ma, “update DB to HTML5 using C++”, no hands! Look Ma, “update DB to HTML5 using C++”, no hands! 
Look Ma, “update DB to HTML5 using C++”, no hands! aleks-f
 

What's hot (20)

Apache Spark - Basics of RDD & RDD Operations | Big Data Hadoop Spark Tutoria...
Apache Spark - Basics of RDD & RDD Operations | Big Data Hadoop Spark Tutoria...Apache Spark - Basics of RDD & RDD Operations | Big Data Hadoop Spark Tutoria...
Apache Spark - Basics of RDD & RDD Operations | Big Data Hadoop Spark Tutoria...
 
Angular2 rxjs
Angular2 rxjsAngular2 rxjs
Angular2 rxjs
 
Kitura Todolist tutorial
Kitura Todolist tutorialKitura Todolist tutorial
Kitura Todolist tutorial
 
Compact and safely: static DSL on Kotlin
Compact and safely: static DSL on KotlinCompact and safely: static DSL on Kotlin
Compact and safely: static DSL on Kotlin
 
Greach, GroovyFx Workshop
Greach, GroovyFx WorkshopGreach, GroovyFx Workshop
Greach, GroovyFx Workshop
 
Jggug 2010 330 Grails 1.3 観察
Jggug 2010 330 Grails 1.3 観察Jggug 2010 330 Grails 1.3 観察
Jggug 2010 330 Grails 1.3 観察
 
Rxjs ngvikings
Rxjs ngvikingsRxjs ngvikings
Rxjs ngvikings
 
The Ring programming language version 1.9 book - Part 43 of 210
The Ring programming language version 1.9 book - Part 43 of 210The Ring programming language version 1.9 book - Part 43 of 210
The Ring programming language version 1.9 book - Part 43 of 210
 
Michael Häusler – Everyday flink
Michael Häusler – Everyday flinkMichael Häusler – Everyday flink
Michael Häusler – Everyday flink
 
Spring data ii
Spring data iiSpring data ii
Spring data ii
 
Reactive Programming for a demanding world: building event-driven and respons...
Reactive Programming for a demanding world: building event-driven and respons...Reactive Programming for a demanding world: building event-driven and respons...
Reactive Programming for a demanding world: building event-driven and respons...
 
Scott Anderson [InfluxData] | InfluxDB Tasks – Beyond Downsampling | InfluxDa...
Scott Anderson [InfluxData] | InfluxDB Tasks – Beyond Downsampling | InfluxDa...Scott Anderson [InfluxData] | InfluxDB Tasks – Beyond Downsampling | InfluxDa...
Scott Anderson [InfluxData] | InfluxDB Tasks – Beyond Downsampling | InfluxDa...
 
Столпы функционального программирования для адептов ООП, Николай Мозговой
Столпы функционального программирования для адептов ООП, Николай МозговойСтолпы функционального программирования для адептов ООП, Николай Мозговой
Столпы функционального программирования для адептов ООП, Николай Мозговой
 
Image magick++
Image magick++Image magick++
Image magick++
 
Look Ma, “update DB to HTML5 using C++”, no hands! 
Look Ma, “update DB to HTML5 using C++”, no hands! Look Ma, “update DB to HTML5 using C++”, no hands! 
Look Ma, “update DB to HTML5 using C++”, no hands! 
 
Spark_Documentation_Template1
Spark_Documentation_Template1Spark_Documentation_Template1
Spark_Documentation_Template1
 
Parallel streams in java 8
Parallel streams in java 8Parallel streams in java 8
Parallel streams in java 8
 
Oop assignment 02
Oop assignment 02Oop assignment 02
Oop assignment 02
 
Hadoop Puzzlers
Hadoop PuzzlersHadoop Puzzlers
Hadoop Puzzlers
 
Gwt and Xtend
Gwt and XtendGwt and Xtend
Gwt and Xtend
 

Viewers also liked

Jenny, Katerina And Arynda
Jenny, Katerina And AryndaJenny, Katerina And Arynda
Jenny, Katerina And Aryndakaterinawsy
 
Hadoop and Cascading At AJUG July 2009
Hadoop and Cascading At AJUG July 2009Hadoop and Cascading At AJUG July 2009
Hadoop and Cascading At AJUG July 2009Christopher Curtin
 
Atlanta hadoop users group july 2013
Atlanta hadoop users group july 2013Atlanta hadoop users group july 2013
Atlanta hadoop users group july 2013Christopher Curtin
 
2011 march cloud computing atlanta
2011 march cloud computing atlanta2011 march cloud computing atlanta
2011 march cloud computing atlantaChristopher Curtin
 
UnConference for Georgia Southern Computer Science March 31, 2015
UnConference for Georgia Southern Computer Science March 31, 2015UnConference for Georgia Southern Computer Science March 31, 2015
UnConference for Georgia Southern Computer Science March 31, 2015Christopher Curtin
 

Viewers also liked (7)

Jenny, Katerina And Arynda
Jenny, Katerina And AryndaJenny, Katerina And Arynda
Jenny, Katerina And Arynda
 
Hadoop and Cascading At AJUG July 2009
Hadoop and Cascading At AJUG July 2009Hadoop and Cascading At AJUG July 2009
Hadoop and Cascading At AJUG July 2009
 
Atlanta hadoop users group july 2013
Atlanta hadoop users group july 2013Atlanta hadoop users group july 2013
Atlanta hadoop users group july 2013
 
2011 march cloud computing atlanta
2011 march cloud computing atlanta2011 march cloud computing atlanta
2011 march cloud computing atlanta
 
Nosql East October 2009
Nosql East October 2009Nosql East October 2009
Nosql East October 2009
 
Ajug april 2011
Ajug april 2011Ajug april 2011
Ajug april 2011
 
UnConference for Georgia Southern Computer Science March 31, 2015
UnConference for Georgia Southern Computer Science March 31, 2015UnConference for Georgia Southern Computer Science March 31, 2015
UnConference for Georgia Southern Computer Science March 31, 2015
 

Similar to AJUG April 2011 Cascading example

TypeScript Introduction
TypeScript IntroductionTypeScript Introduction
TypeScript IntroductionDmitry Sheiko
 
AJUG April 2011 Raw hadoop example
AJUG April 2011 Raw hadoop exampleAJUG April 2011 Raw hadoop example
AJUG April 2011 Raw hadoop exampleChristopher Curtin
 
VISUALIZAR REGISTROS EN UN JTABLE
VISUALIZAR REGISTROS EN UN JTABLEVISUALIZAR REGISTROS EN UN JTABLE
VISUALIZAR REGISTROS EN UN JTABLEDarwin Durand
 
Cascading Through Hadoop for the Boulder JUG
Cascading Through Hadoop for the Boulder JUGCascading Through Hadoop for the Boulder JUG
Cascading Through Hadoop for the Boulder JUGMatthew McCullough
 
JJUG CCC 2011 Spring
JJUG CCC 2011 SpringJJUG CCC 2011 Spring
JJUG CCC 2011 SpringKiyotaka Oku
 
Hadoop Integration in Cassandra
Hadoop Integration in CassandraHadoop Integration in Cassandra
Hadoop Integration in CassandraJairam Chandar
 
Hazelcast
HazelcastHazelcast
Hazelcastoztalip
 
Clustering your Application with Hazelcast
Clustering your Application with HazelcastClustering your Application with Hazelcast
Clustering your Application with HazelcastHazelcast
 
Store and Process Big Data with Hadoop and Cassandra
Store and Process Big Data with Hadoop and CassandraStore and Process Big Data with Hadoop and Cassandra
Store and Process Big Data with Hadoop and CassandraDeependra Ariyadewa
 
Hadoop Installation_13_09_2022(1).docx
Hadoop Installation_13_09_2022(1).docxHadoop Installation_13_09_2022(1).docx
Hadoop Installation_13_09_2022(1).docx1MS20CS406
 
Codepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash course
Codepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash courseCodepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash course
Codepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash courseSages
 
Nice to meet Kotlin
Nice to meet KotlinNice to meet Kotlin
Nice to meet KotlinJieyi Wu
 
The current program only run one iteration of the KMeans algorithm. .docx
The current program only run one iteration of the KMeans algorithm. .docxThe current program only run one iteration of the KMeans algorithm. .docx
The current program only run one iteration of the KMeans algorithm. .docxtodd241
 
The core libraries you always wanted - Google Guava
The core libraries you always wanted - Google GuavaThe core libraries you always wanted - Google Guava
The core libraries you always wanted - Google GuavaMite Mitreski
 
Easy Scaling with Open Source Data Structures, by Talip Ozturk
Easy Scaling with Open Source Data Structures, by Talip OzturkEasy Scaling with Open Source Data Structures, by Talip Ozturk
Easy Scaling with Open Source Data Structures, by Talip OzturkZeroTurnaround
 
Logic Equations Resolver J Script
Logic Equations Resolver   J ScriptLogic Equations Resolver   J Script
Logic Equations Resolver J ScriptRoman Agaev
 
Reactive programming on Android
Reactive programming on AndroidReactive programming on Android
Reactive programming on AndroidTomáš Kypta
 
Main class --------------------------import java.awt.FlowLayout.pdf
Main class --------------------------import java.awt.FlowLayout.pdfMain class --------------------------import java.awt.FlowLayout.pdf
Main class --------------------------import java.awt.FlowLayout.pdfanushkaent7
 

Similar to AJUG April 2011 Cascading example (20)

TypeScript Introduction
TypeScript IntroductionTypeScript Introduction
TypeScript Introduction
 
AJUG April 2011 Raw hadoop example
AJUG April 2011 Raw hadoop exampleAJUG April 2011 Raw hadoop example
AJUG April 2011 Raw hadoop example
 
VISUALIZAR REGISTROS EN UN JTABLE
VISUALIZAR REGISTROS EN UN JTABLEVISUALIZAR REGISTROS EN UN JTABLE
VISUALIZAR REGISTROS EN UN JTABLE
 
Cascading Through Hadoop for the Boulder JUG
Cascading Through Hadoop for the Boulder JUGCascading Through Hadoop for the Boulder JUG
Cascading Through Hadoop for the Boulder JUG
 
Amazon elastic map reduce
Amazon elastic map reduceAmazon elastic map reduce
Amazon elastic map reduce
 
JJUG CCC 2011 Spring
JJUG CCC 2011 SpringJJUG CCC 2011 Spring
JJUG CCC 2011 Spring
 
Hadoop Integration in Cassandra
Hadoop Integration in CassandraHadoop Integration in Cassandra
Hadoop Integration in Cassandra
 
Hazelcast
HazelcastHazelcast
Hazelcast
 
Clustering your Application with Hazelcast
Clustering your Application with HazelcastClustering your Application with Hazelcast
Clustering your Application with Hazelcast
 
Store and Process Big Data with Hadoop and Cassandra
Store and Process Big Data with Hadoop and CassandraStore and Process Big Data with Hadoop and Cassandra
Store and Process Big Data with Hadoop and Cassandra
 
Hadoop Installation_13_09_2022(1).docx
Hadoop Installation_13_09_2022(1).docxHadoop Installation_13_09_2022(1).docx
Hadoop Installation_13_09_2022(1).docx
 
Codepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash course
Codepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash courseCodepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash course
Codepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash course
 
Jersey Guice AOP
Jersey Guice AOPJersey Guice AOP
Jersey Guice AOP
 
Nice to meet Kotlin
Nice to meet KotlinNice to meet Kotlin
Nice to meet Kotlin
 
The current program only run one iteration of the KMeans algorithm. .docx
The current program only run one iteration of the KMeans algorithm. .docxThe current program only run one iteration of the KMeans algorithm. .docx
The current program only run one iteration of the KMeans algorithm. .docx
 
The core libraries you always wanted - Google Guava
The core libraries you always wanted - Google GuavaThe core libraries you always wanted - Google Guava
The core libraries you always wanted - Google Guava
 
Easy Scaling with Open Source Data Structures, by Talip Ozturk
Easy Scaling with Open Source Data Structures, by Talip OzturkEasy Scaling with Open Source Data Structures, by Talip Ozturk
Easy Scaling with Open Source Data Structures, by Talip Ozturk
 
Logic Equations Resolver J Script
Logic Equations Resolver   J ScriptLogic Equations Resolver   J Script
Logic Equations Resolver J Script
 
Reactive programming on Android
Reactive programming on AndroidReactive programming on Android
Reactive programming on Android
 
Main class --------------------------import java.awt.FlowLayout.pdf
Main class --------------------------import java.awt.FlowLayout.pdfMain class --------------------------import java.awt.FlowLayout.pdf
Main class --------------------------import java.awt.FlowLayout.pdf
 

Recently uploaded

Making_way_through_DLL_hollowing_inspite_of_CFG_by_Debjeet Banerjee.pptx
Making_way_through_DLL_hollowing_inspite_of_CFG_by_Debjeet Banerjee.pptxMaking_way_through_DLL_hollowing_inspite_of_CFG_by_Debjeet Banerjee.pptx
Making_way_through_DLL_hollowing_inspite_of_CFG_by_Debjeet Banerjee.pptxnull - The Open Security Community
 
Transcript: #StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024
Transcript: #StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024Transcript: #StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024
Transcript: #StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024BookNet Canada
 
Pigging Solutions in Pet Food Manufacturing
Pigging Solutions in Pet Food ManufacturingPigging Solutions in Pet Food Manufacturing
Pigging Solutions in Pet Food ManufacturingPigging Solutions
 
Presentation on how to chat with PDF using ChatGPT code interpreter
Presentation on how to chat with PDF using ChatGPT code interpreterPresentation on how to chat with PDF using ChatGPT code interpreter
Presentation on how to chat with PDF using ChatGPT code interpreternaman860154
 
Maximizing Board Effectiveness 2024 Webinar.pptx
Maximizing Board Effectiveness 2024 Webinar.pptxMaximizing Board Effectiveness 2024 Webinar.pptx
Maximizing Board Effectiveness 2024 Webinar.pptxOnBoard
 
Benefits Of Flutter Compared To Other Frameworks
Benefits Of Flutter Compared To Other FrameworksBenefits Of Flutter Compared To Other Frameworks
Benefits Of Flutter Compared To Other FrameworksSoftradix Technologies
 
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...Integration and Automation in Practice: CI/CD in Mule Integration and Automat...
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...Patryk Bandurski
 
Enhancing Worker Digital Experience: A Hands-on Workshop for Partners
Enhancing Worker Digital Experience: A Hands-on Workshop for PartnersEnhancing Worker Digital Experience: A Hands-on Workshop for Partners
Enhancing Worker Digital Experience: A Hands-on Workshop for PartnersThousandEyes
 
Slack Application Development 101 Slides
Slack Application Development 101 SlidesSlack Application Development 101 Slides
Slack Application Development 101 Slidespraypatel2
 
How to Remove Document Management Hurdles with X-Docs?
How to Remove Document Management Hurdles with X-Docs?How to Remove Document Management Hurdles with X-Docs?
How to Remove Document Management Hurdles with X-Docs?XfilesPro
 
Understanding the Laravel MVC Architecture
Understanding the Laravel MVC ArchitectureUnderstanding the Laravel MVC Architecture
Understanding the Laravel MVC ArchitecturePixlogix Infotech
 
Transforming Data Streams with Kafka Connect: An Introduction to Single Messa...
Transforming Data Streams with Kafka Connect: An Introduction to Single Messa...Transforming Data Streams with Kafka Connect: An Introduction to Single Messa...
Transforming Data Streams with Kafka Connect: An Introduction to Single Messa...HostedbyConfluent
 
Breaking the Kubernetes Kill Chain: Host Path Mount
Breaking the Kubernetes Kill Chain: Host Path MountBreaking the Kubernetes Kill Chain: Host Path Mount
Breaking the Kubernetes Kill Chain: Host Path MountPuma Security, LLC
 
Human Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsHuman Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsMark Billinghurst
 
Tech-Forward - Achieving Business Readiness For Copilot in Microsoft 365
Tech-Forward - Achieving Business Readiness For Copilot in Microsoft 365Tech-Forward - Achieving Business Readiness For Copilot in Microsoft 365
Tech-Forward - Achieving Business Readiness For Copilot in Microsoft 3652toLead Limited
 
GenCyber Cyber Security Day Presentation
GenCyber Cyber Security Day PresentationGenCyber Cyber Security Day Presentation
GenCyber Cyber Security Day PresentationMichael W. Hawkins
 
Key Features Of Token Development (1).pptx
Key  Features Of Token  Development (1).pptxKey  Features Of Token  Development (1).pptx
Key Features Of Token Development (1).pptxLBM Solutions
 
Kotlin Multiplatform & Compose Multiplatform - Starter kit for pragmatics
Kotlin Multiplatform & Compose Multiplatform - Starter kit for pragmaticsKotlin Multiplatform & Compose Multiplatform - Starter kit for pragmatics
Kotlin Multiplatform & Compose Multiplatform - Starter kit for pragmaticscarlostorres15106
 
The Codex of Business Writing Software for Real-World Solutions 2.pptx
The Codex of Business Writing Software for Real-World Solutions 2.pptxThe Codex of Business Writing Software for Real-World Solutions 2.pptx
The Codex of Business Writing Software for Real-World Solutions 2.pptxMalak Abu Hammad
 
08448380779 Call Girls In Civil Lines Women Seeking Men
08448380779 Call Girls In Civil Lines Women Seeking Men08448380779 Call Girls In Civil Lines Women Seeking Men
08448380779 Call Girls In Civil Lines Women Seeking MenDelhi Call girls
 

Recently uploaded (20)

Making_way_through_DLL_hollowing_inspite_of_CFG_by_Debjeet Banerjee.pptx
Making_way_through_DLL_hollowing_inspite_of_CFG_by_Debjeet Banerjee.pptxMaking_way_through_DLL_hollowing_inspite_of_CFG_by_Debjeet Banerjee.pptx
Making_way_through_DLL_hollowing_inspite_of_CFG_by_Debjeet Banerjee.pptx
 
Transcript: #StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024
Transcript: #StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024Transcript: #StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024
Transcript: #StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024
 
Pigging Solutions in Pet Food Manufacturing
Pigging Solutions in Pet Food ManufacturingPigging Solutions in Pet Food Manufacturing
Pigging Solutions in Pet Food Manufacturing
 
Presentation on how to chat with PDF using ChatGPT code interpreter
Presentation on how to chat with PDF using ChatGPT code interpreterPresentation on how to chat with PDF using ChatGPT code interpreter
Presentation on how to chat with PDF using ChatGPT code interpreter
 
Maximizing Board Effectiveness 2024 Webinar.pptx
Maximizing Board Effectiveness 2024 Webinar.pptxMaximizing Board Effectiveness 2024 Webinar.pptx
Maximizing Board Effectiveness 2024 Webinar.pptx
 
Benefits Of Flutter Compared To Other Frameworks
Benefits Of Flutter Compared To Other FrameworksBenefits Of Flutter Compared To Other Frameworks
Benefits Of Flutter Compared To Other Frameworks
 
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...Integration and Automation in Practice: CI/CD in Mule Integration and Automat...
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...
 
Enhancing Worker Digital Experience: A Hands-on Workshop for Partners
Enhancing Worker Digital Experience: A Hands-on Workshop for PartnersEnhancing Worker Digital Experience: A Hands-on Workshop for Partners
Enhancing Worker Digital Experience: A Hands-on Workshop for Partners
 
Slack Application Development 101 Slides
Slack Application Development 101 SlidesSlack Application Development 101 Slides
Slack Application Development 101 Slides
 
How to Remove Document Management Hurdles with X-Docs?
How to Remove Document Management Hurdles with X-Docs?How to Remove Document Management Hurdles with X-Docs?
How to Remove Document Management Hurdles with X-Docs?
 
Understanding the Laravel MVC Architecture
Understanding the Laravel MVC ArchitectureUnderstanding the Laravel MVC Architecture
Understanding the Laravel MVC Architecture
 
Transforming Data Streams with Kafka Connect: An Introduction to Single Messa...
Transforming Data Streams with Kafka Connect: An Introduction to Single Messa...Transforming Data Streams with Kafka Connect: An Introduction to Single Messa...
Transforming Data Streams with Kafka Connect: An Introduction to Single Messa...
 
Breaking the Kubernetes Kill Chain: Host Path Mount
Breaking the Kubernetes Kill Chain: Host Path MountBreaking the Kubernetes Kill Chain: Host Path Mount
Breaking the Kubernetes Kill Chain: Host Path Mount
 
Human Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsHuman Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR Systems
 
Tech-Forward - Achieving Business Readiness For Copilot in Microsoft 365
Tech-Forward - Achieving Business Readiness For Copilot in Microsoft 365Tech-Forward - Achieving Business Readiness For Copilot in Microsoft 365
Tech-Forward - Achieving Business Readiness For Copilot in Microsoft 365
 
GenCyber Cyber Security Day Presentation
GenCyber Cyber Security Day PresentationGenCyber Cyber Security Day Presentation
GenCyber Cyber Security Day Presentation
 
Key Features Of Token Development (1).pptx
Key  Features Of Token  Development (1).pptxKey  Features Of Token  Development (1).pptx
Key Features Of Token Development (1).pptx
 
Kotlin Multiplatform & Compose Multiplatform - Starter kit for pragmatics
Kotlin Multiplatform & Compose Multiplatform - Starter kit for pragmaticsKotlin Multiplatform & Compose Multiplatform - Starter kit for pragmatics
Kotlin Multiplatform & Compose Multiplatform - Starter kit for pragmatics
 
The Codex of Business Writing Software for Real-World Solutions 2.pptx
The Codex of Business Writing Software for Real-World Solutions 2.pptxThe Codex of Business Writing Software for Real-World Solutions 2.pptx
The Codex of Business Writing Software for Real-World Solutions 2.pptx
 
08448380779 Call Girls In Civil Lines Women Seeking Men
08448380779 Call Girls In Civil Lines Women Seeking Men08448380779 Call Girls In Civil Lines Women Seeking Men
08448380779 Call Girls In Civil Lines Women Seeking Men
 

AJUG April 2011 Cascading example

  // Slide 1
package org.ajug;

import cascading.cascade.Cascade;
import cascading.cascade.CascadeConnector;
import cascading.flow.Flow;
import cascading.flow.FlowConnector;
import cascading.pipe.Each;
import cascading.pipe.Every;
import cascading.pipe.GroupBy;
import cascading.pipe.Pipe;
import cascading.scheme.TextDelimited;
import cascading.scheme.TextLine;
import cascading.tap.Hfs;
import cascading.tap.SinkMode;
import cascading.tap.Tap;
import cascading.tuple.Fields;

import java.util.Properties;

/**
 * Single-output example: runs every input line through {@link Parser}, groups the
 * result on the COLOR field, aggregates each group with {@link ColorAggregator}
 * and writes COLOR, AVG_WIDTH and AVG_WEIGHT to one delimited sink.
 */
public class Main {

    /**
     * Builds the flow, wraps it in a cascade and runs it.
     *
     * @param args {@code args[0]} is handed to the source tap, {@code args[1]} to the sink tap
     */
    public static void main(String[] args) {
        Properties properties = new Properties();
        FlowConnector.setApplicationJarClass(properties, Main.class);
        // Properties is specified as a String-to-String map; setProperty keeps the value
        // visible to getProperty(), which returns null for non-String values stored via put().
        properties.setProperty("mapred.reduce.tasks", "5");

        Pipe mainPipe = new Each("M&M", new Fields("line"), new Parser());
        mainPipe = new GroupBy(mainPipe, new Fields("COLOR"));
        mainPipe = new Every(mainPipe, Fields.ALL, new ColorAggregator(new ColorData()));

        Tap sourceTap = new Hfs(new TextLine(), args[0]);

        // Third argument restored to an escaped double quote; the transcript had lost the
        // backslash. NOTE(review): presumably this is the quote character - confirm.
        TextDelimited scheme =
                new TextDelimited(new Fields("COLOR", "AVG_WIDTH", "AVG_WEIGHT"), ",", "\"");
        scheme.setNumSinkParts(1); // make sure we only get one file

        Tap sinkTap = new Hfs(scheme, args[1], SinkMode.REPLACE);

        FlowConnector flowConnector = new FlowConnector(properties);
        CascadeConnector cascadeConnector = new CascadeConnector();

        Flow flow = flowConnector.connect(sourceTap, sinkTap, mainPipe);

        Cascade cascade = cascadeConnector.connect(flow);
        cascade.complete(); // Finally run everything
    }
}
================================================
package org.ajug;

import cascading.cascade.Cascade;
import cascading.cascade.CascadeConnector;
import cascading.flow.Flow;
import cascading.flow.FlowConnector;
import cascading.pipe.*;
import cascading.scheme.TextDelimited;
  • 2. import cascading.scheme.TextLine; import cascading.tap.Hfs; import cascading.tap.SinkMode; import cascading.tap.Tap; import cascading.tuple.Fields; import java.util.HashMap; import java.util.Map; import java.util.Properties; public class MultiOutputMain { public static void main(String[] args) { Properties properties = new Properties(); FlowConnector.setApplicationJarClass(properties, Main.class); properties.put("mapred.reduce.tasks", 5); Pipe sourcePipe = new Each("M&M", new Fields("line"), new Parser()); Pipe totalPipe = new GroupBy("Total", sourcePipe, new Fields("ONE")); totalPipe = new Every(totalPipe, Fields.ALL, new TotalAggregator(new TotalData())); Pipe mainPipe = new GroupBy("Color", sourcePipe, new Fields("COLOR")); mainPipe = new Every(mainPipe, Fields.ALL, new ColorAggregator(new ColorData())); Tap sourceTap = new Hfs(new TextLine(), args[0]); TextDelimited scheme = new TextDelimited(new Fields("COLOR", "AVG_WIDTH", "AVG_WEIGHT"), ",", """); scheme.setNumSinkParts(1); // make sure we only get one file Tap colorTap = new Hfs(scheme, args[1] + "/color", SinkMode.REPLACE); TextDelimited totalScheme = new TextDelimited(new Fields("FINAL_WIDTH", "FINAL_WEIGHT"), ",", """); totalScheme.setNumSinkParts(1); // make sure we only get one file Tap totalTap = new Hfs(totalScheme, args[1] + "/total", SinkMode.REPLACE); FlowConnector flowConnector = new FlowConnector(properties); CascadeConnector cascadeConnector = new CascadeConnector(); Map<String, Tap> outputs = new HashMap<String, Tap>(); outputs.put(totalPipe.getName(), totalTap); outputs.put(mainPipe.getName(), colorTap); Flow flow = flowConnector.connect(sourceTap, outputs, totalPipe, mainPipe); Cascade cascade = cascadeConnector.connect(flow); cascade.complete(); // Finally run everything } } ======================================= package org.ajug;
  // Slide 3
import cascading.flow.FlowProcess;
import cascading.operation.Function;
import cascading.operation.FunctionCall;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;

import java.io.Serializable;

/**
 * Splits one comma-separated input line into the fields ONE, COLOR, WIDTH and WEIGHT.
 * ONE is always the string "1"; the other three are the first three comma-separated
 * values of the line, in that order.
 */
public class Parser extends cascading.operation.BaseOperation implements Serializable, Function {

    public Parser() {
        super(new Fields("ONE", "COLOR", "WIDTH", "WEIGHT")); // should be constants file ;)
    }

    /**
     * Emits one tuple per non-blank input line; null and blank lines produce no output.
     *
     * @param a_flow the flow process (unused here)
     * @param a_call supplies the input line as argument 0 and the output collector
     */
    public void operate(FlowProcess a_flow, FunctionCall a_call) {
        String sourceData = a_call.getArguments().getString(0);
        // Check for null before trimming; the original trimmed first, which made the
        // null check unreachable and threw NullPointerException on a null line.
        if (sourceData == null) {
            return;
        }
        sourceData = sourceData.trim();
        if (sourceData.length() == 0) {
            return; // blank line read from the source file, so ignore it
        }

        // NOTE(review): a line with fewer than three comma-separated values fails below
        // with ArrayIndexOutOfBoundsException - decide whether to skip or reject such rows.
        String[] values = sourceData.split(",");
        Tuple output = new Tuple();
        output.add("1");
        output.add(values[0]);
        output.add(values[1]);
        output.add(values[2]);
        a_call.getOutputCollector().add(output);
    }
}
==============================================
package org.ajug;

import cascading.tuple.Tuple;

import java.io.Serializable;

/**
 * Running sums of width and weight plus a sample count, used to produce averages.
 */
public class ColorData implements Serializable {
    private long m_num = 0;
    private double m_width = 0;
    private double m_weight = 0;

    /** Clears the sample count and both sums. */
    public void reset() {
        m_num = 0;
        m_width = 0;
        m_weight = 0;
    }

    // Slide 4
    /**
     * Adds one sample. Note the parameter order: weight first, then width.
     */
    public void addData(double a_weight, double a_width) {
        m_weight += a_weight;
        m_width += a_width;
        m_num++;
    }

    /**
     * @return a tuple of (average width, average weight), or {@code null} when no
     *         sample has been added since the last reset
     */
    public Tuple getTuple() {
        if (m_num == 0) {
            return null;
        }
        Tuple rtnValue = new Tuple();
        rtnValue.add(m_width / m_num);
        rtnValue.add(m_weight / m_num);
        return rtnValue;
    }
}
===============================================
package org.ajug;

import cascading.tuple.Tuple;

import java.io.Serializable;

/**
 * Running sums of width and weight plus a sample count, used to produce averages.
 */
public class TotalData implements Serializable {
    private long m_num = 0;
    private double m_width = 0;
    private double m_weight = 0;

    /** Clears the sample count and both sums. */
    public void reset() {
        m_num = 0;
        m_width = 0;
        m_weight = 0;
    }

    /**
     * Adds one sample. Note the parameter order: weight first, then width.
     */
    public void addData(double a_weight, double a_width) {
        m_weight += a_weight;
        m_width += a_width;
        m_num++;
    }

    /**
     * @return a tuple of (average width, average weight), or {@code null} when no
     *         sample has been added since the last reset
     */
    public Tuple getTuple() {
        if (m_num == 0) {
            return null;
        }
        Tuple rtnValue = new Tuple();
        rtnValue.add(m_width / m_num);
        rtnValue.add(m_weight / m_num);
        return rtnValue;
    }
}
==================================================
package org.ajug;
  // Slide 5
import cascading.flow.FlowProcess;
import cascading.operation.Aggregator;
import cascading.operation.AggregatorCall;
import cascading.operation.BaseOperation;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;
import org.apache.log4j.Logger;

/**
 * Aggregator that feeds the WEIGHT and WIDTH of each incoming tuple into a
 * {@link TotalData} and emits its result tuple under the fields FINAL_WIDTH and
 * FINAL_WEIGHT.
 */
public class TotalAggregator extends BaseOperation<TotalData> implements Aggregator<TotalData> {
    static Logger m_logger = Logger.getLogger(TotalAggregator.class.getName());

    // Single accumulator instance, reset and reused as the context on every start().
    private TotalData m_row;

    /**
     * @param a_row accumulator that will be reset and used as the aggregation context
     */
    public TotalAggregator(TotalData a_row) {
        super(new Fields("FINAL_WIDTH", "FINAL_WEIGHT"));
        m_row = a_row;
    }

    /**
     * Resets the accumulator and installs it as the call's context.
     * NOTE(review): presumably invoked once per grouping by Cascading - confirm.
     */
    public void start(FlowProcess flowProcess, AggregatorCall<TotalData> aggregatorCall) {
        // set the context object
        m_row.reset();
        aggregatorCall.setContext(m_row);
    }

    /**
     * Emits the accumulated result, or nothing when the context holds no samples.
     */
    public void complete(FlowProcess flowProcess, AggregatorCall<TotalData> aggregatorCall) {
        TotalData context = aggregatorCall.getContext();
        Tuple results = context.getTuple();
        if (results == null) {
            return; // Nothing there to report
        }
        aggregatorCall.getOutputCollector().add(results);
    }

    /**
     * Reads WEIGHT and WIDTH from the current arguments and adds them to the context.
     */
    public void aggregate(FlowProcess flowProcess, AggregatorCall<TotalData> aggregatorCall) {
        TupleEntry arguments = aggregatorCall.getArguments();
        TotalData context = aggregatorCall.getContext();
        double weight = arguments.getDouble("WEIGHT");
        double width = arguments.getDouble("WIDTH");
        context.addData(weight, width);
    }
}
=========================================
package org.ajug;

import cascading.flow.FlowProcess;
import cascading.operation.Aggregator;
import cascading.operation.AggregatorCall;
  • 6. import cascading.operation.BaseOperation; import cascading.tuple.Fields; import cascading.tuple.Tuple; import cascading.tuple.TupleEntry; import org.apache.log4j.Logger; public class ColorAggregator extends BaseOperation<ColorData> implements Aggregator<ColorData> { static Logger m_logger = Logger.getLogger(ColorAggregator.class.getName()); private ColorData m_row; public ColorAggregator(ColorData a_row) { super(new Fields("AVG_WIDTH", "AVG_WEIGHT")); m_row = a_row; } public void start(FlowProcess flowProcess, AggregatorCall<ColorData> aggregatorCall) { // set the context object m_row.reset(); aggregatorCall.setContext(m_row); } public void complete(FlowProcess flowProcess, AggregatorCall<ColorData> aggregatorCall) { ColorData context = aggregatorCall.getContext(); Tuple results = context.getTuple(); if (results == null) return; // Nothing there to report aggregatorCall.getOutputCollector().add(results); } public void aggregate(FlowProcess flowProcess, AggregatorCall<ColorData> aggregatorCall) { TupleEntry arguments = aggregatorCall.getArguments(); ColorData context = aggregatorCall.getContext(); double weight = arguments.getDouble("WEIGHT"); double width = arguments.getDouble("WIDTH"); context.addData(weight, width); } }