0
dachisgroup.comDachis GroupLas Vegas 2012  Pig Unit Testing     Clint Miller     Pigout Hackday, Austin TX     May 11, 201...
dachisgroup.comWhat is PigUnit?  • Not really a *Unit framework.  • Library that you can use within your JUnit tests that ...
dachisgroup.comSimple Pig Script  minutes_and_goals = LOAD minutes_and_goals USING BinStorage() AS (              name: ch...
dachisgroup.comSimple Test Program     public void testSamplePigScript() throws Exception {       String[] args = {       ...
dachisgroup.comMore Complex Pig Script(reads two input files)  players = LOAD minutes_and_goals USING BinStorage() AS (   ...
dachisgroup.comMethods on PigTest  Iterator<Tuple> getAlias(String alias);  Iterator<Tuple> getAlias(); // Fetches value o...
dachisgroup.comMethod override() Saves the Day!   public class InputMocker {       protected PigTest test;       protected...
dachisgroup.comAllows You to Rewrite Pig Script  players = LOAD minutes_and_goals USING BinStorage() AS (          name: c...
dachisgroup.comTest Program - Initialization     public void testSamplePigScript2() throws Exception {       PigServer pig...
dachisgroup.comTest Program – Overriding Inputs       String[] players = {          "Benzema\tReal Madrid\t2165\t20",        ...
dachisgroup.comTest Program – Testing Results         String[] percentOfTeamGoals = {            "(Falcao,Atletico Madrid,...
Upcoming SlideShare
Loading in...5
×

Unit testing pig

2,750

Published on

Published in: Technology, Education
1 Comment
5 Likes
Statistics
Notes
  • Thanks for sharing. I used this pattern to make a library for writing PigUnit tests in JRuby that support multiple inputs: https://github.com/masverba/baconrack.
       Reply 
    Are you sure you want to  Yes  No
    Your message goes here
No Downloads
Views
Total Views
2,750
On Slideshare
0
From Embeds
0
Number of Embeds
3
Actions
Shares
0
Downloads
67
Comments
1
Likes
5
Embeds 0
No embeds

No notes for slide

Transcript of "Unit testing pig"

  1. 1. dachisgroup.com | Dachis Group | Las Vegas 2012 | Pig Unit Testing | Clint Miller | Pigout Hackday, Austin TX | May 11, 2012 | ® 2011 Dachis Group.
  2. 2. dachisgroup.com | What is PigUnit? • Not really a *Unit framework. • Library that you can use within your JUnit tests that allows you to: • Run your Pig scripts from within your JUnit tests. • Override variables in your Pig scripts so that they get values from your JUnit tests rather than reading external sources such as HDFS. • Inspect the values of your Pig script variables. • Make your STORE statements into no-ops so that your Pig scripts run without side effects. ® 2011 Dachis Group.
  3. 3. dachisgroup.comSimple Pig Script minutes_and_goals = LOAD 'minutes_and_goals' USING BinStorage() AS ( name: chararray, team: chararray, minutes: long, goals: long ); top_goal_scorers = FILTER minutes_and_goals BY goals >= $MIN_GOALS; minutes_per_goal_unsorted = FOREACH top_goal_scorers GENERATE name, minutes/goals AS minutes_per_goal; minutes_per_goal = ORDER minutes_per_goal_unsorted BY minutes_per_goal; STORE minutes_per_goal INTO 'minutes_per_goal' USING BinStorage();® 2011 Dachis Group.
  4. 4. dachisgroup.comSimple Test Program public void testSamplePigScript() throws Exception { String[] args = { "MIN_GOALS=20" }; PigTest test = new PigTest("/Users/clintmiller/blah/sampleScript.pig", args); String[] input = { "Benzema\tReal Madrid\t2165\t20", "Ronaldo\tReal Madrid\t3264\t45", "Falcao\tAtletico Madrid\t2852\t23", "Messi\tBarcelona\t3177\t50", "Xavi\tBarcelona\t2079\t10", "Higuain\tReal Madrid\t1641\t22", "Sanchez\tBarcelona\t1678\t12" }; String[] expectedOutput = { "(Messi,63)", "(Ronaldo,72)", "(Higuain,74)", "(Benzema,108)", "(Falcao,124)" }; test.assertOutput("minutes_and_goals", input, "minutes_per_goal", expectedOutput); }® 2011 Dachis Group.
  5. 5. dachisgroup.comMore Complex Pig Script(reads two input files) players = LOAD 'minutes_and_goals' USING BinStorage() AS ( name: chararray, team: chararray, minutes: long, goals: long ); teams = LOAD 'team_goals' USING BinStorage() AS ( name: chararray, goals: long ); player_and_team = JOIN players BY team, teams BY name; percent_of_team_goals_unsorted = FOREACH player_and_team GENERATE players::name, teams::name, (players::goals * 100) / teams::goals AS percent_of_team_goals; percent_of_team_goals = ORDER percent_of_team_goals_unsorted BY percent_of_team_goals DESC, teams::name; STORE percent_of_team_goals INTO 'percent_of_team_goals' USING BinStorage();® 2011 Dachis Group.
  6. 6. dachisgroup.comMethods on PigTest Iterator<Tuple> getAlias(String alias); Iterator<Tuple> getAlias(); // Fetches value of last variable used in a STORE command void override(String alias, String query); void unoverride(String alias); void assertOutput(String[] expected); void assertOutput(String alias, String[] expected); void assertOutput(File expected); void assertOutput(String alias, File expected); void assertOutput(String aliasInput, String[] input, String alias, String[] expected); There is no simple way to override the values of multiple input variables!® 2011 Dachis Group.
  7. 7. dachisgroup.comMethod override() Saves the Day! public class InputMocker { protected PigTest test; protected PigServer pigServer; protected Cluster cluster; protected List<String> overrideFiles; public InputMocker(PigTest test, PigServer pigServer, Cluster cluster) { this.test = test; this.pigServer = pigServer; this.cluster = cluster; this.overrideFiles = new ArrayList<String>(); } public void mockInputAlias(String alias, String[] input) throws Exception { test.runScript(); StringBuilder sb = new StringBuilder(); Schema.stringifySchema(sb, pigServer.dumpSchema(alias), DataType.TUPLE); String destination = alias + "-pigunit-input-overridden.txt"; overrideFiles.add(destination); cluster.copyFromLocalFile(input, destination, true); test.override(alias, String.format("%s = LOAD '%s' AS %s;", alias, destination, sb.toString())); } public void cleanup() throws Exception { for (String overrideFile: overrideFiles) { cluster.delete(new Path(overrideFile)); } } }® 2011 Dachis Group.
  8. 8. dachisgroup.comAllows You to Rewrite Pig Script players = LOAD 'minutes_and_goals' USING BinStorage() AS ( name: chararray, team: chararray, minutes: long, goals: long ); teams = LOAD 'team_goals' USING BinStorage() AS ( name: chararray, goals: long ); Test input data written to temp files and Pig script rewritten to read those files. players = LOAD 'players-pigunit-input-overridden.txt' AS ( name: chararray, team: chararray, minutes: long, goals: long ); teams = LOAD 'teams-pigunit-input-overridden.txt' AS ( name: chararray, goals: long );® 2011 Dachis Group.
  9. 9. dachisgroup.comTest Program - Initialization public void testSamplePigScript2() throws Exception { PigServer pigServer = new PigServer(ExecType.LOCAL); Cluster cluster = new Cluster(pigServer.getPigContext()); String[] args = new String[] {}; PigTest test = new PigTest ("/Users/clintmiller/blah/sampleScript2.pig", args, pigServer, cluster); InputMocker mocker = new InputMocker(test, pigServer, cluster);® 2011 Dachis Group.
  10. 10. dachisgroup.comTest Program – Overriding Inputs String[] players = { "Benzema\tReal Madrid\t2165\t20", "Ronaldo\tReal Madrid\t3264\t45", "Falcao\tAtletico Madrid\t2852\t23", "Messi\tBarcelona\t3177\t50", "Xavi\tBarcelona\t2079\t10", "Higuain\tReal Madrid\t1641\t22", "Sanchez\tBarcelona\t1678\t12" }; String[] teams = { "Barcelona\t112", "Real Madrid\t117", "Atletico Madrid\t52" }; mocker.mockInputAlias("players", players); mocker.mockInputAlias("teams", teams);® 2011 Dachis Group.
  11. 11. dachisgroup.comTest Program – Testing Results String[] percentOfTeamGoals = { "(Falcao,Atletico Madrid,44)", "(Messi,Barcelona,44)", "(Ronaldo,Real Madrid,38)", "(Higuain,Real Madrid,18)", "(Benzema,Real Madrid,17)", "(Sanchez,Barcelona,10)", "(Xavi,Barcelona,8)" }; test.assertOutput("percent_of_team_goals", percentOfTeamGoals); mocker.cleanup(); }® 2011 Dachis Group.
  1. A particular slide catching your eye?

    Clipping is a handy way to collect important slides you want to go back to later.

×