  • Thanks for sharing. I used this pattern to make a library for writing PigUnit tests in JRuby that support multiple inputs: https://github.com/masverba/baconrack.
  dachisgroup.com — Dachis Group, Las Vegas 2012. Pig Unit Testing. Clint Miller. Pigout Hackday, Austin TX, May 11, 2012
  dachisgroup.com — What is PigUnit? • Not really a *Unit framework. • A library that you can use within your JUnit tests that allows you to: • Run your Pig scripts from within your JUnit tests. • Override variables in your Pig scripts so that they get values from your JUnit tests rather than reading external sources such as HDFS. • Inspect the values of your Pig script variables. • Make your STORE statements into no-ops so that your Pig scripts run without side effects.
  dachisgroup.com — Simple Pig Script: minutes_and_goals = LOAD 'minutes_and_goals' USING BinStorage() AS ( name: chararray, team: chararray, minutes: long, goals: long ); top_goal_scorers = FILTER minutes_and_goals BY goals >= $MIN_GOALS; minutes_per_goal_unsorted = FOREACH top_goal_scorers GENERATE name, minutes/goals AS minutes_per_goal; minutes_per_goal = ORDER minutes_per_goal_unsorted BY minutes_per_goal; STORE minutes_per_goal INTO 'minutes_per_goal' USING BinStorage();
  dachisgroup.com — Simple Test Program: public void testSamplePigScript() throws Exception { String[] args = { "MIN_GOALS=20" }; PigTest test = new PigTest("/Users/clintmiller/blah/sampleScript.pig", args); String[] input = { "Benzema\tReal Madrid\t2165\t20", "Ronaldo\tReal Madrid\t3264\t45", "Falcao\tAtletico Madrid\t2852\t23", "Messi\tBarcelona\t3177\t50", "Xavi\tBarcelona\t2079\t10", "Higuain\tReal Madrid\t1641\t22", "Sanchez\tBarcelona\t1678\t12" }; String[] expectedOutput = { "(Messi,63)", "(Ronaldo,72)", "(Higuain,74)", "(Benzema,108)", "(Falcao,124)" }; test.assertOutput("minutes_and_goals", input, "minutes_per_goal", expectedOutput); }
  dachisgroup.com — More Complex Pig Script (reads two input files): players = LOAD 'minutes_and_goals' USING BinStorage() AS ( name: chararray, team: chararray, minutes: long, goals: long ); teams = LOAD 'team_goals' USING BinStorage() AS ( name: chararray, goals: long ); player_and_team = JOIN players BY team, teams BY name; percent_of_team_goals_unsorted = FOREACH player_and_team GENERATE players::name, teams::name, (players::goals * 100) / teams::goals AS percent_of_team_goals; percent_of_team_goals = ORDER percent_of_team_goals_unsorted BY percent_of_team_goals DESC, teams::name; STORE percent_of_team_goals INTO 'percent_of_team_goals' USING BinStorage();
  dachisgroup.com — Methods on PigTest: Iterator<Tuple> getAlias(String alias); Iterator<Tuple> getAlias(); // Fetches value of last variable used in a STORE command void override(String alias, String query); void unoverride(String alias); void assertOutput(String[] expected); void assertOutput(String alias, String[] expected); void assertOutput(File expected); void assertOutput(String alias, File expected); void assertOutput(String aliasInput, String[] input, String alias, String[] expected); Note: there is no simple way to override the values of multiple input variables!
  dachisgroup.com — Method override() Saves the Day! public class InputMocker { protected PigTest test; protected PigServer pigServer; protected Cluster cluster; protected List<String> overrideFiles; public InputMocker(PigTest test, PigServer pigServer, Cluster cluster) { this.test = test; this.pigServer = pigServer; this.cluster = cluster; this.overrideFiles = new ArrayList<String>(); } public void mockInputAlias(String alias, String[] input) throws Exception { test.runScript(); StringBuilder sb = new StringBuilder(); Schema.stringifySchema(sb, pigServer.dumpSchema(alias), DataType.TUPLE); String destination = alias + "-pigunit-input-overridden.txt"; overrideFiles.add(destination); cluster.copyFromLocalFile(input, destination, true); test.override(alias, String.format("%s = LOAD '%s' AS %s;", alias, destination, sb.toString())); } public void cleanup() throws Exception { for (String overrideFile: overrideFiles) { cluster.delete(new Path(overrideFile)); } } }
  dachisgroup.com — Allows You to Rewrite Pig Script. Original script: players = LOAD 'minutes_and_goals' USING BinStorage() AS ( name: chararray, team: chararray, minutes: long, goals: long ); teams = LOAD 'team_goals' USING BinStorage() AS ( name: chararray, goals: long ); The test input data is written to temp files, and the Pig script is rewritten to read those files: players = LOAD 'players-pigunit-input-overridden.txt' AS ( name: chararray, team: chararray, minutes: long, goals: long ); teams = LOAD 'teams-pigunit-input-overridden.txt' AS ( name: chararray, goals: long );
  dachisgroup.com — Test Program - Initialization: public void testSamplePigScript2() throws Exception { PigServer pigServer = new PigServer(ExecType.LOCAL); Cluster cluster = new Cluster(pigServer.getPigContext()); String[] args = new String[] {}; PigTest test = new PigTest ("/Users/clintmiller/blah/sampleScript2.pig", args, pigServer, cluster); InputMocker mocker = new InputMocker(test, pigServer, cluster);
  dachisgroup.com — Test Program – Overriding Inputs: String[] players = { "Benzema\tReal Madrid\t2165\t20", "Ronaldo\tReal Madrid\t3264\t45", "Falcao\tAtletico Madrid\t2852\t23", "Messi\tBarcelona\t3177\t50", "Xavi\tBarcelona\t2079\t10", "Higuain\tReal Madrid\t1641\t22", "Sanchez\tBarcelona\t1678\t12" }; String[] teams = { "Barcelona\t112", "Real Madrid\t117", "Atletico Madrid\t52" }; mocker.mockInputAlias("players", players); mocker.mockInputAlias("teams", teams);
  dachisgroup.com — Test Program – Testing Results: String[] percentOfTeamGoals = { "(Falcao,Atletico Madrid,44)", "(Messi,Barcelona,44)", "(Ronaldo,Real Madrid,38)", "(Higuain,Real Madrid,18)", "(Benzema,Real Madrid,17)", "(Sanchez,Barcelona,10)", "(Xavi,Barcelona,8)" }; test.assertOutput("percent_of_team_goals", percentOfTeamGoals); mocker.cleanup(); }