SlideShare a Scribd company logo
1 of 44
Download to read offline
HDInsight 
Programming
Port 
• HDFS: http://localhost:50070/ 
• Oozie: http://localhost:11000/oozie/v1/admin/status
• Templeton: http://localhost:50111/templeton/v1/status
• ODBC: use port 10000 in DSN 
configuration or connection string.
HDFS WebClient 
NuGet package: Microsoft .NET API for Hadoop WebClient
WebHDFS
List Directory 
// List the sub-directories of the HDFS root through the WebHDFS endpoint.
// The second constructor argument is the user name passed to the client.
var client = new WebHDFSClient(new Uri("http://localhost:50070"), "hadoop");

// The listing is requested asynchronously; the continuation prints one line per directory.
// NOTE(review): the task returned by ContinueWith is not awaited here, so the caller
// must keep the process alive until the continuation has run.
client.GetDirectoryStatus("/")
    .ContinueWith(dl => dl.Result.Directories.ToList().ForEach(d => Console.WriteLine("/" + d.PathSuffix)));
Create Directory 
// Create and then delete a directory, awaiting each WebHDFS call in turn.
// This snippet uses await and therefore has to live inside an async method.
var client = new WebHDFSClient(new Uri("http://localhost:50070"), "hadoop");

// Each call's awaited result is printed as-is.
// NOTE(review): presumably a bool success flag, judging by the message text - confirm.
var created = await client.CreateDirectory("/TEST");
Console.WriteLine("True or False, we created the directory " + created.ToString());

var deleted = await client.DeleteDirectory("/TEST");
Console.WriteLine("True or False, we deleted the directory " + deleted.ToString());
Task Chaining 
// Chain of WebHDFS operations expressed with nested continuations:
// create a directory, upload a local file, read it back, report its length, delete it.
// Each follow-up step is attached inside the lambda of the step it depends on, so it is
// scheduled against that inner task rather than against the outer Task<Task>.
// `client` is the WebHDFSClient created earlier.
client.CreateDirectory("/TEST")
    .ContinueWith(x => client.CreateFile(@"c:\tmp\Titles.txt", "/user/hadoop/titles.txt")
        // t.Result is the value produced by CreateFile; it is printed as the new file's location.
        .ContinueWith(t => Console.WriteLine("new file located at " + t.Result))
        .ContinueWith(t => client.OpenFile("/user/hadoop/titles.txt")
            .ContinueWith(
                resp => resp.Result.Content.ReadAsStringAsync()
                    .ContinueWith(bigString => Console.WriteLine("new file is " + bigString.Result.Length + " bytes long"))
                    .ContinueWith(
                        // The uploaded file is removed through DeleteDirectory, as in the original slide.
                        t2 => client.DeleteDirectory("/user/hadoop/titles.txt")
                            .ContinueWith(b => Console.WriteLine("Successfully deleted file."))
                    )
            )
        )
    );
WebHCat 
• Management of HCatalog metadata. 
• Hive job submission. 
• Pig job submission. 
• Map/Reduce job submission. 
• Streaming Map/Reduce job submission.
CreateHive 
using System.Net.Http; 
// Submit a Hive query through WebHCat (Templeton) and block until the job finishes.
string outputDir = "basichivejob";
var client = new WebHCatHttpClient(new Uri("http://localhost:50111"), "administrator", "", "hadoop");

// Submit the job and wait synchronously for the HTTP response.
var t1 = client.CreateHiveJob(@"select * from src;", null, null, outputDir, null);
t1.Wait();
var response = t1.Result;

// Fail fast on a non-success status before trying to parse the body as JSON;
// an error response would otherwise surface as a confusing parse/lookup failure.
response.EnsureSuccessStatusCode();

var output = response.Content.ReadAsAsync<JObject>();
output.Wait();

// The response JSON carries the job identifier under "id".
string id = output.Result.GetValue("id").ToString();
client.WaitForJobToCompleteAsync(id).Wait();
Oozie 
http://hadoopsdk.codeplex.com/wikipage?title=Oozie%20Client&referringTitle=Home
.NET MapReduce
MRRunner
Mapper 
/// <summary>
/// Mapper that parses each input line as an integer and emits the pair
/// (input value, square root of the input value).
/// </summary>
public class SqrtMapper : MapperBase
{
    /// <summary>Processes one input line.</summary>
    /// <param name="inputLine">A line expected to contain a single integer.</param>
    /// <param name="context">Context used to emit the key/value pair.</param>
    /// <exception cref="System.FormatException">The line is not a valid integer.</exception>
    public override void Map(string inputLine, MapperContext context)
    {
        // Job input and output are machine-readable text, so parse and format with the
        // invariant culture; otherwise the decimal separator depends on the host locale.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        int inputValue = int.Parse(inputLine, invariant);

        // Perform the work.
        double sqrt = Math.Sqrt(inputValue);

        // Write output data.
        context.EmitKeyValue(inputValue.ToString(invariant), sqrt.ToString(invariant));
    }
}
Hadoop Job 
/// <summary>
/// Job definition that binds the mapper, combiner and reducer types and
/// declares where the job reads its input and writes its output.
/// </summary>
public class FirstJob : HadoopJob<Mapper, Combiner, Reducer>
{
    /// <summary>Builds the configuration for this job.</summary>
    /// <param name="context">Executor context supplied by the caller; not used here.</param>
    /// <returns>A configuration with the input path and output folder set.</returns>
    public override HadoopJobConfiguration Configure(ExecutorContext context)
    {
        HadoopJobConfiguration config = new HadoopJobConfiguration
        {
            InputPath = "input/SqrtJob",
            OutputFolder = "output/SqrtJob"
        };
        return config;
    }
}
// Obtain a Hadoop connection and run a map/reduce job through it.
// NOTE(review): JobType and arguments are placeholders on the slide - substitute the
// concrete job class (e.g. a HadoopJob subclass) and its argument list.
var hadoop = Hadoop.Connect(); 
hadoop.MapReduceJob.ExecuteJob<JobType>(arguments);
MRRunner -dll MyMRProgram.dll {-class jobClass} {-- job-class options}
Linq to Hive
HiveRow 
/// <summary>Row type whose properties describe one movie title.</summary>
public class TitlesRow : HiveRow
{
    public string MovieId { get; set; }
    public string Name { get; set; }
    public int Year { get; set; }
    public string Rating { get; set; }
}

/// <summary>Row type whose properties describe one award entry for a movie.</summary>
public class AwardsRow : HiveRow
{
    public string MovieId { get; set; }
    public string AwardId { get; set; }
    public int Year { get; set; }
    // Held as text, not bool; elsewhere in this deck it is compared against the string "True".
    public string Won { get; set; }
    public string Type { get; set; }
    public string Category { get; set; }
}

/// <summary>Row type whose properties describe one actor entry for a movie.</summary>
public class ActorsRow : HiveRow
{
    public string MovieId { get; set; }
    public string ActorId { get; set; }
    public int AwardsCount { get; set; }
    public string Name { get; set; }
}
HiveConnection 
/// <summary>
/// Typed Hive connection that exposes the Awards, Titles and Actors tables
/// as queryable <c>HiveTable</c> properties.
/// </summary>
public class MyHiveDatabase : HiveConnection
{
    /// <summary>Forwards all connection settings to the base <c>HiveConnection</c>.</summary>
    /// <param name="webHcatUri">Address of the WebHCat endpoint.</param>
    /// <param name="username">User name passed to the base connection.</param>
    /// <param name="password">Password passed to the base connection.</param>
    /// <param name="azureStorageAccount">Azure storage account name.</param>
    /// <param name="azureStorageKey">Azure storage account key.</param>
    public MyHiveDatabase(
        Uri webHcatUri,
        string username,
        string password,
        string azureStorageAccount,
        string azureStorageKey)
        : base(webHcatUri, username, password, azureStorageAccount, azureStorageKey)
    {
    }

    /// <summary>The table named "Awards".</summary>
    public HiveTable<AwardsRow> Awards
    {
        get
        {
            return this.GetTable<AwardsRow>("Awards");
        }
    }

    /// <summary>The table named "Titles".</summary>
    public HiveTable<TitlesRow> Titles
    {
        get
        {
            return this.GetTable<TitlesRow>("Titles");
        }
    }

    /// <summary>The table named "Actors".</summary>
    public HiveTable<ActorsRow> Actors
    {
        get
        {
            return this.GetTable<ActorsRow>("Actors");
        }
    }
}
Simple Linq 
// Connect to Hive through WebHCat. The argument names must match the parameter names
// declared on the MyHiveDatabase constructor exactly (they are case-sensitive).
var db = new MyHiveDatabase(
    webHcatUri: new Uri("http://localhost:50111"),
    username: "hadoop", password: null,
    azureStorageAccount: "ASV storage account name", azureStorageKey: "ASV storage account key");

// Average awards count per actor: project the two needed columns, then group by actor id.
var q = from x in
            (from a in db.Actors
             select new { a.ActorId, foo = a.AwardsCount })
        group x by x.ActorId into g
        select new { ActorId = g.Key, bar = g.Average(z => z.foo) };

// Block until the query has been executed, then materialize the rows.
q.ExecuteQuery().Wait();
var results1 = q.ToList();

// Join awards to titles and keep the awards won by titles from 1994.
var projectionQuery = from aw in db.Awards
                      join t in db.Titles
                          on aw.MovieId equals t.MovieId
                      where t.Year == 1994 && aw.Won == "True"
                      select new { MovieId = t.MovieId, Name = t.Name, Type = aw.Type, Category = aw.Category, Year = t.Year };

// Create a table named "AwardsIn1994" from the projection query.
var newTable = projectionQuery.CreateTable("AwardsIn1994");
Excel ODBC
http://www.microsoft.com/en-us/download/details.aspx?id=40886
Resource 
• http://hadoopsdk.codeplex.com/ 
• https://github.com/WindowsAzure-Samples/HDInsight-Labs-Preview
• http://wag.codeplex.com/
Mahout
Machine Learning is programming 
computers to optimize a 
performance criterion using 
example data or past experience
Classification
Clustering
Recommenders
Collaborative Filtering - 
User Based
Collaborative Filtering - 
Item Based
Data 
http://labrosa.ee.columbia.edu/millionsong/tasteprofile 
http://www.grouplens.org/node/12
Mahout Command 
c:\apps\dist\mahout-0.7\bin>hadoop jar c:\Apps\dist\mahout-0.7\mahout-core-0.7-job.jar org.apache.mahout.cf.taste.hadoop.item.RecommenderJob -s SIMILARITY_COOCCURRENCE --input=input/mInput.txt --output=output --usersFile=input/users.txt

More Related Content

What's hot

Php 102: Out with the Bad, In with the Good
Php 102: Out with the Bad, In with the GoodPhp 102: Out with the Bad, In with the Good
Php 102: Out with the Bad, In with the GoodJeremy Kendall
 
Apache CouchDB talk at Ontario GNU Linux Fest
Apache CouchDB talk at Ontario GNU Linux FestApache CouchDB talk at Ontario GNU Linux Fest
Apache CouchDB talk at Ontario GNU Linux FestMyles Braithwaite
 
Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-
Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-
Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-Tsuyoshi Yamamoto
 
Leveraging the Power of Graph Databases in PHP
Leveraging the Power of Graph Databases in PHPLeveraging the Power of Graph Databases in PHP
Leveraging the Power of Graph Databases in PHPJeremy Kendall
 
MongoDB Aggregation
MongoDB Aggregation MongoDB Aggregation
MongoDB Aggregation Amit Ghosh
 
Leveraging the Power of Graph Databases in PHP
Leveraging the Power of Graph Databases in PHPLeveraging the Power of Graph Databases in PHP
Leveraging the Power of Graph Databases in PHPJeremy Kendall
 
Testing stateful, concurrent, and async systems using test.check
Testing stateful, concurrent, and async systems using test.checkTesting stateful, concurrent, and async systems using test.check
Testing stateful, concurrent, and async systems using test.checkEric Normand
 
Everything About PowerShell
Everything About PowerShellEverything About PowerShell
Everything About PowerShellGaetano Causio
 
Cutting Edge Data Processing with PHP & XQuery
Cutting Edge Data Processing with PHP & XQueryCutting Edge Data Processing with PHP & XQuery
Cutting Edge Data Processing with PHP & XQueryWilliam Candillon
 
第3回Grails/Groovy勉強会名古屋「Grails名古屋座談会」
第3回Grails/Groovy勉強会名古屋「Grails名古屋座談会」第3回Grails/Groovy勉強会名古屋「Grails名古屋座談会」
第3回Grails/Groovy勉強会名古屋「Grails名古屋座談会」Tsuyoshi Yamamoto
 
Security Challenges in Node.js
Security Challenges in Node.jsSecurity Challenges in Node.js
Security Challenges in Node.jsWebsecurify
 
第4回 g* ワークショップ はじめてみよう! Grailsプラグイン
第4回 g* ワークショップ はじめてみよう! Grailsプラグイン第4回 g* ワークショップ はじめてみよう! Grailsプラグイン
第4回 g* ワークショップ はじめてみよう! GrailsプラグインTsuyoshi Yamamoto
 
Manifests of Future Past
Manifests of Future PastManifests of Future Past
Manifests of Future PastPuppet
 
SunshinePHP 2017 - Making the most out of MySQL
SunshinePHP 2017 - Making the most out of MySQLSunshinePHP 2017 - Making the most out of MySQL
SunshinePHP 2017 - Making the most out of MySQLGabriela Ferrara
 
Zepto.js, a jQuery-compatible mobile JavaScript framework in 2K
Zepto.js, a jQuery-compatible mobile JavaScript framework in 2KZepto.js, a jQuery-compatible mobile JavaScript framework in 2K
Zepto.js, a jQuery-compatible mobile JavaScript framework in 2KThomas Fuchs
 
Finch.io - Purely Functional REST API with Finagle
Finch.io - Purely Functional REST API with FinagleFinch.io - Purely Functional REST API with Finagle
Finch.io - Purely Functional REST API with FinagleVladimir Kostyukov
 
Undercover Pods / WP Functions
Undercover Pods / WP FunctionsUndercover Pods / WP Functions
Undercover Pods / WP Functionspodsframework
 
Web Components With Rails
Web Components With RailsWeb Components With Rails
Web Components With RailsBoris Nadion
 

What's hot (20)

Php 102: Out with the Bad, In with the Good
Php 102: Out with the Bad, In with the GoodPhp 102: Out with the Bad, In with the Good
Php 102: Out with the Bad, In with the Good
 
Apache CouchDB talk at Ontario GNU Linux Fest
Apache CouchDB talk at Ontario GNU Linux FestApache CouchDB talk at Ontario GNU Linux Fest
Apache CouchDB talk at Ontario GNU Linux Fest
 
Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-
Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-
Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-
 
Leveraging the Power of Graph Databases in PHP
Leveraging the Power of Graph Databases in PHPLeveraging the Power of Graph Databases in PHP
Leveraging the Power of Graph Databases in PHP
 
MongoDB Aggregation
MongoDB Aggregation MongoDB Aggregation
MongoDB Aggregation
 
Leveraging the Power of Graph Databases in PHP
Leveraging the Power of Graph Databases in PHPLeveraging the Power of Graph Databases in PHP
Leveraging the Power of Graph Databases in PHP
 
Testing stateful, concurrent, and async systems using test.check
Testing stateful, concurrent, and async systems using test.checkTesting stateful, concurrent, and async systems using test.check
Testing stateful, concurrent, and async systems using test.check
 
Everything About PowerShell
Everything About PowerShellEverything About PowerShell
Everything About PowerShell
 
Cutting Edge Data Processing with PHP & XQuery
Cutting Edge Data Processing with PHP & XQueryCutting Edge Data Processing with PHP & XQuery
Cutting Edge Data Processing with PHP & XQuery
 
第3回Grails/Groovy勉強会名古屋「Grails名古屋座談会」
第3回Grails/Groovy勉強会名古屋「Grails名古屋座談会」第3回Grails/Groovy勉強会名古屋「Grails名古屋座談会」
第3回Grails/Groovy勉強会名古屋「Grails名古屋座談会」
 
Security Challenges in Node.js
Security Challenges in Node.jsSecurity Challenges in Node.js
Security Challenges in Node.js
 
第4回 g* ワークショップ はじめてみよう! Grailsプラグイン
第4回 g* ワークショップ はじめてみよう! Grailsプラグイン第4回 g* ワークショップ はじめてみよう! Grailsプラグイン
第4回 g* ワークショップ はじめてみよう! Grailsプラグイン
 
Manifests of Future Past
Manifests of Future PastManifests of Future Past
Manifests of Future Past
 
Tax management-system
Tax management-systemTax management-system
Tax management-system
 
SunshinePHP 2017 - Making the most out of MySQL
SunshinePHP 2017 - Making the most out of MySQLSunshinePHP 2017 - Making the most out of MySQL
SunshinePHP 2017 - Making the most out of MySQL
 
Zepto.js, a jQuery-compatible mobile JavaScript framework in 2K
Zepto.js, a jQuery-compatible mobile JavaScript framework in 2KZepto.js, a jQuery-compatible mobile JavaScript framework in 2K
Zepto.js, a jQuery-compatible mobile JavaScript framework in 2K
 
Mongo db for c# developers
Mongo db for c# developersMongo db for c# developers
Mongo db for c# developers
 
Finch.io - Purely Functional REST API with Finagle
Finch.io - Purely Functional REST API with FinagleFinch.io - Purely Functional REST API with Finagle
Finch.io - Purely Functional REST API with Finagle
 
Undercover Pods / WP Functions
Undercover Pods / WP FunctionsUndercover Pods / WP Functions
Undercover Pods / WP Functions
 
Web Components With Rails
Web Components With RailsWeb Components With Rails
Web Components With Rails
 

Viewers also liked

Viewers also liked (20)

Picasso Light Work
Picasso Light WorkPicasso Light Work
Picasso Light Work
 
Shutter Speed/Water
Shutter Speed/WaterShutter Speed/Water
Shutter Speed/Water
 
Photography Summer Work
Photography Summer WorkPhotography Summer Work
Photography Summer Work
 
React 101
React 101React 101
React 101
 
Aperture
ApertureAperture
Aperture
 
Family Pics When We Were Younger!
Family Pics When We Were Younger!Family Pics When We Were Younger!
Family Pics When We Were Younger!
 
How to use Layers
How to use LayersHow to use Layers
How to use Layers
 
Relationships Final Piece
Relationships Final PieceRelationships Final Piece
Relationships Final Piece
 
Chuck Close
Chuck CloseChuck Close
Chuck Close
 
Darren Almond
Darren AlmondDarren Almond
Darren Almond
 
Shutter Speeds/Movement
Shutter Speeds/MovementShutter Speeds/Movement
Shutter Speeds/Movement
 
Bill wadman
Bill wadmanBill wadman
Bill wadman
 
前端技術大亂鬥
前端技術大亂鬥前端技術大亂鬥
前端技術大亂鬥
 
Victor Schrager
Victor SchragerVictor Schrager
Victor Schrager
 
Objects
ObjectsObjects
Objects
 
Triptych
TriptychTriptych
Triptych
 
GeoG - Product Pitch Deck
GeoG - Product Pitch DeckGeoG - Product Pitch Deck
GeoG - Product Pitch Deck
 
Karl Blossfeldt
Karl BlossfeldtKarl Blossfeldt
Karl Blossfeldt
 
Acoustic Communication
Acoustic CommunicationAcoustic Communication
Acoustic Communication
 
Schema-on-Read vs Schema-on-Write
Schema-on-Read vs Schema-on-WriteSchema-on-Read vs Schema-on-Write
Schema-on-Read vs Schema-on-Write
 

Similar to Hd insight programming

Spring data iii
Spring data iiiSpring data iii
Spring data iii명철 강
 
Geospatial Graphs made easy with OrientDB - Codemotion Warsaw 2016
Geospatial Graphs made easy with OrientDB - Codemotion Warsaw 2016Geospatial Graphs made easy with OrientDB - Codemotion Warsaw 2016
Geospatial Graphs made easy with OrientDB - Codemotion Warsaw 2016Luigi Dell'Aquila
 
Hadoop User Group EU 2014
Hadoop User Group EU 2014Hadoop User Group EU 2014
Hadoop User Group EU 2014cwensel
 
NoSQL and JavaScript: a Love Story
NoSQL and JavaScript: a Love StoryNoSQL and JavaScript: a Love Story
NoSQL and JavaScript: a Love StoryAlexandre Morgaut
 
Velocity EU 2014 — Offline-first web apps
Velocity EU 2014 — Offline-first web appsVelocity EU 2014 — Offline-first web apps
Velocity EU 2014 — Offline-first web appsandrewsmatt
 
Replacing Oracle with MongoDB for a templating application at the Bavarian go...
Replacing Oracle with MongoDB for a templating application at the Bavarian go...Replacing Oracle with MongoDB for a templating application at the Bavarian go...
Replacing Oracle with MongoDB for a templating application at the Bavarian go...Comsysto Reply GmbH
 
OSCON 2011 CouchApps
OSCON 2011 CouchAppsOSCON 2011 CouchApps
OSCON 2011 CouchAppsBradley Holt
 
MongoDB Munich 2012: MongoDB for official documents in Bavaria
MongoDB Munich 2012: MongoDB for official documents in BavariaMongoDB Munich 2012: MongoDB for official documents in Bavaria
MongoDB Munich 2012: MongoDB for official documents in BavariaMongoDB
 
Quick and Easy Development with Node.js and Couchbase Server
Quick and Easy Development with Node.js and Couchbase ServerQuick and Easy Development with Node.js and Couchbase Server
Quick and Easy Development with Node.js and Couchbase ServerNic Raboy
 
CouchDB Mobile - From Couch to 5K in 1 Hour
CouchDB Mobile - From Couch to 5K in 1 HourCouchDB Mobile - From Couch to 5K in 1 Hour
CouchDB Mobile - From Couch to 5K in 1 HourPeter Friese
 
CouchDB on Android
CouchDB on AndroidCouchDB on Android
CouchDB on AndroidSven Haiges
 
Cascading Through Hadoop for the Boulder JUG
Cascading Through Hadoop for the Boulder JUGCascading Through Hadoop for the Boulder JUG
Cascading Through Hadoop for the Boulder JUGMatthew McCullough
 
Bare-knuckle web development
Bare-knuckle web developmentBare-knuckle web development
Bare-knuckle web developmentJohannes Brodwall
 
Express Presentation
Express PresentationExpress Presentation
Express Presentationaaronheckmann
 
Future of Web Apps: Google Gears
Future of Web Apps: Google GearsFuture of Web Apps: Google Gears
Future of Web Apps: Google Gearsdion
 
Paris js extensions
Paris js extensionsParis js extensions
Paris js extensionserwanl
 
Flask and Angular: An approach to build robust platforms
Flask and Angular:  An approach to build robust platformsFlask and Angular:  An approach to build robust platforms
Flask and Angular: An approach to build robust platformsAyush Sharma
 

Similar to Hd insight programming (20)

Spring data iii
Spring data iiiSpring data iii
Spring data iii
 
Play!ng with scala
Play!ng with scalaPlay!ng with scala
Play!ng with scala
 
Geospatial Graphs made easy with OrientDB - Codemotion Warsaw 2016
Geospatial Graphs made easy with OrientDB - Codemotion Warsaw 2016Geospatial Graphs made easy with OrientDB - Codemotion Warsaw 2016
Geospatial Graphs made easy with OrientDB - Codemotion Warsaw 2016
 
Hadoop User Group EU 2014
Hadoop User Group EU 2014Hadoop User Group EU 2014
Hadoop User Group EU 2014
 
NoSQL and JavaScript: a Love Story
NoSQL and JavaScript: a Love StoryNoSQL and JavaScript: a Love Story
NoSQL and JavaScript: a Love Story
 
Velocity EU 2014 — Offline-first web apps
Velocity EU 2014 — Offline-first web appsVelocity EU 2014 — Offline-first web apps
Velocity EU 2014 — Offline-first web apps
 
Replacing Oracle with MongoDB for a templating application at the Bavarian go...
Replacing Oracle with MongoDB for a templating application at the Bavarian go...Replacing Oracle with MongoDB for a templating application at the Bavarian go...
Replacing Oracle with MongoDB for a templating application at the Bavarian go...
 
OSCON 2011 CouchApps
OSCON 2011 CouchAppsOSCON 2011 CouchApps
OSCON 2011 CouchApps
 
MongoDB Munich 2012: MongoDB for official documents in Bavaria
MongoDB Munich 2012: MongoDB for official documents in BavariaMongoDB Munich 2012: MongoDB for official documents in Bavaria
MongoDB Munich 2012: MongoDB for official documents in Bavaria
 
Quick and Easy Development with Node.js and Couchbase Server
Quick and Easy Development with Node.js and Couchbase ServerQuick and Easy Development with Node.js and Couchbase Server
Quick and Easy Development with Node.js and Couchbase Server
 
CouchDB Mobile - From Couch to 5K in 1 Hour
CouchDB Mobile - From Couch to 5K in 1 HourCouchDB Mobile - From Couch to 5K in 1 Hour
CouchDB Mobile - From Couch to 5K in 1 Hour
 
CouchDB on Android
CouchDB on AndroidCouchDB on Android
CouchDB on Android
 
Cascading Through Hadoop for the Boulder JUG
Cascading Through Hadoop for the Boulder JUGCascading Through Hadoop for the Boulder JUG
Cascading Through Hadoop for the Boulder JUG
 
Bare-knuckle web development
Bare-knuckle web developmentBare-knuckle web development
Bare-knuckle web development
 
Express Presentation
Express PresentationExpress Presentation
Express Presentation
 
Future of Web Apps: Google Gears
Future of Web Apps: Google GearsFuture of Web Apps: Google Gears
Future of Web Apps: Google Gears
 
Paris js extensions
Paris js extensionsParis js extensions
Paris js extensions
 
Requery overview
Requery overviewRequery overview
Requery overview
 
Html5 For Jjugccc2009fall
Html5 For Jjugccc2009fallHtml5 For Jjugccc2009fall
Html5 For Jjugccc2009fall
 
Flask and Angular: An approach to build robust platforms
Flask and Angular:  An approach to build robust platformsFlask and Angular:  An approach to build robust platforms
Flask and Angular: An approach to build robust platforms
 

Hd insight programming

  • 2. Port • HDFS: http://localhost:50070/ • Oozie: http://localhost:11000/oozie/v1/ admin/status • Templeton: http://localhost:50111/ templeton/v1/status • ODBC: use port 10000 in DSN configuration or connection string.
  • 3. HDFS WebClient Nuget Microsoft.NET API for Hadoop WebClient
  • 5. List Directory var client = new WebHDFSClient(new Uri("http://localhost:50070"),"hadoop");! client.GetDirectoryStatus("/").ContinueWith(dl => dl.Result.Directories.ToList().ForEach(d => Console.WriteLine("/" + d.PathSuffix)));
  • 6. Create Directory var client = new WebHDFSClient(new Uri("http://localhost:50070"), "hadoop");! var created = await client.CreateDirectory("/TEST");! Console.WriteLine("True or False, we created the directory " + created.ToString());! var deleted = await client.DeleteDirectory("/TEST");! Console.WriteLine("True or False, we deleted the directory " + deleted.ToString());
  • 7. Task Chaining client.CreateDirectory("/TEST")! .ContinueWith(x => client.CreateFile(@"c:tmpTitles.txt", "/user/hadoop/titles.txt")! .ContinueWith(t => Console.WriteLine("new file located at " + t.Result))! .ContinueWith(t => client.OpenFile("/user/hadoop/titles.txt")! .ContinueWith(! resp => resp.Result.Content.ReadAsStringAsync()! .ContinueWith(bigString => Console.WriteLine("new file is " + bigString.Result.Length + " bytes long"))! .ContinueWith(! t2 => client.DeleteDirectory("/user/hadoop/titles.txt")! .ContinueWith(b => Console.WriteLine("Successfully deleted file."))! )! )! )! );
  • 8. WebHCat • Management of HCatalog metadata. • Hive job submission. • Pig job submission. • Map/Reduce job submission. • Streaming Map/Reduce job submission.
  • 9. CreateHive using System.Net.Http; string outputDir = "basichivejob";! var client = new WebHCatHttpClient(new Uri("http://localhost:50111"), "administrator", "", "hadoop");! var t1 = client.CreateHiveJob(@"select * from src;", null, null, outputDir, null);! t1.Wait();! var response = t1.Result;! var output = response.Content.ReadAsAsync<JObject>();! output.Wait();! response.EnsureSuccessStatusCode();! string id = output.Result.GetValue("id").ToString();! client.WaitForJobToCompleteAsync(id).Wait();
  • 13. Mapper public class SqrtMapper : MapperBase! {! public override void Map(string inputLine, MapperContext context)! {! int inputValue = int.Parse(inputLine);! ! // Perform the work.! double sqrt = Math.Sqrt((double)inputValue);! ! // Write output data.! context.EmitKeyValue(inputValue.ToString(), sqrt.ToString());! }! }
  • 14. Hadoop Job public class FirstJob : HadoopJob<Mapper,Combiner,Reducer>! {! public override HadoopJobConfiguration Configure(ExecutorContext context)! {! HadoopJobConfiguration config = new HadoopJobConfiguration();! config.InputPath = "input/SqrtJob";! config.OutputFolder = "output/SqrtJob";! return config;! }! }!
  • 15. var hadoop = Hadoop.Connect(); hadoop.MapReduceJob.ExecuteJob<JobType>(arguments);
  • 16. MRRunner -dll MyMRProgram.dll {-class jobClass} {-- job-class options}
  • 18. HiveRow public class TitlesRow : HiveRow! {! public string MovieId { get; set; }! public string Name { get; set; }! public int Year { get; set; }! public string Rating { get; set; }! }! ! public class AwardsRow : HiveRow! {! public string MovieId { get; set; }! public string AwardId { get; set; }! public int Year { get; set; }! public string Won { get; set; }! public string Type { get; set; }! public string Category { get; set; }! }! ! public class ActorsRow : HiveRow! {! public string MovieId { get; set; }! public string ActorId { get; set; }! public int AwardsCount { get; set; }! public string Name { get; set; }!
  • 19. HiveConnection public class MyHiveDatabase : HiveConnection! {! public MyHiveDatabase(Uri webHcatUri, string username, string password, string azureStorageAccount, string azureStorageKey) : base(webHcatUri, username, password, azureStorageAccount, azureStorageKey) { }! ! public HiveTable<AwardsRow> Awards! {! get! {! return this.GetTable<AwardsRow>("Awards");! }! }! ! public HiveTable<TitlesRow> Titles! {! get! {! return this.GetTable<TitlesRow>("Titles");! }! }! ! public HiveTable<ActorsRow> Actors! {! get! {! return this.GetTable<ActorsRow>("Actors");! }! }! }
  • 20. Simple Linq var db = new MyHiveDatabase(! webHCatUri: new Uri("http://localhost:50111"),! userName: "hadoop", password: null,! storageAccount: “ASV storage account name”, storageKey: “ASV storage account key”);! ! var q = from x in! (from a in db.Actors! select new { a.ActorId, foo = a.AwardsCount })! group x by x.ActorId into g! select new { ActorId = g.Key, bar = g.Average(z => z.foo) };! ! q.ExecuteQuery().Wait();! var results1 = q.ToList();! !! var projectionQuery = from aw in db.Awards! join t in db.Titles! on aw.MovieId equals t.MovieId! where t.Year == 1994 && aw.Won == "True"! select new { MovieId = t.MovieId, Name = t.Name, Type = aw.Type, Category = aw.Category, Year = t.Year };! !! var newTable = projectionQuery.CreateTable("AwardsIn1994");
  • 23.
  • 24.
  • 25.
  • 26.
  • 27.
  • 28.
  • 29.
  • 30.
  • 31.
  • 32.
  • 33.
  • 34. Resource • http://hadoopsdk.codeplex.com/ • https://github.com/WindowsAzure-Samples/ HDInsight-Labs-Preview • http://wag.codeplex.com/
  • 36. Machine Learning is programming computers to optimize a performance criterion using example data or past experience
  • 37.
  • 44. Mahout Command c:appsdistmahout-0.7bin>hadoop jar c:Appsdistmahout-0.7mahout-core-0.7-job.jar org.apache.mahout.cf.taste.hadoop.item.RecommenderJob -s SIMILARITY_COOCCURRENCE --input=input/mInput.txt -- output=output --usersFile=input/users.txt!