SlideShare a Scribd company logo
HDInsight 
Programming
Port 
• HDFS: http://localhost:50070/ 
• Oozie: http://localhost:11000/oozie/v1/admin/status 
• Templeton: http://localhost:50111/templeton/v1/status 
• ODBC: use port 10000 in DSN 
configuration or connection string.
HDFS WebClient 
NuGet: Microsoft .NET API for Hadoop WebClient
WebHDFS
List Directory 
// Create a WebHDFS client for the local endpoint; "hadoop" is the second
// constructor argument (presumably the HDFS user name — confirm against the SDK).
var client = new WebHDFSClient(new Uri("http://localhost:50070"), "hadoop");
// Once the listing of "/" is available, print every directory entry as "/<PathSuffix>".
client.GetDirectoryStatus("/").ContinueWith(dl =>
    dl.Result.Directories.ToList().ForEach(d => Console.WriteLine("/" + d.PathSuffix)));
Create Directory 
var client = new WebHDFSClient(new Uri("http://localhost:50070"), "hadoop");
// Create /TEST and print the value returned by the call.
var created = await client.CreateDirectory("/TEST");
Console.WriteLine("True or False, we created the directory " + created.ToString());
// Delete /TEST again and print the value returned by the call.
var deleted = await client.DeleteDirectory("/TEST");
Console.WriteLine("True or False, we deleted the directory " + deleted.ToString());
Task Chaining 
// Chain of continuations: create /TEST, upload a local file, report its location,
// open it and report its length, then delete it. Indentation mirrors the nesting
// of the ContinueWith calls.
client.CreateDirectory("/TEST")
    .ContinueWith(x => client.CreateFile(@"c:\tmp\Titles.txt", "/user/hadoop/titles.txt")
        .ContinueWith(t => Console.WriteLine("new file located at " + t.Result))
        .ContinueWith(t => client.OpenFile("/user/hadoop/titles.txt")
            .ContinueWith(
                resp => resp.Result.Content.ReadAsStringAsync()
                    .ContinueWith(bigString => Console.WriteLine("new file is " +
                        bigString.Result.Length + " bytes long"))
                    .ContinueWith(
                        t2 => client.DeleteDirectory("/user/hadoop/titles.txt")
                            .ContinueWith(b => Console.WriteLine("Successfully deleted file."))
                    )
            )
        )
    );
WebHCat 
• Management of HCatalog metadata. 
• Hive job submission. 
• Pig job submission. 
• Map/Reduce job submission. 
• Streaming Map/Reduce job submission.
CreateHive 
using System.Net.Http; 
string outputDir = "basichivejob";
var client = new WebHCatHttpClient(new Uri("http://localhost:50111"), "administrator", "", "hadoop");
// Submit the Hive query and block until the HTTP response is available.
var t1 = client.CreateHiveJob(@"select * from src;", null, null, outputDir, null);
t1.Wait();
var response = t1.Result;
// Fail fast on an HTTP error before attempting to parse the body as JSON.
response.EnsureSuccessStatusCode();
var output = response.Content.ReadAsAsync<JObject>();
output.Wait();
// The "id" field of the JSON response is passed on to wait for the job to finish.
string id = output.Result.GetValue("id").ToString();
client.WaitForJobToCompleteAsync(id).Wait();
Oozie 
http://hadoopsdk.codeplex.com/wikipage?title=Oozie%20Client&referringTitle=Home
.NET MapReduce
MRRunner
Mapper 
/// <summary>
/// Mapper that parses each input line as an integer and emits the pair
/// (input value, square root of the input value).
/// </summary>
public class SqrtMapper : MapperBase
{
    /// <summary>Processes a single input line.</summary>
    /// <param name="inputLine">Text holding one integer; <c>int.Parse</c> throws if it is not a valid integer.</param>
    /// <param name="context">Context used to emit the resulting key/value pair.</param>
    public override void Map(string inputLine, MapperContext context)
    {
        // Parse and format with the invariant culture so the emitted text does not
        // depend on the regional settings of the machine running the mapper.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        int inputValue = int.Parse(inputLine, invariant);

        // Perform the work.
        double sqrt = Math.Sqrt((double)inputValue);

        // Write output data.
        context.EmitKeyValue(inputValue.ToString(invariant), sqrt.ToString(invariant));
    }
}
Hadoop Job 
/// <summary>
/// Job definition that reads from "input/SqrtJob" and writes to "output/SqrtJob".
/// </summary>
// NOTE(review): Mapper, Combiner and Reducer look like placeholders for the
// concrete job types — confirm before compiling.
public class FirstJob : HadoopJob<Mapper,Combiner,Reducer>
{
    /// <summary>Builds the input/output configuration for this job.</summary>
    /// <param name="context">Executor context supplied by the caller; not used here.</param>
    /// <returns>A configuration with <c>InputPath</c> and <c>OutputFolder</c> set.</returns>
    public override HadoopJobConfiguration Configure(ExecutorContext context)
    {
        HadoopJobConfiguration config = new HadoopJobConfiguration();
        config.InputPath = "input/SqrtJob";
        config.OutputFolder = "output/SqrtJob";
        return config;
    }
}
// Obtain a Hadoop handle with the parameterless Connect() overload.
var hadoop = Hadoop.Connect(); 
// Run the job class given as the type argument, passing `arguments` through.
// NOTE(review): JobType and arguments appear to be placeholders — substitute the real job class and its arguments.
hadoop.MapReduceJob.ExecuteJob<JobType>(arguments);
MRRunner -dll MyMRProgram.dll {-class jobClass} {-- job-class options}
Linq to Hive
HiveRow 
/// <summary>Row type used for the Hive table of movie titles.</summary>
public class TitlesRow : HiveRow
{
    public string MovieId { get; set; }
    public string Name { get; set; }
    public int Year { get; set; }
    public string Rating { get; set; }
}

/// <summary>Row type used for the Hive table of awards; <c>MovieId</c> links an award to a title.</summary>
public class AwardsRow : HiveRow
{
    public string MovieId { get; set; }
    public string AwardId { get; set; }
    public int Year { get; set; }
    // Stored as a string; the sample query in this deck compares it with "True".
    public string Won { get; set; }
    public string Type { get; set; }
    public string Category { get; set; }
}

/// <summary>Row type used for the Hive table of actors.</summary>
public class ActorsRow : HiveRow
{
    public string MovieId { get; set; }
    public string ActorId { get; set; }
    public int AwardsCount { get; set; }
    public string Name { get; set; }
}
HiveConnection 
/// <summary>
/// Hive connection exposing the "Awards", "Titles" and "Actors" tables as typed properties.
/// </summary>
public class MyHiveDatabase : HiveConnection
{
    /// <summary>Forwards all connection settings unchanged to the <c>HiveConnection</c> base constructor.</summary>
    /// <param name="webHcatUri">URI of the WebHCat endpoint.</param>
    /// <param name="username">User name passed to the base connection.</param>
    /// <param name="password">Password passed to the base connection.</param>
    /// <param name="azureStorageAccount">Azure storage account name passed to the base connection.</param>
    /// <param name="azureStorageKey">Azure storage account key passed to the base connection.</param>
    public MyHiveDatabase(Uri webHcatUri, string username, string password, string azureStorageAccount, string azureStorageKey)
        : base(webHcatUri, username, password, azureStorageAccount, azureStorageKey) { }

    /// <summary>The table named "Awards", typed as <c>AwardsRow</c>.</summary>
    public HiveTable<AwardsRow> Awards
    {
        get
        {
            return this.GetTable<AwardsRow>("Awards");
        }
    }

    /// <summary>The table named "Titles", typed as <c>TitlesRow</c>.</summary>
    public HiveTable<TitlesRow> Titles
    {
        get
        {
            return this.GetTable<TitlesRow>("Titles");
        }
    }

    /// <summary>The table named "Actors", typed as <c>ActorsRow</c>.</summary>
    public HiveTable<ActorsRow> Actors
    {
        get
        {
            return this.GetTable<ActorsRow>("Actors");
        }
    }
}
Simple Linq 
// Named arguments must match the MyHiveDatabase constructor's parameter names
// (webHcatUri, username, password, azureStorageAccount, azureStorageKey).
var db = new MyHiveDatabase(
    webHcatUri: new Uri("http://localhost:50111"),
    username: "hadoop", password: null,
    azureStorageAccount: "ASV storage account name", azureStorageKey: "ASV storage account key");

// Average AwardsCount per ActorId.
var q = from x in
            (from a in db.Actors
             select new { a.ActorId, foo = a.AwardsCount })
        group x by x.ActorId into g
        select new { ActorId = g.Key, bar = g.Average(z => z.foo) };

q.ExecuteQuery().Wait();
var results1 = q.ToList();

// Awards flagged as won for titles from 1994, joined on MovieId.
var projectionQuery = from aw in db.Awards
                      join t in db.Titles
                      on aw.MovieId equals t.MovieId
                      where t.Year == 1994 && aw.Won == "True"
                      select new { MovieId = t.MovieId, Name = t.Name, Type = aw.Type, Category = aw.Category,
                                   Year = t.Year };

// Create a table named "AwardsIn1994" from the projection query.
var newTable = projectionQuery.CreateTable("AwardsIn1994");
Excel ODBC
http://www.microsoft.com/en-us/download/details.aspx?id=40886
Hd insight programming
Hd insight programming
Hd insight programming
Hd insight programming
Hd insight programming
Hd insight programming
Hd insight programming
Hd insight programming
Hd insight programming
Hd insight programming
Hd insight programming
Resource 
• http://hadoopsdk.codeplex.com/ 
• https://github.com/WindowsAzure-Samples/HDInsight-Labs-Preview 
• http://wag.codeplex.com/
Mahout
Machine Learning is programming 
computers to optimize a 
performance criterion using 
example data or past experience
Hd insight programming
Classification
Clustering
Recommenders
Collaborative Filtering - 
User Based
Collaborative Filtering - 
Item Based
Data 
http://labrosa.ee.columbia.edu/millionsong/tasteprofile 
http://www.grouplens.org/node/12
Mahout Command 
c:\apps\dist\mahout-0.7\bin>hadoop jar c:\Apps\dist\mahout-0.7\mahout-core-0.7-job.jar 
org.apache.mahout.cf.taste.hadoop.item.RecommenderJob -s SIMILARITY_COOCCURRENCE --input=input/mInput.txt 
--output=output --usersFile=input/users.txt

More Related Content

What's hot (20)

ODP
Php 102: Out with the Bad, In with the Good
Jeremy Kendall
 
PDF
Apache CouchDB talk at Ontario GNU Linux Fest
Myles Braithwaite
 
PDF
Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-
Tsuyoshi Yamamoto
 
PDF
Leveraging the Power of Graph Databases in PHP
Jeremy Kendall
 
PPTX
MongoDB Aggregation
Amit Ghosh
 
PDF
Leveraging the Power of Graph Databases in PHP
Jeremy Kendall
 
PDF
Testing stateful, concurrent, and async systems using test.check
Eric Normand
 
PDF
Everything About PowerShell
Gaetano Causio
 
PDF
Cutting Edge Data Processing with PHP & XQuery
William Candillon
 
PDF
第3回Grails/Groovy勉強会名古屋「Grails名古屋座談会」
Tsuyoshi Yamamoto
 
PDF
Security Challenges in Node.js
Websecurify
 
PDF
第4回 g* ワークショップ はじめてみよう! Grailsプラグイン
Tsuyoshi Yamamoto
 
PDF
Manifests of Future Past
Puppet
 
PPTX
Tax management-system
Fahim Faysal Kabir
 
PDF
SunshinePHP 2017 - Making the most out of MySQL
Gabriela Ferrara
 
PDF
Zepto.js, a jQuery-compatible mobile JavaScript framework in 2K
Thomas Fuchs
 
PDF
Mongo db for c# developers
Simon Elliston Ball
 
PDF
Finch.io - Purely Functional REST API with Finagle
Vladimir Kostyukov
 
PDF
Undercover Pods / WP Functions
podsframework
 
PDF
Web Components With Rails
Boris Nadion
 
Php 102: Out with the Bad, In with the Good
Jeremy Kendall
 
Apache CouchDB talk at Ontario GNU Linux Fest
Myles Braithwaite
 
Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-
Tsuyoshi Yamamoto
 
Leveraging the Power of Graph Databases in PHP
Jeremy Kendall
 
MongoDB Aggregation
Amit Ghosh
 
Leveraging the Power of Graph Databases in PHP
Jeremy Kendall
 
Testing stateful, concurrent, and async systems using test.check
Eric Normand
 
Everything About PowerShell
Gaetano Causio
 
Cutting Edge Data Processing with PHP & XQuery
William Candillon
 
第3回Grails/Groovy勉強会名古屋「Grails名古屋座談会」
Tsuyoshi Yamamoto
 
Security Challenges in Node.js
Websecurify
 
第4回 g* ワークショップ はじめてみよう! Grailsプラグイン
Tsuyoshi Yamamoto
 
Manifests of Future Past
Puppet
 
Tax management-system
Fahim Faysal Kabir
 
SunshinePHP 2017 - Making the most out of MySQL
Gabriela Ferrara
 
Zepto.js, a jQuery-compatible mobile JavaScript framework in 2K
Thomas Fuchs
 
Mongo db for c# developers
Simon Elliston Ball
 
Finch.io - Purely Functional REST API with Finagle
Vladimir Kostyukov
 
Undercover Pods / WP Functions
podsframework
 
Web Components With Rails
Boris Nadion
 

Viewers also liked (20)

PPTX
Picasso Light Work
StephenShare
 
PPTX
Shutter Speed/Water
StephenShare
 
PPTX
Photography Summer Work
StephenShare
 
PDF
React 101
Casear Chu
 
PPTX
Aperture
StephenShare
 
PPSX
Family Pics When We Were Younger!
EricVickyFamily
 
PPTX
How to use Layers
StephenShare
 
PPTX
Relationships Final Piece
StephenShare
 
PPTX
Chuck Close
StephenShare
 
PPTX
Darren Almond
StephenShare
 
PPTX
Shutter Speeds/Movement
StephenShare
 
PPTX
Bill wadman
StephenShare
 
PDF
前端技術大亂鬥
Casear Chu
 
PPTX
Victor Schrager
StephenShare
 
PPTX
Objects
StephenShare
 
PPTX
Triptych
StephenShare
 
PDF
GeoG - Product Pitch Deck
Geographic Company
 
PPTX
Karl Blossfeldt
StephenShare
 
PPTX
Acoustic Communication
Trijendra Singh
 
PPTX
Schema-on-Read vs Schema-on-Write
Amr Awadallah
 
Picasso Light Work
StephenShare
 
Shutter Speed/Water
StephenShare
 
Photography Summer Work
StephenShare
 
React 101
Casear Chu
 
Aperture
StephenShare
 
Family Pics When We Were Younger!
EricVickyFamily
 
How to use Layers
StephenShare
 
Relationships Final Piece
StephenShare
 
Chuck Close
StephenShare
 
Darren Almond
StephenShare
 
Shutter Speeds/Movement
StephenShare
 
Bill wadman
StephenShare
 
前端技術大亂鬥
Casear Chu
 
Victor Schrager
StephenShare
 
Objects
StephenShare
 
Triptych
StephenShare
 
GeoG - Product Pitch Deck
Geographic Company
 
Karl Blossfeldt
StephenShare
 
Acoustic Communication
Trijendra Singh
 
Schema-on-Read vs Schema-on-Write
Amr Awadallah
 
Ad

Similar to Hd insight programming (20)

PPT
Spring data iii
명철 강
 
PPT
Play!ng with scala
Siarzh Miadzvedzeu
 
PDF
Geospatial Graphs made easy with OrientDB - Codemotion Warsaw 2016
Luigi Dell'Aquila
 
PDF
Hadoop User Group EU 2014
cwensel
 
PDF
NoSQL and JavaScript: a Love Story
Alexandre Morgaut
 
PDF
Velocity EU 2014 — Offline-first web apps
andrewsmatt
 
PDF
Replacing Oracle with MongoDB for a templating application at the Bavarian go...
Comsysto Reply GmbH
 
KEY
OSCON 2011 CouchApps
Bradley Holt
 
PDF
MongoDB Munich 2012: MongoDB for official documents in Bavaria
MongoDB
 
PPTX
Quick and Easy Development with Node.js and Couchbase Server
Nic Raboy
 
PDF
CouchDB Mobile - From Couch to 5K in 1 Hour
Peter Friese
 
KEY
CouchDB on Android
Sven Haiges
 
PDF
Cascading Through Hadoop for the Boulder JUG
Matthew McCullough
 
PPTX
Bare-knuckle web development
Johannes Brodwall
 
KEY
Express Presentation
aaronheckmann
 
PDF
Future of Web Apps: Google Gears
dion
 
KEY
Paris js extensions
erwanl
 
PDF
Requery overview
Sunghyouk Bae
 
KEY
Html5 For Jjugccc2009fall
Shumpei Shiraishi
 
PDF
Flask and Angular: An approach to build robust platforms
Ayush Sharma
 
Spring data iii
명철 강
 
Play!ng with scala
Siarzh Miadzvedzeu
 
Geospatial Graphs made easy with OrientDB - Codemotion Warsaw 2016
Luigi Dell'Aquila
 
Hadoop User Group EU 2014
cwensel
 
NoSQL and JavaScript: a Love Story
Alexandre Morgaut
 
Velocity EU 2014 — Offline-first web apps
andrewsmatt
 
Replacing Oracle with MongoDB for a templating application at the Bavarian go...
Comsysto Reply GmbH
 
OSCON 2011 CouchApps
Bradley Holt
 
MongoDB Munich 2012: MongoDB for official documents in Bavaria
MongoDB
 
Quick and Easy Development with Node.js and Couchbase Server
Nic Raboy
 
CouchDB Mobile - From Couch to 5K in 1 Hour
Peter Friese
 
CouchDB on Android
Sven Haiges
 
Cascading Through Hadoop for the Boulder JUG
Matthew McCullough
 
Bare-knuckle web development
Johannes Brodwall
 
Express Presentation
aaronheckmann
 
Future of Web Apps: Google Gears
dion
 
Paris js extensions
erwanl
 
Requery overview
Sunghyouk Bae
 
Html5 For Jjugccc2009fall
Shumpei Shiraishi
 
Flask and Angular: An approach to build robust platforms
Ayush Sharma
 
Ad

Hd insight programming

  • 2. Port • HDFS: http://localhost:50070/ • Oozie: http://localhost:11000/oozie/v1/admin/status • Templeton: http://localhost:50111/templeton/v1/status • ODBC: use port 10000 in DSN configuration or connection string.
  • 3. HDFS WebClient Nuget Microsoft.NET API for Hadoop WebClient
  • 5. List Directory var client = new WebHDFSClient(new Uri("http://localhost:50070"),"hadoop");! client.GetDirectoryStatus("/").ContinueWith(dl => dl.Result.Directories.ToList().ForEach(d => Console.WriteLine("/" + d.PathSuffix)));
  • 6. Create Directory var client = new WebHDFSClient(new Uri("http://localhost:50070"), "hadoop");! var created = await client.CreateDirectory("/TEST");! Console.WriteLine("True or False, we created the directory " + created.ToString());! var deleted = await client.DeleteDirectory("/TEST");! Console.WriteLine("True or False, we deleted the directory " + deleted.ToString());
  • 7. Task Chaining client.CreateDirectory("/TEST")! .ContinueWith(x => client.CreateFile(@"c:tmpTitles.txt", "/user/hadoop/titles.txt")! .ContinueWith(t => Console.WriteLine("new file located at " + t.Result))! .ContinueWith(t => client.OpenFile("/user/hadoop/titles.txt")! .ContinueWith(! resp => resp.Result.Content.ReadAsStringAsync()! .ContinueWith(bigString => Console.WriteLine("new file is " + bigString.Result.Length + " bytes long"))! .ContinueWith(! t2 => client.DeleteDirectory("/user/hadoop/titles.txt")! .ContinueWith(b => Console.WriteLine("Successfully deleted file."))! )! )! )! );
  • 8. WebHCat • Management of HCatalog metadata. • Hive job submission. • Pig job submission. • Map/Reduce job submission. • Streaming Map/Reduce job submission.
  • 9. CreateHive using System.Net.Http; string outputDir = "basichivejob";! var client = new WebHCatHttpClient(new Uri("http://localhost:50111"), "administrator", "", "hadoop");! var t1 = client.CreateHiveJob(@"select * from src;", null, null, outputDir, null);! t1.Wait();! var response = t1.Result;! var output = response.Content.ReadAsAsync<JObject>();! output.Wait();! response.EnsureSuccessStatusCode();! string id = output.Result.GetValue("id").ToString();! client.WaitForJobToCompleteAsync(id).Wait();
  • 13. Mapper public class SqrtMapper : MapperBase! {! public override void Map(string inputLine, MapperContext context)! {! int inputValue = int.Parse(inputLine);! ! // Perform the work.! double sqrt = Math.Sqrt((double)inputValue);! ! // Write output data.! context.EmitKeyValue(inputValue.ToString(), sqrt.ToString());! }! }
  • 14. Hadoop Job public class FirstJob : HadoopJob<Mapper,Combiner,Reducer>! {! public override HadoopJobConfiguration Configure(ExecutorContext context)! {! HadoopJobConfiguration config = new HadoopJobConfiguration();! config.InputPath = "input/SqrtJob";! config.OutputFolder = "output/SqrtJob";! return config;! }! }!
  • 15. var hadoop = Hadoop.Connect(); hadoop.MapReduceJob.ExecuteJob<JobType>(arguments);
  • 16. MRRunner -dll MyMRProgram.dll {-class jobClass} {-- job-class options}
  • 18. HiveRow public class TitlesRow : HiveRow! {! public string MovieId { get; set; }! public string Name { get; set; }! public int Year { get; set; }! public string Rating { get; set; }! }! ! public class AwardsRow : HiveRow! {! public string MovieId { get; set; }! public string AwardId { get; set; }! public int Year { get; set; }! public string Won { get; set; }! public string Type { get; set; }! public string Category { get; set; }! }! ! public class ActorsRow : HiveRow! {! public string MovieId { get; set; }! public string ActorId { get; set; }! public int AwardsCount { get; set; }! public string Name { get; set; }!
  • 19. HiveConnection public class MyHiveDatabase : HiveConnection! {! public MyHiveDatabase(Uri webHcatUri, string username, string password, string azureStorageAccount, string azureStorageKey) : base(webHcatUri, username, password, azureStorageAccount, azureStorageKey) { }! ! public HiveTable<AwardsRow> Awards! {! get! {! return this.GetTable<AwardsRow>("Awards");! }! }! ! public HiveTable<TitlesRow> Titles! {! get! {! return this.GetTable<TitlesRow>("Titles");! }! }! ! public HiveTable<ActorsRow> Actors! {! get! {! return this.GetTable<ActorsRow>("Actors");! }! }! }
  • 20. Simple Linq var db = new MyHiveDatabase(! webHCatUri: new Uri("http://localhost:50111"),! userName: "hadoop", password: null,! storageAccount: “ASV storage account name”, storageKey: “ASV storage account key”);! ! var q = from x in! (from a in db.Actors! select new { a.ActorId, foo = a.AwardsCount })! group x by x.ActorId into g! select new { ActorId = g.Key, bar = g.Average(z => z.foo) };! ! q.ExecuteQuery().Wait();! var results1 = q.ToList();! !! var projectionQuery = from aw in db.Awards! join t in db.Titles! on aw.MovieId equals t.MovieId! where t.Year == 1994 && aw.Won == "True"! select new { MovieId = t.MovieId, Name = t.Name, Type = aw.Type, Category = aw.Category, Year = t.Year };! !! var newTable = projectionQuery.CreateTable("AwardsIn1994");
  • 34. Resource • http://hadoopsdk.codeplex.com/ • https://github.com/WindowsAzure-Samples/HDInsight-Labs-Preview • http://wag.codeplex.com/
  • 36. Machine Learning is programming computers to optimize a performance criterion using example data or past experience
  • 44. Mahout Command c:\apps\dist\mahout-0.7\bin>hadoop jar c:\Apps\dist\mahout-0.7\mahout-core-0.7-job.jar org.apache.mahout.cf.taste.hadoop.item.RecommenderJob -s SIMILARITY_COOCCURRENCE --input=input/mInput.txt --output=output --usersFile=input/users.txt