SlideShare a Scribd company logo
1 of 39
Download to read offline
Parallel
Computing
With Dask
Christian Aichinger
https://greek0.net
@chaichinger
def download(url):
    """Fetch ``url`` with ``requests`` and return the response's ``content``."""
    return requests.get(url).content


# Sequential baseline: every download finishes before the next one starts.
for url in urls:
    download(url)
def download(url):
    """Fetch ``url`` with ``requests`` and return the response's ``content``."""
    return requests.get(url).content


async def asyncio_download(loop):
    """Download every entry of ``urls`` through the loop's default executor.

    All downloads are submitted before the first result is awaited, so the
    blocking ``download`` calls can overlap. Results are returned in the
    same order as ``urls``.
    """
    # ``None`` selects the event loop's default executor.
    futures = [loop.run_in_executor(None, download, url) for url in urls]
    # ``async def``/``await`` replaces ``@asyncio.coroutine``/``yield from``,
    # which is no longer available on current Python versions.
    return [await future for future in futures]


loop = asyncio.get_event_loop()
# Call the coroutine function by the name it is defined under above.
job = asyncio_download(loop)
loop.run_until_complete(job)
@dask.delayed
def download(url):
    """Fetch ``url`` with ``requests`` and return the response's ``content``."""
    return requests.get(url).content


# One delayed ``download`` call per URL; ``dask.compute`` evaluates the list.
contents = [download(url) for url in urls]
dask.compute(contents)
def process_cpu(url):
    """Return the sum of ``c1 * c2 * c3`` over all byte triples of ``url``.

    ``url`` is encoded to bytes and every ordered triple of its byte values
    is multiplied and accumulated. The triple loop is kept on purpose: the
    function serves as a CPU-bound workload in the examples that follow.
    """
    data = url.encode()
    charsum = 0
    for c1 in data:
        for c2 in data:
            for c3 in data:
                charsum += c1 * c2 * c3
    return charsum
[process_cpu(url) for url in urls]
@dask.delayed
def process_cpu(url):
    # Body elided on the slide; same CPU-bound function, now wrapped in
    # ``dask.delayed``.
    ...


# One delayed ``process_cpu`` call per URL, evaluated with the default
# ``dask.compute`` settings.
graph = [process_cpu(url) for url in urls]
dask.compute(graph)
@dask.delayed
def process_cpu(url):
    # Body elided on the slide; same CPU-bound function, wrapped in
    # ``dask.delayed``.
    ...


graph = [process_cpu(url) for url in urls]
# ``scheduler='processes'`` selects the multiprocessing scheduler. It replaces
# the removed ``get=dask.multiprocessing.get`` keyword.
dask.compute(graph, scheduler='processes')
@dask.delayed
def f(arg):
    """Print the argument and return twice its value."""
    print("f", arg)
    return 2 * arg


@dask.delayed
def g(args):
    """Print the arguments and return their sum."""
    print("g", args)
    return sum(args)


lst = [1, 2, 3]
# ``g`` receives the list of delayed ``f`` results; evaluation happens only
# when ``graph.compute()`` is called.
graph = g([f(i) for i in lst])
f-#0
g
f
g-#1
f-#2 f-#3
f f
print("result", graph.compute())
f 2
f 1
f 3
g [2, 4, 6]
result 12
f-#0
g
f
g-#1
f-#2 f-#3
f f
Collection similar to Python lists
import dask.bag as db
# The chain is wrapped in parentheses so the leading-dot continuation lines
# parse as one expression.
(db.from_sequence(urls)
   .map(download)
   .map(convert_to_image)
   .filter(lambda img: img.size[0] < 500)
   .map(remove_artifacts)
   .map(save_to_disk)
   .compute())
import dask.bag as db
import json
js = db.read_text('log-2017*.gz').map(json.loads)
js.take(2)
({'name': 'Alice',
'location': {'city': 'LA', 'state': 'CA'}},
{'name': 'Bob',
'location': {'city': 'NYC', 'state': 'NY'}})
result = js.pluck('name').frequencies()
dict(result)
{'Alice': 10000, 'Bob': 5555, 'Charlie': ...}
http://dask.pydata.org/en/latest/examples/bag-json.html
Collection similar to NumPy Arrays
import dask  # needed for ``dask.delayed``; ``import dask.array as da`` only binds ``da``
import dask.array as da
import skimage.io

delayed_imread = dask.delayed(skimage.io.imread, pure=True)
# Read one image eagerly; its dtype and shape are reused for every lazy image
# below (assumes all images share them).
sample = skimage.io.imread(urls[0])
images = [delayed_imread(url) for url in urls]
images = [
    da.from_delayed(img, dtype=sample.dtype, shape=sample.shape)
    for img in images
]
# Stack along a new leading axis.
images = da.stack(images, axis=0)
images.shape
(1000000, 360, 500, 3)
images.shape
(1000000, 360, 500, 3)
max_img = images.mean(axis=3).max(axis=0)
max_img.shape
(360, 500)
max_img.compute()
array([[ 157., 155., 153., ..., 134., 137.],
[ 154., 153., 151., ..., 129., 132.],
...,
[ 97., 66., 81., ..., 74., 82.]])
da.linalg.svd(max_img, 10)
da.fft.fft(max_img)
('tensordot-#0', 2, 1, 2)
sum
apply
('transpose-#1', 1, 2)
apply apply
('wrapped-#2', 2, 1)
apply applytranspose
('tensordot-#0', 1, 1, 1)
sum
apply
('transpose-#1', 1, 1)
apply
('wrapped-#2', 1, 1)
apply
transpose
('tensordot-#0', 2, 0, 2)
apply
('wrapped-#2', 2, 0)
apply applytranspose
('transpose-#1', 0, 2)
apply apply
('tensordot-#0', 0, 1, 0)
sum
apply
('wrapped-#2', 0, 1)
transpose
('transpose-#1', 1, 0)
('tensordot-#0', 0, 0, 2)
sum
('wrapped-#2', 0, 0)
apply apply
transpose
('tensordot-#0', 2, 0, 0)
sum
('transpose-#1', 0, 0)
apply
('tensordot-#0', 0, 0, 0) ('tensordot-#0', 2, 2, 0)
apply
('wrapped-#2', 2, 2)
apply
applytranspose
('transpose-#1', 2, 0)
apply apply
('tensordot-#0', 0, 2, 2)
apply
('transpose-#1', 2, 2)
apply
('wrapped-#2', 0, 2)
apply
transpose
('tensordot-#0', 1, 0, 1)
apply
('transpose-#1', 0, 1)
('wrapped-#2', 1, 0)
transpose
('tensordot-#0', 2, 1, 0) ('tensordot-#0', 0, 2, 1)
sum
('transpose-#1', 2, 1)
apply
('tensordot-#0', 0, 2, 0) ('tensordot-#0', 0, 0, 1)
('tensordot-#0', 0, 1, 2)
('tensordot-#0', 1, 2, 1)
('wrapped-#2', 1, 2)
transpose
('tensordot-#0', 2, 2, 2) ('tensordot-#0', 1, 2, 2)
sum
('tensordot-#0', 2, 2, 1)
sum
('tensordot-#0', 1, 0, 0)
sum
('tensordot-#0', 1, 1, 0)('tensordot-#0', 2, 0, 1) ('tensordot-#0', 0, 1, 1)('tensordot-#0', 1, 2, 0)
('tensordot-#0', 1, 0, 2)
('tensordot-#0', 2, 1, 1)
('tensordot-#0', 1, 1, 2)('sum-#3', 2, 0) ('sum-#3', 0, 0) ('sum-#3', 0, 1)
('sum-#3', 2, 2) ('sum-#3', 1, 2)('sum-#3', 0, 2)
('sum-#3', 1, 1)('sum-#3', 2, 1) ('sum-#3', 1, 0)
onesones onesones
onesones
ones ones
ones
('tensordot-#0', 2, 1, 2)
sum
apply
('transpose-#1', 1, 2)
apply
('wrapped-#2', 2, 1)
appltranspose
('tensordot-#0', 2, 0, 2)
apply
('wrapped-#2', 2, 0)
applytranspose
('transpose-#1', 0, 2)
apply
('tensordot-#0', 0, 0, 2)
sum
('wrapped-#2', 0, 0)
apply
transpose
('tensordot-#0', 2, 0, 0)
sum
('transpose-#1', 0, 0)
('tensordot-#0', 0, 0, 0) ('tensordot-#0', 2, 2, 0)
apply
('wrapped-#2', 2, 2)
apply
transpose
('transpos
('tensordot-#0', 0, 2, 2)
apply
('transpose-#1', 2, 2)
('wrapped-#2', 0, 2
trans
('tensordot-#0', 2, 1, 0)
('tensordot-#0', 0, 1, 2)('tensordot-#0', 2, 2, 2) ('sum-#3', 2, 0)
('sum-#3', 2, 2) ('sum-#3', 0, 2)
ones ones
onesones ones
Collection similar to Pandas Dataframes
__Request received (wms) : #17236, 2016-12-27 16:03:44.898007,
current_connections = connected=4, accepted=4, idle threads=4
appid="mapcache" client_ip=10.0.39.1 user_agent="..." query=…
__Request processed (wms) : #17236, total_duration=00:00:11.377182
cache_hits=7917 cache_misses=0
success_rate=100% successes=262144 failures=0
# Matches the "__Request received" log record and captures the interface,
# request id and start timestamp. The regex escapes (``\s``, ``\w``, ``\d``,
# ``\(``, ``\#``, escaped space) are restored; under re.VERBOSE a bare space
# is ignored and a bare ``#`` starts a comment. The ``...`` line is the
# slide's elision of the remaining fields.
RE_REQ_RECEIVE = re.compile(r"""
    __Request\ received\s+
    \((?P<iface>\w+)\)\s*:\s*       # Interface (wfs, wms)
    \#(?P<req_id>\d+),\s*           # Request id
    (?P<starttime>[^,]+),\s*        # Request start timestamp
    current_connections\s*=\s*
    ...
    """, re.VERBOSE)
# Matches the "__Request processed" log record and captures the request id
# and total duration. The regex escapes (``\s``, ``\w``, ``\d``, ``\(``,
# ``\#``, escaped space) are restored; under re.VERBOSE a bare space is
# ignored and a bare ``#`` starts a comment. The ``...`` line is the slide's
# elision of the remaining fields.
RE_REQ_PROCESSED = re.compile(r"""
    __Request\ processed\s+
    \(\w+\)\s*:\s*                                      # Interface (wfs, wms)
    \#(?P<req_id>\d+),\s*                               # Request id
    total_duration=(?P<total_duration>[0-9:.]+)\s+
    ...
    """, re.VERBOSE)
bag = db.read_text(files)
# Turn every line that matches the "request received" pattern into a dict of
# its named groups, then into a dataframe with the RECV_COLS columns.
ddf_recv = (bag
    .str.strip()
    # Use the pattern under the name it is defined with (RE_REQ_RECEIVE).
    .map(lambda line: RE_REQ_RECEIVE.match(line))
    # Drop the lines that did not match.
    .remove(lambda el: el is None)
    .map(lambda m: m.groupdict())
    .to_dataframe(columns=pd.DataFrame(columns=RECV_COLS))
)
ddf_proc = (bag ...)
# Pair each "received" record with its "processed" record by request id.
requests = ddf_recv.merge(ddf_proc, on='req_id', how='inner')
# Each comparison needs its own parentheses because ``&`` binds tighter than
# ``>=`` and ``<``.
slow_req = requests[
    (requests.starttime >= datetime(2017, 5, 1)) &
    (requests.starttime < datetime(2017, 5, 2)) &
    (requests.total_duration >= timedelta(seconds=5))]
# ``scheduler='processes'`` replaces the removed ``get=dask.multiprocessing.get``.
slow_req = slow_req.compute(scheduler='processes')
$ dask-scheduler
Scheduler at: tcp://10.0.0.8:8786
$ ssh worker1 dask-worker 10.0.0.8:8786
$ ssh worker2 dask-worker 10.0.0.8:8786
$ ssh worker3 dask-worker 10.0.0.8:8786
from distributed import Client
client = Client('10.0.0.8:8786')
Image Credit
●
UBIMET background and company logo
Used with permission
●
CPU frequency scaling:
Created by Wikipedia user Newhorizons msk, in the public domain
https://en.wikipedia.org/wiki/File:Clock_CPU_Scaling.jpg
●
Parallel computing:
Created by the US government, in the public domain
https://computing.llnl.gov/tutorials/parallel_comp/
●
Python logo:
A trademark of the Python Software Foundation
https://www.python.org/community/logos/
●
Dask logo:
Part of the Dask source distribution, licensed BSD v3
https://github.com/dask/dask/blob/master/docs/source/images/dask_horizontal.svg
●
All charts and graphs: created by the author
●
Bag
By Pixabay user “OpenClipart-Vectors”, in the public domain
https://pixabay.com/p-156023/?no_redirect
●
Array
Jerome S. Higgins, in the public domain
https://commons.wikimedia.org/wiki/File:Land_Act_of_1785_section_numbering.png
●
Frame
Modified form of a Wellcome Trust image, licensed CC-BY 4.0
https://commons.wikimedia.org/wiki/File:Picture_frame_Wellcome_L0051764.jpg
●
Dask Array Composition of NumPy Arrays, Dask DataFrame Composition of Pandas Dataframes
Partially modified, part of the Dask source distribution, licensed BSD v3
All from https://github.com/dask/dask/blob/master/docs/source/images/
●
Cluster:
Created by Julian Herzog, licensed GNU FDL v2 / CC-BY 4.0
https://commons.wikimedia.org/wiki/File:High_Performance_Computing_Center_Stuttgart_HLRS_2015_08_Cray_XC40_Hazel_Hen_IO.jpg
●
Dask Distributed graph:
Partially modified, part of the Dask source distribution, licensed BSD v3
https://github.com/dask/dask/blob/9f344bbf38610e03f723ac034f9c4a390a7debec/docs/source/images/distributed-layout.svg

More Related Content

What's hot

How to stand on the shoulders of giants
How to stand on the shoulders of giantsHow to stand on the shoulders of giants
How to stand on the shoulders of giantsIan Barber
 
Getting started with RDO Havana
Getting started with RDO HavanaGetting started with RDO Havana
Getting started with RDO HavanaDan Radez
 
Correcting Common Async/Await Mistakes in .NET
Correcting Common Async/Await Mistakes in .NETCorrecting Common Async/Await Mistakes in .NET
Correcting Common Async/Await Mistakes in .NETBrandon Minnick, MBA
 
Py conkr 20150829_docker-python
Py conkr 20150829_docker-pythonPy conkr 20150829_docker-python
Py conkr 20150829_docker-pythonEric Ahn
 
Intro to OTP in Elixir
Intro to OTP in ElixirIntro to OTP in Elixir
Intro to OTP in ElixirJesse Anderson
 
PuppetCamp SEA @ Blk 71 - Nagios in under 10 mins with Puppet
PuppetCamp SEA @ Blk 71 -  Nagios in under 10 mins with PuppetPuppetCamp SEA @ Blk 71 -  Nagios in under 10 mins with Puppet
PuppetCamp SEA @ Blk 71 - Nagios in under 10 mins with PuppetWalter Heck
 
PuppetCamp SEA @ Blk 71 - Nagios in under 10 mins with Puppet
PuppetCamp SEA @ Blk 71 -  Nagios in under 10 mins with PuppetPuppetCamp SEA @ Blk 71 -  Nagios in under 10 mins with Puppet
PuppetCamp SEA @ Blk 71 - Nagios in under 10 mins with PuppetOlinData
 
The Ring programming language version 1.10 book - Part 56 of 212
The Ring programming language version 1.10 book - Part 56 of 212The Ring programming language version 1.10 book - Part 56 of 212
The Ring programming language version 1.10 book - Part 56 of 212Mahmoud Samir Fayed
 
Best Practices in Handling Performance Issues
Best Practices in Handling Performance IssuesBest Practices in Handling Performance Issues
Best Practices in Handling Performance IssuesOdoo
 
Http capturing
Http capturingHttp capturing
Http capturingEric Ahn
 
QA Fest 2019. Saar Rachamim. Developing Tools, While Testing
QA Fest 2019. Saar Rachamim. Developing Tools, While TestingQA Fest 2019. Saar Rachamim. Developing Tools, While Testing
QA Fest 2019. Saar Rachamim. Developing Tools, While TestingQAFest
 
Kubernetes Tutorial
Kubernetes TutorialKubernetes Tutorial
Kubernetes TutorialCi Jie Li
 
AnyMQ, Hippie, and the real-time web
AnyMQ, Hippie, and the real-time webAnyMQ, Hippie, and the real-time web
AnyMQ, Hippie, and the real-time webclkao
 
Logstash for SEO: come monitorare i Log del Web Server in realtime
Logstash for SEO: come monitorare i Log del Web Server in realtimeLogstash for SEO: come monitorare i Log del Web Server in realtime
Logstash for SEO: come monitorare i Log del Web Server in realtimeAndrea Cardinale
 
Beyond php it's not (just) about the code
Beyond php   it's not (just) about the codeBeyond php   it's not (just) about the code
Beyond php it's not (just) about the codeWim Godden
 
Using New Web APIs For Your Own Pleasure – How I Wrote New Features For My Vi...
Using New Web APIs For Your Own Pleasure – How I Wrote New Features For My Vi...Using New Web APIs For Your Own Pleasure – How I Wrote New Features For My Vi...
Using New Web APIs For Your Own Pleasure – How I Wrote New Features For My Vi...GeilDanke
 

What's hot (20)

How to stand on the shoulders of giants
How to stand on the shoulders of giantsHow to stand on the shoulders of giants
How to stand on the shoulders of giants
 
Getting started with RDO Havana
Getting started with RDO HavanaGetting started with RDO Havana
Getting started with RDO Havana
 
Correcting Common Async/Await Mistakes in .NET
Correcting Common Async/Await Mistakes in .NETCorrecting Common Async/Await Mistakes in .NET
Correcting Common Async/Await Mistakes in .NET
 
Py conkr 20150829_docker-python
Py conkr 20150829_docker-pythonPy conkr 20150829_docker-python
Py conkr 20150829_docker-python
 
Intro to OTP in Elixir
Intro to OTP in ElixirIntro to OTP in Elixir
Intro to OTP in Elixir
 
Redis 101
Redis 101Redis 101
Redis 101
 
PuppetCamp SEA @ Blk 71 - Nagios in under 10 mins with Puppet
PuppetCamp SEA @ Blk 71 -  Nagios in under 10 mins with PuppetPuppetCamp SEA @ Blk 71 -  Nagios in under 10 mins with Puppet
PuppetCamp SEA @ Blk 71 - Nagios in under 10 mins with Puppet
 
PuppetCamp SEA @ Blk 71 - Nagios in under 10 mins with Puppet
PuppetCamp SEA @ Blk 71 -  Nagios in under 10 mins with PuppetPuppetCamp SEA @ Blk 71 -  Nagios in under 10 mins with Puppet
PuppetCamp SEA @ Blk 71 - Nagios in under 10 mins with Puppet
 
The Ring programming language version 1.10 book - Part 56 of 212
The Ring programming language version 1.10 book - Part 56 of 212The Ring programming language version 1.10 book - Part 56 of 212
The Ring programming language version 1.10 book - Part 56 of 212
 
Best Practices in Handling Performance Issues
Best Practices in Handling Performance IssuesBest Practices in Handling Performance Issues
Best Practices in Handling Performance Issues
 
Http capturing
Http capturingHttp capturing
Http capturing
 
QA Fest 2019. Saar Rachamim. Developing Tools, While Testing
QA Fest 2019. Saar Rachamim. Developing Tools, While TestingQA Fest 2019. Saar Rachamim. Developing Tools, While Testing
QA Fest 2019. Saar Rachamim. Developing Tools, While Testing
 
Kubernetes Tutorial
Kubernetes TutorialKubernetes Tutorial
Kubernetes Tutorial
 
AnyMQ, Hippie, and the real-time web
AnyMQ, Hippie, and the real-time webAnyMQ, Hippie, and the real-time web
AnyMQ, Hippie, and the real-time web
 
Logstash for SEO: come monitorare i Log del Web Server in realtime
Logstash for SEO: come monitorare i Log del Web Server in realtimeLogstash for SEO: come monitorare i Log del Web Server in realtime
Logstash for SEO: come monitorare i Log del Web Server in realtime
 
Log mining
Log miningLog mining
Log mining
 
Beyond php it's not (just) about the code
Beyond php   it's not (just) about the codeBeyond php   it's not (just) about the code
Beyond php it's not (just) about the code
 
はじめてのGroovy
はじめてのGroovyはじめてのGroovy
はじめてのGroovy
 
Using New Web APIs For Your Own Pleasure – How I Wrote New Features For My Vi...
Using New Web APIs For Your Own Pleasure – How I Wrote New Features For My Vi...Using New Web APIs For Your Own Pleasure – How I Wrote New Features For My Vi...
Using New Web APIs For Your Own Pleasure – How I Wrote New Features For My Vi...
 
Common scenarios in vcl
Common scenarios in vclCommon scenarios in vcl
Common scenarios in vcl
 

Similar to Parallel Computing With Dask - PyDays 2017

fog or: How I Learned to Stop Worrying and Love the Cloud
fog or: How I Learned to Stop Worrying and Love the Cloudfog or: How I Learned to Stop Worrying and Love the Cloud
fog or: How I Learned to Stop Worrying and Love the CloudWesley Beary
 
fog or: How I Learned to Stop Worrying and Love the Cloud (OpenStack Edition)
fog or: How I Learned to Stop Worrying and Love the Cloud (OpenStack Edition)fog or: How I Learned to Stop Worrying and Love the Cloud (OpenStack Edition)
fog or: How I Learned to Stop Worrying and Love the Cloud (OpenStack Edition)Wesley Beary
 
Apache MXNet Distributed Training Explained In Depth by Viacheslav Kovalevsky...
Apache MXNet Distributed Training Explained In Depth by Viacheslav Kovalevsky...Apache MXNet Distributed Training Explained In Depth by Viacheslav Kovalevsky...
Apache MXNet Distributed Training Explained In Depth by Viacheslav Kovalevsky...Big Data Spain
 
Refactoring to Macros with Clojure
Refactoring to Macros with ClojureRefactoring to Macros with Clojure
Refactoring to Macros with ClojureDmitry Buzdin
 
async/await in Swift
async/await in Swiftasync/await in Swift
async/await in SwiftPeter Friese
 
Beyond PHP - It's not (just) about the code
Beyond PHP - It's not (just) about the codeBeyond PHP - It's not (just) about the code
Beyond PHP - It's not (just) about the codeWim Godden
 
Designing Opeation Oriented Web Applications / YAPC::Asia Tokyo 2011
Designing Opeation Oriented Web Applications / YAPC::Asia Tokyo 2011Designing Opeation Oriented Web Applications / YAPC::Asia Tokyo 2011
Designing Opeation Oriented Web Applications / YAPC::Asia Tokyo 2011Masahiro Nagano
 
An intro to Docker, Terraform, and Amazon ECS
An intro to Docker, Terraform, and Amazon ECSAn intro to Docker, Terraform, and Amazon ECS
An intro to Docker, Terraform, and Amazon ECSYevgeniy Brikman
 
Centralize your Business Logic with Pipelines in Elixir
Centralize your Business Logic with Pipelines in ElixirCentralize your Business Logic with Pipelines in Elixir
Centralize your Business Logic with Pipelines in ElixirMichael Viveros
 
And the Greatest of These Is ... Rack Support
And the Greatest of These Is ... Rack SupportAnd the Greatest of These Is ... Rack Support
And the Greatest of These Is ... Rack SupportBen Scofield
 
Rhebok, High Performance Rack Handler / Rubykaigi 2015
Rhebok, High Performance Rack Handler / Rubykaigi 2015Rhebok, High Performance Rack Handler / Rubykaigi 2015
Rhebok, High Performance Rack Handler / Rubykaigi 2015Masahiro Nagano
 
Emerging Languages: A Tour of the Horizon
Emerging Languages: A Tour of the HorizonEmerging Languages: A Tour of the Horizon
Emerging Languages: A Tour of the HorizonAlex Payne
 
Rails-like JavaScript Using CoffeeScript, Backbone.js and Jasmine
Rails-like JavaScript Using CoffeeScript, Backbone.js and JasmineRails-like JavaScript Using CoffeeScript, Backbone.js and Jasmine
Rails-like JavaScript Using CoffeeScript, Backbone.js and JasmineRaimonds Simanovskis
 
Performance and stability testing \w Gatling
Performance and stability testing \w GatlingPerformance and stability testing \w Gatling
Performance and stability testing \w GatlingDmitry Vrublevsky
 
Server-Side Push: Comet, Web Sockets come of age (OSCON 2013)
Server-Side Push: Comet, Web Sockets come of age (OSCON 2013)Server-Side Push: Comet, Web Sockets come of age (OSCON 2013)
Server-Side Push: Comet, Web Sockets come of age (OSCON 2013)Brian Sam-Bodden
 
Railsconf2011 deployment tips_for_slideshare
Railsconf2011 deployment tips_for_slideshareRailsconf2011 deployment tips_for_slideshare
Railsconf2011 deployment tips_for_slidesharetomcopeland
 
リローダブルClojureアプリケーション
リローダブルClojureアプリケーションリローダブルClojureアプリケーション
リローダブルClojureアプリケーションKenji Nakamura
 

Similar to Parallel Computing With Dask - PyDays 2017 (20)

fog or: How I Learned to Stop Worrying and Love the Cloud
fog or: How I Learned to Stop Worrying and Love the Cloudfog or: How I Learned to Stop Worrying and Love the Cloud
fog or: How I Learned to Stop Worrying and Love the Cloud
 
fog or: How I Learned to Stop Worrying and Love the Cloud (OpenStack Edition)
fog or: How I Learned to Stop Worrying and Love the Cloud (OpenStack Edition)fog or: How I Learned to Stop Worrying and Love the Cloud (OpenStack Edition)
fog or: How I Learned to Stop Worrying and Love the Cloud (OpenStack Edition)
 
Apache MXNet Distributed Training Explained In Depth by Viacheslav Kovalevsky...
Apache MXNet Distributed Training Explained In Depth by Viacheslav Kovalevsky...Apache MXNet Distributed Training Explained In Depth by Viacheslav Kovalevsky...
Apache MXNet Distributed Training Explained In Depth by Viacheslav Kovalevsky...
 
Refactoring to Macros with Clojure
Refactoring to Macros with ClojureRefactoring to Macros with Clojure
Refactoring to Macros with Clojure
 
async/await in Swift
async/await in Swiftasync/await in Swift
async/await in Swift
 
Play!ng with scala
Play!ng with scalaPlay!ng with scala
Play!ng with scala
 
Beyond PHP - It's not (just) about the code
Beyond PHP - It's not (just) about the codeBeyond PHP - It's not (just) about the code
Beyond PHP - It's not (just) about the code
 
Designing Opeation Oriented Web Applications / YAPC::Asia Tokyo 2011
Designing Opeation Oriented Web Applications / YAPC::Asia Tokyo 2011Designing Opeation Oriented Web Applications / YAPC::Asia Tokyo 2011
Designing Opeation Oriented Web Applications / YAPC::Asia Tokyo 2011
 
An intro to Docker, Terraform, and Amazon ECS
An intro to Docker, Terraform, and Amazon ECSAn intro to Docker, Terraform, and Amazon ECS
An intro to Docker, Terraform, and Amazon ECS
 
Centralize your Business Logic with Pipelines in Elixir
Centralize your Business Logic with Pipelines in ElixirCentralize your Business Logic with Pipelines in Elixir
Centralize your Business Logic with Pipelines in Elixir
 
Server Side Swift: Vapor
Server Side Swift: VaporServer Side Swift: Vapor
Server Side Swift: Vapor
 
And the Greatest of These Is ... Rack Support
And the Greatest of These Is ... Rack SupportAnd the Greatest of These Is ... Rack Support
And the Greatest of These Is ... Rack Support
 
Little Big Ruby
Little Big RubyLittle Big Ruby
Little Big Ruby
 
Rhebok, High Performance Rack Handler / Rubykaigi 2015
Rhebok, High Performance Rack Handler / Rubykaigi 2015Rhebok, High Performance Rack Handler / Rubykaigi 2015
Rhebok, High Performance Rack Handler / Rubykaigi 2015
 
Emerging Languages: A Tour of the Horizon
Emerging Languages: A Tour of the HorizonEmerging Languages: A Tour of the Horizon
Emerging Languages: A Tour of the Horizon
 
Rails-like JavaScript Using CoffeeScript, Backbone.js and Jasmine
Rails-like JavaScript Using CoffeeScript, Backbone.js and JasmineRails-like JavaScript Using CoffeeScript, Backbone.js and Jasmine
Rails-like JavaScript Using CoffeeScript, Backbone.js and Jasmine
 
Performance and stability testing \w Gatling
Performance and stability testing \w GatlingPerformance and stability testing \w Gatling
Performance and stability testing \w Gatling
 
Server-Side Push: Comet, Web Sockets come of age (OSCON 2013)
Server-Side Push: Comet, Web Sockets come of age (OSCON 2013)Server-Side Push: Comet, Web Sockets come of age (OSCON 2013)
Server-Side Push: Comet, Web Sockets come of age (OSCON 2013)
 
Railsconf2011 deployment tips_for_slideshare
Railsconf2011 deployment tips_for_slideshareRailsconf2011 deployment tips_for_slideshare
Railsconf2011 deployment tips_for_slideshare
 
リローダブルClojureアプリケーション
リローダブルClojureアプリケーションリローダブルClojureアプリケーション
リローダブルClojureアプリケーション
 

Recently uploaded

Machine Learning Software Engineering Patterns and Their Engineering
Machine Learning Software Engineering Patterns and Their EngineeringMachine Learning Software Engineering Patterns and Their Engineering
Machine Learning Software Engineering Patterns and Their EngineeringHironori Washizaki
 
Real-time Tracking and Monitoring with Cargo Cloud Solutions.pptx
Real-time Tracking and Monitoring with Cargo Cloud Solutions.pptxReal-time Tracking and Monitoring with Cargo Cloud Solutions.pptx
Real-time Tracking and Monitoring with Cargo Cloud Solutions.pptxRTS corp
 
Comparing Linux OS Image Update Models - EOSS 2024.pdf
Comparing Linux OS Image Update Models - EOSS 2024.pdfComparing Linux OS Image Update Models - EOSS 2024.pdf
Comparing Linux OS Image Update Models - EOSS 2024.pdfDrew Moseley
 
SuccessFactors 1H 2024 Release - Sneak-Peek by Deloitte Germany
SuccessFactors 1H 2024 Release - Sneak-Peek by Deloitte GermanySuccessFactors 1H 2024 Release - Sneak-Peek by Deloitte Germany
SuccessFactors 1H 2024 Release - Sneak-Peek by Deloitte GermanyChristoph Pohl
 
Large Language Models for Test Case Evolution and Repair
Large Language Models for Test Case Evolution and RepairLarge Language Models for Test Case Evolution and Repair
Large Language Models for Test Case Evolution and RepairLionel Briand
 
Call Us🔝>༒+91-9711147426⇛Call In girls karol bagh (Delhi)
Call Us🔝>༒+91-9711147426⇛Call In girls karol bagh (Delhi)Call Us🔝>༒+91-9711147426⇛Call In girls karol bagh (Delhi)
Call Us🔝>༒+91-9711147426⇛Call In girls karol bagh (Delhi)jennyeacort
 
Global Identity Enrolment and Verification Pro Solution - Cizo Technology Ser...
Global Identity Enrolment and Verification Pro Solution - Cizo Technology Ser...Global Identity Enrolment and Verification Pro Solution - Cizo Technology Ser...
Global Identity Enrolment and Verification Pro Solution - Cizo Technology Ser...Cizo Technology Services
 
Ronisha Informatics Private Limited Catalogue
Ronisha Informatics Private Limited CatalogueRonisha Informatics Private Limited Catalogue
Ronisha Informatics Private Limited Catalogueitservices996
 
CRM Contender Series: HubSpot vs. Salesforce
CRM Contender Series: HubSpot vs. SalesforceCRM Contender Series: HubSpot vs. Salesforce
CRM Contender Series: HubSpot vs. SalesforceBrainSell Technologies
 
Taming Distributed Systems: Key Insights from Wix's Large-Scale Experience - ...
Taming Distributed Systems: Key Insights from Wix's Large-Scale Experience - ...Taming Distributed Systems: Key Insights from Wix's Large-Scale Experience - ...
Taming Distributed Systems: Key Insights from Wix's Large-Scale Experience - ...Natan Silnitsky
 
Tech Tuesday - Mastering Time Management Unlock the Power of OnePlan's Timesh...
Tech Tuesday - Mastering Time Management Unlock the Power of OnePlan's Timesh...Tech Tuesday - Mastering Time Management Unlock the Power of OnePlan's Timesh...
Tech Tuesday - Mastering Time Management Unlock the Power of OnePlan's Timesh...OnePlan Solutions
 
SpotFlow: Tracking Method Calls and States at Runtime
SpotFlow: Tracking Method Calls and States at RuntimeSpotFlow: Tracking Method Calls and States at Runtime
SpotFlow: Tracking Method Calls and States at Runtimeandrehoraa
 
Precise and Complete Requirements? An Elusive Goal
Precise and Complete Requirements? An Elusive GoalPrecise and Complete Requirements? An Elusive Goal
Precise and Complete Requirements? An Elusive GoalLionel Briand
 
SensoDat: Simulation-based Sensor Dataset of Self-driving Cars
SensoDat: Simulation-based Sensor Dataset of Self-driving CarsSensoDat: Simulation-based Sensor Dataset of Self-driving Cars
SensoDat: Simulation-based Sensor Dataset of Self-driving CarsChristian Birchler
 
20240415 [Container Plumbing Days] Usernetes Gen2 - Kubernetes in Rootless Do...
20240415 [Container Plumbing Days] Usernetes Gen2 - Kubernetes in Rootless Do...20240415 [Container Plumbing Days] Usernetes Gen2 - Kubernetes in Rootless Do...
20240415 [Container Plumbing Days] Usernetes Gen2 - Kubernetes in Rootless Do...Akihiro Suda
 
Powering Real-Time Decisions with Continuous Data Streams
Powering Real-Time Decisions with Continuous Data StreamsPowering Real-Time Decisions with Continuous Data Streams
Powering Real-Time Decisions with Continuous Data StreamsSafe Software
 
VK Business Profile - provides IT solutions and Web Development
VK Business Profile - provides IT solutions and Web DevelopmentVK Business Profile - provides IT solutions and Web Development
VK Business Profile - provides IT solutions and Web Developmentvyaparkranti
 
Post Quantum Cryptography – The Impact on Identity
Post Quantum Cryptography – The Impact on IdentityPost Quantum Cryptography – The Impact on Identity
Post Quantum Cryptography – The Impact on Identityteam-WIBU
 
Catch the Wave: SAP Event-Driven and Data Streaming for the Intelligence Ente...
Catch the Wave: SAP Event-Driven and Data Streaming for the Intelligence Ente...Catch the Wave: SAP Event-Driven and Data Streaming for the Intelligence Ente...
Catch the Wave: SAP Event-Driven and Data Streaming for the Intelligence Ente...confluent
 
Balasore Best It Company|| Top 10 IT Company || Balasore Software company Odisha
Balasore Best It Company|| Top 10 IT Company || Balasore Software company OdishaBalasore Best It Company|| Top 10 IT Company || Balasore Software company Odisha
Balasore Best It Company|| Top 10 IT Company || Balasore Software company Odishasmiwainfosol
 

Recently uploaded (20)

Machine Learning Software Engineering Patterns and Their Engineering
Machine Learning Software Engineering Patterns and Their EngineeringMachine Learning Software Engineering Patterns and Their Engineering
Machine Learning Software Engineering Patterns and Their Engineering
 
Real-time Tracking and Monitoring with Cargo Cloud Solutions.pptx
Real-time Tracking and Monitoring with Cargo Cloud Solutions.pptxReal-time Tracking and Monitoring with Cargo Cloud Solutions.pptx
Real-time Tracking and Monitoring with Cargo Cloud Solutions.pptx
 
Comparing Linux OS Image Update Models - EOSS 2024.pdf
Comparing Linux OS Image Update Models - EOSS 2024.pdfComparing Linux OS Image Update Models - EOSS 2024.pdf
Comparing Linux OS Image Update Models - EOSS 2024.pdf
 
SuccessFactors 1H 2024 Release - Sneak-Peek by Deloitte Germany
SuccessFactors 1H 2024 Release - Sneak-Peek by Deloitte GermanySuccessFactors 1H 2024 Release - Sneak-Peek by Deloitte Germany
SuccessFactors 1H 2024 Release - Sneak-Peek by Deloitte Germany
 
Large Language Models for Test Case Evolution and Repair
Large Language Models for Test Case Evolution and RepairLarge Language Models for Test Case Evolution and Repair
Large Language Models for Test Case Evolution and Repair
 
Call Us🔝>༒+91-9711147426⇛Call In girls karol bagh (Delhi)
Call Us🔝>༒+91-9711147426⇛Call In girls karol bagh (Delhi)Call Us🔝>༒+91-9711147426⇛Call In girls karol bagh (Delhi)
Call Us🔝>༒+91-9711147426⇛Call In girls karol bagh (Delhi)
 
Global Identity Enrolment and Verification Pro Solution - Cizo Technology Ser...
Global Identity Enrolment and Verification Pro Solution - Cizo Technology Ser...Global Identity Enrolment and Verification Pro Solution - Cizo Technology Ser...
Global Identity Enrolment and Verification Pro Solution - Cizo Technology Ser...
 
Ronisha Informatics Private Limited Catalogue
Ronisha Informatics Private Limited CatalogueRonisha Informatics Private Limited Catalogue
Ronisha Informatics Private Limited Catalogue
 
CRM Contender Series: HubSpot vs. Salesforce
CRM Contender Series: HubSpot vs. SalesforceCRM Contender Series: HubSpot vs. Salesforce
CRM Contender Series: HubSpot vs. Salesforce
 
Taming Distributed Systems: Key Insights from Wix's Large-Scale Experience - ...
Taming Distributed Systems: Key Insights from Wix's Large-Scale Experience - ...Taming Distributed Systems: Key Insights from Wix's Large-Scale Experience - ...
Taming Distributed Systems: Key Insights from Wix's Large-Scale Experience - ...
 
Tech Tuesday - Mastering Time Management Unlock the Power of OnePlan's Timesh...
Tech Tuesday - Mastering Time Management Unlock the Power of OnePlan's Timesh...Tech Tuesday - Mastering Time Management Unlock the Power of OnePlan's Timesh...
Tech Tuesday - Mastering Time Management Unlock the Power of OnePlan's Timesh...
 
SpotFlow: Tracking Method Calls and States at Runtime
SpotFlow: Tracking Method Calls and States at RuntimeSpotFlow: Tracking Method Calls and States at Runtime
SpotFlow: Tracking Method Calls and States at Runtime
 
Precise and Complete Requirements? An Elusive Goal
Precise and Complete Requirements? An Elusive GoalPrecise and Complete Requirements? An Elusive Goal
Precise and Complete Requirements? An Elusive Goal
 
SensoDat: Simulation-based Sensor Dataset of Self-driving Cars
SensoDat: Simulation-based Sensor Dataset of Self-driving CarsSensoDat: Simulation-based Sensor Dataset of Self-driving Cars
SensoDat: Simulation-based Sensor Dataset of Self-driving Cars
 
20240415 [Container Plumbing Days] Usernetes Gen2 - Kubernetes in Rootless Do...
20240415 [Container Plumbing Days] Usernetes Gen2 - Kubernetes in Rootless Do...20240415 [Container Plumbing Days] Usernetes Gen2 - Kubernetes in Rootless Do...
20240415 [Container Plumbing Days] Usernetes Gen2 - Kubernetes in Rootless Do...
 
Powering Real-Time Decisions with Continuous Data Streams
Powering Real-Time Decisions with Continuous Data StreamsPowering Real-Time Decisions with Continuous Data Streams
Powering Real-Time Decisions with Continuous Data Streams
 
VK Business Profile - provides IT solutions and Web Development
VK Business Profile - provides IT solutions and Web DevelopmentVK Business Profile - provides IT solutions and Web Development
VK Business Profile - provides IT solutions and Web Development
 
Post Quantum Cryptography – The Impact on Identity
Post Quantum Cryptography – The Impact on IdentityPost Quantum Cryptography – The Impact on Identity
Post Quantum Cryptography – The Impact on Identity
 
Catch the Wave: SAP Event-Driven and Data Streaming for the Intelligence Ente...
Catch the Wave: SAP Event-Driven and Data Streaming for the Intelligence Ente...Catch the Wave: SAP Event-Driven and Data Streaming for the Intelligence Ente...
Catch the Wave: SAP Event-Driven and Data Streaming for the Intelligence Ente...
 
Balasore Best It Company|| Top 10 IT Company || Balasore Software company Odisha
Balasore Best It Company|| Top 10 IT Company || Balasore Software company OdishaBalasore Best It Company|| Top 10 IT Company || Balasore Software company Odisha
Balasore Best It Company|| Top 10 IT Company || Balasore Software company Odisha
 

Parallel Computing With Dask - PyDays 2017

  • 3.
  • 4.
  • 5.
  • 6.
  • 7.
  • 8.
  • 10. def download(url): return requests.get(url).content @asyncio.coroutine def asyncio_download(loop): futures = [loop.run_in_executor(None, download, url) for url in urls] return [(yield from future) for future in futures] loop = asyncio.get_event_loop() job = asyncio_download(loop) loop.run_until_complete(job)
  • 11. @dask.delayed def download(url): return requests.get(url).content contents = [download(url) for url in urls] dask.compute(contents)
  • 12.
  • 13. def process_cpu(url): url = url.encode() charsum = 0 for c1 in url: for c2 in url: for c3 in url: charsum += c1 * c2 * c3 return charsum [process_cpu(url) for url in urls]
  • 14.
  • 15. @dask.delayed def process_cpu(url): ... graph = [process_cpu(url) for url in urls] dask.compute(graph)
  • 16. @dask.delayed def process_cpu(url): ... graph = [process_cpu(url) for url in urls] dask.compute(graph, get=dask.multiprocessing.get)
  • 17. @dask.delayed def f(arg): print("f", arg) return 2 * arg @dask.delayed def g(args): print("g", args) return sum(args) lst = [1, 2, 3] graph = g([f(i) for i in lst]) f-#0 g f g-#1 f-#2 f-#3 f f
  • 18. print("result", graph.compute()) f 2 f 1 f 3 g [2, 4, 6] result 12 f-#0 g f g-#1 f-#2 f-#3 f f
  • 19. Collection similar to Python lists
  • 20. import dask.bag as db db.from_sequence(urls) .map(download) .map(convert_to_image) .filter(lambda img: img.size[0] < 500) .map(remove_artifacts) .map(save_to_disk) .compute()
  • 21. import dask.bag as db import json js = db.read_text('log-2017*.gz').map(json.loads) js.take(2) ({'name': 'Alice', 'location': {'city': 'LA', 'state': 'CA'}}, {'name': 'Bob', 'location': {'city': 'NYC', 'state': 'NY'}}) result = js.pluck('name').frequencies() dict(result) {'Alice': 10000, 'Bob': 5555, 'Charlie': ...} http://dask.pydata.org/en/latest/examples/bag-json.html
  • 22. Collection similar to NumPy Arrays
  • 23.
  • 24. import dask.array as da import skimage.io delayed_imread = dask.delayed(skimage.io.imread, pure=True) sample = skimage.io.imread(urls[0]) images = [delayed_imread(url) for url in urls] images = [da.from_delayed(img, dtype=sample.dtype, shape=sample.shape) for img in images] images = da.stack(images, axis=0) images.shape (1000000, 360, 500, 3)
  • 25. images.shape (1000000, 360, 500, 3) max_img = images.mean(axis=3).max(axis=0) max_img.shape (360, 500) max_img.compute() array([[ 157., 155., 153., ..., 134., 137.], [ 154., 153., 151., ..., 129., 132.], ..., [ 97., 66., 81., ..., 74., 82.]]) da.linalg.svd(max_img, 10) da.fft.fft(max_img)
  • 26. ('tensordot-#0', 2, 1, 2) sum apply ('transpose-#1', 1, 2) apply apply ('wrapped-#2', 2, 1) apply applytranspose ('tensordot-#0', 1, 1, 1) sum apply ('transpose-#1', 1, 1) apply ('wrapped-#2', 1, 1) apply transpose ('tensordot-#0', 2, 0, 2) apply ('wrapped-#2', 2, 0) apply applytranspose ('transpose-#1', 0, 2) apply apply ('tensordot-#0', 0, 1, 0) sum apply ('wrapped-#2', 0, 1) transpose ('transpose-#1', 1, 0) ('tensordot-#0', 0, 0, 2) sum ('wrapped-#2', 0, 0) apply apply transpose ('tensordot-#0', 2, 0, 0) sum ('transpose-#1', 0, 0) apply ('tensordot-#0', 0, 0, 0) ('tensordot-#0', 2, 2, 0) apply ('wrapped-#2', 2, 2) apply applytranspose ('transpose-#1', 2, 0) apply apply ('tensordot-#0', 0, 2, 2) apply ('transpose-#1', 2, 2) apply ('wrapped-#2', 0, 2) apply transpose ('tensordot-#0', 1, 0, 1) apply ('transpose-#1', 0, 1) ('wrapped-#2', 1, 0) transpose ('tensordot-#0', 2, 1, 0) ('tensordot-#0', 0, 2, 1) sum ('transpose-#1', 2, 1) apply ('tensordot-#0', 0, 2, 0) ('tensordot-#0', 0, 0, 1) ('tensordot-#0', 0, 1, 2) ('tensordot-#0', 1, 2, 1) ('wrapped-#2', 1, 2) transpose ('tensordot-#0', 2, 2, 2) ('tensordot-#0', 1, 2, 2) sum ('tensordot-#0', 2, 2, 1) sum ('tensordot-#0', 1, 0, 0) sum ('tensordot-#0', 1, 1, 0)('tensordot-#0', 2, 0, 1) ('tensordot-#0', 0, 1, 1)('tensordot-#0', 1, 2, 0) ('tensordot-#0', 1, 0, 2) ('tensordot-#0', 2, 1, 1) ('tensordot-#0', 1, 1, 2)('sum-#3', 2, 0) ('sum-#3', 0, 0) ('sum-#3', 0, 1) ('sum-#3', 2, 2) ('sum-#3', 1, 2)('sum-#3', 0, 2) ('sum-#3', 1, 1)('sum-#3', 2, 1) ('sum-#3', 1, 0) onesones onesones onesones ones ones ones ('tensordot-#0', 2, 1, 2) sum apply ('transpose-#1', 1, 2) apply ('wrapped-#2', 2, 1) appltranspose ('tensordot-#0', 2, 0, 2) apply ('wrapped-#2', 2, 0) applytranspose ('transpose-#1', 0, 2) apply ('tensordot-#0', 0, 0, 2) sum ('wrapped-#2', 0, 0) apply transpose ('tensordot-#0', 2, 0, 0) sum ('transpose-#1', 0, 0) ('tensordot-#0', 0, 0, 0) ('tensordot-#0', 2, 2, 0) apply ('wrapped-#2', 2, 2) apply transpose 
('transpos ('tensordot-#0', 0, 2, 2) apply ('transpose-#1', 2, 2) ('wrapped-#2', 0, 2 trans ('tensordot-#0', 2, 1, 0) ('tensordot-#0', 0, 1, 2)('tensordot-#0', 2, 2, 2) ('sum-#3', 2, 0) ('sum-#3', 2, 2) ('sum-#3', 0, 2) ones ones onesones ones
  • 27. Collection similar to Pandas Dataframes
  • 28.
  • 29. __Request received (wms) : #17236, 2016-12-27 16:03:44.898007, current_connections = connected=4, accepted=4, idle threads=4 appid="mapcache" client_ip=10.0.39.1 user_agent="..." query=… __Request processed (wms) : #17236, total_duration=00:00:11.377182 cache_hits=7917 cache_misses=0 success_rate=100% successes=262144 failures=0
  • 30. RE_REQ_RECEIVE = re.compile(r""" __Request\ received\s+ \((?P<iface>\w+)\)\s*:\s* # Interface (wfs, wms) \#(?P<req_id>\d+),\s* # Request id (?P<starttime>[^,]+),\s* # Request start timestamp current_connections=\s* ... """, re.VERBOSE) RE_REQ_PROCESSED = re.compile(r""" __Request\ processed\s+ \(\w+\)\s*:\s* # Interface (wfs, wms) \#(?P<req_id>\d+),\s* # Request id total_duration=(?P<total_duration>[0-9:.]+)\s+ ... """, re.VERBOSE)
  • 31. bag = db.read_text(files) ddf_recv = (bag .str.strip() .map(lambda line: RE_REQ_RECEIVE.match(line)) .remove(lambda el: el is None) .map(lambda m: m.groupdict()) .to_dataframe(columns=pd.DataFrame(columns=RECV_COLS)) ) ddf_proc = (bag ...) requests = ddf_recv.merge(ddf_proc, on='req_id', how='inner')
  • 32. slow_req = requests[ (requests.starttime >= datetime(2017, 5, 1)) & (requests.starttime < datetime(2017, 5, 2)) & (requests.total_duration >= timedelta(seconds=5))] slow_req = slow_req.compute(get=dask.multiprocessing.get)
  • 33.
  • 34.
  • 35. $ dask-scheduler Scheduler at: tcp://10.0.0.8:8786 $ ssh worker1 dask-worker 10.0.0.8:8786 $ ssh worker2 dask-worker 10.0.0.8:8786 $ ssh worker3 dask-worker 10.0.0.8:8786
  • 36. from distributed import Client client = Client('10.0.0.8:8786')
  • 37.
  • 38.
  • 39. Image Credit ● UBIMET background and company logo Used with permission ● CPU frequency scaling: Created by Wikipedia user Newhorizons msk, in the public domain https://en.wikipedia.org/wiki/File:Clock_CPU_Scaling.jpg ● Parallel computing: Created by the US government, in the public domain https://computing.llnl.gov/tutorials/parallel_comp/ ● Python logo: A trademark of the Python Software Foundation https://www.python.org/community/logos/ ● Dask logo: Part of the Dask source distribution, licensed BSD v3 https://github.com/dask/dask/blob/master/docs/source/images/dask_horizontal.svg ● All charts and graphs: created myself ● Bag By Pixabay user “OpenClipart-Vectors”, in the public domain https://pixabay.com/p-156023/?no_redirect ● Array Jerome S. Higgins, in the public domain https://commons.wikimedia.org/wiki/File:Land_Act_of_1785_section_numbering.png ● Frame Modified form of a Wellcome Trust image, licensed CC-BY 4.0 https://commons.wikimedia.org/wiki/File:Picture_frame_Wellcome_L0051764.jpg ● Dask Array Composition of NumPy Arrays, Dask DataFrame Composition of Pandas Dataframes Partially modified, part of the Dask source distribution, licensed BSD v3 All from https://github.com/dask/dask/blob/master/docs/source/images/ ● Cluster: Created by Julian Herzog, licensed GNU FDL v2 / CC-BY 4.0 https://commons.wikimedia.org/wiki/File:High_Performance_Computing_Center_Stuttgart_HLRS_2015_08_Cray_XC40_Hazel_Hen_IO.jpg ● Dask Distributed graph: Partially modified, part of the Dask source distribution, licensed BSD v3 https://github.com/dask/dask/blob/9f344bbf38610e03f723ac034f9c4a390a7debec/docs/source/images/distributed-layout.svg