Dataflow
The declarative concurrent
   programming model
Larry Diehl

  {:larrytheliquid =>
   %w[.com
        github
        twitter]}
Outline

Purpose of presentation



Gradual explanation of concepts



Helpful tips
Purpose
Lexical Scope


# The block given to define_method is a closure: the `foo` it returns is the
# local variable assigned on the line above, captured from the enclosing
# lexical scope.
foo = :foo
define_method :foo do
 foo
end
Dynamic Scope

# Returns the instance variable @foo of whichever object receives the call;
# nothing inside this method fixes what that value is.
def foo
 @foo
end
Mutability

# Assigns @foo in the constructor.
def initialize
 @foo = :foo
end

# Reads back whatever @foo currently holds.
def foo
 @foo
end
Mutability


# Assigns @foo on every call, then reads it back on the following line.
def foo
 @foo = :foo
 @foo
end
Mutability+Concurrency

# Starts a background thread that reassigns @foo to :shazbot in an endless loop.
def initialize
 Thread.new { loop { @foo = :shazbot } }
end

# Writes :foo and then reads @foo. The background thread may run between the
# two statements, so the value returned is not guaranteed to be :foo.
def foo
 @foo = :foo
 @foo
end
The Declarative Model
Declarative Synchronous


# Single assignment: once bound, a declarative variable is not allowed to be
# bound again to a different value.
my_var = :bound
my_var = :rebind # NOT ALLOWED!
Declarative Synchronous


# my_var is never bound inside this block, so the method call on it suspends
# the calling thread (see the inline note).
local do |my_var|
  my_var.object_id # thread sleeps
end
Declarative Synchronous


# The first unify binds my_var. The second attempts to bind it to a different
# value and raises instead of rebinding.
local do |my_var|
  unify my_var, :bound
  unify my_var, :rebind # =>
  # Dataflow::UnificationError,
  # ":bound != :rebind"
end
Declarative Synchronous

# Class-level counterpart of `local`: the variable is named with `declare`
# and bound in the constructor.
# NOTE(review): presumably `declare` defines a `my_var` reader backed by a
# dataflow variable — confirm against the library.
class MyClass
 declare :my_var
 def initialize
  unify my_var, :bound
 end
end
Declarative Concurrent
     (MAGIC)
Declarative Concurrent


# my_var is bound from a separate thread. The comparison in the calling thread
# presumably waits until that binding has happened, so the expectation does
# not depend on thread scheduling.
local do |my_var|
  Thread.new { unify my_var, :bound }
  my_var.should == :bound
end
Dependency Resolution

# Three threads each bind one variable. `middle` interpolates `tail` and
# `sentence` interpolates `middle`; each interpolation presumably waits for
# the variable it reads, so the result is the same whatever order the threads
# start in.
local do |sentence, middle, tail|
  Thread.new { unify middle, "base are belong #{tail}" }
  Thread.new { unify tail, "to us" }
  Thread.new { unify sentence, "all your #{middle}" }
  sentence.should == "all your base are belong to us"
end
Asynchronous Output
# Runs the work in a new thread and, when an output variable was supplied,
# unifies that variable with the result.
#
# output - optional variable to receive the result (defaults to nil, in which
#          case the result is not published).
def Worker.async(output=nil)
 Thread.new do
  # NOTE: the right-hand side is elided on the slide; "# do hard work" stands
  # in for the real computation.
  result = # do hard work
  unify output, result if output
 end
end

# The caller hands in a variable and later reads the result from it.
local do |output|
  Worker.async(output)
  output.should == # hard work result
end
Asynchronous Output
# NOTE(review): presumably `flow` runs its block asynchronously and unifies
# `output` with the block's result — confirm against the library.
local do |output|
  flow(output) do
    # do hard work
  end
  output.should == # hard work result
end
Anonymous variables

# Pairs each domain with a freshly created Dataflow::Variable. For every pair
# a thread reads http://<domain> via `open` and unifies the variable with the
# content; the block's last expression is the variable, so `map` yields the
# variables themselves.
{'google.com' => Dataflow::Variable.new,
 'bing.com' => Dataflow::Variable.new
}.map do |domain,var|
  Thread.new do
   unify var, open("http://#{domain}").read
  end
  var
end
need_later

# The work is passed to need_later as a block, one call per domain.
# NOTE(review): presumably need_later returns a variable that is bound to the
# block's result from another thread — confirm against the library.
%w[google.com bing.com].map do |domain|
 need_later { open("http://#{domain}").read }
end
Chunked Sequential Processing


 # Sums 1..100 in ten slices of ten. Each slice sleeps for one second before
 # being summed, and the slices are processed one after another.
 (1..100).each_slice(10).map do |chunk|
  sleep(1)
  chunk.inject(&:+)
 end.inject(&:+) # => ~10s
Chunked Parallel Processing


# Same slicing, but each slice's sleep-and-sum is wrapped in need_later.
# Presumably the slices then run concurrently, which is why the quoted total
# drops to about one second.
(1..100).each_slice(10).map do |chunk|
 need_later do
   sleep(1)
   chunk.inject(&:+)
 end
end.inject(&:+) # => ~1s
Leaving Declarative
    via Async
Ports & Streams

# A Port is constructed around the `stream` variable; values sent to the port
# show up, in send order, as elements of the stream.
local do |port, stream|
  unify port, Dataflow::Port.new(stream)
  port.send 1
  port.send 2
  stream.take(2).should == [1, 2]
end
Ports & Streams (async)
# One thread iterates the stream and prints every message; three more threads
# each send a single letter. The send order is not fixed, hence the sort
# before comparing.
local do |port, stream|
  unify port, Dataflow::Port.new(stream)
  Thread.new do
   stream.each do |message|
     puts "received: #{message}"
   end
  end
  %w[x y z].each do |letter|
    Thread.new{ port.send letter }
   end
  stream.take(3).sort.should == %w[x y z]
end
FutureQueue
# `pop` is given a variable to fill in rather than returning a value. Two pops
# are requested before anything is pushed and one afterwards; the expectation
# shows the variables receive the pushed values in push order.
local do |queue, first, second, third|
  unify queue, FutureQueue.new
  queue.pop first
  queue.pop second
  queue.push 1
  queue.push 2
  queue.push 3
  queue.pop third
  [first, second, third].should == [1, 2, 3]
end
Actors
# Ping handles up to three messages: on :ping it prints "Ping" and sends
# :pong to Pong.
Ping = Actor.new {
  3.times {
    case receive
    when :ping
      puts "Ping"
      Pong.send :pong
    end
  }
}

# Pong mirrors Ping: on :pong it prints "Pong" and sends :ping back to Ping.
Pong = Actor.new {
  3.times {
    case receive
    when :pong
      puts "Pong"
      Ping.send :ping
    end
  }
}

# Kick off the exchange once both actors exist.
Ping.send :ping
by_need

# Wraps Factory.gen in by_need and asks for its value only when num is even;
# for odd num the method returns nil.
# NOTE(review): presumably the wrapped block does not run unless the value is
# requested — confirm against the library.
def baz(num)
 might_get_used = by_need { Factory.gen }
 might_get_used.value if num%2 == 0
end
Tips
Modular

# Binding happens in a separate thread. The explicit `my_var.wait` is left
# commented out and the expectation is written directly against my_var.
local do |my_var|
  Thread.new { unify my_var, :bound }
  # my_var.wait
  my_var.should == :bound
end
Debugging

# Calling inspect on a variable that has not been bound reports it as unbound
# (sample output shown below).
local do |my_var|
  my_var.inspect
# => #<Dataflow::Variable:2637860 unbound>
end
Class/Module methods
# The same operations written as calls on the Dataflow module itself:
# local, async and unify are all invoked with an explicit Dataflow receiver.
Dataflow.local do |my_var|
 Dataflow.async do
  Dataflow.unify my_var, :bound
 end
 my_var.should == :bound
end
Use Cases
general purpose
   concurrency for elegant program structure with respect
   to coordination

   concurrency to make use of extra processors/cores
   (depending on Ruby implementation)

web development
  worker daemons

   concurrently munging together data from various REST
   APIs
Ruby Implementations

Pure Ruby library, should work on any implementation


JRuby in particular has a great GC, no GIL, native threads,
and a tunable threadpool option.


Rubinius has more code written in Ruby, so it proxies more
method calls (e.g. Array#flatten).
# A queue built from two Dataflow ports: one receives pushed values, the other
# receives the variables handed to `pop`. A background thread pairs them up in
# arrival order.
class FutureQueue
  include Dataflow
  declare :push_port, :pop_port

  # Creates both ports over local stream variables and starts the matching
  # thread, which runs for the lifetime of the process.
  def initialize
    local do |push_stream, pop_stream|
      unify(push_port, Dataflow::Port.new(push_stream))
      unify(pop_port, Dataflow::Port.new(pop_stream))

      Thread.new do
        loop do
          # NOTE(review): presumably `barrier` waits until both heads are
          # bound — confirm against the library.
          barrier(push_stream.head, pop_stream.head)
          # Hand the oldest pushed value to the oldest pending pop.
          unify(pop_stream.head, push_stream.head)
          # Advance both streams to their next elements.
          push_stream, pop_stream = push_stream.tail, pop_stream.tail
        end
      end
    end
  end

  # Sends x to the push port.
  def push(x)
    push_port.send(x)
  end

  # Sends x (the variable to be filled in) to the pop port.
  def pop(x)
    pop_port.send(x)
  end
end
The End
sudo gem install dataflow



   http://github.com
   /larrytheliquid
   /dataflow


freenode: #dataflow-gem

Dataflow: Declarative concurrency in Ruby