Functional streams with Kafka
A comparison between Akka-streams and FS2
12th May 2017
Luis Reis & Rui Batista
Apache Kafka
Which features?
Read and Write
Offset management
Parallelism
Java API
val props = new Properties()
props.put("bootstrap.servers", "localhost:9092")
props.put("group.id", "consumer-tutorial")
props.put("key.deserializer", classOf[StringDeserializer].getName)
props.put("value.deserializer", classOf[StringDeserializer].getName)
val consumer = new KafkaConsumer[String, String](props)
consumer.subscribe(List("foo", "bar").asJava)

// One thread polls in a loop until another thread calls wakeup()
try {
  while (true) {
    val records = consumer.poll(Long.MaxValue)
    records.iterator.asScala foreach { record => println(record.value) }
  }
} catch {
  case e: WakeupException => // expected on shutdown, ignore
} finally {
  consumer.close()
}

// From a separate thread, to interrupt the blocking poll:
consumer.wakeup()
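Note everything the raw client makes you own: the polling loop, the consumer's single-thread constraint, wakeup-based shutdown, offset commits and any parallelism. The streaming libraries in the rest of the talk absorb exactly these concerns.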
Functional Streams
Akka Streams
FS2
Monix
Kafka Streams
Functional Streams
Akka Streams + FS2
Akka Streams: Reactive-Kafka
FS2: FS2-Kafka (by Rui)
Functional Streams
Akka Streams
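For readers new to Akka Streams, a minimal pipeline using the standard akka-stream API, mirroring the FS2 primer on the next slide (the system and materializer setup is the usual boilerplate; names are illustrative):

import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.{Flow, Sink, Source}

implicit val system = ActorSystem("demo")
implicit val materializer = ActorMaterializer()

// A blueprint: Source ~> Flow ~> Sink, run by runWith
Source(1 to 3)
  .via(Flow[Int].map(_.toString))
  .runWith(Sink.foreach(println))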
Functional Streams
FS2
Stream[F, O]
F[_] can be fs2.Task, scalaz.concurrent.Task or cats.effect.IO
// A Pipe is a function from Stream[F, I] to Stream[F, O]
Pipe[F, I, O] =:= (Stream[F, I] => Stream[F, O])
Sink[F, O] =:= Pipe[F, O, Unit]
Stream[Task, Int](1).map(_.toString).runLast.unsafeRun()
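To make these types concrete, a minimal sketch in the same fs2 0.9-style API as the snippet above (double and printSink are illustrative names):

import fs2.{Pipe, Sink, Stream, Task}

// A Pipe that transforms every element
val double: Pipe[Task, Int, Int] = _.map(_ * 2)

// A Sink that performs an effect per element
val printSink: Sink[Task, Int] = _.evalMap(i => Task.delay(println(i)))

Stream[Task, Int](1, 2, 3)
  .through(double) // apply the Pipe
  .to(printSink)   // terminate in the Sink
  .run
  .unsafeRun()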
Read and Write
Akka Streams
val bootstrapServers = "localhost:9092"
val consumerGroup = "retweets"
val stringSerializer = new StringSerializer
val stringDeserializer = new StringDeserializer
val producerSettings = ProducerSettings(system, stringSerializer, stringSerializer)
.withBootstrapServers(bootstrapServers)
val consumerSettings = ConsumerSettings(system, stringDeserializer, stringDeserializer)
.withBootstrapServers(bootstrapServers)
.withGroupId(consumerGroup)
val regularSubscription = Subscriptions.topics("topic1")
Consumer.plainSource(consumerSettings, regularSubscription)
  .map { message =>
    parseTweet[Message](message, _.value())
  }.map { tweet =>
    val count = tweet.retweet_count.toString
    new ProducerRecord[String, String]("topic2", count)
  }.runWith(Producer.plainSink(producerSettings))
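plainSource does no offset bookkeeping: if the application restarts, where to resume from is someone else's problem. That is exactly what the Offset Management slides address next.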
Read and Write
FS2
val bootstrapServers = "localhost:9092"
val consumerGroup = "retweets"
val consumerSettings = ConsumerSettings[String, String](50 millis)
.withBootstrapServers(bootstrapServers)
.withGroupId(consumerGroup)
val producerSettings = ProducerSettings[String, String]()
.withBootstrapServers(bootstrapServers)
val subscription = Subscriptions.topics("topic1")
// stream definition
val stream = Consumer[Task, String, String](consumerSettings)
  .simpleStream
  .plainMessages(subscription)
  .map(msg => parseTweet[ConsumerRecord[String, String]](msg, _.value))
  .map(_.retweet_count)
  .map(count => new ProducerRecord[String, String]("topic2", "key", count.toString))
  .to(Producer[Task, String, String](producerSettings).sendAsync)

// run at the end of the universe
stream.run.unsafeRun()
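Note that defining stream has no effects: the whole pipeline is an ordinary immutable value, and no connection is opened nor record consumed until the single stream.run.unsafeRun() call, hence the comment about the end of the universe.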
Offset Management
Akka Streams
// Automatic: commit offsets back to Kafka
Consumer.committableSource(consumerSettings, regularSubscription)
  .map { message =>
    (message, parseTweet[Message](message, _.record.value()))
  }.map { case (message, tweet) =>
    Util.messageWithOffset(message.committableOffset, tweet, "topic2")
  }.runWith(Producer.commitableSink(producerSettings))
// External offset storage
Source.fromFuture(offsetDB.loadOffset())
  .flatMapConcat { offset =>
    val subscription = Subscriptions.assignmentWithOffsets(
      Map(new TopicPartition("topic1", 0) -> offset)
    )
    Consumer.committableSource(consumerSettings, subscription)
      .mapAsync(producerSettings.parallelism) { record =>
        // do stuff with the record, then persist the next offset to read
        val offset = record.committableOffset.partitionOffset.offset
        offsetDB.save(offset + 1)
      }
  }.runWith(Sink.ignore)
Offset Management
FS2
// Automatic: commit offsets back to Kafka
Consumer[Task, String, String](consumerSettings)
  .simpleStream
  .commitableMessages(subscription)
  .map(_.map(r => parseTweet[Message](r, _.value)))
  // do stuff with the tweet, then pair it back with its offset
  .map { message =>
    // message.record now holds the parsed tweet
    Util.messageWithOffset(message.commitableOffset, message.record, "topic2")
  }
  .to(Producer[Task, String, String](producerSettings).sendCommitable)
// External offset storage
Stream.eval(offsetDB.loadOffset)
  .flatMap { offset =>
    val assignment = Subscriptions.assignmentWithOffsets(
      Map(new TopicPartition("topic1", 0) -> offset)
    )
    Consumer[Task, String, String](consumerSettings)
      .simpleStream
      .commitableMessages(assignment)
      // do stuff with the message
      .evalMap(msg => offsetDB.save(msg.commitableOffset.partitionOffset.offset + 1))
  }
Parallelism
Akka Streams
// One source per partition
Consumer.committablePartitionedSource(consumerSettings, regularSubscription)
  .map {
    case (topicPartition, source) =>
      source
        .via(logicFlow)
        .map { flowResponse => createKafkaMessage(producerTopic, flowResponse) }
        .runWith(Producer.committablePartitionedSink(producerSettings))
  }
  .mapAsyncUnordered(producerSettings.parallelism)(identity)
  .runWith(Sink.ignore)
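Each per-partition source is materialized into its own running stream inside the map; mapAsyncUnordered(identity) then simply awaits those inner materializations, so partitions are processed in parallel while the outer stream follows assignments and rebalances.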
Parallelism
FS2
def parallelCount(keyFunc: KeyFunc[Task, Message],
                  signal: Signal[Task, Map[String, Int]]) = {
  val partitioned = Consumer[Task, String, String](consumerSettings)
    .partitionedStreams
    .commitableMessages(Subscriptions.topics("topic1"))
    .map {
      case (_, innerStream) =>
        innerStream.evalMap(keyFunc)
    }
  // join streams and aggregate key counts
  fs2.concurrent.join(100)(partitioned)
    .scan(Map.empty[String, Int]) { case (current, key) =>
      current |+| Map(key -> 1)
    }
    .evalMap(signal.set _)
}
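fs2.concurrent.join(100)(partitioned) runs up to 100 of the inner per-partition streams at once and merges their elements non-deterministically into a single stream; |+| is Semigroup append (from cats or scalaz), so the scan keeps a running count per key.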
Functional Streams
API Design for FS2-Kafka
Typesafe API
Resource acquisition
State management
Back pressure
FS2-Kafka
Typesafe API
trait Consumer[F[_], K, V] {
  val simpleStream = new StreamType {
    type OutStreamType[A] = Stream[F, A]
    // ...
  }
  val partitionedStreams = new StreamType {
    type OutStreamType[A] = Stream[F, (TopicPartition, Stream[F, A])]
    // ...
  }
}
trait Consumer[F[_], K, V] {
  private[kafka] def createConsumer: F[ConsumerControl[F, K, V]]

  trait StreamType {
    type OutStreamType[_] <: Stream[F, _]
    type CMessage = CommitableMessage[F, ConsumerRecord[K, V]]

    private[kafka] def makeStream(
      subscription: Subscription,
      builder: MessageBuilder[F, K, V]
    )(implicit F: Async[F]): OutStreamType[builder.Message]

    def commitableMessages(subscription: Subscription)
        (implicit F: Async[F]): OutStreamType[CMessage] =
      makeStream(subscription, new CommitableMessageBuilder[F, K, V])
  }
}
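What the abstract OutStreamType buys: the same commitableMessages call produces differently shaped streams depending on the chosen StreamType. A sketch of the two call sites, reusing the types defined above:

// assuming: val consumer = Consumer[Task, String, String](consumerSettings)
type Msg = CommitableMessage[Task, ConsumerRecord[String, String]]

// simpleStream: one flat stream across all assigned partitions
val simple: Stream[Task, Msg] =
  consumer.simpleStream.commitableMessages(subscription)

// partitionedStreams: an inner stream per assigned partition
val perPartition: Stream[Task, (TopicPartition, Stream[Task, Msg])] =
  consumer.partitionedStreams.commitableMessages(subscription)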
FS2-Kafka
Typesafe API
trait Producer[F[_], K, V] {
  // ...
  type PMessage[P] = ProducerMessage[K, V, P]

  def send[P](implicit F: Async[F]): Pipe[F, PMessage[P], ProducerMetadata[P]]
  def sendAsync: Sink[F, ProducerRecord[K, V]]
  def sendCommitable[P <: Commitable[F]](implicit F: Async[F]): Sink[F, PMessage[P]]
}
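The P type parameter is a passthrough: send pairs each acknowledgement with whatever rode along as P, which is presumably how sendCommitable can commit an offset only after the corresponding produce has succeeded.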
Resource Acquisition
def bracket[F[_], R, A](r: F[R])
                       (use: R => Stream[F, A],
                        release: R => F[Unit]): Stream[F, A]

Stream.bracket(createConsumer)({ consumer => ??? }, _.close)
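To see bracket's guarantee in isolation, a self-contained sketch where a file reader stands in for the Kafka consumer (openReader and the path are illustrative): release runs whether the inner stream completes, fails or is interrupted, which is exactly what close() on a consumer needs.

import java.io.{BufferedReader, FileReader}
import fs2.{Stream, Task}

def openReader(path: String): Task[BufferedReader] =
  Task.delay(new BufferedReader(new FileReader(path)))

// The reader is acquired once, used to build the stream, and always released
def lines(path: String): Stream[Task, String] =
  Stream.bracket(openReader(path))(
    reader => Stream.repeatEval(Task.delay(reader.readLine())).takeWhile(_ != null),
    reader => Task.delay(reader.close())
  )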
FS2-Kafka
State Management
type Record = ConsumerRecord[K, V]

// Mutable queues and references, one queue per assigned partition
openPartitions: Async.Ref[F, Map[TopicPartition, Queue[F, Option[Chunk[Record]]]]]

// Notifies on newly assigned partitions
openPartitionsQueue: Queue[F, (TopicPartition, Stream[F, Record])]
FS2-Kafka
Back pressure
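FS2 streams are pull-based: a downstream stage pulls the next chunk only when it is ready for it. Together with the per-partition queues above, a slow stage naturally limits how fast records are pulled from the underlying KafkaConsumer.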
References
Apache Kafka
http://kafka.apache.org/
http://blog.cloudera.com/blog/2014/09/apache-kafka-for-beginners/
http://nverma-tech-blog.blogspot.pt/2015/10/apache-kafka-quick-start-on-windows.html
FS2
https://github.com/ragb/fs2-kafka
https://github.com/functional-streams-for-scala/fs2
Akka-Streams
http://akka.io/docs/
https://pt.slideshare.net/Lightbend/understanding-akka-streams-back-pressure-and-asynchronous-architectures
Thank you !!!!!