Change Data Capture in
Distributed Systems
@petr_zapletal
Change Data Capture in
Distributed Systems
@petr_zapletal
CDC is a design pattern that enables
capturing changes to data and notifying
actors so they can react accordingly
Things to Consider
Things to Consider
● Reliable
Things to Consider
● Reliable
● Scalable
Things to Consider
● Reliable
● Scalable
● Performant
Things to Consider
● Reliable
● Scalable
● Performant
● Consistent
Things to Consider
● Reliable
● Scalable
● Performant
● Consistent
● Fault Tolerant
Strategies
Strategies
● Timestamps or version
numbers
Strategies
● Timestamps or version
numbers
● Triggers
Strategies
● Timestamps or version
numbers
● Triggers
● Diffs
Strategies
● Timestamps or version
numbers
● Triggers
● Diffs
● Log scraping
Polling
Server
override def routes: Routes = {
case GET(p"/v1/rules/state") =>
DB.fetchAllRules
.transact(readTx.tx)
.map { rs =>
...
}
Server
override def routes: Routes = {
case GET(p"/v1/rules/state") =>
DB.fetchAllRules
.transact(readTx.tx)
.map { rs =>
...
}
Server
override def routes: Routes = {
case GET(p"/v1/rules/state") =>
DB.fetchAllRules
.transact(readTx.tx)
.map { rs =>
...
}
Client
val url = URLHelper
.parseAbsoluteUrl(target.base +
"/v1/rules/state")
val response = client
.get(url, Map.empty,
Map.empty, timeout)
updateState(response)
Client
val url = URLHelper
.parseAbsoluteUrl(target.base +
"/v1/rules/state")
val response = client
.get(url, Map.empty,
Map.empty, timeout)
updateState(response)
Client
val url = URLHelper
.parseAbsoluteUrl(target.base +
"/v1/rules/state")
val response = client
.get(url, Map.empty,
Map.empty, timeout)
updateState(response)
Client
val url = URLHelper
.parseAbsoluteUrl(target.base +
"/v1/rules/state")
val response = client
.get(url, Map.empty,
Map.empty, timeout)
updateState(response)
Log Scraping
Schema
CREATE TABLE dynamo_source (
id BIGSERIAL PRIMARY KEY,
rules VARCHAR(255) NOT NULL,
created_at BIGINT NOT NULL,
updated_at BIGINT NOT NULL
);
Schema
CREATE TABLE dynamo_source (
id BIGSERIAL PRIMARY KEY,
rules VARCHAR(255) NOT NULL,
created_at BIGINT NOT NULL,
updated_at BIGINT NOT NULL
);
Source
val writeCols =
fr"id, rules, created_at, updated_at"
val writeValues =
fr"$id, $rules, $createdAt, $updatedAt"
(fr"""INSERT INTO
dynamo_source(""" ++ writeCols ++ fr""")
VALUES(""" ++ writeValues ++ fr""")
RETURNING id""").update
Source
val writeCols =
fr"id, rules, created_at, updated_at"
val writeValues =
fr"$id, $rules, $createdAt, $updatedAt"
(fr"""INSERT INTO
dynamo_source(""" ++ writeCols ++
fr""")
VALUES(""" ++ writeValues ++ fr""")
RETURNING id""").update
DMS Mapping Samples
{ "rule-type": "selection",
"object-locator": {
"schema-name": "sql-schema-somewhere",
"table-name": "dynamo_source" } },
{ "rule-type": "object-mapping",
"rule-name": "TransformToDynamo",
"rule-action": "map-record-to-record",
"target-table-name": "dynamo-db-table",
...
DMS Mapping Samples
{ "rule-type": "selection",
"object-locator": {
"schema-name":
"sql-schema-somewhere",
"table-name": "dynamo_source" } },
{ "rule-type": "object-mapping",
"rule-name": "TransformToDynamo",
"rule-action": "map-record-to-record",
"target-table-name": "dynamo-db-table",
...
DMS Mapping Samples
{ "rule-type": "selection",
"object-locator": {
"schema-name": "sql-schema-somewhere",
"table-name": "dynamo_source" } },
{ "rule-type": "object-mapping",
"rule-name": "TransformToDynamo",
"rule-action": "map-record-to-record",
"target-table-name": "dynamo-db-table",
...
Target
final case class DynamoItem(id: String, rules: String, createdAt:
Epoch, updatedAt: Epoch)
val table: Table[DynamoItem] = Table[DynamoItem](tableName)
override def get(key: String): IO[Option[DynamoItem]] = {
val op = table.get('id -> key)
IO.fromFuture(IO(ScanamoAsync.exec(client)(op)))(cs)
}
final case class DynamoItem(id: String, rules: String,
createdAt: Epoch, updatedAt: Epoch)
val table: Table[DynamoItem] = Table[DynamoItem](tableName)
override def get(key: String): IO[Option[DynamoItem]] = {
val op = table.get('id -> key)
IO.fromFuture(IO(ScanamoAsync.exec(client)(op)))(cs)
}
Target
final case class DynamoItem(id: String, rules: String, createdAt:
Epoch, updatedAt: Epoch)
val table: Table[DynamoItem] = Table[DynamoItem](tableName)
override def get(key: String): IO[Option[DynamoItem]] =
{
val op = table.get('id -> key)
IO.fromFuture(IO(ScanamoAsync.exec(client)(op)))(cs)
}
Target
Use Cases
Use Cases
● Caching
Use Cases
● Caching
● Search Indexing
Use Cases
● Caching
● Search Indexing
● Offline Processing
Use Cases
● Caching
● Search Indexing
● Offline Processing
● Simplifying monolithic
applications
Use Cases
● Caching
● Search Indexing
● Offline Processing
● Simplifying monolithic
applications
● CQRS
Use Cases
● Caching
● Search Indexing
● Offline Processing
● Simplifying monolithic
applications
● CQRS
● ...
Frameworks
Frameworks
● pg2k4j
Frameworks
● pg2k4j
● Maxwell
Frameworks
● pg2k4j
● Maxwell
● AWS Database Migration
Service
Frameworks
● pg2k4j
● Maxwell
● AWS Database Migration
Service
● SpinalTap
Frameworks
● pg2k4j
● Maxwell
● AWS Database Migration
Service
● SpinalTap
● DataBus
Frameworks
● pg2k4j
● Maxwell
● AWS Database Migration
Service
● SpinalTap
● DataBus
● Debezium
Frameworks
● pg2k4j
● Maxwell
● AWS Database Migration
Service
● SpinalTap
● DataBus
● Debezium
● ...
Summary
Questions
References
References
● https://github.com/disneystreaming/pg2k4j
● https://aws.amazon.com/dms/
● https://github.com/airbnb/SpinalTap
● https://github.com/linkedin/databus
● https://github.com/zendesk/maxwell
● https://github.com/debezium/debezium

Change Data Capture - Scale by the Bay 2019