Real World CouchDB

Real World

John Wood
Windy City DB 2011
@johnpwood

• Software Developer at Signal
• Coding for about 15 years
• Working with CouchDB for 2.5 years (in
production for about 2 years)

• Enjoy tinkering with data storage solutions

Document Database
{
“_id” : “2d7f015226a05b6940984bbe39004fde”,
“_rev” : “2-477f6ab2dec6df185de1a078d270d8”,
“first_name” : “John”,
“last_name” : “Wood”,
“interests” : [“hacking”, “fishing”, “reading”],
“offspring” : [
{ “name” : “Dylan”, “age” : 6 },
{ “name” : “Chloe”, “age” : 3 }
]
}

Strong Focus on
Replication

RESTful API
# Create
POST http://localhost:5984/employees

# Read
GET http://localhost:5984/employees/1

# Update
PUT http://localhost:5984/employees/1

# Delete
DELETE http://localhost:5984/employees/1

Queried and Indexed
with MapReduce
function(doc) {
if (doc.first_name == “John”)
emit(doc._id, 1);
}

function(keys, values, rereduce) {
return sum(values);
}

MapReduce

// Map
function(doc) {
emit(doc._id, 1);
}

// Reduce
function(keys, values, rereduce) {
return sum(values);
}

MapReduce
// Map
function(doc) {
if (doc.dependents) {
for (i in doc.dependents) {
emit(doc._id, doc.dependents[i]);
}
}
}

// Reduce
_count

MapReduce
function sum(type_counts, totals, status) {
if (type_counts[status]) { // OK or ERR
if (!totals[status]) {
totals[status] = new Object();
}
var status_totals = totals[status];
var status_type_counts = type_counts[status];
for (key in status_type_counts) { // MO, MT, CM, etc.
var count = status_type_counts[key];
if (!status_totals[key]) {
status_totals[key] = count;
} else {
status_totals[key] += count;
}
}
}
}
var totals = new Object();
// values should be something like
// {"OK":{"MO":1234,"MT":1000,"CM":20},"ERR":{"MO":1,"MT": 1}}
for (i = 0; i < values.length; i++) {
var message_count = values[i];
sum(message_count, totals, 'OK');
}
return totals;
}

Design Documents
{
"_id": "_design/stats",
"views": {
"total_employees": {
"map": "function(doc) { emit(doc._id, 1); }",
"reduce": "function(keys, values, rereduce) { return
sum(values); }"
},
"by_lastname": {
"map": "function(doc) { emit(doc.last_name, null); }"
},
"dependents": {
"map": "function(doc) { if (doc.dependents) { for (i in
doc.dependents) { emit(doc._id, doc.dependents[i]); } } }",
"reduce": "_count"
}
}
}

{
"_id": "1",
"first_name": "Robert",
"last_name": "Johnson",
"date_hired": "2010/01/10",
"dependents": [
{ "first_name": "Margie", "last_name": "Johnson" },
{ "first_name": "Charlie", "last_name": "Johnson" },
{ "first_name": "Sophie", "last_name": "Johnson" }
],
"salary": 250000
}

{
"_id": "2",
"first_name": "Jim",
"last_name": "Jones",
"date_hired": "2010/02/11",
"salary": 150000
}

{
"_id": "3",
"first_name": "Sally",
"last_name": "Stevenson",
"date_hired": "2010/04/23",
"salary": 100000
}

{
"_id": "4",
"first_name": "Bob",
"last_name": "Smith",
"salary": 80000,
"date_hired": "2010/03/11",
"dependents": [
{ "first_name": "Susan", "last_name": "Smith" }
]
}

MapReduce

{"total_rows":4,"offset":0,"rows":[
{"id":"1","key":"1","value":{"first_name":"Margie","last_name":"Johnson"}},
{"id":"1","key":"1","value":{"first_name":"Charlie","last_name":"Johnson"}},
{"id":"1","key":"1","value":{"first_name":"Sophie","last_name":"Johnson"}},
{"id":"4","key":"4","value":{"first_name":"Susan","last_name":"Smith"}}
]}

{
"_id": "1",
} } } }

MapReduce

{"rows":[
{"key":null,"value":4}
]}

{
"_id": "1",
} } } }

MapReduce

{"rows":[
{"key":"1","value":3},
{"key":"4","value":1}
]}

View Structure

http://guide.couchdb.org/editions/1/en/views.html

View Structure
?key="ch"

http://guide.couchdb.org/editions/1/en/views.html

The Problem
• Reports that utilized data in some large tables (30M+
rows) were taking a very long time to create

• Increasing query execution times

• Occasional page timeouts

• Limited resources for super powered hardware or the
leading relational database product

• Database migrations on these large tables were taking an
increasingly long time to run

The Solution
• Using CouchDB as an archive database

• Migrated old data in tables to CouchDB, dramatically
reducing the table sizes, speeding up queries that were
still hitting those tables

• Re-wrote SQL queries as views to fetch data from the
archive database, dramatically reducing the amount of
time needed to fetch the old data

• Views updated nightly with the new set of archived data

POST /_replicate
{"source":"database",
"target":"http://example.org/database"}

Filtered

function(doc, req) {
if (doc.type && doc.type == "foo") {
return true;
} else {
return false;
}
}

Filtered

function(doc, req) {
if (doc.type && doc.type == req.query.doc_type) {
return true;
} else {
return false;
}
}

"_conflicts":
["2-7c971bb974251ae8541b8fe045964219"]

Finding Conflicts
function(doc) {
if (doc._conflicts) {
emit(doc._conflicts, null);
}
}

{"total_rows":1,"offset":0,"rows":[
{"id":"foo","key":
["2-7c971bb974251ae8541b8fe045964219"],"value":null}
]}
http://guide.couchdb.org/draft/conflicts.html

Resolving Conflicts

# Step 1
PUT /db/document {... merged data ...}

# Step 2
DELETE /db/document?
rev=2-7c971bb974251ae8541b8fe045964219

Real World Example

http://www.couchbase.com/case-studies/bbc

The Problem
• Needed to make sure the site was always up and available, even
in the face of a data center catastrophe

• Needed a solution that could easily replicate data
between two or more data centers

• Needed the solution to store data in a safe and reliable
way


The Solution
• Using CouchDB to create a multi-master, multi-data
center failover configuration

• 32 nodes in the cluster

• 16 nodes in each of their two data centers

• 8 primary nodes, 8 backup nodes

• Terabyte of data

• 150 - 170 million requests per day


My Guess at BBC’s Replication Setup

App

Load
Balancer

P1 P2 P3 P4 P5 P6 P7 P8 P1 P2 P3 P4 P5 P6 P7 P8

B1 B2 B3 B4 B5 B6 B7 B8 B1 B2 B3 B4 B5 B6 B7 B8

{"seq":12,"id":"foo","changes":
[{"rev":"1-23202479633c2b380f79507a776743d5"}]}

Polling

GET /db/_changes
{"results":[
{"seq":1,"id":"test","changes":
[{"rev":"1-aaa8e2a031bca334f50b48b6682fb486"}]},
{"seq":2,"id":"test2","changes":
[{"rev":"1-e18422e6a82d0f2157d74b5dcf457997"}]}
],
"last_seq":2}

Polling

GET /db/_changes?since=1

{"results":[
{"seq":2,"id":"test2","changes":
[{"rev":"1-e18422e6a82d0f2157d74b5dcf457997"}]}
],
"last_seq":2}

Polling
GET /db/_changes?since=1
&include_docs=true

{"results":[
{"seq":2,"id":"test2","changes":
[{"rev":"1-e18422e6a82d0f2157d74b5dcf457997"}],
"doc":{"_id":"test2", "name":"John", "age":"33",
"_rev":"1-e18422e6a82d0f2157d74b5dcf457997"}}
],
"last_seq":2}

Long Polling

GET /db/_changes?feed=longpoll
&since=2

Continuous Changes

GET /db/_changes?feed=continuous

Filtered Changes
GET /db/_changes?filter=filters/
signal_employees

function(doc, req) {
if (doc.company == "Signal") {
return true;
} else {
return false;
}
}

Filtered Changes
GET /db/_changes?filter=filters/
employees&company=Signal

function(doc, req) {
if (doc.company == req.query.company) {
return true;
} else {
return false;
}
}

Real World Example

http://www.couchbase.com/case-studies/skechers

Skechers
• Already using CouchDB to help power
www.skechers.com

• Utilized the _changes long poll feature to add a “What’s
happening now” widget to the main page

• Updates are processed in real time

• The widget was written in just a few hours, with the
majority of the code handling the display of the data

http://www.couchbase.com/case-studies/skechers

Real World Example

http://www.dimagi.com/pulling-data-from-couchdb-to-a-relational-database-made-easy-with-_changes/

“Perhaps at the top of the list of ‘things that are annoying in CouchDB’ is
general reporting.”

http://browsertoolkit.com/fault-tolerance.png

dimagi
• “Perhaps at the top of the list of ‘things that are annoying
in CouchDB’ is general reporting.”

• CouchDB views are not nearly as flexible as SQL

• Using the _changes feed to mirror changes made in
CouchDB over to a relational database

• The relational database is used for more extensive reporting

• “Couch to SQL in 20 lines of code!”

http://www.dimagi.com/pulling-data-from-couchdb-to-a-relational-database-made-easy-with-_changes/

Real World Example

couchdb-lucene

https://github.com/rnewson/couchdb-lucene

couchdb-lucene
• Provides full text search functionality for data stored in
CouchDB

• Uses the continuous _changes feed to stay notified of the
most recent changes in the database

• Documents are included with change notifications

• Index is updated shortly after document is saved in
CouchDB

https://github.com/rnewson/couchdb-lucene

Image credit: http://happyclouddesign.blogspot.com/

Image credit: http://gmflightlog.blogspot.com

Real World Example

http://www.couchbase.com/case-studies/groupcomplete

The Problem
• Looking to modernize mobile data collection (surveys, etc)

• People collecting the data (mobile workers) have limited ability to
review or modify data once it is submitted

• Mobile workers work in a void, unable to collaborate with their
team members, and increasing the likelihood of double-entry and
duplicated effort

• Mobile workers don’t have access to aggregated data, as this is
usually done on the back end, where the data is sent

• Access to a laptop or desktop is required to perform certain tasks


The Solution
• Cluster of CouchDB servers with shared forms, data, and profiles for
mobile devices collecting data

• A native application running on the device collects the data, and
interacts with a local CouchDB server

• Native application can access the data on the remote servers, or
locally via replicated databases served by CouchDB running on the
device

• Since data can be stored locally, access is fast, and unaffected by spotty
network availability


The Solution
• Mobile workers can easily share form templates and data

• The application manages conflicts, allowing mobile workers to
update, correct, and revise collected data at any time

• Resolved conflicts are distributed to the team via standard
replication, so everybody has the same data

• Rich media (pictures, audio, video) stored as _attachments


HTTP/1.1 200 OK
Server: CouchDB/1.0.2 (Erlang OTP/R14B)
Date: Tue, 07 Jun 2011 12:24:36 GMT
Content-Type: text/plain;charset=utf-8
Content-Length: 40
Cache-Control: must-revalidate

{"couchdb":"Welcome","version":"1.0.2"}

https://github.com/benoitc/afgwardiary

Show Functions

function(doc, req) {
return '<h1>' + doc.title + '</h1>';
}

http://guide.couchdb.org/draft/show.html

Show Functions
function(doc, req) {
return {
body : "<foo>" + doc.title + "</foo>",
headers : {
"Content-Type" : "application/xml",
"X-My-Own-Header": "foo"
}
}
}

http://guide.couchdb.org/draft/show.html

List Functions

function(head, req) {
send('<ul>');
while (row = getRow()) {
send('<li>' + row.value + '</li>');
}
return('</ul>');
}

String concatenation to build HTML? Ewww!

How do I get all of my Javascript into CouchDB?

Can I use my existing development tools?

What about images, CSS files, and other resources?

The CouchApp Project
• Scripts that allow you to easily deploy your CouchApp from your
file system to CouchDB

• Where the files live on your filesystem determines where they will
be pushed to the database. myapp/views/foobar/map.js will be
pushed to _design/myapp, into a view named foobar, as the map
function.

https://github.com/couchapp/couchapp

The CouchApp Project
• Evently - A declarative, CouchDB friendly jQuery library for writing
event based Javascript applications

• jquery.couch.js - Javascript library for communicating with CouchDB

• jquery.pathbinder.js - Framework for triggering events based on
paths in URL hash

• mustache.js - A simple javascript template framework

https://github.com/couchapp/couchapp

Templates
<!DOCTYPE html>
<html>
<head>
<title>Example</title>
<link rel=”stylesheet” href=”../../style/
screen.css” type=”text/css”>
</head>

<body>
<h1 id=”post_title”>{{title}}</h1>
<div id=”post_body”>{{body}}</div>
</body>

<script src=”../../script/awesome.js”></script>
</html>

Templates
// Show Function
function(doc, req) {
  var mustache = require("vendor/couchapp/lib/mustache");

  var data = {
    title : doc.title,
body : doc.body
  };

  return mustache.to_html(this.templates.blog_post, data);
}

Real World Example

http://www.couchbase.com/case-studies/incandescent

The Problem
• Wanted to develop a web based solution for managing a
veterinary clinic (managing patients, procedures, back
office, etc.)

• Needed something that could operate in an environment
without an internet connection

• Wanted something flexible enough to scale up to a SaaS
offering


The Solution
• The application was built as an installable CouchApp

• Written entirely in HTML and Javascript

• Developed using Backbone.js

• Platform independent, running on all platforms and
browsers (iPad too!)

• iPhone and Android versions in development


Resources
CouchDB Project Website
http://couchdb.apache.org

CouchDB: The Definitive Guide
http://guide.couchdb.org

CouchDB Project Wiki
http://wiki.apache.org/couchdb

CouchApps
http://couchapp.org

Thanks!
john@johnpwood.net
@johnpwood

Real World CouchDB

Recommended

Recommended

More Related Content

What's hot

What's hot (20)

Viewers also liked

Viewers also liked (20)

Recently uploaded

Recently uploaded (20)

Real World CouchDB

Editor's Notes