with
style
                gabriele lana
   gabriele.lana@cleancode.it
       twitter: @gabrielelana
         http://joind.in/2943
Style


query &
design


scale
mongo console

$ ~/Work/opt/mongodb-1.6.5/bin/mongod \
          --dbpath=~/Work/src/nosqlday/db/mongodb.01 \
          --logpath=~/Work/src/nosqlday/log/mongodb.01 \
          --fork --port 30001

$ ~/Work/opt/mongodb-1.6.5/bin/mongo localhost:30001
MongoDB  shell  version:  1.6.5
connecting  to:  localhost:30001/test

>  use  nosqlday
switched  to  db  nosqlday

>  db.getCollectionNames()
[  "system.indexes",  "users"  ]

>  db.users.find({  "name":  "Gabriele"  })
{  "_id"  :  ObjectId("4d8706767bb037a8a8f98db2"),  "name"  :  "Gabriele",  
"surname"  :  "Lana",  "job"  :  "softwarecraftsman"  }

>  exit
bye
ruby driver

require "mongo"

db = Mongo::Connection.new("localhost", 30001).db("nosqlday")

puts "Collections:"
db.collections.each do |collection|
  puts "\t#{collection.name}"
end

puts "Gabriele:"
db["users"].find(:name => "Gabriele").each do |user|
  puts "\t#{user["_id"]}"
end

db.connection.close
ruby driver

require "mongo"

db = Mongo::Connection.new("localhost", 30001).db("nosqlday")

puts "Collections:"
db.collections.each do |collection|
  puts "\t#{collection.name}"
end

puts "Gabriele:"
db["users"].find(:name => "Gabriele").each do |user|
  puts "\t#{user["_id"]}"
end

db.connection.close

$ ruby src/connect.rb
Collections:
        users
        system.indexes
Gabriele:
        4d8706767bb037a8a8f98db2
Style


query &
design


scale
Style
        know your
          driver
mongo

smart driver

document object mapper
puts "Gabriele:"
db["users"].find(:name => "Gabriele").each do |user|
  puts "\t#{user["_id"]}"
end

puts "Gabriele:"
db["users"].select{|user| user["name"] == "Gabriele"}.each do |user|
  puts "\t#{user["_id"]}"
end




mongo

smart driver
puts "Gabriele:"
db["users"].find(:name => "Gabriele").each do |user|
  puts "\t#{user["_id"]}"
end

puts "Gabriele:"
db["users"].select{|user| user["name"] == "Gabriele"}.each do |user|
  puts "\t#{user["_id"]}"
end

$ ruby src/find_vs_select.rb
Gabriele:
        4d8706767bb037a8a8f98db2

Gabriele:
        4d8706767bb037a8a8f98db2




mongo

smart driver
puts "Gabriele:"
db["users"].find(:name => "Gabriele").each do |user|
  puts "\t#{user["_id"]}"
end

puts "Gabriele:"
db["users"].select{|user| user["name"] == "Gabriele"}.each do |user|
  puts "\t#{user["_id"]}"
end




mongo

smart driver
Style
        incremental
           design
          based on
         application
          behavior
the best design is
 the one where needed
   data can be easily
     extracted
      the way you need
        to query your data
          should influence
            your design
Style
        incremental
           design
          based on
         application
         monitoring
monitoring and adapting is better
than doing it right the first time
    ...actually the first time
       is the worst time :-)
monitoring & adapting

>  db.setProfilingLevel(1,  5)                                                                                              
                                                                                                                            
{  "was"  :  1,  "slowms"  :  100,  "ok"  :  1  }

//  after  product  usage  find  problematic  queries

> db.system.profile.find().sort({millis:-1})
                                                                                                             
{  "ts":  "Mon  Mar  21  2011  14:30:56  GMT+0100  (CET)",
    "info":  "
        query  pomodorist.pomodori
            reslen:202
            nscanned:26950
            query:  
                {  $query:  {  task_id:  ObjectId('4d6f1d3931f2386e9c089796')  }}
            nreturned:1
      ",
      "millis":17
}
monitoring & adapting

>  db.pomodori.find({
        $query:  {  task_id:  ObjectId('4d6f1d3931f2386e9c089796')  },
        $explain:  true
})
                                                      
{  "cursor":  "BasicCursor",
    "nscanned":  26950,
    "nscannedObjects":  26950,
    "n":  1,
    "millis":  17,
    "indexBounds":  {  },
    "allPlans":  [
        {  "cursor"  :  "BasicCursor",  "indexBounds"  :  {  }  }  
    ]
}
monitoring & adapting

>  db.pomodori.ensureIndex({"task_id":  1})
                                                                                                              
>  db.pomodori.find({
        $query:  {  task_id:  ObjectId('4d6f1d3931f2386e9c089796')  },
        $explain:  true
})

{  "cursor":  "BtreeCursor  task_id_1",
    "nscanned":  1,
    "nscannedObjects":  1,
    "n":  1,
    "millis":  0,
    "indexBounds":  {
        "task_id":  [
       [
                ObjectId("4d6f1d3931f2386e9c089796"),
                ObjectId("4d6f1d3931f2386e9c089796")
          ]
    ]},  "allPlans":  [...]
}
Style


query &
design


scale
query &
design
           use $in
          operator
          for batch
            query
retrieve all objects with $in

users = [
{:name => "Gabriele", :surname => "Lana", :job => "softwarecraftsman"},
{:name => "Federico", :surname => "Galassi", :job => "softwarecraftsman"},
{:name => "Giordano", :surname => "Scalzo", :job => "softwarecraftsman"}
]


ids = users.map{|user| db["users"].insert(user)}

puts ids.map{|id| db["users"].find_one(:_id => id)}
retrieve all objects with $in

users = [
{:name => "Gabriele", :surname => "Lana", :job => "softwarecraftsman"},
{:name => "Federico", :surname => "Galassi", :job => "softwarecraftsman"},
{:name => "Giordano", :surname => "Scalzo", :job => "softwarecraftsman"}
]


ids = users.map{|user| db["users"].insert(user)}

puts ids.map{|id| db["users"].find_one(:_id => id)}

$ ruby src/find_by_all_ids.rb
{"_id"=>BSON::ObjectId('4d87605731f23824a0000001'), ...}
{"_id"=>BSON::ObjectId('4d87605731f23824a0000002'), ...}
{"_id"=>BSON::ObjectId('4d87605731f23824a0000003'), ...}
retrieve all objects with $in

users = [
{:name => "Gabriele", :surname => "Lana", :job => "softwarecraftsman"},
{:name => "Federico", :surname => "Galassi", :job => "softwarecraftsman"},
{:name => "Giordano", :surname => "Scalzo", :job => "softwarecraftsman"}
]


ids = users.map{|user| db["users"].insert(user)}

puts ids.map{|id| db["users"].find_one(:_id => id)}
retrieve all objects with $in

users = [
{:name => "Gabriele", :surname => "Lana", :job => "softwarecraftsman"},
{:name => "Federico", :surname => "Galassi", :job => "softwarecraftsman"},
{:name => "Giordano", :surname => "Scalzo", :job => "softwarecraftsman"}
]


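# ids = users.map{|user| db["users"].insert(user)}    # before: one insert call per document
ids = db["users"].insert(users)                        # after: a single batch insert

# puts ids.map{|id| db["users"].find_one(:_id => id)}  # before: one query per id
puts db["users"].find(:_id => {:$in => ids}).all       # after: a single query with $in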
retrieve all objects with $in

users = [
{:name => "Gabriele", :surname => "Lana", :job => "softwarecraftsman"},
{:name => "Federico", :surname => "Galassi", :job => "softwarecraftsman"},
{:name => "Giordano", :surname => "Scalzo", :job => "softwarecraftsman"}
]


# ids = users.map{|user| db["users"].insert(user)}    # before: one insert call per document
ids = db["users"].insert(users)                        # after: a single batch insert

# puts ids.map{|id| db["users"].find_one(:_id => id)}  # before: one query per id
puts db["users"].find(:_id => {:$in => ids}).all       # after: a single query with $in

$ ruby src/find_by_all_ids.rb
{"_id"=>BSON::ObjectId('4d87605731f23824a0000001'), ...}
{"_id"=>BSON::ObjectId('4d87605731f23824a0000002'), ...}
{"_id"=>BSON::ObjectId('4d87605731f23824a0000003'), ...}
query &
design          use
          conventions to
           build smart
              object
            identifiers
conventions are fun to play with

> db.user_scores.find({}, {"_id": 1})

{ "_id" : "4d873ce631f238241d00000d-day-20091106" }
{ "_id" : "4d873ce631f238241d00000d-week-200944" }
{ "_id" : "4d873ce631f238241d00000d-month-200911" }
{ "_id" : "4d873ce631f238241d00000d-year-2009" }
{ "_id" : "4d873ce631f238241d00000d-user" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-advertising" }
{ "_id" : "4d873ce631f238241d00000d-week-200944-advertising" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-art" }
{ "_id" : "4d873ce631f238241d00000d-week-200944-art" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-artist" }
{ "_id" : "4d873ce631f238241d00000d-week-200944-artist" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-information" }
conventions are fun to play with

> db.user_scores.findOne(
        {"_id": "4d873ce631f238241d00000d-day-20091106"}
    )

{
     "_id" : "4d873ce631f238241d00000d-day-20091106",
     "pomodori" : 15,
     "pomodori_squashed" : 3,
     "breaks" : 7,
     "tasks_created" : 8,
     "tasks_done" : 6,
     "estimation_accuracy" : 0,
     "seconds_of_focused_time" : 22500,
     "seconds_of_wasted_time" : 1999,
     "seconds_of_breaks" : 8820
}
conventions are fun to play with
(user scores in day per tag)

> db.user_scores.find(
        {"_id": /^4d873ce631f238241d00000d-day-20091106-/}, {"_id": 1}
    )

{ "_id" : "4d873ce631f238241d00000d-day-20091106-advertising" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-art" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-artist" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-blogging" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-culture" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-html" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-illustration" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-information" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-inspiration" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-marketing" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-movies" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-resources" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-technology" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-tool" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-tutorials" }
conventions are fun to play with
(list of tags per day)
> db.user_scores.find(
        {"_id": /^4d873ce631f238241d00000d-day-20091106-/}, {"_id": 1}
    ).map(function(document) {
        return document._id.replace(
            "4d873ce631f238241d00000d-day-20091106-", ""
        )
    })

[
     "advertising",
     "art",
     "artist",
     "blogging",
     "culture",
     "html",
     "illustration",
     "information",
     ...
]
conventions are fun to play with
(anchored regexp uses indexes)
> db.user_scores.find(
        {"_id": /^4d873ce631f238241d00000d-day-20091106-/}, {"_id": 1}
    ).explain()

{
   "cursor" : "BtreeCursor _id_ multi",
   "nscanned" : 15,
   "nscannedObjects" : 15,
   "n" : 15,
   "millis" : 0,
   "indexBounds" : {
      "_id" : [
         [
            "4d873ce631f238241d00000d-day-20091106-",
            "4d873ce631f238241d00000d-day-20091106."
         ],
         [
            /^4d873ce631f238241d00000d-day-20091106-/,
            /^4d873ce631f238241d00000d-day-20091106-/
         ]
      ]
   }
}
conventions are fun to play with
(anchored regexp uses indexes)
> db.user_scores.find(
        {"_id": /4d873ce631f238241d00000d-day-20091106-/}, {"_id": 1}
    ).explain()

{
     "cursor"  :  "BtreeCursor  _id_  multi",
     "nscanned"  :  109349,
     "nscannedObjects"  :  15,
     "n"  :  15,
     "millis"  :  217,
     "indexBounds"  :  {
        "_id"  :  [
           ...
        ]
     }
}
query &   use “group”
design     method to
            do small
          computations
             without
            fetching
            related
           documents
group to compute data in mongo
(inject client side)

days = [ 20091110, 20091111, 20091112 ]
scores_id = %r{^4d87d00931f2380c7700000d-day-(#{days.join("|")})$}

scores = db["user_scores"].find(:_id => scores_id)

pomodori = scores.inject(0) do |pomodori, scores|
  pomodori + scores["pomodori"]
end

puts "Pomodori in days #{days.join(",")}: #{pomodori}"
group to compute data in mongo
(inject client side)

days = [ 20091110, 20091111, 20091112 ]
scores_id = %r{^4d87d00931f2380c7700000d-day-(#{days.join("|")})$}

scores = db["user_scores"].find(:_id => scores_id)

pomodori = scores.inject(0) do |pomodori, scores|
  pomodori + scores["pomodori"]
end

puts "Pomodori in days #{days.join(",")}: #{pomodori}"

$ ruby src/inject_for_reduce.rb
Pomodori in days 20091110,20091111,20091112: 36
group to compute data in mongo
(group server side)

days = [ 20091110, 20091111, 20091112 ]
scores_id = %r{^4d87d00931f2380c7700000d-day-(#{days.join("|")})$}

result = db["user_scores"].group(
  :cond => { :_id => scores_id },
  :initial => { :pomodori => 0 },
  :reduce => <<-EOF
    function(document, result) {
      result.pomodori += document.pomodori
    }
  EOF
)

puts "Pomodori in days #{days.join(",")}: #{result.first["pomodori"]}"
group to compute data in mongo
(group server side)

days = [ 20091110, 20091111, 20091112 ]
scores_id = %r{^4d87d00931f2380c7700000d-day-(#{days.join("|")})$}

result = db["user_scores"].group(
  :cond => { :_id => scores_id },
  :initial => { :pomodori => 0 },
  :reduce => <<-EOF
    function(document, result) {
      result.pomodori += document.pomodori
    }
  EOF
)

puts "Pomodori in days #{days.join(",")}: #{result.first["pomodori"]}"

$ ruby src/group_for_reduce.rb
Pomodori in days 20091110,20091111,20091112: 36
group to compute data in mongo
(ex. sum pomodori by tag “ruby”)

result = db["user_scores"].group(
  :cond => {
     :_id => /^4d87d00931f2380c7700000d-day-\d{8}-ruby$/
  },
  :initial => { :pomodori => 0, :days => 0 },
  :reduce => <<-EOF
     function(document, result) {
       result.days += 1
       result.pomodori += document.pomodori
     }
  EOF
).first

puts "In #{result["days"]} days, #{result["pomodori"]} done for ruby"
group to compute data in mongo
(ex. sum pomodori by tag “ruby”)

result = db["user_scores"].group(
  :cond => {
     :_id => /^4d87d00931f2380c7700000d-day-\d{8}-ruby$/
  },
  :initial => { :pomodori => 0, :days => 0 },
  :reduce => <<-EOF
     function(document, result) {
       result.days += 1
       result.pomodori += document.pomodori
     }
  EOF
).first

puts "In #{result["days"]} days, #{result["pomodori"]} pomodori"

$ ruby src/group_for_ruby_tag.rb
In 43 days, 45 pomodori
group to compute data in mongo
(ex. sum pomodori by tag “ruby”)
> db.user_scores.find({
        "_id": /^4d87d00931f2380c7700000d-day-\d{8}-ruby$/
    }).explain()

{
     "cursor"  :  "BtreeCursor  _id_  multi",
     "nscanned"  :  43,
     "nscannedObjects"  :  43,
     "n"  :  43,
     "millis"  :  3,
     "indexBounds"  :  {
        "_id"  :  [...]
     }
}
query &
design    create indexes
           on arrays to
           create local
             reverse
            indexes in
            documents
reverse index in place
(an array could be indexed)

>  db.tasks.find({  "tags":  {  $in:  [  "nosqlday"  ]  }  })
                                                                            
{  "_id"  :  ObjectId("4d7de446175ca8243d000004"),  
    "tags"  :  [  "nosqlday"  ],  
    "description"  :  "#nosqlday  keynote",  
    "is_recurrent"  :  false,
    "estimated"  :  0,  
    "worked_in"  :  [
   "Mon  Mar  14  2011  00:00:00  GMT+0100  (CET)",
   "Tue  Mar  15  2011  00:00:00  GMT+0100  (CET)"
    ],
    "done_at"  :  "Tue  Mar  15  2011  13:05:03  GMT+0100  (CET)",
    "todo_at"  :  null,
    "created_at"  :  "Mon  Mar  14  2011  10:47:50  GMT+0100  (CET)",
    "updated_at"  :  "Tue  Mar  15  2011  13:05:03  GMT+0100  (CET)",
    "keywords":  [  "nosqldai",  "keynot"  ],
    "user_id":  ObjectId("4d53996c137ce423ff000001"),
    "annotations"  :  [  ]
}
reverse index in place
(an array could be indexed)
>  db.tasks.getIndexes()
[
   {
      "name"  :  "_id_",
      "ns"  :  "app435386.tasks",
      "key"  :  {
         "_id"  :  1
      }
   },
   {
      "name"  :  "tags_1",
      "ns"  :  "app435386.tasks",
      "key"  :  {
         "tags"  :  1
      },
      "unique"  :  false
   },
      ...
]
reverse index in place
(container for deduced data, array)

db["orders"].insert({
   :placed_at => [
      now.strftime("%Y"),    # year: "2011"
      now.strftime("%Y%m"),  # month: "201103"
      now.strftime("%Yw%U"), # week: "2011w11"
      now.strftime("%Y%m%d") # day: "20110316"
   ],
   :user_id => user,
   :items => items_in_order.map{|item| item[:id]},
   :total => items_in_order.inject(0){|total,item| total += item[:price]}
})

# ...

db["orders"].ensure_index([["placed_at", Mongo::DESCENDING]])
reverse index in place
(container for deduced data, array)
>  db.orders.findOne()

{  "_id"  :  ObjectId("4d88bf1f31f23812de0003fd"),  
    "placed_at"  :  [  "2011",  "201103",  "2011w11",  "20110316"  ],
    "user_id"  :  ObjectId("4d88bf1f31f23812de0003e9"),
    "items"  :  [
        ObjectId("4d88bf1f31f23812de0003da"),
        ObjectId("4d88bf1f31f23812de000047"),
        ObjectId("4d88bf1f31f23812de000078"),
        ObjectId("4d88bf1f31f23812de000068"),
      ObjectId("4d88bf1f31f23812de000288")
    ],
    "total"  :  3502
}
reverse index in place
(container for deduced data, array)
>  db.orders.find({  "placed_at":  "20110310"  }).count()
77

> db.orders.find({ "placed_at": "20110310" }).explain()
{
   "cursor" : "BtreeCursor placed_at_-1",
   "nscanned"  :  77,
   "nscannedObjects"  :  77,
   "n"  :  77,
   "millis"  :  0,
   "indexBounds"  :  {
      "placed_at"  :  [
         [
            "20110310",
            "20110310"
         ]
      ]
   }
}
reverse index in place
(container for deduced data, hash)

db["orders"].insert({
    :placed_at => [
       { :year => now.strftime("%Y") },
       { :month => now.strftime("%Y%m") },
       { :week => now.strftime("%Y%U") },
       { :day => now.strftime("%Y%m%d") }
    ],
    :user_id => user,
    :items => items_in_order.map{|item| item[:id]},
    :total => items_in_order.inject(0){|total,item| total += item[:price]}
 })

# ...

db["orders"].ensure_index([["placed_at", Mongo::DESCENDING]])
reverse index in place
(container for deduced data, hash)

>  db.orders.findOne()
                                                            
{  "_id"  :  ObjectId("4d88c31531f23812fe0003ea"),
    "placed_at"  :  [
        {  "year"  :  "2009"  },
        {  "month"  :  "200911"  },
        {  "week"  :  "200945"  },
        {  "day"  :  "20091109"  }
    ],
    "user_id"  :  ObjectId("4d88c31531f23812fe0003e9"),
    "items"  :  [
        ObjectId("4d88c31531f23812fe00013f"),
        ObjectId("4d88c31531f23812fe000176"),
        ObjectId("4d88c31531f23812fe0003e2"),
        ObjectId("4d88c31531f23812fe0003d1"),
        ObjectId("4d88c31531f23812fe0001c1"),
        ObjectId("4d88c31531f23812fe000118"),
        ObjectId("4d88c31531f23812fe00031d")
    ],
    "total"  :  10149
}
reverse index in place
(container for deduced data, hash)

>  db.orders.find({  "placed_at.week":  "201101"  }).count()                
331

>  db.orders.find({  "placed_at.week":  "201101"  }).explain()            
{
   "cursor"  :  "BasicCursor",
   "nscanned"  :  22374,
   "nscannedObjects"  :  22374,
   "n"  :  331,
   "millis"  :  23,
   "indexBounds"  :  {
     
   }
}
reverse index in place
(container for deduced data, hash)

>  db.orders.find({  "placed_at":  {  "week":  "201101"  }}).count()    
331

> db.orders.find({ "placed_at": { "week": "201101" }}).explain()
{
   "cursor" : "BtreeCursor placed_at_-1",
   "nscanned"  :  331,
   "nscannedObjects"  :  331,
   "n"  :  331,
   "millis"  :  0,
   "indexBounds"  :  {
      "placed_at"  :  [
         [
            { "week" : "201101" },
            { "week" : "201101" }
         ]
      ]
   }
}
query &
design
          use dates but
           be aware of
          some pitfalls
plain dates are good too

db["orders"].insert({
    :placed_at => now,
    :user_id => user,
    :items => items_in_order.map{|item| item[:id]},
    :total => items_in_order.inject(0){|total,item| total += item[:price]}
 })

# ...

db["orders"].ensure_index([["placed_at", Mongo::DESCENDING]])
plain dates are good too

>  db.orders.findOne()
                                                                                                                                              
{
   "_id"  :  ObjectId("4d88d1f931f23813a10003ea"),
   "placed_at"  :  "Mon  Nov  09  2009  08:00:00  GMT+0100  (CET)",
   "user_id"  :  ObjectId("4d88d1f931f23813a10003e9"),
   "items"  :  [
      ObjectId("4d88d1f931f23813a100016d"),
      ObjectId("4d88d1f931f23813a1000346"),
      ObjectId("4d88d1f931f23813a10001e7"),
      ObjectId("4d88d1f931f23813a10000db"),
      ObjectId("4d88d1f931f23813a1000091"),
      ObjectId("4d88d1f931f23813a10001c1"),
      ObjectId("4d88d1f931f23813a10001d3"),
      ObjectId("4d88d1f931f23813a100031b"),
      ObjectId("4d88d1f931f23813a1000130")
   ],
   "total"  :  5871
}
plain dates are good too

>  db.orders.find({
        "placed_at":  {  
            $gte:  new  Date(2011,2,10),
            $lt:  new  Date(2011,2,11)
        }
    }).explain()

{
     "cursor" : "BtreeCursor placed_at_-1",
     "nscanned"  :  53,
     "nscannedObjects"  :  53,
     "n"  :  53,
     "millis"  :  0,
     "indexBounds"  :  {
        "placed_at"  :  [
           [
              "Fri  Mar  11  2011  00:00:00  GMT+0100  (CET)",
              "Thu  Mar  10  2011  00:00:00  GMT+0100  (CET)"
           ]
        ]
     }
plain dates are good too, but...
(total sold on this year’s mondays)

# find all mondays of the year
now = Time.now.beginning_of_year

now += 1.day until now.monday?
mondays = [ now ]
mondays << now += 7.days while now.year == Time.now.year

# find all orders placed on mondays
query = {
  :$or => mondays.map do |day|
    { :placed_at => {
        :$gte => day.beginning_of_day,
        :$lte => day.end_of_day
      }
    }
  end
}

puts query
plain dates are good too, but...
(total sold on this year’s mondays)

# find all mondays of the year
now = Time.now.beginning_of_year

now += 1.day until now.monday?
mondays = [ now ]
mondays << now += 7.days while now.year == Time.now.year

# find all orders placed on mondays
query = {
  :$or => mondays.map do |day|
    { :placed_at => {
        :$gte => day.beginning_of_day,
        :$lte => day.end_of_day
      }
    }
  end
}

puts query

$ ruby src/orders_on_mondays.rb
{:$or=>[
  {:placed_at=>{
    :$gte=>2011-01-03 00:00:00 +0100,
    :$lte=>2011-01-03 23:59:59 +0100
  }},
  {:placed_at=>{
    :$gte=>2011-01-10 00:00:00 +0100,
    :$lte=>2011-01-10 23:59:59 +0100
  }},
  {:placed_at=>{
    :$gte=>2011-01-17 00:00:00 +0100,
    :$lte=>2011-01-17 23:59:59 +0100
  }},
  ...
]}
plain dates are good too, but...
(it works but it’s too slooow)

db["orders"].find({
   :$or => mondays.map do |day|
     { :placed_at => {
         :$gte => day.beginning_of_day,
         :$lte => day.end_of_day
       }
     }
   end
})
plain dates are good too, but...
(why it’s too slow)
>  db.orders.find({
        $or: [
            { "placed_at": { $gte: new Date(2011,2,3), $lt: new Date(2011,2,4) } },
            { "placed_at": { $gte: new Date(2011,2,10), $lt: new Date(2011,2,11) } }
        ]
    }).explain()

{
    "clauses" : [{
        "cursor" : "BtreeCursor placed_at_-1",
            "indexBounds" : {
                "placed_at" : [[
                    "Thu Mar 3 2011 00:00:00 GMT+0100 (CET)",
                    "Fri Mar 4 2011 00:00:00 GMT+0100 (CET)"
                ]]}
    }, {
        "cursor" : "BtreeCursor placed_at_-1",
            "indexBounds" : {
                "placed_at" : [[
                    "Thu Mar 10 2011 00:00:00 GMT+0100 (CET)",
                    "Fri Mar 11 2011 00:00:00 GMT+0100 (CET)"
                ]]}
    }]
}
with destructured dates
(total sold on mondays this year)
>  db.orders.findOne()

{  "_id"  :  ObjectId("4d88bf1f31f23812de0003fd"),  
    "placed_at"  :  [  "2011",  "201103",  "2011w11",  "20110316"  ],
    "user_id"  :  ObjectId("4d88bf1f31f23812de0003e9"),
    "items"  :  [
        ObjectId("4d88bf1f31f23812de0003da"),
        ObjectId("4d88bf1f31f23812de000047"),
        ObjectId("4d88bf1f31f23812de000078"),
        ObjectId("4d88bf1f31f23812de000068"),
        ObjectId("4d88bf1f31f23812de000288")
    ],
    "total"  :  3502
}
with destructured dates
(total sold on mondays this year)

now = Time.now.beginning_of_year

now += 1.day until now.monday?
mondays = [ now ]
mondays << now += 7.days while now.year == Time.now.year

orders = db["orders"].find({
   :placed_at => {
     :$in => mondays.map {|day| day.strftime("%Y%m%d")}
   }
})

puts orders.explain
with destructured dates
(total sold on mondays this year)

now = Time.now.beginning_of_year

now += 1.day until now.monday?
mondays = [ now ]
mondays << now += 7.days while now.year == Time.now.year

orders = db["orders"].find({
   :placed_at => {
     :$in => mondays.map {|day| day.strftime("%Y%m%d")}
   }
})

puts orders.explain

$ ruby src/orders_on_mondays.rb
{ "cursor"=>"BtreeCursor placed_at_-1 multi",
  "nscanned"=>744,
  "nscannedObjects"=>744,
  "n"=>744,
  "millis"=>1,
  "indexBounds"=>{
    "placed_at"=>[
      ["20120102", "20120102"], ["20111226", "20111226"],
      ["20111219", "20111219"], ["20111212", "20111212"],
      ["20111205", "20111205"], ["20111128", "20111128"],
      ["20111121", "20111121"], ...
    ]
  }
}
query &
design
          full query
          power with
            $where
           operator
pomodori
(find who is ticking)

>  db.pomodori.findOne()
{
   "_id"  :  ObjectId("4d8916ed31f2381480000021"),
   "duration"  :  1500,
   "interruptions"  :  0,
   "after_break_of"  :  0,
   "started_at"  :  "Mon  Mar  14  2011  08:05:00  GMT+0100  (CET)",
   "squashed_at"  :  "Mon  Mar  14  2011  08:07:31  GMT+0100  (CET)",
   "in_day"  :  {
      "position"  :  1,
      "is_last"  :  false
   },
   "task_id"  :  ObjectId("4d8916ec31f2381480000014"),
   "user_id"  :  ObjectId("4d8916ec31f2381480000010"),
   "annotations"  :  [  ]
}
pomodori
(find who is ticking)

now = Time.now.yesterday.beginning_of_day + 10.hours
timestamp_of_now = now.to_i

ticking = db["pomodori"].find(
  :$where => <<-EOF
    var startedAt = this.started_at.getTime()/1000
    return (
      ((startedAt + this.duration) > #{timestamp_of_now}) &&
      (startedAt < #{timestamp_of_now})
    )
  EOF
)

puts ticking.map{|pomodoro| pomodoro["_id"]}
pomodori
(find who is ticking)

now = Time.now.yesterday.beginning_of_day + 10.hours
timestamp_of_now = now.to_i

ticking = db["pomodori"].find(
  :$where => <<-EOF
    var startedAt = this.started_at.getTime()/1000
    return (
      ((startedAt + this.duration) > #{timestamp_of_now}) &&
      (startedAt < #{timestamp_of_now})
    )
  EOF
)

puts ticking.map{|pomodoro| pomodoro["_id"]}

$ ruby src/find_who_is_ticking.rb
4d8916ef31f238148000011d
4d8916f231f2381480000271
4d8916f931f23814800004dd
4d8916f931f23814800004e0
pomodori
(find who is ticking for a user)

now = Time.now.yesterday.beginning_of_day + 10.hours
timestamp_of_now = now.to_i
user_id = BSON::ObjectId.from_string("4d8916ec31f2381480000010")

ticking = db["pomodori"].find(
  :user_id => user_id,
  :$where => <<-EOF
    var startedAt = this.started_at.getTime()/1000
    return (
      ((startedAt + this.duration) > #{timestamp_of_now}) &&
      (startedAt < #{timestamp_of_now})
    )
  EOF
)

puts ticking.map{|pomodoro| pomodoro["_id"]}
pomodori
(find who is ticking for a user)

now = Time.now.yesterday.beginning_of_day + 10.hours
timestamp_of_now = now.to_i
user_id = BSON::ObjectId.from_string("4d8916ec31f2381480000010")

ticking = db["pomodori"].find(
  :user_id => user_id,
  :$where => <<-EOF
    var startedAt = this.started_at.getTime()/1000
    return (
      ((startedAt + this.duration) > #{timestamp_of_now}) &&
      (startedAt < #{timestamp_of_now})
    )
  EOF
)

puts ticking.map{|pomodoro| pomodoro["_id"]}

$ ruby src/find_who_is_ticking_for_an_user.rb
4d8916ef31f238148000011d
pomodori
(related to tasks tagged with “maps”)

related_to_maps = db["pomodori"].find(
  :$where => <<-EOF
    db.tasks.findOne({ "_id": this.task_id }).tags.indexOf("maps") >= 0
  EOF
)

puts related_to_maps.map{|pomodoro| pomodoro["_id"]}
pomodori
(related to tasks tagged with “maps”)

related_to_maps = db["pomodori"].find(
  :$where => <<-EOF
    db.tasks.findOne({ "_id": this.task_id }).tags.indexOf("maps") >= 0
  EOF
)

puts related_to_maps.map{|pomodoro| pomodoro["_id"]}

$ ruby src/related_to_maps.rb
4d8916fa31f2381480000579
4d8916fa31f238148000057b
4d8916fa31f238148000057d
4d8916fa31f2381480000580
pomodori
(don’t be carried away :-))

related_to_maps = db["pomodori"].find(
  :$where => <<-EOF
    db.tasks.findOne({ "_id": this.task_id }).tags.indexOf("maps") >= 0
  EOF
)

puts related_to_maps.explain

$ ruby src/related_to_maps.rb
{ "cursor"=>"BasicCursor",
  "nscanned"=>461,
  "nscannedObjects"=>461,
  "n"=>4,
  "millis"=>52,
  "indexBounds"=>{},
  "allPlans"=>[...]
}
pomodori
(related to... a better solution)

related_to_maps = db["pomodori"].find(:task_id => {
   :$in => db["tasks"].find(
     {:tags => "maps"}, :fields => {:_id => 1}
   ).map{|task| task["_id"]}
})

puts related_to_maps.map{|pomodoro| pomodoro["_id"]}

$ ruby src/related_to_maps.rb
4d8916fa31f2381480000579
4d8916fa31f238148000057b
4d8916fa31f238148000057d
4d8916fa31f2381480000580
pomodori
(related to... a better solution)

related_to_maps = db["pomodori"].find(:task_id => {
   :$in => db["tasks"].find(
     {:tags => "maps"}, :fields => {:_id => 1}
   ).map{|task| task["_id"]}
})
                       $  ruby  src/related_to_maps.rb  
                       {  "cursor"=>"BtreeCursor  tags_1",
puts   related_to_maps.map{|pomodoro| pomodoro["_id"]}
                           "nscanned"=>3,
                           "nscannedObjects"=>3,
                           "n"=>3,
                           "millis"=>0,
                           ...
                       }

                        {  "cursor"=>"BtreeCursor  task_id_1  multi",
                            "nscanned"=>4,
                            "nscannedObjects"=>4,
                            "n"=>4,
                            "millis"=>0,
                            ...
                        }
query &
design
             real time
          analytics with
           increments
keep track of url’s visits
(upsert with custom id)

result = db["visits"].update(
  { :_id => Digest::MD5.hexdigest(url) },
  { :$inc => { :hits => 1 } },
  :upsert => true,
  :safe => true
)

puts "Update: #{result.inspect}"

puts db["visits"].find_one(:_id => Digest::MD5.hexdigest(url))
keep track of url’s visits
(upsert with custom id)

result = db["visits"].update(
  { :_id => Digest::MD5.hexdigest(url) },
  { :$inc => { :hits => 1 } },
  :upsert => true,
  :safe => true
)
                          $  ruby  src/realtime_analytics.rb  
                          Update:  {
puts   "Update: #{result.inspect}"
                              "err"=>nil,
                              "updatedExisting"=>false,
                              "n"=>1,
puts   db["visits"].find_one(:_id => Digest::MD5.hexdigest(url))
                              "ok"=>1.0
                          }
                          {"_id"=>"2d86a774beffe90e715a8028c7bd177b",  "hits"=>1}

                              $  ruby  src/realtime_analytics.rb  
                              Update:  {
                                  "err"=>nil,
                                  "updatedExisting"=>true,
                                  "n"=>1,
                                  "ok"=>1.0
                              }
                              {"_id"=>"2d86a774beffe90e715a8028c7bd177b",  "hits"=>2}
url’s visits aggregated by time
(upsert with multiple documents)

# Keep track of a url's visits aggregated by day, month, year and user.
#
# One counter document is expected per aggregation level; its _id is the MD5
# digest of the url joined with the level's key by "-".
# Reads `url`, `user_id` and `db` from the surrounding script.

# Sample the clock once so the day, month and year ids always describe the
# same instant, even when the script runs across a date boundary.
visited_at = Time.now

url_digest = Digest::MD5.hexdigest(url)
ids = [
  [ url_digest, visited_at.strftime("%Y%m%d") ].join("-"),
  [ url_digest, visited_at.strftime("%Y%m") ].join("-"),
  [ url_digest, visited_at.strftime("%Y") ].join("-"),
  [ url_digest, user_id ].join("-")
]
puts "Expect to upsert: \n#{ids}"

# Increment the hit counter of every document whose _id is in `ids`,
# asking the server to upsert and to acknowledge the write (:safe).
result = db["visits"].update(
  { :_id => { :$in => ids } },
  { :$inc => { :hits => 1 } },
  :multi => true,
  :upsert => true,
  :safe => true
)
puts result.inspect
puts db["visits"].all
url’s visits aggregated by time
(upsert with multiple documents)

url_digest = Digest::MD5.hexdigest(url)
ids = [
  [ url_digest, Time.now.strftime("%Y%m%d") ].join("-"),
  [ url_digest, Time.now.strftime("%Y%m") ].join("-"),
  [ url_digest, Time.now.strftime("%Y") ].join("-"),
  [ url_digest, user_id ].join("-")
]                       $  ruby  src/realtime_analytics_with_aggregation.rb  
                        Expect  to  upsert:[
puts "Expect to upsert:     "2d86a774beffe90e715a8028c7bd177b-­‐20110323",
                         n#{ids}"
                              "2d86a774beffe90e715a8028c7bd177b-­‐201103",
                              "2d86a774beffe90e715a8028c7bd177b-­‐2011",  
result =   db["visits"].update(
                              "2d86a774beffe90e715a8028c7bd177b-­‐4d899fab31f238165c000001"
  { :_id   => { :$in => ids } },
                          ]
  { :$inc => { :hits => {  "err"=>nil,
                         1 } },
  :multi => true,           "updatedExisting"=>false,
                            "upserted"=>BSON::ObjectId('4d899fabe23bd37e768ae76d'),  
  :upsert => true,          "n"=>1,
  :safe => true             "ok"=>1.0
                        }
)
puts result.inspect     {"_id"=>BSON::ObjectId('4d899fabe23bd37e768ae76d'),  "hits"=>1}
puts db["visits"].all
url’s visits aggregated by time
(upsert with multiple documents)

url_digest = Digest::MD5.hexdigest(url)
ids = [
  [ url_digest, Time.now.strftime("%Y%m%d") ].join("-"),
  [ url_digest, Time.now.strftime("%Y%m") ].join("-"),
  [ url_digest, Time.now.strftime("%Y") ].join("-"),
  [ url_digest, user_id ].join("-")
]                       $  ruby  src/realtime_analytics_with_aggregation.rb  
                        Expect  to  upsert:[
puts "Expect to upsert:     "2d86a774beffe90e715a8028c7bd177b-­‐20110323",
                         n#{ids}"
                              "2d86a774beffe90e715a8028c7bd177b-­‐201103",
                              "2d86a774beffe90e715a8028c7bd177b-­‐2011",  
result =   db["visits"].update(
                              "2d86a774beffe90e715a8028c7bd177b-­‐4d899fab31f238165c000001"
  { :_id   => { :$in => ids } },
                          ]
  { :$inc => { :hits => {  "err"=>nil,
                         1 } },
  :multi => true,           "updatedExisting"=>false,
                            "upserted"=>BSON::ObjectId('4d899fabe23bd37e768ae76e'),  
  :upsert => true,          "n"=>1,
  :safe => true             "ok"=>1.0
                        }
)
puts result.inspect     {"_id"=>BSON::ObjectId('4d899fabe23bd37e768ae76d'),  "hits"=>1}
puts db["visits"].all {"_id"=>BSON::ObjectId('4d899fabe23bd37e768ae76e'),  "hits"=>1}
url’s visits aggregated by time
(look before you leap)
# Increment the hit counter of every document whose _id is in `ids`.
# The reply is inspected afterwards because the upsert alone does not
# guarantee one counter per id.
result = db["visits"].update(
  { :_id => { :$in => ids } },
  { :$inc => { :hits => 1 } },
  :multi => true,
  :upsert => true,
  :safe => true
)

# The number of documents reported by the server differs from the number of
# ids requested: at least one counter still has to be created by hand.
if result["n"] != ids.size
  # _ids from `ids` that are already present in the collection.
  updated_ids = db["visits"].find(
    { :_id => { :$in => ids } }, :fields => { :_id => true }
  ).map{|document| document["_id"]}

  # Create the missing counters, each starting at one hit (this visit).
  db["visits"].insert((ids - updated_ids).map do |id|
    { :_id => id, :hits => 1 }
  end)

  # Remove the document created by the upsert itself, when the reply names one.
  db["visits"].remove(:_id => result["upserted"]) if result["upserted"]
end
url’s visits aggregated by time
(look before you leap)
result = db["visits"].update(
  { :_id => { :$in => ids } },
  { :$inc => { :hits => 1 } },
  :multi => true,
  :upsert => true,
  :safe => true
)                       $  ruby  src/realtime_analytics_with_aggregation.rb
                        {  "err"=>nil,  
                            "updatedExisting"=>false,
if result["n"] != ids.size
                            "upserted"=>BSON::ObjectId('4d89a5ebe23bd37e768ae76f'),  
                            "n"=>1,
  updated_ids = db["visits"].find(
                            "ok"=>1.0
    { :_id => { :$in => ids } }, :fields => { :_id => true }
  ).map{|document| document["_id"]}
                        }

                        {"_id"=>"<url_digest>-­‐20110323",  "hits"=>1}
  db["visits"].insert((ids - updated_ids).map do |id|
                        {"_id"=>"<url_digest>-­‐201103",  "hits"=>1}
                        {"_id"=>"<url_digest>-­‐2011",  "hits"=>1}
    { :_id => id, :hits {"_id"=>"<url_digest>-­‐4d89a43b31f238167a000001",  "hits"=>1}
                        => 1 }
  end)

  db["visits"].remove(:_id => result["upserted"]) if result["upserted"]
end
url’s visits aggregated by time
(look before you leap)
result = db["visits"].update(
  { :_id => { :$in => ids } },
  { :$inc => { :hits => 1 } },
  :multi => true,
  :upsert => true,
  :safe => true
)                       $  ruby  src/realtime_analytics_with_aggregation.rb
                        {  "err"=>nil,
                            "updatedExisting"=>true,
if result["n"] != ids.size
                            "n"=>3,
                            "ok"=>1.0
  updated_ids = db["visits"].find(
                        }
    { :_id => { :$in => ids } }, :fields => { :_id => true }
                        {"_id"=>"<url_digest>-­‐20110323",  "hits"=>2}
  ).map{|document| document["_id"]}
                        {"_id"=>"<url_digest>-­‐201103",  "hits"=>2}
                        {"_id"=>"<url_digest>-­‐2011",  "hits"=>2}
  db["visits"].insert((ids - updated_ids).map do |id|
                        {"_id"=>"<url_digest>-­‐4d89a43b31f238167a000001",  "hits"=>1}
                        {"_id"=>"<url_digest>-­‐4d89a44231f238167e000001",  "hits"=>1}
    { :_id => id, :hits => 1         }
  end)

  db["visits"].remove(:_id => result["upserted"]) if result["upserted"]
end
query &
design
          incremental
          map/reduce
map/reduce hits per day
(we have raw events)

>  db.visit_events.findOne()
{
   "_id"  :  ObjectId("4d89fc6531f2381d2c00000b"),
   "url"  :  "8aa8b68e0b849f70df6dbb3031c6182b",
   "user_id"  :  ObjectId("4d89fc6531f2381d2c000005"),
   "at"  :  "Thu  Jan  13  2011  08:00:06  GMT+0100  (CET)"
}
map/reduce hits per day
(generate data WITH something like)

# Inserts `visits` synthetic visit events into the "visit_events" collection.
#
# Every event is stamped one randomly sampled gap (BETWEEN_VISITS) after the
# previous one, starting from `now`. It carries the MD5 digest of a url
# sampled from URLS and the :id of a user sampled from USERS.
#
# visits - number of events to insert
# db     - object whose ["visit_events"] responds to insert
# now    - reference time; the first event is stamped one gap after it
#
# Returns `visits` (the value of Integer#times).
def generate_events(visits, db, now)
  timestamp = now
  visits.times do
    timestamp += BETWEEN_VISITS.sample.seconds
    event = {
      :url => Digest::MD5.hexdigest(URLS.sample),
      :user_id => USERS.sample[:id],
      :at => timestamp
    }
    db["visit_events"].insert(event)
  end
end

generate_events(10_000, db, now)
map/reduce hits per day
(simple map/reduce)
MAP = <<-EOF
  function() {
    emit([ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 })
  }
EOF

REDUCE = <<-EOF
  function(key, values) {
    var hits = 0
    for(var index in values) hits += values[index]["hits"]
    return { "hits": hits }
  }
EOF

result = db["visit_events"].map_reduce(
  MAP, REDUCE, :out => "visits", :raw => true, :verbose => true
)

puts result.inspect
map/reduce hits per day
(Date.prototype.format doesn’t exist)
MAP = <<-EOF
  function() {
    emit([ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 })
  }
EOF

REDUCE = <<-EOF
  function(key, values) {
    var hits = 0
    for(var index in values) hits += values[index]["hits"]
    return { "hits": hits }
  }
EOF

result = db["visit_events"].map_reduce(
  MAP, REDUCE, :out => "visits", :raw => true, :verbose => true
)

puts result.inspect
map/reduce hits per day
(implement format in place)

MAP = <<-EOF
  function() {
    Date.prototype.format = function(format) {
      ...
    }
    emit([ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 })
  }
EOF

REDUCE = <<-EOF
  function(key, values) {
    var hits = 0
    for(var index in values) hits += values[index]["hits"]
    return { "hits": hits }
  }
EOF
map/reduce hits per day
(implement format only if needed)

MAP = <<-EOF
  function() {
    if (!Date.prototype.format) {
      Date.prototype.format = function(format) {
        ...
      }
    }
    emit([ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 })
  }
EOF

REDUCE = <<-EOF
  function(key, values) {
    var hits = 0
    for(var index in values) hits += values[index]["hits"]
    return { "hits": hits }
  }
EOF
map/reduce hits per day
(implement format once and for all)
db[Mongo::DB::SYSTEM_JS_COLLECTION].save(
  :_id => "formatDate",
  :value => BSON::Code.new(
    <<-EOF
      function(date, format) {
        if (!Date.prototype.format) {
           Date.prototype.format = function(format) { ... }
        }
        return date.format(format)
      }
    EOF
  )
)

MAP = <<-EOF
  function() {
    emit([ this.url, formatDate(this.at, "Ymd") ].join("-"), {"hits":1})
  }
EOF
map/reduce hits per day
(implement format once and for all)
db[Mongo::DB::SYSTEM_JS_COLLECTION].save(
  :_id => "load",
  :value => BSON::Code.new(
    <<-EOF
      function(module) {
        if ((module === "date") && !Date.prototype.format) {
           Date.prototype.format = function(format) { ... }
        }
        return true
      }
    EOF
  )
)


MAP = <<-EOF
  function() {
    load("date") && emit(
      [ this.url, this.at.format("Ymd") ].join("-"),
      { "hits": 1 }
    )
  }
EOF
map/reduce hits per day
(ok, but could be taking too long)
MAP = <<-EOF
  function() {
    emit([ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 })
  }
EOF

REDUCE = <<-EOF         $  ruby  src/incremental_mr.rb
  function(key, values)   
                         {
                        {  "result"=>"visits",
    var hits = 0            "timeMillis"=>4197,
    for(var index in values) hits += values[index]["hits"]
                            "timing"=>  {
                                "mapTime"=>3932,
    return { "hits": hits }
                                "emitLoop"=>4170,
  }                             "total"=>4197
EOF                         },
                            "counts"=>  {
                                  "input"=>10000,
result   = db["visit_events"].map_reduce(
                                  "emit"=>10000,
                                  "output"=>200
  MAP,   REDUCE, :out => "visits", :raw =>
                              },                    true, :verbose => true
)                             "ok"=>1.0
                          }

puts result.inspect
map/reduce hits per day
(ok, every time we need to start over)
>  db.visits.find()                                                  

{  "_id"  :  "019640ff7952425b1b8695605459d223-­‐20110316",
    "value"  :  {  "hits"  :  47  }
}

{  "_id"  :  "019640ff7952425b1b8695605459d223-­‐20110317",
    "value"  :  {  "hits"  :  49  }
}

{  "_id"  :  "019640ff7952425b1b8695605459d223-­‐20110318",
    "value"  :  {  "hits"  :  59  }  
}

{  "_id"  :  "019640ff7952425b1b8695605459d223-­‐20110319",
    "value"  :  {  "hits"  :  37  }  
}
map/reduce hits per day
(incremental with savepoints)



         visit-elements       visit
           collection      collection



     map/reduce
   on last changed        upsert
      documents




            temporary
            collection
map/reduce hits per day
(incremental with savepoints)

db.create_collection("visit_events",
  :capped => true,
                                        visit-elements
  :max => 50_000,
  :size => 5_000_000                      collection
)



                                    map/reduce
                                  on last changed
                                     documents




                                           temporary
                                           collection
map/reduce hits per day
(incremental with savepoints)

FINALIZE = <<-EOF
  function(key, value) {
    db.visits.update(                         visit
      { "_id": key },                      collection
      { $inc: { "hits": value.hits } },
      true
    )
  }
EOF
                                          upsert




                 temporary
                 collection
map/reduce hits per day
(incremental with savepoints)

generate_events(number_of_events, db, now)

from = from_last_updated(db)
to = to_last_inserted(db)

result = db["visit_events"].map_reduce(
  MAP, REDUCE,
  :finalize => FINALIZE,
  :query => { :_id => { :$gt => from, :$lte => to } },
  :raw => true,
  :verbose => true
)

db["visits"].save(:_id => "savepoint", :at => to)
map/reduce hits per day
(incremental with savepoints)

generate_events(number_of_events, db, now)

from = from_last_updated(db)
to = to_last_inserted(db)

result = db["visit_events"].map_reduce(
  MAP, REDUCE,          $  ruby  src/incremental_mr.rb  -­‐e  10000
                          
  :finalize => FINALIZE,{  "result"=>"tmp.mr.mapreduce_1300892393_60",
  :query => { :_id => { :$gt => from, :$lte => to } },
                            "timeMillis"=>4333,
                            "timing"=>{...},
  :raw => true,             "counts"=>{
  :verbose => true              "input"=>10000,  
                                "emit"=>10000,
)                               "output"=>196
                                },
                                "ok"=>1.0
db["visits"].save(:_id     => "savepoint",
                            }                      :at => to)

                             {  "_id"=>"05241f07d0e3ab6a227e67b33ea0b509-­‐20110113",      
                                 "hits"=>26
                             }
map/reduce hits per day
(incremental with savepoints)

generate_events(number_of_events, db, now)

from = from_last_updated(db)
to = to_last_inserted(db)

result = db["visit_events"].map_reduce(
  MAP, REDUCE,          $  ruby  src/incremental_mr.rb  -­‐e  4999
                          
  :finalize => FINALIZE,{  "result"=>"tmp.mr.mapreduce_1300892399_61",
  :query => { :_id => { :$gt => from, :$lte => to } },
                            "timeMillis"=>2159,
                            "timing"=>{...},
  :raw => true,             "counts"=>{
  :verbose => true              "input"=>4999,
                                "emit"=>4999,
)                               "output"=>146
                                },
                                "ok"=>1.0
db["visits"].save(:_id     => "savepoint",
                            }                     :at => to)

                             {  "_id"=>"05241f07d0e3ab6a227e67b33ea0b509-­‐20110113",  
                                 "hits"=>64
                             }
map/reduce hits per day
(incremental with savepoints)

# Returns the savepoint of the incremental map/reduce: the document stored in
# the "visits" collection under the _id "savepoint".
#
# When no such document is found, falls back to a hash whose "at" is an
# ObjectId built from `10.years.ago`.
def savepoint(db)
  stored = db["visits"].find_one(:_id => "savepoint")
  return stored if stored

  { "at" => BSON::ObjectId.from_time(10.years.ago) }
end

# Lower bound of the next incremental map/reduce run: the "at" value of the
# current savepoint.
#
# db - database handle, forwarded to savepoint (which requires it)
#
# Returns whatever is stored under "at" in the savepoint.
def from_last_updated(db)
  savepoint(db)["at"]
end

# Upper bound of the next incremental map/reduce run: the _id of the first
# "visit_events" document when the collection is sorted by _id descending.
#
# NOTE(review): if the cursor yields no document, `first` presumably returns
# nil and the ["_id"] lookup would raise - confirm against the driver.
def to_last_inserted(db)
  newest_first = db["visit_events"].find.sort([:_id, Mongo::DESCENDING])
  latest_event = newest_first.first
  latest_event["_id"]
end
query &
design
           external
          map/reduce
use an external mongod process
to execute map/reduce jobs



  master                     slave

            replicate data
use an external mongod process
to execute map/reduce jobs



  master                 slave



            map/reduce
              on last
            replicated
               data
use an external mongod process
to execute map/reduce jobs



  master                       slave

           push back results
look at the shell source:
it is more powerful than you think
query &
design    documents
          embedded
              or
           linked?
life cycle:
when the root document
  is deleted, can the child
 stand on its own?




  if yes       if no
embedded      linked
are always fetched
     together?




  if yes       if no
embedded      linked
his attributes are
used to find the root
      document?




  if yes        if no
embedded       linked
he’s small?




  if yes       if no
embedded      linked
he’s unique or there
   are fewer than
     hundreds?




  if yes       if no
embedded      linked
Style


query &
design


scale
scale
        distributed
        reads with
          replica
           sets
slave
                                   replicate




          read
                                  master

             read/write



                          slave
           read



                                   replicate



+ Durability
+ fault tolerance
scale
        (seems stupid but...)

           pump
           your
         hardware
scale
        (seems stupid but...)

         call 10gen
        sure they can
           help :-)
Questions?

                gabriele lana
   gabriele.lana@cleancode.it
       twitter: @gabrielelana
         http://joind.in/2943

MongoDB With Style

  • 1.
    with style gabriele lana gabriele.lana@cleancode.it twitter: @gabrielelana http://joind.in/2943
  • 2.
  • 3.
    mongo console $  ~/Work/opt/mongodb-­‐1.6.5/bin/mongod           -­‐-­‐dbpath=~/Work/src/nosqlday/db/mongodb.01            -­‐-­‐logpath=~/Work/src/nosqlday/log/mongodb.01            -­‐-­‐fork  -­‐-­‐port  30001 $  ~/Work/opt/mongodb-­‐1.6.5/bin/mongo  localhost:30001 MongoDB  shell  version:  1.6.5 connecting  to:  localhost:30001/test >  use  nosqlday switched  to  db  nosqlday >  db.getCollectionNames() [  "system.indexes",  "users"  ] >  db.users.find({  "name":  "Gabriele"  }) {  "_id"  :  ObjectId("4d8706767bb037a8a8f98db2"),  "name"  :  "Gabriele",   "surname"  :  "Lana",  "job"  :  "softwarecraftsman"  } >  exit bye
  • 4.
    ruby driver require "mongo" db= Mongo::Connection.new("localhost", 30001).db("nosqlday") puts "Collections:" db.collections.each do |collection| puts "t#{collection.name}" end puts "Gabriele:" db["users"].find(:name => "Gabriele").each do |user| puts "t#{user["_id"]}" end db.connection.close
  • 5.
    ruby driver require "mongo" db= Mongo::Connection.new("localhost", 30001).db("nosqlday") puts "Collections:" db.collections.each do |collection| puts "t#{collection.name}" $  ruby  src/connect.rb   Collections: end   users   system.indexes Gabriele: puts "Gabriele:"   4d8706767bb037a8a8f98db2 db["users"].find(:name => "Gabriele").each do |user| puts "t#{user["_id"]}" end db.connection.close
  • 6.
  • 7.
    Style know your driver
  • 8.
  • 9.
    puts "Gabriele:" db["users"].find(:name =>"Gabriele").each do |user| puts "t#{user["_id"]}" end puts "Gabriele:" db["users"].select{|user| user["name"] == "Gabriele"}.each do |user| puts "t#{user["_id"]}" end mongo smart driver
  • 10.
    puts "Gabriele:" db["users"].find(:name =>"Gabriele").each do |user| puts "t#{user["_id"]}" end puts "Gabriele:" $  ruby  src/find_vs_select.rb   db["users"].select{|user| user["name"] == "Gabriele"}.each do |user| Gabriele: puts "t#{user["_id"]}" 4d8706767bb037a8a8f98db2   Gabriele: end   4d8706767bb037a8a8f98db2 mongo smart driver
  • 11.
    puts "Gabriele:" db["users"].find(:name =>"Gabriele").each do |user| puts "t#{user["_id"]}" end puts "Gabriele:" db["users"].select{|user| user["name"] == "Gabriele"}.each do |user| puts "t#{user["_id"]}" end mongo smart driver
  • 12.
    Style incremental design based on application behavior
  • 13.
    the best design is the one where needed data can be easily extracted; the way you need to query your data should influence your design
  • 14.
    Style incremental design based on application monitoring
  • 15.
    monitoring and adapting is better than doing it right the first time ...actually the first time is the worst time :-)
  • 16.
    monitoring & adapting > db.setProfilingLevel(1,  5)                                                                                                 {  "was"  :  1,  "slowms"  :  100,  "ok"  :  1  } //  after  product  usage  find  problematic  queries >  db.system.profile.find().sort({millis:-­‐1})                                                                 {  "ts":  "Mon  Mar  21  2011  14:30:56  GMT+0100  (CET)",    "info":  "        query  pomodorist.pomodori            reslen:202            nscanned:26950            query:                  {  $query:  {  task_id:  ObjectId('4d6f1d3931f2386e9c089796')  }}            nreturned:1      ",      "millis":17 }
  • 17.
    monitoring & adapting > db.pomodori.find({        $query:  {  task_id:  ObjectId('4d6f1d3931f2386e9c089796')  },        $explain:  true })                                                       {  "cursor":  "BasicCursor",    "nscanned":  26950,    "nscannedObjects":  26950,    "n":  1,    "millis":  17,    "indexBounds":  {  },    "allPlans":  [        {  "cursor"  :  "BasicCursor",  "indexBounds"  :  {  }  }      ] }
  • 18.
    monitoring & adapting > db.pomodori.ensureIndex({"task_id":  1})                                                                                                               >  db.pomodori.find({        $query:  {  task_id:  ObjectId('4d6f1d3931f2386e9c089796')  },        $explain:  true }) {  "cursor":  "BtreeCursor  task_id_1",    "nscanned":  1,    "nscannedObjects":  1,    "n":  1,    "millis":  0,    "indexBounds":  {        "task_id":  [      [                ObjectId("4d6f1d3931f2386e9c089796"),                ObjectId("4d6f1d3931f2386e9c089796")          ]    ]},  "allPlans":  [...] }
  • 19.
  • 20.
    query & design use $in operator for batch query
  • 21.
    retrieve all objectswith $in users = [ {:name => "Gabriele", :surname => "Lana", :job => "softwarecraftsman"}, {:name => "Federico", :surname => "Galassi", :job => "softwarecraftsman"}, {:name => "Giordano", :surname => "Scalzo", :job => "softwarecraftsman"} ] ids = users.map{|user| db["users"].insert(user)} puts ids.map{|id| db["users"].find_one(:_id => id)}
  • 22.
    retrieve all objectswith $in users = [ {:name => "Gabriele", :surname => "Lana", :job => "softwarecraftsman"}, {:name => "Federico", :surname => "Galassi", :job => "softwarecraftsman"}, {:name => "Giordano", :surname => "Scalzo", :job => "softwarecraftsman"} ] $  ruby  src/find_by_all_ids.rb   {"_id"=>BSON::ObjectId('4d87605731f23824a0000001'),  ...} ids = users.map{|user| db["users"].insert(user)} {"_id"=>BSON::ObjectId('4d87605731f23824a0000002'),  ...} {"_id"=>BSON::ObjectId('4d87605731f23824a0000003'),  ...} puts ids.map{|id| db["users"].find_one(:_id => id)}
  • 23.
    retrieve all objectswith $in users = [ {:name => "Gabriele", :surname => "Lana", :job => "softwarecraftsman"}, {:name => "Federico", :surname => "Galassi", :job => "softwarecraftsman"}, {:name => "Giordano", :surname => "Scalzo", :job => "softwarecraftsman"} ] ids = users.map{|user| db["users"].insert(user)} puts ids.map{|id| db["users"].find_one(:_id => id)}
  • 24.
    retrieve all objectswith $in users = [ {:name => "Gabriele", :surname => "Lana", :job => "softwarecraftsman"}, {:name => "Federico", :surname => "Galassi", :job => "softwarecraftsman"}, {:name => "Giordano", :surname => "Scalzo", :job => "softwarecraftsman"} ] ids = users.map{|user| db["users"].insert(user)} ids = db["users"].insert(users) puts ids.map{|id| db["users"].find_one(:_id => id)} puts db["users"].find(:_id => {:$in => ids}).all
  • 25.
    retrieve all objectswith $in users = [ {:name => "Gabriele", :surname => "Lana", :job => "softwarecraftsman"}, {:name => "Federico", :surname => "Galassi", :job => "softwarecraftsman"}, {:name => "Giordano", :surname => "Scalzo", :job => "softwarecraftsman"} ] $  ruby  src/find_by_all_ids.rb   {"_id"=>BSON::ObjectId('4d87605731f23824a0000001'),  ...} ids = users.map{|user| db["users"].insert(user)} {"_id"=>BSON::ObjectId('4d87605731f23824a0000002'),  ...} ids = db["users"].insert(users) {"_id"=>BSON::ObjectId('4d87605731f23824a0000003'),  ...} puts ids.map{|id| db["users"].find_one(:_id => id)} puts db["users"].find(:_id => {:$in => ids}).all
  • 26.
    query & design use conventions to build smart object identifiers
  • 27.
    conventions are funto play with >  db.user_scores.find({},  {"_id":  1}) {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐week-­‐200944"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐month-­‐200911"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐year-­‐2009"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐user"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐advertising"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐week-­‐200944-­‐advertising"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐art"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐week-­‐200944-­‐art"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐artist"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐week-­‐200944-­‐artist"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐information"  }
  • 28.
    conventions are funto play with >  db.user_scores.findOne(        {"_id":  "4d873ce631f238241d00000d-­‐day-­‐20091106"}    )   {   "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106",   "pomodori"  :  15,   "pomodori_squashed"  :  3,   "breaks"  :  7,   "tasks_created"  :  8,   "tasks_done"  :  6,   "estimation_accuracy"  :  0,   "seconds_of_focused_time"  :  22500,   "seconds_of_wasted_time"  :  1999,   "seconds_of_breaks"  :  8820 }
  • 29.
    conventions are funto play with (user scores in day per tag) >  db.user_scores.find(        {"_id":  /^4d873ce631f238241d00000d-­‐day-­‐20091106-­‐/},  {"_id":  1}    )                 {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐advertising"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐art"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐artist"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐blogging"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐culture"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐html"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐illustration"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐information"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐inspiration"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐marketing"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐movies"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐resources"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐technology"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐tool"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐tutorials"  }
  • 30.
    conventions are funto play with (list of tags per day) >  db.user_scores.find(        {"_id":  /^4d873ce631f238241d00000d-­‐day-­‐20091106-­‐/},  {"_id":  1}    ).map(function(document)  {        return  document._id.replace(            "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐",  ""        )    }) [   "advertising",   "art",   "artist",   "blogging",   "culture",   "html",   "illustration",   "information",   ... ]
  • 31.
    conventions are funto play with (anchored regexp uses indexes) >  db.user_scores.find(        {"_id":  /^4d873ce631f238241d00000d-­‐day-­‐20091106-­‐/},  {"_id":  1}    ).explain()                                                                                                                                                         {   "cursor"  :  "BtreeCursor  _id_  multi",   "nscanned"  :  15,   "nscannedObjects"  :  15,   "n"  :  15,   "millis"  :  0,   "indexBounds"  :  {     "_id"  :  [       [         "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐",         "4d873ce631f238241d00000d-­‐day-­‐20091106."       ],       [         /^4d873ce631f238241d00000d-­‐day-­‐20091106-­‐/,         /^4d873ce631f238241d00000d-­‐day-­‐20091106-­‐/       ]     ]
  • 32.
    conventions are funto play with (anchored regexp uses indexes) >  db.user_scores.find(        {"_id":  /4d873ce631f238241d00000d-­‐day-­‐20091106-­‐/},  {"_id":  1}    ).explain() {   "cursor"  :  "BtreeCursor  _id_  multi",   "nscanned"  :  109349,   "nscannedObjects"  :  15,   "n"  :  15,   "millis"  :  217,   "indexBounds"  :  {     "_id"  :  [       ...     ]   } }
  • 33.
    query & design: use the “group” method to do small computations without fetching related documents
  • 34.
    group to computedata in mongo (inject client side) days = [ 20091110, 20091111, 20091112 ] scores_id = %r{^4d87d00931f2380c7700000d-day-(#{days.join("|")})$} scores = db["user_scores"].find(:_id => scores_id) pomodori = scores.inject(0) do |pomodori, scores| pomodori + scores["pomodori"] end puts "Pomodori in days #{days.join(",")}: #{pomodori}"
  • 35.
    group to computedata in mongo (inject client side) days = [ 20091110, 20091111, 20091112 ] scores_id = %r{^4d87d00931f2380c7700000d-day-(#{days.join("|")})$} scores = db["user_scores"].find(:_id => scores_id) pomodori = scores.inject(0) do |pomodori, scores| $  ruby  src/inject_for_reduce.rb   pomodori + scores["pomodori"] Pomodori  in  days  20091110,20091111,20091112:  36 end puts "Pomodori in days #{days.join(",")}: #{pomodori}"
  • 36.
    group to computedata in mongo (group server side) days = [ 20091110, 20091111, 20091112 ] scores_id = %r{^4d87d00931f2380c7700000d-day-(#{days.join("|")})$} result = db["user_scores"].group( :cond => { :_id => scores_id }, :initial => { :pomodori => 0 }, :reduce => <<-EOF function(document, result) { result.pomodori += document.pomodori } EOF ) puts "Pomodori in days #{days.join(",")}: #{result.first["pomodori"]}"
  • 37.
    group to computedata in mongo (group server side) days = [ 20091110, 20091111, 20091112 ] scores_id = %r{^4d87d00931f2380c7700000d-day-(#{days.join("|")})$} result = db["user_scores"].group( :cond => { :_id => scores_id }, :initial => { :pomodori => 0 }, :reduce => <<-EOF $  ruby  src/group_for_reduce.rb   Pomodori  in  days  20091110,20091111,20091112:  36 function(document, result) { result.pomodori += document.pomodori } EOF ) puts "Pomodori in days #{days.join(",")}: #{result.first["pomodori"]}"
  • 38.
    group to computedata in mongo (ex. sum pomodori by tag “ruby”) result = db["user_scores"].group( :cond => { :_id => /^4d87d00931f2380c7700000d-day-d{8}-ruby$/ }, :initial => { :pomodori => 0, :days => 0 }, :reduce => <<-EOF function(document, result) { result.days += 1 result.pomodori += document.pomodori } EOF ).first puts "In #{result["days"]} days, #{result["pomodori"]} done for ruby"
  • 39.
    group to computedata in mongo (ex. sum pomodori by tag “ruby”) result = db["user_scores"].group( :cond => { :_id => /^4d87d00931f2380c7700000d-day-d{8}-ruby$/ }, :initial => { :pomodori => 0, :days => 0 }, :reduce => <<-EOF function(document, result) { $  ruby  src/group_for_ruby_tag.rb   In  43  days,  45  pomodori result.days += 1 result.pomodori += document.pomodori } EOF ).first puts "In #{result["days"]} days, #{result["pomodori"]} pomodori"
  • 40.
    group to computedata in mongo (ex. sum pomodori by tag “ruby”) >  db.user_scores.find({        "_id":  /^4d87d00931f2380c7700000d-­‐day-­‐d{8}-­‐ruby$/    }).explain() {   "cursor"  :  "BtreeCursor  _id_  multi",   "nscanned"  :  43,   "nscannedObjects"  :  43,   "n"  :  43,   "millis"  :  3,   "indexBounds"  :  {     "_id"  :  [...]   } }
  • 41.
    query & design create indexes on arrays to create local reverse indexes in documents
  • 42.
    reverse index inplace (an array could be indexed) >  db.tasks.find({  "tags":  {  $in:  [  "nosqlday"  ]  }  })                                                                             {  "_id"  :  ObjectId("4d7de446175ca8243d000004"),      "tags"  :  [  "nosqlday"  ],      "description"  :  "#nosqlday  keynote",      "is_recurrent"  :  false,    "estimated"  :  0,      "worked_in"  :  [   "Mon  Mar  14  2011  00:00:00  GMT+0100  (CET)",   "Tue  Mar  15  2011  00:00:00  GMT+0100  (CET)"    ],    "done_at"  :  "Tue  Mar  15  2011  13:05:03  GMT+0100  (CET)",    "todo_at"  :  null,    "created_at"  :  "Mon  Mar  14  2011  10:47:50  GMT+0100  (CET)",    "updated_at"  :  "Tue  Mar  15  2011  13:05:03  GMT+0100  (CET)",    "keywords":  [  "nosqldai",  "keynot"  ],    "user_id":  ObjectId("4d53996c137ce423ff000001"),    "annotations"  :  [  ] }
  • 43.
    reverse index inplace (an array could be indexed) >  db.tasks.getIndexes() [   {     "name"  :  "_id_",     "ns"  :  "app435386.tasks",     "key"  :  {       "_id"  :  1     }   },   {     "name"  :  "tags_1",     "ns"  :  "app435386.tasks",     "key"  :  {       "tags"  :  1     },     "unique"  :  false   },      ... ]
  • 44.
    reverse index inplace (container for deduced data, array) db["orders"].insert({ :placed_at => [ now.strftime("%Y"), # year: "2011" now.strftime("%Y%m"), # month: "201103" now.strftime("%Yw%U"), # week: "2011w11" now.strftime("%Y%m%d") # day: "20110316" ], :user_id => user, :items => items_in_order.map{|item| item[:id]}, :total => items_in_order.inject(0){|total,item| total += item[:price]} }) # ... db["orders"].ensure_index([["placed_at", Mongo::DESCENDING]])
  • 45.
    reverse index inplace (container for deduced data, array) >  db.orders.findOne() {  "_id"  :  ObjectId("4d88bf1f31f23812de0003fd"),      "placed_at"  :  [  "2011",  "201103",  "2011w11",  "20110316"  ],    "user_id"  :  ObjectId("4d88bf1f31f23812de0003e9"),    "items"  :  [        ObjectId("4d88bf1f31f23812de0003da"),        ObjectId("4d88bf1f31f23812de000047"),        ObjectId("4d88bf1f31f23812de000078"),        ObjectId("4d88bf1f31f23812de000068"),      ObjectId("4d88bf1f31f23812de000288")    ],    "total"  :  3502 }
  • 46.
    reverse index inplace (container for deduced data, array) >  db.orders.find({  "placed_at":  "20110310"  }).count() 77 >  db.orders.find({  "placed_at":  "20110310"  }).explain() {   "cursor"  :  "BtreeCursor  placed_at_-­‐1",   "nscanned"  :  77,   "nscannedObjects"  :  77,   "n"  :  77,   "millis"  :  0,   "indexBounds"  :  {     "placed_at"  :  [       [         "20110310",         "20110310"       ]     ]   } }
  • 47.
    reverse index inplace (container for deduced data, hash) db["orders"].insert({ :placed_at => [ { :year => now.strftime("%Y") }, { :month => now.strftime("%Y%m") }, { :week => now.strftime("%Y%U") }, { :day => now.strftime("%Y%m%d") } ], :user_id => user, :items => items_in_order.map{|item| item[:id]}, :total => items_in_order.inject(0){|total,item| total += item[:price]} }) # ... db["orders"].ensure_index([["placed_at", Mongo::DESCENDING]])
  • 48.
    reverse index inplace (container for deduced data, hash) >  db.orders.findOne()                                                             {  "_id"  :  ObjectId("4d88c31531f23812fe0003ea"),    "placed_at"  :  [        {  "year"  :  "2009"  },        {  "month"  :  "200911"  },        {  "week"  :  "200945"  },        {  "day"  :  "20091109"  }    ],    "user_id"  :  ObjectId("4d88c31531f23812fe0003e9"),    "items"  :  [        ObjectId("4d88c31531f23812fe00013f"),        ObjectId("4d88c31531f23812fe000176"),        ObjectId("4d88c31531f23812fe0003e2"),        ObjectId("4d88c31531f23812fe0003d1"),        ObjectId("4d88c31531f23812fe0001c1"),        ObjectId("4d88c31531f23812fe000118"),        ObjectId("4d88c31531f23812fe00031d")    ],    "total"  :  10149 }
  • 49.
    reverse index inplace (container for deduced data, hash) >  db.orders.find({  "placed_at.week":  "201101"  }).count()                 331 >  db.orders.find({  "placed_at.week":  "201101"  }).explain()             {   "cursor"  :  "BasicCursor",   "nscanned"  :  22374,   "nscannedObjects"  :  22374,   "n"  :  331,   "millis"  :  23,   "indexBounds"  :  {       } }
  • 50.
    reverse index inplace (container for deduced data, hash) >  db.orders.find({  "placed_at":  {  "week":  "201101"  }}).count()     331 >  db.orders.find({  "placed_at":  {  "week":  "201101"  }}).explain() {   "cursor"  :  "BtreeCursor  placed_at_-­‐1",   "nscanned"  :  331,   "nscannedObjects"  :  331,   "n"  :  331,   "millis"  :  0,   "indexBounds"  :  {     "placed_at"  :  [       [         {  "week"  :  "2011w01"  },         {  "week"  :  "2011w01"  }       ]     ]   } }
  • 51.
    query & design use dates but be aware of some pitfalls
  • 52.
    plain dates are good too db["orders"].insert({ :placed_at => now, :user_id => user, :items => items_in_order.map{|item| item[:id]}, :total => items_in_order.inject(0){|total,item| total += item[:price]} }) # ... db["orders"].ensure_index([["placed_at", Mongo::DESCENDING]])
  • 53.
    plain dates aregood too >  db.orders.findOne()                                                                                                                                               {   "_id"  :  ObjectId("4d88d1f931f23813a10003ea"),   "placed_at"  :  "Mon  Nov  09  2009  08:00:00  GMT+0100  (CET)",   "user_id"  :  ObjectId("4d88d1f931f23813a10003e9"),   "items"  :  [     ObjectId("4d88d1f931f23813a100016d"),     ObjectId("4d88d1f931f23813a1000346"),     ObjectId("4d88d1f931f23813a10001e7"),     ObjectId("4d88d1f931f23813a10000db"),     ObjectId("4d88d1f931f23813a1000091"),     ObjectId("4d88d1f931f23813a10001c1"),     ObjectId("4d88d1f931f23813a10001d3"),     ObjectId("4d88d1f931f23813a100031b"),     ObjectId("4d88d1f931f23813a1000130")   ],   "total"  :  5871 }
  • 54.
    plain dates aregood too >  db.orders.find({        "placed_at":  {              $gte:  new  Date(2011,2,10),            $lt:  new  Date(2011,2,11)        }    }).explain() {   "cursor"  :  "BtreeCursor  placed_at_-­‐1",   "nscanned"  :  53,   "nscannedObjects"  :  53,   "n"  :  53,   "millis"  :  0,   "indexBounds"  :  {     "placed_at"  :  [       [         "Fri  Mar  11  2011  00:00:00  GMT+0100  (CET)",         "Thu  Mar  10  2011  00:00:00  GMT+0100  (CET)"       ]     ]   }
  • 55.
    plain dates aregood too, but... (total sold on this year’s mondays) # find all mondays of the year now = Time.now.beginning_of_year now += 1.day until now.monday? mondays = [ now ] mondays << now += 7.days while now.year == Time.now.year # find all orders placed on mondays query = { :$or => mondays.map do |day| { :placed_at => { :$gte => day.beginning_of_day, :$lte => day.end_of_day } } end } puts query
  • 56.
    plain dates aregood too, but... (total sold on this year’s mondays) # find all mondays of the year now = Time.now.beginning_of_year now += 1.day until now.monday? mondays = [ now ] mondays << now += 7.days while now.year == Time.now.year $  ruby  src/orders_on_mondays.rb   # find all orders placed on mondays {:$or=>[ query = {    {:placed_at=>{ :$or => mondays.map do |day|        :$gte=>2011-­‐01-­‐03  00:00:00  +0100, { :placed_at => {        :$lte=>2011-­‐01-­‐03  23:59:59  +0100    }}, :$gte => day.beginning_of_day,    {:placed_at=>{ :$lte => day.end_of_day        :$gte=>2011-­‐01-­‐10  00:00:00  +0100,        :$lte=>2011-­‐01-­‐10  23:59:59  +0100 }    }}, }    {:placed_at=>{        :$gte=>2011-­‐01-­‐17  00:00:00  +0100, end        :$lte=>2011-­‐01-­‐17  23:59:59  +0100 }    }},    ... ]} puts query
  • 57.
    plain dates are good too, but... (it works but it’s too slooow) db["orders"].find({ :$or => mondays.map do |day| { :placed_at => { :$gte => day.beginning_of_day, :$lte => day.end_of_day } } end })
  • 58.
    plain dates aregood too, but... (why it’s too slow) >  db.orders.find({        $or:  [            "placed_at":{  $gte:  new  Date(2011,2,3),  $lt:  new  Date(2011,2,4)  },            "placed_at":{  $gte:  new  Date(2011,2,10),  $lt:  new  Date(2011,2,11)  }        ]    }).explain() {    "clauses"  :  [{            "cursor"  :  "BtreeCursor  placed_at_-­‐1",            "indexBounds"  :  {                "placed_at"  :  [[                    "Tue  Mar  3  2011  00:00:00  GMT+0100  (CET)",                    "Wed  Mar  4  2011  00:00:00  GMT+0100  (CET)"                ]]}    },  {        "cursor"  :  "BtreeCursor  placed_at_-­‐1",            "indexBounds"  :  {                "placed_at"  :  [[                    "Tue  Mar  10  2011  00:00:00  GMT+0100  (CET)",                    "Wed  Mar  11  2011  00:00:00  GMT+0100  (CET)"          
  • 59.
    with destructured dates (totalsold on mondays this year) >  db.orders.findOne() {  "_id"  :  ObjectId("4d88bf1f31f23812de0003fd"),      "placed_at"  :  [  "2011",  "201103",  "2011w11",  "20110316"  ],    "user_id"  :  ObjectId("4d88bf1f31f23812de0003e9"),    "items"  :  [        ObjectId("4d88bf1f31f23812de0003da"),        ObjectId("4d88bf1f31f23812de000047"),        ObjectId("4d88bf1f31f23812de000078"),        ObjectId("4d88bf1f31f23812de000068"),        ObjectId("4d88bf1f31f23812de000288")    ],    "total"  :  3502 }
  • 60.
    with destructured dates (total sold on mondays this year) now = Time.now.beginning_of_year now += 1.day until now.monday? mondays = [ now ] mondays << now += 7.days while now.year == Time.now.year orders = db["orders"].find({ :placed_at => { :$in => mondays.map {|day| day.strftime("%Y%m%d")} } }) puts orders.explain
  • 61.
    with destructured dates (totalsold on mondays this year) now = Time.now.beginning_of_year now += 1.day until now.monday? mondays = [ now ] mondays << now += 7.days while now.year == Time.now.year orders = db["orders"].find({ $  ruby  src/orders_on_mondays.rb   :placed_at => { {  "cursor"=>"BtreeCursor  placed_at_-­‐1  multi", :$in => mondays.map    "nscanned"=>744, {|day| day.strftime("%Y%m%d")} }    "nscannedObjects"=>744,    "n"=>744, })    "millis"=>1,    "indexBounds"=>{        "placed_at"=>[ puts orders.explain            ["20120102",  "20120102"],  ["20111226",  "20111226"],            ["20111219",  "20111219"],  ["20111212",  "20111212"],              ["20111205",  "20111205"],  ["20111128",  "20111128"],              ["20111121",  "20111121"],  ...        ]    } }
  • 62.
    query & design full query power with $where operator
  • 63.
    pomodori (find who isticking) >  db.pomodori.findOne() {   "_id"  :  ObjectId("4d8916ed31f2381480000021"),   "duration"  :  1500,   "interruptions"  :  0,   "after_break_of"  :  0,   "started_at"  :  "Mon  Mar  14  2011  08:05:00  GMT+0100  (CET)",   "squashed_at"  :  "Mon  Mar  14  2011  08:07:31  GMT+0100  (CET)",   "in_day"  :  {     "position"  :  1,     "is_last"  :  false   },   "task_id"  :  ObjectId("4d8916ec31f2381480000014"),   "user_id"  :  ObjectId("4d8916ec31f2381480000010"),   "annotations"  :  [  ] }
  • 64.
    pomodori (find who is ticking) now = Time.now.yesterday.beginning_of_day + 10.hours timestamp_of_now = now.to_i ticking = db["pomodori"].find( :$where => <<-EOF var startedAt = this.started_at.getTime()/1000 return ((startedAt + this.duration) > #{timestamp_of_now}) && (startedAt < #{timestamp_of_now}) EOF ) puts ticking.map{|pomodoro| pomodoro["_id"]}
  • 65.
    pomodori (find who isticking) now = Time.now.yesterday.beginning_of_day + 10.hours timestamp_of_now = now.to_i ticking = db["pomodori"].find( :$where => <<-EOF var startedAt = this.started_at.getTime()/1000 return $  ruby  src/find_who_is_ticking.rb   4d8916ef31f238148000011d ((startedAt + this.duration) > #{timestamp_of_now}) && 4d8916f231f2381480000271 (startedAt < #{timestamp_of_now}) 4d8916f931f23814800004dd 4d8916f931f23814800004e0 EOF ) puts ticking.map{|pomodoro| pomodoro["_id"]}
  • 66.
    pomodori (find who is ticking for a user) now = Time.now.yesterday.beginning_of_day + 10.hours timestamp_of_now = now.to_i user_id = BSON::ObjectId.from_string("4d8916ec31f2381480000010") ticking = db["pomodori"].find( :user_id => user_id, :$where => <<-EOF var startedAt = this.started_at.getTime()/1000 return ((startedAt + this.duration) > #{timestamp_of_now}) && (startedAt < #{timestamp_of_now}) EOF ) puts ticking.map{|pomodoro| pomodoro["_id"]}
  • 67.
    pomodori (find who isticking for an user) now = Time.now.yesterday.beginning_of_day + 10.hours timestamp_of_now = now.to_i user_id = BSON::ObjectId.from_string("4d8916ec31f2381480000010") ticking = db["pomodori"].find( :user_id => user_id, :$where => <<-EOF $  ruby  src/find_who_is_ticking_for_an_user.rb   4d8916ef31f238148000011d var startedAt = this.started_at.getTime()/1000 return ((startedAt + this.duration) > #{timestamp_of_now}) && (startedAt < #{timestamp_of_now}) EOF ) puts ticking.map{|pomodoro| pomodoro["_id"]}
  • 68.
    pomodori (related to tasks tagged with “maps”) related_to_maps = db["pomodori"].find( :$where => <<-EOF db.tasks.findOne({ "_id": this.task_id }).tags.indexOf("maps") >= 0 EOF ) puts related_to_maps.map{|pomodoro| pomodoro["_id"]}
  • 69.
    pomodori (related to taskstagged with “maps”) related_to_maps = db["pomodori"].find( :$where => <<-EOF db.tasks.findOne({ "_id": this.task_id }).tags.indexOf("maps") >= 0 EOF ) $  ruby  src/related_to_maps.rb   puts related_to_maps.map{|pomodoro| pomodoro["_id"]} 4d8916fa31f2381480000579 4d8916fa31f238148000057b 4d8916fa31f238148000057d 4d8916fa31f2381480000580
  • 70.
    pomodori (don’t be carriedaway :-)) related_to_maps = db["pomodori"].find( :$where => <<-EOF db.tasks.findOne({ "_id": this.task_id }).tags.indexOf("maps") >= 0 EOF ) $  ruby  src/related_to_maps.rb   puts related_to_maps.explain {  "cursor"=>"BasicCursor",      "nscanned"=>461,      "nscannedObjects"=>461,    "n"=>4,    "millis"=>52,      "indexBounds"=>{},      "allPlans"=>[...] }
  • 71.
    pomodori (related to... abetter solution) related_to_maps = db["pomodori"].find(:task_id => { :$in => db["tasks"].find( {:tags => "maps"}, :fields => {:_id => 1} ).map{|task| task["_id"]} }) $  ruby  src/related_to_maps.rb   4d8916fa31f2381480000579 puts related_to_maps.map{|pomodoro| pomodoro["_id"]} 4d8916fa31f238148000057b 4d8916fa31f238148000057d 4d8916fa31f2381480000580
  • 72.
    pomodori (related to... abetter solution) related_to_maps = db["pomodori"].find(:task_id => { :$in => db["tasks"].find( {:tags => "maps"}, :fields => {:_id => 1} ).map{|task| task["_id"]} }) $  ruby  src/related_to_maps.rb   {  "cursor"=>"BtreeCursor  tags_1", puts related_to_maps.map{|pomodoro| pomodoro["_id"]}    "nscanned"=>3,    "nscannedObjects"=>3,    "n"=>3,    "millis"=>0,    ... } {  "cursor"=>"BtreeCursor  task_id_1  multi",    "nscanned"=>4,    "nscannedObjects"=>4,    "n"=>4,    "millis"=>0,    ... }
  • 73.
    query & design real time analytics with increments
  • 74.
    keep track of url’s visits (upsert with custom id) result = db["visits"].update( { :_id => Digest::MD5.hexdigest(url) }, { :$inc => { :hits => 1 } }, :upsert => true, :safe => true ) puts "Update: #{result.inspect}" puts db["visits"].find_one(:_id => Digest::MD5.hexdigest(url))
  • 75.
    keep track ofurl’s visits (upsert with custom id) result = db["visits"].update( { :_id => Digest::MD5.hexdigest(url) }, { :$inc => { :hits => 1 } }, :upsert => true, :safe => true ) $  ruby  src/realtime_analytics.rb   Update:  { puts "Update: #{result.inspect}"    "err"=>nil,    "updatedExisting"=>false,    "n"=>1, puts db["visits"].find_one(:_id => Digest::MD5.hexdigest(url))    "ok"=>1.0 } {"_id"=>"2d86a774beffe90e715a8028c7bd177b",  "hits"=>1} $  ruby  src/realtime_analytics.rb   Update:  {    "err"=>nil,    "updatedExisting"=>true,    "n"=>1,    "ok"=>1.0 } {"_id"=>"2d86a774beffe90e715a8028c7bd177b",  "hits"=>2}
  • 76.
    url’s visits aggregatedby time (upsert with multiple documents) url_digest = Digest::MD5.hexdigest(url) ids = [ [ url_digest, Time.now.strftime("%Y%m%d") ].join("-"), [ url_digest, Time.now.strftime("%Y%m") ].join("-"), [ url_digest, Time.now.strftime("%Y") ].join("-"), [ url_digest, user_id ].join("-") ] puts "Expect to upsert: n#{ids}" result = db["visits"].update( { :_id => { :$in => ids } }, { :$inc => { :hits => 1 } }, :multi => true, :upsert => true, :safe => true ) puts result.inspect puts db["visits"].all
  • 77.
    url’s visits aggregatedby time (upsert with multiple documents) url_digest = Digest::MD5.hexdigest(url) ids = [ [ url_digest, Time.now.strftime("%Y%m%d") ].join("-"), [ url_digest, Time.now.strftime("%Y%m") ].join("-"), [ url_digest, Time.now.strftime("%Y") ].join("-"), [ url_digest, user_id ].join("-") ] $  ruby  src/realtime_analytics_with_aggregation.rb   Expect  to  upsert:[ puts "Expect to upsert:    "2d86a774beffe90e715a8028c7bd177b-­‐20110323", n#{ids}"    "2d86a774beffe90e715a8028c7bd177b-­‐201103",    "2d86a774beffe90e715a8028c7bd177b-­‐2011",   result = db["visits"].update(    "2d86a774beffe90e715a8028c7bd177b-­‐4d899fab31f238165c000001" { :_id => { :$in => ids } }, ] { :$inc => { :hits => {  "err"=>nil, 1 } }, :multi => true,    "updatedExisting"=>false,    "upserted"=>BSON::ObjectId('4d899fabe23bd37e768ae76d'),   :upsert => true,    "n"=>1, :safe => true    "ok"=>1.0 } ) puts result.inspect {"_id"=>BSON::ObjectId('4d899fabe23bd37e768ae76d'),  "hits"=>1} puts db["visits"].all
  • 78.
    url’s visits aggregatedby time (upsert with multiple documents) url_digest = Digest::MD5.hexdigest(url) ids = [ [ url_digest, Time.now.strftime("%Y%m%d") ].join("-"), [ url_digest, Time.now.strftime("%Y%m") ].join("-"), [ url_digest, Time.now.strftime("%Y") ].join("-"), [ url_digest, user_id ].join("-") ] $  ruby  src/realtime_analytics_with_aggregation.rb   Expect  to  upsert:[ puts "Expect to upsert:    "2d86a774beffe90e715a8028c7bd177b-­‐20110323", n#{ids}"    "2d86a774beffe90e715a8028c7bd177b-­‐201103",    "2d86a774beffe90e715a8028c7bd177b-­‐2011",   result = db["visits"].update(    "2d86a774beffe90e715a8028c7bd177b-­‐4d899fab31f238165c000001" { :_id => { :$in => ids } }, ] { :$inc => { :hits => {  "err"=>nil, 1 } }, :multi => true,    "updatedExisting"=>false,    "upserted"=>BSON::ObjectId('4d899fabe23bd37e768ae76e'),   :upsert => true,    "n"=>1, :safe => true    "ok"=>1.0 } ) puts result.inspect {"_id"=>BSON::ObjectId('4d899fabe23bd37e768ae76d'),  "hits"=>1} puts db["visits"].all {"_id"=>BSON::ObjectId('4d899fabe23bd37e768ae76e'),  "hits"=>1}
  • 79.
    url’s visits aggregated by time (look before you leap) result = db["visits"].update( { :_id => { :$in => ids } }, { :$inc => { :hits => 1 } }, :multi => true, :upsert => true, :safe => true ) if result["n"] != ids.size updated_ids = db["visits"].find( { :_id => { :$in => ids } }, :fields => { :_id => true } ).map{|document| document["_id"]} db["visits"].insert((ids - updated_ids).map do |id| { :_id => id, :hits => 1 } end) db["visits"].remove(:_id => result["upserted"]) if result["upserted"] end
  • 80.
    url’s visits aggregatedby time (look before you leap) result = db["visits"].update( { :_id => { :$in => ids } }, { :$inc => { :hits => 1 } }, :multi => true, :upsert => true, :safe => true ) $  ruby  src/realtime_analytics_with_aggregation.rb {  "err"=>nil,      "updatedExisting"=>false, if result["n"] != ids.size    "upserted"=>BSON::ObjectId('4d89a5ebe23bd37e768ae76f'),      "n"=>1, updated_ids = db["visits"].find(    "ok"=>1.0 { :_id => { :$in => ids } }, :fields => { :_id => true } ).map{|document| document["_id"]} } {"_id"=>"<url_digest>-­‐20110323",  "hits"=>1} db["visits"].insert((ids - updated_ids).map do |id| {"_id"=>"<url_digest>-­‐201103",  "hits"=>1} {"_id"=>"<url_digest>-­‐2011",  "hits"=>1} { :_id => id, :hits {"_id"=>"<url_digest>-­‐4d89a43b31f238167a000001",  "hits"=>1} => 1 } end) db["visits"].remove(:_id => result["upserted"]) if result["upserted"] end
  • 81.
    url’s visits aggregatedby time (look before you leap) result = db["visits"].update( { :_id => { :$in => ids } }, { :$inc => { :hits => 1 } }, :multi => true, :upsert => true, :safe => true ) $  ruby  src/realtime_analytics_with_aggregation.rb {  "err"=>nil,    "updatedExisting"=>true, if result["n"] != ids.size    "n"=>3,    "ok"=>1.0 updated_ids = db["visits"].find( } { :_id => { :$in => ids } }, :fields => { :_id => true } {"_id"=>"<url_digest>-­‐20110323",  "hits"=>2} ).map{|document| document["_id"]} {"_id"=>"<url_digest>-­‐201103",  "hits"=>2} {"_id"=>"<url_digest>-­‐2011",  "hits"=>2} db["visits"].insert((ids - updated_ids).map do |id| {"_id"=>"<url_digest>-­‐4d89a43b31f238167a000001",  "hits"=>1} {"_id"=>"<url_digest>-­‐4d89a44231f238167e000001",  "hits"=>1} { :_id => id, :hits => 1 } end) db["visits"].remove(:_id => result["upserted"]) if result["upserted"] end
  • 82.
    query & design incremental map/reduce
  • 83.
    map/reduce hits perday (we have raw events) >  db.visit_events.findOne() {   "_id"  :  ObjectId("4d89fc6531f2381d2c00000b"),   "url"  :  "8aa8b68e0b849f70df6dbb3031c6182b",   "user_id"  :  ObjectId("4d89fc6531f2381d2c000005"),   "at"  :  "Thu  Jan  13  2011  08:00:06  GMT+0100  (CET)" }
  • 84.
    map/reduce hits per day (generate data WITH something like) def generate_events(visits, db, now) visits.times do |time| now += BETWEEN_VISITS.sample.seconds db["visit_events"].insert( :url => Digest::MD5.hexdigest(URLS.sample), :user_id => USERS.sample[:id], :at => now ) end end generate_events(10_000, db, now)
  • 85.
    map/reduce hits perday (simple map/reduce) MAP = <<-EOF function() { emit([ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 }) } EOF REDUCE = <<-EOF function(key, values) { var hits = 0 for(var index in values) hits += values[index]["hits"] return { "hits": hits } } EOF result = db["visit_events"].map_reduce( MAP, REDUCE, :out => "visits", :raw => true, :verbose => true ) puts result.inspect
  • 86.
    map/reduce hits per day (Date.prototype.format doesn’t exist) MAP = <<-EOF function() { emit([ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 }) } EOF REDUCE = <<-EOF function(key, values) { var hits = 0 for(var index in values) hits += values[index]["hits"] return { "hits": hits } } EOF result = db["visit_events"].map_reduce( MAP, REDUCE, :out => "visits", :raw => true, :verbose => true ) puts result.inspect
  • 87.
    map/reduce hits perday (implement format in place) MAP = <<-EOF function() { Date.prototype.format = function(format) { ... } emit([ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 }) } EOF REDUCE = <<-EOF function(key, values) { var hits = 0 for(var index in values) hits += values[index]["hits"] return { "hits": hits } } EOF
  • 88.
    map/reduce hits perday (implement format only if needed) MAP = <<-EOF function() { if (!Date.prototype.format) { Date.prototype.format = function(format) { ... } } emit([ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 }) } EOF REDUCE = <<-EOF function(key, values) { var hits = 0 for(var index in values) hits += values[index]["hits"] return { "hits": hits } } EOF
  • 89.
    map/reduce hits perday (implement format once and for all) db[Mongo::DB::SYSTEM_JS_COLLECTION].save( :_id => "formatDate", :value => BSON::Code.new( <<-EOF function(date, format) { if (!Date.prototype.format) { Date.prototype.format = function(format) { ... } } return date.format(format) } EOF ) ) MAP = <<-EOF function() { emit([ this.url, formatDate(this.at, "Ymd") ].join("-"), {"hits":1}) } EOF
  • 90.
    map/reduce hits perday (implement format once and for all) db[Mongo::DB::SYSTEM_JS_COLLECTION].save( :_id => "load", :value => BSON::Code.new( <<-EOF function(module) { if ((module === "date") && !Date.prototype.format) { Date.prototype.format = function(format) { ... } } return true } EOF ) ) MAP = <<-EOF function() { load("date") && emit( [ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 } ) } EOF
  • 91.
    map/reduce hits perday (ok, but could be taking too long) MAP = <<-EOF function() { emit([ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 }) } EOF REDUCE = <<-EOF $  ruby  src/incremental_mr.rb function(key, values)   { {  "result"=>"visits", var hits = 0    "timeMillis"=>4197, for(var index in values) hits += values[index]["hits"]    "timing"=>  {        "mapTime"=>3932, return { "hits": hits }        "emitLoop"=>4170, }        "total"=>4197 EOF    },    "counts"=>  {        "input"=>10000, result = db["visit_events"].map_reduce(        "emit"=>10000,        "output"=>200 MAP, REDUCE, :out => "visits", :raw =>    }, true, :verbose => true )    "ok"=>1.0 } puts result.inspect
  • 92.
    map/reduce hits perday (ok, every time we need to start over) >  db.visits.find()                                                   {  "_id"  :  "019640ff7952425b1b8695605459d223-­‐20110316",    "value"  :  {  "hits"  :  47  } } {  "_id"  :  "019640ff7952425b1b8695605459d223-­‐20110317",    "value"  :  {  "hits"  :  49  } } {  "_id"  :  "019640ff7952425b1b8695605459d223-­‐20110318",    "value"  :  {  "hits"  :  59  }   } {  "_id"  :  "019640ff7952425b1b8695605459d223-­‐20110319",    "value"  :  {  "hits"  :  37  }   }
  • 93.
    map/reduce hits perday (incremental with savepoints) visit-elements visit collection collection map/reduce on last changed upsert documents temporary collection
  • 94.
    map/reduce hits perday (incremental with savepoints) db.create_collection("visit_events", :capped => true, visit-elements :max => 50_000, :size => 5_000_000 collection ) map/reduce on last changed documents temporary collection
  • 95.
    map/reduce hits perday (incremental with savepoints) FINALIZE = <<-EOF function(key, value) { db.visits.update( visit { "_id": key }, collection { $inc: { "hits": value.hits } }, true ) } EOF upsert temporary collection
  • 96.
    map/reduce hits perday (incremental with savepoints) generate_events(number_of_events, db, now) from = from_last_updated(db) to = to_last_inserted(db) result = db["visit_events"].map_reduce( MAP, REDUCE, :finalize => FINALIZE, :query => { :_id => { :$gt => from, :$lte => to } }, :raw => true, :verbose => true ) db["visits"].save(:_id => "savepoint", :at => to)
  • 97.
    map/reduce hits perday (incremental with savepoints) generate_events(number_of_events, db, now) from = from_last_updated(db) to = to_last_inserted(db) result = db["visit_events"].map_reduce( MAP, REDUCE, $  ruby  src/incremental_mr.rb  -­‐e  10000   :finalize => FINALIZE,{  "result"=>"tmp.mr.mapreduce_1300892393_60", :query => { :_id => { :$gt => from, :$lte => to } },    "timeMillis"=>4333,    "timing"=>{...}, :raw => true,    "counts"=>{ :verbose => true        "input"=>10000,          "emit"=>10000, )        "output"=>196    },    "ok"=>1.0 db["visits"].save(:_id => "savepoint", } :at => to) {  "_id"=>"05241f07d0e3ab6a227e67b33ea0b509-­‐20110113",          "hits"=>26 }
  • 98.
    map/reduce hits perday (incremental with savepoints) generate_events(number_of_events, db, now) from = from_last_updated(db) to = to_last_inserted(db) result = db["visit_events"].map_reduce( MAP, REDUCE, $  ruby  src/incremental_mr.rb  -­‐e  4999   :finalize => FINALIZE,{  "result"=>"tmp.mr.mapreduce_1300892399_61", :query => { :_id => { :$gt => from, :$lte => to } },    "timeMillis"=>2159,    "timing"=>{...}, :raw => true,    "counts"=>{ :verbose => true        "input"=>4999,        "emit"=>4999, )        "output"=>146    },    "ok"=>1.0 db["visits"].save(:_id => "savepoint", } :at => to) {  "_id"=>"05241f07d0e3ab6a227e67b33ea0b509-­‐20110113",      "hits"=>64 }
  • 99.
    map/reduce hits perday (incremental with savepoints) def savepoint(db) db["visits"].find_one(:_id => "savepoint") or { "at" => BSON::ObjectId.from_time(10.years.ago) } end def from_last_updated(db) savepoint["at"] end def to_last_inserted(db) db["visit_events"].find.sort([:_id, Mongo::DESCENDING]).first["_id"] end
  • 100.
    query & design external map/reduce
  • 101.
    use an external mongod process to execute map/reduce jobs master slave replicate data
  • 102.
    use an external mongod process to execute map/reduce jobs master slave map/reduce on last replicated data
  • 103.
    use an external mongod process to execute map/reduce jobs master slave push back results
  • 104.
    look at the shell source: it is more powerful than you think
  • 105.
    query & design documents embedded or linked?
  • 106.
    life cycle: when the root document is deleted, can it stand on its own? if yes if no embedded linked
  • 107.
    are always fetched together? if yes if no embedded linked
  • 108.
    are its attributes used to find the root document? if yes if no embedded linked
  • 109.
    he’s small? if yes if no embedded linked
  • 110.
    is it unique, or are there fewer than hundreds? if yes if no embedded linked
  • 111.
  • 112.
    scale distributed reads with replica sets
  • 113.
    slave replicate read master read/write slave read replicate + Durability + fault tolerance
  • 114.
    scale (seems stupid but...) pump your hardware
  • 115.
    scale (seems stupid but...) call 10gen sure they can help :-)
  • 116.
    Questions? gabriele lana gabriele.lana@cleancode.it twitter: @gabrielelana http://joind.in/2943