SlideShare a Scribd company logo
1 of 116
Download to read offline
with
style
                gabriele lana
   gabriele.lana@cleancode.it
       twitter: @gabrielelana
         http://joind.in/2943
Style


query &
design


scale
mongo console

$ ~/Work/opt/mongodb-1.6.5/bin/mongod \
          --dbpath=~/Work/src/nosqlday/db/mongodb.01 \
          --logpath=~/Work/src/nosqlday/log/mongodb.01 \
          --fork --port 30001

$ ~/Work/opt/mongodb-1.6.5/bin/mongo localhost:30001
MongoDB  shell  version:  1.6.5
connecting  to:  localhost:30001/test

>  use  nosqlday
switched  to  db  nosqlday

>  db.getCollectionNames()
[  "system.indexes",  "users"  ]

>  db.users.find({  "name":  "Gabriele"  })
{  "_id"  :  ObjectId("4d8706767bb037a8a8f98db2"),  "name"  :  "Gabriele",  
"surname"  :  "Lana",  "job"  :  "softwarecraftsman"  }

>  exit
bye
ruby driver

require "mongo"

db = Mongo::Connection.new("localhost", 30001).db("nosqlday")

puts "Collections:"
db.collections.each do |collection|
  puts "\t#{collection.name}"
end

puts "Gabriele:"
db["users"].find(:name => "Gabriele").each do |user|
  puts "\t#{user["_id"]}"
end

db.connection.close
ruby driver

require "mongo"

db = Mongo::Connection.new("localhost", 30001).db("nosqlday")

puts "Collections:"
db.collections.each do |collection|
  puts "\t#{collection.name}"
end

puts "Gabriele:"
db["users"].find(:name => "Gabriele").each do |user|
  puts "\t#{user["_id"]}"
end

db.connection.close

$ ruby src/connect.rb
Collections:
    users
    system.indexes
Gabriele:
    4d8706767bb037a8a8f98db2
Style


query &
design


scale
Style
        know your
          driver
mongo

smart driver

document object mapper
puts "Gabriele:"
db["users"].find(:name => "Gabriele").each do |user|
  puts "\t#{user["_id"]}"
end

puts "Gabriele:"
db["users"].select{|user| user["name"] == "Gabriele"}.each do |user|
  puts "\t#{user["_id"]}"
end




mongo

smart driver
puts "Gabriele:"
db["users"].find(:name => "Gabriele").each do |user|
  puts "\t#{user["_id"]}"
end

puts "Gabriele:"
db["users"].select{|user| user["name"] == "Gabriele"}.each do |user|
  puts "\t#{user["_id"]}"
end

$ ruby src/find_vs_select.rb
Gabriele:
    4d8706767bb037a8a8f98db2

Gabriele:
    4d8706767bb037a8a8f98db2




mongo

smart driver
puts "Gabriele:"
db["users"].find(:name => "Gabriele").each do |user|
  puts "\t#{user["_id"]}"
end

puts "Gabriele:"
db["users"].select{|user| user["name"] == "Gabriele"}.each do |user|
  puts "\t#{user["_id"]}"
end




mongo

smart driver
Style
        incremental
           design
          based on
         application
          behavior
the best design is
 the one where needed
   data can be easily
     extracted
      the way you need
        to query your data
          should influence
            your design
Style
        incremental
           design
          based on
         application
         monitoring
monitoring and adapting is better
than doing it right the first time
    ...actually the first time
       is the worst time :-)
monitoring & adapting

>  db.setProfilingLevel(1,  5)                                                                                              
                                                                                                                            
{  "was"  :  1,  "slowms"  :  100,  "ok"  :  1  }

// after product usage find problematic queries

> db.system.profile.find().sort({millis:-1})
                                                                                                             
{  "ts":  "Mon  Mar  21  2011  14:30:56  GMT+0100  (CET)",
    "info":  "
        query  pomodorist.pomodori
            reslen:202
            nscanned:26950
            query:  
                {  $query:  {  task_id:  ObjectId('4d6f1d3931f2386e9c089796')  }}
            nreturned:1
      ",
      "millis":17
}
monitoring & adapting

>  db.pomodori.find({
        $query:  {  task_id:  ObjectId('4d6f1d3931f2386e9c089796')  },
        $explain:  true
})
                                                      
{  "cursor":  "BasicCursor",
    "nscanned":  26950,
    "nscannedObjects":  26950,
    "n":  1,
    "millis":  17,
    "indexBounds":  {  },
    "allPlans":  [
        {  "cursor"  :  "BasicCursor",  "indexBounds"  :  {  }  }  
    ]
}
monitoring & adapting

>  db.pomodori.ensureIndex({"task_id":  1})
                                                                                                              
>  db.pomodori.find({
        $query:  {  task_id:  ObjectId('4d6f1d3931f2386e9c089796')  },
        $explain:  true
})

{  "cursor":  "BtreeCursor  task_id_1",
    "nscanned":  1,
    "nscannedObjects":  1,
    "n":  1,
    "millis":  0,
    "indexBounds":  {
        "task_id":  [
       [
                ObjectId("4d6f1d3931f2386e9c089796"),
                ObjectId("4d6f1d3931f2386e9c089796")
          ]
    ]},  "allPlans":  [...]
}
Style


query &
design


scale
query &
design
           use $in
          operator
          for batch
            query
retrieve all objects with $in

users = [
{:name => "Gabriele", :surname => "Lana", :job => "softwarecraftsman"},
{:name => "Federico", :surname => "Galassi", :job => "softwarecraftsman"},
{:name => "Giordano", :surname => "Scalzo", :job => "softwarecraftsman"}
]


ids = users.map{|user| db["users"].insert(user)}

puts ids.map{|id| db["users"].find_one(:_id => id)}
retrieve all objects with $in

users = [
{:name => "Gabriele", :surname => "Lana", :job => "softwarecraftsman"},
{:name => "Federico", :surname => "Galassi", :job => "softwarecraftsman"},
{:name => "Giordano", :surname => "Scalzo", :job => "softwarecraftsman"}
]


ids = users.map{|user| db["users"].insert(user)}

puts ids.map{|id| db["users"].find_one(:_id => id)}

$ ruby src/find_by_all_ids.rb
{"_id"=>BSON::ObjectId('4d87605731f23824a0000001'), ...}
{"_id"=>BSON::ObjectId('4d87605731f23824a0000002'), ...}
{"_id"=>BSON::ObjectId('4d87605731f23824a0000003'), ...}
retrieve all objects with $in

users = [
{:name => "Gabriele", :surname => "Lana", :job => "softwarecraftsman"},
{:name => "Federico", :surname => "Galassi", :job => "softwarecraftsman"},
{:name => "Giordano", :surname => "Scalzo", :job => "softwarecraftsman"}
]


ids = users.map{|user| db["users"].insert(user)}

puts ids.map{|id| db["users"].find_one(:_id => id)}
retrieve all objects with $in

users = [
{:name => "Gabriele", :surname => "Lana", :job => "softwarecraftsman"},
{:name => "Federico", :surname => "Galassi", :job => "softwarecraftsman"},
{:name => "Giordano", :surname => "Scalzo", :job => "softwarecraftsman"}
]


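# before: one insert per document
# ids = users.map{|user| db["users"].insert(user)}
# after: a single batch insert
ids = db["users"].insert(users)

# before: one query per id
# puts ids.map{|id| db["users"].find_one(:_id => id)}
# after: a single query with $in
puts db["users"].find(:_id => {:$in => ids}).all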
retrieve all objects with $in

users = [
{:name => "Gabriele", :surname => "Lana", :job => "softwarecraftsman"},
{:name => "Federico", :surname => "Galassi", :job => "softwarecraftsman"},
{:name => "Giordano", :surname => "Scalzo", :job => "softwarecraftsman"}
]

# before: one insert per document
# ids = users.map{|user| db["users"].insert(user)}
# after: a single batch insert
ids = db["users"].insert(users)

# before: one query per id
# puts ids.map{|id| db["users"].find_one(:_id => id)}
# after: a single query with $in
puts db["users"].find(:_id => {:$in => ids}).all

$ ruby src/find_by_all_ids.rb
{"_id"=>BSON::ObjectId('4d87605731f23824a0000001'), ...}
{"_id"=>BSON::ObjectId('4d87605731f23824a0000002'), ...}
{"_id"=>BSON::ObjectId('4d87605731f23824a0000003'), ...}
query & design

use conventions to build smart object identifiers
conventions are fun to play with

> db.user_scores.find({}, {"_id": 1})

{ "_id" : "4d873ce631f238241d00000d-day-20091106" }
{ "_id" : "4d873ce631f238241d00000d-week-200944" }
{ "_id" : "4d873ce631f238241d00000d-month-200911" }
{ "_id" : "4d873ce631f238241d00000d-year-2009" }
{ "_id" : "4d873ce631f238241d00000d-user" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-advertising" }
{ "_id" : "4d873ce631f238241d00000d-week-200944-advertising" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-art" }
{ "_id" : "4d873ce631f238241d00000d-week-200944-art" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-artist" }
{ "_id" : "4d873ce631f238241d00000d-week-200944-artist" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-information" }
conventions are fun to play with

> db.user_scores.findOne(
    {"_id": "4d873ce631f238241d00000d-day-20091106"}
  )

{
     "_id" : "4d873ce631f238241d00000d-day-20091106",
     "pomodori"  :  15,
     "pomodori_squashed"  :  3,
     "breaks"  :  7,
     "tasks_created"  :  8,
     "tasks_done"  :  6,
     "estimation_accuracy"  :  0,
     "seconds_of_focused_time"  :  22500,
     "seconds_of_wasted_time"  :  1999,
     "seconds_of_breaks"  :  8820
}
conventions are fun to play with
(user scores in day per tag)

> db.user_scores.find(
    {"_id": /^4d873ce631f238241d00000d-day-20091106-/}, {"_id": 1}
  )

{ "_id" : "4d873ce631f238241d00000d-day-20091106-advertising" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-art" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-artist" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-blogging" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-culture" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-html" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-illustration" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-information" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-inspiration" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-marketing" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-movies" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-resources" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-technology" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-tool" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-tutorials" }
conventions are fun to play with
(list of tags per day)
> db.user_scores.find(
    {"_id": /^4d873ce631f238241d00000d-day-20091106-/}, {"_id": 1}

  ).map(function(document) {
    return document._id.replace(
      "4d873ce631f238241d00000d-day-20091106-", ""
    )
  })

[
     "advertising",
     "art",
     "artist",
     "blogging",
     "culture",
     "html",
     "illustration",
     "information",
     ...
]
conventions are fun to play with
(anchored regexp uses indexes)
> db.user_scores.find(
    {"_id": /^4d873ce631f238241d00000d-day-20091106-/}, {"_id": 1}
  ).explain()

{
   "cursor" : "BtreeCursor _id_ multi",
   "nscanned" : 15,
   "nscannedObjects" : 15,
   "n" : 15,
   "millis" : 0,
   "indexBounds" : {
      "_id" : [
         [
            "4d873ce631f238241d00000d-day-20091106-",
            "4d873ce631f238241d00000d-day-20091106."
         ],
         [
            /^4d873ce631f238241d00000d-day-20091106-/,
            /^4d873ce631f238241d00000d-day-20091106-/
         ]
      ]
   }
}
conventions are fun to play with
(anchored regexp uses indexes)
> db.user_scores.find(
    {"_id": /4d873ce631f238241d00000d-day-20091106-/}, {"_id": 1}
  ).explain()

{
     "cursor"  :  "BtreeCursor  _id_  multi",
     "nscanned"  :  109349,
     "nscannedObjects"  :  15,
     "n"  :  15,
     "millis"  :  217,
     "indexBounds"  :  {
        "_id"  :  [
           ...
        ]
     }
}
query & design

use “group” method to do small computations
without fetching related documents
group to compute data in mongo
(inject client side)

days = [ 20091110, 20091111, 20091112 ]
scores_id = %r{^4d87d00931f2380c7700000d-day-(#{days.join("|")})$}

scores = db["user_scores"].find(:_id => scores_id)

pomodori = scores.inject(0) do |pomodori, scores|
  pomodori + scores["pomodori"]
end

puts "Pomodori in days #{days.join(",")}: #{pomodori}"
group to compute data in mongo
(inject client side)

days = [ 20091110, 20091111, 20091112 ]
scores_id = %r{^4d87d00931f2380c7700000d-day-(#{days.join("|")})$}

scores = db["user_scores"].find(:_id => scores_id)

pomodori = scores.inject(0) do |pomodori, scores|
  pomodori + scores["pomodori"]
end

puts "Pomodori in days #{days.join(",")}: #{pomodori}"

$ ruby src/inject_for_reduce.rb
Pomodori in days 20091110,20091111,20091112: 36
group to compute data in mongo
(group server side)

days = [ 20091110, 20091111, 20091112 ]
scores_id = %r{^4d87d00931f2380c7700000d-day-(#{days.join("|")})$}

result = db["user_scores"].group(
  :cond => { :_id => scores_id },
  :initial => { :pomodori => 0 },
  :reduce => <<-EOF
    function(document, result) {
      result.pomodori += document.pomodori
    }
  EOF
)

puts "Pomodori in days #{days.join(",")}: #{result.first["pomodori"]}"
group to compute data in mongo
(group server side)

days = [ 20091110, 20091111, 20091112 ]
scores_id = %r{^4d87d00931f2380c7700000d-day-(#{days.join("|")})$}

result = db["user_scores"].group(
  :cond => { :_id => scores_id },
  :initial => { :pomodori => 0 },
  :reduce => <<-EOF
    function(document, result) {
      result.pomodori += document.pomodori
    }
  EOF
)

puts "Pomodori in days #{days.join(",")}: #{result.first["pomodori"]}"

$ ruby src/group_for_reduce.rb
Pomodori in days 20091110,20091111,20091112: 36
group to compute data in mongo
(ex. sum pomodori by tag “ruby”)

result = db["user_scores"].group(
  :cond => {
     :_id => /^4d87d00931f2380c7700000d-day-\d{8}-ruby$/
  },
  :initial => { :pomodori => 0, :days => 0 },
  :reduce => <<-EOF
     function(document, result) {
       result.days += 1
       result.pomodori += document.pomodori
     }
  EOF
).first

puts "In #{result["days"]} days, #{result["pomodori"]} done for ruby"
group to compute data in mongo
(ex. sum pomodori by tag “ruby”)

result = db["user_scores"].group(
  :cond => {
     :_id => /^4d87d00931f2380c7700000d-day-\d{8}-ruby$/
  },
  :initial => { :pomodori => 0, :days => 0 },
  :reduce => <<-EOF
     function(document, result) {
       result.days += 1
       result.pomodori += document.pomodori
     }
  EOF
).first

puts "In #{result["days"]} days, #{result["pomodori"]} pomodori"

$ ruby src/group_for_ruby_tag.rb
In 43 days, 45 pomodori
group to compute data in mongo
(ex. sum pomodori by tag “ruby”)
> db.user_scores.find({
    "_id": /^4d87d00931f2380c7700000d-day-\d{8}-ruby$/
  }).explain()

{
     "cursor"  :  "BtreeCursor  _id_  multi",
     "nscanned"  :  43,
     "nscannedObjects"  :  43,
     "n"  :  43,
     "millis"  :  3,
     "indexBounds"  :  {
        "_id"  :  [...]
     }
}
query & design

create indexes on arrays to create local
reverse indexes in documents
reverse index in place
(an array could be indexed)

>  db.tasks.find({  "tags":  {  $in:  [  "nosqlday"  ]  }  })
                                                                            
{  "_id"  :  ObjectId("4d7de446175ca8243d000004"),  
    "tags"  :  [  "nosqlday"  ],  
    "description"  :  "#nosqlday  keynote",  
    "is_recurrent"  :  false,
    "estimated"  :  0,  
    "worked_in"  :  [
   "Mon  Mar  14  2011  00:00:00  GMT+0100  (CET)",
   "Tue  Mar  15  2011  00:00:00  GMT+0100  (CET)"
    ],
    "done_at"  :  "Tue  Mar  15  2011  13:05:03  GMT+0100  (CET)",
    "todo_at"  :  null,
    "created_at"  :  "Mon  Mar  14  2011  10:47:50  GMT+0100  (CET)",
    "updated_at"  :  "Tue  Mar  15  2011  13:05:03  GMT+0100  (CET)",
    "keywords":  [  "nosqldai",  "keynot"  ],
    "user_id":  ObjectId("4d53996c137ce423ff000001"),
    "annotations"  :  [  ]
}
reverse index in place
(an array could be indexed)
>  db.tasks.getIndexes()
[
   {
      "name"  :  "_id_",
      "ns"  :  "app435386.tasks",
      "key"  :  {
         "_id"  :  1
      }
   },
   {
      "name"  :  "tags_1",
      "ns"  :  "app435386.tasks",
      "key"  :  {
         "tags"  :  1
      },
      "unique"  :  false
   },
      ...
]
reverse index in place
(container for deduced data, array)

db["orders"].insert({
   :placed_at => [
      now.strftime("%Y"),    # year: "2011"
      now.strftime("%Y%m"),  # month: "201103"
      now.strftime("%Yw%U"), # week: "2011w11"
      now.strftime("%Y%m%d") # day: "20110316"
   ],
   :user_id => user,
   :items => items_in_order.map{|item| item[:id]},
   :total => items_in_order.inject(0){|total,item| total += item[:price]}
})

# ...

db["orders"].ensure_index([["placed_at", Mongo::DESCENDING]])
reverse index in place
(container for deduced data, array)
>  db.orders.findOne()

{  "_id"  :  ObjectId("4d88bf1f31f23812de0003fd"),  
    "placed_at"  :  [  "2011",  "201103",  "2011w11",  "20110316"  ],
    "user_id"  :  ObjectId("4d88bf1f31f23812de0003e9"),
    "items"  :  [
        ObjectId("4d88bf1f31f23812de0003da"),
        ObjectId("4d88bf1f31f23812de000047"),
        ObjectId("4d88bf1f31f23812de000078"),
        ObjectId("4d88bf1f31f23812de000068"),
      ObjectId("4d88bf1f31f23812de000288")
    ],
    "total"  :  3502
}
reverse index in place
(container for deduced data, array)
>  db.orders.find({  "placed_at":  "20110310"  }).count()
77

>  db.orders.find({  "placed_at":  "20110310"  }).explain()
{
   "cursor" : "BtreeCursor placed_at_-1",
   "nscanned"  :  77,
   "nscannedObjects"  :  77,
   "n"  :  77,
   "millis"  :  0,
   "indexBounds"  :  {
      "placed_at"  :  [
         [
            "20110310",
            "20110310"
         ]
      ]
   }
}
reverse index in place
(container for deduced data, hash)

db["orders"].insert({
    :placed_at => [
       { :year => now.strftime("%Y") },
       { :month => now.strftime("%Y%m") },
       { :week => now.strftime("%Y%U") },
       { :day => now.strftime("%Y%m%d") }
    ],
    :user_id => user,
    :items => items_in_order.map{|item| item[:id]},
    :total => items_in_order.inject(0){|total,item| total += item[:price]}
 })

# ...

db["orders"].ensure_index([["placed_at", Mongo::DESCENDING]])
reverse index in place
(container for deduced data, hash)

>  db.orders.findOne()
                                                            
{  "_id"  :  ObjectId("4d88c31531f23812fe0003ea"),
    "placed_at"  :  [
        {  "year"  :  "2009"  },
        {  "month"  :  "200911"  },
        {  "week"  :  "200945"  },
        {  "day"  :  "20091109"  }
    ],
    "user_id"  :  ObjectId("4d88c31531f23812fe0003e9"),
    "items"  :  [
        ObjectId("4d88c31531f23812fe00013f"),
        ObjectId("4d88c31531f23812fe000176"),
        ObjectId("4d88c31531f23812fe0003e2"),
        ObjectId("4d88c31531f23812fe0003d1"),
        ObjectId("4d88c31531f23812fe0001c1"),
        ObjectId("4d88c31531f23812fe000118"),
        ObjectId("4d88c31531f23812fe00031d")
    ],
    "total"  :  10149
}
reverse index in place
(container for deduced data, hash)

>  db.orders.find({  "placed_at.week":  "201101"  }).count()                
331

>  db.orders.find({  "placed_at.week":  "201101"  }).explain()            
{
   "cursor"  :  "BasicCursor",
   "nscanned"  :  22374,
   "nscannedObjects"  :  22374,
   "n"  :  331,
   "millis"  :  23,
   "indexBounds"  :  {
     
   }
}
reverse index in place
(container for deduced data, hash)

>  db.orders.find({  "placed_at":  {  "week":  "201101"  }}).count()    
331

>  db.orders.find({  "placed_at":  {  "week":  "201101"  }}).explain()
{
   "cursor" : "BtreeCursor placed_at_-1",
   "nscanned" : 331,
   "nscannedObjects" : 331,
   "n" : 331,
   "millis" : 0,
   "indexBounds" : {
      "placed_at" : [
         [
            { "week" : "201101" },
            { "week" : "201101" }
         ]
      ]
   }
}
query &
design
          use dates but
           be aware of
          some pitfalls
plain dates are good too

db["orders"].insert({
    :placed_at => now,
    :user_id => user,
    :items => items_in_order.map{|item| item[:id]},
    :total => items_in_order.inject(0){|total,item| total += item[:price]}
 })

# ...

db["orders"].ensure_index([["placed_at", Mongo::DESCENDING]])
plain dates are good too

>  db.orders.findOne()
                                                                                                                                              
{
   "_id"  :  ObjectId("4d88d1f931f23813a10003ea"),
   "placed_at"  :  "Mon  Nov  09  2009  08:00:00  GMT+0100  (CET)",
   "user_id"  :  ObjectId("4d88d1f931f23813a10003e9"),
   "items"  :  [
      ObjectId("4d88d1f931f23813a100016d"),
      ObjectId("4d88d1f931f23813a1000346"),
      ObjectId("4d88d1f931f23813a10001e7"),
      ObjectId("4d88d1f931f23813a10000db"),
      ObjectId("4d88d1f931f23813a1000091"),
      ObjectId("4d88d1f931f23813a10001c1"),
      ObjectId("4d88d1f931f23813a10001d3"),
      ObjectId("4d88d1f931f23813a100031b"),
      ObjectId("4d88d1f931f23813a1000130")
   ],
   "total"  :  5871
}
plain dates are good too

>  db.orders.find({
        "placed_at":  {  
            $gte:  new  Date(2011,2,10),
            $lt:  new  Date(2011,2,11)
        }
    }).explain()

{
     "cursor" : "BtreeCursor placed_at_-1",
     "nscanned"  :  53,
     "nscannedObjects"  :  53,
     "n"  :  53,
     "millis"  :  0,
     "indexBounds"  :  {
        "placed_at"  :  [
           [
              "Fri  Mar  11  2011  00:00:00  GMT+0100  (CET)",
              "Thu  Mar  10  2011  00:00:00  GMT+0100  (CET)"
           ]
        ]
     }
plain dates are good too, but...
(total sold on this year’s mondays)

# find all mondays of the year
now = Time.now.beginning_of_year

now += 1.day until now.monday?
mondays = [ now ]
mondays << now += 7.days while now.year == Time.now.year

# find all orders placed on mondays
query = {
  :$or => mondays.map do |day|
    { :placed_at => {
        :$gte => day.beginning_of_day,
        :$lte => day.end_of_day
      }
    }
  end
}

puts query
plain dates are good too, but...
(total sold on this year’s mondays)

# find all mondays of the year
now = Time.now.beginning_of_year

now += 1.day until now.monday?
mondays = [ now ]
mondays << now += 7.days while now.year == Time.now.year

# find all orders placed on mondays
query = {
  :$or => mondays.map do |day|
    { :placed_at => {
        :$gte => day.beginning_of_day,
        :$lte => day.end_of_day
      }
    }
  end
}

puts query

$ ruby src/orders_on_mondays.rb
{:$or=>[
    {:placed_at=>{
        :$gte=>2011-01-03 00:00:00 +0100,
        :$lte=>2011-01-03 23:59:59 +0100
    }},
    {:placed_at=>{
        :$gte=>2011-01-10 00:00:00 +0100,
        :$lte=>2011-01-10 23:59:59 +0100
    }},
    {:placed_at=>{
        :$gte=>2011-01-17 00:00:00 +0100,
        :$lte=>2011-01-17 23:59:59 +0100
    }},
    ...
]}
plain dates are good too, but...
(it works but it’s too slooow)

db["orders"].find({
   :$or => mondays.map do |day|
     { :placed_at => {
         :$gte => day.beginning_of_day,
         :$lte => day.end_of_day
       }
     }
   end
})
plain dates are good too, but...
(why it’s too slow)
> db.orders.find({
    $or: [
      { "placed_at": { $gte: new Date(2011,2,3), $lt: new Date(2011,2,4) } },
      { "placed_at": { $gte: new Date(2011,2,10), $lt: new Date(2011,2,11) } }
    ]
  }).explain()

{
    "clauses" : [{
        "cursor" : "BtreeCursor placed_at_-1",
            "indexBounds" : {
                "placed_at" : [[
                    "Tue Mar 3 2011 00:00:00 GMT+0100 (CET)",
                    "Wed Mar 4 2011 00:00:00 GMT+0100 (CET)"
                ]]}
    }, {
        "cursor" : "BtreeCursor placed_at_-1",
            "indexBounds"  :  {
                "placed_at"  :  [[
                    "Tue  Mar  10  2011  00:00:00  GMT+0100  (CET)",
                    "Wed  Mar  11  2011  00:00:00  GMT+0100  (CET)"          
with destructured dates
(total sold on mondays this year)
>  db.orders.findOne()

{  "_id"  :  ObjectId("4d88bf1f31f23812de0003fd"),  
    "placed_at"  :  [  "2011",  "201103",  "2011w11",  "20110316"  ],
    "user_id"  :  ObjectId("4d88bf1f31f23812de0003e9"),
    "items"  :  [
        ObjectId("4d88bf1f31f23812de0003da"),
        ObjectId("4d88bf1f31f23812de000047"),
        ObjectId("4d88bf1f31f23812de000078"),
        ObjectId("4d88bf1f31f23812de000068"),
        ObjectId("4d88bf1f31f23812de000288")
    ],
    "total"  :  3502
}
with destructured dates
(total sold on mondays this year)

now = Time.now.beginning_of_year

now += 1.day until now.monday?
mondays = [ now ]
mondays << now += 7.days while now.year == Time.now.year

orders = db["orders"].find({
   :placed_at => {
     :$in => mondays.map {|day| day.strftime("%Y%m%d")}
   }
})

puts orders.explain
with destructured dates
(total sold on mondays this year)

now = Time.now.beginning_of_year

now += 1.day until now.monday?
mondays = [ now ]
mondays << now += 7.days while now.year == Time.now.year

orders = db["orders"].find({
   :placed_at => {
     :$in => mondays.map {|day| day.strftime("%Y%m%d")}
   }
})

puts orders.explain

$ ruby src/orders_on_mondays.rb
{ "cursor"=>"BtreeCursor placed_at_-1 multi",
  "nscanned"=>744,
  "nscannedObjects"=>744,
  "n"=>744,
  "millis"=>1,
  "indexBounds"=>{
    "placed_at"=>[
      ["20120102", "20120102"], ["20111226", "20111226"],
      ["20111219", "20111219"], ["20111212", "20111212"],
      ["20111205", "20111205"], ["20111128", "20111128"],
      ["20111121", "20111121"], ...
    ]
  }
}
query &
design
          full query
          power with
            $where
           operator
pomodori
(find who is ticking)

>  db.pomodori.findOne()
{
   "_id"  :  ObjectId("4d8916ed31f2381480000021"),
   "duration"  :  1500,
   "interruptions"  :  0,
   "after_break_of"  :  0,
   "started_at"  :  "Mon  Mar  14  2011  08:05:00  GMT+0100  (CET)",
   "squashed_at"  :  "Mon  Mar  14  2011  08:07:31  GMT+0100  (CET)",
   "in_day"  :  {
      "position"  :  1,
      "is_last"  :  false
   },
   "task_id"  :  ObjectId("4d8916ec31f2381480000014"),
   "user_id"  :  ObjectId("4d8916ec31f2381480000010"),
   "annotations"  :  [  ]
}
pomodori
(find who is ticking)

now = Time.now.yesterday.beginning_of_day + 10.hours
timestamp_of_now = now.to_i

ticking = db["pomodori"].find(
  :$where => <<-EOF
    var startedAt = this.started_at.getTime()/1000
    return ((startedAt + this.duration) > #{timestamp_of_now}) &&
      (startedAt < #{timestamp_of_now})
  EOF
)

puts ticking.map{|pomodoro| pomodoro["_id"]}
pomodori
(find who is ticking)

now = Time.now.yesterday.beginning_of_day + 10.hours
timestamp_of_now = now.to_i

ticking = db["pomodori"].find(
  :$where => <<-EOF
    var startedAt = this.started_at.getTime()/1000
    return ((startedAt + this.duration) > #{timestamp_of_now}) &&
      (startedAt < #{timestamp_of_now})
  EOF
)

puts ticking.map{|pomodoro| pomodoro["_id"]}

$ ruby src/find_who_is_ticking.rb
4d8916ef31f238148000011d
4d8916f231f2381480000271
4d8916f931f23814800004dd
4d8916f931f23814800004e0
pomodori
(find who is ticking for a user)

now = Time.now.yesterday.beginning_of_day + 10.hours
timestamp_of_now = now.to_i
user_id = BSON::ObjectId.from_string("4d8916ec31f2381480000010")

ticking = db["pomodori"].find(
  :user_id => user_id,
  :$where => <<-EOF
    var startedAt = this.started_at.getTime()/1000
    return ((startedAt + this.duration) > #{timestamp_of_now}) &&
      (startedAt < #{timestamp_of_now})
  EOF
)

puts ticking.map{|pomodoro| pomodoro["_id"]}
pomodori
(find who is ticking for a user)

now = Time.now.yesterday.beginning_of_day + 10.hours
timestamp_of_now = now.to_i
user_id = BSON::ObjectId.from_string("4d8916ec31f2381480000010")

ticking = db["pomodori"].find(
  :user_id => user_id,
  :$where => <<-EOF
    var startedAt = this.started_at.getTime()/1000
    return ((startedAt + this.duration) > #{timestamp_of_now}) &&
      (startedAt < #{timestamp_of_now})
  EOF
)

puts ticking.map{|pomodoro| pomodoro["_id"]}

$ ruby src/find_who_is_ticking_for_an_user.rb
4d8916ef31f238148000011d
pomodori
(related to tasks tagged with “maps”)

related_to_maps = db["pomodori"].find(
  :$where => <<-EOF
    db.tasks.findOne({ "_id": this.task_id }).tags.indexOf("maps") >= 0
  EOF
)

puts related_to_maps.map{|pomodoro| pomodoro["_id"]}
pomodori
(related to tasks tagged with “maps”)

related_to_maps = db["pomodori"].find(
  :$where => <<-EOF
    db.tasks.findOne({ "_id": this.task_id }).tags.indexOf("maps") >= 0
  EOF
)

puts related_to_maps.map{|pomodoro| pomodoro["_id"]}

$ ruby src/related_to_maps.rb
4d8916fa31f2381480000579
4d8916fa31f238148000057b
4d8916fa31f238148000057d
4d8916fa31f2381480000580
pomodori
(don’t be carried away :-))

related_to_maps = db["pomodori"].find(
  :$where => <<-EOF
    db.tasks.findOne({ "_id": this.task_id }).tags.indexOf("maps") >= 0
  EOF
)

puts related_to_maps.explain

$ ruby src/related_to_maps.rb
{ "cursor"=>"BasicCursor",
  "nscanned"=>461,
  "nscannedObjects"=>461,
  "n"=>4,
  "millis"=>52,
  "indexBounds"=>{},
  "allPlans"=>[...]
}
pomodori
(related to... a better solution)

related_to_maps = db["pomodori"].find(:task_id => {
   :$in => db["tasks"].find(
     {:tags => "maps"}, :fields => {:_id => 1}
   ).map{|task| task["_id"]}
})

puts related_to_maps.map{|pomodoro| pomodoro["_id"]}

$ ruby src/related_to_maps.rb
4d8916fa31f2381480000579
4d8916fa31f238148000057b
4d8916fa31f238148000057d
4d8916fa31f2381480000580
pomodori
(related to... a better solution)

related_to_maps = db["pomodori"].find(:task_id => {
   :$in => db["tasks"].find(
     {:tags => "maps"}, :fields => {:_id => 1}
   ).map{|task| task["_id"]}
})
                       $  ruby  src/related_to_maps.rb  
                       {  "cursor"=>"BtreeCursor  tags_1",
puts   related_to_maps.map{|pomodoro| pomodoro["_id"]}
                           "nscanned"=>3,
                           "nscannedObjects"=>3,
                           "n"=>3,
                           "millis"=>0,
                           ...
                       }

                        {  "cursor"=>"BtreeCursor  task_id_1  multi",
                            "nscanned"=>4,
                            "nscannedObjects"=>4,
                            "n"=>4,
                            "millis"=>0,
                            ...
                        }
query &
design
             real time
          analytics with
           increments
keep track of url’s visits
(upsert with custom id)

result = db["visits"].update(
  { :_id => Digest::MD5.hexdigest(url) },
  { :$inc => { :hits => 1 } },
  :upsert => true,
  :safe => true
)

puts "Update: #{result.inspect}"

puts db["visits"].find_one(:_id => Digest::MD5.hexdigest(url))
keep track of url’s visits
(upsert with custom id)

result = db["visits"].update(
  { :_id => Digest::MD5.hexdigest(url) },
  { :$inc => { :hits => 1 } },
  :upsert => true,
  :safe => true
)
                          $  ruby  src/realtime_analytics.rb  
                          Update:  {
puts   "Update: #{result.inspect}"
                              "err"=>nil,
                              "updatedExisting"=>false,
                              "n"=>1,
puts   db["visits"].find_one(:_id => Digest::MD5.hexdigest(url))
                              "ok"=>1.0
                          }
                          {"_id"=>"2d86a774beffe90e715a8028c7bd177b",  "hits"=>1}

                              $  ruby  src/realtime_analytics.rb  
                              Update:  {
                                  "err"=>nil,
                                  "updatedExisting"=>true,
                                  "n"=>1,
                                  "ok"=>1.0
                              }
                              {"_id"=>"2d86a774beffe90e715a8028c7bd177b",  "hits"=>2}
url’s visits aggregated by time
(upsert with multiple documents)

url_digest = Digest::MD5.hexdigest(url)
ids = [
  [ url_digest, Time.now.strftime("%Y%m%d") ].join("-"),
  [ url_digest, Time.now.strftime("%Y%m") ].join("-"),
  [ url_digest, Time.now.strftime("%Y") ].join("-"),
  [ url_digest, user_id ].join("-")
]
puts "Expect to upsert: \n#{ids}"

result = db["visits"].update(
  { :_id => { :$in => ids } },
  { :$inc => { :hits => 1 } },
  :multi => true,
  :upsert => true,
  :safe => true
)
puts result.inspect
puts db["visits"].all
url’s visits aggregated by time
(upsert with multiple documents)

url_digest = Digest::MD5.hexdigest(url)
ids = [
  [ url_digest, Time.now.strftime("%Y%m%d") ].join("-"),
  [ url_digest, Time.now.strftime("%Y%m") ].join("-"),
  [ url_digest, Time.now.strftime("%Y") ].join("-"),
  [ url_digest, user_id ].join("-")
]
puts "Expect to upsert: \n#{ids}"

result = db["visits"].update(
  { :_id => { :$in => ids } },
  { :$inc => { :hits => 1 } },
  :multi => true,
  :upsert => true,
  :safe => true
)
puts result.inspect
puts db["visits"].all
                        $ ruby src/realtime_analytics_with_aggregation.rb
                        Expect to upsert:[
                              "2d86a774beffe90e715a8028c7bd177b-20110323",
                              "2d86a774beffe90e715a8028c7bd177b-201103",
                              "2d86a774beffe90e715a8028c7bd177b-2011",
                              "2d86a774beffe90e715a8028c7bd177b-4d899fab31f238165c000001"
                          ]
                        {  "err"=>nil,
                            "updatedExisting"=>false,
                            "upserted"=>BSON::ObjectId('4d899fabe23bd37e768ae76d'),
                            "n"=>1,
                            "ok"=>1.0
                        }
                        {"_id"=>BSON::ObjectId('4d899fabe23bd37e768ae76d'),  "hits"=>1}
url’s visits aggregated by time
(upsert with multiple documents)

url_digest = Digest::MD5.hexdigest(url)
ids = [
  [ url_digest, Time.now.strftime("%Y%m%d") ].join("-"),
  [ url_digest, Time.now.strftime("%Y%m") ].join("-"),
  [ url_digest, Time.now.strftime("%Y") ].join("-"),
  [ url_digest, user_id ].join("-")
]
puts "Expect to upsert: \n#{ids}"

result = db["visits"].update(
  { :_id => { :$in => ids } },
  { :$inc => { :hits => 1 } },
  :multi => true,
  :upsert => true,
  :safe => true
)
puts result.inspect
puts db["visits"].all
                        $ ruby src/realtime_analytics_with_aggregation.rb
                        Expect to upsert:[
                              "2d86a774beffe90e715a8028c7bd177b-20110323",
                              "2d86a774beffe90e715a8028c7bd177b-201103",
                              "2d86a774beffe90e715a8028c7bd177b-2011",
                              "2d86a774beffe90e715a8028c7bd177b-4d899fab31f238165c000001"
                          ]
                        {  "err"=>nil,
                            "updatedExisting"=>false,
                            "upserted"=>BSON::ObjectId('4d899fabe23bd37e768ae76e'),
                            "n"=>1,
                            "ok"=>1.0
                        }
                        {"_id"=>BSON::ObjectId('4d899fabe23bd37e768ae76d'),  "hits"=>1}
                        {"_id"=>BSON::ObjectId('4d899fabe23bd37e768ae76e'),  "hits"=>1}
url’s visits aggregated by time
(look before you leap)
result = db["visits"].update(
  { :_id => { :$in => ids } },
  { :$inc => { :hits => 1 } },
  :multi => true,
  :upsert => true,
  :safe => true
)

if result["n"] != ids.size
  updated_ids = db["visits"].find(
    { :_id => { :$in => ids } }, :fields => { :_id => true }
  ).map{|document| document["_id"]}

  db["visits"].insert((ids - updated_ids).map do |id|
    { :_id => id, :hits => 1 }
  end)

  db["visits"].remove(:_id => result["upserted"]) if result["upserted"]
end
url’s visits aggregated by time
(look before you leap)
result = db["visits"].update(
  { :_id => { :$in => ids } },
  { :$inc => { :hits => 1 } },
  :multi => true,
  :upsert => true,
  :safe => true
)                       $  ruby  src/realtime_analytics_with_aggregation.rb
                        {  "err"=>nil,  
                            "updatedExisting"=>false,
if result["n"] != ids.size
                            "upserted"=>BSON::ObjectId('4d89a5ebe23bd37e768ae76f'),  
                            "n"=>1,
  updated_ids = db["visits"].find(
                            "ok"=>1.0
    { :_id => { :$in => ids } }, :fields => { :_id => true }
  ).map{|document| document["_id"]}
                        }

                        {"_id"=>"<url_digest>-­‐20110323",  "hits"=>1}
  db["visits"].insert((ids - updated_ids).map do |id|
                        {"_id"=>"<url_digest>-­‐201103",  "hits"=>1}
                        {"_id"=>"<url_digest>-­‐2011",  "hits"=>1}
    { :_id => id, :hits {"_id"=>"<url_digest>-­‐4d89a43b31f238167a000001",  "hits"=>1}
                        => 1 }
  end)

  db["visits"].remove(:_id => result["upserted"]) if result["upserted"]
end
url’s visits aggregated by time
(look before you leap)
result = db["visits"].update(
  { :_id => { :$in => ids } },
  { :$inc => { :hits => 1 } },
  :multi => true,
  :upsert => true,
  :safe => true
)                       $  ruby  src/realtime_analytics_with_aggregation.rb
                        {  "err"=>nil,
                            "updatedExisting"=>true,
if result["n"] != ids.size
                            "n"=>3,
                            "ok"=>1.0
  updated_ids = db["visits"].find(
                        }
    { :_id => { :$in => ids } }, :fields => { :_id => true }
                        {"_id"=>"<url_digest>-­‐20110323",  "hits"=>2}
  ).map{|document| document["_id"]}
                        {"_id"=>"<url_digest>-­‐201103",  "hits"=>2}
                        {"_id"=>"<url_digest>-­‐2011",  "hits"=>2}
  db["visits"].insert((ids - updated_ids).map do |id|
                        {"_id"=>"<url_digest>-­‐4d89a43b31f238167a000001",  "hits"=>1}
                        {"_id"=>"<url_digest>-­‐4d89a44231f238167e000001",  "hits"=>1}
    { :_id => id, :hits => 1         }
  end)

  db["visits"].remove(:_id => result["upserted"]) if result["upserted"]
end
query &
design
          incremental
          map/reduce
map/reduce hits per day
(we have raw events)

>  db.visit_events.findOne()
{
   "_id"  :  ObjectId("4d89fc6531f2381d2c00000b"),
   "url"  :  "8aa8b68e0b849f70df6dbb3031c6182b",
   "user_id"  :  ObjectId("4d89fc6531f2381d2c000005"),
   "at"  :  "Thu  Jan  13  2011  08:00:06  GMT+0100  (CET)"
}
map/reduce hits per day
(generate data WITH something like)

# Inserts `visits` synthetic visit events into the "visit_events" collection.
#
# visits - number of events to insert.
# db     - database handle; indexed with the collection name.
# now    - starting timestamp; advanced by a random BETWEEN_VISITS interval
#          before each insert, so events get increasing :at values.
#
# Each event carries the MD5 digest of a random URL from URLS and the :id of
# a random entry from USERS.
def generate_events(visits, db, now)
  visits.times do
    now += BETWEEN_VISITS.sample.seconds
    events = db["visit_events"]
    url_digest = Digest::MD5.hexdigest(URLS.sample)
    visitor_id = USERS.sample[:id]
    events.insert(:url => url_digest, :user_id => visitor_id, :at => now)
  end
end

generate_events(10_000, db, now)
map/reduce hits per day
(simple map/reduce)
MAP = <<-EOF
  function() {
    emit([ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 })
  }
EOF

REDUCE = <<-EOF
  function(key, values) {
    var hits = 0
    for(var index in values) hits += values[index]["hits"]
    return { "hits": hits }
  }
EOF

result = db["visit_events"].map_reduce(
  MAP, REDUCE, :out => "visits", :raw => true, :verbose => true
)

puts result.inspect
map/reduce hits per day
(Date.prototype.format doesn’t exist)
MAP = <<-EOF
  function() {
    emit([ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 })
  }
EOF

REDUCE = <<-EOF
  function(key, values) {
    var hits = 0
    for(var index in values) hits += values[index]["hits"]
    return { "hits": hits }
  }
EOF

result = db["visit_events"].map_reduce(
  MAP, REDUCE, :out => "visits", :raw => true, :verbose => true
)

puts result.inspect
map/reduce hits per day
(implement format in place)

MAP = <<-EOF
  function() {
    Date.prototype.format = function(format) {
      ...
    }
    emit([ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 })
  }
EOF

REDUCE = <<-EOF
  function(key, values) {
    var hits = 0
    for(var index in values) hits += values[index]["hits"]
    return { "hits": hits }
  }
EOF
map/reduce hits per day
(implement format only if needed)

MAP = <<-EOF
  function() {
    if (!Date.prototype.format) {
      Date.prototype.format = function(format) {
        ...
      }
    }
    emit([ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 })
  }
EOF

REDUCE = <<-EOF
  function(key, values) {
    var hits = 0
    for(var index in values) hits += values[index]["hits"]
    return { "hits": hits }
  }
EOF
map/reduce hits per day
(implement format once and for all)
db[Mongo::DB::SYSTEM_JS_COLLECTION].save(
  :_id => "formatDate",
  :value => BSON::Code.new(
    <<-EOF
      function(date, format) {
        if (!Date.prototype.format) {
           Date.prototype.format = function(format) { ... }
        }
        return date.format(format)
      }
    EOF
  )
)

MAP = <<-EOF
  function() {
    emit([ this.url, formatDate(this.at, "Ymd") ].join("-"), {"hits":1})
  }
EOF
map/reduce hits per day
(implement format once and for all)
db[Mongo::DB::SYSTEM_JS_COLLECTION].save(
  :_id => "load",
  :value => BSON::Code.new(
    <<-EOF
      function(module) {
        if ((module === "date") && !Date.prototype.format) {
           Date.prototype.format = function(format) { ... }
        }
        return true
      }
    EOF
  )
)


MAP = <<-EOF
  function() {
    load("date") && emit(
      [ this.url, this.at.format("Ymd") ].join("-"),
      { "hits": 1 }
    )
  }
EOF
map/reduce hits per day
(ok, but could be taking too long)
MAP = <<-EOF
  function() {
    emit([ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 })
  }
EOF

REDUCE = <<-EOF         $  ruby  src/incremental_mr.rb
  function(key, values)   
                         {
                        {  "result"=>"visits",
    var hits = 0            "timeMillis"=>4197,
    for(var index in values) hits += values[index]["hits"]
                            "timing"=>  {
                                "mapTime"=>3932,
    return { "hits": hits }
                                "emitLoop"=>4170,
  }                             "total"=>4197
EOF                         },
                            "counts"=>  {
                                  "input"=>10000,
result   = db["visit_events"].map_reduce(
                                  "emit"=>10000,
                                  "output"=>200
  MAP,   REDUCE, :out => "visits", :raw =>
                              },                    true, :verbose => true
)                             "ok"=>1.0
                          }

puts result.inspect
map/reduce hits per day
(ok, every time we need to start over)
>  db.visits.find()                                                  

{  "_id"  :  "019640ff7952425b1b8695605459d223-­‐20110316",
    "value"  :  {  "hits"  :  47  }
}

{  "_id"  :  "019640ff7952425b1b8695605459d223-­‐20110317",
    "value"  :  {  "hits"  :  49  }
}

{  "_id"  :  "019640ff7952425b1b8695605459d223-­‐20110318",
    "value"  :  {  "hits"  :  59  }  
}

{  "_id"  :  "019640ff7952425b1b8695605459d223-­‐20110319",
    "value"  :  {  "hits"  :  37  }  
}
map/reduce hits per day
(incremental with savepoints)



         visit-elements       visit
           collection      collection



     map/reduce
   on last changed        upsert
      documents




            temporary
            collection
map/reduce hits per day
(incremental with savepoints)

db.create_collection("visit_events",
  :capped => true,
                                        visit-elements
  :max => 50_000,
  :size => 5_000_000                      collection
)



                                    map/reduce
                                  on last changed
                                     documents




                                           temporary
                                           collection
map/reduce hits per day
(incremental with savepoints)

FINALIZE = <<-EOF
  function(key, value) {
    db.visits.update(                         visit
      { "_id": key },                      collection
      { $inc: { "hits": value.hits } },
      true
    )
  }
EOF
                                          upsert




                 temporary
                 collection
map/reduce hits per day
(incremental with savepoints)

generate_events(number_of_events, db, now)

from = from_last_updated(db)
to = to_last_inserted(db)

result = db["visit_events"].map_reduce(
  MAP, REDUCE,
  :finalize => FINALIZE,
  :query => { :_id => { :$gt => from, :$lte => to } },
  :raw => true,
  :verbose => true
)

db["visits"].save(:_id => "savepoint", :at => to)
map/reduce hits per day
(incremental with savepoints)

generate_events(number_of_events, db, now)

from = from_last_updated(db)
to = to_last_inserted(db)

result = db["visit_events"].map_reduce(
  MAP, REDUCE,          $  ruby  src/incremental_mr.rb  -­‐e  10000
                          
  :finalize => FINALIZE,{  "result"=>"tmp.mr.mapreduce_1300892393_60",
  :query => { :_id => { :$gt => from, :$lte => to } },
                            "timeMillis"=>4333,
                            "timing"=>{...},
  :raw => true,             "counts"=>{
  :verbose => true              "input"=>10000,  
                                "emit"=>10000,
)                               "output"=>196
                                },
                                "ok"=>1.0
db["visits"].save(:_id     => "savepoint",
                            }                      :at => to)

                             {  "_id"=>"05241f07d0e3ab6a227e67b33ea0b509-­‐20110113",      
                                 "hits"=>26
                             }
map/reduce hits per day
(incremental with savepoints)

generate_events(number_of_events, db, now)

from = from_last_updated(db)
to = to_last_inserted(db)

result = db["visit_events"].map_reduce(
  MAP, REDUCE,          $  ruby  src/incremental_mr.rb  -­‐e  4999
                          
  :finalize => FINALIZE,{  "result"=>"tmp.mr.mapreduce_1300892399_61",
  :query => { :_id => { :$gt => from, :$lte => to } },
                            "timeMillis"=>2159,
                            "timing"=>{...},
  :raw => true,             "counts"=>{
  :verbose => true              "input"=>4999,
                                "emit"=>4999,
)                               "output"=>146
                                },
                                "ok"=>1.0
db["visits"].save(:_id     => "savepoint",
                            }                     :at => to)

                             {  "_id"=>"05241f07d0e3ab6a227e67b33ea0b509-­‐20110113",  
                                 "hits"=>64
                             }
map/reduce hits per day
(incremental with savepoints)

# Looks up the "savepoint" document in the "visits" collection.
#
# Returns the stored document when one exists; otherwise a stand-in hash
# whose "at" is an ObjectId generated for a time ten years in the past.
def savepoint(db)
  stored = db["visits"].find_one(:_id => "savepoint")
  return stored if stored

  { "at" => BSON::ObjectId.from_time(10.years.ago) }
end

# Returns the "at" value of the current savepoint, i.e. the point from which
# the next incremental run should pick up.
#
# db - database handle, forwarded to savepoint.
def from_last_updated(db)
  # savepoint takes the db handle as a required argument; calling it bare
  # raised ArgumentError instead of reading the savepoint.
  savepoint(db)["at"]
end

# Returns the "_id" of the first document of "visit_events" when the
# collection is sorted by _id in descending order.
#
# NOTE(review): raises NoMethodError if the cursor yields no document
# (first returns nil) — confirm callers only run this after events exist.
def to_last_inserted(db)
  newest_first = db["visit_events"].find.sort([:_id, Mongo::DESCENDING])
  newest = newest_first.first
  newest["_id"]
end
query &
design
           external
          map/reduce
use an external mongod process
to execute map/reduce jobs



  master                     slave

            replicate data
use an external mongod process
to execute map/reduce jobs



  master                 slave



            map/reduce
              on last
            replicated
               data
use an external mongod process
to execute map/reduce jobs



  master                       slave

           push back results
look at the shell source:
it is more powerful than you think
query &
design    documents
          embedded
              or
           linked?
life cycle:
when the root document
  is deleted, can it
 stand on its own?




  if yes       if no
embedded      linked
are they always fetched
     together?




  if yes       if no
embedded      linked
are its attributes
used to find the root
      document?




  if yes        if no
embedded       linked
is it small?




  if yes       if no
embedded      linked
is it unique, or are
   there fewer than
     hundreds?




  if yes       if no
embedded      linked
Style


query &
design


scale
scale
        distributed
        reads with
          replica
           sets
slave
                                   replicate




          read
                                  master

             read/write



                          slave
           read



                                   replicate



+ Durability
+ fault tolerance
scale
        (seems stupid but...)

           pump
           your
         hardware
scale
        (seems stupid but...)

         call 10gen
        sure they can
           help :-)
Questions?

                gabriele lana
   gabriele.lana@cleancode.it
       twitter: @gabrielelana
         http://joind.in/2943

More Related Content

What's hot

MongoDB .local London 2019: Tips and Tricks++ for Querying and Indexing MongoDB
MongoDB .local London 2019: Tips and Tricks++ for Querying and Indexing MongoDBMongoDB .local London 2019: Tips and Tricks++ for Querying and Indexing MongoDB
MongoDB .local London 2019: Tips and Tricks++ for Querying and Indexing MongoDBMongoDB
 
Intro to MongoDB and datamodeling
Intro to MongoDB and datamodeling Intro to MongoDB and datamodeling
Intro to MongoDB and datamodeling rogerbodamer
 
MongoDB .local Munich 2019: Tips and Tricks++ for Querying and Indexing MongoDB
MongoDB .local Munich 2019: Tips and Tricks++ for Querying and Indexing MongoDBMongoDB .local Munich 2019: Tips and Tricks++ for Querying and Indexing MongoDB
MongoDB .local Munich 2019: Tips and Tricks++ for Querying and Indexing MongoDBMongoDB
 
MongoDB Performance Tuning
MongoDB Performance TuningMongoDB Performance Tuning
MongoDB Performance TuningPuneet Behl
 
Mongo db basic installation
Mongo db basic installationMongo db basic installation
Mongo db basic installationKishor Parkhe
 
MongoDB World 2016: Deciphering .explain() Output
MongoDB World 2016: Deciphering .explain() OutputMongoDB World 2016: Deciphering .explain() Output
MongoDB World 2016: Deciphering .explain() OutputMongoDB
 
4시간만에 따라해보는 Windows 10 앱 개발 샘플코드
4시간만에 따라해보는 Windows 10 앱 개발 샘플코드4시간만에 따라해보는 Windows 10 앱 개발 샘플코드
4시간만에 따라해보는 Windows 10 앱 개발 샘플코드영욱 김
 
Indexing and Query Optimization
Indexing and Query OptimizationIndexing and Query Optimization
Indexing and Query OptimizationMongoDB
 
Swift - 혼자 공부하면 분명히 안할테니까 같이 공부하기
Swift - 혼자 공부하면 분명히 안할테니까 같이 공부하기Swift - 혼자 공부하면 분명히 안할테니까 같이 공부하기
Swift - 혼자 공부하면 분명히 안할테니까 같이 공부하기Suyeol Jeon
 
JavaScript Objects and OOP Programming with JavaScript
JavaScript Objects and OOP Programming with JavaScriptJavaScript Objects and OOP Programming with JavaScript
JavaScript Objects and OOP Programming with JavaScriptLaurence Svekis ✔
 
The Ring programming language version 1.5.2 book - Part 43 of 181
The Ring programming language version 1.5.2 book - Part 43 of 181The Ring programming language version 1.5.2 book - Part 43 of 181
The Ring programming language version 1.5.2 book - Part 43 of 181Mahmoud Samir Fayed
 
Desenvolvimento web com Ruby on Rails (parte 5)
Desenvolvimento web com Ruby on Rails (parte 5)Desenvolvimento web com Ruby on Rails (parte 5)
Desenvolvimento web com Ruby on Rails (parte 5)Joao Lucas Santana
 
First app online conf
First app   online confFirst app   online conf
First app online confMongoDB
 
The Ring programming language version 1.8 book - Part 49 of 202
The Ring programming language version 1.8 book - Part 49 of 202The Ring programming language version 1.8 book - Part 49 of 202
The Ring programming language version 1.8 book - Part 49 of 202Mahmoud Samir Fayed
 
From mysql to MongoDB(MongoDB2011北京交流会)
From mysql to MongoDB(MongoDB2011北京交流会)From mysql to MongoDB(MongoDB2011北京交流会)
From mysql to MongoDB(MongoDB2011北京交流会)Night Sailer
 
1403 app dev series - session 5 - analytics
1403   app dev series - session 5 - analytics1403   app dev series - session 5 - analytics
1403 app dev series - session 5 - analyticsMongoDB
 
Storing tree structures with MongoDB
Storing tree structures with MongoDBStoring tree structures with MongoDB
Storing tree structures with MongoDBVyacheslav
 
The Ring programming language version 1.2 book - Part 32 of 84
The Ring programming language version 1.2 book - Part 32 of 84The Ring programming language version 1.2 book - Part 32 of 84
The Ring programming language version 1.2 book - Part 32 of 84Mahmoud Samir Fayed
 

What's hot (20)

MongoDB .local London 2019: Tips and Tricks++ for Querying and Indexing MongoDB
MongoDB .local London 2019: Tips and Tricks++ for Querying and Indexing MongoDBMongoDB .local London 2019: Tips and Tricks++ for Querying and Indexing MongoDB
MongoDB .local London 2019: Tips and Tricks++ for Querying and Indexing MongoDB
 
Intro to MongoDB and datamodeling
Intro to MongoDB and datamodeling Intro to MongoDB and datamodeling
Intro to MongoDB and datamodeling
 
MongoDB .local Munich 2019: Tips and Tricks++ for Querying and Indexing MongoDB
MongoDB .local Munich 2019: Tips and Tricks++ for Querying and Indexing MongoDBMongoDB .local Munich 2019: Tips and Tricks++ for Querying and Indexing MongoDB
MongoDB .local Munich 2019: Tips and Tricks++ for Querying and Indexing MongoDB
 
MongoDB Performance Tuning
MongoDB Performance TuningMongoDB Performance Tuning
MongoDB Performance Tuning
 
Mongo db basic installation
Mongo db basic installationMongo db basic installation
Mongo db basic installation
 
I regret nothing
I regret nothingI regret nothing
I regret nothing
 
MongoDB World 2016: Deciphering .explain() Output
MongoDB World 2016: Deciphering .explain() OutputMongoDB World 2016: Deciphering .explain() Output
MongoDB World 2016: Deciphering .explain() Output
 
4시간만에 따라해보는 Windows 10 앱 개발 샘플코드
4시간만에 따라해보는 Windows 10 앱 개발 샘플코드4시간만에 따라해보는 Windows 10 앱 개발 샘플코드
4시간만에 따라해보는 Windows 10 앱 개발 샘플코드
 
Indexing and Query Optimization
Indexing and Query OptimizationIndexing and Query Optimization
Indexing and Query Optimization
 
Swift - 혼자 공부하면 분명히 안할테니까 같이 공부하기
Swift - 혼자 공부하면 분명히 안할테니까 같이 공부하기Swift - 혼자 공부하면 분명히 안할테니까 같이 공부하기
Swift - 혼자 공부하면 분명히 안할테니까 같이 공부하기
 
JavaScript Objects and OOP Programming with JavaScript
JavaScript Objects and OOP Programming with JavaScriptJavaScript Objects and OOP Programming with JavaScript
JavaScript Objects and OOP Programming with JavaScript
 
The Ring programming language version 1.5.2 book - Part 43 of 181
The Ring programming language version 1.5.2 book - Part 43 of 181The Ring programming language version 1.5.2 book - Part 43 of 181
The Ring programming language version 1.5.2 book - Part 43 of 181
 
Desenvolvimento web com Ruby on Rails (parte 5)
Desenvolvimento web com Ruby on Rails (parte 5)Desenvolvimento web com Ruby on Rails (parte 5)
Desenvolvimento web com Ruby on Rails (parte 5)
 
First app online conf
First app   online confFirst app   online conf
First app online conf
 
The Ring programming language version 1.8 book - Part 49 of 202
The Ring programming language version 1.8 book - Part 49 of 202The Ring programming language version 1.8 book - Part 49 of 202
The Ring programming language version 1.8 book - Part 49 of 202
 
From mysql to MongoDB(MongoDB2011北京交流会)
From mysql to MongoDB(MongoDB2011北京交流会)From mysql to MongoDB(MongoDB2011北京交流会)
From mysql to MongoDB(MongoDB2011北京交流会)
 
1403 app dev series - session 5 - analytics
1403   app dev series - session 5 - analytics1403   app dev series - session 5 - analytics
1403 app dev series - session 5 - analytics
 
Storing tree structures with MongoDB
Storing tree structures with MongoDBStoring tree structures with MongoDB
Storing tree structures with MongoDB
 
MongoDB
MongoDB MongoDB
MongoDB
 
The Ring programming language version 1.2 book - Part 32 of 84
The Ring programming language version 1.2 book - Part 32 of 84The Ring programming language version 1.2 book - Part 32 of 84
The Ring programming language version 1.2 book - Part 32 of 84
 

Viewers also liked

Minimum Viable Product
Minimum Viable ProductMinimum Viable Product
Minimum Viable ProductGabriele Lana
 
Parse Everything With Elixir
Parse Everything With ElixirParse Everything With Elixir
Parse Everything With ElixirGabriele Lana
 
Resource Oriented Design
Resource Oriented DesignResource Oriented Design
Resource Oriented DesignGabriele Lana
 
Professional Programmer
Professional ProgrammerProfessional Programmer
Professional ProgrammerGabriele Lana
 
Milano Legacy Coderetreat 2013
Milano Legacy Coderetreat 2013Milano Legacy Coderetreat 2013
Milano Legacy Coderetreat 2013Gabriele Lana
 
Agileday Coderetreat 2013
Agileday Coderetreat 2013Agileday Coderetreat 2013
Agileday Coderetreat 2013Gabriele Lana
 
Professional Programmer (3 Years Later)
Professional Programmer (3 Years Later)Professional Programmer (3 Years Later)
Professional Programmer (3 Years Later)Gabriele Lana
 
Introduction to Nodejs
Introduction to NodejsIntroduction to Nodejs
Introduction to NodejsGabriele Lana
 
It is not supposed to fly but it does
It is not supposed to fly but it doesIt is not supposed to fly but it does
It is not supposed to fly but it doesGabriele Lana
 
Nodejs Explained with Examples
Nodejs Explained with ExamplesNodejs Explained with Examples
Nodejs Explained with ExamplesGabriele Lana
 

Viewers also liked (19)

#Iad14 intro
#Iad14 intro#Iad14 intro
#Iad14 intro
 
Nosql
NosqlNosql
Nosql
 
Magic of Ruby
Magic of RubyMagic of Ruby
Magic of Ruby
 
Why couchdb is cool
Why couchdb is coolWhy couchdb is cool
Why couchdb is cool
 
Minimum Viable Product
Minimum Viable ProductMinimum Viable Product
Minimum Viable Product
 
Parse Everything With Elixir
Parse Everything With ElixirParse Everything With Elixir
Parse Everything With Elixir
 
Resource Oriented Design
Resource Oriented DesignResource Oriented Design
Resource Oriented Design
 
Beyond Phoenix
Beyond PhoenixBeyond Phoenix
Beyond Phoenix
 
API Over HTTP
API Over HTTPAPI Over HTTP
API Over HTTP
 
Professional Programmer
Professional ProgrammerProfessional Programmer
Professional Programmer
 
Milano Legacy Coderetreat 2013
Milano Legacy Coderetreat 2013Milano Legacy Coderetreat 2013
Milano Legacy Coderetreat 2013
 
Agileday Coderetreat 2013
Agileday Coderetreat 2013Agileday Coderetreat 2013
Agileday Coderetreat 2013
 
coderetreat
coderetreatcoderetreat
coderetreat
 
Professional Programmer (3 Years Later)
Professional Programmer (3 Years Later)Professional Programmer (3 Years Later)
Professional Programmer (3 Years Later)
 
CouchDB Vs MongoDB
CouchDB Vs MongoDBCouchDB Vs MongoDB
CouchDB Vs MongoDB
 
Introduction to Nodejs
Introduction to NodejsIntroduction to Nodejs
Introduction to Nodejs
 
The Magic Of Elixir
The Magic Of ElixirThe Magic Of Elixir
The Magic Of Elixir
 
It is not supposed to fly but it does
It is not supposed to fly but it doesIt is not supposed to fly but it does
It is not supposed to fly but it does
 
Nodejs Explained with Examples
Nodejs Explained with ExamplesNodejs Explained with Examples
Nodejs Explained with Examples
 

Similar to MongoDB With Style

Schema Design with MongoDB
Schema Design with MongoDBSchema Design with MongoDB
Schema Design with MongoDBrogerbodamer
 
Back to Basics: My First MongoDB Application
Back to Basics: My First MongoDB ApplicationBack to Basics: My First MongoDB Application
Back to Basics: My First MongoDB ApplicationMongoDB
 
Back to Basics 2017 - Your First MongoDB Application
Back to Basics 2017 - Your First MongoDB ApplicationBack to Basics 2017 - Your First MongoDB Application
Back to Basics 2017 - Your First MongoDB ApplicationJoe Drumgoole
 
Map/Confused? A practical approach to Map/Reduce with MongoDB
Map/Confused? A practical approach to Map/Reduce with MongoDBMap/Confused? A practical approach to Map/Reduce with MongoDB
Map/Confused? A practical approach to Map/Reduce with MongoDBUwe Printz
 
Building a Scalable Inbox System with MongoDB and Java
Building a Scalable Inbox System with MongoDB and JavaBuilding a Scalable Inbox System with MongoDB and Java
Building a Scalable Inbox System with MongoDB and Javaantoinegirbal
 
How to leverage what's new in MongoDB 3.6
How to leverage what's new in MongoDB 3.6How to leverage what's new in MongoDB 3.6
How to leverage what's new in MongoDB 3.6Maxime Beugnet
 
MongoDB World 2018: Keynote
MongoDB World 2018: KeynoteMongoDB World 2018: Keynote
MongoDB World 2018: KeynoteMongoDB
 
Building Apps with MongoDB
Building Apps with MongoDBBuilding Apps with MongoDB
Building Apps with MongoDBNate Abele
 
MongoDB Performance Tuning
MongoDB Performance TuningMongoDB Performance Tuning
MongoDB Performance TuningMongoDB
 
Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-
Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-
Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-Tsuyoshi Yamamoto
 
MongoDB dla administratora
MongoDB dla administratora MongoDB dla administratora
MongoDB dla administratora 3camp
 
2013-03-23 - NoSQL Spartakiade
2013-03-23 - NoSQL Spartakiade2013-03-23 - NoSQL Spartakiade
2013-03-23 - NoSQL SpartakiadeJohannes Hoppe
 
Back to Basics, webinar 2: La tua prima applicazione MongoDB
Back to Basics, webinar 2: La tua prima applicazione MongoDBBack to Basics, webinar 2: La tua prima applicazione MongoDB
Back to Basics, webinar 2: La tua prima applicazione MongoDBMongoDB
 
MongoDB全機能解説2
MongoDB全機能解説2MongoDB全機能解説2
MongoDB全機能解説2Takahiro Inoue
 
Inside PyMongo - MongoNYC
Inside PyMongo - MongoNYCInside PyMongo - MongoNYC
Inside PyMongo - MongoNYCMike Dirolf
 
Tame Accidental Complexity with Ruby and MongoMapper
Tame Accidental Complexity with Ruby and MongoMapperTame Accidental Complexity with Ruby and MongoMapper
Tame Accidental Complexity with Ruby and MongoMapperGiordano Scalzo
 

Similar to MongoDB With Style (20)

Talk MongoDB - Amil
Talk MongoDB - AmilTalk MongoDB - Amil
Talk MongoDB - Amil
 
Schema Design with MongoDB
Schema Design with MongoDBSchema Design with MongoDB
Schema Design with MongoDB
 
Back to Basics: My First MongoDB Application
Back to Basics: My First MongoDB ApplicationBack to Basics: My First MongoDB Application
Back to Basics: My First MongoDB Application
 
Back to Basics 2017 - Your First MongoDB Application
Back to Basics 2017 - Your First MongoDB ApplicationBack to Basics 2017 - Your First MongoDB Application
Back to Basics 2017 - Your First MongoDB Application
 
Map/Confused? A practical approach to Map/Reduce with MongoDB
Map/Confused? A practical approach to Map/Reduce with MongoDBMap/Confused? A practical approach to Map/Reduce with MongoDB
Map/Confused? A practical approach to Map/Reduce with MongoDB
 
Building a Scalable Inbox System with MongoDB and Java
Building a Scalable Inbox System with MongoDB and JavaBuilding a Scalable Inbox System with MongoDB and Java
Building a Scalable Inbox System with MongoDB and Java
 
How to leverage what's new in MongoDB 3.6
How to leverage what's new in MongoDB 3.6How to leverage what's new in MongoDB 3.6
How to leverage what's new in MongoDB 3.6
 
MongoDB
MongoDBMongoDB
MongoDB
 
MongoDB World 2018: Keynote
MongoDB World 2018: KeynoteMongoDB World 2018: Keynote
MongoDB World 2018: Keynote
 
Building Apps with MongoDB
Building Apps with MongoDBBuilding Apps with MongoDB
Building Apps with MongoDB
 
MongoDB Performance Tuning
MongoDB Performance TuningMongoDB Performance Tuning
MongoDB Performance Tuning
 
Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-
Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-
Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-
 
MongoDB dla administratora
MongoDB dla administratora MongoDB dla administratora
MongoDB dla administratora
 
Mongo db dla administratora
Mongo db dla administratoraMongo db dla administratora
Mongo db dla administratora
 
2013-03-23 - NoSQL Spartakiade
2013-03-23 - NoSQL Spartakiade2013-03-23 - NoSQL Spartakiade
2013-03-23 - NoSQL Spartakiade
 
Back to Basics, webinar 2: La tua prima applicazione MongoDB
Back to Basics, webinar 2: La tua prima applicazione MongoDBBack to Basics, webinar 2: La tua prima applicazione MongoDB
Back to Basics, webinar 2: La tua prima applicazione MongoDB
 
MongoDB and RDBMS
MongoDB and RDBMSMongoDB and RDBMS
MongoDB and RDBMS
 
MongoDB全機能解説2
MongoDB全機能解説2MongoDB全機能解説2
MongoDB全機能解説2
 
Inside PyMongo - MongoNYC
Inside PyMongo - MongoNYCInside PyMongo - MongoNYC
Inside PyMongo - MongoNYC
 
Tame Accidental Complexity with Ruby and MongoMapper
Tame Accidental Complexity with Ruby and MongoMapperTame Accidental Complexity with Ruby and MongoMapper
Tame Accidental Complexity with Ruby and MongoMapper
 

More from Gabriele Lana

Microservice Architectures
Microservice ArchitecturesMicroservice Architectures
Microservice ArchitecturesGabriele Lana
 
Professional Programmer 2018
Professional Programmer 2018Professional Programmer 2018
Professional Programmer 2018Gabriele Lana
 
Refactoring In Tdd The Missing Part
Refactoring In Tdd The Missing PartRefactoring In Tdd The Missing Part
Refactoring In Tdd The Missing PartGabriele Lana
 
Erlang: the language and the platform
Erlang: the language and the platformErlang: the language and the platform
Erlang: the language and the platformGabriele Lana
 
Resource Oriented Architectures
Resource Oriented ArchitecturesResource Oriented Architectures
Resource Oriented ArchitecturesGabriele Lana
 
Sustainable Agile Development
Sustainable Agile DevelopmentSustainable Agile Development
Sustainable Agile DevelopmentGabriele Lana
 
Introduction to Erlang
Introduction to ErlangIntroduction to Erlang
Introduction to ErlangGabriele Lana
 

More from Gabriele Lana (8)

Microservice Architectures
Microservice ArchitecturesMicroservice Architectures
Microservice Architectures
 
Professional Programmer 2018
Professional Programmer 2018Professional Programmer 2018
Professional Programmer 2018
 
ProgrammingKatas
ProgrammingKatasProgrammingKatas
ProgrammingKatas
 
Refactoring In Tdd The Missing Part
Refactoring In Tdd The Missing PartRefactoring In Tdd The Missing Part
Refactoring In Tdd The Missing Part
 
Erlang: the language and the platform
Erlang: the language and the platformErlang: the language and the platform
Erlang: the language and the platform
 
Resource Oriented Architectures
Resource Oriented ArchitecturesResource Oriented Architectures
Resource Oriented Architectures
 
Sustainable Agile Development
Sustainable Agile DevelopmentSustainable Agile Development
Sustainable Agile Development
 
Introduction to Erlang
Introduction to ErlangIntroduction to Erlang
Introduction to Erlang
 

Recently uploaded

TeamStation AI System Report LATAM IT Salaries 2024
TeamStation AI System Report LATAM IT Salaries 2024TeamStation AI System Report LATAM IT Salaries 2024
TeamStation AI System Report LATAM IT Salaries 2024Lonnie McRorey
 
A Journey Into the Emotions of Software Developers
A Journey Into the Emotions of Software DevelopersA Journey Into the Emotions of Software Developers
A Journey Into the Emotions of Software DevelopersNicole Novielli
 
TrustArc Webinar - How to Build Consumer Trust Through Data Privacy
TrustArc Webinar - How to Build Consumer Trust Through Data PrivacyTrustArc Webinar - How to Build Consumer Trust Through Data Privacy
TrustArc Webinar - How to Build Consumer Trust Through Data PrivacyTrustArc
 
Anypoint Exchange: It’s Not Just a Repo!
Anypoint Exchange: It’s Not Just a Repo!Anypoint Exchange: It’s Not Just a Repo!
Anypoint Exchange: It’s Not Just a Repo!Manik S Magar
 
Training state-of-the-art general text embedding
Training state-of-the-art general text embeddingTraining state-of-the-art general text embedding
Training state-of-the-art general text embeddingZilliz
 
The Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptx
The Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptxThe Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptx
The Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptxLoriGlavin3
 
The Ultimate Guide to Choosing WordPress Pros and Cons
The Ultimate Guide to Choosing WordPress Pros and ConsThe Ultimate Guide to Choosing WordPress Pros and Cons
The Ultimate Guide to Choosing WordPress Pros and ConsPixlogix Infotech
 
Time Series Foundation Models - current state and future directions
Time Series Foundation Models - current state and future directionsTime Series Foundation Models - current state and future directions
Time Series Foundation Models - current state and future directionsNathaniel Shimoni
 
"Debugging python applications inside k8s environment", Andrii Soldatenko
"Debugging python applications inside k8s environment", Andrii Soldatenko"Debugging python applications inside k8s environment", Andrii Soldatenko
"Debugging python applications inside k8s environment", Andrii SoldatenkoFwdays
 
How AI, OpenAI, and ChatGPT impact business and software.
How AI, OpenAI, and ChatGPT impact business and software.How AI, OpenAI, and ChatGPT impact business and software.
How AI, OpenAI, and ChatGPT impact business and software.Curtis Poe
 
Rise of the Machines: Known As Drones...
Rise of the Machines: Known As Drones...Rise of the Machines: Known As Drones...
Rise of the Machines: Known As Drones...Rick Flair
 
Gen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdfGen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdfAddepto
 
SALESFORCE EDUCATION CLOUD | FEXLE SERVICES
SALESFORCE EDUCATION CLOUD | FEXLE SERVICESSALESFORCE EDUCATION CLOUD | FEXLE SERVICES
SALESFORCE EDUCATION CLOUD | FEXLE SERVICESmohitsingh558521
 
The State of Passkeys with FIDO Alliance.pptx
The State of Passkeys with FIDO Alliance.pptxThe State of Passkeys with FIDO Alliance.pptx
The State of Passkeys with FIDO Alliance.pptxLoriGlavin3
 
Merck Moving Beyond Passwords: FIDO Paris Seminar.pptx
Merck Moving Beyond Passwords: FIDO Paris Seminar.pptxMerck Moving Beyond Passwords: FIDO Paris Seminar.pptx
Merck Moving Beyond Passwords: FIDO Paris Seminar.pptxLoriGlavin3
 
Moving Beyond Passwords: FIDO Paris Seminar.pdf
Moving Beyond Passwords: FIDO Paris Seminar.pdfMoving Beyond Passwords: FIDO Paris Seminar.pdf
Moving Beyond Passwords: FIDO Paris Seminar.pdfLoriGlavin3
 
Ensuring Technical Readiness For Copilot in Microsoft 365
Ensuring Technical Readiness For Copilot in Microsoft 365Ensuring Technical Readiness For Copilot in Microsoft 365
Ensuring Technical Readiness For Copilot in Microsoft 3652toLead Limited
 
The Role of FIDO in a Cyber Secure Netherlands: FIDO Paris Seminar.pptx
The Role of FIDO in a Cyber Secure Netherlands: FIDO Paris Seminar.pptxThe Role of FIDO in a Cyber Secure Netherlands: FIDO Paris Seminar.pptx
The Role of FIDO in a Cyber Secure Netherlands: FIDO Paris Seminar.pptxLoriGlavin3
 
Nell’iperspazio con Rocket: il Framework Web di Rust!
Nell’iperspazio con Rocket: il Framework Web di Rust!Nell’iperspazio con Rocket: il Framework Web di Rust!
Nell’iperspazio con Rocket: il Framework Web di Rust!Commit University
 
Dev Dives: Streamline document processing with UiPath Studio Web
Dev Dives: Streamline document processing with UiPath Studio WebDev Dives: Streamline document processing with UiPath Studio Web
Dev Dives: Streamline document processing with UiPath Studio WebUiPathCommunity
 

Recently uploaded (20)

TeamStation AI System Report LATAM IT Salaries 2024
TeamStation AI System Report LATAM IT Salaries 2024TeamStation AI System Report LATAM IT Salaries 2024
TeamStation AI System Report LATAM IT Salaries 2024
 
A Journey Into the Emotions of Software Developers
A Journey Into the Emotions of Software DevelopersA Journey Into the Emotions of Software Developers
A Journey Into the Emotions of Software Developers
 
TrustArc Webinar - How to Build Consumer Trust Through Data Privacy
TrustArc Webinar - How to Build Consumer Trust Through Data PrivacyTrustArc Webinar - How to Build Consumer Trust Through Data Privacy
TrustArc Webinar - How to Build Consumer Trust Through Data Privacy
 
Anypoint Exchange: It’s Not Just a Repo!
Anypoint Exchange: It’s Not Just a Repo!Anypoint Exchange: It’s Not Just a Repo!
Anypoint Exchange: It’s Not Just a Repo!
 
Training state-of-the-art general text embedding
Training state-of-the-art general text embeddingTraining state-of-the-art general text embedding
Training state-of-the-art general text embedding
 
The Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptx
The Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptxThe Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptx
The Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptx
 
The Ultimate Guide to Choosing WordPress Pros and Cons
The Ultimate Guide to Choosing WordPress Pros and ConsThe Ultimate Guide to Choosing WordPress Pros and Cons
The Ultimate Guide to Choosing WordPress Pros and Cons
 
Time Series Foundation Models - current state and future directions
Time Series Foundation Models - current state and future directionsTime Series Foundation Models - current state and future directions
Time Series Foundation Models - current state and future directions
 
"Debugging python applications inside k8s environment", Andrii Soldatenko
"Debugging python applications inside k8s environment", Andrii Soldatenko"Debugging python applications inside k8s environment", Andrii Soldatenko
"Debugging python applications inside k8s environment", Andrii Soldatenko
 
How AI, OpenAI, and ChatGPT impact business and software.
How AI, OpenAI, and ChatGPT impact business and software.How AI, OpenAI, and ChatGPT impact business and software.
How AI, OpenAI, and ChatGPT impact business and software.
 
Rise of the Machines: Known As Drones...
Rise of the Machines: Known As Drones...Rise of the Machines: Known As Drones...
Rise of the Machines: Known As Drones...
 
Gen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdfGen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdf
 
SALESFORCE EDUCATION CLOUD | FEXLE SERVICES
SALESFORCE EDUCATION CLOUD | FEXLE SERVICESSALESFORCE EDUCATION CLOUD | FEXLE SERVICES
SALESFORCE EDUCATION CLOUD | FEXLE SERVICES
 
The State of Passkeys with FIDO Alliance.pptx
The State of Passkeys with FIDO Alliance.pptxThe State of Passkeys with FIDO Alliance.pptx
The State of Passkeys with FIDO Alliance.pptx
 
Merck Moving Beyond Passwords: FIDO Paris Seminar.pptx
Merck Moving Beyond Passwords: FIDO Paris Seminar.pptxMerck Moving Beyond Passwords: FIDO Paris Seminar.pptx
Merck Moving Beyond Passwords: FIDO Paris Seminar.pptx
 
Moving Beyond Passwords: FIDO Paris Seminar.pdf
Moving Beyond Passwords: FIDO Paris Seminar.pdfMoving Beyond Passwords: FIDO Paris Seminar.pdf
Moving Beyond Passwords: FIDO Paris Seminar.pdf
 
Ensuring Technical Readiness For Copilot in Microsoft 365
Ensuring Technical Readiness For Copilot in Microsoft 365Ensuring Technical Readiness For Copilot in Microsoft 365
Ensuring Technical Readiness For Copilot in Microsoft 365
 
The Role of FIDO in a Cyber Secure Netherlands: FIDO Paris Seminar.pptx
The Role of FIDO in a Cyber Secure Netherlands: FIDO Paris Seminar.pptxThe Role of FIDO in a Cyber Secure Netherlands: FIDO Paris Seminar.pptx
The Role of FIDO in a Cyber Secure Netherlands: FIDO Paris Seminar.pptx
 
Nell’iperspazio con Rocket: il Framework Web di Rust!
Nell’iperspazio con Rocket: il Framework Web di Rust!Nell’iperspazio con Rocket: il Framework Web di Rust!
Nell’iperspazio con Rocket: il Framework Web di Rust!
 
Dev Dives: Streamline document processing with UiPath Studio Web
Dev Dives: Streamline document processing with UiPath Studio WebDev Dives: Streamline document processing with UiPath Studio Web
Dev Dives: Streamline document processing with UiPath Studio Web
 

MongoDB With Style

  • 1. with style gabriele lana gabriele.lana@cleancode.it twitter: @gabrielelana http://joind.in/2943
  • 3. mongo console $  ~/Work/opt/mongodb-­‐1.6.5/bin/mongod            -­‐-­‐dbpath=~/Work/src/nosqlday/db/mongodb.01            -­‐-­‐logpath=~/Work/src/nosqlday/log/mongodb.01            -­‐-­‐fork  -­‐-­‐port  30001 $  ~/Work/opt/mongodb-­‐1.6.5/bin/mongo  localhost:30001 MongoDB  shell  version:  1.6.5 connecting  to:  localhost:30001/test >  use  nosqlday switched  to  db  nosqlday >  db.getCollectionNames() [  "system.indexes",  "users"  ] >  db.users.find({  "name":  "Gabriele"  }) {  "_id"  :  ObjectId("4d8706767bb037a8a8f98db2"),  "name"  :  "Gabriele",   "surname"  :  "Lana",  "job"  :  "softwarecraftsman"  } >  exit bye
  • 4. ruby driver require "mongo" db = Mongo::Connection.new("localhost", 30001).db("nosqlday") puts "Collections:" db.collections.each do |collection| puts "\t#{collection.name}" end puts "Gabriele:" db["users"].find(:name => "Gabriele").each do |user| puts "\t#{user["_id"]}" end db.connection.close
  • 5. ruby driver require "mongo" db = Mongo::Connection.new("localhost", 30001).db("nosqlday") puts "Collections:" db.collections.each do |collection| puts "t#{collection.name}" $  ruby  src/connect.rb   Collections: end   users   system.indexes Gabriele: puts "Gabriele:"   4d8706767bb037a8a8f98db2 db["users"].find(:name => "Gabriele").each do |user| puts "t#{user["_id"]}" end db.connection.close
  • 7. Style know your driver
  • 9. puts "Gabriele:" db["users"].find(:name => "Gabriele").each do |user| puts "\t#{user["_id"]}" end puts "Gabriele:" db["users"].select{|user| user["name"] == "Gabriele"}.each do |user| puts "\t#{user["_id"]}" end mongo smart driver
  • 10. puts "Gabriele:" db["users"].find(:name => "Gabriele").each do |user| puts "t#{user["_id"]}" end puts "Gabriele:" $  ruby  src/find_vs_select.rb   db["users"].select{|user| user["name"] == "Gabriele"}.each do |user| Gabriele: puts "t#{user["_id"]}" 4d8706767bb037a8a8f98db2   Gabriele: end   4d8706767bb037a8a8f98db2 mongo smart driver
  • 11. puts "Gabriele:" db["users"].find(:name => "Gabriele").each do |user| puts "\t#{user["_id"]}" end puts "Gabriele:" db["users"].select{|user| user["name"] == "Gabriele"}.each do |user| puts "\t#{user["_id"]}" end mongo smart driver
  • 12. Style incremental design based on application behavior
  • 13. the best design is the one where needed data can be easily extracted the way you need to query your data should influence your design
  • 14. Style incremental design based on application monitoring
  • 15. monitoring and adapting is better than doing it right the first time ...actually the first time is the worst time :-)
  • 16. monitoring & adapting >  db.setProfilingLevel(1,  5)                                                                                                 {  "was"  :  1,  "slowms"  :  100,  "ok"  :  1  } //  after  product  usage  find  problematic  queries >  db.system.profile.find().sort({millis:-­‐1})                                                                 {  "ts":  "Mon  Mar  21  2011  14:30:56  GMT+0100  (CET)",    "info":  "        query  pomodorist.pomodori            reslen:202            nscanned:26950            query:                  {  $query:  {  task_id:  ObjectId('4d6f1d3931f2386e9c089796')  }}            nreturned:1      ",      "millis":17 }
  • 17. monitoring & adapting >  db.pomodori.find({        $query:  {  task_id:  ObjectId('4d6f1d3931f2386e9c089796')  },        $explain:  true })                                                       {  "cursor":  "BasicCursor",    "nscanned":  26950,    "nscannedObjects":  26950,    "n":  1,    "millis":  17,    "indexBounds":  {  },    "allPlans":  [        {  "cursor"  :  "BasicCursor",  "indexBounds"  :  {  }  }      ] }
  • 18. monitoring & adapting >  db.pomodori.ensureIndex({"task_id":  1})                                                                                                               >  db.pomodori.find({        $query:  {  task_id:  ObjectId('4d6f1d3931f2386e9c089796')  },        $explain:  true }) {  "cursor":  "BtreeCursor  task_id_1",    "nscanned":  1,    "nscannedObjects":  1,    "n":  1,    "millis":  0,    "indexBounds":  {        "task_id":  [      [                ObjectId("4d6f1d3931f2386e9c089796"),                ObjectId("4d6f1d3931f2386e9c089796")          ]    ]},  "allPlans":  [...] }
  • 20. query & design use $in operator for batch query
  • 21. retrieve all objects with $in users = [ {:name => "Gabriele", :surname => "Lana", :job => "softwarecraftsman"}, {:name => "Federico", :surname => "Galassi", :job => "softwarecraftsman"}, {:name => "Giordano", :surname => "Scalzo", :job => "softwarecraftsman"} ] ids = users.map{|user| db["users"].insert(user)} puts ids.map{|id| db["users"].find_one(:_id => id)}
  • 22. retrieve all objects with $in users = [ {:name => "Gabriele", :surname => "Lana", :job => "softwarecraftsman"}, {:name => "Federico", :surname => "Galassi", :job => "softwarecraftsman"}, {:name => "Giordano", :surname => "Scalzo", :job => "softwarecraftsman"} ] $  ruby  src/find_by_all_ids.rb   {"_id"=>BSON::ObjectId('4d87605731f23824a0000001'),  ...} ids = users.map{|user| db["users"].insert(user)} {"_id"=>BSON::ObjectId('4d87605731f23824a0000002'),  ...} {"_id"=>BSON::ObjectId('4d87605731f23824a0000003'),  ...} puts ids.map{|id| db["users"].find_one(:_id => id)}
  • 23. retrieve all objects with $in users = [ {:name => "Gabriele", :surname => "Lana", :job => "softwarecraftsman"}, {:name => "Federico", :surname => "Galassi", :job => "softwarecraftsman"}, {:name => "Giordano", :surname => "Scalzo", :job => "softwarecraftsman"} ] ids = users.map{|user| db["users"].insert(user)} puts ids.map{|id| db["users"].find_one(:_id => id)}
  • 24. retrieve all objects with $in users = [ {:name => "Gabriele", :surname => "Lana", :job => "softwarecraftsman"}, {:name => "Federico", :surname => "Galassi", :job => "softwarecraftsman"}, {:name => "Giordano", :surname => "Scalzo", :job => "softwarecraftsman"} ] ids = users.map{|user| db["users"].insert(user)} ids = db["users"].insert(users) puts ids.map{|id| db["users"].find_one(:_id => id)} puts db["users"].find(:_id => {:$in => ids}).all
  • 25. retrieve all objects with $in users = [ {:name => "Gabriele", :surname => "Lana", :job => "softwarecraftsman"}, {:name => "Federico", :surname => "Galassi", :job => "softwarecraftsman"}, {:name => "Giordano", :surname => "Scalzo", :job => "softwarecraftsman"} ] $  ruby  src/find_by_all_ids.rb   {"_id"=>BSON::ObjectId('4d87605731f23824a0000001'),  ...} ids = users.map{|user| db["users"].insert(user)} {"_id"=>BSON::ObjectId('4d87605731f23824a0000002'),  ...} ids = db["users"].insert(users) {"_id"=>BSON::ObjectId('4d87605731f23824a0000003'),  ...} puts ids.map{|id| db["users"].find_one(:_id => id)} puts db["users"].find(:_id => {:$in => ids}).all
  • 26. query & design use conventions to build smart object identifiers
  • 27. conventions are fun to play with >  db.user_scores.find({},  {"_id":  1}) {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐week-­‐200944"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐month-­‐200911"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐year-­‐2009"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐user"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐advertising"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐week-­‐200944-­‐advertising"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐art"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐week-­‐200944-­‐art"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐artist"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐week-­‐200944-­‐artist"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐information"  }
  • 28. conventions are fun to play with >  db.user_scores.findOne(        {"_id":  "4d873ce631f238241d00000d-­‐day-­‐20091106"}    )   {   "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106",   "pomodori"  :  15,   "pomodori_squashed"  :  3,   "breaks"  :  7,   "tasks_created"  :  8,   "tasks_done"  :  6,   "estimation_accuracy"  :  0,   "seconds_of_focused_time"  :  22500,   "seconds_of_wasted_time"  :  1999,   "seconds_of_breaks"  :  8820 }
  • 29. conventions are fun to play with (user scores in day per tag) >  db.user_scores.find(        {"_id":  /^4d873ce631f238241d00000d-­‐day-­‐20091106-­‐/},  {"_id":  1}    )                 {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐advertising"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐art"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐artist"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐blogging"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐culture"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐html"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐illustration"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐information"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐inspiration"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐marketing"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐movies"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐resources"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐technology"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐tool"  } {  "_id"  :  "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐tutorials"  }
  • 30. conventions are fun to play with (list of tags per day) >  db.user_scores.find(        {"_id":  /^4d873ce631f238241d00000d-­‐day-­‐20091106-­‐/},  {"_id":  1}    ).map(function(document)  {        return  document._id.replace(            "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐",  ""        )    }) [   "advertising",   "art",   "artist",   "blogging",   "culture",   "html",   "illustration",   "information",   ... ]
  • 31. conventions are fun to play with (anchored regexp uses indexes) >  db.user_scores.find(        {"_id":  /^4d873ce631f238241d00000d-­‐day-­‐20091106-­‐/},  {"_id":  1}    ).explain()                                                                                                                                                         {   "cursor"  :  "BtreeCursor  _id_  multi",   "nscanned"  :  15,   "nscannedObjects"  :  15,   "n"  :  15,   "millis"  :  0,   "indexBounds"  :  {     "_id"  :  [       [         "4d873ce631f238241d00000d-­‐day-­‐20091106-­‐",         "4d873ce631f238241d00000d-­‐day-­‐20091106."       ],       [         /^4d873ce631f238241d00000d-­‐day-­‐20091106-­‐/,         /^4d873ce631f238241d00000d-­‐day-­‐20091106-­‐/       ]     ]
  • 32. conventions are fun to play with (anchored regexp uses indexes) >  db.user_scores.find(        {"_id":  /4d873ce631f238241d00000d-­‐day-­‐20091106-­‐/},  {"_id":  1}    ).explain() {   "cursor"  :  "BtreeCursor  _id_  multi",   "nscanned"  :  109349,   "nscannedObjects"  :  15,   "n"  :  15,   "millis"  :  217,   "indexBounds"  :  {     "_id"  :  [       ...     ]   } }
  • 33. query & design use “group” method to do small computations without fetching related documents
  • 34. group to compute data in mongo (inject client side) days = [ 20091110, 20091111, 20091112 ] scores_id = %r{^4d87d00931f2380c7700000d-day-(#{days.join("|")})$} scores = db["user_scores"].find(:_id => scores_id) pomodori = scores.inject(0) do |pomodori, scores| pomodori + scores["pomodori"] end puts "Pomodori in days #{days.join(",")}: #{pomodori}"
  • 35. group to compute data in mongo (inject client side) days = [ 20091110, 20091111, 20091112 ] scores_id = %r{^4d87d00931f2380c7700000d-day-(#{days.join("|")})$} scores = db["user_scores"].find(:_id => scores_id) pomodori = scores.inject(0) do |pomodori, scores| $  ruby  src/inject_for_reduce.rb   pomodori + scores["pomodori"] Pomodori  in  days  20091110,20091111,20091112:  36 end puts "Pomodori in days #{days.join(",")}: #{pomodori}"
  • 36. group to compute data in mongo (group server side) days = [ 20091110, 20091111, 20091112 ] scores_id = %r{^4d87d00931f2380c7700000d-day-(#{days.join("|")})$} result = db["user_scores"].group( :cond => { :_id => scores_id }, :initial => { :pomodori => 0 }, :reduce => <<-EOF function(document, result) { result.pomodori += document.pomodori } EOF ) puts "Pomodori in days #{days.join(",")}: #{result.first["pomodori"]}"
  • 37. group to compute data in mongo (group server side) days = [ 20091110, 20091111, 20091112 ] scores_id = %r{^4d87d00931f2380c7700000d-day-(#{days.join("|")})$} result = db["user_scores"].group( :cond => { :_id => scores_id }, :initial => { :pomodori => 0 }, :reduce => <<-EOF $  ruby  src/group_for_reduce.rb   Pomodori  in  days  20091110,20091111,20091112:  36 function(document, result) { result.pomodori += document.pomodori } EOF ) puts "Pomodori in days #{days.join(",")}: #{result.first["pomodori"]}"
  • 38. group to compute data in mongo (ex. sum pomodori by tag “ruby”) result = db["user_scores"].group( :cond => { :_id => /^4d87d00931f2380c7700000d-day-\d{8}-ruby$/ }, :initial => { :pomodori => 0, :days => 0 }, :reduce => <<-EOF function(document, result) { result.days += 1 result.pomodori += document.pomodori } EOF ).first puts "In #{result["days"]} days, #{result["pomodori"]} done for ruby"
  • 39. group to compute data in mongo (ex. sum pomodori by tag “ruby”) result = db["user_scores"].group( :cond => { :_id => /^4d87d00931f2380c7700000d-day-d{8}-ruby$/ }, :initial => { :pomodori => 0, :days => 0 }, :reduce => <<-EOF function(document, result) { $  ruby  src/group_for_ruby_tag.rb   In  43  days,  45  pomodori result.days += 1 result.pomodori += document.pomodori } EOF ).first puts "In #{result["days"]} days, #{result["pomodori"]} pomodori"
  • 40. group to compute data in mongo (ex. sum pomodori by tag “ruby”) >  db.user_scores.find({        "_id":  /^4d87d00931f2380c7700000d-­‐day-­‐d{8}-­‐ruby$/    }).explain() {   "cursor"  :  "BtreeCursor  _id_  multi",   "nscanned"  :  43,   "nscannedObjects"  :  43,   "n"  :  43,   "millis"  :  3,   "indexBounds"  :  {     "_id"  :  [...]   } }
  • 41. query & design create indexes on arrays to create local reverse indexes in documents
  • 42. reverse index in place (an array could be indexed) >  db.tasks.find({  "tags":  {  $in:  [  "nosqlday"  ]  }  })                                                                             {  "_id"  :  ObjectId("4d7de446175ca8243d000004"),      "tags"  :  [  "nosqlday"  ],      "description"  :  "#nosqlday  keynote",      "is_recurrent"  :  false,    "estimated"  :  0,      "worked_in"  :  [   "Mon  Mar  14  2011  00:00:00  GMT+0100  (CET)",   "Tue  Mar  15  2011  00:00:00  GMT+0100  (CET)"    ],    "done_at"  :  "Tue  Mar  15  2011  13:05:03  GMT+0100  (CET)",    "todo_at"  :  null,    "created_at"  :  "Mon  Mar  14  2011  10:47:50  GMT+0100  (CET)",    "updated_at"  :  "Tue  Mar  15  2011  13:05:03  GMT+0100  (CET)",    "keywords":  [  "nosqldai",  "keynot"  ],    "user_id":  ObjectId("4d53996c137ce423ff000001"),    "annotations"  :  [  ] }
  • 43. reverse index in place (an array could be indexed) >  db.tasks.getIndexes() [   {     "name"  :  "_id_",     "ns"  :  "app435386.tasks",     "key"  :  {       "_id"  :  1     }   },   {     "name"  :  "tags_1",     "ns"  :  "app435386.tasks",     "key"  :  {       "tags"  :  1     },     "unique"  :  false   },      ... ]
  • 44. reverse index in place (container for deduced data, array) db["orders"].insert({ :placed_at => [ now.strftime("%Y"), # year: "2011" now.strftime("%Y%m"), # month: "201103" now.strftime("%Yw%U"), # week: "2011w11" now.strftime("%Y%m%d") # day: "20110316" ], :user_id => user, :items => items_in_order.map{|item| item[:id]}, :total => items_in_order.inject(0){|total,item| total += item[:price]} }) # ... db["orders"].ensure_index([["placed_at", Mongo::DESCENDING]])
  • 45. reverse index in place (container for deduced data, array) >  db.orders.findOne() {  "_id"  :  ObjectId("4d88bf1f31f23812de0003fd"),      "placed_at"  :  [  "2011",  "201103",  "2011w11",  "20110316"  ],    "user_id"  :  ObjectId("4d88bf1f31f23812de0003e9"),    "items"  :  [        ObjectId("4d88bf1f31f23812de0003da"),        ObjectId("4d88bf1f31f23812de000047"),        ObjectId("4d88bf1f31f23812de000078"),        ObjectId("4d88bf1f31f23812de000068"),      ObjectId("4d88bf1f31f23812de000288")    ],    "total"  :  3502 }
  • 46. reverse index in place (container for deduced data, array) >  db.orders.find({  "placed_at":  "20110310"  }).count() 77 >  db.orders.find({  "placed_at":  "20110310"  }).explain() {   "cursor"  :  "BtreeCursor  placed_at_-­‐1",   "nscanned"  :  77,   "nscannedObjects"  :  77,   "n"  :  77,   "millis"  :  0,   "indexBounds"  :  {     "placed_at"  :  [       [         "20110310",         "20110310"       ]     ]   } }
  • 47. reverse index in place (container for deduced data, hash) db["orders"].insert({ :placed_at => [ { :year => now.strftime("%Y") }, { :month => now.strftime("%Y%m") }, { :week => now.strftime("%Y%U") }, { :day => now.strftime("%Y%m%d") } ], :user_id => user, :items => items_in_order.map{|item| item[:id]}, :total => items_in_order.inject(0){|total,item| total += item[:price]} }) # ... db["orders"].ensure_index([["placed_at", Mongo::DESCENDING]])
  • 48. reverse index in place (container for deduced data, hash) >  db.orders.findOne()                                                             {  "_id"  :  ObjectId("4d88c31531f23812fe0003ea"),    "placed_at"  :  [        {  "year"  :  "2009"  },        {  "month"  :  "200911"  },        {  "week"  :  "200945"  },        {  "day"  :  "20091109"  }    ],    "user_id"  :  ObjectId("4d88c31531f23812fe0003e9"),    "items"  :  [        ObjectId("4d88c31531f23812fe00013f"),        ObjectId("4d88c31531f23812fe000176"),        ObjectId("4d88c31531f23812fe0003e2"),        ObjectId("4d88c31531f23812fe0003d1"),        ObjectId("4d88c31531f23812fe0001c1"),        ObjectId("4d88c31531f23812fe000118"),        ObjectId("4d88c31531f23812fe00031d")    ],    "total"  :  10149 }
  • 49. reverse index in place (container for deduced data, hash) >  db.orders.find({  "placed_at.week":  "201101"  }).count()                 331 >  db.orders.find({  "placed_at.week":  "201101"  }).explain()             {   "cursor"  :  "BasicCursor",   "nscanned"  :  22374,   "nscannedObjects"  :  22374,   "n"  :  331,   "millis"  :  23,   "indexBounds"  :  {       } }
  • 50. reverse index in place (container for deduced data, hash) >  db.orders.find({  "placed_at":  {  "week":  "201101"  }}).count()     331 >  db.orders.find({  "placed_at":  {  "week":  "201101"  }}).explain() {   "cursor"  :  "BtreeCursor  placed_at_-­‐1",   "nscanned"  :  331,   "nscannedObjects"  :  331,   "n"  :  331,   "millis"  :  0,   "indexBounds"  :  {     "placed_at"  :  [       [         {  "week"  :  "2011w01"  },         {  "week"  :  "2011w01"  }       ]     ]   } }
  • 51. query & design use dates but be aware of some pitfalls
  • 52. plain dates are good too db["orders"].insert({ :placed_at => now, :user_id => user, :items => items_in_order.map{|item| item[:id]}, :total => items_in_order.inject(0){|total,item| total += item[:price]} }) # ... db["orders"].ensure_index([["placed_at", Mongo::DESCENDING]])
  • 53. plain dates are good too >  db.orders.findOne()                                                                                                                                               {   "_id"  :  ObjectId("4d88d1f931f23813a10003ea"),   "placed_at"  :  "Mon  Nov  09  2009  08:00:00  GMT+0100  (CET)",   "user_id"  :  ObjectId("4d88d1f931f23813a10003e9"),   "items"  :  [     ObjectId("4d88d1f931f23813a100016d"),     ObjectId("4d88d1f931f23813a1000346"),     ObjectId("4d88d1f931f23813a10001e7"),     ObjectId("4d88d1f931f23813a10000db"),     ObjectId("4d88d1f931f23813a1000091"),     ObjectId("4d88d1f931f23813a10001c1"),     ObjectId("4d88d1f931f23813a10001d3"),     ObjectId("4d88d1f931f23813a100031b"),     ObjectId("4d88d1f931f23813a1000130")   ],   "total"  :  5871 }
  • 54. plain dates are good too >  db.orders.find({        "placed_at":  {              $gte:  new  Date(2011,2,10),            $lt:  new  Date(2011,2,11)        }    }).explain() {   "cursor"  :  "BtreeCursor  placed_at_-­‐1",   "nscanned"  :  53,   "nscannedObjects"  :  53,   "n"  :  53,   "millis"  :  0,   "indexBounds"  :  {     "placed_at"  :  [       [         "Fri  Mar  11  2011  00:00:00  GMT+0100  (CET)",         "Thu  Mar  10  2011  00:00:00  GMT+0100  (CET)"       ]     ]   }
  • 55. plain dates are good too, but... (total sold on this year’s mondays) # find all mondays of the year now = Time.now.beginning_of_year now += 1.day until now.monday? mondays = [ now ] mondays << now += 7.days while now.year == Time.now.year # find all orders placed on mondays query = { :$or => mondays.map do |day| { :placed_at => { :$gte => day.beginning_of_day, :$lte => day.end_of_day } } end } puts query
  • 56. plain dates are good too, but... (total sold on this year’s mondays) # find all mondays of the year now = Time.now.beginning_of_year now += 1.day until now.monday? mondays = [ now ] mondays << now += 7.days while now.year == Time.now.year $  ruby  src/orders_on_mondays.rb   # find all orders placed on mondays {:$or=>[ query = {    {:placed_at=>{ :$or => mondays.map do |day|        :$gte=>2011-­‐01-­‐03  00:00:00  +0100, { :placed_at => {        :$lte=>2011-­‐01-­‐03  23:59:59  +0100    }}, :$gte => day.beginning_of_day,    {:placed_at=>{ :$lte => day.end_of_day        :$gte=>2011-­‐01-­‐10  00:00:00  +0100,        :$lte=>2011-­‐01-­‐10  23:59:59  +0100 }    }}, }    {:placed_at=>{        :$gte=>2011-­‐01-­‐17  00:00:00  +0100, end        :$lte=>2011-­‐01-­‐17  23:59:59  +0100 }    }},    ... ]} puts query
  • 57. plain dates are good too, but... (it works but it’s too slooow) db["orders"].find({ :$or => mondays.map do |day| { :placed_at => { :$gte => day.beginning_of_day, :$lte => day.end_of_day } } end })
  • 58. plain dates are good too, but... (why it’s too slow) >  db.orders.find({        $or:  [            "placed_at":{  $gte:  new  Date(2011,2,3),  $lt:  new  Date(2011,2,4)  },            "placed_at":{  $gte:  new  Date(2011,2,10),  $lt:  new  Date(2011,2,11)  }        ]    }).explain() {    "clauses"  :  [{            "cursor"  :  "BtreeCursor  placed_at_-­‐1",            "indexBounds"  :  {                "placed_at"  :  [[                    "Tue  Mar  3  2011  00:00:00  GMT+0100  (CET)",                    "Wed  Mar  4  2011  00:00:00  GMT+0100  (CET)"                ]]}    },  {        "cursor"  :  "BtreeCursor  placed_at_-­‐1",            "indexBounds"  :  {                "placed_at"  :  [[                    "Tue  Mar  10  2011  00:00:00  GMT+0100  (CET)",                    "Wed  Mar  11  2011  00:00:00  GMT+0100  (CET)"          
  • 59. with destructured dates (total sold on mondays this year) >  db.orders.findOne() {  "_id"  :  ObjectId("4d88bf1f31f23812de0003fd"),      "placed_at"  :  [  "2011",  "201103",  "2011w11",  "20110316"  ],    "user_id"  :  ObjectId("4d88bf1f31f23812de0003e9"),    "items"  :  [        ObjectId("4d88bf1f31f23812de0003da"),        ObjectId("4d88bf1f31f23812de000047"),        ObjectId("4d88bf1f31f23812de000078"),        ObjectId("4d88bf1f31f23812de000068"),        ObjectId("4d88bf1f31f23812de000288")    ],    "total"  :  3502 }
  • 60. with destructured dates (total sold on mondays this year) now = Time.now.beginning_of_year now += 1.day until now.monday? mondays = [ now ] mondays << now += 7.days while now.year == Time.now.year orders = db["orders"].find({ :placed_at => { :$in => mondays.map {|day| day.strftime("%Y%m%d")} } }) puts orders.explain
  • 61. with destructured dates (total sold on mondays this year) now = Time.now.beginning_of_year now += 1.day until now.monday? mondays = [ now ] mondays << now += 7.days while now.year == Time.now.year orders = db["orders"].find({ $  ruby  src/orders_on_mondays.rb   :placed_at => { {  "cursor"=>"BtreeCursor  placed_at_-­‐1  multi", :$in => mondays.map    "nscanned"=>744, {|day| day.strftime("%Y%m%d")} }    "nscannedObjects"=>744,    "n"=>744, })    "millis"=>1,    "indexBounds"=>{        "placed_at"=>[ puts orders.explain            ["20120102",  "20120102"],  ["20111226",  "20111226"],            ["20111219",  "20111219"],  ["20111212",  "20111212"],              ["20111205",  "20111205"],  ["20111128",  "20111128"],              ["20111121",  "20111121"],  ...        ]    } }
  • 62. query & design full query power with $where operator
  • 63. pomodori (find who is ticking) >  db.pomodori.findOne() {   "_id"  :  ObjectId("4d8916ed31f2381480000021"),   "duration"  :  1500,   "interruptions"  :  0,   "after_break_of"  :  0,   "started_at"  :  "Mon  Mar  14  2011  08:05:00  GMT+0100  (CET)",   "squashed_at"  :  "Mon  Mar  14  2011  08:07:31  GMT+0100  (CET)",   "in_day"  :  {     "position"  :  1,     "is_last"  :  false   },   "task_id"  :  ObjectId("4d8916ec31f2381480000014"),   "user_id"  :  ObjectId("4d8916ec31f2381480000010"),   "annotations"  :  [  ] }
  • 64. pomodori (find who is ticking) now = Time.now.yesterday.beginning_of_day + 10.hours timestamp_of_now = now.to_i ticking = db["pomodori"].find( :$where => <<-EOF var startedAt = this.started_at.getTime()/1000 return ((startedAt + this.duration) > #{timestamp_of_now}) && (startedAt < #{timestamp_of_now}) EOF ) puts ticking.map{|pomodoro| pomodoro["_id"]}
  • 65. pomodori (find who is ticking) now = Time.now.yesterday.beginning_of_day + 10.hours timestamp_of_now = now.to_i ticking = db["pomodori"].find( :$where => <<-EOF var startedAt = this.started_at.getTime()/1000 return $  ruby  src/find_who_is_ticking.rb   4d8916ef31f238148000011d ((startedAt + this.duration) > #{timestamp_of_now}) && 4d8916f231f2381480000271 (startedAt < #{timestamp_of_now}) 4d8916f931f23814800004dd 4d8916f931f23814800004e0 EOF ) puts ticking.map{|pomodoro| pomodoro["_id"]}
  • 66. pomodori (find who is ticking for a user) now = Time.now.yesterday.beginning_of_day + 10.hours timestamp_of_now = now.to_i user_id = BSON::ObjectId.from_string("4d8916ec31f2381480000010") ticking = db["pomodori"].find( :user_id => user_id, :$where => <<-EOF var startedAt = this.started_at.getTime()/1000 return ((startedAt + this.duration) > #{timestamp_of_now}) && (startedAt < #{timestamp_of_now}) EOF ) puts ticking.map{|pomodoro| pomodoro["_id"]}
  • 67. pomodori (find who is ticking for an user) now = Time.now.yesterday.beginning_of_day + 10.hours timestamp_of_now = now.to_i user_id = BSON::ObjectId.from_string("4d8916ec31f2381480000010") ticking = db["pomodori"].find( :user_id => user_id, :$where => <<-EOF $  ruby  src/find_who_is_ticking_for_an_user.rb   4d8916ef31f238148000011d var startedAt = this.started_at.getTime()/1000 return ((startedAt + this.duration) > #{timestamp_of_now}) && (startedAt < #{timestamp_of_now}) EOF ) puts ticking.map{|pomodoro| pomodoro["_id"]}
  • 68. pomodori (related to tasks tagged with “maps”) related_to_maps = db["pomodori"].find( :$where => <<-EOF db.tasks.findOne({ "_id": this.task_id }).tags.indexOf("maps") >= 0 EOF ) puts related_to_maps.map{|pomodoro| pomodoro["_id"]}
  • 69. pomodori (related to tasks tagged with “maps”) related_to_maps = db["pomodori"].find( :$where => <<-EOF db.tasks.findOne({ "_id": this.task_id }).tags.indexOf("maps") >= 0 EOF ) $  ruby  src/related_to_maps.rb   puts related_to_maps.map{|pomodoro| pomodoro["_id"]} 4d8916fa31f2381480000579 4d8916fa31f238148000057b 4d8916fa31f238148000057d 4d8916fa31f2381480000580
  • 70. pomodori (don’t be carried away :-)) related_to_maps = db["pomodori"].find( :$where => <<-EOF db.tasks.findOne({ "_id": this.task_id }).tags.indexOf("maps") >= 0 EOF ) $  ruby  src/related_to_maps.rb   puts related_to_maps.explain {  "cursor"=>"BasicCursor",      "nscanned"=>461,      "nscannedObjects"=>461,    "n"=>4,    "millis"=>52,      "indexBounds"=>{},      "allPlans"=>[...] }
  • 71. pomodori (related to... a better solution) related_to_maps = db["pomodori"].find(:task_id => { :$in => db["tasks"].find( {:tags => "maps"}, :fields => {:_id => 1} ).map{|task| task["_id"]} }) $  ruby  src/related_to_maps.rb   4d8916fa31f2381480000579 puts related_to_maps.map{|pomodoro| pomodoro["_id"]} 4d8916fa31f238148000057b 4d8916fa31f238148000057d 4d8916fa31f2381480000580
  • 72. pomodori (related to... a better solution) related_to_maps = db["pomodori"].find(:task_id => { :$in => db["tasks"].find( {:tags => "maps"}, :fields => {:_id => 1} ).map{|task| task["_id"]} }) $  ruby  src/related_to_maps.rb   {  "cursor"=>"BtreeCursor  tags_1", puts related_to_maps.map{|pomodoro| pomodoro["_id"]}    "nscanned"=>3,    "nscannedObjects"=>3,    "n"=>3,    "millis"=>0,    ... } {  "cursor"=>"BtreeCursor  task_id_1  multi",    "nscanned"=>4,    "nscannedObjects"=>4,    "n"=>4,    "millis"=>0,    ... }
  • 73. query & design real time analytics with increments
  • 74. keep track of url’s visits (upsert with custom id) result = db["visits"].update( { :_id => Digest::MD5.hexdigest(url) }, { :$inc => { :hits => 1 } }, :upsert => true, :safe => true ) puts "Update: #{result.inspect}" puts db["visits"].find_one(:_id => Digest::MD5.hexdigest(url))
  • 75. keep track of url’s visits (upsert with custom id) result = db["visits"].update( { :_id => Digest::MD5.hexdigest(url) }, { :$inc => { :hits => 1 } }, :upsert => true, :safe => true ) $  ruby  src/realtime_analytics.rb   Update:  { puts "Update: #{result.inspect}"    "err"=>nil,    "updatedExisting"=>false,    "n"=>1, puts db["visits"].find_one(:_id => Digest::MD5.hexdigest(url))    "ok"=>1.0 } {"_id"=>"2d86a774beffe90e715a8028c7bd177b",  "hits"=>1} $  ruby  src/realtime_analytics.rb   Update:  {    "err"=>nil,    "updatedExisting"=>true,    "n"=>1,    "ok"=>1.0 } {"_id"=>"2d86a774beffe90e715a8028c7bd177b",  "hits"=>2}
  • 76. url’s visits aggregated by time (upsert with multiple documents) url_digest = Digest::MD5.hexdigest(url) ids = [ [ url_digest, Time.now.strftime("%Y%m%d") ].join("-"), [ url_digest, Time.now.strftime("%Y%m") ].join("-"), [ url_digest, Time.now.strftime("%Y") ].join("-"), [ url_digest, user_id ].join("-") ] puts "Expect to upsert: n#{ids}" result = db["visits"].update( { :_id => { :$in => ids } }, { :$inc => { :hits => 1 } }, :multi => true, :upsert => true, :safe => true ) puts result.inspect puts db["visits"].all
  • 77. url’s visits aggregated by time (upsert with multiple documents) url_digest = Digest::MD5.hexdigest(url) ids = [ [ url_digest, Time.now.strftime("%Y%m%d") ].join("-"), [ url_digest, Time.now.strftime("%Y%m") ].join("-"), [ url_digest, Time.now.strftime("%Y") ].join("-"), [ url_digest, user_id ].join("-") ] $  ruby  src/realtime_analytics_with_aggregation.rb   Expect  to  upsert:[ puts "Expect to upsert:    "2d86a774beffe90e715a8028c7bd177b-­‐20110323", n#{ids}"    "2d86a774beffe90e715a8028c7bd177b-­‐201103",    "2d86a774beffe90e715a8028c7bd177b-­‐2011",   result = db["visits"].update(    "2d86a774beffe90e715a8028c7bd177b-­‐4d899fab31f238165c000001" { :_id => { :$in => ids } }, ] { :$inc => { :hits => {  "err"=>nil, 1 } }, :multi => true,    "updatedExisting"=>false,    "upserted"=>BSON::ObjectId('4d899fabe23bd37e768ae76d'),   :upsert => true,    "n"=>1, :safe => true    "ok"=>1.0 } ) puts result.inspect {"_id"=>BSON::ObjectId('4d899fabe23bd37e768ae76d'),  "hits"=>1} puts db["visits"].all
  • 78. url’s visits aggregated by time (upsert with multiple documents) url_digest = Digest::MD5.hexdigest(url) ids = [ [ url_digest, Time.now.strftime("%Y%m%d") ].join("-"), [ url_digest, Time.now.strftime("%Y%m") ].join("-"), [ url_digest, Time.now.strftime("%Y") ].join("-"), [ url_digest, user_id ].join("-") ] $  ruby  src/realtime_analytics_with_aggregation.rb   Expect  to  upsert:[ puts "Expect to upsert:    "2d86a774beffe90e715a8028c7bd177b-­‐20110323", n#{ids}"    "2d86a774beffe90e715a8028c7bd177b-­‐201103",    "2d86a774beffe90e715a8028c7bd177b-­‐2011",   result = db["visits"].update(    "2d86a774beffe90e715a8028c7bd177b-­‐4d899fab31f238165c000001" { :_id => { :$in => ids } }, ] { :$inc => { :hits => {  "err"=>nil, 1 } }, :multi => true,    "updatedExisting"=>false,    "upserted"=>BSON::ObjectId('4d899fabe23bd37e768ae76e'),   :upsert => true,    "n"=>1, :safe => true    "ok"=>1.0 } ) puts result.inspect {"_id"=>BSON::ObjectId('4d899fabe23bd37e768ae76d'),  "hits"=>1} puts db["visits"].all {"_id"=>BSON::ObjectId('4d899fabe23bd37e768ae76e'),  "hits"=>1}
  • 79. url’s visits aggregated by time (look before you leap) result = db["visits"].update( { :_id => { :$in => ids } }, { :$inc => { :hits => 1 } }, :multi => true, :upsert => true, :safe => true ) if result["n"] != ids.size updated_ids = db["visits"].find( { :_id => { :$in => ids } }, :fields => { :_id => true } ).map{|document| document["_id"]} db["visits"].insert((ids - updated_ids).map do |id| { :_id => id, :hits => 1 } end) db["visits"].remove(:_id => result["upserted"]) if result["upserted"] end
  • 80. url’s visits aggregated by time (look before you leap) result = db["visits"].update( { :_id => { :$in => ids } }, { :$inc => { :hits => 1 } }, :multi => true, :upsert => true, :safe => true ) $  ruby  src/realtime_analytics_with_aggregation.rb {  "err"=>nil,      "updatedExisting"=>false, if result["n"] != ids.size    "upserted"=>BSON::ObjectId('4d89a5ebe23bd37e768ae76f'),      "n"=>1, updated_ids = db["visits"].find(    "ok"=>1.0 { :_id => { :$in => ids } }, :fields => { :_id => true } ).map{|document| document["_id"]} } {"_id"=>"<url_digest>-­‐20110323",  "hits"=>1} db["visits"].insert((ids - updated_ids).map do |id| {"_id"=>"<url_digest>-­‐201103",  "hits"=>1} {"_id"=>"<url_digest>-­‐2011",  "hits"=>1} { :_id => id, :hits {"_id"=>"<url_digest>-­‐4d89a43b31f238167a000001",  "hits"=>1} => 1 } end) db["visits"].remove(:_id => result["upserted"]) if result["upserted"] end
  • 81. url’s visits aggregated by time (look before you leap) result = db["visits"].update( { :_id => { :$in => ids } }, { :$inc => { :hits => 1 } }, :multi => true, :upsert => true, :safe => true ) $  ruby  src/realtime_analytics_with_aggregation.rb {  "err"=>nil,    "updatedExisting"=>true, if result["n"] != ids.size    "n"=>3,    "ok"=>1.0 updated_ids = db["visits"].find( } { :_id => { :$in => ids } }, :fields => { :_id => true } {"_id"=>"<url_digest>-­‐20110323",  "hits"=>2} ).map{|document| document["_id"]} {"_id"=>"<url_digest>-­‐201103",  "hits"=>2} {"_id"=>"<url_digest>-­‐2011",  "hits"=>2} db["visits"].insert((ids - updated_ids).map do |id| {"_id"=>"<url_digest>-­‐4d89a43b31f238167a000001",  "hits"=>1} {"_id"=>"<url_digest>-­‐4d89a44231f238167e000001",  "hits"=>1} { :_id => id, :hits => 1 } end) db["visits"].remove(:_id => result["upserted"]) if result["upserted"] end
  • 82. query & design incremental map/reduce
  • 83. map/reduce hits per day (we have raw events) >  db.visit_events.findOne() {   "_id"  :  ObjectId("4d89fc6531f2381d2c00000b"),   "url"  :  "8aa8b68e0b849f70df6dbb3031c6182b",   "user_id"  :  ObjectId("4d89fc6531f2381d2c000005"),   "at"  :  "Thu  Jan  13  2011  08:00:06  GMT+0100  (CET)" }
  • 84. map/reduce hits per day (generate data WITH something like) def generate_events(visits, db, now) visits.times do |time| now += BETWEEN_VISITS.sample.seconds db["visit_events"].insert( :url => Digest::MD5.hexdigest(URLS.sample), :user_id => USERS.sample[:id], :at => now ) end end generate_events(10_000, db, now)
  • 85. map/reduce hits per day (simple map/reduce) MAP = <<-EOF function() { emit([ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 }) } EOF REDUCE = <<-EOF function(key, values) { var hits = 0 for(var index in values) hits += values[index]["hits"] return { "hits": hits } } EOF result = db["visit_events"].map_reduce( MAP, REDUCE, :out => "visits", :raw => true, :verbose => true ) puts result.inspect
  • 86. map/reduce hits per day (Date.prototype.format doesn’t exist) MAP = <<-EOF function() { emit([ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 }) } EOF REDUCE = <<-EOF function(key, values) { var hits = 0 for(var index in values) hits += values[index]["hits"] return { "hits": hits } } EOF result = db["visit_events"].map_reduce( MAP, REDUCE, :out => "visits", :raw => true, :verbose => true ) puts result.inspect
  • 87. map/reduce hits per day (implement format in place) MAP = <<-EOF function() { Date.prototype.format = function(format) { ... } emit([ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 }) } EOF REDUCE = <<-EOF function(key, values) { var hits = 0 for(var index in values) hits += values[index]["hits"] return { "hits": hits } } EOF
  • 88. map/reduce hits per day (implement format only if needed) MAP = <<-EOF function() { if (!Date.prototype.format) { Date.prototype.format = function(format) { ... } } emit([ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 }) } EOF REDUCE = <<-EOF function(key, values) { var hits = 0 for(var index in values) hits += values[index]["hits"] return { "hits": hits } } EOF
  • 89. map/reduce hits per day (implement format once and for all) db[Mongo::DB::SYSTEM_JS_COLLECTION].save( :_id => "formatDate", :value => BSON::Code.new( <<-EOF function(date, format) { if (!Date.prototype.format) { Date.prototype.format = function(format) { ... } } return date.format(format) } EOF ) ) MAP = <<-EOF function() { emit([ this.url, formatDate(this.at, "Ymd") ].join("-"), {"hits":1}) } EOF
  • 90. map/reduce hits per day (implement format once and for all) db[Mongo::DB::SYSTEM_JS_COLLECTION].save( :_id => "load", :value => BSON::Code.new( <<-EOF function(module) { if ((module === "date") && !Date.prototype.format) { Date.prototype.format = function(format) { ... } } return true } EOF ) ) MAP = <<-EOF function() { load("date") && emit( [ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 } ) } EOF
  • 91. map/reduce hits per day (ok, but could be taking too long) MAP = <<-EOF function() { emit([ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 }) } EOF REDUCE = <<-EOF $  ruby  src/incremental_mr.rb function(key, values)   { {  "result"=>"visits", var hits = 0    "timeMillis"=>4197, for(var index in values) hits += values[index]["hits"]    "timing"=>  {        "mapTime"=>3932, return { "hits": hits }        "emitLoop"=>4170, }        "total"=>4197 EOF    },    "counts"=>  {        "input"=>10000, result = db["visit_events"].map_reduce(        "emit"=>10000,        "output"=>200 MAP, REDUCE, :out => "visits", :raw =>    }, true, :verbose => true )    "ok"=>1.0 } puts result.inspect
  • 92. map/reduce hits per day (ok, every time we need to start over) >  db.visits.find()                                                   {  "_id"  :  "019640ff7952425b1b8695605459d223-­‐20110316",    "value"  :  {  "hits"  :  47  } } {  "_id"  :  "019640ff7952425b1b8695605459d223-­‐20110317",    "value"  :  {  "hits"  :  49  } } {  "_id"  :  "019640ff7952425b1b8695605459d223-­‐20110318",    "value"  :  {  "hits"  :  59  }   } {  "_id"  :  "019640ff7952425b1b8695605459d223-­‐20110319",    "value"  :  {  "hits"  :  37  }   }
  • 93. map/reduce hits per day (incremental with savepoints) visit-elements visit collection collection map/reduce on last changed upsert documents temporary collection
  • 94. map/reduce hits per day (incremental with savepoints) db.create_collection("visit_events", :capped => true, visit-elements :max => 50_000, :size => 5_000_000 collection ) map/reduce on last changed documents temporary collection
  • 95. map/reduce hits per day (incremental with savepoints) FINALIZE = <<-EOF function(key, value) { db.visits.update( visit { "_id": key }, collection { $inc: { "hits": value.hits } }, true ) } EOF upsert temporary collection
  • 96. map/reduce hits per day (incremental with savepoints) generate_events(number_of_events, db, now) from = from_last_updated(db) to = to_last_inserted(db) result = db["visit_events"].map_reduce( MAP, REDUCE, :finalize => FINALIZE, :query => { :_id => { :$gt => from, :$lte => to } }, :raw => true, :verbose => true ) db["visits"].save(:_id => "savepoint", :at => to)
  • 97. map/reduce hits per day (incremental with savepoints) generate_events(number_of_events, db, now) from = from_last_updated(db) to = to_last_inserted(db) result = db["visit_events"].map_reduce( MAP, REDUCE, $  ruby  src/incremental_mr.rb  -­‐e  10000   :finalize => FINALIZE,{  "result"=>"tmp.mr.mapreduce_1300892393_60", :query => { :_id => { :$gt => from, :$lte => to } },    "timeMillis"=>4333,    "timing"=>{...}, :raw => true,    "counts"=>{ :verbose => true        "input"=>10000,          "emit"=>10000, )        "output"=>196    },    "ok"=>1.0 db["visits"].save(:_id => "savepoint", } :at => to) {  "_id"=>"05241f07d0e3ab6a227e67b33ea0b509-­‐20110113",          "hits"=>26 }
  • 98. map/reduce hits per day (incremental with savepoints) generate_events(number_of_events, db, now) from = from_last_updated(db) to = to_last_inserted(db) result = db["visit_events"].map_reduce( MAP, REDUCE, $  ruby  src/incremental_mr.rb  -­‐e  4999   :finalize => FINALIZE,{  "result"=>"tmp.mr.mapreduce_1300892399_61", :query => { :_id => { :$gt => from, :$lte => to } },    "timeMillis"=>2159,    "timing"=>{...}, :raw => true,    "counts"=>{ :verbose => true        "input"=>4999,        "emit"=>4999, )        "output"=>146    },    "ok"=>1.0 db["visits"].save(:_id => "savepoint", } :at => to) {  "_id"=>"05241f07d0e3ab6a227e67b33ea0b509-­‐20110113",      "hits"=>64 }
  • 99. map/reduce hits per day (incremental with savepoints) def savepoint(db) db["visits"].find_one(:_id => "savepoint") or { "at" => BSON::ObjectId.from_time(10.years.ago) } end def from_last_updated(db) savepoint(db)["at"] end def to_last_inserted(db) db["visit_events"].find.sort([:_id, Mongo::DESCENDING]).first["_id"] end
  • 100. query & design external map/reduce
  • 101. use an external mongod process to execute map/reduce jobs master slave replicate data
  • 102. use an external mongod process to execute map/reduce jobs master slave map/reduce on last replicated data
  • 103. use an external mongod process to execute map/reduce jobs master slave push back results
  • 104. look at the shell source: it is more powerful than you think
  • 105. query & design documents embedded or linked?
  • 106. life cycle: when the root document is deleted, can it stand on its own? if yes if no embedded linked
  • 107. are they always fetched together? if yes if no embedded linked
  • 108. are its attributes used to find the root document? if yes if no embedded linked
  • 109. is it small? if yes if no embedded linked
  • 110. is it unique, or are there fewer than hundreds? if yes if no embedded linked
  • 112. scale distributed reads with replica sets
  • 113. slave replicate read master read/write slave read replicate + Durability + fault tolerance
  • 114. scale (seems stupid but...) pump your hardware
  • 115. scale (seems stupid but...) call 10gen, sure they can help :-)
  • 116. Questions? gabriele lana gabriele.lana@cleancode.it twitter: @gabrielelana http://joind.in/2943